Spaces:
Running
Running
# File: orchestrator/provenance.py | |
from datetime import datetime, timezone

from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, create_engine
from sqlalchemy.orm import declarative_base, relationship, sessionmaker
Base = declarative_base() | |
class Paper(Base):
    """ORM model mapped to the ``papers`` table.

    Each row stores a paper's identifier, title, authors, and abstract,
    the time the row was created, and the ``Run`` rows that point at it.
    """

    __tablename__ = 'papers'

    # String primary key; no default generator is configured here, so the
    # code inserting the row has to provide the value.
    id = Column(String, primary_key=True)
    title = Column(String)
    # Stored as a single string.
    # NOTE(review): presumably a comma-separated list of names -- confirm
    # against the code that writes this column.
    authors = Column(String)
    abstract = Column(String)
    # Filled in at insert time when the caller gives no value. The callable
    # yields a naive UTC datetime, i.e. the same value datetime.utcnow()
    # produced, without calling the API deprecated since Python 3.12.
    fetched_at = Column(
        DateTime,
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
    )
    # Collection side of the Paper <-> Run relationship (mirrors Run.paper).
    runs = relationship('Run', back_populates='paper')
class Run(Base):
    """ORM model mapped to the ``runs`` table.

    Each row stores the output recorded for one cell index, the time the
    row was created, and a foreign key to the ``papers`` row it belongs to.
    """

    __tablename__ = 'runs'

    # Surrogate integer key assigned by the database.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # References papers.id; nullable, since no NOT NULL constraint is set.
    paper_id = Column(String, ForeignKey('papers.id'))
    # NOTE(review): presumably the position of the executed cell within a
    # notebook -- confirm against the code that writes this column.
    cell_index = Column(Integer)
    output = Column(String)
    # Filled in at insert time when the caller gives no value. The callable
    # yields a naive UTC datetime, i.e. the same value datetime.utcnow()
    # produced, without calling the API deprecated since Python 3.12.
    executed_at = Column(
        DateTime,
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
    )
    # Scalar side of the Paper <-> Run relationship (mirrors Paper.runs).
    paper = relationship('Paper', back_populates='runs')
def init_db(db_url: str):
    """Prepare the database at *db_url* and return a session factory.

    Builds an engine for the given URL, issues ``create_all`` for every
    table registered on ``Base.metadata``, and returns a ``sessionmaker``
    bound to that engine. Callers invoke the returned factory to obtain
    sessions.
    """
    db_engine = create_engine(db_url)

    # Make sure the mapped tables exist before any session is handed out.
    Base.metadata.create_all(db_engine)

    session_factory = sessionmaker(bind=db_engine)
    return session_factory
# File: scripts/ingest.py | |
import sys | |
import yaml | |
from orchestrator.client import MCPClient | |
""" | |
Usage: | |
python ingest.py "search query" | |
""" | |
def _authors_to_str(authors):
    """Return *authors* as one comma-separated string.

    Accepts a missing/empty value (returns ``''``), an already-joined
    string (returned unchanged, so it is not split into characters), or an
    iterable of names (each converted with ``str`` and joined with ``','``).
    """
    if not authors:
        return ''
    if isinstance(authors, str):
        return authors
    return ','.join(str(name) for name in authors)


def main():
    """Search the configured sources for a query and insert the hits.

    Reads the query from ``sys.argv[1]`` and the MCP server settings from
    ``config.yaml`` in the current working directory. Results from the web
    search and PubMed clients are collected best-effort: a failure in one
    source is printed and the other source is still used. Every collected
    record is then sent to the chroma client via ``chroma.insert``.

    Exits with status 1 when no query argument is given. Errors raised
    while reading the config or inserting a record are not caught here.
    """
    if len(sys.argv) < 2:
        print('Please provide a search query.')
        sys.exit(1)
    query = sys.argv[1]

    # Context manager so the config file handle is closed right after
    # parsing. YAML is read as UTF-8 rather than the platform default.
    with open('config.yaml', encoding='utf-8') as cfg_file:
        cfg = yaml.safe_load(cfg_file)

    servers = cfg['mcp_servers']
    web = MCPClient(servers['web_search'])
    pubmed = MCPClient(servers['pubmed'])
    chroma = MCPClient(servers['chroma'])

    print(f'Ingesting papers for query: {query}')

    # (label used in the error message, client, method name, parameters)
    searches = (
        ('Web search', web, 'web_search.search', {'q': query}),
        ('PubMed', pubmed, 'metatool.query', {'source': 'PubMed', 'q': query}),
    )
    papers = []
    for label, client, method, params in searches:
        try:
            # A falsy result (None, empty list) contributes nothing.
            papers += client.call(method, params) or []
        except Exception as e:
            # Best-effort: report the failing source and keep going.
            print(f'{label} error:', e)

    for paper in papers:
        pid = paper.get('id')
        # A record may carry an explicit null abstract; send '' instead.
        txt = paper.get('abstract') or ''
        meta = {
            'title': paper.get('title'),
            'authors': _authors_to_str(paper.get('authors')),
        }
        chroma.call('chroma.insert', {'id': pid, 'text': txt, 'metadata': meta})

    print('Done ingesting!')


if __name__ == '__main__':
    main()