# File: orchestrator/provenance.py
# SQLAlchemy models for the 'papers' and 'runs' tables, plus a helper that
# creates the schema and returns a session factory.
from datetime import datetime, timezone

from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, create_engine
from sqlalchemy.orm import declarative_base, relationship, sessionmaker

Base = declarative_base()


def _utcnow() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Yields the same kind of value as ``datetime.utcnow()`` (naive, in UTC)
    without calling that function, which is deprecated since Python 3.12.
    The tzinfo is stripped so values stored in the existing timezone-less
    ``DateTime`` columns keep their current form.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


class Paper(Base):
    """A row in the ``papers`` table; parent of zero or more ``Run`` rows."""

    __tablename__ = 'papers'

    # String primary key supplied by the caller (not auto-generated).
    id = Column(String, primary_key=True)
    title = Column(String)
    authors = Column(String)
    abstract = Column(String)
    # Filled in at insert time when no value is given.
    fetched_at = Column(DateTime, default=_utcnow)

    runs = relationship('Run', back_populates='paper')


class Run(Base):
    """A row in the ``runs`` table, linked to a ``Paper`` via ``paper_id``."""

    __tablename__ = 'runs'

    id = Column(Integer, primary_key=True, autoincrement=True)
    paper_id = Column(String, ForeignKey('papers.id'))
    cell_index = Column(Integer)
    output = Column(String)
    # Filled in at insert time when no value is given.
    executed_at = Column(DateTime, default=_utcnow)

    paper = relationship('Paper', back_populates='runs')


def init_db(db_url: str):
    """Create all tables declared on ``Base`` and return a session factory.

    Args:
        db_url: Database URL passed straight to ``create_engine``.

    Returns:
        A ``sessionmaker`` bound to the new engine. Call it to obtain a
        session; it is a factory, not a session itself.
    """
    engine = create_engine(db_url)
    Base.metadata.create_all(engine)
    return sessionmaker(bind=engine)


# File: scripts/ingest.py
import sys

import yaml

from orchestrator.client import MCPClient

""" Usage: python ingest.py "search query" """


def _load_config(path: str = 'config.yaml'):
    """Parse the YAML config at *path*, closing the file afterwards."""
    with open(path, encoding='utf-8') as fh:
        return yaml.safe_load(fh)


def _paper_metadata(paper) -> dict:
    """Build the metadata dict stored alongside a paper's text.

    ``authors`` may be missing, ``None``, a single string, or an iterable of
    strings; the result is always one comma-joined string.
    """
    authors = paper.get('authors') or []
    if isinstance(authors, str):
        # Joining a bare string would put a comma between every character.
        joined = authors
    else:
        joined = ','.join(authors)
    return {'title': paper.get('title'), 'authors': joined}


def main(argv=None) -> int:
    """Search the configured sources for a query and insert hits into Chroma.

    Args:
        argv: Argument vector; defaults to ``sys.argv``. ``argv[1]`` is the
            search query.

    Returns:
        Process exit status: 0 on success, 1 when the query is missing or at
        least one paper could not be inserted.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        print('Please provide a search query.')
        return 1
    query = args[1]

    cfg = _load_config()
    web = MCPClient(cfg['mcp_servers']['web_search'])
    pubmed = MCPClient(cfg['mcp_servers']['pubmed'])
    chroma = MCPClient(cfg['mcp_servers']['chroma'])

    print(f'Ingesting papers for query: {query}')
    papers = []
    # Each source is best-effort: a failure is reported and the other source
    # is still queried.
    try:
        papers += web.call('web_search.search', {'q': query}) or []
    except Exception as e:
        print('Web search error:', e)
    try:
        papers += pubmed.call('metatool.query', {'source': 'PubMed', 'q': query}) or []
    except Exception as e:
        print('PubMed error:', e)

    failed = 0
    for paper in papers:
        # Same best-effort policy as the searches: one bad record or one
        # failed insert must not discard the remaining papers.
        try:
            pid = paper.get('id')
            # `or ''` also covers an explicit null abstract.
            txt = paper.get('abstract') or ''
            meta = _paper_metadata(paper)
            chroma.call('chroma.insert', {'id': pid, 'text': txt, 'metadata': meta})
        except Exception as e:
            failed += 1
            print('Chroma insert error:', e)

    if failed:
        # Keep a non-zero exit status so callers can still detect failure.
        print(f'Failed to ingest {failed} of {len(papers)} papers.')
        return 1
    print('Done ingesting!')
    return 0


if __name__ == '__main__':
    sys.exit(main())