# MCP_Research / orchestrator/provenance.py
# mgbam: Update orchestrator/provenance.py
# Commit 0170414 (verified)
# File: orchestrator/provenance.py
from datetime import datetime, timezone

from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, create_engine
from sqlalchemy.orm import declarative_base, relationship, sessionmaker
# Shared declarative base: the ORM models in this module subclass it, and
# init_db() uses its metadata to create their tables.
Base = declarative_base()
class Paper(Base):
    """ORM model for one paper, stored in the ``papers`` table.

    A paper owns any number of ``Run`` rows, reachable through ``runs``;
    each ``Run`` points back through its ``paper`` attribute.
    """

    __tablename__ = 'papers'

    # String primary key; no default is defined, so it must be supplied on insert.
    id = Column(String, primary_key=True)
    title = Column(String)
    authors = Column(String)
    abstract = Column(String)
    # Filled in at insert time when no value is given. The value is a naive
    # datetime expressed in UTC -- the same thing datetime.utcnow() produced --
    # but built without that call, which is deprecated since Python 3.12.
    fetched_at = Column(
        DateTime,
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
    )

    # Bidirectional one-to-many link; the other side is Run.paper.
    runs = relationship('Run', back_populates='paper')
class Run(Base):
    """ORM model for one recorded execution, stored in the ``runs`` table.

    Every run references the ``Paper`` it belongs to through ``paper_id``
    and the ``paper`` relationship.
    """

    __tablename__ = 'runs'

    # Surrogate integer key generated by the database.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Foreign key to papers.id.
    paper_id = Column(String, ForeignKey('papers.id'))
    # NOTE(review): presumably the position of the executed cell within a
    # notebook-like document -- confirm against the code that writes runs.
    cell_index = Column(Integer)
    output = Column(String)
    # Filled in at insert time when no value is given. The value is a naive
    # datetime expressed in UTC -- the same thing datetime.utcnow() produced --
    # but built without that call, which is deprecated since Python 3.12.
    executed_at = Column(
        DateTime,
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
    )

    # Bidirectional many-to-one link; the other side is Paper.runs.
    paper = relationship('Paper', back_populates='runs')
def init_db(db_url: str):
    """Prepare the database at ``db_url`` and return a session factory.

    Args:
        db_url: Database URL handed straight to ``create_engine``.

    Returns:
        A ``sessionmaker`` bound to the newly created engine; call it to
        obtain sessions.
    """
    db_engine = create_engine(db_url)
    # Emit the DDL for every model registered on Base before handing out sessions.
    Base.metadata.create_all(db_engine)
    session_factory = sessionmaker(bind=db_engine)
    return session_factory
# File: scripts/ingest.py
import sys
import yaml
from orchestrator.client import MCPClient
"""
Usage:
python ingest.py "search query"
"""
def build_metadata(paper):
    """Build the metadata dict that is stored alongside a paper's text.

    Args:
        paper: Mapping describing one search result; only the ``title`` and
            ``authors`` keys are read.

    Returns:
        ``{'title': ..., 'authors': ...}`` where ``authors`` is always a
        single string: the author entries joined with commas, or ``''`` when
        the key is missing or empty.
    """
    authors = paper.get('authors') or []
    if isinstance(authors, str):
        # Already a single string: joining it would insert a comma between
        # every character.
        joined = authors
    else:
        joined = ','.join(str(name) for name in authors)
    return {'title': paper.get('title'), 'authors': joined}


def _search(client, method, params, label):
    """Run one best-effort search and return its results as a list.

    A failing source is reported on stdout and contributes no results, so
    the remaining sources can still be ingested.
    """
    try:
        return list(client.call(method, params) or [])
    except Exception as e:  # deliberate: one broken source must not abort the run
        print(f'{label} error:', e)
        return []


def main(argv):
    """Search the configured sources for a query and insert the hits into Chroma.

    Args:
        argv: Command-line arguments, program name first; ``argv[1]`` is the
            search query.

    Returns:
        Process exit code: ``1`` when no query was given, ``0`` otherwise.
    """
    if len(argv) < 2:
        print('Please provide a search query.')
        return 1
    query = argv[1]

    # Context manager so the config file handle is closed after parsing.
    with open('config.yaml', encoding='utf-8') as config_file:
        cfg = yaml.safe_load(config_file)
    servers = cfg['mcp_servers']
    web = MCPClient(servers['web_search'])
    pubmed = MCPClient(servers['pubmed'])
    chroma = MCPClient(servers['chroma'])

    print(f'Ingesting papers for query: {query}')
    papers = _search(web, 'web_search.search', {'q': query}, 'Web search')
    papers += _search(
        pubmed, 'metatool.query', {'source': 'PubMed', 'q': query}, 'PubMed'
    )

    for paper in papers:
        chroma.call('chroma.insert', {
            'id': paper.get('id'),
            # `or ''` also covers an explicit None abstract, not just a missing key.
            'text': paper.get('abstract') or '',
            'metadata': build_metadata(paper),
        })
    print('Done ingesting!')
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))