mgbam committed on
Commit
0170414
·
verified ·
1 Parent(s): e875333

Update orchestrator/provenance.py

Browse files
Files changed (1) hide show
  1. orchestrator/provenance.py +41 -7
orchestrator/provenance.py CHANGED
@@ -1,6 +1,5 @@
1
- # SQLAlchemy models for provenance
2
  # File: orchestrator/provenance.py
3
- from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, create_engine
4
  from sqlalchemy.orm import declarative_base, relationship, sessionmaker
5
  from datetime import datetime
6
 
@@ -13,7 +12,7 @@ class Paper(Base):
13
  authors = Column(String)
14
  abstract = Column(String)
15
  fetched_at = Column(DateTime, default=datetime.utcnow)
16
- runs = relationship("Run", back_populates="paper")
17
 
18
  class Run(Base):
19
  __tablename__ = 'runs'
@@ -22,11 +21,46 @@ class Run(Base):
22
  cell_index = Column(Integer)
23
  output = Column(String)
24
  executed_at = Column(DateTime, default=datetime.utcnow)
25
- paper = relationship("Paper", back_populates="runs")
26
-
27
- # Utility to initialize and get a session
28
 
29
  def init_db(db_url: str):
30
  engine = create_engine(db_url)
31
  Base.metadata.create_all(engine)
32
- return sessionmaker(bind=engine)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # File: orchestrator/provenance.py
2
+ from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, create_engine
3
  from sqlalchemy.orm import declarative_base, relationship, sessionmaker
4
  from datetime import datetime
5
 
 
12
  authors = Column(String)
13
  abstract = Column(String)
14
  fetched_at = Column(DateTime, default=datetime.utcnow)
15
+ runs = relationship('Run', back_populates='paper')
16
 
17
  class Run(Base):
18
  __tablename__ = 'runs'
 
21
  cell_index = Column(Integer)
22
  output = Column(String)
23
  executed_at = Column(DateTime, default=datetime.utcnow)
24
+ paper = relationship('Paper', back_populates='runs')
 
 
25
 
26
  def init_db(db_url: str):
27
  engine = create_engine(db_url)
28
  Base.metadata.create_all(engine)
29
+ return sessionmaker(bind=engine)
30
+
31
+ # File: scripts/ingest.py
32
+ import sys
33
+ import yaml
34
+ from orchestrator.client import MCPClient
35
+
36
+ """
37
+ Usage:
38
+ python ingest.py "search query"
39
+ """
40
def format_authors(authors):
    """Return *authors* as one comma-separated string.

    Accepts a list/tuple of names, a single pre-joined string, or a missing
    value. A plain string is returned unchanged (joining it would insert a
    comma between every character), and ``None``/empty input yields ``''``.
    """
    if not authors:
        return ''
    if isinstance(authors, str):
        return authors
    return ','.join(str(name) for name in authors)


def main(argv):
    """Search for papers matching ``argv[1]`` and insert them into Chroma.

    Reads MCP server settings from ``config.yaml`` in the current directory,
    queries the web-search and PubMed servers (each best-effort: a failure is
    printed and the other source is still used), then sends one
    ``chroma.insert`` call per returned paper.

    Returns the process exit status: 1 when no query was supplied, else 0.
    Errors from loading the config or from ``chroma.insert`` propagate.
    """
    if len(argv) < 2:
        print('Please provide a search query.')
        return 1
    query = argv[1]

    # Open in binary mode so PyYAML does its own encoding detection, and use
    # a context manager so the handle is closed even if parsing fails.
    with open('config.yaml', 'rb') as cfg_file:
        cfg = yaml.safe_load(cfg_file)
    servers = cfg['mcp_servers']
    web = MCPClient(servers['web_search'])
    pubmed = MCPClient(servers['pubmed'])
    chroma = MCPClient(servers['chroma'])

    print(f'Ingesting papers for query: {query}')
    papers = []
    try:
        papers += web.call('web_search.search', {'q': query}) or []
    except Exception as e:
        print('Web search error:', e)
    try:
        papers += pubmed.call('metatool.query', {'source': 'PubMed', 'q': query}) or []
    except Exception as e:
        print('PubMed error:', e)

    for paper in papers:
        meta = {
            'title': paper.get('title'),
            'authors': format_authors(paper.get('authors')),
        }
        chroma.call(
            'chroma.insert',
            {'id': paper.get('id'), 'text': paper.get('abstract', ''), 'metadata': meta},
        )
    print('Done ingesting!')
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))