Spaces:
Running
Running
Update orchestrator/provenance.py
Browse files- orchestrator/provenance.py +41 -7
orchestrator/provenance.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
-
# SQLAlchemy models for provenance
|
2 |
# File: orchestrator/provenance.py
|
3 |
-
from sqlalchemy import Column,
|
4 |
from sqlalchemy.orm import declarative_base, relationship, sessionmaker
|
5 |
from datetime import datetime
|
6 |
|
@@ -13,7 +12,7 @@ class Paper(Base):
|
|
13 |
authors = Column(String)
|
14 |
abstract = Column(String)
|
15 |
fetched_at = Column(DateTime, default=datetime.utcnow)
|
16 |
-
runs = relationship(
|
17 |
|
18 |
class Run(Base):
|
19 |
__tablename__ = 'runs'
|
@@ -22,11 +21,46 @@ class Run(Base):
|
|
22 |
cell_index = Column(Integer)
|
23 |
output = Column(String)
|
24 |
executed_at = Column(DateTime, default=datetime.utcnow)
|
25 |
-
paper = relationship(
|
26 |
-
|
27 |
-
# Utility to initialize and get a session
|
28 |
|
29 |
def init_db(db_url: str):
|
30 |
engine = create_engine(db_url)
|
31 |
Base.metadata.create_all(engine)
|
32 |
-
return sessionmaker(bind=engine)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# File: orchestrator/provenance.py
|
2 |
+
from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, create_engine
|
3 |
from sqlalchemy.orm import declarative_base, relationship, sessionmaker
|
4 |
from datetime import datetime
|
5 |
|
|
|
12 |
authors = Column(String)
|
13 |
abstract = Column(String)
|
14 |
fetched_at = Column(DateTime, default=datetime.utcnow)
|
15 |
+
runs = relationship('Run', back_populates='paper')
|
16 |
|
17 |
class Run(Base):
|
18 |
__tablename__ = 'runs'
|
|
|
21 |
cell_index = Column(Integer)
|
22 |
output = Column(String)
|
23 |
executed_at = Column(DateTime, default=datetime.utcnow)
|
24 |
+
paper = relationship('Paper', back_populates='runs')
|
|
|
|
|
25 |
|
26 |
def init_db(db_url: str):
    """Create the schema at *db_url* and return a session factory.

    Builds an engine for ``db_url``, runs ``create_all`` for every table
    registered on ``Base.metadata``, and returns a ``sessionmaker`` bound
    to that engine. The return value is the factory itself, not an open
    session; call it to obtain one.
    """
    db_engine = create_engine(db_url)
    Base.metadata.create_all(db_engine)
    session_factory = sessionmaker(bind=db_engine)
    return session_factory
|
30 |
+
|
31 |
+
# File: scripts/ingest.py
|
32 |
+
import sys
|
33 |
+
import yaml
|
34 |
+
from orchestrator.client import MCPClient
|
35 |
+
|
36 |
+
"""
|
37 |
+
Usage:
|
38 |
+
python ingest.py "search query"
|
39 |
+
"""
|
40 |
+
def build_chroma_record(paper):
    """Build the ``chroma.insert`` payload for one search result.

    Args:
        paper: Mapping describing one result. Only the ``id``, ``title``,
            ``abstract`` and ``authors`` keys are read; all are optional.

    Returns:
        A dict with ``id``, ``text`` and ``metadata`` keys, where
        ``metadata['authors']`` is a comma-joined string.
    """
    # ``authors`` may be missing or explicitly null; treat both as "none".
    authors = paper.get('authors') or []
    if isinstance(authors, str):
        # A bare string would otherwise be joined character by character.
        authors = [authors]
    return {
        'id': paper.get('id'),
        'text': paper.get('abstract') or '',
        'metadata': {
            'title': paper.get('title'),
            'authors': ','.join(authors),
        },
    }


def main(argv=None):
    """Search the configured sources for a query and insert results into Chroma.

    Args:
        argv: Argument vector; defaults to ``sys.argv``. ``argv[1]`` is the
            search query.

    Exits with status 1 when no query is given or when any insert failed.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print('Please provide a search query.')
        sys.exit(1)
    query = argv[1]

    # Close the config file deterministically instead of leaking the handle.
    with open('config.yaml', encoding='utf-8') as config_file:
        cfg = yaml.safe_load(config_file)
    servers = cfg['mcp_servers']
    web = MCPClient(servers['web_search'])
    pubmed = MCPClient(servers['pubmed'])
    chroma = MCPClient(servers['chroma'])

    print(f'Ingesting papers for query: {query}')
    papers = []
    # Each source is best-effort: a failure in one must not block the other.
    # The exception types raised by MCPClient are not visible here, hence the
    # broad catch at this top-level boundary.
    try:
        papers += web.call('web_search.search', {'q': query}) or []
    except Exception as e:
        print('Web search error:', e)
    try:
        papers += pubmed.call('metatool.query', {'source': 'PubMed', 'q': query}) or []
    except Exception as e:
        print('PubMed error:', e)

    failed = 0
    for paper in papers:
        # One bad record should not abort the remaining inserts.
        try:
            chroma.call('chroma.insert', build_chroma_record(paper))
        except Exception as e:
            failed += 1
            print('Chroma insert error:', e)
    print('Done ingesting!')
    if failed:
        # Keep a non-zero exit status so callers can still detect failures.
        sys.exit(1)


if __name__ == '__main__':
    main()
|