mgbam committed on
Commit
562ae95
·
verified ·
1 Parent(s): 7ed183c

Update scripts/ingest.py

Browse files
Files changed (1) hide show
  1. scripts/ingest.py +41 -0
scripts/ingest.py CHANGED
@@ -1 +1,42 @@
1
  # Ingest script
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # Ingest script
2
+ # File: scripts/ingest.py
3
+ import yaml
4
+ import sys
5
+ from orchestrator.client import MCPClient
6
+
7
+
8
def _paper_metadata(paper):
    """Build the metadata dict stored alongside a paper's text.

    The ``authors`` field is flattened to a comma-separated string.
    NOTE(review): the shape of ``authors`` is not visible here; this assumes
    a list of names but tolerates a missing/null value, a bare string, and
    non-string entries - confirm against the search servers' responses.
    """
    authors = paper.get('authors') or []
    if isinstance(authors, str):
        # Joining a bare string would comma-separate its characters.
        author_field = authors
    else:
        author_field = ",".join(str(name) for name in authors)
    return {"title": paper.get('title'), "authors": author_field}


def main():
    """
    Ingest papers for a given query into Chroma vector store via MCP.

    Usage: python ingest.py "your search query"

    Reads the search query from ``sys.argv[1]`` and the MCP server settings
    from ``config.yaml`` in the current working directory, gathers results
    from the web-search and PubMed clients, and sends each result to the
    Chroma client with ``chroma.insert``.

    A failure of either search is printed and skipped so the other source
    can still contribute results. Errors while loading the config or
    inserting into Chroma are not caught and propagate to the caller.

    Exits with status 1 (after printing a usage line) when no query is given.
    """
    if len(sys.argv) < 2:
        print("Usage: python ingest.py <query>")
        sys.exit(1)
    query = sys.argv[1]

    # Use a context manager so the config file handle is closed promptly
    # instead of being left for the garbage collector.
    with open("config.yaml", encoding="utf-8") as config_file:
        cfg = yaml.safe_load(config_file)

    servers = cfg['mcp_servers']
    web = MCPClient(servers['web_search'])
    pubmed = MCPClient(servers['pubmed'])
    chroma = MCPClient(servers['chroma'])

    print(f"Ingesting papers for query: {query}")
    results = []
    # Each search is best-effort: report the failure and carry on with
    # whatever the other source returned.
    try:
        results += web.call("web_search.search", {"q": query})
    except Exception as e:
        print("Web search failed:", e)
    try:
        results += pubmed.call("metatool.query", {"source": "PubMed", "q": query})
    except Exception as e:
        print("PubMed search failed:", e)

    for paper in results:
        chroma.call(
            "chroma.insert",
            {
                "id": paper.get('id'),
                "text": paper.get('abstract', ''),
                "metadata": _paper_metadata(paper),
            },
        )
    print("Ingestion complete!")
40
+
41
# Script entry point: run the ingestion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()