mgbam committed on
Commit
fb85888
·
verified ·
1 Parent(s): 562ae95

Update scripts/reindex.py

Browse files
Files changed (1) hide show
  1. scripts/reindex.py +27 -0
scripts/reindex.py CHANGED
@@ -1 +1,28 @@
1
  # Reindex script
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # Reindex script
2
+ # File: scripts/reindex.py
3
+ import yaml
4
+ from orchestrator.client import MCPClient
5
+ from orchestrator.provenance import init_db, Paper
6
+
7
+
8
def main() -> None:
    """Reindex every paper in the provenance DB into Chroma via MCP.

    Reads ``config.yaml`` from the current working directory, builds an MCP
    client from the ``mcp_servers.chroma`` entry, and opens the database
    named by ``db_url`` (defaulting to ``sqlite:///embeddings.db``). For each
    paper, the title and abstract (missing values become empty strings) are
    joined with a single space and sent to ``chroma.insert`` under the
    paper's ID with empty metadata.

    Raises:
        OSError: If ``config.yaml`` cannot be opened.
        KeyError: If the loaded config mapping has no ``mcp_servers`` or
            ``chroma`` entry.
    """
    # Context manager so the config file handle is closed deterministically.
    with open("config.yaml", encoding="utf-8") as config_file:
        cfg = yaml.safe_load(config_file)

    chroma = MCPClient(cfg['mcp_servers']['chroma'])
    Session = init_db(cfg.get('db_url', 'sqlite:///embeddings.db'))
    session = Session()

    try:
        print("Fetching all paper IDs from DB...")
        # Load the rows once and reuse them; re-querying each paper by ID
        # would cost one extra round-trip per row for the same objects.
        papers = session.query(Paper).all()
        print(f"Found {len(papers)} papers. Reindexing...")

        for paper in papers:
            text = (paper.title or '') + ' ' + (paper.abstract or '')
            chroma.call(
                "chroma.insert",
                {"id": paper.id, "text": text, "metadata": {}},
            )
        print("Reindexing finished.")
    finally:
        # Release the DB connection even if an insert fails part-way.
        # NOTE(review): assumes a SQLAlchemy-style session exposing close();
        # confirm against orchestrator.provenance.init_db.
        session.close()


# Run the reindex only when executed as a script, not when imported.
if __name__ == "__main__":
    main()