mgbam committed on
Commit
d9b98b5
·
verified ·
1 Parent(s): 0170414

Update scripts/reindex.py

Browse files
Files changed (1) hide show
  1. scripts/reindex.py +7 -18
scripts/reindex.py CHANGED
@@ -1,28 +1,17 @@
1
- # Reindex script
2
  # File: scripts/reindex.py
3
  import yaml
4
  from orchestrator.client import MCPClient
5
  from orchestrator.provenance import init_db, Paper
6
 
7
-
8
def main():
    """
    Reindex all existing papers from the provenance DB into Chroma via MCP.

    Reads ``config.yaml`` from the current working directory, builds an MCP
    client from ``cfg['mcp_servers']['chroma']``, opens a DB session using
    ``cfg['db_url']`` (falling back to ``sqlite:///embeddings.db``) and sends
    one ``chroma.insert`` call per ``Paper`` row. The indexed text is the
    paper's title and abstract joined by a single space; a missing value is
    treated as an empty string.
    """
    # Use a context manager so the config file handle is closed right after
    # parsing instead of being left to the garbage collector.
    with open("config.yaml") as config_file:
        cfg = yaml.safe_load(config_file)

    chroma = MCPClient(cfg['mcp_servers']['chroma'])
    Session = init_db(cfg.get('db_url', 'sqlite:///embeddings.db'))
    session = Session()

    try:
        print("Fetching all paper IDs from DB...")
        # Load every row once and iterate over the loaded objects; the earlier
        # version re-queried each paper by id, costing one extra round trip
        # per row for the same data.
        papers = session.query(Paper).all()
        print(f"Found {len(papers)} papers. Reindexing...")

        for paper in papers:
            text = (paper.title or '') + ' ' + (paper.abstract or '')
            chroma.call("chroma.insert", {"id": paper.id, "text": text, "metadata": {}})
    finally:
        # NOTE(review): assumes init_db returns a SQLAlchemy-style session
        # factory whose sessions expose close() -- confirm against
        # orchestrator.provenance.
        session.close()
    print("Reindexing finished.")


if __name__ == "__main__":
    main()
 
 
1
  # File: scripts/reindex.py
2
  import yaml
3
  from orchestrator.client import MCPClient
4
  from orchestrator.provenance import init_db, Paper
5
 
6
def compose_paper_text(paper):
    """Return the text to index for *paper*.

    The result is the paper's ``title`` and ``abstract`` joined by a single
    space. A falsy title or abstract (e.g. ``None``) is treated as an empty
    string, so the separating space is always present.
    """
    return (paper.title or '') + ' ' + (paper.abstract or '')


def reindex_papers(papers, chroma):
    """Send one ``chroma.insert`` call per paper and return the number sent.

    Args:
        papers: Iterable of objects exposing ``id``, ``title`` and
            ``abstract`` attributes.
        chroma: Client object exposing ``call(name, payload)``.

    Returns:
        The number of papers for which an insert call was issued.
    """
    sent = 0
    for paper in papers:
        payload = {'id': paper.id, 'text': compose_paper_text(paper), 'metadata': {}}
        chroma.call('chroma.insert', payload)
        sent += 1
    return sent


def main(config_path='config.yaml'):
    """Reindex every ``Paper`` row from the provenance DB into Chroma via MCP.

    Args:
        config_path: Path of the YAML config file. It must provide
            ``mcp_servers.chroma`` and may provide ``db_url`` (defaults to
            ``sqlite:///embeddings.db``).
    """
    # Use a context manager so the config file handle is closed right after
    # parsing instead of being left to the garbage collector.
    with open(config_path) as config_file:
        cfg = yaml.safe_load(config_file)

    chroma = MCPClient(cfg['mcp_servers']['chroma'])
    Session = init_db(cfg.get('db_url', 'sqlite:///embeddings.db'))
    session = Session()
    try:
        papers = session.query(Paper).all()
        print(f'Reindexing {len(papers)} papers...')
        reindex_papers(papers, chroma)
    finally:
        # NOTE(review): assumes init_db returns a SQLAlchemy-style session
        # factory whose sessions expose close() -- confirm against
        # orchestrator.provenance.
        session.close()
    print('Reindex complete!')


if __name__ == '__main__':
    main()