File size: 921 Bytes
8a525cb
fb85888
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Reindex script
# File: scripts/reindex.py
import yaml
from orchestrator.client import MCPClient
from orchestrator.provenance import init_db, Paper


def main():
    """
    Reindex all existing papers from the provenance DB into Chroma via MCP.

    Reads ``config.yaml`` from the current working directory, builds an
    ``MCPClient`` from ``cfg['mcp_servers']['chroma']`` and opens a DB
    session on ``cfg['db_url']`` (default: ``sqlite:///embeddings.db``).
    Every ``Paper`` row is then sent to the ``chroma.insert`` MCP call with
    its id, the text ``"<title> <abstract>"`` (a missing title or abstract
    is treated as an empty string) and an empty metadata dict.

    Errors from reading the config, from the DB, or from the MCP call are
    not caught here and propagate to the caller.
    """
    # Use a context manager so the config file handle is closed right away
    # instead of being left to the garbage collector.
    with open("config.yaml") as config_file:
        cfg = yaml.safe_load(config_file)

    chroma = MCPClient(cfg['mcp_servers']['chroma'])
    Session = init_db(cfg.get('db_url', 'sqlite:///embeddings.db'))
    session = Session()

    try:
        print("Fetching all paper IDs from DB...")
        # One query loads every paper; the rows are reused below rather than
        # re-querying each id individually (which cost one extra round trip
        # per paper and could hit a None row if a paper disappeared).
        papers = session.query(Paper).all()
        print(f"Found {len(papers)} papers. Reindexing...")

        for paper in papers:
            text = (paper.title or '') + ' ' + (paper.abstract or '')
            chroma.call(
                "chroma.insert",
                {"id": paper.id, "text": text, "metadata": {}},
            )
    finally:
        # Release the DB connection even when an insert fails part-way.
        # NOTE(review): assumes init_db returns a SQLAlchemy-style session
        # factory whose sessions expose close() — confirm in
        # orchestrator.provenance.
        session.close()

    print("Reindexing finished.")

# Run the reindex only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()