# Reindex script
# File: scripts/reindex.py
import yaml

from orchestrator.client import MCPClient
from orchestrator.provenance import init_db, Paper


def main():
    """Reindex all existing papers from the provenance DB into Chroma via MCP.

    Reads ``config.yaml`` from the current working directory. The config
    must provide ``mcp_servers.chroma`` (passed to ``MCPClient``) and may
    provide ``db_url`` (defaults to ``sqlite:///embeddings.db``).

    For every ``Paper`` row, the title and abstract are joined with a single
    space (missing values are treated as empty strings) and sent to the
    ``chroma.insert`` MCP call under the paper's id with empty metadata.

    Raises:
        OSError: if ``config.yaml`` cannot be opened.
        KeyError: if the config lacks ``mcp_servers`` or its ``chroma`` entry.
    """
    # Use a context manager so the config file handle is closed promptly
    # instead of being left to the garbage collector.
    with open("config.yaml") as config_file:
        cfg = yaml.safe_load(config_file)

    chroma = MCPClient(cfg['mcp_servers']['chroma'])
    Session = init_db(cfg.get('db_url', 'sqlite:///embeddings.db'))
    session = Session()
    try:
        print("Fetching all paper IDs from DB...")
        # Load the rows once and reuse them: re-querying each paper by id
        # costs one extra round-trip per paper and can yield None if a row
        # vanishes between the two queries.
        papers = session.query(Paper).all()
        print(f"Found {len(papers)} papers. Reindexing...")
        for paper in papers:
            text = (paper.title or '') + ' ' + (paper.abstract or '')
            chroma.call(
                "chroma.insert",
                {"id": paper.id, "text": text, "metadata": {}},
            )
    finally:
        # Release the DB connection even if an insert fails part-way.
        # NOTE(review): assumes init_db returns a SQLAlchemy-style session
        # factory whose sessions expose close() — confirm.
        session.close()
    print("Reindexing finished.")


if __name__ == "__main__":
    main()