Spaces:
Sleeping
Sleeping
File size: 921 Bytes
8a525cb fb85888 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# Reindex script
# File: scripts/reindex.py
import yaml
from orchestrator.client import MCPClient
from orchestrator.provenance import init_db, Paper
def main():
    """
    Reindex all existing papers from the provenance DB into Chroma via MCP.

    Reads ``config.yaml`` from the current working directory, opens a DB
    session on ``db_url`` (falling back to ``sqlite:///embeddings.db``), and
    issues one ``chroma.insert`` call per paper. The indexed text is the
    paper's title and abstract joined by a single space; a missing title or
    abstract is treated as an empty string.

    Raises:
        OSError: if ``config.yaml`` cannot be opened.
        KeyError: if the config mapping lacks ``mcp_servers`` or its
            ``chroma`` entry.
    """
    # Context manager so the config file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open("config.yaml", encoding="utf-8") as config_file:
        cfg = yaml.safe_load(config_file)

    chroma = MCPClient(cfg['mcp_servers']['chroma'])
    # NOTE(review): assumes init_db returns a SQLAlchemy session factory whose
    # sessions expose .query() and .close() -- confirm against
    # orchestrator.provenance.
    Session = init_db(cfg.get('db_url', 'sqlite:///embeddings.db'))
    session = Session()
    try:
        print("Fetching all paper IDs from DB...")
        # Load the rows once and reuse them; the previous version re-queried
        # every paper by ID inside the loop (one extra query per paper).
        papers = session.query(Paper).all()
        print(f"Found {len(papers)} papers. Reindexing...")
        for paper in papers:
            text = (paper.title or '') + ' ' + (paper.abstract or '')
            chroma.call(
                "chroma.insert",
                {"id": paper.id, "text": text, "metadata": {}},
            )
        print("Reindexing finished.")
    finally:
        # Release the DB connection even if an insert fails midway.
        session.close()
# Script entry point: run the reindex only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|