MCP_Research / scripts /reindex.py
mgbam's picture
Update scripts/reindex.py
fb85888 verified
raw
history blame
921 Bytes
# Reindex script
# File: scripts/reindex.py
import yaml
from orchestrator.client import MCPClient
from orchestrator.provenance import init_db, Paper
def main():
    """
    Reindex all existing papers from the provenance DB into Chroma via MCP.

    Reads ``config.yaml`` from the current working directory, builds an
    ``MCPClient`` from ``cfg['mcp_servers']['chroma']`` and opens a DB
    session on ``cfg['db_url']`` (falling back to
    ``sqlite:///embeddings.db``). For every ``Paper`` row it issues one
    ``chroma.insert`` call whose text is the title and abstract joined by a
    single space; a missing title or abstract is treated as an empty string.

    Raises:
        OSError: if ``config.yaml`` cannot be opened.
        KeyError: if the config lacks ``mcp_servers`` or its ``chroma`` entry.
    """
    # Context manager so the config file handle is closed deterministically.
    with open("config.yaml", encoding="utf-8") as cfg_file:
        cfg = yaml.safe_load(cfg_file)

    chroma = MCPClient(cfg['mcp_servers']['chroma'])
    Session = init_db(cfg.get('db_url', 'sqlite:///embeddings.db'))
    session = Session()
    try:
        print("Fetching all paper IDs from DB...")
        # Load the rows once and reuse them; re-querying each paper by ID
        # would cost one extra round trip per paper for the same data.
        papers = session.query(Paper).all()
        print(f"Found {len(papers)} papers. Reindexing...")
        for paper in papers:
            text = (paper.title or '') + ' ' + (paper.abstract or '')
            chroma.call(
                "chroma.insert",
                {"id": paper.id, "text": text, "metadata": {}},
            )
    finally:
        # Release the DB connection even if an insert fails part-way.
        # NOTE(review): assumes init_db returns a SQLAlchemy-style session
        # factory whose sessions expose close() - confirm in provenance.py.
        session.close()
    print("Reindexing finished.")
# Run the reindex only when executed as a script, not when imported.
if __name__ == "__main__":
    main()