Spaces:
Sleeping
Sleeping
# File: scripts/reindex.py
import yaml | |
from orchestrator.client import MCPClient | |
from orchestrator.provenance import init_db, Paper | |
def main() -> None:
    """Re-insert every stored paper into the chroma MCP server.

    Reads ``config.yaml`` from the current working directory, builds an
    ``MCPClient`` from the ``mcp_servers.chroma`` entry, loads all ``Paper``
    rows from the database named by ``db_url`` (falling back to
    ``sqlite:///embeddings.db``), and issues one ``chroma.insert`` call per
    paper with the title and abstract joined by a single space.

    Raises:
        KeyError: if ``mcp_servers`` or its ``chroma`` entry is missing from
            the config.
        OSError: if ``config.yaml`` cannot be opened.
    """
    # Use a context manager so the config file handle is closed promptly
    # instead of being left to the garbage collector.
    with open('config.yaml') as config_file:
        cfg = yaml.safe_load(config_file)

    chroma = MCPClient(cfg['mcp_servers']['chroma'])
    Session = init_db(cfg.get('db_url', 'sqlite:///embeddings.db'))
    session = Session()
    try:
        papers = session.query(Paper).all()
        print(f'Reindexing {len(papers)} papers...')
        for paper in papers:
            # Either field may be None; treat a missing value as empty text.
            text = (paper.title or '') + ' ' + (paper.abstract or '')
            chroma.call(
                'chroma.insert',
                {'id': paper.id, 'text': text, 'metadata': {}},
            )
    finally:
        # NOTE(review): assumes the session object exposes close(), as
        # SQLAlchemy sessions do — confirm against orchestrator.provenance.
        session.close()
    print('Reindex complete!')


if __name__ == '__main__':
    main()