Spaces:
Running
on
Zero
Running
on
Zero
File size: 791 Bytes
261056f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
import os
from dataset_utils import DatasetManager
from app import init_embedding_model
# Identifier of the dataset this script hands to DatasetManager.
dataset_name = "nomadicsynth/arxiv-dataset-abstract-embeddings"
# Access token taken from the environment; None when HF_TOKEN is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
if __name__ == "__main__":
    # Build the embedding model from a pinned model revision.
    embedder = init_embedding_model(
        model_name_or_path="nomadicsynth/research-compass-arxiv-abstracts-embedding-model",
        model_revision="2025-01-28_23-06-17-1epochs-12batch-32eval-512embed-final",
        hf_token=HF_TOKEN,
    )

    # Bind a DatasetManager to the target dataset, passing it the model.
    manager = DatasetManager(
        embedding_model=embedder,
        dataset_name=dataset_name,
        hf_token=HF_TOKEN,
    )

    # Ask the manager to update the dataset with new papers.
    manager.update_dataset_with_new_papers()
|