# Spaces: Running on Zero
import os

from dataset_utils import DatasetManager
from app import init_embedding_model
# Dataset details
# Identifier handed to DatasetManager as `dataset_name`
# (presumably a Hugging Face Hub dataset repo ID -- confirm).
dataset_name = "nomadicsynth/arxiv-dataset-abstract-embeddings"
# Access token taken from the environment; os.getenv returns None when the
# HF_TOKEN variable is not set, and that None is passed on unchanged.
HF_TOKEN = os.getenv("HF_TOKEN")
def main() -> None:
    """Create the embedding model and trigger a dataset update.

    Steps, in order:
      1. Call ``init_embedding_model`` with a fixed model path and revision.
      2. Construct a ``DatasetManager`` for ``dataset_name`` with that model.
      3. Call ``DatasetManager.update_dataset_with_new_papers()``.

    ``HF_TOKEN`` is forwarded to both calls as-is; it is ``None`` when the
    environment variable is unset. Whether the helpers accept a missing token
    is not visible from this script -- NOTE(review): confirm against
    ``app.init_embedding_model`` and ``dataset_utils.DatasetManager``.
    """
    # Initialize the embedding model (model path and revision are hard-coded).
    embedding_model = init_embedding_model(
        model_name_or_path="nomadicsynth/research-compass-arxiv-abstracts-embedding-model",
        model_revision="2025-01-28_23-06-17-1epochs-12batch-32eval-512embed-final",
        hf_token=HF_TOKEN,
    )

    # Initialize DatasetManager with the embedding model.
    dataset_manager = DatasetManager(
        dataset_name=dataset_name,
        hf_token=HF_TOKEN,
        embedding_model=embedding_model,
    )

    # Update the dataset with new papers.
    dataset_manager.update_dataset_with_new_papers()


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()