File size: 791 Bytes
261056f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import os
from dataset_utils import DatasetManager
from app import init_embedding_model

# Hub repository id of the dataset this script updates.
dataset_name = "nomadicsynth/arxiv-dataset-abstract-embeddings"
# Access token read from the environment; None when HF_TOKEN is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

if __name__ == "__main__":
    # Step 1: load the embedding model. The arguments are gathered in a dict
    # so the model id and the explicit revision string are easy to locate.
    model_kwargs = {
        "model_name_or_path": "nomadicsynth/research-compass-arxiv-abstracts-embedding-model",
        "model_revision": "2025-01-28_23-06-17-1epochs-12batch-32eval-512embed-final",
        "hf_token": HF_TOKEN,
    }
    embedding_model = init_embedding_model(**model_kwargs)

    # Step 2: create the manager for `dataset_name`, handing it the same
    # token and the model loaded above.
    dataset_manager = DatasetManager(
        dataset_name=dataset_name,
        hf_token=HF_TOKEN,
        embedding_model=embedding_model,
    )

    # Step 3: run the update. What gets fetched and written is decided
    # inside DatasetManager.update_dataset_with_new_papers().
    dataset_manager.update_dataset_with_new_papers()