# Command-line entry point: builds the embedding model, hands it to a
# DatasetManager, and asks the manager to update the dataset with new papers.
import os

from dataset_utils import DatasetManager
from app import init_embedding_model

# Hugging Face dataset repository this script operates on.
dataset_name = "nomadicsynth/arxiv-dataset-abstract-embeddings"

# Access token read from the environment; os.getenv yields None when the
# HF_TOKEN variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")


def _update_dataset():
    """Create the embedding model and run the dataset update.

    The model is obtained from ``init_embedding_model`` (pinned to a fixed
    model revision) and passed to a ``DatasetManager`` built for
    ``dataset_name``; the manager's ``update_dataset_with_new_papers`` method
    is then invoked. What the update itself does is defined in
    ``dataset_utils`` and is not visible from this script.
    """
    model = init_embedding_model(
        model_name_or_path="nomadicsynth/research-compass-arxiv-abstracts-embedding-model",
        model_revision="2025-01-28_23-06-17-1epochs-12batch-32eval-512embed-final",
        hf_token=HF_TOKEN,
    )

    manager = DatasetManager(
        dataset_name=dataset_name,
        hf_token=HF_TOKEN,
        embedding_model=model,
    )
    manager.update_dataset_with_new_papers()


if __name__ == "__main__":
    # Only run the update when executed as a script, not when imported.
    _update_dataset()