from typing import Dict, Any

# FastText model paths configuration.
# Both path tables are keyed by the same short model identifiers, so a
# caller can switch between sources without changing the name it passes in.
FASTTEXT_CONFIG: Dict[str, Any] = {
    "use_huggingface": True,  # Set to True in production
    "repo_id": "talhasarit41/fasttext",  # HuggingFace repository ID
    # File names inside the HuggingFace repository.
    "huggingface_paths": {
        "fasttext_default": "fasttext_raw.bin",
        "fasttext_preprocessed": "fasttext_preprocessed.bin",
        "word_n_gram_1": "word_n_gram_1.bin",
        "word_n_gram_2": "word_n_gram_2.bin",
        "word_n_gram_3": "word_n_gram_3.bin",
        "low_overfit": "low_overfit.bin",
    },
    # Absolute paths used when "use_huggingface" is False.
    # NOTE(review): these point into one developer's home directory and will
    # not exist on other machines; confirm whether they should be configurable.
    "local_paths": {
        "fasttext_default": "/home/seeknndestroy/jetlink/bitbucket/fasttext_related/saved_models/fasttext_raw.bin",
        "fasttext_preprocessed": "/home/seeknndestroy/jetlink/bitbucket/fasttext_related/saved_models/fasttext_preprocessed.bin",
        "word_n_gram_1": "/home/seeknndestroy/jetlink/bitbucket/fasttext_related/saved_models/manual_configs/word_n_gram_1.bin",
        "word_n_gram_2": "/home/seeknndestroy/jetlink/bitbucket/fasttext_related/saved_models/manual_configs/word_n_gram_2.bin",
        "word_n_gram_3": "/home/seeknndestroy/jetlink/bitbucket/fasttext_related/saved_models/manual_configs/word_n_gram_3.bin",
        "low_overfit": "/home/seeknndestroy/jetlink/bitbucket/fasttext_related/saved_models/manual_configs/low_overfit.bin",
    },
}

# Model enablement configuration: display name -> whether the model is enabled.
MODEL_CONFIG: Dict[str, bool] = {
    "FastText Default": True,
    "Fasttext Low Overfit": True,
    "Fasttext WordnNGram 1": True,
    "Fasttext WordnNGram 2": True,
    "Fasttext WordnNGram 3": True,
    "E5 Classifier": False,
    "E5-Instruct Classifier": False,
    "Azure Classifier": False,
    "Azure KNN Classifier": False,
    "GTE Classifier": False,
}


def get_fasttext_path(model_name: str) -> str:
    """Get the appropriate FastText model path based on configuration.

    Args:
        model_name: Short model identifier; must be a key of the active path
            table in ``FASTTEXT_CONFIG`` (``"huggingface_paths"`` when
            ``"use_huggingface"`` is true, otherwise ``"local_paths"``).

    Returns:
        The configured local path when ``"use_huggingface"`` is false;
        otherwise whatever ``hf_hub_download`` returns for the configured
        repository and file name.

    Raises:
        KeyError: If ``model_name`` is not present in the active path table.
    """
    use_hub = FASTTEXT_CONFIG["use_huggingface"]
    known_paths = FASTTEXT_CONFIG["huggingface_paths" if use_hub else "local_paths"]

    # Validate up front so a mistyped name yields a descriptive error rather
    # than a bare KeyError (or an unrelated ImportError from the lazy import).
    if model_name not in known_paths:
        raise KeyError(
            f"Unknown FastText model {model_name!r}; "
            f"expected one of {sorted(known_paths)}"
        )

    if not use_hub:
        return known_paths[model_name]

    # Imported lazily so huggingface_hub is only required in HuggingFace mode.
    from huggingface_hub import hf_hub_download

    return hf_hub_download(
        repo_id=FASTTEXT_CONFIG["repo_id"],
        filename=known_paths[model_name],
    )


def is_model_enabled(model_name: str) -> bool:
    """Check if a model is enabled in the configuration.

    Args:
        model_name: Display name used as a key in ``MODEL_CONFIG``.

    Returns:
        The flag stored for ``model_name``, or ``False`` when the name is not
        listed in ``MODEL_CONFIG``.
    """
    return MODEL_CONFIG.get(model_name, False)