|
from typing import Dict, Any |
|
|
|
|
|
# Settings that control where FastText model binaries are loaded from.
# get_fasttext_path() reads this table: with "use_huggingface" set it looks
# the model up in "huggingface_paths" and fetches it from "repo_id";
# otherwise it returns the entry from "local_paths" unchanged.
FASTTEXT_CONFIG: Dict[str, Any] = {
    # Switch between the Hugging Face Hub and the local filesystem.
    "use_huggingface": True,
    # Hub repository that hosts the model binaries.
    "repo_id": "talhasarit41/fasttext",
    # Model key -> file name inside the Hub repository.
    "huggingface_paths": {
        "fasttext_default": "fasttext_raw.bin",
        "fasttext_preprocessed": "fasttext_preprocessed.bin",
        "word_n_gram_1": "word_n_gram_1.bin",
        "word_n_gram_2": "word_n_gram_2.bin",
        "word_n_gram_3": "word_n_gram_3.bin",
        "low_overfit": "low_overfit.bin",
    },
    # Model key -> absolute path on disk (same keys as "huggingface_paths").
    # NOTE(review): these paths sit under one user's home directory and will
    # presumably need adjusting on any other machine -- confirm before use.
    "local_paths": {
        "fasttext_default": "/home/seeknndestroy/jetlink/bitbucket/fasttext_related/saved_models/fasttext_raw.bin",
        "fasttext_preprocessed": "/home/seeknndestroy/jetlink/bitbucket/fasttext_related/saved_models/fasttext_preprocessed.bin",
        "word_n_gram_1": "/home/seeknndestroy/jetlink/bitbucket/fasttext_related/saved_models/manual_configs/word_n_gram_1.bin",
        "word_n_gram_2": "/home/seeknndestroy/jetlink/bitbucket/fasttext_related/saved_models/manual_configs/word_n_gram_2.bin",
        "word_n_gram_3": "/home/seeknndestroy/jetlink/bitbucket/fasttext_related/saved_models/manual_configs/word_n_gram_3.bin",
        "low_overfit": "/home/seeknndestroy/jetlink/bitbucket/fasttext_related/saved_models/manual_configs/low_overfit.bin",
    },
}
|
|
|
|
|
# Per-model on/off switches, keyed by model display name.
# is_model_enabled() looks names up with an exact, case-sensitive match and
# reports False for any name that is not listed here.
MODEL_CONFIG: Dict[str, bool] = {
    # FastText variants
    "FastText Default": True,
    "Fasttext Low Overfit": True,
    "Fasttext WordnNGram 1": True,
    "Fasttext WordnNGram 2": True,
    "Fasttext WordnNGram 3": True,
    # Other classifiers
    "E5 Classifier": False,
    "E5-Instruct Classifier": False,
    "Azure Classifier": False,
    "Azure KNN Classifier": False,
    "GTE Classifier": False,
}
|
|
|
def get_fasttext_path(model_name: str) -> str:
    """Return the path of the FastText model registered as ``model_name``.

    The source is selected by ``FASTTEXT_CONFIG["use_huggingface"]``:

    * truthy -- the file name from ``FASTTEXT_CONFIG["huggingface_paths"]`` is
      fetched from the ``FASTTEXT_CONFIG["repo_id"]`` repository with
      ``huggingface_hub.hf_hub_download`` and that call's result is returned;
    * falsy -- the entry from ``FASTTEXT_CONFIG["local_paths"]`` is returned
      as-is (the file's existence is not checked).

    Args:
        model_name: Key of the model in the active path table,
            e.g. ``"fasttext_default"``.

    Returns:
        The path of the requested model file.

    Raises:
        KeyError: If ``model_name`` is not registered in the active path
            table. The message lists the registered names.
    """
    use_hub = FASTTEXT_CONFIG["use_huggingface"]
    table_key = "huggingface_paths" if use_hub else "local_paths"
    path_table = FASTTEXT_CONFIG[table_key]

    # Validate the name up front so a typo fails fast with a helpful message,
    # before huggingface_hub is imported or any download is attempted.
    try:
        target = path_table[model_name]
    except KeyError:
        known = ", ".join(sorted(path_table))
        raise KeyError(
            f"Unknown FastText model {model_name!r} in {table_key!r}; "
            f"expected one of: {known}"
        ) from None

    if not use_hub:
        return target

    # Imported lazily so huggingface_hub is only required when the Hub is used.
    from huggingface_hub import hf_hub_download

    return hf_hub_download(
        repo_id=FASTTEXT_CONFIG["repo_id"],
        filename=target,
    )
|
|
|
def is_model_enabled(model_name: str) -> bool:
    """Return the flag stored in ``MODEL_CONFIG`` for ``model_name``.

    Names that are not present in ``MODEL_CONFIG`` are reported as disabled
    (``False``). The lookup is an exact match on the key.
    """
    try:
        return MODEL_CONFIG[model_name]
    except KeyError:
        # Unlisted models count as disabled.
        return False