SeeknnDestroy
committed on
download models from hub
Browse files
app.py
CHANGED
@@ -8,6 +8,11 @@ import time
|
|
8 |
from transformers import AutoTokenizer, AutoModel
|
9 |
import torch.nn.functional as F
|
10 |
from openai import AzureOpenAI
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
# Azure OpenAI Configuration
|
13 |
AZURE_API_VERSION = "2024-02-01"
|
@@ -17,9 +22,9 @@ MODEL_DIR = "models"
|
|
17 |
|
18 |
# Initialize Azure OpenAI client
|
19 |
azure_client = AzureOpenAI(
|
20 |
-
api_key=os.getenv(AZURE_OPENAI_API_KEY),
|
21 |
api_version=AZURE_API_VERSION,
|
22 |
-
azure_endpoint=os.getenv(AZURE_OPENAI_EMBEDDING_ENDPOINT)
|
23 |
)
|
24 |
|
25 |
def generate_e5_embedding(text, model_name='intfloat/multilingual-e5-large'):
|
@@ -123,8 +128,8 @@ def load_models():
|
|
123 |
models['GTE Classifier'] = pickle.load(f)
|
124 |
|
125 |
# Load FastText models
|
126 |
-
models['FastText Raw'] = fasttext.load_model(
|
127 |
-
models['FastText Preprocessed'] = fasttext.load_model(
|
128 |
|
129 |
return models
|
130 |
|
@@ -177,7 +182,7 @@ def get_confidence_color(confidence):
|
|
177 |
return "#ff4444" # Bright red for low confidence
|
178 |
|
179 |
# [Add GTE embedding generation function]
|
180 |
-
def generate_gte_embedding(text, model_name='Alibaba-NLP/gte-base'):
|
181 |
"""Generate GTE embeddings for a single text."""
|
182 |
start_time = time.time()
|
183 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
8 |
from transformers import AutoTokenizer, AutoModel
|
9 |
import torch.nn.functional as F
|
10 |
from openai import AzureOpenAI
|
11 |
+
from huggingface_hub import hf_hub_download
|
12 |
+
|
13 |
+
# Download the FastText model from Hugging Face
|
14 |
+
model_path_fasttext_raw = hf_hub_download(repo_id="talhasarit41/fasttext", filename="fasttext_raw.bin")
|
15 |
+
model_path_fasttext_preprocessed = hf_hub_download(repo_id="talhasarit41/fasttext", filename="fasttext_preprocessed.bin")
|
16 |
|
17 |
# Azure OpenAI Configuration
|
18 |
AZURE_API_VERSION = "2024-02-01"
|
|
|
22 |
|
23 |
# Initialize Azure OpenAI client
|
24 |
azure_client = AzureOpenAI(
|
25 |
+
api_key=os.getenv("AZURE_OPENAI_API_KEY"),
|
26 |
api_version=AZURE_API_VERSION,
|
27 |
+
azure_endpoint=os.getenv("AZURE_OPENAI_EMBEDDING_ENDPOINT")
|
28 |
)
|
29 |
|
30 |
def generate_e5_embedding(text, model_name='intfloat/multilingual-e5-large'):
|
|
|
128 |
models['GTE Classifier'] = pickle.load(f)
|
129 |
|
130 |
# Load FastText models
|
131 |
+
models['FastText Raw'] = fasttext.load_model(model_path_fasttext_raw)
|
132 |
+
models['FastText Preprocessed'] = fasttext.load_model(model_path_fasttext_preprocessed)
|
133 |
|
134 |
return models
|
135 |
|
|
|
182 |
return "#ff4444" # Bright red for low confidence
|
183 |
|
184 |
# [Add GTE embedding generation function]
|
185 |
+
def generate_gte_embedding(text, model_name='Alibaba-NLP/gte-multilingual-base'):
|
186 |
"""Generate GTE embeddings for a single text."""
|
187 |
start_time = time.time()
|
188 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|