SeeknnDestroy committed on
Commit
cf9e3cb
·
unverified ·
1 Parent(s): 38f2ca5

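Download FastText models from the Hugging Face Hub; quote the Azure OpenAI environment variable names passed to os.getenv; switch the default GTE model to Alibaba-NLP/gte-multilingual-base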

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -8,6 +8,11 @@ import time
8
  from transformers import AutoTokenizer, AutoModel
9
  import torch.nn.functional as F
10
  from openai import AzureOpenAI
 
 
 
 
 
11
 
12
  # Azure OpenAI Configuration
13
  AZURE_API_VERSION = "2024-02-01"
@@ -17,9 +22,9 @@ MODEL_DIR = "models"
17
 
18
  # Initialize Azure OpenAI client
19
  azure_client = AzureOpenAI(
20
- api_key=os.getenv(AZURE_OPENAI_API_KEY),
21
  api_version=AZURE_API_VERSION,
22
- azure_endpoint=os.getenv(AZURE_OPENAI_EMBEDDING_ENDPOINT)
23
  )
24
 
25
  def generate_e5_embedding(text, model_name='intfloat/multilingual-e5-large'):
@@ -123,8 +128,8 @@ def load_models():
123
  models['GTE Classifier'] = pickle.load(f)
124
 
125
  # Load FastText models
126
- models['FastText Raw'] = fasttext.load_model(os.path.join(MODEL_DIR, 'fasttext_raw.bin'))
127
- models['FastText Preprocessed'] = fasttext.load_model(os.path.join(MODEL_DIR, 'fasttext_preprocessed.bin'))
128
 
129
  return models
130
 
@@ -177,7 +182,7 @@ def get_confidence_color(confidence):
177
  return "#ff4444" # Bright red for low confidence
178
 
179
  # [Add GTE embedding generation function]
180
- def generate_gte_embedding(text, model_name='Alibaba-NLP/gte-base'):
181
  """Generate GTE embeddings for a single text."""
182
  start_time = time.time()
183
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
8
  from transformers import AutoTokenizer, AutoModel
9
  import torch.nn.functional as F
10
  from openai import AzureOpenAI
11
+ from huggingface_hub import hf_hub_download
12
+
13
+ # Download both FastText models (raw and preprocessed) from the Hugging Face Hub
14
+ model_path_fasttext_raw = hf_hub_download(repo_id="talhasarit41/fasttext", filename="fasttext_raw.bin")
15
+ model_path_fasttext_preprocessed = hf_hub_download(repo_id="talhasarit41/fasttext", filename="fasttext_preprocessed.bin")
16
 
17
  # Azure OpenAI Configuration
18
  AZURE_API_VERSION = "2024-02-01"
 
22
 
23
  # Initialize Azure OpenAI client
24
  azure_client = AzureOpenAI(
25
+ api_key=os.getenv("AZURE_OPENAI_API_KEY"),
26
  api_version=AZURE_API_VERSION,
27
+ azure_endpoint=os.getenv("AZURE_OPENAI_EMBEDDING_ENDPOINT")
28
  )
29
 
30
  def generate_e5_embedding(text, model_name='intfloat/multilingual-e5-large'):
 
128
  models['GTE Classifier'] = pickle.load(f)
129
 
130
  # Load FastText models
131
+ models['FastText Raw'] = fasttext.load_model(model_path_fasttext_raw)
132
+ models['FastText Preprocessed'] = fasttext.load_model(model_path_fasttext_preprocessed)
133
 
134
  return models
135
 
 
182
  return "#ff4444" # Bright red for low confidence
183
 
184
  # [Add GTE embedding generation function]
185
+ def generate_gte_embedding(text, model_name='Alibaba-NLP/gte-multilingual-base'):
186
  """Generate GTE embeddings for a single text."""
187
  start_time = time.time()
188
  tokenizer = AutoTokenizer.from_pretrained(model_name)