Spaces:
Running
Update app.py
Browse files
app.py
CHANGED
@@ -12,6 +12,11 @@ from tabulate import tabulate
|
|
12 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
13 |
from multiprocessing import Pool, cpu_count
|
14 |
from flask import Flask, request, jsonify
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
# Paths for saving artifacts
|
17 |
MODEL_DIR = "./saved_models"
|
@@ -22,7 +27,16 @@ SKILL_TFIDF_PATH = os.path.join(MODEL_DIR, "skill_tfidf.pkl")
|
|
22 |
QUESTION_ANSWER_PATH = os.path.join(MODEL_DIR, "question_to_answer.pkl")
|
23 |
FAISS_INDEX_PATH = os.path.join(MODEL_DIR, "faiss_index.index")
|
24 |
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
# Load Datasets
|
28 |
def load_dataset(file_path, required_columns=[]):
|
@@ -84,7 +98,7 @@ def initialize_resources(user_skills):
|
|
84 |
answer_embeddings = universal_model.encode(list(question_to_answer.values()), convert_to_tensor=True, show_progress_bar=False).cpu().numpy()
|
85 |
|
86 |
if not resources_valid(skill_tfidf.keys(), [s.lower() for s in user_skills]):
|
87 |
-
|
88 |
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
|
89 |
all_texts = user_skills + questions_df['Answer'].fillna("").tolist() + questions_df['Question'].tolist()
|
90 |
tfidf_vectorizer.fit(all_texts)
|
@@ -113,7 +127,7 @@ def initialize_resources(user_skills):
|
|
113 |
universal_model.save_pretrained(UNIVERSAL_MODEL_PATH)
|
114 |
detector_model.save_pretrained(DETECTOR_MODEL_PATH)
|
115 |
detector_tokenizer.save_pretrained(DETECTOR_MODEL_PATH)
|
116 |
-
|
117 |
|
118 |
# Evaluate Responses
|
119 |
def evaluate_response(args):
|
|
|
12 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
13 |
from multiprocessing import Pool, cpu_count
|
14 |
from flask import Flask, request, jsonify
|
15 |
+
import logging
|
16 |
+
|
17 |
+
# Set up logging
|
18 |
+
logging.basicConfig(level=logging.INFO)
|
19 |
+
logger = logging.getLogger(__name__)
|
20 |
|
21 |
# Paths for saving artifacts
|
22 |
MODEL_DIR = "./saved_models"
|
|
|
27 |
QUESTION_ANSWER_PATH = os.path.join(MODEL_DIR, "question_to_answer.pkl")
|
28 |
FAISS_INDEX_PATH = os.path.join(MODEL_DIR, "faiss_index.index")
|
29 |
|
30 |
+
# Ensure the directory exists with error handling
|
31 |
+
try:
|
32 |
+
os.makedirs(MODEL_DIR, exist_ok=True)
|
33 |
+
logger.info(f"Successfully created/accessed directory: {MODEL_DIR}")
|
34 |
+
except PermissionError as e:
|
35 |
+
logger.error(f"Permission denied creating directory {MODEL_DIR}: {e}")
|
36 |
+
raise
|
37 |
+
except Exception as e:
|
38 |
+
logger.error(f"Unexpected error creating directory {MODEL_DIR}: {e}")
|
39 |
+
raise
|
40 |
|
41 |
# Load Datasets
|
42 |
def load_dataset(file_path, required_columns=[]):
|
|
|
98 |
answer_embeddings = universal_model.encode(list(question_to_answer.values()), convert_to_tensor=True, show_progress_bar=False).cpu().numpy()
|
99 |
|
100 |
if not resources_valid(skill_tfidf.keys(), [s.lower() for s in user_skills]):
|
101 |
+
logger.info("⚠ Saved skill TF-IDF mismatch detected. Recomputing resources.")
|
102 |
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
|
103 |
all_texts = user_skills + questions_df['Answer'].fillna("").tolist() + questions_df['Question'].tolist()
|
104 |
tfidf_vectorizer.fit(all_texts)
|
|
|
127 |
universal_model.save_pretrained(UNIVERSAL_MODEL_PATH)
|
128 |
detector_model.save_pretrained(DETECTOR_MODEL_PATH)
|
129 |
detector_tokenizer.save_pretrained(DETECTOR_MODEL_PATH)
|
130 |
+
logger.info(f"Models and resources saved to {MODEL_DIR}")
|
131 |
|
132 |
# Evaluate Responses
|
133 |
def evaluate_response(args):
|