Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -38,9 +38,19 @@ TFIDF_PATH = os.path.join(chosen_model_dir, "tfidf_vectorizer.pkl")
|
|
38 |
SKILL_TFIDF_PATH = os.path.join(chosen_model_dir, "skill_tfidf.pkl")
|
39 |
QUESTION_ANSWER_PATH = os.path.join(chosen_model_dir, "question_to_answer.pkl")
|
40 |
FAISS_INDEX_PATH = os.path.join(chosen_model_dir, "faiss_index.index")
|
|
|
41 |
COURSE_SIMILARITY_PATH = os.path.join(chosen_model_dir, "course_similarity.pkl")
|
42 |
JOB_SIMILARITY_PATH = os.path.join(chosen_model_dir, "job_similarity.pkl")
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
# Improved dataset loading with fallback
|
45 |
def load_dataset(file_path, required_columns=[], fallback_data=None):
|
46 |
try:
|
@@ -108,20 +118,13 @@ def load_universal_model():
|
|
108 |
logger.info(f"Loading universal model from {UNIVERSAL_MODEL_PATH}")
|
109 |
return SentenceTransformer(UNIVERSAL_MODEL_PATH)
|
110 |
else:
|
111 |
-
logger.info(f"Loading universal model: all-MiniLM-L6-v2")
|
112 |
-
model = SentenceTransformer("all-MiniLM-L6-v2")
|
113 |
-
model.save(UNIVERSAL_MODEL_PATH)
|
114 |
-
return model
|
115 |
-
except Exception as e:
|
116 |
-
logger.error(f"Failed to load universal model all-MiniLM-L6-v2: {e}. Falling back to default.")
|
117 |
-
try:
|
118 |
-
logger.info(f"Loading fallback model: {default_model}")
|
119 |
model = SentenceTransformer(default_model)
|
120 |
model.save(UNIVERSAL_MODEL_PATH)
|
121 |
return model
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
|
126 |
universal_model = load_universal_model()
|
127 |
|
@@ -132,23 +135,16 @@ else:
|
|
132 |
detector_tokenizer = AutoTokenizer.from_pretrained("roberta-base-openai-detector")
|
133 |
detector_model = AutoModelForSequenceClassification.from_pretrained("roberta-base-openai-detector")
|
134 |
|
135 |
-
# Global variables for precomputed data
|
136 |
-
tfidf_vectorizer = None
|
137 |
-
skill_tfidf = None
|
138 |
-
question_to_answer = None
|
139 |
-
faiss_index = None
|
140 |
-
course_similarity = None
|
141 |
-
job_similarity = None
|
142 |
-
|
143 |
# Load Precomputed Resources
|
144 |
def load_precomputed_resources():
|
145 |
-
global tfidf_vectorizer, skill_tfidf, question_to_answer, faiss_index, course_similarity, job_similarity
|
146 |
-
if all(os.path.exists(p) for p in [TFIDF_PATH, SKILL_TFIDF_PATH, QUESTION_ANSWER_PATH, FAISS_INDEX_PATH, COURSE_SIMILARITY_PATH, JOB_SIMILARITY_PATH]):
|
147 |
try:
|
148 |
with open(TFIDF_PATH, 'rb') as f: tfidf_vectorizer = pickle.load(f)
|
149 |
with open(SKILL_TFIDF_PATH, 'rb') as f: skill_tfidf = pickle.load(f)
|
150 |
with open(QUESTION_ANSWER_PATH, 'rb') as f: question_to_answer = pickle.load(f)
|
151 |
faiss_index = faiss.read_index(FAISS_INDEX_PATH)
|
|
|
152 |
with open(COURSE_SIMILARITY_PATH, 'rb') as f: course_similarity = pickle.load(f)
|
153 |
with open(JOB_SIMILARITY_PATH, 'rb') as f: job_similarity = pickle.load(f)
|
154 |
logger.info("Loaded precomputed resources successfully")
|
@@ -160,7 +156,7 @@ def load_precomputed_resources():
|
|
160 |
|
161 |
# Precompute Resources Offline (to be run separately)
|
162 |
def precompute_resources():
|
163 |
-
global tfidf_vectorizer, skill_tfidf, question_to_answer, faiss_index, course_similarity, job_similarity
|
164 |
logger.info("Precomputing resources offline")
|
165 |
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
|
166 |
all_texts = questions_df['Answer'].tolist() + questions_df['Question'].tolist()
|
@@ -189,6 +185,7 @@ def precompute_resources():
|
|
189 |
with open(SKILL_TFIDF_PATH, 'wb') as f: pickle.dump(skill_tfidf, f)
|
190 |
with open(QUESTION_ANSWER_PATH, 'wb') as f: pickle.dump(question_to_answer, f)
|
191 |
faiss.write_index(faiss_index, FAISS_INDEX_PATH)
|
|
|
192 |
with open(COURSE_SIMILARITY_PATH, 'wb') as f: pickle.dump(course_similarity, f)
|
193 |
with open(JOB_SIMILARITY_PATH, 'wb') as f: pickle.dump(job_similarity, f)
|
194 |
universal_model.save(UNIVERSAL_MODEL_PATH)
|
@@ -232,7 +229,7 @@ def recommend_courses(skills_to_improve, user_level, upgrade=False):
|
|
232 |
return []
|
233 |
|
234 |
similarities = course_similarity[skill_indices]
|
235 |
-
total_scores = 0.6 * np.max(similarities, axis=0) + 0.2 * courses_df
|
236 |
|
237 |
target_level = 'Advanced' if upgrade else user_level
|
238 |
idx = np.argsort(-total_scores)[:5]
|
@@ -294,7 +291,7 @@ def assess_skills():
|
|
294 |
if len(answers) != len(user_skills):
|
295 |
return jsonify({"error": "Answers count must match skills count"}), 400
|
296 |
|
297 |
-
load_precomputed_resources()
|
298 |
|
299 |
user_questions = []
|
300 |
for skill in user_skills:
|
|
|
38 |
SKILL_TFIDF_PATH = os.path.join(chosen_model_dir, "skill_tfidf.pkl")
|
39 |
QUESTION_ANSWER_PATH = os.path.join(chosen_model_dir, "question_to_answer.pkl")
|
40 |
FAISS_INDEX_PATH = os.path.join(chosen_model_dir, "faiss_index.index")
|
41 |
+
ANSWER_EMBEDDINGS_PATH = os.path.join(chosen_model_dir, "answer_embeddings.pkl")
|
42 |
COURSE_SIMILARITY_PATH = os.path.join(chosen_model_dir, "course_similarity.pkl")
|
43 |
JOB_SIMILARITY_PATH = os.path.join(chosen_model_dir, "job_similarity.pkl")
|
44 |
|
45 |
+
# Global variables for precomputed data
|
46 |
+
tfidf_vectorizer = None
|
47 |
+
skill_tfidf = None
|
48 |
+
question_to_answer = None
|
49 |
+
faiss_index = None
|
50 |
+
answer_embeddings = None
|
51 |
+
course_similarity = None
|
52 |
+
job_similarity = None
|
53 |
+
|
54 |
# Improved dataset loading with fallback
|
55 |
def load_dataset(file_path, required_columns=[], fallback_data=None):
|
56 |
try:
|
|
|
118 |
logger.info(f"Loading universal model from {UNIVERSAL_MODEL_PATH}")
|
119 |
return SentenceTransformer(UNIVERSAL_MODEL_PATH)
|
120 |
else:
|
121 |
+
logger.info(f"Loading universal model: {default_model}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
model = SentenceTransformer(default_model)
|
123 |
model.save(UNIVERSAL_MODEL_PATH)
|
124 |
return model
|
125 |
+
except Exception as e:
|
126 |
+
logger.error(f"Failed to load universal model {default_model}: {e}. Exiting.")
|
127 |
+
exit(1)
|
128 |
|
129 |
universal_model = load_universal_model()
|
130 |
|
|
|
135 |
detector_tokenizer = AutoTokenizer.from_pretrained("roberta-base-openai-detector")
|
136 |
detector_model = AutoModelForSequenceClassification.from_pretrained("roberta-base-openai-detector")
|
137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
# Load Precomputed Resources
|
139 |
def load_precomputed_resources():
|
140 |
+
global tfidf_vectorizer, skill_tfidf, question_to_answer, faiss_index, answer_embeddings, course_similarity, job_similarity
|
141 |
+
if all(os.path.exists(p) for p in [TFIDF_PATH, SKILL_TFIDF_PATH, QUESTION_ANSWER_PATH, FAISS_INDEX_PATH, ANSWER_EMBEDDINGS_PATH, COURSE_SIMILARITY_PATH, JOB_SIMILARITY_PATH]):
|
142 |
try:
|
143 |
with open(TFIDF_PATH, 'rb') as f: tfidf_vectorizer = pickle.load(f)
|
144 |
with open(SKILL_TFIDF_PATH, 'rb') as f: skill_tfidf = pickle.load(f)
|
145 |
with open(QUESTION_ANSWER_PATH, 'rb') as f: question_to_answer = pickle.load(f)
|
146 |
faiss_index = faiss.read_index(FAISS_INDEX_PATH)
|
147 |
+
with open(ANSWER_EMBEDDINGS_PATH, 'rb') as f: answer_embeddings = pickle.load(f)
|
148 |
with open(COURSE_SIMILARITY_PATH, 'rb') as f: course_similarity = pickle.load(f)
|
149 |
with open(JOB_SIMILARITY_PATH, 'rb') as f: job_similarity = pickle.load(f)
|
150 |
logger.info("Loaded precomputed resources successfully")
|
|
|
156 |
|
157 |
# Precompute Resources Offline (to be run separately)
|
158 |
def precompute_resources():
|
159 |
+
global tfidf_vectorizer, skill_tfidf, question_to_answer, faiss_index, answer_embeddings, course_similarity, job_similarity
|
160 |
logger.info("Precomputing resources offline")
|
161 |
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
|
162 |
all_texts = questions_df['Answer'].tolist() + questions_df['Question'].tolist()
|
|
|
185 |
with open(SKILL_TFIDF_PATH, 'wb') as f: pickle.dump(skill_tfidf, f)
|
186 |
with open(QUESTION_ANSWER_PATH, 'wb') as f: pickle.dump(question_to_answer, f)
|
187 |
faiss.write_index(faiss_index, FAISS_INDEX_PATH)
|
188 |
+
with open(ANSWER_EMBEDDINGS_PATH, 'wb') as f: pickle.dump(answer_embeddings, f)
|
189 |
with open(COURSE_SIMILARITY_PATH, 'wb') as f: pickle.dump(course_similarity, f)
|
190 |
with open(JOB_SIMILARITY_PATH, 'wb') as f: pickle.dump(job_similarity, f)
|
191 |
universal_model.save(UNIVERSAL_MODEL_PATH)
|
|
|
229 |
return []
|
230 |
|
231 |
similarities = course_similarity[skill_indices]
|
232 |
+
total_scores = 0.6 * np.max(similarities, axis=0) + 0.2 * courses_df['popularity'].values + 0.2 * courses_df['completion_rate'].values
|
233 |
|
234 |
target_level = 'Advanced' if upgrade else user_level
|
235 |
idx = np.argsort(-total_scores)[:5]
|
|
|
291 |
if len(answers) != len(user_skills):
|
292 |
return jsonify({"error": "Answers count must match skills count"}), 400
|
293 |
|
294 |
+
load_precomputed_resources() # Load precomputed resources before processing
|
295 |
|
296 |
user_questions = []
|
297 |
for skill in user_skills:
|