Muhammad541 committed on
Commit
d51cb13
·
verified ·
1 Parent(s): 6a18322

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -24
app.py CHANGED
@@ -38,9 +38,19 @@ TFIDF_PATH = os.path.join(chosen_model_dir, "tfidf_vectorizer.pkl")
38
  SKILL_TFIDF_PATH = os.path.join(chosen_model_dir, "skill_tfidf.pkl")
39
  QUESTION_ANSWER_PATH = os.path.join(chosen_model_dir, "question_to_answer.pkl")
40
  FAISS_INDEX_PATH = os.path.join(chosen_model_dir, "faiss_index.index")
 
41
  COURSE_SIMILARITY_PATH = os.path.join(chosen_model_dir, "course_similarity.pkl")
42
  JOB_SIMILARITY_PATH = os.path.join(chosen_model_dir, "job_similarity.pkl")
43
 
 
 
 
 
 
 
 
 
 
44
  # Improved dataset loading with fallback
45
  def load_dataset(file_path, required_columns=[], fallback_data=None):
46
  try:
@@ -108,20 +118,13 @@ def load_universal_model():
108
  logger.info(f"Loading universal model from {UNIVERSAL_MODEL_PATH}")
109
  return SentenceTransformer(UNIVERSAL_MODEL_PATH)
110
  else:
111
- logger.info(f"Loading universal model: all-MiniLM-L6-v2")
112
- model = SentenceTransformer("all-MiniLM-L6-v2")
113
- model.save(UNIVERSAL_MODEL_PATH)
114
- return model
115
- except Exception as e:
116
- logger.error(f"Failed to load universal model all-MiniLM-L6-v2: {e}. Falling back to default.")
117
- try:
118
- logger.info(f"Loading fallback model: {default_model}")
119
  model = SentenceTransformer(default_model)
120
  model.save(UNIVERSAL_MODEL_PATH)
121
  return model
122
- except Exception as e:
123
- logger.error(f"Failed to load fallback model {default_model}: {e}. Exiting.")
124
- exit(1)
125
 
126
  universal_model = load_universal_model()
127
 
@@ -132,23 +135,16 @@ else:
132
  detector_tokenizer = AutoTokenizer.from_pretrained("roberta-base-openai-detector")
133
  detector_model = AutoModelForSequenceClassification.from_pretrained("roberta-base-openai-detector")
134
 
135
- # Global variables for precomputed data
136
- tfidf_vectorizer = None
137
- skill_tfidf = None
138
- question_to_answer = None
139
- faiss_index = None
140
- course_similarity = None
141
- job_similarity = None
142
-
143
  # Load Precomputed Resources
144
  def load_precomputed_resources():
145
- global tfidf_vectorizer, skill_tfidf, question_to_answer, faiss_index, course_similarity, job_similarity
146
- if all(os.path.exists(p) for p in [TFIDF_PATH, SKILL_TFIDF_PATH, QUESTION_ANSWER_PATH, FAISS_INDEX_PATH, COURSE_SIMILARITY_PATH, JOB_SIMILARITY_PATH]):
147
  try:
148
  with open(TFIDF_PATH, 'rb') as f: tfidf_vectorizer = pickle.load(f)
149
  with open(SKILL_TFIDF_PATH, 'rb') as f: skill_tfidf = pickle.load(f)
150
  with open(QUESTION_ANSWER_PATH, 'rb') as f: question_to_answer = pickle.load(f)
151
  faiss_index = faiss.read_index(FAISS_INDEX_PATH)
 
152
  with open(COURSE_SIMILARITY_PATH, 'rb') as f: course_similarity = pickle.load(f)
153
  with open(JOB_SIMILARITY_PATH, 'rb') as f: job_similarity = pickle.load(f)
154
  logger.info("Loaded precomputed resources successfully")
@@ -160,7 +156,7 @@ def load_precomputed_resources():
160
 
161
  # Precompute Resources Offline (to be run separately)
162
  def precompute_resources():
163
- global tfidf_vectorizer, skill_tfidf, question_to_answer, faiss_index, course_similarity, job_similarity
164
  logger.info("Precomputing resources offline")
165
  tfidf_vectorizer = TfidfVectorizer(stop_words='english')
166
  all_texts = questions_df['Answer'].tolist() + questions_df['Question'].tolist()
@@ -189,6 +185,7 @@ def precompute_resources():
189
  with open(SKILL_TFIDF_PATH, 'wb') as f: pickle.dump(skill_tfidf, f)
190
  with open(QUESTION_ANSWER_PATH, 'wb') as f: pickle.dump(question_to_answer, f)
191
  faiss.write_index(faiss_index, FAISS_INDEX_PATH)
 
192
  with open(COURSE_SIMILARITY_PATH, 'wb') as f: pickle.dump(course_similarity, f)
193
  with open(JOB_SIMILARITY_PATH, 'wb') as f: pickle.dump(job_similarity, f)
194
  universal_model.save(UNIVERSAL_MODEL_PATH)
@@ -232,7 +229,7 @@ def recommend_courses(skills_to_improve, user_level, upgrade=False):
232
  return []
233
 
234
  similarities = course_similarity[skill_indices]
235
- total_scores = 0.6 * np.max(similarities, axis=0) + 0.2 * courses_df.get('popularity', 0.8).values + 0.2 * courses_df.get('completion_rate', 0.7).values
236
 
237
  target_level = 'Advanced' if upgrade else user_level
238
  idx = np.argsort(-total_scores)[:5]
@@ -294,7 +291,7 @@ def assess_skills():
294
  if len(answers) != len(user_skills):
295
  return jsonify({"error": "Answers count must match skills count"}), 400
296
 
297
- load_precomputed_resources()
298
 
299
  user_questions = []
300
  for skill in user_skills:
 
38
  SKILL_TFIDF_PATH = os.path.join(chosen_model_dir, "skill_tfidf.pkl")
39
  QUESTION_ANSWER_PATH = os.path.join(chosen_model_dir, "question_to_answer.pkl")
40
  FAISS_INDEX_PATH = os.path.join(chosen_model_dir, "faiss_index.index")
41
+ ANSWER_EMBEDDINGS_PATH = os.path.join(chosen_model_dir, "answer_embeddings.pkl")
42
  COURSE_SIMILARITY_PATH = os.path.join(chosen_model_dir, "course_similarity.pkl")
43
  JOB_SIMILARITY_PATH = os.path.join(chosen_model_dir, "job_similarity.pkl")
44
 
45
+ # Global variables for precomputed data
46
+ tfidf_vectorizer = None
47
+ skill_tfidf = None
48
+ question_to_answer = None
49
+ faiss_index = None
50
+ answer_embeddings = None
51
+ course_similarity = None
52
+ job_similarity = None
53
+
54
  # Improved dataset loading with fallback
55
  def load_dataset(file_path, required_columns=[], fallback_data=None):
56
  try:
 
118
  logger.info(f"Loading universal model from {UNIVERSAL_MODEL_PATH}")
119
  return SentenceTransformer(UNIVERSAL_MODEL_PATH)
120
  else:
121
+ logger.info(f"Loading universal model: {default_model}")
 
 
 
 
 
 
 
122
  model = SentenceTransformer(default_model)
123
  model.save(UNIVERSAL_MODEL_PATH)
124
  return model
125
+ except Exception as e:
126
+ logger.error(f"Failed to load universal model {default_model}: {e}. Exiting.")
127
+ exit(1)
128
 
129
  universal_model = load_universal_model()
130
 
 
135
  detector_tokenizer = AutoTokenizer.from_pretrained("roberta-base-openai-detector")
136
  detector_model = AutoModelForSequenceClassification.from_pretrained("roberta-base-openai-detector")
137
 
 
 
 
 
 
 
 
 
138
  # Load Precomputed Resources
139
  def load_precomputed_resources():
140
+ global tfidf_vectorizer, skill_tfidf, question_to_answer, faiss_index, answer_embeddings, course_similarity, job_similarity
141
+ if all(os.path.exists(p) for p in [TFIDF_PATH, SKILL_TFIDF_PATH, QUESTION_ANSWER_PATH, FAISS_INDEX_PATH, ANSWER_EMBEDDINGS_PATH, COURSE_SIMILARITY_PATH, JOB_SIMILARITY_PATH]):
142
  try:
143
  with open(TFIDF_PATH, 'rb') as f: tfidf_vectorizer = pickle.load(f)
144
  with open(SKILL_TFIDF_PATH, 'rb') as f: skill_tfidf = pickle.load(f)
145
  with open(QUESTION_ANSWER_PATH, 'rb') as f: question_to_answer = pickle.load(f)
146
  faiss_index = faiss.read_index(FAISS_INDEX_PATH)
147
+ with open(ANSWER_EMBEDDINGS_PATH, 'rb') as f: answer_embeddings = pickle.load(f)
148
  with open(COURSE_SIMILARITY_PATH, 'rb') as f: course_similarity = pickle.load(f)
149
  with open(JOB_SIMILARITY_PATH, 'rb') as f: job_similarity = pickle.load(f)
150
  logger.info("Loaded precomputed resources successfully")
 
156
 
157
  # Precompute Resources Offline (to be run separately)
158
  def precompute_resources():
159
+ global tfidf_vectorizer, skill_tfidf, question_to_answer, faiss_index, answer_embeddings, course_similarity, job_similarity
160
  logger.info("Precomputing resources offline")
161
  tfidf_vectorizer = TfidfVectorizer(stop_words='english')
162
  all_texts = questions_df['Answer'].tolist() + questions_df['Question'].tolist()
 
185
  with open(SKILL_TFIDF_PATH, 'wb') as f: pickle.dump(skill_tfidf, f)
186
  with open(QUESTION_ANSWER_PATH, 'wb') as f: pickle.dump(question_to_answer, f)
187
  faiss.write_index(faiss_index, FAISS_INDEX_PATH)
188
+ with open(ANSWER_EMBEDDINGS_PATH, 'wb') as f: pickle.dump(answer_embeddings, f)
189
  with open(COURSE_SIMILARITY_PATH, 'wb') as f: pickle.dump(course_similarity, f)
190
  with open(JOB_SIMILARITY_PATH, 'wb') as f: pickle.dump(job_similarity, f)
191
  universal_model.save(UNIVERSAL_MODEL_PATH)
 
229
  return []
230
 
231
  similarities = course_similarity[skill_indices]
232
+ total_scores = 0.6 * np.max(similarities, axis=0) + 0.2 * courses_df['popularity'].values + 0.2 * courses_df['completion_rate'].values
233
 
234
  target_level = 'Advanced' if upgrade else user_level
235
  idx = np.argsort(-total_scores)[:5]
 
291
  if len(answers) != len(user_skills):
292
  return jsonify({"error": "Answers count must match skills count"}), 400
293
 
294
+ load_precomputed_resources() # Load precomputed resources before processing
295
 
296
  user_questions = []
297
  for skill in user_skills: