Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -49,11 +49,11 @@ def load_dataset(file_path, required_columns=[]):
|
|
49 |
df = pd.read_csv(file_path)
|
50 |
for col in required_columns:
|
51 |
if col not in df.columns:
|
52 |
-
|
53 |
df[col] = "" if col != 'level' else 'Intermediate'
|
54 |
return df
|
55 |
except FileNotFoundError:
|
56 |
-
|
57 |
return None
|
58 |
|
59 |
user_df = load_dataset("Updated_User_Profile_Dataset.csv", ["name", "skills", "level"])
|
@@ -72,6 +72,15 @@ if courses_df is None or 'skills' not in courses_df.columns or courses_df['skill
|
|
72 |
'completion_rate': [0.7, 0.65, 0.8, 0.6]
|
73 |
})
|
74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
# Load or Initialize Models
|
76 |
if os.path.exists(UNIVERSAL_MODEL_PATH):
|
77 |
universal_model = SentenceTransformer(UNIVERSAL_MODEL_PATH)
|
@@ -216,49 +225,79 @@ app = Flask(__name__)
|
|
216 |
@app.route('/assess', methods=['POST'])
|
217 |
def assess_skills():
|
218 |
data = request.get_json()
|
|
|
|
|
219 |
if not data or 'user_index' not in data or 'answers' not in data:
|
|
|
220 |
return jsonify({"error": "Invalid input. Provide 'user_index' and 'answers' in JSON body."}), 400
|
221 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
222 |
user_index = int(data['user_index'])
|
223 |
if user_index < 0 or user_index >= len(user_df):
|
|
|
224 |
return jsonify({"error": "Invalid user index."}), 400
|
225 |
|
226 |
user_text = user_df.loc[user_index, 'skills']
|
227 |
user_skills = [skill.strip() for skill in user_text.split(",") if skill.strip()] if isinstance(user_text, str) else ["Python", "SQL"]
|
228 |
user_name = user_df.loc[user_index, 'name']
|
229 |
user_level = user_df.loc[user_index, 'level'] if 'level' in user_df.columns and pd.notna(user_df.loc[user_index, 'level']) else 'Intermediate'
|
|
|
230 |
|
231 |
initialize_resources(user_skills)
|
232 |
|
233 |
-
|
|
|
|
|
|
|
234 |
if filtered_questions.empty:
|
|
|
235 |
return jsonify({"error": "No matching questions found!"}), 500
|
236 |
|
237 |
user_questions = []
|
238 |
for skill in user_skills:
|
239 |
-
skill_questions = filtered_questions[filtered_questions['Skill'] == skill]
|
|
|
240 |
if not skill_questions.empty:
|
241 |
user_questions.append(skill_questions.sample(1).iloc[0])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
242 |
user_questions = pd.DataFrame(user_questions)
|
|
|
243 |
|
244 |
if len(user_questions) != 4:
|
245 |
-
|
246 |
-
|
247 |
-
answers = data['answers']
|
248 |
-
if len(answers) != 4:
|
249 |
-
return jsonify({"error": "Please provide exactly 4 answers."}), 400
|
250 |
|
251 |
user_responses = []
|
252 |
for idx, row in user_questions.iterrows():
|
253 |
answer = answers[idx]
|
|
|
254 |
if not answer or answer.lower() == 'skip':
|
255 |
user_responses.append((row['Skill'], None, row['Question']))
|
256 |
else:
|
257 |
user_responses.append((row['Skill'], answer, row['Question']))
|
258 |
|
259 |
-
|
260 |
-
|
261 |
-
|
|
|
|
|
|
|
|
|
|
|
262 |
|
263 |
user_scores = {}
|
264 |
ai_flags = {}
|
@@ -301,6 +340,7 @@ def assess_skills():
|
|
301 |
"recommended_courses": courses_output,
|
302 |
"recommended_jobs": jobs_output
|
303 |
}
|
|
|
304 |
return jsonify(response)
|
305 |
|
306 |
if __name__ == '__main__':
|
|
|
49 |
df = pd.read_csv(file_path)
|
50 |
for col in required_columns:
|
51 |
if col not in df.columns:
|
52 |
+
logger.warning(f"Column '{col}' missing in {file_path}. Using default values.")
|
53 |
df[col] = "" if col != 'level' else 'Intermediate'
|
54 |
return df
|
55 |
except FileNotFoundError:
|
56 |
+
logger.error(f"Dataset not found at {file_path}. Exiting.")
|
57 |
return None
|
58 |
|
59 |
user_df = load_dataset("Updated_User_Profile_Dataset.csv", ["name", "skills", "level"])
|
|
|
72 |
'completion_rate': [0.7, 0.65, 0.8, 0.6]
|
73 |
})
|
74 |
|
75 |
+
# Validate questions_df
|
76 |
+
if questions_df is None or questions_df.empty:
|
77 |
+
logger.error("questions_df is empty or could not be loaded. Exiting.")
|
78 |
+
exit(1)
|
79 |
+
if not all(col in questions_df.columns for col in ["Skill", "Question", "Answer"]):
|
80 |
+
logger.error("questions_df is missing required columns. Exiting.")
|
81 |
+
exit(1)
|
82 |
+
logger.info(f"questions_df loaded with {len(questions_df)} rows. Skills available: {questions_df['Skill'].unique().tolist()}")
|
83 |
+
|
84 |
# Load or Initialize Models
|
85 |
if os.path.exists(UNIVERSAL_MODEL_PATH):
|
86 |
universal_model = SentenceTransformer(UNIVERSAL_MODEL_PATH)
|
|
|
225 |
@app.route('/assess', methods=['POST'])
|
226 |
def assess_skills():
|
227 |
data = request.get_json()
|
228 |
+
logger.info(f"Received request: {data}")
|
229 |
+
|
230 |
if not data or 'user_index' not in data or 'answers' not in data:
|
231 |
+
logger.error("Invalid input: Missing 'user_index' or 'answers' in JSON body.")
|
232 |
return jsonify({"error": "Invalid input. Provide 'user_index' and 'answers' in JSON body."}), 400
|
233 |
|
234 |
+
# Validate answers length immediately
|
235 |
+
answers = data['answers']
|
236 |
+
if not isinstance(answers, list):
|
237 |
+
logger.error(f"Answers must be a list, got: {type(answers)}")
|
238 |
+
return jsonify({"error": "Answers must be a list."}), 400
|
239 |
+
if len(answers) != 4:
|
240 |
+
logger.error(f"Expected exactly 4 answers, but received {len(answers)}.")
|
241 |
+
return jsonify({"error": f"Please provide exactly 4 answers. Received {len(answers)}."}), 400
|
242 |
+
|
243 |
user_index = int(data['user_index'])
|
244 |
if user_index < 0 or user_index >= len(user_df):
|
245 |
+
logger.error(f"Invalid user index: {user_index}. Must be between 0 and {len(user_df) - 1}.")
|
246 |
return jsonify({"error": "Invalid user index."}), 400
|
247 |
|
248 |
user_text = user_df.loc[user_index, 'skills']
|
249 |
user_skills = [skill.strip() for skill in user_text.split(",") if skill.strip()] if isinstance(user_text, str) else ["Python", "SQL"]
|
250 |
user_name = user_df.loc[user_index, 'name']
|
251 |
user_level = user_df.loc[user_index, 'level'] if 'level' in user_df.columns and pd.notna(user_df.loc[user_index, 'level']) else 'Intermediate'
|
252 |
+
logger.info(f"User: {user_name}, Skills: {user_skills}, Level: {user_level}")
|
253 |
|
254 |
initialize_resources(user_skills)
|
255 |
|
256 |
+
# Normalize skills for case-insensitive matching
|
257 |
+
filtered_questions = questions_df[questions_df['Skill'].str.lower().isin([skill.lower() for skill in user_skills])]
|
258 |
+
logger.info(f"Filtered questions shape: {filtered_questions.shape}")
|
259 |
+
logger.info(f"Available skills in questions_df: {filtered_questions['Skill'].unique().tolist()}")
|
260 |
if filtered_questions.empty:
|
261 |
+
logger.error("No matching questions found for the user's skills.")
|
262 |
return jsonify({"error": "No matching questions found!"}), 500
|
263 |
|
264 |
user_questions = []
|
265 |
for skill in user_skills:
|
266 |
+
skill_questions = filtered_questions[filtered_questions['Skill'].str.lower() == skill.lower()]
|
267 |
+
logger.info(f"Questions for skill '{skill}': {len(skill_questions)}")
|
268 |
if not skill_questions.empty:
|
269 |
user_questions.append(skill_questions.sample(1).iloc[0])
|
270 |
+
else:
|
271 |
+
logger.warning(f"No questions found for skill '{skill}'. Using a default question.")
|
272 |
+
user_questions.append({
|
273 |
+
'Skill': skill,
|
274 |
+
'Question': f"What are the best practices for using {skill} in a production environment?",
|
275 |
+
'Answer': f"Best practices for {skill} include proper documentation, monitoring, and security measures."
|
276 |
+
})
|
277 |
user_questions = pd.DataFrame(user_questions)
|
278 |
+
logger.info(f"Selected questions: {user_questions[['Skill', 'Question']].to_dict(orient='records')}")
|
279 |
|
280 |
if len(user_questions) != 4:
|
281 |
+
logger.error(f"Not enough questions for all skills. Expected 4, got {len(user_questions)}.")
|
282 |
+
return jsonify({"error": f"Not enough questions for all skills! Expected 4, got {len(user_questions)}."}), 500
|
|
|
|
|
|
|
283 |
|
284 |
user_responses = []
|
285 |
for idx, row in user_questions.iterrows():
|
286 |
answer = answers[idx]
|
287 |
+
logger.debug(f"Pairing question for skill '{row['Skill']}' with answer at index {idx}")
|
288 |
if not answer or answer.lower() == 'skip':
|
289 |
user_responses.append((row['Skill'], None, row['Question']))
|
290 |
else:
|
291 |
user_responses.append((row['Skill'], answer, row['Question']))
|
292 |
|
293 |
+
try:
|
294 |
+
with Pool(cpu_count()) as pool:
|
295 |
+
eval_args = [(skill, user_code, question) for skill, user_code, question in user_responses if user_code]
|
296 |
+
logger.info(f"Evaluating {len(eval_args)} answers using multiprocessing pool.")
|
297 |
+
results = pool.map(evaluate_response, eval_args)
|
298 |
+
except Exception as e:
|
299 |
+
logger.error(f"Error in evaluate_response: {str(e)}", exc_info=True)
|
300 |
+
return jsonify({"error": "Failed to evaluate answers due to an internal error."}), 500
|
301 |
|
302 |
user_scores = {}
|
303 |
ai_flags = {}
|
|
|
340 |
"recommended_courses": courses_output,
|
341 |
"recommended_jobs": jobs_output
|
342 |
}
|
343 |
+
logger.info(f"Response: {response}")
|
344 |
return jsonify(response)
|
345 |
|
346 |
if __name__ == '__main__':
|