# Spaces: Running  (hosting-page status banner captured with the file; not part of the program)
import os
import re
import time
from typing import Optional, Dict, List

import pandas as pd
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Set cache directory
# Both variables point at the same directory inside the application tree.
# NOTE(review): these assignments execute after `sentence_transformers` has
# already been imported; if the library resolves its cache location at import
# time they may have no effect -- confirm, and move them above the imports if so.
os.environ["HF_HOME"] = "/app/cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/cache"

# Application object; handed to uvicorn in the `__main__` guard of this file.
app = FastAPI()
# Load datasets
# The three CSV files are read once, at import time, from DATA_DIR.
DATA_DIR = "/app/data/"
job_df = pd.read_csv(
    os.path.join(DATA_DIR, "Updated_Job_Posting_Dataset.csv"),
    encoding="latin1",
)
course_df = pd.read_csv(
    os.path.join(DATA_DIR, "coursera_course_dataset_v2_no_null.csv"),
)
coding_df = pd.read_csv(
    os.path.join(DATA_DIR, "Software Questions.csv"),
    encoding="latin1",
)
# Preprocess datasets
# Normalise the coding-question column names to the lowercase names the rest
# of this file uses ("question", "solutions", "category", "difficulty").
coding_df = coding_df.rename(columns={
    'Question': 'question',
    'Answer': 'solutions',
    'Category': 'category',
    'Difficulty': 'difficulty',
})
# Drop rows missing any of the four fields the challenge functions read.
coding_df.dropna(subset=['question', 'solutions', 'category', 'difficulty'], inplace=True)

# Align job and course column names with the names used when building results.
job_df.rename(columns={'company_name': 'company', 'required_skills': 'skills'}, inplace=True)
course_df.rename(columns={'Title': 'course_title', 'Skills': 'skills'}, inplace=True)

# Missing job descriptions become empty strings so they can still be encoded.
job_df["job_description"] = job_df["job_description"].fillna("")
# Load BERT model and vectorizer
# `bert_model` encodes job descriptions and the user's skills in recommend_jobs.
bert_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
# `vectorizer` is shared and re-fitted on every call to evaluate_coding_with_time.
vectorizer = TfidfVectorizer()
# Pydantic models for request bodies
class ChallengeRequest(BaseModel):
    """Request body for fetching coding challenges."""

    # Categories to look up; each is matched against the dataset's "category" column.
    skills: List[str]
    # Optional difficulty filter; compared case-insensitively when provided.
    difficulty: Optional[str] = None
class AssessmentRequest(BaseModel):
    """Request body for assessing a user's submitted answers."""

    # Echoed back unchanged in the assessment response.
    name: str
    # Skills to assess; also used as the query for job recommendations.
    skills: List[str]
    # Mapping of skill -> {question text -> submitted answer}; may be omitted.
    answers: Optional[Dict[str, Dict[str, str]]] = None
# Get coding challenges
def get_coding_challenges(categories: List[str], num_questions=5, difficulty: Optional[str] = None):
    """Sample coding challenges for each requested category.

    Args:
        categories: Category names; each is matched as a case-insensitive
            literal substring of the "category" column of ``coding_df``.
        num_questions: Maximum number of challenges to sample per category.
        difficulty: When given, only rows whose "difficulty" equals it
            (case-insensitively) are considered.

    Returns:
        A dict mapping every requested category to a list of records with the
        keys "question", "solutions" and "difficulty". Categories that are
        blank or match nothing map to an empty list.
    """
    skill_challenges = {}
    for category in categories:
        # A blank category is a substring of every value and would otherwise
        # return questions from the whole dataset.
        if not category.strip():
            skill_challenges[category] = []
            continue

        # regex=False: categories come from the request and may contain regex
        # metacharacters (e.g. "C++"), which must be matched literally.
        relevant = coding_df[
            coding_df["category"].str.contains(category, case=False, na=False, regex=False)
        ]
        if difficulty:
            relevant = relevant[relevant["difficulty"].str.lower() == difficulty.lower()]

        if relevant.empty:
            skill_challenges[category] = []
            continue

        sample_size = min(num_questions, len(relevant))
        sampled = relevant.sample(sample_size)
        skill_challenges[category] = sampled[["question", "solutions", "difficulty"]].to_dict(orient="records")
    return skill_challenges
# Evaluate coding answers
def evaluate_coding_with_time(user_code, correct_code, start_time):
    """Score a submitted answer against the reference answer.

    The score is the TF-IDF cosine similarity of the two texts scaled to
    0-100. If more than 120 seconds have elapsed since ``start_time``, 0.1
    points are deducted for every second beyond 120.

    Args:
        user_code: The submitted answer text.
        correct_code: The reference answer text.
        start_time: ``time.time()`` timestamp at which the attempt started.

    Returns:
        The score as a float rounded to two decimals, never below 0.
    """
    execution_time = time.time() - start_time

    try:
        # str(): reference answers come from a CSV column and are not
        # guaranteed to be strings.
        vectorized = vectorizer.fit_transform([str(user_code), str(correct_code)])
    except ValueError:
        # Raised when neither text yields a token (empty vocabulary); there is
        # nothing to compare, so the answer scores zero instead of failing.
        return 0.0

    similarity = cosine_similarity(vectorized)[0][1] * 100
    if execution_time > 120:
        similarity -= (execution_time - 120) * 0.1
    return round(float(max(similarity, 0)), 2)
# Assign proficiency level
def get_proficiency_level(score):
    """Map a numeric score to a proficiency label.

    Returns "Expert" for scores of 80 or more, "Intermediate" for scores of
    50 or more, and "Beginner" otherwise.
    """
    if score >= 80:
        return "Expert"
    if score >= 50:
        return "Intermediate"
    return "Beginner"
# Recommend courses
def recommend_courses(weak_skills):
    """Return up to five courses whose skills mention any of ``weak_skills``.

    Args:
        weak_skills: Skill names to search for in the "skills" column of
            ``course_df`` (case-insensitive, literal match).

    Returns:
        A list of at most five records with the keys "course_title" and
        "Organization"; empty when no usable skill is given.
    """
    if not weak_skills:
        return []

    # Escape each skill so names such as "C++" are matched literally, and drop
    # blank entries, which would turn the alternation into a match-everything
    # pattern.
    terms = [re.escape(skill) for skill in weak_skills if skill and skill.strip()]
    if not terms:
        return []

    pattern = "|".join(terms)
    courses = course_df[course_df["skills"].str.contains(pattern, case=False, na=False)]
    return courses[["course_title", "Organization"]].head(5).to_dict(orient="records")
# Recommend jobs
# Embeddings of every job description, built on first use (one row per job_df row).
_job_embedding_cache = None


def _get_job_embeddings():
    """Return the job-description embeddings, encoding them at most once."""
    global _job_embedding_cache
    # The length check rebuilds the cache if job_df has changed size since it
    # was filled.
    if _job_embedding_cache is None or len(_job_embedding_cache) != len(job_df):
        descriptions = job_df["job_description"].astype(str).tolist()
        _job_embedding_cache = bert_model.encode(descriptions)
    return _job_embedding_cache


def recommend_jobs(skills):
    """Return the five jobs whose descriptions best match ``skills``.

    The skills are joined with spaces, embedded with ``bert_model`` and
    compared by cosine similarity with the embedding of every job description.

    Args:
        skills: Skill names describing the user.

    Returns:
        A list of at most five records with the keys "job_title", "company",
        "location" and "BERT_Similarity", ordered by descending similarity;
        empty when ``skills`` is empty or there are no jobs.
    """
    if not skills or job_df.empty:
        return []

    # The job embeddings do not depend on the request, so they are computed
    # once and reused instead of being re-encoded on every call.
    job_embeddings = _get_job_embeddings()
    user_embedding = bert_model.encode(" ".join(skills))
    scores = cosine_similarity(job_embeddings, [user_embedding])[:, 0]

    # assign() works on a copy, so the shared job_df is not modified.
    ranked = job_df.assign(BERT_Similarity=scores)
    top_jobs = ranked.sort_values(by="BERT_Similarity", ascending=False).head(5)
    return top_jobs[["job_title", "company", "location", "BERT_Similarity"]].to_dict(orient="records")
# NOTE(review): no route decorator is visible on this function in the source as
# captured; confirm that it is registered on `app`.
def read_root():
    """Return a short message identifying the API."""
    return {"message": "Skill Assessment API"}
# POST endpoint for fetching challenges
# NOTE(review): no route decorator is visible on this function in the source as
# captured; confirm that it is registered on `app`.
def get_user_challenges(request: ChallengeRequest):
    """Return sampled challenges for the requested skills, without solutions.

    Args:
        request: The skills to fetch challenges for and an optional
            difficulty filter.

    Returns:
        ``{"challenges": {category: [{"question": ..., "difficulty": ...}]}}``.

    Raises:
        HTTPException: With status 400 when the skills list is empty.
    """
    skills = request.skills
    difficulty = request.difficulty
    if not skills:
        raise HTTPException(status_code=400, detail="Skills list cannot be empty")

    challenges = get_coding_challenges(skills, difficulty=difficulty)

    # Return only questions and difficulty (exclude solutions for the user)
    return {
        "challenges": {
            category: [
                {"question": challenge["question"], "difficulty": challenge["difficulty"]}
                for challenge in challenge_list
            ]
            for category, challenge_list in challenges.items()
        }
    }
# POST endpoint for assessing answers
# NOTE(review): no route decorator is visible on this function in the source as
# captured; confirm that it is registered on `app`.
def assess_skills(user_input: AssessmentRequest):
    """Score the user's answers and recommend courses and jobs.

    Scoring, per requested skill:
      * no challenge in the dataset matches the skill -> 0;
      * the request carries no answers for the skill -> default score of 50;
      * otherwise -> the mean score of the submitted answers, where each
        answer is compared with the reference solution of the question it
        names, and a question not found for that skill scores 0.

    Skills rated "Beginner" or "Intermediate" drive the course
    recommendations; all requested skills drive the job recommendations.

    Args:
        user_input: The user's name, skills and optional answers.

    Returns:
        A dict with the keys "name", "skills", "scores",
        "proficiency_levels", "recommended_courses" and "recommended_jobs".

    Raises:
        HTTPException: With status 400 when the skills list is empty.
    """
    user_name = user_input.name
    user_skills = user_input.skills
    if not user_skills:
        raise HTTPException(status_code=400, detail="Skills list cannot be empty")

    answers = user_input.answers or {}

    # Fetch every challenge for each skill rather than a fresh random handful:
    # answers are keyed by question text, and a new random sample would rarely
    # contain the questions the user was actually given.
    challenge_pool = get_coding_challenges(user_skills, num_questions=len(coding_df))

    user_scores = {}
    for skill, challenge_list in challenge_pool.items():
        if not challenge_list:
            user_scores[skill] = 0
            continue

        if skill not in answers:
            user_scores[skill] = 50.0  # Default score for unattempted questions
            continue

        submitted = answers[skill]
        if not submitted:
            user_scores[skill] = 0.0
            continue

        solutions_by_question = {
            challenge["question"]: challenge["solutions"] for challenge in challenge_list
        }
        total_score = 0
        for question, user_code in submitted.items():
            correct_code = solutions_by_question.get(question)
            if correct_code is None:
                # Question is not part of this skill's challenges: scores 0.
                continue
            start_time = time.time() - 10  # Simulate execution time
            total_score += evaluate_coding_with_time(user_code, correct_code, start_time)
        user_scores[skill] = round(total_score / len(submitted), 2)

    proficiency_levels = {skill: get_proficiency_level(score) for skill, score in user_scores.items()}
    weak_skills = [skill for skill, level in proficiency_levels.items() if level in ["Beginner", "Intermediate"]]
    courses = recommend_courses(weak_skills)
    jobs = recommend_jobs(user_skills)

    return {
        "name": user_name,
        "skills": user_skills,
        "scores": user_scores,
        "proficiency_levels": proficiency_levels,
        "recommended_courses": courses,
        "recommended_jobs": jobs
    }
# When run as a script, serve the app on all interfaces at port 7860.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)