Muhammad541 committed on
Commit
edecf53
·
verified ·
1 Parent(s): 898f47a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +149 -145
app.py CHANGED
@@ -1,146 +1,150 @@
1
- from fastapi import FastAPI, HTTPException
2
- from pydantic import BaseModel
3
- from typing import Optional, Dict, List
4
- import pandas as pd
5
- from sentence_transformers import SentenceTransformer
6
- from sklearn.feature_extraction.text import TfidfVectorizer
7
- from sklearn.metrics.pairwise import cosine_similarity
8
- import time
9
- import os
10
-
11
- app = FastAPI()
12
-
13
- # Load datasets (only those needed for questions, jobs, and courses)
14
- DATA_DIR = "data/"
15
- job_df = pd.read_csv(os.path.join(DATA_DIR, "Updated_Job_Posting_Dataset.csv"), encoding="latin1")
16
- course_df = pd.read_csv(os.path.join(DATA_DIR, "coursera_course_dataset_v2_no_null.csv"))
17
- coding_df = pd.read_csv(os.path.join(DATA_DIR, "Software Questions.csv"), encoding="latin1")
18
-
19
- # Preprocess datasets
20
- coding_df.rename(columns={'Question': 'question', 'Answer': 'solutions'}, inplace=True)
21
- job_df.rename(columns={'company_name': 'company', 'required_skills': 'skills'}, inplace=True)
22
- course_df.rename(columns={'Title': 'course_title', 'Skills': 'skills'}, inplace=True)
23
- coding_df.dropna(subset=['question', 'solutions'], inplace=True)
24
- job_df["job_description"] = job_df["job_description"].fillna("")
25
-
26
- # Load BERT model and vectorizer
27
- bert_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
28
- vectorizer = TfidfVectorizer()
29
-
30
- # Pydantic model for request body
31
- class UserInput(BaseModel):
32
- name: str
33
- skills: List[str] # Required list of skills
34
- answers: Optional[Dict[str, Dict[str, str]]] = None # Optional answers
35
-
36
- # Evaluate coding answers
37
- def evaluate_coding_with_time(user_code, correct_code, start_time):
38
- end_time = time.time()
39
- execution_time = end_time - start_time
40
- vectorized = vectorizer.fit_transform([user_code, correct_code])
41
- similarity = cosine_similarity(vectorized)[0][1] * 100
42
- if execution_time > 120:
43
- similarity -= (execution_time - 120) * 0.1
44
- return round(max(similarity, 0), 2)
45
-
46
- # Get coding challenges
47
- def get_coding_challenges(skills, num_questions=5):
48
- skill_challenges = {}
49
- for skill in skills:
50
- relevant = coding_df[coding_df["question"].str.contains(skill, case=False, na=False)]
51
- if not relevant.empty:
52
- skill_challenges[skill] = relevant.sample(min(num_questions, len(relevant)))[["question", "solutions"]].to_dict(orient="records")
53
- else:
54
- skill_challenges[skill] = []
55
- return skill_challenges
56
-
57
- # Assign proficiency level
58
- def get_proficiency_level(score):
59
- if score >= 80:
60
- return "Expert"
61
- elif score >= 50:
62
- return "Intermediate"
63
- else:
64
- return "Beginner"
65
-
66
- # Recommend courses
67
- def recommend_courses(weak_skills):
68
- if not weak_skills:
69
- return []
70
- courses = course_df[course_df['skills'].str.contains('|'.join(weak_skills), case=False, na=False)]
71
- return courses[['course_title', 'Organization']].head(5).to_dict(orient="records")
72
-
73
- # Recommend jobs
74
- def recommend_jobs(skills):
75
- if not skills:
76
- return []
77
- job_df["job_embeddings"] = job_df["job_description"].apply(lambda x: bert_model.encode(str(x)))
78
- user_embedding = bert_model.encode(" ".join(skills))
79
- job_df["BERT_Similarity"] = job_df["job_embeddings"].apply(lambda x: cosine_similarity([x], [user_embedding])[0][0])
80
- top_jobs = job_df.sort_values(by="BERT_Similarity", ascending=False).head(5)
81
- return top_jobs[["job_title", "company", "location", "BERT_Similarity"]].to_dict(orient="records")
82
-
83
- @app.get("/")
84
- def read_root():
85
- return {"message": "Skill Assessment API"}
86
-
87
- @app.post("/assess")
88
- def assess_skills(user_input: UserInput):
89
- # Extract user data from request
90
- user_name = user_input.name
91
- user_skills = user_input.skills
92
-
93
- if not user_skills:
94
- raise HTTPException(status_code=400, detail="Skills list cannot be empty")
95
-
96
- # Fetch coding challenges based on provided skills
97
- challenges = get_coding_challenges(user_skills)
98
-
99
- # Evaluate skills
100
- user_scores = {}
101
- for skill, challenge_list in challenges.items():
102
- if not challenge_list:
103
- user_scores[skill] = 0
104
- continue
105
-
106
- total_score = 0
107
- num_questions = len(challenge_list)
108
-
109
- if user_input.answers and skill in user_input.answers:
110
- # Use provided answers
111
- for challenge in challenge_list:
112
- question = challenge["question"]
113
- if question in user_input.answers[skill]:
114
- start_time = time.time() - 10 # Simulate execution time
115
- user_code = user_input.answers[skill][question]
116
- correct_code = challenge["solutions"]
117
- score = evaluate_coding_with_time(user_code, correct_code, start_time)
118
- total_score += score
119
- else:
120
- total_score += 0 # No answer provided for this question
121
- else:
122
- # No answers provided; assign default score (50% per question)
123
- total_score = 50 * num_questions
124
-
125
- user_scores[skill] = round(total_score / num_questions, 2)
126
-
127
- # Proficiency levels
128
- proficiency_levels = {skill: get_proficiency_level(score) for skill, score in user_scores.items()}
129
- weak_skills = [skill for skill, level in proficiency_levels.items() if level in ["Beginner", "Intermediate"]]
130
-
131
- # Recommendations
132
- courses = recommend_courses(weak_skills)
133
- jobs = recommend_jobs(user_skills)
134
-
135
- return {
136
- "name": user_name,
137
- "skills": user_skills,
138
- "scores": user_scores,
139
- "proficiency_levels": proficiency_levels,
140
- "recommended_courses": courses,
141
- "recommended_jobs": jobs
142
- }
143
-
144
- if __name__ == "__main__":
145
- import uvicorn
 
 
 
 
146
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
import os
import re
import time
from typing import Dict, List, Optional

import pandas as pd
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
10
+
11
# Set cache directory explicitly (optional, as Dockerfile ENV should handle this).
# NOTE(review): TRANSFORMERS_CACHE is deprecated in recent transformers releases
# in favor of HF_HOME — setting both keeps older/newer versions happy; confirm
# against the pinned transformers version.
os.environ["HF_HOME"] = "/app/cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/cache"

app = FastAPI()

# Load datasets at import time (updated path to match Dockerfile).
# All three reads happen once per process; a missing file aborts startup.
DATA_DIR = "/app/data/"  # Changed from "data/" to "/app/data/"
job_df = pd.read_csv(os.path.join(DATA_DIR, "Updated_Job_Posting_Dataset.csv"), encoding="latin1")
course_df = pd.read_csv(os.path.join(DATA_DIR, "coursera_course_dataset_v2_no_null.csv"))
coding_df = pd.read_csv(os.path.join(DATA_DIR, "Software Questions.csv"), encoding="latin1")

# Preprocess datasets: normalize the column names the rest of the app relies on.
coding_df.rename(columns={'Question': 'question', 'Answer': 'solutions'}, inplace=True)
job_df.rename(columns={'company_name': 'company', 'required_skills': 'skills'}, inplace=True)
course_df.rename(columns={'Title': 'course_title', 'Skills': 'skills'}, inplace=True)
# Drop questions with no text or no reference solution — they can't be scored.
coding_df.dropna(subset=['question', 'solutions'], inplace=True)
# Empty string (not NaN) so downstream .encode(str(x)) never sees a float NaN.
job_df["job_description"] = job_df["job_description"].fillna("")

# Load BERT model and vectorizer (shared across requests).
bert_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
vectorizer = TfidfVectorizer()
33
+
34
# Pydantic model for request body
class UserInput(BaseModel):
    """Request payload for the POST /assess endpoint."""
    # Display name; echoed back unchanged in the response.
    name: str
    # Required list of skills to assess (emptiness is rejected in the handler).
    skills: List[str]
    # Optional mapping of skill -> {question text -> user's answer code}.
    # When absent (or missing a skill), that skill gets a default score.
    answers: Optional[Dict[str, Dict[str, str]]] = None
39
+
40
# Evaluate coding answers
def evaluate_coding_with_time(user_code, correct_code, start_time):
    """Score a submitted answer against the reference solution.

    The score is the TF-IDF cosine similarity between the two texts,
    scaled to 0-100. If the elapsed time since ``start_time`` exceeds
    120 seconds, 0.1 points are deducted per extra second. The result
    is clamped at 0 and rounded to 2 decimals.

    Args:
        user_code: The candidate's submitted answer text.
        correct_code: The reference solution text.
        start_time: Epoch timestamp (seconds) when the user started.

    Returns:
        float score in [0, 100].
    """
    execution_time = time.time() - start_time
    # Guard: TfidfVectorizer raises ValueError on an empty vocabulary
    # (blank or whitespace-only input); treat that as a zero score.
    if not str(user_code).strip() or not str(correct_code).strip():
        return 0.0
    # Use a fresh local vectorizer instead of the shared module-level one:
    # fit_transform refits the vocabulary in place, so concurrent requests
    # hitting the shared instance would race on its state. For a two-document
    # fit the result is identical either way.
    local_vectorizer = TfidfVectorizer()
    vectorized = local_vectorizer.fit_transform([user_code, correct_code])
    similarity = cosine_similarity(vectorized)[0][1] * 100
    if execution_time > 120:
        similarity -= (execution_time - 120) * 0.1
    return round(max(similarity, 0), 2)
49
+
50
# Get coding challenges
def get_coding_challenges(skills, num_questions=5):
    """Pick up to ``num_questions`` random questions per skill.

    Args:
        skills: Iterable of skill names supplied by the user.
        num_questions: Max questions sampled per skill (default 5).

    Returns:
        dict mapping each skill to a list of {"question", "solutions"}
        records (empty list when no question mentions the skill).
    """
    skill_challenges = {}
    for skill in skills:
        # regex=False: skills are user input, so names like "C++" must be
        # matched as literal substrings, not compiled as (invalid) regexes.
        relevant = coding_df[coding_df["question"].str.contains(skill, case=False, na=False, regex=False)]
        if relevant.empty:
            skill_challenges[skill] = []
        else:
            # .sample is random, so repeated calls return different questions.
            skill_challenges[skill] = relevant.sample(min(num_questions, len(relevant)))[["question", "solutions"]].to_dict(orient="records")
    return skill_challenges
60
+
61
# Assign proficiency level
def get_proficiency_level(score):
    """Map a numeric score (0-100) to a proficiency label.

    >= 80 -> "Expert", >= 50 -> "Intermediate", otherwise "Beginner".
    """
    # Thresholds checked highest-first; first match wins.
    cutoffs = (
        (80, "Expert"),
        (50, "Intermediate"),
    )
    for minimum, label in cutoffs:
        if score >= minimum:
            return label
    return "Beginner"
69
+
70
# Recommend courses
def recommend_courses(weak_skills):
    """Return up to 5 courses whose skills mention any weak skill.

    Args:
        weak_skills: List of skill names scored below Expert.

    Returns:
        List of {"course_title", "Organization"} records ([] when
        ``weak_skills`` is empty).
    """
    if not weak_skills:
        return []
    # re.escape each skill so user input like "C++" or "C#" is matched
    # literally; "|" still provides the intended alternation.
    pattern = '|'.join(re.escape(skill) for skill in weak_skills)
    courses = course_df[course_df['skills'].str.contains(pattern, case=False, na=False)]
    return courses[['course_title', 'Organization']].head(5).to_dict(orient="records")
76
+
77
# Recommend jobs
def recommend_jobs(skills):
    """Return the 5 jobs most similar to the user's combined skills.

    Similarity is cosine similarity between the BERT embedding of the
    space-joined skills and each job description's embedding.

    Args:
        skills: List of skill names ([] short-circuits to []).

    Returns:
        List of {"job_title", "company", "location", "BERT_Similarity"}
        records, highest similarity first.
    """
    if not skills:
        return []
    # Memoize job-description embeddings on the function object: the
    # original re-encoded every description on every request (by far the
    # most expensive step) and stored the result as a column on the shared
    # job_df, which concurrent requests would race on.
    if getattr(recommend_jobs, "_embeddings", None) is None:
        recommend_jobs._embeddings = [
            bert_model.encode(str(desc)) for desc in job_df["job_description"]
        ]
    user_embedding = bert_model.encode(" ".join(skills))
    similarities = [
        cosine_similarity([emb], [user_embedding])[0][0]
        for emb in recommend_jobs._embeddings
    ]
    # assign() works on a copy, leaving the shared job_df unmodified.
    top_jobs = (
        job_df.assign(BERT_Similarity=similarities)
        .sort_values(by="BERT_Similarity", ascending=False)
        .head(5)
    )
    return top_jobs[["job_title", "company", "location", "BERT_Similarity"]].to_dict(orient="records")
86
+
87
@app.get("/")
def read_root():
    """Landing / health-check endpoint."""
    payload = {"message": "Skill Assessment API"}
    return payload
90
+
91
@app.post("/assess")
def assess_skills(user_input: UserInput):
    """Assess the user's skills and return scores, proficiency levels,
    and course/job recommendations.

    Scoring per skill: if answers were supplied for the skill, each
    answered question is scored via evaluate_coding_with_time and the
    total is averaged over all questions (unanswered questions count 0);
    if no answers were supplied, the skill defaults to 50% per question.
    A skill with no matching challenges scores 0.
    """
    if not user_input.skills:
        raise HTTPException(status_code=400, detail="Skills list cannot be empty")

    user_name = user_input.name
    user_skills = user_input.skills
    answers = user_input.answers

    # Fetch coding challenges based on provided skills
    challenges = get_coding_challenges(user_skills)

    def _skill_score(skill, challenge_list):
        """Average score (0-100) for one skill's challenge list."""
        if not challenge_list:
            return 0
        question_count = len(challenge_list)
        provided = answers.get(skill) if answers else None
        if provided is None:
            # No answers provided; assign default score (50% per question).
            return round((50 * question_count) / question_count, 2)
        total = 0
        for challenge in challenge_list:
            question_text = challenge["question"]
            if question_text in provided:
                started = time.time() - 10  # Simulate execution time
                total += evaluate_coding_with_time(
                    provided[question_text], challenge["solutions"], started
                )
            # Unanswered questions contribute nothing to the total.
        return round(total / question_count, 2)

    user_scores = {
        skill: _skill_score(skill, challenge_list)
        for skill, challenge_list in challenges.items()
    }

    # Proficiency levels; anything below Expert counts as a weak skill.
    proficiency_levels = {
        skill: get_proficiency_level(score) for skill, score in user_scores.items()
    }
    weak_skills = [
        skill for skill, level in proficiency_levels.items() if level != "Expert"
    ]

    # Recommendations
    courses = recommend_courses(weak_skills)
    jobs = recommend_jobs(user_skills)

    return {
        "name": user_name,
        "skills": user_skills,
        "scores": user_scores,
        "proficiency_levels": proficiency_levels,
        "recommended_courses": courses,
        "recommended_jobs": jobs
    }
147
+
148
# Run the API directly (e.g. `python app.py`); port 7860 is the
# Hugging Face Spaces convention. Binds all interfaces for container use.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)