Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -41,39 +41,65 @@ FAISS_INDEX_PATH = os.path.join(chosen_model_dir, "faiss_index.index")
|
|
41 |
COURSE_SIMILARITY_PATH = os.path.join(chosen_model_dir, "course_similarity.pkl")
|
42 |
JOB_SIMILARITY_PATH = os.path.join(chosen_model_dir, "job_similarity.pkl")
|
43 |
|
44 |
-
#
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
logger.info(f"questions_df loaded with {len(questions_df)} rows. Skills available: {list(questions_df['Skill'].unique())}")
|
54 |
|
55 |
-
courses_df = pd.read_csv("coursera_course_dataset_v2_no_null.csv", usecols=["skills", "course_title", "Organization", "level"])
|
56 |
-
if courses_df.empty:
|
57 |
-
courses_df = pd.DataFrame({
|
58 |
-
'skills': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
|
59 |
-
'course_title': ['Linux Admin', 'Git Mastery', 'Node.js Advanced', 'Python for Data', 'Kubernetes Basics'],
|
60 |
-
'Organization': ['Coursera', 'Udemy', 'Pluralsight', 'edX', 'Linux Foundation'],
|
61 |
-
'level': ['Intermediate', 'Intermediate', 'Advanced', 'Advanced', 'Intermediate'],
|
62 |
-
'popularity': [0.85, 0.9, 0.8, 0.95, 0.9],
|
63 |
-
'completion_rate': [0.65, 0.7, 0.6, 0.8, 0.75]
|
64 |
-
})
|
65 |
-
|
66 |
-
jobs_df = pd.read_csv("Updated_Job_Posting_Dataset.csv", usecols=["job_title", "company_name", "location", "required_skills", "job_description"])
|
67 |
-
if jobs_df.empty:
|
68 |
-
jobs_df = pd.DataFrame({
|
69 |
-
'job_title': ['DevOps Engineer', 'Cloud Architect', 'Software Engineer', 'Data Scientist', 'Security Analyst'],
|
70 |
-
'company_name': ['Tech Corp', 'Cloud Inc', 'Tech Solutions', 'Data Co', 'SecuriTech'],
|
71 |
-
'location': ['Remote', 'Islamabad', 'Karachi', 'Remote', 'Islamabad'],
|
72 |
-
'required_skills': ['Linux, Kubernetes', 'AWS, Kubernetes', 'Python, Node.js', 'Python, SQL', 'Cybersecurity, Linux'],
|
73 |
-
'job_description': ['DevOps role description', 'Cloud architecture position', 'Software engineering role', 'Data science position', 'Security analyst role'],
|
74 |
-
'level': ['Intermediate', 'Advanced', 'Intermediate', 'Intermediate', 'Intermediate']
|
75 |
-
})
|
76 |
-
|
77 |
# Load or Initialize Models (lighter model)
|
78 |
if os.path.exists(UNIVERSAL_MODEL_PATH):
|
79 |
universal_model = SentenceTransformer(UNIVERSAL_MODEL_PATH)
|
|
|
41 |
COURSE_SIMILARITY_PATH = os.path.join(chosen_model_dir, "course_similarity.pkl")
|
42 |
JOB_SIMILARITY_PATH = os.path.join(chosen_model_dir, "job_similarity.pkl")
|
43 |
|
44 |
+
# Improved dataset loading with fallback
|
45 |
+
def load_dataset(file_path, required_columns=None, fallback_data=None):
    """Load a CSV file into a DataFrame, with an in-memory fallback.

    Args:
        file_path: Path of the CSV file passed to ``pd.read_csv``.
        required_columns: Names of columns the result must contain. Any that
            are absent from the file are added and filled with empty strings.
            ``None`` (the default) means no columns are required.
        fallback_data: Optional data passed to ``pd.DataFrame`` when loading
            fails for any reason.

    Returns:
        The DataFrame read from ``file_path``; or a DataFrame built from
        ``fallback_data`` when loading failed and a fallback was given; or
        ``None`` when loading failed and no fallback was given.
    """
    # A None default avoids sharing one mutable list object across calls.
    if required_columns is None:
        required_columns = []

    try:
        df = pd.read_csv(file_path)
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            logger.warning(f"Columns {missing_columns} missing in {file_path}. Using default values.")
            # Only the absent columns need a placeholder value.
            for col in missing_columns:
                df[col] = ""
        return df
    except Exception as e:
        # Deliberately broad: any failure while reading or patching the file
        # degrades to the fallback instead of aborting start-up. ValueError
        # keeps its own log prefix so existing log output is unchanged.
        prefix = "ValueError" if isinstance(e, ValueError) else "Error"
        logger.error(f"{prefix} loading {file_path}: {e}. Using fallback data.")
        if fallback_data is None:
            return None
        logger.info(f"Using fallback data for {file_path}")
        return pd.DataFrame(fallback_data)
|
67 |
+
|
68 |
+
# Load datasets with fallbacks
|
69 |
+
# Skill-assessment questions; the literal rows are used only if the CSV
# cannot be loaded.
questions_df = load_dataset(
    "Generated_Skill-Based_Questions.csv",
    required_columns=["Skill", "Question", "Answer"],
    fallback_data={
        'Skill': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
        'Question': [
            'Advanced Linux question',
            'Advanced Git question',
            'Basic Node.js question',
            'Intermediate Python question',
            'Basic Kubernetes question',
        ],
        'Answer': [
            'Linux answer',
            'Git answer',
            'Node.js answer',
            'Python answer',
            'Kubernetes answer',
        ],
    },
)

# Course catalogue; the fallback rows also carry popularity and
# completion_rate values that are not among the required columns.
courses_df = load_dataset(
    "coursera_course_dataset_v2_no_null.csv",
    required_columns=["skills", "course_title", "Organization", "level"],
    fallback_data={
        'skills': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
        'course_title': [
            'Linux Admin',
            'Git Mastery',
            'Node.js Advanced',
            'Python for Data',
            'Kubernetes Basics',
        ],
        'Organization': ['Coursera', 'Udemy', 'Pluralsight', 'edX', 'Linux Foundation'],
        'level': ['Intermediate', 'Intermediate', 'Advanced', 'Advanced', 'Intermediate'],
        'popularity': [0.85, 0.9, 0.8, 0.95, 0.9],
        'completion_rate': [0.65, 0.7, 0.6, 0.8, 0.75],
    },
)

# Job postings; the fallback rows also carry a level value that is not
# among the required columns.
jobs_df = load_dataset(
    "Updated_Job_Posting_Dataset.csv",
    required_columns=["job_title", "company_name", "location", "required_skills", "job_description"],
    fallback_data={
        'job_title': [
            'DevOps Engineer',
            'Cloud Architect',
            'Software Engineer',
            'Data Scientist',
            'Security Analyst',
        ],
        'company_name': ['Tech Corp', 'Cloud Inc', 'Tech Solutions', 'Data Co', 'SecuriTech'],
        'location': ['Remote', 'Islamabad', 'Karachi', 'Remote', 'Islamabad'],
        'required_skills': [
            'Linux, Kubernetes',
            'AWS, Kubernetes',
            'Python, Node.js',
            'Python, SQL',
            'Cybersecurity, Linux',
        ],
        'job_description': [
            'DevOps role description',
            'Cloud architecture position',
            'Software engineering role',
            'Data science position',
            'Security analyst role',
        ],
        'level': ['Intermediate', 'Advanced', 'Intermediate', 'Intermediate', 'Intermediate'],
    },
)
|
93 |
+
|
94 |
+
# Validate questions_df
|
95 |
+
# The app cannot run without question data, so abort start-up with a
# non-zero status. SystemExit is raised directly because the exit() helper
# is injected by the site module for interactive use and is not guaranteed
# to exist (e.g. under `python -S` or in frozen/embedded interpreters).
if questions_df is None or questions_df.empty:
    logger.error("questions_df is empty or could not be loaded. Exiting.")
    raise SystemExit(1)
if not all(col in questions_df.columns for col in ["Skill", "Question", "Answer"]):
    logger.error("questions_df is missing required columns. Exiting.")
    raise SystemExit(1)
|
101 |
logger.info(f"questions_df loaded with {len(questions_df)} rows. Skills available: {list(questions_df['Skill'].unique())}")
|
102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
# Load or Initialize Models (lighter model)
|
104 |
if os.path.exists(UNIVERSAL_MODEL_PATH):
|
105 |
universal_model = SentenceTransformer(UNIVERSAL_MODEL_PATH)
|