Muhammad541 committed on
Commit
3d8d910
·
verified ·
1 Parent(s): 729d876

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -31
app.py CHANGED
@@ -41,39 +41,65 @@ FAISS_INDEX_PATH = os.path.join(chosen_model_dir, "faiss_index.index")
41
  COURSE_SIMILARITY_PATH = os.path.join(chosen_model_dir, "course_similarity.pkl")
42
  JOB_SIMILARITY_PATH = os.path.join(chosen_model_dir, "job_similarity.pkl")
43
 
44
- # Load datasets with fallbacks (precomputed offline)
45
- questions_df = pd.read_csv("Generated_Skill-Based_Questions.csv", usecols=["Skill", "Question", "Answer"])
46
- if questions_df.empty:
47
- questions_df = pd.DataFrame({
48
- 'Skill': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
49
- 'Question': ['Advanced Linux question', 'Advanced Git question', 'Basic Node.js question',
50
- 'Intermediate Python question', 'Basic Kubernetes question'],
51
- 'Answer': ['Linux answer', 'Git answer', 'Node.js answer', 'Python answer', 'Kubernetes answer']
52
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  logger.info(f"questions_df loaded with {len(questions_df)} rows. Skills available: {list(questions_df['Skill'].unique())}")
54
 
55
- courses_df = pd.read_csv("coursera_course_dataset_v2_no_null.csv", usecols=["skills", "course_title", "Organization", "level"])
56
- if courses_df.empty:
57
- courses_df = pd.DataFrame({
58
- 'skills': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
59
- 'course_title': ['Linux Admin', 'Git Mastery', 'Node.js Advanced', 'Python for Data', 'Kubernetes Basics'],
60
- 'Organization': ['Coursera', 'Udemy', 'Pluralsight', 'edX', 'Linux Foundation'],
61
- 'level': ['Intermediate', 'Intermediate', 'Advanced', 'Advanced', 'Intermediate'],
62
- 'popularity': [0.85, 0.9, 0.8, 0.95, 0.9],
63
- 'completion_rate': [0.65, 0.7, 0.6, 0.8, 0.75]
64
- })
65
-
66
- jobs_df = pd.read_csv("Updated_Job_Posting_Dataset.csv", usecols=["job_title", "company_name", "location", "required_skills", "job_description"])
67
- if jobs_df.empty:
68
- jobs_df = pd.DataFrame({
69
- 'job_title': ['DevOps Engineer', 'Cloud Architect', 'Software Engineer', 'Data Scientist', 'Security Analyst'],
70
- 'company_name': ['Tech Corp', 'Cloud Inc', 'Tech Solutions', 'Data Co', 'SecuriTech'],
71
- 'location': ['Remote', 'Islamabad', 'Karachi', 'Remote', 'Islamabad'],
72
- 'required_skills': ['Linux, Kubernetes', 'AWS, Kubernetes', 'Python, Node.js', 'Python, SQL', 'Cybersecurity, Linux'],
73
- 'job_description': ['DevOps role description', 'Cloud architecture position', 'Software engineering role', 'Data science position', 'Security analyst role'],
74
- 'level': ['Intermediate', 'Advanced', 'Intermediate', 'Intermediate', 'Intermediate']
75
- })
76
-
77
  # Load or Initialize Models (lighter model)
78
  if os.path.exists(UNIVERSAL_MODEL_PATH):
79
  universal_model = SentenceTransformer(UNIVERSAL_MODEL_PATH)
 
41
  COURSE_SIMILARITY_PATH = os.path.join(chosen_model_dir, "course_similarity.pkl")
42
  JOB_SIMILARITY_PATH = os.path.join(chosen_model_dir, "job_similarity.pkl")
43
 
44
+ # Improved dataset loading with fallback
45
def load_dataset(file_path, required_columns=(), fallback_data=None):
    """Read a CSV into a DataFrame, substituting fallback data on failure.

    Args:
        file_path: Path of the CSV file handed to ``pd.read_csv``.
        required_columns: Column names the caller expects. Any that are
            absent from the file are added and filled with empty strings.
        fallback_data: Mapping passed to ``pd.DataFrame`` when the file
            cannot be read. May be ``None``.

    Returns:
        The loaded DataFrame; a DataFrame built from ``fallback_data`` when
        reading fails; or ``None`` when reading fails and no fallback was
        supplied.
    """
    try:
        df = pd.read_csv(file_path)
    except Exception as exc:
        # Loading is best-effort: every read failure is logged and routed to
        # the fallback. ValueError keeps its own log prefix so existing log
        # output is unchanged.
        label = "ValueError" if isinstance(exc, ValueError) else "Error"
        logger.error(f"{label} loading {file_path}: {exc}. Using fallback data.")
        if fallback_data is None:
            # Make the "no fallback" outcome visible instead of silently
            # returning None after announcing a fallback.
            logger.warning(f"No fallback data supplied for {file_path}; returning None.")
            return None
        logger.info(f"Using fallback data for {file_path}")
        return pd.DataFrame(fallback_data)

    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        logger.warning(f"Columns {missing_columns} missing in {file_path}. Using default values.")
        for col in missing_columns:
            # The "default value" for an absent column is an empty string.
            df[col] = ""
    return df
67
+
68
# Load datasets with fallbacks.
# Each call passes the CSV path, the columns that must be present, and a small
# in-memory table that load_dataset uses when the CSV cannot be read.
_QUESTION_COLUMNS = ["Skill", "Question", "Answer"]

questions_df = load_dataset(
    "Generated_Skill-Based_Questions.csv",
    _QUESTION_COLUMNS,
    {
        'Skill': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
        'Question': ['Advanced Linux question', 'Advanced Git question', 'Basic Node.js question',
                     'Intermediate Python question', 'Basic Kubernetes question'],
        'Answer': ['Linux answer', 'Git answer', 'Node.js answer', 'Python answer', 'Kubernetes answer'],
    },
)

# The fallback table carries popularity/completion_rate columns that are not
# in the required-column list for the CSV.
courses_df = load_dataset(
    "coursera_course_dataset_v2_no_null.csv",
    ["skills", "course_title", "Organization", "level"],
    {
        'skills': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
        'course_title': ['Linux Admin', 'Git Mastery', 'Node.js Advanced', 'Python for Data', 'Kubernetes Basics'],
        'Organization': ['Coursera', 'Udemy', 'Pluralsight', 'edX', 'Linux Foundation'],
        'level': ['Intermediate', 'Intermediate', 'Advanced', 'Advanced', 'Intermediate'],
        'popularity': [0.85, 0.9, 0.8, 0.95, 0.9],
        'completion_rate': [0.65, 0.7, 0.6, 0.8, 0.75],
    },
)

# The fallback table carries a 'level' column that is not in the
# required-column list for the CSV.
jobs_df = load_dataset(
    "Updated_Job_Posting_Dataset.csv",
    ["job_title", "company_name", "location", "required_skills", "job_description"],
    {
        'job_title': ['DevOps Engineer', 'Cloud Architect', 'Software Engineer', 'Data Scientist', 'Security Analyst'],
        'company_name': ['Tech Corp', 'Cloud Inc', 'Tech Solutions', 'Data Co', 'SecuriTech'],
        'location': ['Remote', 'Islamabad', 'Karachi', 'Remote', 'Islamabad'],
        'required_skills': ['Linux, Kubernetes', 'AWS, Kubernetes', 'Python, Node.js', 'Python, SQL', 'Cybersecurity, Linux'],
        'job_description': ['DevOps role description', 'Cloud architecture position', 'Software engineering role', 'Data science position', 'Security analyst role'],
        'level': ['Intermediate', 'Advanced', 'Intermediate', 'Intermediate', 'Intermediate'],
    },
)

# Validate questions_df: the process stops with exit status 1 when the
# questions table is unusable. SystemExit is raised directly because the
# `exit` helper is only installed by the `site` module and is not guaranteed
# to exist in every interpreter configuration.
if questions_df is None or questions_df.empty:
    logger.error("questions_df is empty or could not be loaded. Exiting.")
    raise SystemExit(1)
if any(col not in questions_df.columns for col in _QUESTION_COLUMNS):
    logger.error("questions_df is missing required columns. Exiting.")
    raise SystemExit(1)
101
  logger.info(f"questions_df loaded with {len(questions_df)} rows. Skills available: {list(questions_df['Skill'].unique())}")
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  # Load or Initialize Models (lighter model)
104
  if os.path.exists(UNIVERSAL_MODEL_PATH):
105
  universal_model = SentenceTransformer(UNIVERSAL_MODEL_PATH)