Spaces:

Muhammad541
/

Skill-assessment

Runtime error

App Files Files Community

Muhammad541 committed on Mar 7

Commit

3d8d910

verified ·

1 Parent(s): 729d876

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -31

app.py CHANGED Viewed

@@ -41,39 +41,65 @@ FAISS_INDEX_PATH = os.path.join(chosen_model_dir, "faiss_index.index")
 COURSE_SIMILARITY_PATH = os.path.join(chosen_model_dir, "course_similarity.pkl")
 JOB_SIMILARITY_PATH = os.path.join(chosen_model_dir, "job_similarity.pkl")
-# Load datasets with fallbacks (precomputed offline)
-questions_df = pd.read_csv("Generated_Skill-Based_Questions.csv", usecols=["Skill", "Question", "Answer"])
-if questions_df.empty:
-    questions_df = pd.DataFrame({
-        'Skill': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
-        'Question': ['Advanced Linux question', 'Advanced Git question', 'Basic Node.js question',
-                     'Intermediate Python question', 'Basic Kubernetes question'],
-        'Answer': ['Linux answer', 'Git answer', 'Node.js answer', 'Python answer', 'Kubernetes answer']
-    })
 logger.info(f"questions_df loaded with {len(questions_df)} rows. Skills available: {list(questions_df['Skill'].unique())}")
-courses_df = pd.read_csv("coursera_course_dataset_v2_no_null.csv", usecols=["skills", "course_title", "Organization", "level"])
-if courses_df.empty:
-    courses_df = pd.DataFrame({
-        'skills': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
-        'course_title': ['Linux Admin', 'Git Mastery', 'Node.js Advanced', 'Python for Data', 'Kubernetes Basics'],
-        'Organization': ['Coursera', 'Udemy', 'Pluralsight', 'edX', 'Linux Foundation'],
-        'level': ['Intermediate', 'Intermediate', 'Advanced', 'Advanced', 'Intermediate'],
-        'popularity': [0.85, 0.9, 0.8, 0.95, 0.9],
-        'completion_rate': [0.65, 0.7, 0.6, 0.8, 0.75]
-    })
-jobs_df = pd.read_csv("Updated_Job_Posting_Dataset.csv", usecols=["job_title", "company_name", "location", "required_skills", "job_description"])
-if jobs_df.empty:
-    jobs_df = pd.DataFrame({
-        'job_title': ['DevOps Engineer', 'Cloud Architect', 'Software Engineer', 'Data Scientist', 'Security Analyst'],
-        'company_name': ['Tech Corp', 'Cloud Inc', 'Tech Solutions', 'Data Co', 'SecuriTech'],
-        'location': ['Remote', 'Islamabad', 'Karachi', 'Remote', 'Islamabad'],
-        'required_skills': ['Linux, Kubernetes', 'AWS, Kubernetes', 'Python, Node.js', 'Python, SQL', 'Cybersecurity, Linux'],
-        'job_description': ['DevOps role description', 'Cloud architecture position', 'Software engineering role', 'Data science position', 'Security analyst role'],
-        'level': ['Intermediate', 'Advanced', 'Intermediate', 'Intermediate', 'Intermediate']
-    })
 # Load or Initialize Models (lighter model)
 if os.path.exists(UNIVERSAL_MODEL_PATH):
     universal_model = SentenceTransformer(UNIVERSAL_MODEL_PATH)

 COURSE_SIMILARITY_PATH = os.path.join(chosen_model_dir, "course_similarity.pkl")
 JOB_SIMILARITY_PATH = os.path.join(chosen_model_dir, "job_similarity.pkl")
+# Improved dataset loading with fallback
+def load_dataset(file_path, required_columns=[], fallback_data=None):
+    try:
+        df = pd.read_csv(file_path)
+        missing_columns = [col for col in required_columns if col not in df.columns]
+        if missing_columns:
+            logger.warning(f"Columns {missing_columns} missing in {file_path}. Using default values.")
+            for col in required_columns:
+                if col not in df.columns:
+                    df[col] = ""
+        return df
+    except ValueError as ve:
+        logger.error(f"ValueError loading {file_path}: {ve}. Using fallback data.")
+        if fallback_data is not None:
+            logger.info(f"Using fallback data for {file_path}")
+            return pd.DataFrame(fallback_data)
+        return None
+    except Exception as e:
+        logger.error(f"Error loading {file_path}: {e}. Using fallback data.")
+        if fallback_data is not None:
+            logger.info(f"Using fallback data for {file_path}")
+            return pd.DataFrame(fallback_data)
+        return None
+# Load datasets with fallbacks
+questions_df = load_dataset("Generated_Skill-Based_Questions.csv", ["Skill", "Question", "Answer"], {
+    'Skill': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
+    'Question': ['Advanced Linux question', 'Advanced Git question', 'Basic Node.js question',
+                 'Intermediate Python question', 'Basic Kubernetes question'],
+    'Answer': ['Linux answer', 'Git answer', 'Node.js answer', 'Python answer', 'Kubernetes answer']
+})
+courses_df = load_dataset("coursera_course_dataset_v2_no_null.csv", ["skills", "course_title", "Organization", "level"], {
+    'skills': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
+    'course_title': ['Linux Admin', 'Git Mastery', 'Node.js Advanced', 'Python for Data', 'Kubernetes Basics'],
+    'Organization': ['Coursera', 'Udemy', 'Pluralsight', 'edX', 'Linux Foundation'],
+    'level': ['Intermediate', 'Intermediate', 'Advanced', 'Advanced', 'Intermediate'],
+    'popularity': [0.85, 0.9, 0.8, 0.95, 0.9],
+    'completion_rate': [0.65, 0.7, 0.6, 0.8, 0.75]
+})
+jobs_df = load_dataset("Updated_Job_Posting_Dataset.csv", ["job_title", "company_name", "location", "required_skills", "job_description"], {
+    'job_title': ['DevOps Engineer', 'Cloud Architect', 'Software Engineer', 'Data Scientist', 'Security Analyst'],
+    'company_name': ['Tech Corp', 'Cloud Inc', 'Tech Solutions', 'Data Co', 'SecuriTech'],
+    'location': ['Remote', 'Islamabad', 'Karachi', 'Remote', 'Islamabad'],
+    'required_skills': ['Linux, Kubernetes', 'AWS, Kubernetes', 'Python, Node.js', 'Python, SQL', 'Cybersecurity, Linux'],
+    'job_description': ['DevOps role description', 'Cloud architecture position', 'Software engineering role', 'Data science position', 'Security analyst role'],
+    'level': ['Intermediate', 'Advanced', 'Intermediate', 'Intermediate', 'Intermediate']
+})
+# Validate questions_df
+if questions_df is None or questions_df.empty:
+    logger.error("questions_df is empty or could not be loaded. Exiting.")
+    exit(1)
+if not all(col in questions_df.columns for col in ["Skill", "Question", "Answer"]):
+    logger.error("questions_df is missing required columns. Exiting.")
+    exit(1)
 logger.info(f"questions_df loaded with {len(questions_df)} rows. Skills available: {list(questions_df['Skill'].unique())}")
 # Load or Initialize Models (lighter model)
 if os.path.exists(UNIVERSAL_MODEL_PATH):
     universal_model = SentenceTransformer(UNIVERSAL_MODEL_PATH)