root committed on
Commit 2e8072e · 1 Parent(s): c456d7a
Files changed (2)
  1. app.py +26 -26
  2. explanation_generator.py +26 -25
app.py CHANGED
@@ -108,7 +108,7 @@ with st.sidebar:
 
     explanation_model_name = st.selectbox(
         "Explanation Model",
-        ["Qwen/QwQ-32B"],
+        ["Qwen/Qwen3-14B"],
         index=0
     )
 
@@ -158,7 +158,7 @@ if 'explanation_generator' not in st.session_state:
     st.session_state.explanation_generator = None
 
 class ResumeScreener:
-    def __init__(self, embedding_model_name="nvidia/NV-Embed-v2", explanation_model_name="Qwen/QwQ-32B"):
+    def __init__(self, embedding_model_name="nvidia/NV-Embed-v2", explanation_model_name="Qwen/Qwen3-14B"):
         """Initialize the ResumeScreener with the specified embedding model"""
         self.embedding_model_name = embedding_model_name
         self.explanation_model_name = explanation_model_name
@@ -173,7 +173,7 @@ class ResumeScreener:
         if use_explanation and st.session_state.explanation_generator is None:
             with st.spinner("Initializing explanation generator..."):
                 st.session_state.explanation_generator = ExplanationGenerator(self.explanation_model_name)
-            self.explanation_generator = st.session_state.explanation_generator
+            self.explanation_generator = st.session_state.explanation_generator
         elif use_explanation:
             self.explanation_generator = st.session_state.explanation_generator
 
@@ -357,10 +357,10 @@ class ResumeScreener:
         # Initialize BM25
         try:
             bm25 = BM25Okapi(filtered_corpus)
-
-            # Calculate scores
-            scores = bm25.get_scores(job_tokens)
-
+
+            # Calculate scores
+            scores = bm25.get_scores(job_tokens)
+
             # If we filtered out empty documents, we need to reconstruct the scores array
             if len(filtered_corpus) != len(corpus):
                 full_scores = []
@@ -373,7 +373,7 @@
                     full_scores.append(0.0)
                 return full_scores
             else:
-                return scores
+                return scores
         except Exception as e:
             st.error(f"Error in BM25 calculation: {str(e)}")
             return [0.0] * len(resume_texts)
@@ -718,7 +718,7 @@ elif upload_option == "Process Directory":
 
             st.session_state.resumes_uploaded = True
             st.success(f"Successfully processed {processed_count} out of {total_files} resume files.")
-        else:
+        else:
             st.error(f"No matching files found in {resume_dir}")
     else:
         st.error(f"Directory {resume_dir} does not exist or is not accessible.")
@@ -982,22 +982,22 @@ if st.button("Find Top Candidates", disabled=not (job_description and resume_tex
             ]
         else:
             # Regular processing for smaller datasets
-            # Get resume embeddings
-            resume_embeddings = []
-            progress_bar = st.progress(0)
-            for i, text in enumerate(resume_texts):
-                embedding = screener.get_embedding(text)
-                resume_embeddings.append(embedding)
-                progress_bar.progress((i + 1) / len(resume_texts))
-
-            # Calculate hybrid scores
-            hybrid_scores, semantic_scores, bm25_scores = screener.calculate_hybrid_scores(
-                resume_texts,
-                resume_embeddings,
-                job_embedding,
-                semantic_weight,
-                use_faiss
-            )
+            # Get resume embeddings
+            resume_embeddings = []
+            progress_bar = st.progress(0)
+            for i, text in enumerate(resume_texts):
+                embedding = screener.get_embedding(text)
+                resume_embeddings.append(embedding)
+                progress_bar.progress((i + 1) / len(resume_texts))
+
+            # Calculate hybrid scores
+            hybrid_scores, semantic_scores, bm25_scores = screener.calculate_hybrid_scores(
+                resume_texts,
+                resume_embeddings,
+                job_embedding,
+                semantic_weight,
+                use_faiss
+            )
 
             # Get top candidates
             combined_data = list(zip(file_names, resume_texts, hybrid_scores, semantic_scores, bm25_scores))
@@ -1104,4 +1104,4 @@ if st.session_state.results:
 
 # Footer
 st.markdown("---")
-st.markdown("Built with Streamlit and Hugging Face models (NV-Embed-v2 and QwQ-32B)")
+st.markdown("Built with Streamlit and Hugging Face models (NV-Embed-v2 and Qwen3-14B)")
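Side note on the -982,22 hunk above: the regular (non-FAISS) path embeds each resume, then asks screener.calculate_hybrid_scores to blend the embedding similarity with a BM25 keyword score using semantic_weight. A minimal sketch of that kind of weighted blend follows; the function name hybrid_scores, the cosine and min-max details, and the whitespace tokenization are illustrative assumptions, not code from this repository (only BM25Okapi, get_scores, and the semantic_weight idea come from the diff).

```python
# Illustrative sketch only: names, normalization, and tokenization are assumptions;
# the app's real calculate_hybrid_scores (and its FAISS path) may differ.
import numpy as np
from rank_bm25 import BM25Okapi

def hybrid_scores(resume_texts, resume_embeddings, job_embedding, job_text, semantic_weight=0.7):
    # Semantic component: cosine similarity between each resume embedding and the job embedding
    resumes = np.vstack([np.asarray(e, dtype=float) for e in resume_embeddings])
    job = np.asarray(job_embedding, dtype=float)
    semantic = (resumes @ job) / (np.linalg.norm(resumes, axis=1) * np.linalg.norm(job) + 1e-9)

    # Keyword component: BM25 score of the job description's tokens against each resume
    bm25 = BM25Okapi([t.lower().split() for t in resume_texts])
    keyword = bm25.get_scores(job_text.lower().split())

    # Min-max normalize both components so the weighted sum is on a comparable scale
    def minmax(x):
        x = np.asarray(x, dtype=float)
        span = x.max() - x.min()
        return (x - x.min()) / span if span > 0 else np.zeros_like(x)

    sem_n, kw_n = minmax(semantic), minmax(keyword)
    return semantic_weight * sem_n + (1 - semantic_weight) * kw_n, sem_n, kw_n
```

Higher semantic_weight values favor the embedding similarity; lower values favor exact keyword overlap.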
explanation_generator.py CHANGED
@@ -2,7 +2,7 @@
 Explanation Generator Module
 
 This module handles the generation of explanations for resume rankings
-using the QwQ-32B model from Hugging Face.
+using the Qwen3-14B model from Hugging Face.
 """
 
 import torch
@@ -49,13 +49,13 @@ except ImportError:
     sys.modules["transformers.models.qwen2.modeling_qwen2"] = type('', (), {})
     sys.modules["transformers.models.qwen2.modeling_qwen2"].Replicate = Replicate
 
-# Load QwQ model at initialization time
-print("Loading Qwen/QwQ-32B model with 4-bit quantization...")
-QWQ_MODEL_NAME = "Qwen/QwQ-32B"
+# Load Qwen3 model at initialization time
+print("Loading Qwen/Qwen3-14B model with 4-bit quantization...")
+QWEN_MODEL_NAME = "Qwen/Qwen3-14B"
 
 if USE_ALT_MODELS:
     # Use the alternative loading approach
-    global_qwq_model, global_qwq_tokenizer = load_explanation_model(QWQ_MODEL_NAME)
+    global_qwen_model, global_qwen_tokenizer = load_explanation_model(QWEN_MODEL_NAME)
 else:
     # Use original approach
     try:
@@ -67,40 +67,40 @@ else:
             bnb_4bit_use_double_quant=True
         )
 
-        # Load QwQ model and tokenizer
-        global_qwq_tokenizer = AutoTokenizer.from_pretrained(QWQ_MODEL_NAME, trust_remote_code=True)
-        global_qwq_model = None
+        # Load Qwen3 model and tokenizer
+        global_qwen_tokenizer = AutoTokenizer.from_pretrained(QWEN_MODEL_NAME, trust_remote_code=True)
+        global_qwen_model = None
 
         # Check if we have enough resources to load the model
         if torch.cuda.is_available():
             gpu_memory = torch.cuda.get_device_properties(0).total_memory
-            if gpu_memory >= 16 * (1024**3):  # 16 GB (reduced thanks to quantization)
-                global_qwq_model = AutoModelForCausalLM.from_pretrained(
-                    QWQ_MODEL_NAME,
+            if gpu_memory >= 12 * (1024**3):  # 12 GB (reduced memory requirement compared to 32B model)
+                global_qwen_model = AutoModelForCausalLM.from_pretrained(
+                    QWEN_MODEL_NAME,
                     quantization_config=quantization_config,
                     device_map="auto",
                     trust_remote_code=True,
                     torch_dtype=torch.float16
                 )
-                print("Successfully loaded QwQ-32B with 4-bit quantization")
+                print("Successfully loaded Qwen3-14B with 4-bit quantization")
             else:
                 print("Not enough GPU memory, using template-based explanations")
         else:
             print("CUDA not available, using template-based explanations")
 
     except Exception as e:
-        print(f"Error loading QwQ-32B model: {str(e)}")
+        print(f"Error loading Qwen3-14B model: {str(e)}")
         print("Falling back to template-based explanations.")
-        global_qwq_tokenizer = None
-        global_qwq_model = None
+        global_qwen_tokenizer = None
+        global_qwen_model = None
 
 class ExplanationGenerator:
-    def __init__(self, model_name="Qwen/QwQ-32B"):
+    def __init__(self, model_name="Qwen/Qwen3-14B"):
         """Initialize the explanation generator with the specified model"""
         self.model_name = model_name
         # Use globally pre-loaded model and tokenizer
-        self.model = global_qwq_model
-        self.tokenizer = global_qwq_tokenizer
+        self.model = global_qwen_model
+        self.tokenizer = global_qwen_tokenizer
         self.initialized = True
 
     def generate_explanation(self, resume_text, job_description, score, semantic_score, keyword_score, skills):
@@ -108,7 +108,7 @@ class ExplanationGenerator:
         # Use the model if it's available
         if self.model is not None and self.tokenizer is not None:
             try:
-                # Prepare prompt for QwQ-32B
+                # Prepare prompt for Qwen3-14B
                 prompt = self._create_prompt(resume_text, job_description, score, semantic_score, keyword_score, skills)
 
                 # Create messages for chat format
@@ -116,23 +116,24 @@
                     {"role": "user", "content": prompt}
                 ]
 
-                # Apply chat template
+                # Apply chat template with thinking mode enabled
                 text = self.tokenizer.apply_chat_template(
                     messages,
                     tokenize=False,
-                    add_generation_prompt=True
+                    add_generation_prompt=True,
+                    enable_thinking=True
                 )
 
                 # Tokenize
                 inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)
 
-                # Generate response
+                # Generate response with recommended parameters for thinking mode
                 output_ids = self.model.generate(
                     **inputs,
-                    max_new_tokens=300,
+                    max_new_tokens=500,
                     temperature=0.6,
                     top_p=0.95,
-                    top_k=30
+                    top_k=20
                 )
 
                 # Decode the response
@@ -144,7 +145,7 @@
                 return cleaned_response
 
             except Exception as e:
-                print(f"Error generating explanation with QwQ-32B: {str(e)}")
+                print(f"Error generating explanation with Qwen3-14B: {str(e)}")
                 # Fall back to template-based explanation
                 return self._generate_template_explanation(score, semantic_score, keyword_score, skills)
         else:
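A practical note on the enable_thinking=True change above: in thinking mode Qwen3 emits its reasoning inside a <think>...</think> block before the visible answer, so whatever post-processing produces cleaned_response needs to drop that block. Below is a minimal sketch of such a cleanup step, assuming the decoded output still contains the tags; the helper name strip_thinking is illustrative and not taken from this repository.

```python
import re

def strip_thinking(decoded_text: str) -> str:
    """Drop the <think>...</think> reasoning block Qwen3 emits in thinking mode."""
    # Everything inside the tags is chain of thought; the user-facing explanation follows it.
    return re.sub(r"<think>.*?</think>", "", decoded_text, flags=re.DOTALL).strip()

# Example:
# strip_thinking("<think>weigh skills vs. requirements...</think>\nStrong match on Python and NLP.")
# -> "Strong match on Python and NLP."
```

The sampling settings the diff now passes to generate() (temperature 0.6, top_p 0.95, top_k 20) match Qwen's published recommendations for Qwen3 thinking mode.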