root committed on
Commit 2e8072e · 1 Parent(s): c456d7a
Files changed (2)
  1. app.py +26 -26
  2. explanation_generator.py +26 -25
app.py CHANGED
@@ -108,7 +108,7 @@ with st.sidebar:
 
     explanation_model_name = st.selectbox(
         "Explanation Model",
-        ["Qwen/QwQ-32B"],
+        ["Qwen/Qwen3-14B"],
         index=0
     )
 
@@ -158,7 +158,7 @@ if 'explanation_generator' not in st.session_state:
     st.session_state.explanation_generator = None
 
 class ResumeScreener:
-    def __init__(self, embedding_model_name="nvidia/NV-Embed-v2", explanation_model_name="Qwen/QwQ-32B"):
+    def __init__(self, embedding_model_name="nvidia/NV-Embed-v2", explanation_model_name="Qwen/Qwen3-14B"):
         """Initialize the ResumeScreener with the specified embedding model"""
         self.embedding_model_name = embedding_model_name
         self.explanation_model_name = explanation_model_name
@@ -173,7 +173,7 @@ class ResumeScreener:
         if use_explanation and st.session_state.explanation_generator is None:
             with st.spinner("Initializing explanation generator..."):
                 st.session_state.explanation_generator = ExplanationGenerator(self.explanation_model_name)
-            self.explanation_generator = st.session_state.explanation_generator
+            self.explanation_generator = st.session_state.explanation_generator
         elif use_explanation:
             self.explanation_generator = st.session_state.explanation_generator
 
@@ -357,10 +357,10 @@ class ResumeScreener:
         # Initialize BM25
         try:
             bm25 = BM25Okapi(filtered_corpus)
-
-            # Calculate scores
-            scores = bm25.get_scores(job_tokens)
-
+
+            # Calculate scores
+            scores = bm25.get_scores(job_tokens)
+
             # If we filtered out empty documents, we need to reconstruct the scores array
             if len(filtered_corpus) != len(corpus):
                 full_scores = []
@@ -373,7 +373,7 @@
                     full_scores.append(0.0)
                 return full_scores
             else:
-                return scores
+                return scores
         except Exception as e:
             st.error(f"Error in BM25 calculation: {str(e)}")
             return [0.0] * len(resume_texts)
@@ -718,7 +718,7 @@ elif upload_option == "Process Directory":
 
             st.session_state.resumes_uploaded = True
             st.success(f"Successfully processed {processed_count} out of {total_files} resume files.")
-        else:
+        else:
             st.error(f"No matching files found in {resume_dir}")
     else:
         st.error(f"Directory {resume_dir} does not exist or is not accessible.")
@@ -982,22 +982,22 @@ if st.button("Find Top Candidates", disabled=not (job_description and resume_tex
             ]
         else:
             # Regular processing for smaller datasets
-            # Get resume embeddings
-            resume_embeddings = []
-            progress_bar = st.progress(0)
-            for i, text in enumerate(resume_texts):
-                embedding = screener.get_embedding(text)
-                resume_embeddings.append(embedding)
-                progress_bar.progress((i + 1) / len(resume_texts))
-
-            # Calculate hybrid scores
-            hybrid_scores, semantic_scores, bm25_scores = screener.calculate_hybrid_scores(
-                resume_texts,
-                resume_embeddings,
-                job_embedding,
-                semantic_weight,
-                use_faiss
-            )
+            # Get resume embeddings
+            resume_embeddings = []
+            progress_bar = st.progress(0)
+            for i, text in enumerate(resume_texts):
+                embedding = screener.get_embedding(text)
+                resume_embeddings.append(embedding)
+                progress_bar.progress((i + 1) / len(resume_texts))
+
+            # Calculate hybrid scores
+            hybrid_scores, semantic_scores, bm25_scores = screener.calculate_hybrid_scores(
+                resume_texts,
+                resume_embeddings,
+                job_embedding,
+                semantic_weight,
+                use_faiss
+            )
 
             # Get top candidates
             combined_data = list(zip(file_names, resume_texts, hybrid_scores, semantic_scores, bm25_scores))
@@ -1104,4 +1104,4 @@ if st.session_state.results:
 
 # Footer
 st.markdown("---")
-st.markdown("Built with Streamlit and Hugging Face models (NV-Embed-v2 and QwQ-32B)")
+st.markdown("Built with Streamlit and Hugging Face models (NV-Embed-v2 and Qwen3-14B)")
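Side note on the -982,22 hunk above: the regular (non-FAISS) path embeds each resume, then asks screener.calculate_hybrid_scores to blend the embedding similarity with a BM25 keyword score using semantic_weight. A minimal sketch of that kind of weighted blend follows; the function name hybrid_scores, the cosine and min-max details, and the whitespace tokenization are illustrative assumptions, not code from this repository (only BM25Okapi, get_scores, and the semantic_weight idea come from the diff).

```python
# Illustrative sketch only: names, normalization, and tokenization are assumptions;
# the app's real calculate_hybrid_scores (and its FAISS path) may differ.
import numpy as np
from rank_bm25 import BM25Okapi

def hybrid_scores(resume_texts, resume_embeddings, job_embedding, job_text, semantic_weight=0.7):
    # Semantic component: cosine similarity between each resume embedding and the job embedding
    resumes = np.vstack([np.asarray(e, dtype=float) for e in resume_embeddings])
    job = np.asarray(job_embedding, dtype=float)
    semantic = (resumes @ job) / (np.linalg.norm(resumes, axis=1) * np.linalg.norm(job) + 1e-9)

    # Keyword component: BM25 score of the job description's tokens against each resume
    bm25 = BM25Okapi([t.lower().split() for t in resume_texts])
    keyword = bm25.get_scores(job_text.lower().split())

    # Min-max normalize both components so the weighted sum is on a comparable scale
    def minmax(x):
        x = np.asarray(x, dtype=float)
        span = x.max() - x.min()
        return (x - x.min()) / span if span > 0 else np.zeros_like(x)

    sem_n, kw_n = minmax(semantic), minmax(keyword)
    return semantic_weight * sem_n + (1 - semantic_weight) * kw_n, sem_n, kw_n
```

Higher semantic_weight values favor the embedding similarity; lower values favor exact keyword overlap.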
explanation_generator.py CHANGED
@@ -2,7 +2,7 @@
 Explanation Generator Module
 
 This module handles the generation of explanations for resume rankings
-using the QwQ-32B model from Hugging Face.
+using the Qwen3-14B model from Hugging Face.
 """
 
 import torch
@@ -49,13 +49,13 @@ except ImportError:
     sys.modules["transformers.models.qwen2.modeling_qwen2"] = type('', (), {})
     sys.modules["transformers.models.qwen2.modeling_qwen2"].Replicate = Replicate
 
-# Load QwQ model at initialization time
-print("Loading Qwen/QwQ-32B model with 4-bit quantization...")
-QWQ_MODEL_NAME = "Qwen/QwQ-32B"
+# Load Qwen3 model at initialization time
+print("Loading Qwen/Qwen3-14B model with 4-bit quantization...")
+QWEN_MODEL_NAME = "Qwen/Qwen3-14B"
 
 if USE_ALT_MODELS:
     # Use the alternative loading approach
-    global_qwq_model, global_qwq_tokenizer = load_explanation_model(QWQ_MODEL_NAME)
+    global_qwen_model, global_qwen_tokenizer = load_explanation_model(QWEN_MODEL_NAME)
 else:
     # Use original approach
     try:
@@ -67,40 +67,40 @@ else:
             bnb_4bit_use_double_quant=True
         )
 
-        # Load QwQ model and tokenizer
-        global_qwq_tokenizer = AutoTokenizer.from_pretrained(QWQ_MODEL_NAME, trust_remote_code=True)
-        global_qwq_model = None
+        # Load Qwen3 model and tokenizer
+        global_qwen_tokenizer = AutoTokenizer.from_pretrained(QWEN_MODEL_NAME, trust_remote_code=True)
+        global_qwen_model = None
 
         # Check if we have enough resources to load the model
         if torch.cuda.is_available():
             gpu_memory = torch.cuda.get_device_properties(0).total_memory
-            if gpu_memory >= 16 * (1024**3):  # 16 GB (reduced thanks to quantization)
-                global_qwq_model = AutoModelForCausalLM.from_pretrained(
-                    QWQ_MODEL_NAME,
+            if gpu_memory >= 12 * (1024**3):  # 12 GB (reduced memory requirement compared to 32B model)
+                global_qwen_model = AutoModelForCausalLM.from_pretrained(
+                    QWEN_MODEL_NAME,
                     quantization_config=quantization_config,
                     device_map="auto",
                     trust_remote_code=True,
                     torch_dtype=torch.float16
                 )
-                print("Successfully loaded QwQ-32B with 4-bit quantization")
+                print("Successfully loaded Qwen3-14B with 4-bit quantization")
             else:
                 print("Not enough GPU memory, using template-based explanations")
         else:
             print("CUDA not available, using template-based explanations")
 
     except Exception as e:
-        print(f"Error loading QwQ-32B model: {str(e)}")
+        print(f"Error loading Qwen3-14B model: {str(e)}")
         print("Falling back to template-based explanations.")
-        global_qwq_tokenizer = None
-        global_qwq_model = None
+        global_qwen_tokenizer = None
+        global_qwen_model = None
 
 class ExplanationGenerator:
-    def __init__(self, model_name="Qwen/QwQ-32B"):
+    def __init__(self, model_name="Qwen/Qwen3-14B"):
         """Initialize the explanation generator with the specified model"""
         self.model_name = model_name
         # Use globally pre-loaded model and tokenizer
-        self.model = global_qwq_model
-        self.tokenizer = global_qwq_tokenizer
+        self.model = global_qwen_model
+        self.tokenizer = global_qwen_tokenizer
         self.initialized = True
 
     def generate_explanation(self, resume_text, job_description, score, semantic_score, keyword_score, skills):
@@ -108,7 +108,7 @@ class ExplanationGenerator:
         # Use the model if it's available
         if self.model is not None and self.tokenizer is not None:
             try:
-                # Prepare prompt for QwQ-32B
+                # Prepare prompt for Qwen3-14B
                 prompt = self._create_prompt(resume_text, job_description, score, semantic_score, keyword_score, skills)
 
                 # Create messages for chat format
@@ -116,23 +116,24 @@
                     {"role": "user", "content": prompt}
                 ]
 
-                # Apply chat template
+                # Apply chat template with thinking mode enabled
                 text = self.tokenizer.apply_chat_template(
                     messages,
                     tokenize=False,
-                    add_generation_prompt=True
+                    add_generation_prompt=True,
+                    enable_thinking=True
                 )
 
                 # Tokenize
                 inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)
 
-                # Generate response
+                # Generate response with recommended parameters for thinking mode
                 output_ids = self.model.generate(
                     **inputs,
-                    max_new_tokens=300,
+                    max_new_tokens=500,
                     temperature=0.6,
                     top_p=0.95,
-                    top_k=30
+                    top_k=20
                 )
 
                 # Decode the response
@@ -144,7 +145,7 @@
                 return cleaned_response
 
             except Exception as e:
-                print(f"Error generating explanation with QwQ-32B: {str(e)}")
+                print(f"Error generating explanation with Qwen3-14B: {str(e)}")
                 # Fall back to template-based explanation
                 return self._generate_template_explanation(score, semantic_score, keyword_score, skills)
         else:
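A practical note on the enable_thinking=True change above: in thinking mode Qwen3 emits its reasoning inside a <think>...</think> block before the visible answer, so whatever post-processing produces cleaned_response needs to drop that block. Below is a minimal sketch of such a cleanup step, assuming the decoded output still contains the tags; the helper name strip_thinking is illustrative and not taken from this repository.

```python
import re

def strip_thinking(decoded_text: str) -> str:
    """Drop the <think>...</think> reasoning block Qwen3 emits in thinking mode."""
    # Everything inside the tags is chain of thought; the user-facing explanation follows it.
    return re.sub(r"<think>.*?</think>", "", decoded_text, flags=re.DOTALL).strip()

# Example:
# strip_thinking("<think>weigh skills vs. requirements...</think>\nStrong match on Python and NLP.")
# -> "Strong match on Python and NLP."
```

The sampling settings the diff now passes to generate() (temperature 0.6, top_p 0.95, top_k 20) match Qwen's published recommendations for Qwen3 thinking mode.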