root committed
Commit 2e8072e · 1 Parent(s): c456d7a

ss

Files changed:
- app.py +26 -26
- explanation_generator.py +26 -25
app.py
CHANGED
@@ -108,7 +108,7 @@ with st.sidebar:
 
     explanation_model_name = st.selectbox(
         "Explanation Model",
-        ["Qwen/
+        ["Qwen/Qwen3-14B"],
        index=0
     )
 
@@ -158,7 +158,7 @@ if 'explanation_generator' not in st.session_state:
     st.session_state.explanation_generator = None
 
 class ResumeScreener:
-    def __init__(self, embedding_model_name="nvidia/NV-Embed-v2", explanation_model_name="Qwen/
+    def __init__(self, embedding_model_name="nvidia/NV-Embed-v2", explanation_model_name="Qwen/Qwen3-14B"):
         """Initialize the ResumeScreener with the specified embedding model"""
         self.embedding_model_name = embedding_model_name
         self.explanation_model_name = explanation_model_name
@@ -173,7 +173,7 @@ class ResumeScreener:
         if use_explanation and st.session_state.explanation_generator is None:
             with st.spinner("Initializing explanation generator..."):
                 st.session_state.explanation_generator = ExplanationGenerator(self.explanation_model_name)
-
+                self.explanation_generator = st.session_state.explanation_generator
         elif use_explanation:
             self.explanation_generator = st.session_state.explanation_generator
 
@@ -357,10 +357,10 @@ class ResumeScreener:
         # Initialize BM25
         try:
             bm25 = BM25Okapi(filtered_corpus)
-
-
-
-
+
+            # Calculate scores
+            scores = bm25.get_scores(job_tokens)
+
             # If we filtered out empty documents, we need to reconstruct the scores array
             if len(filtered_corpus) != len(corpus):
                 full_scores = []
@@ -373,7 +373,7 @@ class ResumeScreener:
                     full_scores.append(0.0)
                 return full_scores
             else:
-
+                return scores
         except Exception as e:
             st.error(f"Error in BM25 calculation: {str(e)}")
             return [0.0] * len(resume_texts)
@@ -718,7 +718,7 @@ elif upload_option == "Process Directory":
 
             st.session_state.resumes_uploaded = True
             st.success(f"Successfully processed {processed_count} out of {total_files} resume files.")
-
+        else:
             st.error(f"No matching files found in {resume_dir}")
     else:
         st.error(f"Directory {resume_dir} does not exist or is not accessible.")
@@ -982,22 +982,22 @@ if st.button("Find Top Candidates", disabled=not (job_description and resume_tex
             ]
         else:
             # Regular processing for smaller datasets
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # Get resume embeddings
+            resume_embeddings = []
+            progress_bar = st.progress(0)
+            for i, text in enumerate(resume_texts):
+                embedding = screener.get_embedding(text)
+                resume_embeddings.append(embedding)
+                progress_bar.progress((i + 1) / len(resume_texts))
+
+            # Calculate hybrid scores
+            hybrid_scores, semantic_scores, bm25_scores = screener.calculate_hybrid_scores(
+                resume_texts,
+                resume_embeddings,
+                job_embedding,
+                semantic_weight,
+                use_faiss
+            )
 
             # Get top candidates
             combined_data = list(zip(file_names, resume_texts, hybrid_scores, semantic_scores, bm25_scores))
@@ -1104,4 +1104,4 @@ if st.session_state.results:
 
 # Footer
 st.markdown("---")
-st.markdown("Built with Streamlit and Hugging Face models (NV-Embed-v2 and
+st.markdown("Built with Streamlit and Hugging Face models (NV-Embed-v2 and Qwen3-14B)")
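The BM25 hunks above (@@ -357 and @@ -373) only show the changed lines. As an illustration, here is a minimal, self-contained sketch of the same idea using the rank_bm25 package: score the non-empty documents, then pad 0.0 for any document that was filtered out as empty. The helper name and the index bookkeeping are assumptions for the example, not the app's exact code.

# Sketch (assumed helper, not app code): BM25 scoring with empty-document padding.
from rank_bm25 import BM25Okapi

def bm25_scores_with_padding(corpus, job_tokens):
    """Return one BM25 score per document, 0.0 for documents with no tokens."""
    # Keep only non-empty token lists for BM25; remember original positions.
    filtered = [(i, doc) for i, doc in enumerate(corpus) if doc]
    if not filtered:
        return [0.0] * len(corpus)
    filtered_corpus = [doc for _, doc in filtered]

    bm25 = BM25Okapi(filtered_corpus)
    scores = bm25.get_scores(job_tokens)

    if len(filtered_corpus) != len(corpus):
        # Reconstruct a full-length score list in the original corpus order.
        full_scores = [0.0] * len(corpus)
        for (original_index, _), score in zip(filtered, scores):
            full_scores[original_index] = score
        return full_scores
    return list(scores)

# Example: the empty resume in position 1 receives a 0.0 score.
corpus = [["python", "nlp", "streamlit"], [], ["sql", "excel"]]
print(bm25_scores_with_padding(corpus, ["python", "sql"]))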
explanation_generator.py
CHANGED
@@ -2,7 +2,7 @@
 Explanation Generator Module
 
 This module handles the generation of explanations for resume rankings
-using the
+using the Qwen3-14B model from Hugging Face.
 """
 
 import torch
@@ -49,13 +49,13 @@ except ImportError:
     sys.modules["transformers.models.qwen2.modeling_qwen2"] = type('', (), {})
     sys.modules["transformers.models.qwen2.modeling_qwen2"].Replicate = Replicate
 
-# Load
-print("Loading Qwen/
-
+# Load Qwen3 model at initialization time
+print("Loading Qwen/Qwen3-14B model with 4-bit quantization...")
+QWEN_MODEL_NAME = "Qwen/Qwen3-14B"
 
 if USE_ALT_MODELS:
     # Use the alternative loading approach
-
+    global_qwen_model, global_qwen_tokenizer = load_explanation_model(QWEN_MODEL_NAME)
 else:
     # Use original approach
     try:
@@ -67,40 +67,40 @@ else:
             bnb_4bit_use_double_quant=True
         )
 
-        # Load
-
-
+        # Load Qwen3 model and tokenizer
+        global_qwen_tokenizer = AutoTokenizer.from_pretrained(QWEN_MODEL_NAME, trust_remote_code=True)
+        global_qwen_model = None
 
         # Check if we have enough resources to load the model
         if torch.cuda.is_available():
             gpu_memory = torch.cuda.get_device_properties(0).total_memory
-            if gpu_memory >=
-
-
+            if gpu_memory >= 12 * (1024**3):  # 12 GB (reduced memory requirement compared to 32B model)
+                global_qwen_model = AutoModelForCausalLM.from_pretrained(
+                    QWEN_MODEL_NAME,
                     quantization_config=quantization_config,
                     device_map="auto",
                     trust_remote_code=True,
                     torch_dtype=torch.float16
                 )
-                print("Successfully loaded
+                print("Successfully loaded Qwen3-14B with 4-bit quantization")
             else:
                 print("Not enough GPU memory, using template-based explanations")
         else:
             print("CUDA not available, using template-based explanations")
 
     except Exception as e:
-        print(f"Error loading
+        print(f"Error loading Qwen3-14B model: {str(e)}")
         print("Falling back to template-based explanations.")
-
-
+        global_qwen_tokenizer = None
+        global_qwen_model = None
 
 class ExplanationGenerator:
-    def __init__(self, model_name="Qwen/
+    def __init__(self, model_name="Qwen/Qwen3-14B"):
         """Initialize the explanation generator with the specified model"""
         self.model_name = model_name
         # Use globally pre-loaded model and tokenizer
-        self.model =
-        self.tokenizer =
+        self.model = global_qwen_model
+        self.tokenizer = global_qwen_tokenizer
         self.initialized = True
 
     def generate_explanation(self, resume_text, job_description, score, semantic_score, keyword_score, skills):
@@ -108,7 +108,7 @@ class ExplanationGenerator:
         # Use the model if it's available
         if self.model is not None and self.tokenizer is not None:
             try:
-                # Prepare prompt for
+                # Prepare prompt for Qwen3-14B
                 prompt = self._create_prompt(resume_text, job_description, score, semantic_score, keyword_score, skills)
 
                 # Create messages for chat format
@@ -116,23 +116,24 @@ class ExplanationGenerator:
                     {"role": "user", "content": prompt}
                 ]
 
-                # Apply chat template
+                # Apply chat template with thinking mode enabled
                 text = self.tokenizer.apply_chat_template(
                     messages,
                     tokenize=False,
-                    add_generation_prompt=True
+                    add_generation_prompt=True,
+                    enable_thinking=True
                 )
 
                 # Tokenize
                 inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)
 
-                # Generate response
+                # Generate response with recommended parameters for thinking mode
                 output_ids = self.model.generate(
                     **inputs,
-                    max_new_tokens=
+                    max_new_tokens=500,
                     temperature=0.6,
                     top_p=0.95,
-                    top_k=
+                    top_k=20
                 )
 
                 # Decode the response
@@ -144,7 +145,7 @@ class ExplanationGenerator:
                 return cleaned_response
 
             except Exception as e:
-                print(f"Error generating explanation with
+                print(f"Error generating explanation with Qwen3-14B: {str(e)}")
                 # Fall back to template-based explanation
                 return self._generate_template_explanation(score, semantic_score, keyword_score, skills)
         else:
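For reference, a minimal standalone sketch of the generation path these hunks converge on: Qwen/Qwen3-14B through transformers, with the chat template's thinking mode and the sampling parameters set in this commit. The 4-bit quantization, GPU-memory check, and Streamlit wiring from the actual module are omitted, and the prompt is a placeholder, so treat this as an illustration rather than the module's code.

# Sketch only: Qwen3-14B generation with thinking mode, mirroring the parameters
# added in this commit (max_new_tokens=500, temperature=0.6, top_p=0.95, top_k=20).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen3-14B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)

messages = [{"role": "user", "content": "Explain why this resume matches the job description."}]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=True,  # Qwen3 thinking mode, as enabled in the diff
)
inputs = tokenizer(text, return_tensors="pt").to(model.device)

output_ids = model.generate(
    **inputs,
    do_sample=True,  # needed so the temperature/top_p/top_k settings take effect
    max_new_tokens=500,
    temperature=0.6,
    top_p=0.95,
    top_k=20,
)
# Decode only the newly generated tokens.
response = tokenizer.decode(
    output_ids[0][inputs["input_ids"].shape[-1]:],
    skip_special_tokens=True,
)
print(response)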