Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,9 @@ import streamlit as st
|
|
4 |
import docx
|
5 |
import textract
|
6 |
from transformers import pipeline
|
|
|
|
|
|
|
7 |
|
8 |
# Set page title
|
9 |
st.set_page_config(page_title="Resume Analyzer and Company Suitability Checker")
|
@@ -12,14 +15,16 @@ st.set_page_config(page_title="Resume Analyzer and Company Suitability Checker")
|
|
12 |
# Preload Models
|
13 |
#####################################
|
14 |
@st.cache_resource(show_spinner=True)
|
15 |
-
def load_models():
|
16 |
-
"""Load
|
17 |
with st.spinner("Loading AI models... This may take a minute on first run."):
|
18 |
models = {}
|
19 |
# Load summarization model
|
20 |
-
models['summarizer'] = pipeline("summarization", model=
|
21 |
-
|
22 |
-
|
|
|
|
|
23 |
return models
|
24 |
|
25 |
# Preload models immediately when app starts
|
@@ -61,12 +66,14 @@ def extract_text_from_file(file_obj):
|
|
61 |
#####################################
|
62 |
def summarize_resume_text(resume_text, models):
|
63 |
"""
|
64 |
-
Generates a concise summary of the resume text using the summarization model.
|
65 |
"""
|
|
|
|
|
66 |
summarizer = models['summarizer']
|
67 |
|
68 |
# Handle long text
|
69 |
-
max_input_length = 1024 #
|
70 |
|
71 |
if len(resume_text) > max_input_length:
|
72 |
# Process in chunks if text is too long
|
@@ -83,7 +90,9 @@ def summarize_resume_text(resume_text, models):
|
|
83 |
else:
|
84 |
candidate_summary = summarizer(resume_text, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
|
85 |
|
86 |
-
|
|
|
|
|
87 |
|
88 |
#####################################
|
89 |
# Function: Compare Candidate Summary to Company Prompt
|
@@ -91,22 +100,29 @@ def summarize_resume_text(resume_text, models):
|
|
91 |
def compute_suitability(candidate_summary, company_prompt, models):
|
92 |
"""
|
93 |
Compute the similarity between candidate summary and company prompt.
|
94 |
-
Returns a score in the range [0, 1].
|
95 |
"""
|
96 |
-
|
|
|
|
|
97 |
|
98 |
-
#
|
99 |
-
|
100 |
-
|
101 |
-
[company_prompt]
|
102 |
-
)
|
103 |
|
104 |
-
#
|
105 |
-
|
106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
|
108 |
#####################################
|
109 |
-
# Streamlit Interface
|
110 |
#####################################
|
111 |
st.title("Resume Analyzer and Company Suitability Checker")
|
112 |
st.markdown(
|
@@ -118,80 +134,77 @@ Upload your resume file in **.doc** or **.docx** format. The app performs the fo
|
|
118 |
"""
|
119 |
)
|
120 |
|
121 |
-
#
|
122 |
-
|
123 |
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
# Button to process the resume
|
132 |
-
if st.button("Process Resume", type="primary", use_container_width=True):
|
133 |
-
if uploaded_file is None:
|
134 |
-
st.error("Please upload a resume file first.")
|
135 |
-
else:
|
136 |
-
with st.status("Processing resume...") as status:
|
137 |
-
status.update(label="Extracting text from resume...")
|
138 |
-
resume_text = extract_text_from_file(uploaded_file)
|
139 |
-
|
140 |
-
if not resume_text or resume_text.strip() == "":
|
141 |
-
status.update(label="Error: No text could be extracted", state="error")
|
142 |
-
else:
|
143 |
-
status.update(label=f"Extracted {len(resume_text)} characters. Generating summary...")
|
144 |
-
candidate_summary = summarize_resume_text(resume_text, models)
|
145 |
-
st.session_state["candidate_summary"] = candidate_summary
|
146 |
-
status.update(label="Processing complete!", state="complete")
|
147 |
-
|
148 |
-
# Display candidate summary if available
|
149 |
-
if "candidate_summary" in st.session_state:
|
150 |
-
st.subheader("Candidate Summary")
|
151 |
-
st.markdown(st.session_state["candidate_summary"])
|
152 |
|
153 |
-
|
154 |
-
|
155 |
-
default_company_prompt = (
|
156 |
-
"Google LLC, a global leader in technology and innovation, specializes in internet services, cloud computing, "
|
157 |
-
"artificial intelligence, and software development. As part of Alphabet Inc., Google seeks candidates with strong "
|
158 |
-
"problem-solving skills, adaptability, and collaboration abilities. Technical roles require proficiency in programming "
|
159 |
-
"languages such as Python, Java, C++, Go, or JavaScript, with expertise in data structures, algorithms, and system design. "
|
160 |
-
"Additionally, skills in AI, cybersecurity, UX/UI design, and digital marketing are highly valued. Google fosters a culture "
|
161 |
-
"of innovation, expecting candidates to demonstrate creativity, analytical thinking, and a passion for cutting-edge technology."
|
162 |
-
)
|
163 |
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
else:
|
176 |
-
|
177 |
-
|
178 |
-
st.
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
|
185 |
-
# Display score
|
186 |
-
st.
|
187 |
-
st.
|
|
|
188 |
|
189 |
-
#
|
190 |
-
if
|
191 |
-
st.
|
192 |
-
elif
|
193 |
-
st.
|
194 |
-
elif
|
195 |
-
st.
|
196 |
else:
|
197 |
-
st.
|
|
|
4 |
import docx
|
5 |
import textract
|
6 |
from transformers import pipeline
|
7 |
+
import numpy as np
|
8 |
+
from scipy.spatial.distance import cosine
|
9 |
+
import time
|
10 |
|
11 |
# Set page title
|
12 |
st.set_page_config(page_title="Resume Analyzer and Company Suitability Checker")
|
|
|
15 |
# Preload Models
|
16 |
#####################################
|
17 |
@st.cache_resource(show_spinner=True)
def load_models(summarization_model="google/pegasus-xsum", similarity_model="sentence-transformers/all-MiniLM-L6-v2"):
    """Build the Hugging Face pipelines used by the app.

    The function is wrapped in ``st.cache_resource``, so the pipelines are
    constructed through this function rather than rebuilt inline by callers.

    Args:
        summarization_model: Model id passed to the "summarization" pipeline.
        similarity_model: Model id passed to the "feature-extraction" pipeline.

    Returns:
        dict with two entries: ``'summarizer'`` and ``'feature_extractor'``.
    """
    with st.spinner("Loading AI models... This may take a minute on first run."):
        # Summarizer is created first, then the embedding model used for similarity.
        summarizer = pipeline("summarization", model=summarization_model)
        feature_extractor = pipeline("feature-extraction", model=similarity_model)

        return {
            'summarizer': summarizer,
            'feature_extractor': feature_extractor,
        }
|
29 |
|
30 |
# Preload models immediately when app starts
|
|
|
66 |
#####################################
|
67 |
def summarize_resume_text(resume_text, models):
|
68 |
"""
|
69 |
+
Generates a concise summary of the resume text using the selected summarization model.
|
70 |
"""
|
71 |
+
start_time = time.time()
|
72 |
+
|
73 |
summarizer = models['summarizer']
|
74 |
|
75 |
# Handle long text
|
76 |
+
max_input_length = 1024 # Model limit
|
77 |
|
78 |
if len(resume_text) > max_input_length:
|
79 |
# Process in chunks if text is too long
|
|
|
90 |
else:
|
91 |
candidate_summary = summarizer(resume_text, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
|
92 |
|
93 |
+
execution_time = time.time() - start_time
|
94 |
+
|
95 |
+
return candidate_summary, execution_time
|
96 |
|
97 |
#####################################
|
98 |
# Function: Compare Candidate Summary to Company Prompt
|
|
|
100 |
def compute_suitability(candidate_summary, company_prompt, models):
    """
    Compute the similarity between candidate summary and company prompt.

    Each text is passed to ``models['feature_extractor']``; the first element
    of the result is mean-pooled over its first axis into a single vector, and
    the two vectors are compared with cosine similarity.

    Args:
        candidate_summary: Summary text describing the candidate.
        company_prompt: Company description / job requirements text.
        models: Mapping providing a callable under ``'feature_extractor'``.
            Assumes ``feature_extractor(text)[0]`` is a (tokens, dim) nested
            list of floats -- TODO confirm against the pipeline in use.

    Returns:
        A ``(similarity, execution_time)`` tuple. ``similarity`` is a float in
        the range [0, 1]: raw cosine similarity can be negative, so it is
        clamped, and degenerate embeddings (zero norm or non-finite values)
        score 0.0 instead of NaN. ``execution_time`` is the elapsed wall-clock
        time in seconds.
    """
    start_time = time.time()

    feature_extractor = models['feature_extractor']

    def _embed(text):
        # Mean-pool the per-token embeddings into one fixed-size vector.
        token_vectors = np.asarray(feature_extractor(text)[0], dtype=float)
        return token_vectors.mean(axis=0)

    candidate_vec = _embed(candidate_summary)
    company_vec = _embed(company_prompt)

    norm_product = np.linalg.norm(candidate_vec) * np.linalg.norm(company_vec)
    if not np.isfinite(norm_product) or norm_product == 0:
        # Cosine similarity is undefined here; report "no match" rather than NaN.
        similarity = 0.0
    else:
        raw_similarity = float(np.dot(candidate_vec, company_vec) / norm_product)
        # Clamp so the documented [0, 1] contract holds even for opposed vectors
        # or tiny floating-point overshoot above 1.
        similarity = min(1.0, max(0.0, raw_similarity))

    execution_time = time.time() - start_time

    return similarity, execution_time
|
123 |
|
124 |
#####################################
|
125 |
+
# Main Streamlit Interface
|
126 |
#####################################
|
127 |
st.title("Resume Analyzer and Company Suitability Checker")
|
128 |
st.markdown(
|
|
|
134 |
"""
|
135 |
)
|
136 |
|
137 |
+
# File uploader
|
138 |
+
uploaded_file = st.file_uploader(
    label="Upload your resume (.doc or .docx)",
    type=["doc", "docx"],
)

# Free-text box for the company profile / job requirements to compare against
company_prompt = st.text_area(
    label="Enter the company description or job requirements:",
    height=150,
    help="Enter a detailed description of the company culture, role requirements, and desired skills.",
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
|
147 |
+
# Show model selection in sidebar
|
148 |
+
st.sidebar.header("Model Settings")

# Model dropdowns - exactly one model of each type is active at a time
summarization_model = st.sidebar.selectbox(
    "Summarization Model",
    ["google/pegasus-xsum", "facebook/bart-large-cnn", "t5-small", "sshleifer/distilbart-cnn-12-6"],
    index=0,
    help="Select the model to use for summarizing the resume text."
)

similarity_model = st.sidebar.selectbox(
    "Similarity Model",
    ["sentence-transformers/all-MiniLM-L6-v2", "sentence-transformers/all-mpnet-base-v2",
     "sentence-transformers/paraphrase-MiniLM-L3-v2", "sentence-transformers/multi-qa-mpnet-base-dot-v1"],
    index=0,
    help="Select the model to use for comparing candidate summary with company profile."
)

# The button is True only during the rerun triggered by its own click, so it is
# used solely to force a fresh load by dropping the cached pipelines first.
reload_requested = st.sidebar.button("Reload Models")
if reload_requested:
    st.cache_resource.clear()

# Resolve the models on EVERY run. Assigning `models` only inside the button
# branch loses the selection on the next rerun (e.g. when another button is
# clicked). load_models is decorated with st.cache_resource, so repeated calls
# with the same model names are expected to reuse the cached pipelines.
# NOTE(review): each distinct selection stays cached until "Reload Models" is
# pressed -- confirm the host has enough memory for that.
models = load_models(summarization_model, similarity_model)

if reload_requested:
    st.sidebar.success("Models reloaded successfully!")
|
171 |
+
|
172 |
+
# Process button
|
173 |
+
# The button is always rendered so a click with missing inputs produces an
# explicit message instead of the button silently not existing.
if st.button("Analyze Resume"):
    if uploaded_file is None:
        st.error("Please upload a resume file first.")
    elif not company_prompt or not company_prompt.strip():
        st.error("Please enter the company description or job requirements.")
    else:
        with st.spinner("Processing..."):
            # Extract text from resume
            resume_text = extract_text_from_file(uploaded_file)

            if not resume_text or not resume_text.strip():
                # Guard against None/blank output before calling str methods
                # or sending empty input to the summarizer.
                st.error("No text could be extracted from the uploaded file.")
            elif resume_text.startswith("Error") or resume_text == "Unsupported file type.":
                # presumably extract_text_from_file reports failures as message
                # strings rather than raising -- verify against its definition
                st.error(resume_text)
            else:
                # Display extracted text
                with st.expander("Extracted Text"):
                    st.text(resume_text)

                # Generate summary
                summary, summarization_time = summarize_resume_text(resume_text, models)

                # Display summary
                st.subheader("Candidate Summary")
                st.write(summary)
                st.info(f"Summarization completed in {summarization_time:.2f} seconds")

                # company_prompt is known to be non-empty here (checked above),
                # so the suitability score is always computed.
                similarity_score, similarity_time = compute_suitability(summary, company_prompt, models)

                # Display similarity score
                st.subheader("Suitability Assessment")
                st.markdown(f"**Matching Score:** {similarity_score:.2%}")
                st.info(f"Similarity computation completed in {similarity_time:.2f} seconds")

                # Provide interpretation
                if similarity_score >= 0.85:
                    st.success("Excellent match! This candidate's profile is strongly aligned with the company requirements.")
                elif similarity_score >= 0.70:
                    st.success("Good match! This candidate shows strong potential for the position.")
                elif similarity_score >= 0.50:
                    st.warning("Moderate match. The candidate meets some requirements but there may be gaps.")
                else:
                    st.error("Low match. The candidate's profile may not align well with the requirements.")
|