Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ import re
|
|
4 |
import streamlit as st
|
5 |
import docx
|
6 |
import textract
|
|
|
7 |
|
8 |
#####################################
|
9 |
# Function: Extract Text from File
|
@@ -69,7 +70,7 @@ def extract_basic_resume_info(text):
|
|
69 |
if name_match:
|
70 |
info["Name"] = name_match.group(1).strip()
|
71 |
else:
|
72 |
-
# Heuristic:
|
73 |
potential_names = re.findall(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,2}\b", text)
|
74 |
if potential_names:
|
75 |
info["Name"] = potential_names[0]
|
@@ -80,14 +81,13 @@ def extract_basic_resume_info(text):
|
|
80 |
info["Age"] = age_match.group(1)
|
81 |
|
82 |
# Extract Job Experience using the "experience" section.
|
83 |
-
#
|
84 |
-
experience_match = re.search(r"experience\s*(.*?)(?:\n\s*\n|additional information|$)", text, re.IGNORECASE | re.DOTALL)
|
85 |
if experience_match:
|
86 |
-
# Clean up the extracted block by removing any extra whitespace or newlines.
|
87 |
job_experience = experience_match.group(1).strip()
|
88 |
info["Job Experience"] = " ".join(job_experience.split())
|
89 |
else:
|
90 |
-
# Fallback if a labeled section
|
91 |
exp_match = re.search(r"(\d+)\s+(years|yrs)\s+(?:of\s+)?experience", text, re.IGNORECASE)
|
92 |
if exp_match:
|
93 |
info["Job Experience"] = f"{exp_match.group(1)} {exp_match.group(2)}"
|
@@ -104,7 +104,7 @@ def extract_basic_resume_info(text):
|
|
104 |
education_block = edu_match.group(1).strip()
|
105 |
info["Education"] = " ".join(education_block.split())
|
106 |
else:
|
107 |
-
# Fallback: search for
|
108 |
edu_match = re.search(r"(Bachelor|Master|B\.Sc|M\.Sc|Ph\.D)[^\n]+", text)
|
109 |
if edu_match:
|
110 |
info["Education"] = edu_match.group(0)
|
@@ -140,27 +140,48 @@ def summarize_basic_info(info):
|
|
140 |
summary_paragraph = ", ".join(parts) + "."
|
141 |
return summary_paragraph
|
142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
#####################################
|
144 |
# Main Resume Processing Logic
|
145 |
#####################################
|
146 |
def process_resume(file_obj):
|
147 |
if file_obj is None:
|
148 |
-
return None
|
149 |
-
# Extract the full resume text.
|
150 |
resume_text = extract_text_from_file(file_obj)
|
151 |
-
# Extract basic info from the text.
|
152 |
basic_info = extract_basic_resume_info(resume_text)
|
153 |
-
# Create a summary paragraph from the basic info.
|
154 |
summary_paragraph = summarize_basic_info(basic_info)
|
155 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
|
157 |
#####################################
|
158 |
# Streamlit Interface
|
159 |
#####################################
|
160 |
-
st.title("Resume
|
161 |
st.markdown("""
|
162 |
-
Upload your resume file in **.doc** or **.docx** format. The app extracts key details such as name, age, job experience, skills,
|
163 |
-
and education
|
|
|
164 |
""")
|
165 |
|
166 |
uploaded_file = st.file_uploader("Upload Resume", type=["doc", "docx"])
|
@@ -170,10 +191,26 @@ if st.button("Process Resume"):
|
|
170 |
st.error("Please upload a file first.")
|
171 |
else:
|
172 |
with st.spinner("Processing resume..."):
|
173 |
-
|
174 |
|
175 |
-
st.subheader("Summary
|
176 |
st.markdown(summary_paragraph)
|
177 |
|
178 |
-
st.subheader("
|
179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import streamlit as st
|
5 |
import docx
|
6 |
import textract
|
7 |
+
from sentence_transformers import SentenceTransformer, util
|
8 |
|
9 |
#####################################
|
10 |
# Function: Extract Text from File
|
|
|
70 |
if name_match:
|
71 |
info["Name"] = name_match.group(1).strip()
|
72 |
else:
|
73 |
+
# Heuristic: assume a line with two or three capitalized words might be the candidate's name.
|
74 |
potential_names = re.findall(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,2}\b", text)
|
75 |
if potential_names:
|
76 |
info["Name"] = potential_names[0]
|
|
|
81 |
info["Age"] = age_match.group(1)
|
82 |
|
83 |
# Extract Job Experience using the "experience" section.
|
84 |
+
# Capture everything after the word "experience" until a new section or the end.
|
85 |
+
experience_match = re.search(r"experience\s*(.*?)(?:\n\s*\n|additional information|skills|education|$)", text, re.IGNORECASE | re.DOTALL)
|
86 |
if experience_match:
|
|
|
87 |
job_experience = experience_match.group(1).strip()
|
88 |
info["Job Experience"] = " ".join(job_experience.split())
|
89 |
else:
|
90 |
+
# Fallback if not a labeled section.
|
91 |
exp_match = re.search(r"(\d+)\s+(years|yrs)\s+(?:of\s+)?experience", text, re.IGNORECASE)
|
92 |
if exp_match:
|
93 |
info["Job Experience"] = f"{exp_match.group(1)} {exp_match.group(2)}"
|
|
|
104 |
education_block = edu_match.group(1).strip()
|
105 |
info["Education"] = " ".join(education_block.split())
|
106 |
else:
|
107 |
+
# Fallback: search for common degree identifiers.
|
108 |
edu_match = re.search(r"(Bachelor|Master|B\.Sc|M\.Sc|Ph\.D)[^\n]+", text)
|
109 |
if edu_match:
|
110 |
info["Education"] = edu_match.group(0)
|
|
|
140 |
summary_paragraph = ", ".join(parts) + "."
|
141 |
return summary_paragraph
|
142 |
|
143 |
+
#####################################
|
144 |
+
# Function: Compare Candidate Summary to Company Prompt
|
145 |
+
#####################################
|
146 |
+
def compute_suitability(candidate_summary, company_prompt, model):
    """
    Score how closely a candidate summary matches a company prompt.

    Both texts are embedded with ``model.encode`` and compared with cosine
    similarity. Raw cosine similarity lies in [-1, 1], so the value is
    clamped to honour the documented [0, 1] range.

    Args:
        candidate_summary: Text summarizing the candidate.
        company_prompt: Text describing the company.
        model: Encoder exposing ``encode(text, convert_to_tensor=True)``,
            e.g. a ``SentenceTransformer`` instance.

    Returns:
        A float in [0, 1]; higher means the two texts are more similar.
    """
    candidate_embed = model.encode(candidate_summary, convert_to_tensor=True)
    company_embed = model.encode(company_prompt, convert_to_tensor=True)
    cosine_sim = util.cos_sim(candidate_embed, company_embed)
    score = float(cosine_sim.item())
    # Dissimilar texts can yield a negative similarity, and floating-point
    # rounding can push identical texts slightly above 1.0; clamp both ends.
    return max(0.0, min(1.0, score))
|
156 |
+
|
157 |
#####################################
|
158 |
# Main Resume Processing Logic
|
159 |
#####################################
|
160 |
def process_resume(file_obj):
    """
    Turn an uploaded resume file into a one-paragraph summary.

    The file's text is extracted, parsed into basic resume fields, and those
    fields are summarized. Returns None when no file object is supplied.
    """
    if file_obj is not None:
        text = extract_text_from_file(file_obj)
        return summarize_basic_info(extract_basic_resume_info(text))
    return None
|
167 |
+
|
168 |
+
#####################################
|
169 |
+
# Load the Sentence-BERT Model
|
170 |
+
#####################################
|
171 |
+
@st.cache_resource(show_spinner=False)
def load_model():
    """Build the all-MiniLM-L6-v2 sentence encoder (wrapped by st.cache_resource)."""
    encoder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    return encoder
|
174 |
+
|
175 |
+
model = load_model()
|
176 |
|
177 |
#####################################
|
178 |
# Streamlit Interface
|
179 |
#####################################
|
180 |
+
st.title("Resume Analyzer and Company Suitability Checker")
|
181 |
st.markdown("""
|
182 |
+
Upload your resume file in **.doc** or **.docx** format. The app extracts key details (such as name, age, job experience, skills,
|
183 |
+
and education) and summarizes them into a single paragraph. It then compares the candidate summary with the company profile
|
184 |
+
(using a pre-defined prompt for Google LLC) to produce a suitability score.
|
185 |
""")
|
186 |
|
187 |
uploaded_file = st.file_uploader("Upload Resume", type=["doc", "docx"])
|
|
|
191 |
st.error("Please upload a file first.")
|
192 |
else:
|
193 |
with st.spinner("Processing resume..."):
|
194 |
+
summary_paragraph = process_resume(uploaded_file)
|
195 |
|
196 |
+
st.subheader("Candidate Summary")
|
197 |
st.markdown(summary_paragraph)
|
198 |
|
199 |
+
st.subheader("Company Information (Prompt)")
|
200 |
+
default_company_prompt = (
|
201 |
+
"Google LLC, a global leader in technology and innovation, specializes in internet services, cloud computing, "
|
202 |
+
"artificial intelligence, and software development. As part of Alphabet Inc., Google seeks candidates with strong "
|
203 |
+
"problem-solving skills, adaptability, and collaboration abilities. Technical roles require proficiency in programming "
|
204 |
+
"languages such as Python, Java, C++, Go, or JavaScript, with expertise in data structures, algorithms, and system design. "
|
205 |
+
"Additionally, skills in AI, cybersecurity, UX/UI design, and digital marketing are highly valued. Google fosters a culture "
|
206 |
+
"of innovation, expecting candidates to demonstrate creativity, analytical thinking, and a passion for cutting-edge technology."
|
207 |
+
)
|
208 |
+
company_prompt = st.text_area("Enter company details:", value=default_company_prompt, height=150)
|
209 |
+
|
210 |
+
if st.button("Compute Suitability Score"):
|
211 |
+
if not company_prompt.strip():
|
212 |
+
st.error("Please enter the company information.")
|
213 |
+
else:
|
214 |
+
with st.spinner("Computing suitability score..."):
|
215 |
+
score = compute_suitability(summary_paragraph, company_prompt, model)
|
216 |
+
st.success(f"Suitability Score: {score:.2f} (range 0 to 1)")
|