Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import streamlit as st
|
|
5 |
import docx
|
6 |
import textract
|
7 |
from sentence_transformers import SentenceTransformer, util
|
|
|
8 |
|
9 |
#####################################
|
10 |
# Function: Extract Text from File
|
@@ -44,109 +45,30 @@ def extract_text_from_file(file_obj):
|
|
44 |
return text
|
45 |
|
46 |
#####################################
|
47 |
-
# Function:
|
48 |
#####################################
|
49 |
-
|
|
|
50 |
"""
|
51 |
-
|
52 |
-
-
|
53 |
-
- Age
|
54 |
-
- Job Experience (capturing the block under the "experience" section)
|
55 |
-
- Skills
|
56 |
-
- Education
|
57 |
-
|
58 |
-
Returns a dictionary with the extracted elements.
|
59 |
"""
|
60 |
-
|
61 |
-
"Name": None,
|
62 |
-
"Age": None,
|
63 |
-
"Job Experience": None,
|
64 |
-
"Skills": None,
|
65 |
-
"Education": None,
|
66 |
-
}
|
67 |
-
|
68 |
-
# Extract Name (e.g., "CONG, An Dong" from the first line)
|
69 |
-
name_match = re.search(r"^([A-Z]+)[,\s]+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)", text, re.MULTILINE)
|
70 |
-
if name_match:
|
71 |
-
info["Name"] = f"{name_match.group(1)} {name_match.group(2)}"
|
72 |
-
else:
|
73 |
-
# Fallback heuristic: assume a line with two or three capitalized words might be the candidate's name.
|
74 |
-
potential_names = re.findall(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,2}\b", text)
|
75 |
-
if potential_names:
|
76 |
-
info["Name"] = potential_names[0]
|
77 |
-
|
78 |
-
# Extract Age (e.g., "Age: 28")
|
79 |
-
age_match = re.search(r"[Aa]ge[:\-]\s*(\d{1,3})", text)
|
80 |
-
if age_match:
|
81 |
-
info["Age"] = age_match.group(1)
|
82 |
|
83 |
-
|
84 |
-
# Capture everything after the word "experience" until a new section or the end.
|
85 |
-
experience_match = re.search(
|
86 |
-
r"experience\s*(.*?)(?:\n\s*\n|additional information|skills|education|$)",
|
87 |
-
text,
|
88 |
-
re.IGNORECASE | re.DOTALL,
|
89 |
-
)
|
90 |
-
if experience_match:
|
91 |
-
job_experience = experience_match.group(1).strip()
|
92 |
-
info["Job Experience"] = " ".join(job_experience.split())
|
93 |
-
else:
|
94 |
-
# Fallback if not a labeled section.
|
95 |
-
exp_match = re.search(
|
96 |
-
r"(\d+)\s+(years|yrs)\s+(?:of\s+)?experience", text, re.IGNORECASE
|
97 |
-
)
|
98 |
-
if exp_match:
|
99 |
-
info["Job Experience"] = f"{exp_match.group(1)} {exp_match.group(2)}"
|
100 |
-
|
101 |
-
# Extract Skills (e.g., "Skills: Python, Java, SQL")
|
102 |
-
skills_match = re.search(r"(Skills|Technical Skills)[:\-]\s*(.+)", text, re.IGNORECASE)
|
103 |
-
if skills_match:
|
104 |
-
skills_str = skills_match.group(2).strip()
|
105 |
-
info["Skills"] = skills_str.rstrip(".")
|
106 |
-
|
107 |
-
# Extract Education (e.g., "Education: ...")
|
108 |
-
edu_match = re.search(
|
109 |
-
r"education\s*(.*?)(?:\n\s*\n|experience|$)", text, re.IGNORECASE | re.DOTALL
|
110 |
-
)
|
111 |
-
if edu_match:
|
112 |
-
education_block = edu_match.group(1).strip()
|
113 |
-
info["Education"] = " ".join(education_block.split())
|
114 |
-
else:
|
115 |
-
# Fallback: search for common degree identifiers.
|
116 |
-
edu_match = re.search(r"(Bachelor|Master|B\.Sc|M\.Sc|Ph\.D)[^\n]+", text)
|
117 |
-
if edu_match:
|
118 |
-
info["Education"] = edu_match.group(0)
|
119 |
-
|
120 |
-
return info
|
121 |
-
|
122 |
-
#####################################
|
123 |
-
# Function: Summarize Basic Info into a Paragraph
|
124 |
-
#####################################
|
125 |
-
def summarize_basic_info(info):
|
126 |
"""
|
127 |
-
|
|
|
128 |
"""
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
if info.get("Job Experience"):
|
140 |
-
parts.append(f"with job experience: {info['Job Experience']}")
|
141 |
-
|
142 |
-
if info.get("Skills"):
|
143 |
-
parts.append(f"skilled in {info['Skills']}")
|
144 |
-
|
145 |
-
if info.get("Education"):
|
146 |
-
parts.append(f"and educated in {info['Education']}")
|
147 |
-
|
148 |
-
summary_paragraph = ", ".join(parts) + "."
|
149 |
-
return summary_paragraph
|
150 |
|
151 |
#####################################
|
152 |
# Function: Compare Candidate Summary to Company Prompt
|
@@ -166,19 +88,24 @@ def compute_suitability(candidate_summary, company_prompt, model):
|
|
166 |
# Main Resume Processing Logic
|
167 |
#####################################
|
168 |
def process_resume(file_obj):
|
|
|
|
|
|
|
|
|
169 |
resume_text = extract_text_from_file(file_obj)
|
170 |
-
|
171 |
-
|
172 |
-
return summary_paragraph
|
173 |
|
174 |
#####################################
|
175 |
-
# Load the Sentence-BERT Model
|
176 |
#####################################
|
177 |
@st.cache_resource(show_spinner=False)
|
178 |
-
def
|
|
|
179 |
return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
180 |
|
181 |
-
model
|
|
|
182 |
|
183 |
#####################################
|
184 |
# Streamlit Interface
|
@@ -186,9 +113,10 @@ model = load_model()
|
|
186 |
st.title("Resume Analyzer and Company Suitability Checker")
|
187 |
st.markdown(
|
188 |
"""
|
189 |
-
Upload your resume file in **.doc** or **.docx** format. The app
|
190 |
-
|
191 |
-
|
|
|
192 |
"""
|
193 |
)
|
194 |
|
@@ -206,7 +134,7 @@ if st.button("Process Resume"):
|
|
206 |
st.subheader("Candidate Summary")
|
207 |
st.markdown(candidate_summary)
|
208 |
|
209 |
-
# Pre-
|
210 |
default_company_prompt = (
|
211 |
"Google LLC, a global leader in technology and innovation, specializes in internet services, cloud computing, "
|
212 |
"artificial intelligence, and software development. As part of Alphabet Inc., Google seeks candidates with strong "
|
@@ -235,5 +163,5 @@ if st.button("Compute Suitability Score"):
|
|
235 |
st.error("Please enter the company information.")
|
236 |
else:
|
237 |
with st.spinner("Computing suitability score..."):
|
238 |
-
score = compute_suitability(candidate_summary, company_prompt,
|
239 |
st.success(f"Suitability Score: {score:.2f} (range 0 to 1)")
|
|
|
5 |
import docx
|
6 |
import textract
|
7 |
from sentence_transformers import SentenceTransformer, util
|
8 |
+
from transformers import pipeline
|
9 |
|
10 |
#####################################
|
11 |
# Function: Extract Text from File
|
|
|
45 |
return text
|
46 |
|
47 |
#####################################
|
48 |
+
# Function: Summarize Resume Text using a Transformer Model
|
49 |
#####################################
|
50 |
+
@st.cache_resource(show_spinner=False)
def load_summarizer():
    """
    Builds the transformer summarization pipeline used for resume summaries.

    The pipeline is created for the "summarization" task with the
    "ainize/bart-base-cnn" model. The function is wrapped in
    st.cache_resource (spinner disabled), so Streamlit is expected to reuse
    the returned pipeline rather than rebuilding it on each call.

    Returns the pipeline object.
    """
    summarization_model = "ainize/bart-base-cnn"
    return pipeline("summarization", model=summarization_model)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
|
58 |
+
def summarize_resume_text(resume_text):
    """
    Generates a concise summary of the resume text using the summarization model.

    Args:
        resume_text: Plain text extracted from the resume. May be empty,
            whitespace-only, or None.

    Returns:
        The summary string produced by the model, or "" when there is no
        text to summarize.
    """
    # Nothing to summarize: return early instead of loading the model and
    # feeding it an empty prompt.
    if not resume_text or not resume_text.strip():
        return ""

    summarizer = load_summarizer()

    # Cheap character-level cap on the input; slicing is a no-op when the
    # text is already shorter than the limit.
    max_input_length = 1024  # adjust as needed
    resume_text = resume_text[:max_input_length]

    # A character cap does not bound the number of tokens, so additionally
    # ask the pipeline's tokenizer to truncate to the model's input limit.
    summary_result = summarizer(
        resume_text,
        max_length=150,
        min_length=40,
        do_sample=False,
        truncation=True,
    )

    # The summarization pipeline returns a list of summaries; use the first.
    return summary_result[0]["summary_text"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
#####################################
|
74 |
# Function: Compare Candidate Summary to Company Prompt
|
|
|
88 |
# Main Resume Processing Logic
|
89 |
#####################################
|
90 |
def process_resume(file_obj):
    """
    Turns an uploaded resume file into a candidate summary.

    The file is first passed to extract_text_from_file, and the resulting
    text is then handed to summarize_resume_text; that summary is returned.
    """
    return summarize_resume_text(extract_text_from_file(file_obj))
|
|
|
98 |
|
99 |
#####################################
|
100 |
+
# Load the Sentence-BERT Model (Semantic Similarity Model)
|
101 |
#####################################
|
102 |
@st.cache_resource(show_spinner=False)
def load_sbert_model():
    """
    Loads the Sentence-BERT model "all-MiniLM-L6-v2".

    Returns a SentenceTransformer instance; the function is wrapped in
    st.cache_resource with the spinner disabled.
    """
    sbert_model_name = "sentence-transformers/all-MiniLM-L6-v2"
    return SentenceTransformer(sbert_model_name)
|
106 |
|
107 |
+
# Load Sentence-BERT model for computing semantic similarity.
|
108 |
+
sbert_model = load_sbert_model()
|
109 |
|
110 |
#####################################
|
111 |
# Streamlit Interface
|
|
|
113 |
st.title("Resume Analyzer and Company Suitability Checker")
|
114 |
st.markdown(
|
115 |
"""
|
116 |
+
Upload your resume file in **.doc** or **.docx** format. The app performs the following tasks:
|
117 |
+
1. Extracts text from the resume.
|
118 |
+
2. Uses a transformer-based text summarization model (**ainize/bart-base-cnn**) to generate a concise candidate summary.
|
119 |
+
3. Compares the candidate summary with a company profile (using Sentence-BERT) to produce a suitability score.
|
120 |
"""
|
121 |
)
|
122 |
|
|
|
134 |
st.subheader("Candidate Summary")
|
135 |
st.markdown(candidate_summary)
|
136 |
|
137 |
+
# Pre-defined company prompt for Google LLC.
|
138 |
default_company_prompt = (
|
139 |
"Google LLC, a global leader in technology and innovation, specializes in internet services, cloud computing, "
|
140 |
"artificial intelligence, and software development. As part of Alphabet Inc., Google seeks candidates with strong "
|
|
|
163 |
st.error("Please enter the company information.")
|
164 |
else:
|
165 |
with st.spinner("Computing suitability score..."):
|
166 |
+
score = compute_suitability(candidate_summary, company_prompt, sbert_model)
|
167 |
st.success(f"Suitability Score: {score:.2f} (range 0 to 1)")
|