CR7CAD committed on
Commit
cccaa8e
·
verified ·
1 Parent(s): 6637415

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -18
app.py CHANGED
@@ -4,6 +4,7 @@ import re
4
  import streamlit as st
5
  import docx
6
  import textract
 
7
 
8
  #####################################
9
  # Function: Extract Text from File
@@ -69,7 +70,7 @@ def extract_basic_resume_info(text):
69
  if name_match:
70
  info["Name"] = name_match.group(1).strip()
71
  else:
72
- # Heuristic: Assume the first line or a line with two or three capitalized words is the candidate's name.
73
  potential_names = re.findall(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,2}\b", text)
74
  if potential_names:
75
  info["Name"] = potential_names[0]
@@ -80,14 +81,13 @@ def extract_basic_resume_info(text):
80
  info["Age"] = age_match.group(1)
81
 
82
  # Extract Job Experience using the "experience" section.
83
- # This regex captures everything after the word "experience" until the next section heading (e.g., "additional information" or "skills")
84
- experience_match = re.search(r"experience\s*(.*?)(?:\n\s*\n|additional information|$)", text, re.IGNORECASE | re.DOTALL)
85
  if experience_match:
86
- # Clean up the extracted block by removing any extra whitespace or newlines.
87
  job_experience = experience_match.group(1).strip()
88
  info["Job Experience"] = " ".join(job_experience.split())
89
  else:
90
- # Fallback if a labeled section isn't found.
91
  exp_match = re.search(r"(\d+)\s+(years|yrs)\s+(?:of\s+)?experience", text, re.IGNORECASE)
92
  if exp_match:
93
  info["Job Experience"] = f"{exp_match.group(1)} {exp_match.group(2)}"
@@ -104,7 +104,7 @@ def extract_basic_resume_info(text):
104
  education_block = edu_match.group(1).strip()
105
  info["Education"] = " ".join(education_block.split())
106
  else:
107
- # Fallback: search for lines starting with common degree words.
108
  edu_match = re.search(r"(Bachelor|Master|B\.Sc|M\.Sc|Ph\.D)[^\n]+", text)
109
  if edu_match:
110
  info["Education"] = edu_match.group(0)
@@ -140,27 +140,48 @@ def summarize_basic_info(info):
140
  summary_paragraph = ", ".join(parts) + "."
141
  return summary_paragraph
142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  #####################################
144
  # Main Resume Processing Logic
145
  #####################################
146
  def process_resume(file_obj):
147
  if file_obj is None:
148
- return None, None
149
- # Extract the full resume text.
150
  resume_text = extract_text_from_file(file_obj)
151
- # Extract basic info from the text.
152
  basic_info = extract_basic_resume_info(resume_text)
153
- # Create a summary paragraph from the basic info.
154
  summary_paragraph = summarize_basic_info(basic_info)
155
- return resume_text, summary_paragraph
 
 
 
 
 
 
 
 
 
156
 
157
  #####################################
158
  # Streamlit Interface
159
  #####################################
160
- st.title("Resume Basic Information Summary")
161
  st.markdown("""
162
- Upload your resume file in **.doc** or **.docx** format. The app extracts key details such as name, age, job experience, skills,
163
- and education, then summarizes them into a single paragraph.
 
164
  """)
165
 
166
  uploaded_file = st.file_uploader("Upload Resume", type=["doc", "docx"])
@@ -170,10 +191,26 @@ if st.button("Process Resume"):
170
  st.error("Please upload a file first.")
171
  else:
172
  with st.spinner("Processing resume..."):
173
- resume_text, summary_paragraph = process_resume(uploaded_file)
174
 
175
- st.subheader("Summary Paragraph")
176
  st.markdown(summary_paragraph)
177
 
178
- st.subheader("Full Extracted Resume Text")
179
- st.text_area("", resume_text, height=300)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import streamlit as st
5
  import docx
6
  import textract
7
+ from sentence_transformers import SentenceTransformer, util
8
 
9
  #####################################
10
  # Function: Extract Text from File
 
70
  if name_match:
71
  info["Name"] = name_match.group(1).strip()
72
  else:
73
+ # Heuristic: assume a line with two or three capitalized words might be the candidate's name.
74
  potential_names = re.findall(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,2}\b", text)
75
  if potential_names:
76
  info["Name"] = potential_names[0]
 
81
  info["Age"] = age_match.group(1)
82
 
83
  # Extract Job Experience using the "experience" section.
84
+ # Capture everything after the word "experience" until a new section or the end.
85
+ experience_match = re.search(r"experience\s*(.*?)(?:\n\s*\n|additional information|skills|education|$)", text, re.IGNORECASE | re.DOTALL)
86
  if experience_match:
 
87
  job_experience = experience_match.group(1).strip()
88
  info["Job Experience"] = " ".join(job_experience.split())
89
  else:
90
+ # Fallback if not a labeled section.
91
  exp_match = re.search(r"(\d+)\s+(years|yrs)\s+(?:of\s+)?experience", text, re.IGNORECASE)
92
  if exp_match:
93
  info["Job Experience"] = f"{exp_match.group(1)} {exp_match.group(2)}"
 
104
  education_block = edu_match.group(1).strip()
105
  info["Education"] = " ".join(education_block.split())
106
  else:
107
+ # Fallback: search for common degree identifiers.
108
  edu_match = re.search(r"(Bachelor|Master|B\.Sc|M\.Sc|Ph\.D)[^\n]+", text)
109
  if edu_match:
110
  info["Education"] = edu_match.group(0)
 
140
  summary_paragraph = ", ".join(parts) + "."
141
  return summary_paragraph
142
 
143
+ #####################################
144
+ # Function: Compare Candidate Summary to Company Prompt
145
+ #####################################
146
def compute_suitability(candidate_summary, company_prompt, model):
    """
    Score how closely a candidate summary matches a company prompt.

    Both texts are embedded with ``model.encode(..., convert_to_tensor=True)``
    and compared with ``util.cos_sim``. Raw cosine similarity lies in
    [-1, 1], so the value is clamped to keep the [0, 1] range that this
    function promises and that the UI message reports.

    Args:
        candidate_summary: Text summarizing the candidate.
        company_prompt: Text describing the company.
        model: Object exposing ``encode(text, convert_to_tensor=True)``;
            the app passes the model returned by ``load_model()``.

    Returns:
        float: The cosine similarity, clamped to [0.0, 1.0].
    """
    candidate_embed = model.encode(candidate_summary, convert_to_tensor=True)
    company_embed = model.encode(company_prompt, convert_to_tensor=True)
    cosine_sim = util.cos_sim(candidate_embed, company_embed)
    score = float(cosine_sim.item())
    # Dissimilar texts can yield a negative cosine, and rounding can push an
    # exact match slightly above 1; clamp so the documented range is honored.
    return min(1.0, max(0.0, score))
156
+
157
  #####################################
158
  # Main Resume Processing Logic
159
  #####################################
160
  def process_resume(file_obj):
161
  if file_obj is None:
162
+ return None
 
163
  resume_text = extract_text_from_file(file_obj)
 
164
  basic_info = extract_basic_resume_info(resume_text)
 
165
  summary_paragraph = summarize_basic_info(basic_info)
166
+ return summary_paragraph
167
+
168
+ #####################################
169
+ # Load the Sentence-BERT Model
170
+ #####################################
171
@st.cache_resource(show_spinner=False)
def load_model():
    """Construct the sentence-embedding model used for suitability scoring.

    Decorated with ``st.cache_resource`` (spinner disabled) so Streamlit
    caches the returned model object instead of rebuilding it on each call.
    """
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    return SentenceTransformer(model_name)
174
+
175
+ model = load_model()
176
 
177
  #####################################
178
  # Streamlit Interface
179
  #####################################
180
+ st.title("Resume Analyzer and Company Suitability Checker")
181
  st.markdown("""
182
+ Upload your resume file in **.doc** or **.docx** format. The app extracts key details (such as name, age, job experience, skills,
183
+ and education) and summarizes them into a single paragraph. It then compares the candidate summary with the company profile
184
+ (using a pre-defined prompt for Google LLC) to produce a suitability score.
185
  """)
186
 
187
  uploaded_file = st.file_uploader("Upload Resume", type=["doc", "docx"])
 
191
  st.error("Please upload a file first.")
192
  else:
193
  with st.spinner("Processing resume..."):
194
+ summary_paragraph = process_resume(uploaded_file)
195
 
196
+ st.subheader("Candidate Summary")
197
  st.markdown(summary_paragraph)
198
 
199
+ st.subheader("Company Information (Prompt)")
200
+ default_company_prompt = (
201
+ "Google LLC, a global leader in technology and innovation, specializes in internet services, cloud computing, "
202
+ "artificial intelligence, and software development. As part of Alphabet Inc., Google seeks candidates with strong "
203
+ "problem-solving skills, adaptability, and collaboration abilities. Technical roles require proficiency in programming "
204
+ "languages such as Python, Java, C++, Go, or JavaScript, with expertise in data structures, algorithms, and system design. "
205
+ "Additionally, skills in AI, cybersecurity, UX/UI design, and digital marketing are highly valued. Google fosters a culture "
206
+ "of innovation, expecting candidates to demonstrate creativity, analytical thinking, and a passion for cutting-edge technology."
207
+ )
208
+ company_prompt = st.text_area("Enter company details:", value=default_company_prompt, height=150)
209
+
210
+ if st.button("Compute Suitability Score"):
211
+ if not company_prompt.strip():
212
+ st.error("Please enter the company information.")
213
+ else:
214
+ with st.spinner("Computing suitability score..."):
215
+ score = compute_suitability(summary_paragraph, company_prompt, model)
216
+ st.success(f"Suitability Score: {score:.2f} (range 0 to 1)")