CR7CAD committed on
Commit
cc18787
·
verified ·
1 Parent(s): 89f5ee9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -108
app.py CHANGED
@@ -5,6 +5,7 @@ import streamlit as st
5
  import docx
6
  import textract
7
  from sentence_transformers import SentenceTransformer, util
 
8
 
9
  #####################################
10
  # Function: Extract Text from File
@@ -44,109 +45,30 @@ def extract_text_from_file(file_obj):
44
  return text
45
 
46
  #####################################
47
- # Function: Extract Basic Resume Information
48
  #####################################
49
- def extract_basic_resume_info(text):
 
50
  """
51
- Parse the extracted text to extract/summarize:
52
- - Name
53
- - Age
54
- - Job Experience (capturing the block under the "experience" section)
55
- - Skills
56
- - Education
57
-
58
- Returns a dictionary with the extracted elements.
59
  """
60
- info = {
61
- "Name": None,
62
- "Age": None,
63
- "Job Experience": None,
64
- "Skills": None,
65
- "Education": None,
66
- }
67
-
68
- # Extract Name (e.g., "CONG, An Dong" from the first line)
69
- name_match = re.search(r"^([A-Z]+)[,\s]+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)", text, re.MULTILINE)
70
- if name_match:
71
- info["Name"] = f"{name_match.group(1)} {name_match.group(2)}"
72
- else:
73
- # Fallback heuristic: assume a line with two or three capitalized words might be the candidate's name.
74
- potential_names = re.findall(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,2}\b", text)
75
- if potential_names:
76
- info["Name"] = potential_names[0]
77
-
78
- # Extract Age (e.g., "Age: 28")
79
- age_match = re.search(r"[Aa]ge[:\-]\s*(\d{1,3})", text)
80
- if age_match:
81
- info["Age"] = age_match.group(1)
82
 
83
- # Extract Job Experience using the "experience" section.
84
- # Capture everything after the word "experience" until a new section or the end.
85
- experience_match = re.search(
86
- r"experience\s*(.*?)(?:\n\s*\n|additional information|skills|education|$)",
87
- text,
88
- re.IGNORECASE | re.DOTALL,
89
- )
90
- if experience_match:
91
- job_experience = experience_match.group(1).strip()
92
- info["Job Experience"] = " ".join(job_experience.split())
93
- else:
94
- # Fallback if not a labeled section.
95
- exp_match = re.search(
96
- r"(\d+)\s+(years|yrs)\s+(?:of\s+)?experience", text, re.IGNORECASE
97
- )
98
- if exp_match:
99
- info["Job Experience"] = f"{exp_match.group(1)} {exp_match.group(2)}"
100
-
101
- # Extract Skills (e.g., "Skills: Python, Java, SQL")
102
- skills_match = re.search(r"(Skills|Technical Skills)[:\-]\s*(.+)", text, re.IGNORECASE)
103
- if skills_match:
104
- skills_str = skills_match.group(2).strip()
105
- info["Skills"] = skills_str.rstrip(".")
106
-
107
- # Extract Education (e.g., "Education: ...")
108
- edu_match = re.search(
109
- r"education\s*(.*?)(?:\n\s*\n|experience|$)", text, re.IGNORECASE | re.DOTALL
110
- )
111
- if edu_match:
112
- education_block = edu_match.group(1).strip()
113
- info["Education"] = " ".join(education_block.split())
114
- else:
115
- # Fallback: search for common degree identifiers.
116
- edu_match = re.search(r"(Bachelor|Master|B\.Sc|M\.Sc|Ph\.D)[^\n]+", text)
117
- if edu_match:
118
- info["Education"] = edu_match.group(0)
119
-
120
- return info
121
-
122
- #####################################
123
- # Function: Summarize Basic Info into a Paragraph
124
- #####################################
125
- def summarize_basic_info(info):
126
  """
127
- Combine the extracted resume elements into a concise summary paragraph.
 
128
  """
129
- parts = []
130
-
131
- if info.get("Name"):
132
- parts.append(f"Candidate {info['Name']}")
133
- else:
134
- parts.append("The candidate")
135
-
136
- if info.get("Age"):
137
- parts.append(f"aged {info['Age']}")
138
-
139
- if info.get("Job Experience"):
140
- parts.append(f"with job experience: {info['Job Experience']}")
141
-
142
- if info.get("Skills"):
143
- parts.append(f"skilled in {info['Skills']}")
144
-
145
- if info.get("Education"):
146
- parts.append(f"and educated in {info['Education']}")
147
-
148
- summary_paragraph = ", ".join(parts) + "."
149
- return summary_paragraph
150
 
151
  #####################################
152
  # Function: Compare Candidate Summary to Company Prompt
@@ -166,19 +88,24 @@ def compute_suitability(candidate_summary, company_prompt, model):
166
  # Main Resume Processing Logic
167
  #####################################
168
  def process_resume(file_obj):
 
 
 
 
169
  resume_text = extract_text_from_file(file_obj)
170
- basic_info = extract_basic_resume_info(resume_text)
171
- summary_paragraph = summarize_basic_info(basic_info)
172
- return summary_paragraph
173
 
174
  #####################################
175
- # Load the Sentence-BERT Model
176
  #####################################
177
  @st.cache_resource(show_spinner=False)
178
- def load_model():
 
179
  return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
180
 
181
- model = load_model()
 
182
 
183
  #####################################
184
  # Streamlit Interface
@@ -186,9 +113,10 @@ model = load_model()
186
  st.title("Resume Analyzer and Company Suitability Checker")
187
  st.markdown(
188
  """
189
- Upload your resume file in **.doc** or **.docx** format. The app extracts key details such as name, age, job experience, skills,
190
- and education, and summarizes them into a single paragraph. Then, it compares the candidate summary with a company profile
191
- (using a pre-defined prompt for Google LLC) to produce a suitability score.
 
192
  """
193
  )
194
 
@@ -206,7 +134,7 @@ if st.button("Process Resume"):
206
  st.subheader("Candidate Summary")
207
  st.markdown(candidate_summary)
208
 
209
- # Pre-define the company prompt for Google LLC.
210
  default_company_prompt = (
211
  "Google LLC, a global leader in technology and innovation, specializes in internet services, cloud computing, "
212
  "artificial intelligence, and software development. As part of Alphabet Inc., Google seeks candidates with strong "
@@ -235,5 +163,5 @@ if st.button("Compute Suitability Score"):
235
  st.error("Please enter the company information.")
236
  else:
237
  with st.spinner("Computing suitability score..."):
238
- score = compute_suitability(candidate_summary, company_prompt, model)
239
  st.success(f"Suitability Score: {score:.2f} (range 0 to 1)")
 
5
  import docx
6
  import textract
7
  from sentence_transformers import SentenceTransformer, util
8
+ from transformers import pipeline
9
 
10
  #####################################
11
  # Function: Extract Text from File
 
45
  return text
46
 
47
  #####################################
48
+ # Function: Summarize Resume Text using a Transformer Model
49
  #####################################
50
+ @st.cache_resource(show_spinner=False)
51
+ def load_summarizer():
52
  """
53
+ Loads the summarization pipeline using a transformer model.
54
+ We use the model "ainize/bart-base-cnn" for summarization.
 
 
 
 
 
 
55
  """
56
+ return pipeline("summarization", model="ainize/bart-base-cnn")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
+ def summarize_resume_text(resume_text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  """
60
+ Generates a concise summary of the resume text using the summarization model.
61
+ If the resume text is very long, we trim it to avoid hitting the model's maximum input size.
62
  """
63
+ summarizer = load_summarizer()
64
+ # In case the resume text is too long, we trim it.
65
+ max_input_length = 1024 # adjust as needed
66
+ if len(resume_text) > max_input_length:
67
+ resume_text = resume_text[:max_input_length]
68
+ # The summarization pipeline returns a list of summaries.
69
+ summary_result = summarizer(resume_text, max_length=150, min_length=40, do_sample=False)
70
+ candidate_summary = summary_result[0]['summary_text']
71
+ return candidate_summary
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  #####################################
74
  # Function: Compare Candidate Summary to Company Prompt
 
88
  # Main Resume Processing Logic
89
  #####################################
90
  def process_resume(file_obj):
91
+ """
92
+ Extracts text from the uploaded file and then generates a summary
93
+ using a text summarization model.
94
+ """
95
  resume_text = extract_text_from_file(file_obj)
96
+ candidate_summary = summarize_resume_text(resume_text)
97
+ return candidate_summary
 
98
 
99
  #####################################
100
+ # Load the Sentence-BERT Model (Semantic Similarity Model)
101
  #####################################
102
  @st.cache_resource(show_spinner=False)
103
+ def load_sbert_model():
104
+ # This loads the Sentence-BERT model "all-MiniLM-L6-v2"
105
  return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
106
 
107
+ # Load Sentence-BERT model for computing semantic similarity.
108
+ sbert_model = load_sbert_model()
109
 
110
  #####################################
111
  # Streamlit Interface
 
113
  st.title("Resume Analyzer and Company Suitability Checker")
114
  st.markdown(
115
  """
116
+ Upload your resume file in **.doc** or **.docx** format. The app performs the following tasks:
117
+ 1. Extracts text from the resume.
118
+ 2. Uses a transformer-based text summarization model (**ainize/bart-base-cnn**) to generate a concise candidate summary.
119
+ 3. Compares the candidate summary with a company profile (using Sentence-BERT) to produce a suitability score.
120
  """
121
  )
122
 
 
134
  st.subheader("Candidate Summary")
135
  st.markdown(candidate_summary)
136
 
137
+ # Pre-defined company prompt for Google LLC.
138
  default_company_prompt = (
139
  "Google LLC, a global leader in technology and innovation, specializes in internet services, cloud computing, "
140
  "artificial intelligence, and software development. As part of Alphabet Inc., Google seeks candidates with strong "
 
163
  st.error("Please enter the company information.")
164
  else:
165
  with st.spinner("Computing suitability score..."):
166
+ score = compute_suitability(candidate_summary, company_prompt, sbert_model)
167
  st.success(f"Suitability Score: {score:.2f} (range 0 to 1)")