CR7CAD committed on
Commit
d2d6501
·
verified ·
1 Parent(s): 5a77c05

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -158
app.py CHANGED
@@ -4,43 +4,29 @@ import streamlit as st
4
  import docx
5
  import textract
6
  from transformers import pipeline
7
- import threading
8
- import numpy as np
 
9
 
10
  #####################################
11
- # Load Models - Optimized with Threading
12
  #####################################
13
- @st.cache_resource(show_spinner=False)
14
  def load_models():
15
- """
16
- Load all models in parallel using threading to speed up initialization
17
- """
18
- models = {}
19
-
20
- def load_summarizer_thread():
21
- models['summarizer'] = pipeline("summarization", model="google/pegasus-xsum", device=0 if st.session_state.get('use_gpu', False) else -1)
22
-
23
- def load_similarity_thread():
24
- # Using sentence-similarity pipeline instead of SentenceTransformer
25
- models['similarity'] = pipeline("sentence-similarity", model="sentence-transformers/all-MiniLM-L6-v2",
26
- device=0 if st.session_state.get('use_gpu', False) else -1)
27
-
28
- # Start threads to load models in parallel
29
- threads = [
30
- threading.Thread(target=load_summarizer_thread),
31
- threading.Thread(target=load_similarity_thread)
32
- ]
33
-
34
- for thread in threads:
35
- thread.start()
36
-
37
- for thread in threads:
38
- thread.join()
39
-
40
- return models
41
 
42
  #####################################
43
- # Function: Extract Text from File - Optimized
44
  #####################################
45
  def extract_text_from_file(file_obj):
46
  """
@@ -54,18 +40,15 @@ def extract_text_from_file(file_obj):
54
  if ext == ".docx":
55
  try:
56
  document = docx.Document(file_obj)
57
- # Use a list comprehension and join for better performance
58
  text = "\n".join(para.text for para in document.paragraphs if para.text.strip())
59
  except Exception as e:
60
  text = f"Error processing DOCX file: {e}"
61
  elif ext == ".doc":
62
  try:
63
- # Use a context manager for better file handling
64
  with tempfile.NamedTemporaryFile(delete=False, suffix=".doc") as tmp:
65
  tmp.write(file_obj.read())
66
  tmp_filename = tmp.name
67
  text = textract.process(tmp_filename).decode("utf-8")
68
- # Clean up the temporary file immediately
69
  os.unlink(tmp_filename)
70
  except Exception as e:
71
  text = f"Error processing DOC file: {e}"
@@ -74,20 +57,19 @@ def extract_text_from_file(file_obj):
74
  return text
75
 
76
  #####################################
77
- # Function: Summarize Resume Text - Optimized
78
  #####################################
79
  def summarize_resume_text(resume_text, models):
80
  """
81
- Generates a concise summary of the resume text using the pre-loaded summarization model.
82
  """
83
  summarizer = models['summarizer']
84
-
85
- # Optimize text processing - only use essential text
86
- # Break text into chunks and summarize important parts
87
  max_input_length = 1024 # PEGASUS-XSUM limit
88
 
89
  if len(resume_text) > max_input_length:
90
- # Instead of simple trimming, extract key sections
91
  chunks = [resume_text[i:i+max_input_length] for i in range(0, min(len(resume_text), 3*max_input_length), max_input_length)]
92
  summaries = []
93
 
@@ -96,7 +78,6 @@ def summarize_resume_text(resume_text, models):
96
  summaries.append(chunk_summary)
97
 
98
  candidate_summary = " ".join(summaries)
99
- # Summarize again if combined summary is too long
100
  if len(candidate_summary) > max_input_length:
101
  candidate_summary = summarizer(candidate_summary[:max_input_length], max_length=150, min_length=40, do_sample=False)[0]['summary_text']
102
  else:
@@ -105,11 +86,11 @@ def summarize_resume_text(resume_text, models):
105
  return candidate_summary
106
 
107
  #####################################
108
- # Function: Compare Candidate Summary to Company Prompt - Using Pipeline
109
  #####################################
110
  def compute_suitability(candidate_summary, company_prompt, models):
111
  """
112
- Compute the similarity between candidate summary and company prompt using the similarity pipeline.
113
  Returns a score in the range [0, 1].
114
  """
115
  similarity_pipeline = models['similarity']
@@ -125,130 +106,92 @@ def compute_suitability(candidate_summary, company_prompt, models):
125
  return score
126
 
127
  #####################################
128
- # Main Resume Processing Logic
129
  #####################################
130
- def process_resume(file_obj, models):
131
- """
132
- Extracts text from the uploaded file and then generates a summary
133
- using a text summarization model.
134
  """
135
- with st.status("Processing resume...") as status:
136
- status.update(label="Extracting text from resume...")
137
- resume_text = extract_text_from_file(file_obj)
138
-
139
- # Check if resume_text is valid
140
- if not resume_text or resume_text.strip() == "":
141
- status.update(label="Error: No text could be extracted", state="error")
142
- return ""
143
-
144
- status.update(label=f"Extracted {len(resume_text)} characters. Generating summary...")
145
-
146
- candidate_summary = summarize_resume_text(resume_text, models)
147
- status.update(label="Processing complete!", state="complete")
148
-
149
- return candidate_summary
150
 
151
- #####################################
152
- # Streamlit Interface - Optimized
153
- #####################################
154
- def main():
155
- st.set_page_config(page_title="Resume Analyzer", layout="wide")
 
156
 
157
- # Initialize session state for GPU usage
158
- if 'use_gpu' not in st.session_state:
159
- st.session_state.use_gpu = False
160
 
161
- # Only show sidebar settings on first run
162
- with st.sidebar:
163
- st.title("Settings")
164
- if st.checkbox("Use GPU (if available)", value=st.session_state.use_gpu):
165
- st.session_state.use_gpu = True
166
  else:
167
- st.session_state.use_gpu = False
168
-
169
- st.info("Using GPU can significantly speed up model inference if available")
170
-
171
- # Load models - this happens only once due to caching
172
- with st.spinner("Loading AI models..."):
173
- models = load_models()
174
-
175
- st.title("Resume Analyzer and Company Suitability Checker")
176
- st.markdown(
177
- """
178
- Upload your resume file in **.doc** or **.docx** format. The app performs the following tasks:
179
- 1. Extracts text from the resume.
180
- 2. Uses a transformer-based model to generate a concise candidate summary.
181
- 3. Compares the candidate summary with a company profile to produce a suitability score.
182
- """
183
- )
184
-
185
- # Use columns for better layout
186
- col1, col2 = st.columns([1, 1])
187
-
188
- with col1:
189
- # File uploader for resume
190
- uploaded_file = st.file_uploader("Upload Resume", type=["doc", "docx"])
191
-
192
- # Button to process the resume
193
- if st.button("Process Resume", type="primary", use_container_width=True):
194
- if uploaded_file is None:
195
- st.error("Please upload a resume file first.")
196
- else:
197
- candidate_summary = process_resume(uploaded_file, models)
198
- if candidate_summary: # only if summary is generated
199
  st.session_state["candidate_summary"] = candidate_summary
200
-
201
- # Display candidate summary if available
202
- if "candidate_summary" in st.session_state:
203
- st.subheader("Candidate Summary")
204
- st.markdown(st.session_state["candidate_summary"])
205
 
206
- with col2:
207
- # Pre-defined company prompt for Google LLC.
208
- default_company_prompt = (
209
- "Google LLC, a global leader in technology and innovation, specializes in internet services, cloud computing, "
210
- "artificial intelligence, and software development. As part of Alphabet Inc., Google seeks candidates with strong "
211
- "problem-solving skills, adaptability, and collaboration abilities. Technical roles require proficiency in programming "
212
- "languages such as Python, Java, C++, Go, or JavaScript, with expertise in data structures, algorithms, and system design. "
213
- "Additionally, skills in AI, cybersecurity, UX/UI design, and digital marketing are highly valued. Google fosters a culture "
214
- "of innovation, expecting candidates to demonstrate creativity, analytical thinking, and a passion for cutting-edge technology."
215
- )
216
 
217
- # Company prompt text area.
218
- company_prompt = st.text_area(
219
- "Enter company details:",
220
- value=default_company_prompt,
221
- height=150,
222
- )
 
 
 
 
223
 
224
- # Button to compute the suitability score.
225
- if st.button("Compute Suitability Score", type="primary", use_container_width=True):
226
- if "candidate_summary" not in st.session_state:
227
- st.error("Please process the resume first!")
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  else:
229
- candidate_summary = st.session_state["candidate_summary"]
230
- if candidate_summary.strip() == "":
231
- st.error("Candidate summary is empty; please check your resume file.")
232
- elif company_prompt.strip() == "":
233
- st.error("Please enter the company information.")
 
 
 
 
 
 
 
 
 
234
  else:
235
- with st.spinner("Computing suitability score..."):
236
- score = compute_suitability(candidate_summary, company_prompt, models)
237
-
238
- # Display score with a progress bar for visual feedback
239
- st.success(f"Suitability Score: {score:.2f} (range 0 to 1)")
240
- st.progress(score)
241
-
242
- # Add interpretation of score
243
- if score > 0.75:
244
- st.info("Excellent match! Your profile appears very well suited for this company.")
245
- elif score > 0.5:
246
- st.info("Good match. Your profile aligns with many aspects of the company's requirements.")
247
- elif score > 0.3:
248
- st.info("Moderate match. Consider highlighting more relevant skills or experience.")
249
- else:
250
- st.info("Low match. Your profile may need significant adjustments to better align with this company.")
251
-
252
-
253
- if __name__ == "__main__":
254
- main()
 
4
  import docx
5
  import textract
6
  from transformers import pipeline
7
+
8
+ # Set page title
9
+ st.set_page_config(page_title="Resume Analyzer and Company Suitability Checker")
10
 
11
  #####################################
12
+ # Preload Models
13
  #####################################
14
+ @st.cache_resource(show_spinner=True)
15
  def load_models():
16
+ """Load all models at startup"""
17
+ with st.spinner("Loading AI models... This may take a minute on first run."):
18
+ models = {}
19
+ # Load summarization model
20
+ models['summarizer'] = pipeline("summarization", model="google/pegasus-xsum")
21
+ # Load similarity model
22
+ models['similarity'] = pipeline("sentence-similarity", model="sentence-transformers/all-MiniLM-L6-v2")
23
+ return models
24
+
25
+ # Preload models immediately when app starts
26
+ models = load_models()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  #####################################
29
+ # Function: Extract Text from File
30
  #####################################
31
  def extract_text_from_file(file_obj):
32
  """
 
40
  if ext == ".docx":
41
  try:
42
  document = docx.Document(file_obj)
 
43
  text = "\n".join(para.text for para in document.paragraphs if para.text.strip())
44
  except Exception as e:
45
  text = f"Error processing DOCX file: {e}"
46
  elif ext == ".doc":
47
  try:
 
48
  with tempfile.NamedTemporaryFile(delete=False, suffix=".doc") as tmp:
49
  tmp.write(file_obj.read())
50
  tmp_filename = tmp.name
51
  text = textract.process(tmp_filename).decode("utf-8")
 
52
  os.unlink(tmp_filename)
53
  except Exception as e:
54
  text = f"Error processing DOC file: {e}"
 
57
  return text
58
 
59
  #####################################
60
+ # Function: Summarize Resume Text
61
  #####################################
62
  def summarize_resume_text(resume_text, models):
63
  """
64
+ Generates a concise summary of the resume text using the summarization model.
65
  """
66
  summarizer = models['summarizer']
67
+
68
+ # Handle long text
 
69
  max_input_length = 1024 # PEGASUS-XSUM limit
70
 
71
  if len(resume_text) > max_input_length:
72
+ # Process in chunks if text is too long
73
  chunks = [resume_text[i:i+max_input_length] for i in range(0, min(len(resume_text), 3*max_input_length), max_input_length)]
74
  summaries = []
75
 
 
78
  summaries.append(chunk_summary)
79
 
80
  candidate_summary = " ".join(summaries)
 
81
  if len(candidate_summary) > max_input_length:
82
  candidate_summary = summarizer(candidate_summary[:max_input_length], max_length=150, min_length=40, do_sample=False)[0]['summary_text']
83
  else:
 
86
  return candidate_summary
87
 
88
  #####################################
89
+ # Function: Compare Candidate Summary to Company Prompt
90
  #####################################
91
  def compute_suitability(candidate_summary, company_prompt, models):
92
  """
93
+ Compute the similarity between candidate summary and company prompt.
94
  Returns a score in the range [0, 1].
95
  """
96
  similarity_pipeline = models['similarity']
 
106
  return score
107
 
108
  #####################################
109
+ # Streamlit Interface
110
  #####################################
111
+ st.title("Resume Analyzer and Company Suitability Checker")
112
+ st.markdown(
 
 
113
  """
114
+ Upload your resume file in **.doc** or **.docx** format. The app performs the following tasks:
115
+ 1. Extracts text from the resume.
116
+ 2. Uses a transformer-based model to generate a concise candidate summary.
117
+ 3. Compares the candidate summary with a company profile to produce a suitability score.
118
+ """
119
+ )
 
 
 
 
 
 
 
 
 
120
 
121
+ # Use two columns with equal width
122
+ col1, col2 = st.columns(2)
123
+
124
+ with col1:
125
+ # File uploader for resume
126
+ uploaded_file = st.file_uploader("Upload Resume", type=["doc", "docx"])
127
 
128
+ if uploaded_file is not None:
129
+ st.write(f"{uploaded_file.name} {uploaded_file.size/1024:.1f}KB")
 
130
 
131
+ # Button to process the resume
132
+ if st.button("Process Resume", type="primary", use_container_width=True):
133
+ if uploaded_file is None:
134
+ st.error("Please upload a resume file first.")
 
135
  else:
136
+ with st.status("Processing resume...") as status:
137
+ status.update(label="Extracting text from resume...")
138
+ resume_text = extract_text_from_file(uploaded_file)
139
+
140
+ if not resume_text or resume_text.strip() == "":
141
+ status.update(label="Error: No text could be extracted", state="error")
142
+ else:
143
+ status.update(label=f"Extracted {len(resume_text)} characters. Generating summary...")
144
+ candidate_summary = summarize_resume_text(resume_text, models)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  st.session_state["candidate_summary"] = candidate_summary
146
+ status.update(label="Processing complete!", state="complete")
 
 
 
 
147
 
148
+ # Display candidate summary if available
149
+ if "candidate_summary" in st.session_state:
150
+ st.subheader("Candidate Summary")
151
+ st.markdown(st.session_state["candidate_summary"])
 
 
 
 
 
 
152
 
153
+ with col2:
154
+ # Pre-defined company prompt for Google LLC.
155
+ default_company_prompt = (
156
+ "Google LLC, a global leader in technology and innovation, specializes in internet services, cloud computing, "
157
+ "artificial intelligence, and software development. As part of Alphabet Inc., Google seeks candidates with strong "
158
+ "problem-solving skills, adaptability, and collaboration abilities. Technical roles require proficiency in programming "
159
+ "languages such as Python, Java, C++, Go, or JavaScript, with expertise in data structures, algorithms, and system design. "
160
+ "Additionally, skills in AI, cybersecurity, UX/UI design, and digital marketing are highly valued. Google fosters a culture "
161
+ "of innovation, expecting candidates to demonstrate creativity, analytical thinking, and a passion for cutting-edge technology."
162
+ )
163
 
164
+ # Company prompt text area.
165
+ company_prompt = st.text_area(
166
+ "Enter company details:",
167
+ value=default_company_prompt,
168
+ height=150,
169
+ )
170
+
171
+ # Button to compute the suitability score.
172
+ if st.button("Compute Suitability Score", type="primary", use_container_width=True):
173
+ if "candidate_summary" not in st.session_state:
174
+ st.error("Please process the resume first!")
175
+ else:
176
+ candidate_summary = st.session_state["candidate_summary"]
177
+ if candidate_summary.strip() == "":
178
+ st.error("Candidate summary is empty; please check your resume file.")
179
+ elif company_prompt.strip() == "":
180
+ st.error("Please enter the company information.")
181
  else:
182
+ with st.spinner("Computing suitability score..."):
183
+ score = compute_suitability(candidate_summary, company_prompt, models)
184
+
185
+ # Display score with a progress bar for visual feedback
186
+ st.success(f"Suitability Score: {score:.2f} (range 0 to 1)")
187
+ st.progress(score)
188
+
189
+ # Add interpretation of score
190
+ if score > 0.75:
191
+ st.info("Excellent match! Your profile appears very well suited for this company.")
192
+ elif score > 0.5:
193
+ st.info("Good match. Your profile aligns with many aspects of the company's requirements.")
194
+ elif score > 0.3:
195
+ st.info("Moderate match. Consider highlighting more relevant skills or experience.")
196
  else:
197
+ st.info("Low match. Your profile may need significant adjustments to better align with this company.")