CR7CAD committed on
Commit
2e98a93
·
verified ·
1 Parent(s): 86037f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -11
app.py CHANGED
@@ -2,6 +2,8 @@ import os
2
  import io
3
  import streamlit as st
4
  import docx
 
 
5
  from transformers import pipeline
6
  import numpy as np
7
  from scipy.spatial.distance import cosine
@@ -45,7 +47,7 @@ models = load_models()
45
  #####################################
46
  def extract_text_from_file(file_obj):
47
  """
48
- Extract text from .docx files.
49
  Returns the extracted text or an error message if extraction fails.
50
  """
51
  filename = file_obj.name
@@ -75,8 +77,13 @@ def extract_text_from_file(file_obj):
75
  os.unlink(temp_path)
76
  except Exception as e:
77
  text = f"Error processing DOC file: {e}"
 
 
 
 
 
78
  else:
79
- text = "Unsupported file type. Please upload a .docx or .doc file."
80
  return text
81
 
82
  #####################################
@@ -84,7 +91,8 @@ def extract_text_from_file(file_obj):
84
  #####################################
85
  def summarize_resume_text(resume_text, models):
86
  """
87
- Generates a concise summary of the resume text using the selected summarization model.
 
88
  """
89
  start_time = time.time()
90
 
@@ -93,6 +101,9 @@ def summarize_resume_text(resume_text, models):
93
  # Handle long text
94
  max_input_length = 1024 # Model limit
95
 
 
 
 
96
  if len(resume_text) > max_input_length:
97
  # Process in chunks if text is too long
98
  chunks = [resume_text[i:i+max_input_length] for i in range(0, min(len(resume_text), 3*max_input_length), max_input_length)]
@@ -104,13 +115,31 @@ def summarize_resume_text(resume_text, models):
104
 
105
  candidate_summary = " ".join(summaries)
106
  if len(candidate_summary) > max_input_length:
107
- candidate_summary = summarizer(candidate_summary[:max_input_length], max_length=150, min_length=40, do_sample=False)[0]['summary_text']
 
108
  else:
109
- candidate_summary = summarizer(resume_text, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  execution_time = time.time() - start_time
112
 
113
- return candidate_summary, execution_time
114
 
115
  #####################################
116
  # Function: Compare Candidate Summary to Company Prompt
@@ -145,15 +174,15 @@ def compute_suitability(candidate_summary, company_prompt, models):
145
  st.title("Resume Analyzer and Company Suitability Checker")
146
  st.markdown(
147
  """
148
- Upload your resume file in **.docx** or **.txt** format. The app performs the following tasks:
149
  1. Extracts text from the resume.
150
- 2. Uses a transformer-based model to generate a concise candidate summary.
151
  3. Compares the candidate summary with a company profile to produce a suitability score.
152
  """
153
  )
154
 
155
  # File uploader
156
- uploaded_file = st.file_uploader("Upload your resume (.docx or .txt)", type=["docx", "txt"])
157
 
158
  # Company description text area
159
  company_prompt = st.text_area(
@@ -168,7 +197,7 @@ if uploaded_file is not None and company_prompt and st.button("Analyze Resume"):
168
  # Extract text from resume
169
  resume_text = extract_text_from_file(uploaded_file)
170
 
171
- if resume_text.startswith("Error") or resume_text == "Unsupported file type. Please upload a .docx or .txt file.":
172
  st.error(resume_text)
173
  else:
174
  # Generate summary
@@ -176,7 +205,7 @@ if uploaded_file is not None and company_prompt and st.button("Analyze Resume"):
176
 
177
  # Display summary
178
  st.subheader("Candidate Summary")
179
- st.write(summary)
180
  st.info(f"Summarization completed in {summarization_time:.2f} seconds")
181
 
182
  # Only compute similarity if company description is provided
 
2
  import io
3
  import streamlit as st
4
  import docx
5
+ import docx2txt
6
+ import tempfile
7
  from transformers import pipeline
8
  import numpy as np
9
  from scipy.spatial.distance import cosine
 
47
  #####################################
48
  def extract_text_from_file(file_obj):
49
  """
50
+ Extract text from .docx and .doc files.
51
  Returns the extracted text or an error message if extraction fails.
52
  """
53
  filename = file_obj.name
 
77
  os.unlink(temp_path)
78
  except Exception as e:
79
  text = f"Error processing DOC file: {e}"
80
+ elif ext == ".txt":
81
+ try:
82
+ text = file_obj.getvalue().decode("utf-8")
83
+ except Exception as e:
84
+ text = f"Error processing TXT file: {e}"
85
  else:
86
+ text = "Unsupported file type. Please upload a .docx, .doc, or .txt file."
87
  return text
88
 
89
  #####################################
 
91
  #####################################
92
  def summarize_resume_text(resume_text, models):
93
  """
94
+ Generates a structured summary of the resume text including name, age,
95
+ expected job industry, and skills of the candidate.
96
  """
97
  start_time = time.time()
98
 
 
101
  # Handle long text
102
  max_input_length = 1024 # Model limit
103
 
104
+ # Append instructions to guide the model to extract structured information
105
+ prompt = f"Summarize this resume and include the candidate's name, age, expected job industry, and skills: {resume_text[:max_input_length]}"
106
+
107
  if len(resume_text) > max_input_length:
108
  # Process in chunks if text is too long
109
  chunks = [resume_text[i:i+max_input_length] for i in range(0, min(len(resume_text), 3*max_input_length), max_input_length)]
 
115
 
116
  candidate_summary = " ".join(summaries)
117
  if len(candidate_summary) > max_input_length:
118
+ candidate_summary = summarizer(f"Provide name, age, expected job industry, and skills of the candidate: {candidate_summary[:max_input_length]}",
119
+ max_length=150, min_length=40, do_sample=False)[0]['summary_text']
120
  else:
121
+ candidate_summary = summarizer(prompt, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
122
+
123
+ # Format the summary to ensure it contains the required information
124
+ # If the model doesn't extract all required information, we'll add placeholders
125
+ formatted_summary = candidate_summary
126
+
127
+ # Check if the summary contains the required information and add labels if needed
128
+ if "name:" not in formatted_summary.lower() and "name " not in formatted_summary.lower():
129
+ formatted_summary = "Name: [Not explicitly mentioned in resume]\n" + formatted_summary
130
+
131
+ if "age:" not in formatted_summary.lower() and "age " not in formatted_summary.lower():
132
+ formatted_summary += "\nAge: [Not explicitly mentioned in resume]"
133
+
134
+ if "industry:" not in formatted_summary.lower() and "expected job" not in formatted_summary.lower():
135
+ formatted_summary += "\nExpected Job Industry: [Based on resume content]"
136
+
137
+ if "skills:" not in formatted_summary.lower() and "skills " not in formatted_summary.lower():
138
+ formatted_summary += "\nSkills: [Key skills extracted from resume]"
139
 
140
  execution_time = time.time() - start_time
141
 
142
+ return formatted_summary, execution_time
143
 
144
  #####################################
145
  # Function: Compare Candidate Summary to Company Prompt
 
174
  st.title("Resume Analyzer and Company Suitability Checker")
175
  st.markdown(
176
  """
177
+ Upload your resume file in **.docx**, **.doc**, or **.txt** format. The app performs the following tasks:
178
  1. Extracts text from the resume.
179
+ 2. Uses a transformer-based model to generate a structured candidate summary with name, age, expected job industry, and skills.
180
  3. Compares the candidate summary with a company profile to produce a suitability score.
181
  """
182
  )
183
 
184
  # File uploader
185
+ uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
186
 
187
  # Company description text area
188
  company_prompt = st.text_area(
 
197
  # Extract text from resume
198
  resume_text = extract_text_from_file(uploaded_file)
199
 
200
+ if resume_text.startswith("Error") or resume_text == "Unsupported file type. Please upload a .docx, .doc, or .txt file.":
201
  st.error(resume_text)
202
  else:
203
  # Generate summary
 
205
 
206
  # Display summary
207
  st.subheader("Candidate Summary")
208
+ st.markdown(summary)
209
  st.info(f"Summarization completed in {summarization_time:.2f} seconds")
210
 
211
  # Only compute similarity if company description is provided