Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,8 @@ import os
|
|
2 |
import io
|
3 |
import streamlit as st
|
4 |
import docx
|
|
|
|
|
5 |
from transformers import pipeline
|
6 |
import numpy as np
|
7 |
from scipy.spatial.distance import cosine
|
@@ -45,7 +47,7 @@ models = load_models()
|
|
45 |
#####################################
|
46 |
def extract_text_from_file(file_obj):
|
47 |
"""
|
48 |
-
Extract text from .docx files.
|
49 |
Returns the extracted text or an error message if extraction fails.
|
50 |
"""
|
51 |
filename = file_obj.name
|
@@ -75,8 +77,13 @@ def extract_text_from_file(file_obj):
|
|
75 |
os.unlink(temp_path)
|
76 |
except Exception as e:
|
77 |
text = f"Error processing DOC file: {e}"
|
|
|
|
|
|
|
|
|
|
|
78 |
else:
|
79 |
-
text = "Unsupported file type. Please upload a .docx or .txt file."
|
80 |
return text
|
81 |
|
82 |
#####################################
|
@@ -84,7 +91,8 @@ def extract_text_from_file(file_obj):
|
|
84 |
#####################################
|
85 |
def summarize_resume_text(resume_text, models):
|
86 |
"""
|
87 |
-
Generates a
|
|
|
88 |
"""
|
89 |
start_time = time.time()
|
90 |
|
@@ -93,6 +101,9 @@ def summarize_resume_text(resume_text, models):
|
|
93 |
# Handle long text
|
94 |
max_input_length = 1024 # Model limit
|
95 |
|
|
|
|
|
|
|
96 |
if len(resume_text) > max_input_length:
|
97 |
# Process in chunks if text is too long
|
98 |
chunks = [resume_text[i:i+max_input_length] for i in range(0, min(len(resume_text), 3*max_input_length), max_input_length)]
|
@@ -104,13 +115,31 @@ def summarize_resume_text(resume_text, models):
|
|
104 |
|
105 |
candidate_summary = " ".join(summaries)
|
106 |
if len(candidate_summary) > max_input_length:
|
107 |
-
candidate_summary = summarizer(
|
|
|
108 |
else:
|
109 |
-
candidate_summary = summarizer(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
execution_time = time.time() - start_time
|
112 |
|
113 |
-
return
|
114 |
|
115 |
#####################################
|
116 |
# Function: Compare Candidate Summary to Company Prompt
|
@@ -145,15 +174,15 @@ def compute_suitability(candidate_summary, company_prompt, models):
|
|
145 |
st.title("Resume Analyzer and Company Suitability Checker")
|
146 |
st.markdown(
|
147 |
"""
|
148 |
-
Upload your resume file in **.docx** or **.txt** format. The app performs the following tasks:
|
149 |
1. Extracts text from the resume.
|
150 |
-
2. Uses a transformer-based model to generate a
|
151 |
3. Compares the candidate summary with a company profile to produce a suitability score.
|
152 |
"""
|
153 |
)
|
154 |
|
155 |
# File uploader
|
156 |
-
uploaded_file = st.file_uploader("Upload your resume (.docx or .txt)", type=["docx", "txt"])
|
157 |
|
158 |
# Company description text area
|
159 |
company_prompt = st.text_area(
|
@@ -168,7 +197,7 @@ if uploaded_file is not None and company_prompt and st.button("Analyze Resume"):
|
|
168 |
# Extract text from resume
|
169 |
resume_text = extract_text_from_file(uploaded_file)
|
170 |
|
171 |
-
if resume_text.startswith("Error") or resume_text == "Unsupported file type. Please upload a .docx or .txt file.":
|
172 |
st.error(resume_text)
|
173 |
else:
|
174 |
# Generate summary
|
@@ -176,7 +205,7 @@ if uploaded_file is not None and company_prompt and st.button("Analyze Resume"):
|
|
176 |
|
177 |
# Display summary
|
178 |
st.subheader("Candidate Summary")
|
179 |
-
st.
|
180 |
st.info(f"Summarization completed in {summarization_time:.2f} seconds")
|
181 |
|
182 |
# Only compute similarity if company description is provided
|
|
|
2 |
import io
|
3 |
import streamlit as st
|
4 |
import docx
|
5 |
+
import docx2txt
|
6 |
+
import tempfile
|
7 |
from transformers import pipeline
|
8 |
import numpy as np
|
9 |
from scipy.spatial.distance import cosine
|
|
|
47 |
#####################################
|
48 |
def extract_text_from_file(file_obj):
|
49 |
"""
|
50 |
+
Extract text from .docx and .doc files.
|
51 |
Returns the extracted text or an error message if extraction fails.
|
52 |
"""
|
53 |
filename = file_obj.name
|
|
|
77 |
os.unlink(temp_path)
|
78 |
except Exception as e:
|
79 |
text = f"Error processing DOC file: {e}"
|
80 |
+
elif ext == ".txt":
|
81 |
+
try:
|
82 |
+
text = file_obj.getvalue().decode("utf-8")
|
83 |
+
except Exception as e:
|
84 |
+
text = f"Error processing TXT file: {e}"
|
85 |
else:
|
86 |
+
text = "Unsupported file type. Please upload a .docx, .doc, or .txt file."
|
87 |
return text
|
88 |
|
89 |
#####################################
|
|
|
91 |
#####################################
|
92 |
def summarize_resume_text(resume_text, models):
|
93 |
"""
|
94 |
+
Generates a structured summary of the resume text including name, age,
|
95 |
+
expected job industry, and skills of the candidate.
|
96 |
"""
|
97 |
start_time = time.time()
|
98 |
|
|
|
101 |
# Handle long text
|
102 |
max_input_length = 1024 # Model limit
|
103 |
|
104 |
+
# Append instructions to guide the model to extract structured information
|
105 |
+
prompt = f"Summarize this resume and include the candidate's name, age, expected job industry, and skills: {resume_text[:max_input_length]}"
|
106 |
+
|
107 |
if len(resume_text) > max_input_length:
|
108 |
# Process in chunks if text is too long
|
109 |
chunks = [resume_text[i:i+max_input_length] for i in range(0, min(len(resume_text), 3*max_input_length), max_input_length)]
|
|
|
115 |
|
116 |
candidate_summary = " ".join(summaries)
|
117 |
if len(candidate_summary) > max_input_length:
|
118 |
+
candidate_summary = summarizer(f"Provide name, age, expected job industry, and skills of the candidate: {candidate_summary[:max_input_length]}",
|
119 |
+
max_length=150, min_length=40, do_sample=False)[0]['summary_text']
|
120 |
else:
|
121 |
+
candidate_summary = summarizer(prompt, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
|
122 |
+
|
123 |
+
# Format the summary to ensure it contains the required information
|
124 |
+
# If the model doesn't extract all required information, we'll add placeholders
|
125 |
+
formatted_summary = candidate_summary
|
126 |
+
|
127 |
+
# Check if the summary contains the required information and add labels if needed
|
128 |
+
if "name:" not in formatted_summary.lower() and "name " not in formatted_summary.lower():
|
129 |
+
formatted_summary = "Name: [Not explicitly mentioned in resume]\n" + formatted_summary
|
130 |
+
|
131 |
+
if "age:" not in formatted_summary.lower() and "age " not in formatted_summary.lower():
|
132 |
+
formatted_summary += "\nAge: [Not explicitly mentioned in resume]"
|
133 |
+
|
134 |
+
if "industry:" not in formatted_summary.lower() and "expected job" not in formatted_summary.lower():
|
135 |
+
formatted_summary += "\nExpected Job Industry: [Based on resume content]"
|
136 |
+
|
137 |
+
if "skills:" not in formatted_summary.lower() and "skills " not in formatted_summary.lower():
|
138 |
+
formatted_summary += "\nSkills: [Key skills extracted from resume]"
|
139 |
|
140 |
execution_time = time.time() - start_time
|
141 |
|
142 |
+
return formatted_summary, execution_time
|
143 |
|
144 |
#####################################
|
145 |
# Function: Compare Candidate Summary to Company Prompt
|
|
|
174 |
st.title("Resume Analyzer and Company Suitability Checker")
|
175 |
st.markdown(
|
176 |
"""
|
177 |
+
Upload your resume file in **.docx**, **.doc**, or **.txt** format. The app performs the following tasks:
|
178 |
1. Extracts text from the resume.
|
179 |
+
2. Uses a transformer-based model to generate a structured candidate summary with name, age, expected job industry, and skills.
|
180 |
3. Compares the candidate summary with a company profile to produce a suitability score.
|
181 |
"""
|
182 |
)
|
183 |
|
184 |
# File uploader
|
185 |
+
uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
|
186 |
|
187 |
# Company description text area
|
188 |
company_prompt = st.text_area(
|
|
|
197 |
# Extract text from resume
|
198 |
resume_text = extract_text_from_file(uploaded_file)
|
199 |
|
200 |
+
if resume_text.startswith("Error") or resume_text == "Unsupported file type. Please upload a .docx, .doc, or .txt file.":
|
201 |
st.error(resume_text)
|
202 |
else:
|
203 |
# Generate summary
|
|
|
205 |
|
206 |
# Display summary
|
207 |
st.subheader("Candidate Summary")
|
208 |
+
st.markdown(summary)
|
209 |
st.info(f"Summarization completed in {summarization_time:.2f} seconds")
|
210 |
|
211 |
# Only compute similarity if company description is provided
|