Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,8 @@ import streamlit as st
|
|
4 |
import docx
|
5 |
from transformers import pipeline
|
6 |
import time
|
|
|
|
|
7 |
|
8 |
# Set page title and hide sidebar
|
9 |
st.set_page_config(
|
@@ -28,10 +30,10 @@ def load_models():
|
|
28 |
with st.spinner("Loading AI models... This may take a minute on first run."):
|
29 |
models = {}
|
30 |
# Load summarization model
|
31 |
-
models['summarizer'] = pipeline("summarization", model="
|
32 |
|
33 |
# Load text generation model for suitability assessment
|
34 |
-
models['text_generator'] = pipeline("text-generation", model="gpt2")
|
35 |
|
36 |
return models
|
37 |
|
@@ -43,7 +45,7 @@ models = load_models()
|
|
43 |
#####################################
|
44 |
def extract_text_from_file(file_obj):
|
45 |
"""
|
46 |
-
Extract text from .docx files.
|
47 |
Returns the extracted text or an error message if extraction fails.
|
48 |
"""
|
49 |
filename = file_obj.name
|
@@ -56,13 +58,33 @@ def extract_text_from_file(file_obj):
|
|
56 |
text = "\n".join(para.text for para in document.paragraphs if para.text.strip())
|
57 |
except Exception as e:
|
58 |
text = f"Error processing DOCX file: {e}"
|
59 |
-
elif ext == ".
|
60 |
try:
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
except Exception as e:
|
63 |
-
text = f"Error processing
|
64 |
else:
|
65 |
-
text = "Unsupported file type. Please upload a .
|
|
|
66 |
return text
|
67 |
|
68 |
#####################################
|
@@ -103,24 +125,24 @@ def summarize_resume_text(resume_text, models):
|
|
103 |
#####################################
|
104 |
def generate_suitability_assessment(candidate_summary, company_prompt, models):
|
105 |
"""
|
106 |
-
Generate a suitability assessment using text generation
|
107 |
Returns the generated assessment text and execution time.
|
108 |
"""
|
109 |
start_time = time.time()
|
110 |
|
111 |
text_generator = models['text_generator']
|
112 |
|
113 |
-
# Create a prompt for the text generation model
|
114 |
prompt = f"""
|
115 |
Resume Summary: {candidate_summary}
|
116 |
|
117 |
Company Description: {company_prompt}
|
118 |
|
119 |
Suitability Assessment:
|
120 |
-
|
121 |
|
122 |
# Generate text
|
123 |
-
max_length =
|
124 |
generated_text = text_generator(
|
125 |
prompt,
|
126 |
max_length=max_length,
|
@@ -135,8 +157,8 @@ This candidate is a"""
|
|
135 |
|
136 |
# Determine a numerical score from the text
|
137 |
# This is a simplified approach - we're looking for positive and negative words
|
138 |
-
positive_words = ['excellent', 'perfect', 'great', 'good', 'strong', 'ideal', 'qualified']
|
139 |
-
negative_words = ['poor', 'weak', 'bad', 'insufficient', 'inadequate', 'not a good']
|
140 |
|
141 |
assessment_lower = assessment.lower()
|
142 |
|
@@ -164,15 +186,15 @@ This candidate is a"""
|
|
164 |
st.title("Resume Analyzer and Company Suitability Checker")
|
165 |
st.markdown(
|
166 |
"""
|
167 |
-
Upload your resume file in **.
|
168 |
1. Extracts text from the resume.
|
169 |
2. Uses a transformer-based model to generate a concise candidate summary.
|
170 |
-
3.
|
171 |
"""
|
172 |
)
|
173 |
|
174 |
# File uploader
|
175 |
-
uploaded_file = st.file_uploader("Upload your resume (.
|
176 |
|
177 |
# Company description text area
|
178 |
company_prompt = st.text_area(
|
@@ -187,7 +209,7 @@ if uploaded_file is not None and company_prompt and st.button("Analyze Resume"):
|
|
187 |
# Extract text from resume
|
188 |
resume_text = extract_text_from_file(uploaded_file)
|
189 |
|
190 |
-
if resume_text.startswith("Error") or resume_text == "Unsupported file type. Please upload a .
|
191 |
st.error(resume_text)
|
192 |
else:
|
193 |
# Generate summary
|
|
|
4 |
import docx
|
5 |
from transformers import pipeline
|
6 |
import time
|
7 |
+
import tempfile
|
8 |
+
import subprocess
|
9 |
|
10 |
# Set page title and hide sidebar
|
11 |
st.set_page_config(
|
|
|
30 |
with st.spinner("Loading AI models... This may take a minute on first run."):
|
31 |
models = {}
|
32 |
# Load summarization model
|
33 |
+
models['summarizer'] = pipeline("summarization", model="marianna13/flan-t5-base-summarization")
|
34 |
|
35 |
# Load text generation model for suitability assessment
|
36 |
+
models['text_generator'] = pipeline("text-generation", model="gpt2")
|
37 |
|
38 |
return models
|
39 |
|
|
|
45 |
#####################################
|
46 |
def extract_text_from_file(file_obj):
|
47 |
"""
|
48 |
+
Extract text from .doc or .docx files.
|
49 |
Returns the extracted text or an error message if extraction fails.
|
50 |
"""
|
51 |
filename = file_obj.name
|
|
|
58 |
text = "\n".join(para.text for para in document.paragraphs if para.text.strip())
|
59 |
except Exception as e:
|
60 |
text = f"Error processing DOCX file: {e}"
|
61 |
+
elif ext == ".doc":
|
62 |
try:
|
63 |
+
# For .doc files, we need to save to a temp file and use an external tool
|
64 |
+
# antiword is only the fallback (used when docx2txt is not importable) and must be installed in the environment
|
65 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix='.doc') as temp_file:
|
66 |
+
temp_file.write(file_obj.getvalue())
|
67 |
+
temp_path = temp_file.name
|
68 |
+
|
69 |
+
# Try the docx2txt package first, if it is installed (NOTE(review): docx2txt targets .docx; confirm it can read legacy .doc files)
|
70 |
+
try:
|
71 |
+
import docx2txt
|
72 |
+
text = docx2txt.process(temp_path)
|
73 |
+
except ImportError:
|
74 |
+
# Fallback to antiword if installed
|
75 |
+
try:
|
76 |
+
text = subprocess.check_output(['antiword', temp_path]).decode('utf-8')
|
77 |
+
except:
|
78 |
+
# If all else fails, inform the user
|
79 |
+
text = "Could not process .doc file. Please convert to .docx format."
|
80 |
+
|
81 |
+
# Clean up temp file
|
82 |
+
os.unlink(temp_path)
|
83 |
except Exception as e:
|
84 |
+
text = f"Error processing DOC file: {e}"
|
85 |
else:
|
86 |
+
text = "Unsupported file type. Please upload a .doc or .docx file."
|
87 |
+
|
88 |
return text
|
89 |
|
90 |
#####################################
|
|
|
125 |
#####################################
|
126 |
def generate_suitability_assessment(candidate_summary, company_prompt, models):
|
127 |
"""
|
128 |
+
Generate a suitability assessment using text generation.
|
129 |
Returns the generated assessment text and execution time.
|
130 |
"""
|
131 |
start_time = time.time()
|
132 |
|
133 |
text_generator = models['text_generator']
|
134 |
|
135 |
+
# Create a prompt for the text generation model that focuses on candidate alignment with company
|
136 |
prompt = f"""
|
137 |
Resume Summary: {candidate_summary}
|
138 |
|
139 |
Company Description: {company_prompt}
|
140 |
|
141 |
Suitability Assessment:
|
142 |
+
Based on an analysis of the candidate's profile compared to the company requirements, this candidate"""
|
143 |
|
144 |
# Generate text
|
145 |
+
max_length = 100 + len(prompt.split()) # Limit output length
|
146 |
generated_text = text_generator(
|
147 |
prompt,
|
148 |
max_length=max_length,
|
|
|
157 |
|
158 |
# Determine a numerical score from the text
|
159 |
# This is a simplified approach - we're looking for positive and negative words
|
160 |
+
positive_words = ['excellent', 'perfect', 'great', 'good', 'strong', 'ideal', 'qualified', 'aligns well', 'matches', 'suitable']
|
161 |
+
negative_words = ['poor', 'weak', 'bad', 'insufficient', 'inadequate', 'not a good fit', 'misaligned', 'lacks', 'does not align']
|
162 |
|
163 |
assessment_lower = assessment.lower()
|
164 |
|
|
|
186 |
st.title("Resume Analyzer and Company Suitability Checker")
|
187 |
st.markdown(
|
188 |
"""
|
189 |
+
Upload your resume file in **.doc** or **.docx** format. The app performs the following tasks:
|
190 |
1. Extracts text from the resume.
|
191 |
2. Uses a transformer-based model to generate a concise candidate summary.
|
192 |
+
3. Evaluates how well the candidate aligns with the company requirements.
|
193 |
"""
|
194 |
)
|
195 |
|
196 |
# File uploader
|
197 |
+
uploaded_file = st.file_uploader("Upload your resume (.doc or .docx)", type=["doc", "docx"])
|
198 |
|
199 |
# Company description text area
|
200 |
company_prompt = st.text_area(
|
|
|
209 |
# Extract text from resume
|
210 |
resume_text = extract_text_from_file(uploaded_file)
|
211 |
|
212 |
+
if resume_text.startswith("Error") or resume_text == "Unsupported file type. Please upload a .doc or .docx file.":
|
213 |
st.error(resume_text)
|
214 |
else:
|
215 |
# Generate summary
|