# Spaces: Sleeping
import io
import os
import re
import tempfile
import time

import docx
import docx2txt
import streamlit as st
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# Page-level configuration: browser-tab title, sidebar collapsed on load.
_PAGE_SETTINGS = {
    "page_title": "Resume Analyzer and Company Suitability Checker",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_PAGE_SETTINGS)

# Custom CSS that sets `display: none` on the sidebar section and on the
# "collapsedControl" element, so the sidebar is hidden completely.
_HIDE_SIDEBAR_CSS = """
<style>
[data-testid="collapsedControl"] {display: none;}
section[data-testid="stSidebar"] {display: none;}
</style>
"""
st.markdown(_HIDE_SIDEBAR_CSS, unsafe_allow_html=True)
##################################### | |
# Optimized Model Loading | |
##################################### | |
@st.cache_resource(
    show_spinner="Loading AI models... This may take a minute on first run."
)
def load_models():
    """Build the Hugging Face pipelines used by the app.

    The function is wrapped in ``st.cache_resource`` so the pipelines are
    created once and then reused: Streamlit re-executes the whole script on
    every interaction, and without the cache both models would be reloaded
    each time. The spinner text is shown only while the cache is being filled.

    Returns:
        dict: ``'summarizer'`` maps to a summarization pipeline
        (``facebook/bart-large-cnn``) and ``'text_generator'`` maps to a
        text-generation pipeline (``distilgpt2``).
    """
    use_gpu = torch.cuda.is_available()
    # Half precision only when a GPU is present; CPU inference stays in float32.
    torch_dtype = torch.float16 if use_gpu else torch.float32
    # Pipeline device index: 0 selects the first GPU, -1 selects the CPU.
    device = 0 if use_gpu else -1

    return {
        'summarizer': pipeline(
            "summarization",
            model="facebook/bart-large-cnn",
            torch_dtype=torch_dtype,
            device=device,
        ),
        'text_generator': pipeline(
            "text-generation",
            model="distilgpt2",
            torch_dtype=torch_dtype,
            device=device,
        ),
    }


# Load the models as soon as the script runs; reruns get the cached instance.
models = load_models()
##################################### | |
# Function: Extract Text from File - Optimized | |
##################################### | |
def extract_text_from_file(file_content, file_name):
    """Extract plain text from an uploaded .doc or .docx file.

    Args:
        file_content: Raw bytes of the uploaded file.
        file_name: Name of the uploaded file; only its extension is used, to
            pick the extraction path.

    Returns:
        str: The extracted text on success. On failure, a human-readable
        message that starts with ``"Error"`` (extraction failed) or
        ``"Unsupported file type"`` (extension is neither .doc nor .docx), so
        callers can detect every failure with a prefix check.
    """
    ext = os.path.splitext(file_name)[1].lower()

    if ext == ".docx":
        try:
            # Parse straight from memory; no temporary file is needed.
            document = docx.Document(io.BytesIO(file_content))
            return "\n".join(
                para.text for para in document.paragraphs if para.text.strip()
            )
        except Exception as e:
            return f"Error processing DOCX file: {e}"

    if ext == ".doc":
        temp_path = None
        try:
            # docx2txt is given a filesystem path, so spill the bytes to disk.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.doc') as temp_file:
                temp_path = temp_file.name
                temp_file.write(file_content)
            try:
                return docx2txt.process(temp_path)
            except Exception:
                # Prefixed with "Error" so the message is never mistaken for
                # resume text by a caller that checks startswith("Error").
                return "Error: Could not process .doc file. Please convert to .docx format."
        except Exception as e:
            return f"Error processing DOC file: {e}"
        finally:
            # Always remove the temp file, including when writing it failed.
            if temp_path is not None:
                try:
                    os.unlink(temp_path)
                except OSError:
                    pass

    return "Unsupported file type. Please upload a .doc or .docx file."
##################################### | |
# Function: Summarize Resume Text - Optimized | |
##################################### | |
def summarize_resume_text(resume_text, models):
    """Produce a short summary of the resume in a single summarizer call.

    Args:
        resume_text: Text extracted from the resume.
        models: Mapping whose ``'summarizer'`` entry is a callable that
            returns a list of dicts with a ``'summary_text'`` key.

    Returns:
        tuple: ``(candidate_summary, execution_time)`` where
        ``execution_time`` is the elapsed wall-clock time in seconds. For
        empty or whitespace-only input the summary is ``""`` and the
        summarizer is not called.
    """
    start_time = time.time()

    # Nothing to summarize (e.g. a document with no paragraph text): skip the
    # model instead of asking it to summarize an empty string.
    if not resume_text or not resume_text.strip():
        return "", time.time() - start_time

    # Cap the input at 1024 characters so one pass is enough. This is a
    # character count, not a token count.
    max_input_chars = 1024
    truncated_text = resume_text[:max_input_chars]

    # truncation=True additionally bounds the *token* count, which the
    # character cap alone does not guarantee for text that tokenizes into
    # more than one token per character.
    outputs = models['summarizer'](
        truncated_text,
        max_length=150,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    candidate_summary = outputs[0]['summary_text']

    return candidate_summary, time.time() - start_time
##################################### | |
# Function: Generate Suitability Assessment - Optimized | |
##################################### | |
# Keyword lists used to turn the generated assessment into a rough score.
_POSITIVE_TERMS = (
    'excellent', 'perfect', 'great', 'good', 'strong', 'ideal',
    'qualified', 'aligns', 'matches', 'suitable',
)
_NEGATIVE_TERMS = (
    'poor', 'weak', 'bad', 'insufficient', 'inadequate',
    'not a good fit', 'misaligned', 'lacks',
)


def _compile_terms(terms):
    """Return a regex that matches any of *terms* as a whole word or phrase."""
    # Longest first so a multi-word phrase wins over a shorter overlapping term.
    ordered = sorted(terms, key=len, reverse=True)
    return re.compile(r"\b(?:" + "|".join(re.escape(t) for t in ordered) + r")\b")


_POSITIVE_RE = _compile_terms(_POSITIVE_TERMS)
_NEGATIVE_RE = _compile_terms(_NEGATIVE_TERMS)


def _score_assessment(assessment):
    """Map the assessment text to a score in [0.1, 0.9] from keyword hits."""
    lowered = assessment.lower()
    negative_count = len(set(_NEGATIVE_RE.findall(lowered)))
    # Remove negative phrases before looking for positives, so that
    # "not a good fit" does not also count "good" as a positive signal.
    without_negatives = _NEGATIVE_RE.sub(" ", lowered)
    positive_count = len(set(_POSITIVE_RE.findall(without_negatives)))

    total = positive_count + negative_count
    if total > 0:
        score = 0.5 + 0.4 * (positive_count - negative_count) / total
    else:
        score = 0.5  # no signal either way
    return max(0.1, min(0.9, score))


def generate_suitability_assessment(candidate_summary, company_prompt, models):
    """Generate a free-text suitability assessment and a rough match score.

    Args:
        candidate_summary: Summary of the candidate; only the first 300
            characters are placed in the prompt.
        company_prompt: Company/job description; only the first 300
            characters are placed in the prompt.
        models: Mapping whose ``'text_generator'`` entry is a callable that
            returns a list of dicts with a ``'generated_text'`` key holding
            the prompt followed by the continuation.

    Returns:
        tuple: ``(assessment, score, execution_time)``. ``assessment`` is the
        generated continuation with the prompt removed, ``score`` is a float
        in ``[0.1, 0.9]`` (each distinct keyword counts once, matched as a
        whole word), and ``execution_time`` is elapsed seconds.
    """
    start_time = time.time()
    text_generator = models['text_generator']

    prompt = (
        f"Resume: {candidate_summary[:300]}...\n"
        f"Company: {company_prompt[:300]}...\n"
        "Suitability Assessment: This candidate"
    )

    # Budget the continuation directly in new tokens. A max_length derived
    # from a whitespace word count undercounts the prompt's tokens and can
    # leave little or no room for generated text.
    generated_text = text_generator(
        prompt,
        max_new_tokens=50,
        num_return_sequences=1,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )[0]['generated_text']

    # The generator echoes the prompt; keep only what follows it.
    assessment = generated_text[len(prompt):].strip()
    score = _score_assessment(assessment)

    execution_time = time.time() - start_time
    return assessment, score, execution_time
##################################### | |
# Main Streamlit Interface | |
##################################### | |
# Page heading followed by a short description of what the app does.
st.title("Resume Analyzer and Company Suitability Checker")
_INTRO_MARKDOWN = """
Upload your resume file in **.doc** or **.docx** format. The app performs the following tasks:
1. Extracts text from the resume.
2. Uses a transformer-based model to generate a concise candidate summary.
3. Evaluates how well the candidate aligns with the company requirements.
"""
st.markdown(_INTRO_MARKDOWN)

# Resume upload widget, restricted to the two Word extensions.
_ACCEPTED_EXTENSIONS = ["doc", "docx"]
uploaded_file = st.file_uploader(
    "Upload your resume (.doc or .docx)",
    type=_ACCEPTED_EXTENSIONS,
)

# Free-text box for the company description / job requirements.
_COMPANY_HELP = "Enter a detailed description of the company culture, role requirements, and desired skills."
company_prompt = st.text_area(
    "Enter the company description or job requirements:",
    height=150,
    help=_COMPANY_HELP,
)
# Process button: it is only rendered once a file has been uploaded and a
# company description entered (the conditions short-circuit before st.button).
if uploaded_file is not None and company_prompt and st.button("Analyze Resume"):
    with st.spinner("Processing..."):
        # Extract the resume text from the uploaded bytes.
        resume_text = extract_text_from_file(uploaded_file.getvalue(), uploaded_file.name)

        # extract_text_from_file reports failures as message strings. Match
        # every failure message it can produce, so that none of them is fed
        # to the summarizer as if it were resume text.
        failure_prefixes = ("Error", "Could not process", "Unsupported file type")
        if resume_text.startswith(failure_prefixes):
            st.error(resume_text)
        elif not resume_text.strip():
            # Extraction succeeded but produced nothing to analyze.
            st.error("No readable text was found in the uploaded file.")
        else:
            progress_bar = st.progress(0)

            # Step 1: summarize the resume.
            summary, summarization_time = summarize_resume_text(resume_text, models)
            progress_bar.progress(50)

            st.subheader("Candidate Summary")
            st.write(summary)
            st.info(f"Summarization completed in {summarization_time:.2f} seconds")

            # Step 2: assess the summary against the company description.
            assessment, estimated_score, generation_time = generate_suitability_assessment(
                summary, company_prompt, models
            )
            progress_bar.progress(100)

            st.subheader("Suitability Assessment")
            st.write(assessment)
            st.markdown(f"**Estimated Matching Score:** {estimated_score:.2%}")
            st.info(f"Assessment generated in {generation_time:.2f} seconds")

            # Translate the numeric score into a verdict banner.
            if estimated_score >= 0.85:
                st.success("Excellent match! This candidate's profile is strongly aligned with the company requirements.")
            elif estimated_score >= 0.70:
                st.success("Good match! This candidate shows strong potential for the position.")
            elif estimated_score >= 0.50:
                st.warning("Moderate match. The candidate meets some requirements but there may be gaps.")
            else:
                st.error("Low match. The candidate's profile may not align well with the requirements.")