Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

File size: 8,942 Bytes

cf8a522
4077883
8e1d297
92f45fe
e0405b6
6713758
0807dc8
 
 
 
d2d6501
5d07781
 
 
 
 
 
 
 
 
 
 
 
 
8e1d297
 
0807dc8
c6d228e
d2d6501
5d07781
0807dc8
d2d6501
 
e0405b6
0807dc8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e0405b6
d2d6501
 
 
 
c6d228e
 
0807dc8
8e1d297
0807dc8
 
92f45fe
6713758
7716c5c
92f45fe
0807dc8
92f45fe
7716c5c
 
9753cc9
0807dc8
 
 
c6d228e
9753cc9
92f45fe
6713758
92f45fe
0807dc8
6713758
0807dc8
6713758
 
0807dc8
6713758
 
0807dc8
 
6713758
 
 
92f45fe
6713758
92f45fe
6713758
 
92f45fe
8e1d297
 
0807dc8
7716c5c
c6d228e
d836318
0807dc8
d836318
e0405b6
 
c6d228e
d2d6501
0807dc8
e0405b6
0807dc8
c6d228e
0807dc8
 
 
 
 
 
 
0d4f4dd
e0405b6
 
 
d836318
cccaa8e
0807dc8
cccaa8e
41d8604
cccaa8e
0807dc8
cccaa8e
e0405b6
 
41d8604
c6d228e
0807dc8
 
41d8604
0807dc8
41d8604
0807dc8
41d8604
0807dc8
 
41d8604
 
 
 
 
 
 
 
 
0807dc8
41d8604
 
0807dc8
 
 
c6d228e
41d8604
e0405b6
0807dc8
 
 
41d8604
0807dc8
 
 
 
 
41d8604
0807dc8
41d8604
e0405b6
 
 
41d8604
cccaa8e
7716c5c
e0405b6
8e1d297
d2d6501
 
cc18787
6713758
d2d6501
 
6713758
d2d6501
 
cccaa8e
e0405b6
6713758
d2d6501
e0405b6
 
 
 
 
 
3661e7e
e0405b6
 
 
0807dc8
 
e0405b6
6713758
e0405b6
d2d6501
0807dc8
 
 
e0405b6
 
0807dc8
e0405b6
 
 
 
 
 
0807dc8
41d8604
0807dc8
41d8604

import io
import os
import re
import tempfile
import time

import docx
import docx2txt
import streamlit as st
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# CSS injected into the page to hide both the sidebar and its collapse control.
_HIDE_SIDEBAR_CSS = """
<style>
    [data-testid="collapsedControl"] {display: none;}
    section[data-testid="stSidebar"] {display: none;}
</style>
"""

# Configure the page title and start with the sidebar collapsed.
st.set_page_config(
    page_title="Resume Analyzer and Company Suitability Checker",
    initial_sidebar_state="collapsed",
)

# The CSS is raw HTML, so Streamlit must be told to render it unescaped.
st.markdown(_HIDE_SIDEBAR_CSS, unsafe_allow_html=True)

#####################################
# Optimized Model Loading
#####################################
@st.cache_resource(show_spinner=True)
def load_models():
    """Build the two Hugging Face pipelines used by the app.

    Returns:
        A dict with two entries:
        ``'summarizer'`` - a "summarization" pipeline backed by
        ``facebook/bart-large-cnn``;
        ``'text_generator'`` - a "text-generation" pipeline backed by
        ``distilgpt2``.

    Both pipelines run on GPU 0 in float16 when CUDA is available, and on
    the CPU in float32 otherwise. The function is wrapped in
    ``st.cache_resource``.
    """
    with st.spinner("Loading AI models... This may take a minute on first run."):
        cuda_ready = torch.cuda.is_available()

        # Half precision is only requested when a GPU is present.
        precision = torch.float16 if cuda_ready else torch.float32
        # Pipeline device index: 0 selects the first GPU, -1 selects the CPU.
        target_device = 0 if cuda_ready else -1

        return {
            'summarizer': pipeline(
                "summarization",
                model="facebook/bart-large-cnn",
                torch_dtype=precision,
                device=target_device,
            ),
            'text_generator': pipeline(
                "text-generation",
                model="distilgpt2",
                torch_dtype=precision,
                device=target_device,
            ),
        }

# Load the pipelines at import time so they are ready before the user
# interacts with the page; `models` is the dict returned by load_models()
# (keys 'summarizer' and 'text_generator').
models = load_models()

#####################################
# Function: Extract Text from File - Optimized
#####################################
@st.cache_data
def extract_text_from_file(file_content, file_name):
    """Extract plain text from an uploaded .doc or .docx file.

    Args:
        file_content: Raw bytes of the uploaded file.
        file_name: Original file name; only its extension is inspected
            (case-insensitively) to choose the extraction path.

    Returns:
        The extracted text on success. Failures are reported as a message
        string rather than raised:

        * ``"Error processing DOCX file: ..."`` / ``"Error processing DOC
          file: ..."`` when reading or writing the data fails,
        * ``"Could not process .doc file. Please convert to .docx format."``
          when ``docx2txt`` cannot read the temporary .doc file,
        * ``"Unsupported file type. Please upload a .doc or .docx file."``
          for any other extension.

        Note that the "Could not process" message does not start with
        "Error", so callers need to check for it explicitly.
    """
    ext = os.path.splitext(file_name)[1].lower()

    if ext == ".docx":
        try:
            # Parse straight from memory to avoid disk I/O.
            document = docx.Document(io.BytesIO(file_content))
            return "\n".join(
                para.text for para in document.paragraphs if para.text.strip()
            )
        except Exception as e:
            return f"Error processing DOCX file: {e}"

    if ext == ".doc":
        # docx2txt works on a path, so the bytes go through a temp file.
        temp_path = None
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix='.doc') as temp_file:
                # Record the path before writing so a failed write is still
                # cleaned up in the finally block below.
                temp_path = temp_file.name
                temp_file.write(file_content)

            try:
                return docx2txt.process(temp_path)
            except Exception:
                return "Could not process .doc file. Please convert to .docx format."
        except Exception as e:
            return f"Error processing DOC file: {e}"
        finally:
            # delete=False means the file must be removed by hand on every
            # path. Removal is best-effort: a leftover temp file should not
            # replace the extraction result with an error.
            if temp_path is not None:
                try:
                    os.unlink(temp_path)
                except OSError:
                    pass

    return "Unsupported file type. Please upload a .doc or .docx file."

#####################################
# Function: Summarize Resume Text - Optimized
#####################################
def summarize_resume_text(resume_text, models):
    """Summarize a resume with the pipeline stored under ``models['summarizer']``.

    Args:
        resume_text: Full text of the resume.
        models: Mapping that holds the summarization callable under the
            key ``'summarizer'``.

    Returns:
        A ``(summary, seconds)`` tuple: the ``'summary_text'`` of the first
        result returned by the summarizer, and the wall-clock time the call
        took.
    """
    started_at = time.time()

    run_summarizer = models['summarizer']

    # Only the first 1024 characters are summarized, so a single pass is
    # enough. Slicing a shorter string returns it unchanged.
    char_limit = 1024
    clipped_text = resume_text[:char_limit]

    results = run_summarizer(
        clipped_text,
        max_length=150,
        min_length=30,
        do_sample=False,
    )
    candidate_summary = results[0]['summary_text']

    elapsed = time.time() - started_at
    return candidate_summary, elapsed

#####################################
# Function: Generate Suitability Assessment - Optimized
#####################################
# Vocabulary used to turn the generated assessment into a rough score.
_POSITIVE_TERMS = (
    'excellent', 'perfect', 'great', 'good', 'strong', 'ideal',
    'qualified', 'aligns', 'matches', 'suitable',
)
_NEGATIVE_TERMS = (
    'poor', 'weak', 'bad', 'insufficient', 'inadequate',
    'not a good fit', 'misaligned', 'lacks',
)

# Number of tokens the generator may add after the prompt.
_ASSESSMENT_MAX_NEW_TOKENS = 50


def _compile_terms(terms):
    """Return a regex matching any of *terms* as a whole word or phrase."""
    return re.compile(r"\b(?:" + "|".join(re.escape(term) for term in terms) + r")\b")


_POSITIVE_PATTERN = _compile_terms(_POSITIVE_TERMS)
_NEGATIVE_PATTERN = _compile_terms(_NEGATIVE_TERMS)


def _score_assessment(assessment):
    """Map assessment text to a score in [0.1, 0.9] from keyword hits."""
    text = assessment.lower()

    # Each distinct term counts once, however often it appears.
    negative_count = len(set(_NEGATIVE_PATTERN.findall(text)))

    # Blank out negative phrases first so that "not a good fit" does not
    # also register "good" as a positive hit.
    remaining = _NEGATIVE_PATTERN.sub(" ", text)
    positive_count = len(set(_POSITIVE_PATTERN.findall(remaining)))

    total = positive_count + negative_count
    if total == 0:
        return 0.5

    score = 0.5 + 0.4 * (positive_count - negative_count) / total
    return max(0.1, min(0.9, score))


def generate_suitability_assessment(candidate_summary, company_prompt, models):
    """Generate a free-text suitability assessment and a keyword-based score.

    Args:
        candidate_summary: Summary of the candidate; only the first 300
            characters are placed in the prompt.
        company_prompt: Company or job description; only the first 300
            characters are placed in the prompt.
        models: Mapping that holds the text-generation callable under the
            key ``'text_generator'``.

    Returns:
        A ``(assessment, score, seconds)`` tuple: the generated continuation
        with the prompt removed and whitespace stripped, a score between 0.1
        and 0.9 (0.5 when no keyword is found), and the wall-clock time
        spent.
    """
    start_time = time.time()

    text_generator = models['text_generator']

    # Create a shorter, more focused prompt
    prompt = f"""Resume: {candidate_summary[:300]}...

Company: {company_prompt[:300]}...

Suitability Assessment: This candidate"""

    # Bound the continuation with max_new_tokens. A max_length derived from
    # a whitespace word count undercounts tokens and includes the prompt,
    # which can leave little or no room for generated text.
    generated_text = text_generator(
        prompt,
        max_new_tokens=_ASSESSMENT_MAX_NEW_TOKENS,
        num_return_sequences=1,
        temperature=0.7,
        top_p=0.9,
        do_sample=True
    )[0]['generated_text']

    # The pipeline output echoes the prompt; keep only what follows it.
    assessment = generated_text[len(prompt):].strip()

    score = _score_assessment(assessment)

    execution_time = time.time() - start_time

    return assessment, score, execution_time

#####################################
# Main Streamlit Interface
#####################################
# Page header and a short description of what the app does.
st.title("Resume Analyzer and Company Suitability Checker")
st.markdown(
    """
Upload your resume file in **.doc** or **.docx** format. The app performs the following tasks:
1. Extracts text from the resume.
2. Uses a transformer-based model to generate a concise candidate summary.
3. Evaluates how well the candidate aligns with the company requirements.
"""
)

# File uploader
uploaded_file = st.file_uploader("Upload your resume (.doc or .docx)", type=["doc", "docx"])

# Company description text area
company_prompt = st.text_area(
    "Enter the company description or job requirements:",
    height=150,
    help="Enter a detailed description of the company culture, role requirements, and desired skills.",
)

# The button is only evaluated (and therefore only shown) once a file and a
# non-blank description are present, because `and` short-circuits.
if uploaded_file is not None and company_prompt.strip() and st.button("Analyze Resume"):
    with st.spinner("Processing..."):
        # Extract text from resume with caching
        resume_text = extract_text_from_file(uploaded_file.getvalue(), uploaded_file.name)

        # extract_text_from_file reports failures as message strings. Not all
        # of them start with "Error", so the fixed messages are matched
        # explicitly to keep them from being summarized as resume content.
        extraction_failed = resume_text.startswith("Error") or resume_text in (
            "Unsupported file type. Please upload a .doc or .docx file.",
            "Could not process .doc file. Please convert to .docx format.",
        )

        if extraction_failed:
            st.error(resume_text)
        elif not resume_text.strip():
            # Nothing to summarize, e.g. a document with no text paragraphs.
            st.error("No text could be extracted from the uploaded file.")
        else:
            # Add a progress bar
            progress_bar = st.progress(0)

            # Generate summary
            summary, summarization_time = summarize_resume_text(resume_text, models)
            progress_bar.progress(50)

            # Display summary
            st.subheader("Candidate Summary")
            st.write(summary)
            st.info(f"Summarization completed in {summarization_time:.2f} seconds")

            # Generate suitability assessment
            assessment, estimated_score, generation_time = generate_suitability_assessment(summary, company_prompt, models)
            progress_bar.progress(100)

            # Display assessment
            st.subheader("Suitability Assessment")
            st.write(assessment)
            st.markdown(f"**Estimated Matching Score:** {estimated_score:.2%}")
            st.info(f"Assessment generated in {generation_time:.2f} seconds")

            # Provide interpretation based on estimated score
            if estimated_score >= 0.85:
                st.success("Excellent match! This candidate's profile is strongly aligned with the company requirements.")
            elif estimated_score >= 0.70:
                st.success("Good match! This candidate shows strong potential for the position.")
            elif estimated_score >= 0.50:
                st.warning("Moderate match. The candidate meets some requirements but there may be gaps.")
            else:
                st.error("Low match. The candidate's profile may not align well with the requirements.")