import os
import io
import streamlit as st
import docx
import docx2txt
import tempfile
import time
import re
import concurrent.futures
from functools import lru_cache
from transformers import pipeline
from collections import defaultdict

# Set page title and hide sidebar
st.set_page_config(
    page_title="Resume-Google Job Match Analyzer",
    initial_sidebar_state="collapsed"
)

# Hide sidebar completely with custom CSS
st.markdown("""
<style>
    [data-testid="collapsedControl"] {display: none;}
    section[data-testid="stSidebar"] {display: none;}
</style>
""", unsafe_allow_html=True)

# Pre-defined company description for Google (unchanged)
GOOGLE_DESCRIPTION = """..."""  # Keep your original content here

#####################################
# Preload Models - Optimized with DistilBART
#####################################
@st.cache_resource(show_spinner=True)
def load_models():
    """Load optimized models at startup"""
    with st.spinner("Loading AI models..."):
        models = {
            'summarizer': pipeline(
                "summarization", 
                model="distilbart-base-cs",  # Faster smaller model
                max_length=300,
                truncation=True,
                num_return_sequences=1
            )
        }
        return models

models = load_models()

#####################################
# Function: Extract Text from File - Optimized
#####################################
# Note: no lru_cache here. Streamlit creates a fresh UploadedFile object on
# every rerun, so caching on the object never hits and only pins memory.
def extract_text_from_file(file_obj):
    """Optimized text extraction with early exit"""
    filename = file_obj.name
    ext = os.path.splitext(filename)[1].lower()
    text = ""
    MAX_TEXT = 15000  # Reduced processing limit
    
    try:
        if ext == ".docx":
            doc = docx.Document(file_obj)
            text = "\n".join(para.text for para in doc.paragraphs[:50] if para.text.strip())[:MAX_TEXT]
        elif ext == ".doc":
            with tempfile.NamedTemporaryFile(delete=False, suffix='.doc') as temp_file:
                temp_file.write(file_obj.getvalue())
                text = docx2txt.process(temp_file.name)[:MAX_TEXT]
                os.unlink(temp_file.name)
        elif ext == ".txt":
            text = file_obj.getvalue().decode("utf-8")[:MAX_TEXT]
    except Exception as e:
        text = f"Error: {str(e)}"
    
    return text
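
# Sketch of a rerun-safe cache keyed on the file's bytes rather than on the
# UploadedFile object itself (hypothetical helper, not in the original script;
# assumes the caller passes uploaded_file.getvalue() and uploaded_file.name).
@st.cache_data(max_entries=16, show_spinner=False)
def extract_text_cached(file_bytes: bytes, filename: str) -> str:
    buf = io.BytesIO(file_bytes)
    buf.name = filename  # extract_text_from_file reads .name for the extension
    return extract_text_from_file(buf)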

#####################################
# Unified Information Extraction - Optimized
#####################################
@lru_cache(maxsize=16, typed=False)
def extract_info(text):
    """Combined extraction of all candidate info in one pass"""
    text_lower = text.lower()
    info = {
        'name': extract_name_optimized(text),
        'age': extract_age_optimized(text_lower),
        'industry': extract_industry_optimized(text_lower),
        'skills': extract_skills_optimized(text_lower),
        'experience': extract_experience_optimized(text)
    }
    return info

def extract_name_optimized(text):
    """Faster name extraction with reduced checks"""
    for line in text.split('\n')[:10]:
        candidate = line.strip()
        if 5 <= len(candidate) <= 40 and not any(kw in candidate.lower() for kw in ("resume", "cv")):
            return candidate
    return "Unknown"

def extract_age_optimized(text):
    """Simplified age pattern matching"""
    # Capture group 1 holds the digits in both patterns
    patterns = [r'\bage\b\s*:?\s*(\d{1,2})', r'\b(\d{1,2})\s+years?\s+old\b']
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(1)
    return "Not specified"

# Other extract_ functions follow with similar optimizations; since the
# originals were elided, minimal placeholder sketches are provided below.
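
# The three extractors below are minimal sketches added so that extract_info()
# runs end to end. Their keyword lists and patterns are illustrative
# assumptions, not the original author's logic.
def extract_industry_optimized(text_lower):
    """Best-effort industry guess from a tiny keyword map (placeholder)."""
    INDUSTRY_KEYWORDS = {
        "software": "Technology",
        "finance": "Finance",
        "marketing": "Marketing",
        "healthcare": "Healthcare",
    }
    for keyword, industry in INDUSTRY_KEYWORDS.items():
        if keyword in text_lower:
            return industry
    return "Not specified"

def extract_skills_optimized(text_lower):
    """Intersect the text with a small, illustrative skill vocabulary (placeholder)."""
    SKILLS = {"python", "java", "c++", "sql", "excel", "communication"}
    return sorted(skill for skill in SKILLS if skill in text_lower)

def extract_experience_optimized(text):
    """Pull the first 'N years' mention as a rough experience signal (placeholder)."""
    match = re.search(r'(\d{1,2})\+?\s+years', text.lower())
    return f"{match.group(1)} years" if match else "Not specified"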

#####################################
# Optimized Summarization
#####################################
def summarize_resume_text(resume_text):
    """Faster summarization with input truncation"""
    start_time = time.time()

    # Submit the rule-based extraction first so it runs in a worker thread
    # while the summarizer occupies the main thread
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        future_info = executor.submit(extract_info, resume_text)
        base_summary = models['summarizer'](
            resume_text[:1024],
            max_length=150,
            truncation=True
        )[0]['summary_text']
        info = future_info.result()

    summary = (
        f"**Name**: {info['name']}\n**Age**: {info['age']}\n"
        f"**Industry**: {info['industry']}\n\n{base_summary}"
    )
    return summary, time.time() - start_time

#####################################
# Optimized Scoring System
#####################################
def calculate_google_match_score(summary):
    """Precomputed keyword matching for faster scoring"""
    GOOGLE_KEYWORDS = {
        "Technical Skills": {"python", "java", "c++", "sql", "algorithms"},
        "Advanced Tech": {"ai", "ml", "cloud", "data science"},
        # Add other categories...
    }
    # Evenly split weights as an illustrative assumption; the original
    # weighting scheme was elided
    WEIGHTS = {category: 1 / len(GOOGLE_KEYWORDS) for category in GOOGLE_KEYWORDS}

    score = defaultdict(float)
    summary_lower = summary.lower()

    for category, keywords in GOOGLE_KEYWORDS.items():
        # Substring search so multi-word keywords like "data science" match too
        count = sum(1 for keyword in keywords if keyword in summary_lower)
        score[category] = min(1.0, (count / len(keywords)) * 1.5) if keywords else 0.0

    total = sum(score[category] * WEIGHTS[category] for category in score)
    return total, score

#####################################
# Streamlit Interface Optimizations
#####################################
st.title("Google Resume Analyzer")

# Initialize session state once so reruns don't clobber in-flight progress
if 'progress' not in st.session_state:
    st.session_state.progress = 0
if 'last_update' not in st.session_state:
    st.session_state.last_update = time.time()

# The analyze block below reads `uploaded_file`, so the uploader must exist
uploaded_file = st.file_uploader("Upload a resume", type=["docx", "doc", "txt"])

if uploaded_file and st.button("Analyze"):
    with st.spinner("Analyzing resume..."):
        start_time = time.time()

        # Step 1: Text extraction
        text = extract_text_from_file(uploaded_file)
        st.session_state.progress = 33
        if text.startswith("Error"):
            st.error(text)
            st.stop()  # halt this rerun; `continue` is invalid outside a loop

        # Step 2: Information extraction & summarization
        summary, _ = summarize_resume_text(text)
        st.session_state.progress = 66

        # Step 3: Scoring
        score, breakdown = calculate_google_match_score(summary)
        st.session_state.progress = 100

        # Display results
        st.subheader("Analysis Complete!")
        st.markdown(f"**Match Score**: {score*100:.1f}%")
        st.caption(f"Processed in {time.time() - start_time:.1f}s")
        # Add other displays...

if st.session_state.progress < 100:
    # st.progress takes a single value: an int in 0-100 or a float in 0.0-1.0
    st.progress(st.session_state.progress)