Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

App Files Community

CR7CAD committed on Mar 18

Commit

46ff202

verified ·

1 Parent(s): 97150aa

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -100

app.py CHANGED Viewed

@@ -9,24 +9,8 @@ import re
 import concurrent.futures
 from functools import lru_cache
 from transformers import pipeline
-from collections import defaultdict
-# Set page title and hide sidebar
-st.set_page_config(
-    page_title="Resume-Google Job Match Analyzer",
-    initial_sidebar_state="collapsed"
-)
-# Hide sidebar completely with custom CSS
-st.markdown("""
-<style>
-    [data-testid="collapsedControl"] {display: none;}
-    section[data-testid="stSidebar"] {display: none;}
-</style>
-""", unsafe_allow_html=True)
-# Pre-defined company description for Google (unchanged)
-GOOGLE_DESCRIPTION = """..."""  # Keep your original content here
 #####################################
 # Preload Models - Optimized with DistilBART
@@ -35,11 +19,12 @@ GOOGLE_DESCRIPTION = """..."""  # Keep your original content here
 def load_models():
     """Load optimized models at startup"""
     with st.spinner("Loading AI models..."):
         models = {
             'summarizer': pipeline(
                 "summarization",
-                model="distilbart-base-cs",  # Faster smaller model
-                max_length=300,
                 truncation=True,
                 num_return_sequences=1
             )
@@ -62,121 +47,114 @@ def extract_text_from_file(file_obj):
     try:
         if ext == ".docx":
             doc = docx.Document(file_obj)
             text = "\n".join(para.text for para in doc.paragraphs[:50] if para.text.strip())[:MAX_TEXT]
         elif ext == ".doc":
-            with tempfile.NamedTemporaryFile(delete=False, suffix='.doc') as temp_file:
-                temp_file.write(file_obj.getvalue())
-                text = docx2txt.process(temp_file.name)[:MAX_TEXT]
-                os.unlink(temp_file.name)
         elif ext == ".txt":
-            text = file_obj.getvalue().decode("utf-8")[:MAX_TEXT]
     except Exception as e:
         text = f"Error: {str(e)}"
     return text
-#####################################
-# Unified Information Extraction - Optimized
-#####################################
-@lru_cache(maxsize=16, typed=False)
-def extract_info(text):
-    """Combined extraction of all candidate info in one pass"""
-    text_lower = text.lower()
-    info = {
-        'name': extract_name_optimized(text),
-        'age': extract_age_optimized(text_lower),
-        'industry': extract_industry_optimized(text_lower),
-        'skills': extract_skills_optimized(text_lower),
-        'experience': extract_experience_optimized(text)
-    }
-    return info
-def extract_name_optimized(text):
-    """Faster name extraction with reduced checks"""
-    lines = text.split('\n')[:10]
-    for line in lines:
-        if 5 <= len(line) <= 40 and not any(keyword in line.lower() for keyword in ["resume", "cv"]):
-            return line.strip()
-    return "Unknown"
-def extract_age_optimized(text):
-    """Simplified age pattern matching"""
-    patterns = [r'\b(age)\b?:?\s*(\d{1,2})', r'(\d{1,2})\s+years? old']
-    for pattern in patterns:
-        match = re.search(pattern, text)
-        if match: return match.group(1)
-    return "Not specified"
-# Other extract_ functions with similar optimizations...
 #####################################
 # Optimized Summarization
 #####################################
 def summarize_resume_text(resume_text):
     """Faster summarization with input truncation"""
     base_summary = models['summarizer'](
-        resume_text[:1024],
-        max_length=150,
         truncation=True
     )[0]['summary_text']
     with concurrent.futures.ThreadPoolExecutor() as executor:
-        info = executor.submit(extract_info, resume_text).result()
-    return f"**Name**: {info['name']}\n**Age**: {info['age']}\n**Industry**: {info['industry']}\n\n{base_summary}", 0.1
-#####################################
-# Optimized Scoring System
-#####################################
-def calculate_google_match_score(summary):
-    """Precomputed keyword matching for faster scoring"""
-    GOOGLE_KEYWORDS = {
-        "Technical Skills": {"python", "java", "c++", "sql", "algorithms"},
-        "Advanced Tech": {"ai", "ml", "cloud", "data science"},
-        # Add other categories...
-    }
-    score = defaultdict(float)
-    summary_lower = summary.lower()
-    for category, keywords in GOOGLE_KEYWORDS.items():
-        count = len(keywords & set(summary_lower.split()))
-        score[category] = min(1, (count / len(keywords)) * 1.5 if keywords else 0)
-    return sum(score.values() * weights), score  # weights defined accordingly
 #####################################
-# Streamlit Interface Optimizations
 #####################################
 st.title("Google Resume Analyzer")
-st.session_state progress = 0
-st.session_state.last_update = time.time()
 if uploaded_file and st.button("Analyze"):
     with st.spinner():
-        # Use session state for progress tracking
-        start_time = time.time()
         # Step 1: Text extraction
-        text = extract_text_from_file(uploaded_file)
-        st.session_state.progress = 33
-        if "Error" in text:
-            st.error(text)
             continue
-        # Step 2: Information extraction & summarization
-        summary, _ = summarize_resume_text(text)
-        st.session_state.progress = 66
         # Step 3: Scoring
-        score, breakdown = calculate_google_match_score(summary)
-        st.session_state.progress = 100
         # Display results
         st.subheader("Analysis Complete!")
-        st.markdown(f"**Match Score**: {score*100:.1f}%")
-        # Add other displays...
-if st.session_state.progress < 100:
-    st.progress(st.session_state.progress, 100)
-    time.sleep(0.1)  # Simulate progress update

 import concurrent.futures
 from functools import lru_cache
 from transformers import pipeline
+# ... [Keep your existing configurations and constants] ...
 #####################################
 # Preload Models - Optimized with DistilBART
 def load_models():
     """Load optimized models at startup"""
     with st.spinner("Loading AI models..."):
+        # Use smaller, faster model
         models = {
             'summarizer': pipeline(
                 "summarization",
+                model="distilbart-base-cs",  # Faster than BART
+                max_length=300,              # Upper bound on generated summary length (tokens), not the input context window
                 truncation=True,
                 num_return_sequences=1
             )
     try:
         if ext == ".docx":
             doc = docx.Document(file_obj)
+            # Only read the first 50 non-empty-filtered paragraphs; the joined text is then capped at MAX_TEXT characters
             text = "\n".join(para.text for para in doc.paragraphs[:50] if para.text.strip())[:MAX_TEXT]
         elif ext == ".doc":
+            # Direct conversion using docx2txt
+            text = docx2txt.process(file_obj.stream.read())[:MAX_TEXT]
         elif ext == ".txt":
+            text = file_obj.read().decode("utf-8")[:MAX_TEXT]
     except Exception as e:
         text = f"Error: {str(e)}"
     return text
+# ... [Keep your existing extraction functions] ...
 #####################################
 # Optimized Summarization
 #####################################
 def summarize_resume_text(resume_text):
     """Faster summarization with input truncation"""
+    start_time = time.time()
+    # Truncate text for summarization
+    text_to_summarize = resume_text[:1024]
     base_summary = models['summarizer'](
+        text_to_summarize,
+        max_length=150,  # Smaller summary
         truncation=True
     )[0]['summary_text']
+    # Parallel extraction with thread pool
     with concurrent.futures.ThreadPoolExecutor() as executor:
+        # Submit only the name, age and industry extractors to the pool
+        name_future = executor.submit(extract_name, resume_text[:200])
+        age_future = executor.submit(extract_age, resume_text)
+        industry_future = executor.submit(extract_industry, resume_text, base_summary)
+        # Get results
+        name = name_future.result()
+        age = age_future.result()
+        industry = industry_future.result()
+        skills, work = extract_skills_and_work(resume_text)  # Runs on the calling thread; NOTE: result is not used in the returned summary
+    # Format summary (simplified)
+    return f"**Name**: {name}\n**Age**: {age}\n**Industry**: {industry}\n\n{base_summary}", 0.1
+# ... [Keep your scoring and feedback functions] ...
 #####################################
+# Optimized Streamlit Interface
 #####################################
 st.title("Google Resume Analyzer")
+# Initialize session state properly
+if 'progress' not in st.session_state:
+    st.session_state['progress'] = 0
+if 'last_update' not in st.session_state:
+    st.session_state['last_update'] = time.time()
+uploaded_file = st.file_uploader("Upload your resume", type=["docx", "doc", "txt"])
 if uploaded_file and st.button("Analyze"):
+    # Fixed delay (seconds) used before redrawing the progress bar; no backoff is applied and max_retries is not used below
+    progress_interval = 0.1
+    max_retries = 10
     with st.spinner():
         # Step 1: Text extraction
+        st.session_state['progress'] = 33
+        resume_text = extract_text_from_file(uploaded_file)
+        if "Error" in resume_text:
+            st.error(resume_text)
+            st.session_state['progress'] = 100
             continue
+        # Step 2: Summarization
+        st.session_state['progress'] = 66
+        summary, _ = summarize_resume_text(resume_text)
         # Step 3: Scoring
+        st.session_state['progress'] = 100
         # Display results
         st.subheader("Analysis Complete!")
+        st.markdown(summary)
+        # Display scores
+        overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
+        show_score(overall_score)
+        # Display feedback
+        feedback, _ = generate_template_feedback(category_scores)
+        st.markdown(feedback)
+# Progress bar implementation
+if st.session_state['progress'] < 100:
+    st.progress(st.session_state['progress'], 100)
+    time.sleep(progress_interval)
+def show_score(score):
+    """Display score with appropriate formatting"""
+    score_percent = int(score * 100)
+    if score >= 0.85:
+        st.success(f"**Match Score**: {score_percent}% 🌟")
+    elif score >= 0.70:
+        st.success(f"**Match Score**: {score_percent}% ✅")
+    elif score >= 0.50:
+        st.warning(f"**Match Score**: {score_percent}% ⚠️")
+    else:
+        st.error(f"**Match Score**: {score_percent}% 🔍")
+# ... [Keep your remaining functions] ...