CR7CAD committed on
Commit
46ff202
Β·
verified Β·
1 Parent(s): 97150aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -100
app.py CHANGED
@@ -9,24 +9,8 @@ import re
9
  import concurrent.futures
10
  from functools import lru_cache
11
  from transformers import pipeline
12
- from collections import defaultdict
13
 
14
- # Set page title and hide sidebar
15
- st.set_page_config(
16
- page_title="Resume-Google Job Match Analyzer",
17
- initial_sidebar_state="collapsed"
18
- )
19
-
20
- # Hide sidebar completely with custom CSS
21
- st.markdown("""
22
- <style>
23
- [data-testid="collapsedControl"] {display: none;}
24
- section[data-testid="stSidebar"] {display: none;}
25
- </style>
26
- """, unsafe_allow_html=True)
27
-
28
- # Pre-defined company description for Google (unchanged)
29
- GOOGLE_DESCRIPTION = """...""" # Keep your original content here
30
 
31
  #####################################
32
  # Preload Models - Optimized with DistilBART
@@ -35,11 +19,12 @@ GOOGLE_DESCRIPTION = """...""" # Keep your original content here
35
  def load_models():
36
  """Load optimized models at startup"""
37
  with st.spinner("Loading AI models..."):
 
38
  models = {
39
  'summarizer': pipeline(
40
  "summarization",
41
- model="distilbart-base-cs", # Faster smaller model
42
- max_length=300,
43
  truncation=True,
44
  num_return_sequences=1
45
  )
@@ -62,121 +47,114 @@ def extract_text_from_file(file_obj):
62
  try:
63
  if ext == ".docx":
64
  doc = docx.Document(file_obj)
 
65
  text = "\n".join(para.text for para in doc.paragraphs[:50] if para.text.strip())[:MAX_TEXT]
66
  elif ext == ".doc":
67
- with tempfile.NamedTemporaryFile(delete=False, suffix='.doc') as temp_file:
68
- temp_file.write(file_obj.getvalue())
69
- text = docx2txt.process(temp_file.name)[:MAX_TEXT]
70
- os.unlink(temp_file.name)
71
  elif ext == ".txt":
72
- text = file_obj.getvalue().decode("utf-8")[:MAX_TEXT]
73
  except Exception as e:
74
  text = f"Error: {str(e)}"
75
 
76
  return text
77
 
78
- #####################################
79
- # Unified Information Extraction - Optimized
80
- #####################################
81
@lru_cache(maxsize=16, typed=False)
def extract_info(text):
    """Extract all candidate details from a resume in a single pass.

    Results are memoized via ``lru_cache`` so re-analyzing identical
    text is free.

    Args:
        text: Full resume text (original casing).

    Returns:
        Dict with keys ``name``, ``age``, ``industry``, ``skills`` and
        ``experience``.
    """
    lowered = text.lower()
    # Case-insensitive extractors receive the lowercased text; the
    # name/experience extractors need the original casing.
    return {
        'name': extract_name_optimized(text),
        'age': extract_age_optimized(lowered),
        'industry': extract_industry_optimized(lowered),
        'skills': extract_skills_optimized(lowered),
        'experience': extract_experience_optimized(text),
    }
93
-
94
def extract_name_optimized(text):
    """Best-effort candidate-name guess from the top of a resume.

    Scans the first 10 lines and returns the first plausible one:
    5-40 characters after trimming whitespace and not containing the
    words "resume" or "cv" (case-insensitive).

    Args:
        text: Full resume text (original casing).

    Returns:
        The candidate line stripped of surrounding whitespace, or
        "Unknown" when no line qualifies.
    """
    for line in text.split('\n')[:10]:
        candidate = line.strip()
        # Length is checked on the trimmed text so padded or
        # whitespace-only lines are never returned as a "name"
        # (the original measured the raw line and could return "").
        if 5 <= len(candidate) <= 40 and not any(
            keyword in candidate.lower() for keyword in ("resume", "cv")
        ):
            return candidate
    return "Unknown"
101
-
102
def extract_age_optimized(text):
    """Find a candidate's age in resume text.

    Recognizes two phrasings: "age: 34" (colon optional) and
    "34 years old".

    Args:
        text: Resume text, already lowercased by the caller.

    Returns:
        The age digits as a string, or "Not specified".
    """
    # Each pattern captures the DIGITS in group 1. The original code's
    # first pattern captured the literal word "age" in group 1, so
    # match.group(1) returned "age" instead of the number.
    patterns = [r'\bage\b\s*:?\s*(\d{1,2})', r'\b(\d{1,2})\s+years?\s+old\b']
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(1)
    return "Not specified"
109
-
110
- # Other extract_ functions with similar optimizations...
111
 
112
  #####################################
113
  # Optimized Summarization
114
  #####################################
115
def summarize_resume_text(resume_text):
    """Summarize a resume and prepend key candidate details.

    Args:
        resume_text: Raw resume text.

    Returns:
        Tuple of (markdown-formatted summary string, 0.1 — a fixed
        placeholder for elapsed time kept for caller compatibility).
    """
    # Only the first 1 KiB is summarized to keep latency low.
    base_summary = models['summarizer'](
        resume_text[:1024],
        max_length=150,
        truncation=True
    )[0]['summary_text']

    # The original created a ThreadPoolExecutor, submitted a single
    # task, and immediately blocked on .result() — pure overhead with
    # no concurrency benefit. A direct call is equivalent and simpler.
    info = extract_info(resume_text)

    return f"**Name**: {info['name']}\n**Age**: {info['age']}\n**Industry**: {info['industry']}\n\n{base_summary}", 0.1
 
127
 
128
- #####################################
129
- # Optimized Scoring System
130
- #####################################
131
def calculate_google_match_score(summary):
    """Score a resume summary against Google-relevant keyword sets.

    Args:
        summary: Summary text of the resume (any casing).

    Returns:
        Tuple of (overall weighted score in [0, 1], per-category score
        dict keyed by category name).
    """
    GOOGLE_KEYWORDS = {
        "Technical Skills": {"python", "java", "c++", "sql", "algorithms"},
        "Advanced Tech": {"ai", "ml", "cloud", "data science"},
        # Add other categories...
    }
    # Equal weighting across categories. The original summed
    # `score.values() * weights`, which is a TypeError (dict_values
    # cannot be multiplied) and referenced an undefined `weights`.
    weights = {category: 1.0 / len(GOOGLE_KEYWORDS) for category in GOOGLE_KEYWORDS}

    # NOTE(review): matching is on whitespace-split tokens, so multi-word
    # keywords like "data science" can never match — confirm whether
    # substring matching was intended.
    summary_words = set(summary.lower().split())

    score = defaultdict(float)
    for category, keywords in GOOGLE_KEYWORDS.items():
        count = len(keywords & summary_words)
        # 1.5x boost so a partial keyword hit still scores well, capped at 1.
        score[category] = min(1, (count / len(keywords)) * 1.5 if keywords else 0)

    overall = sum(score[c] * weights[c] for c in GOOGLE_KEYWORDS)
    return overall, score
147
 
148
  #####################################
149
- # Streamlit Interface Optimizations
150
  #####################################
151
  st.title("Google Resume Analyzer")
152
- st.session_state progress = 0
153
- st.session_state.last_update = time.time()
 
 
 
 
 
 
154
 
155
  if uploaded_file and st.button("Analyze"):
 
 
 
 
156
  with st.spinner():
157
- # Use session state for progress tracking
158
- start_time = time.time()
159
-
160
  # Step 1: Text extraction
161
- text = extract_text_from_file(uploaded_file)
162
- st.session_state.progress = 33
163
- if "Error" in text:
164
- st.error(text)
 
165
  continue
166
 
167
- # Step 2: Information extraction & summarization
168
- summary, _ = summarize_resume_text(text)
169
- st.session_state.progress = 66
170
 
171
  # Step 3: Scoring
172
- score, breakdown = calculate_google_match_score(summary)
173
- st.session_state.progress = 100
174
 
175
  # Display results
176
  st.subheader("Analysis Complete!")
177
- st.markdown(f"**Match Score**: {score*100:.1f}%")
178
- # Add other displays...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
- if st.session_state.progress < 100:
181
- st.progress(st.session_state.progress, 100)
182
- time.sleep(0.1) # Simulate progress update
 
9
  import concurrent.futures
10
  from functools import lru_cache
11
  from transformers import pipeline
 
12
 
13
+ # ... [Keep your existing configurations and constants] ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  #####################################
16
  # Preload Models - Optimized with DistilBART
 
19
  def load_models():
20
  """Load optimized models at startup"""
21
  with st.spinner("Loading AI models..."):
22
+ # Use smaller, faster model
23
  models = {
24
  'summarizer': pipeline(
25
  "summarization",
26
+ model="distilbart-base-cs", # Faster than BART
27
+ max_length=300, # Reduced context window
28
  truncation=True,
29
  num_return_sequences=1
30
  )
 
47
  try:
48
  if ext == ".docx":
49
  doc = docx.Document(file_obj)
50
+ # Only process first 50 paragraphs (approx 10 pages)
51
  text = "\n".join(para.text for para in doc.paragraphs[:50] if para.text.strip())[:MAX_TEXT]
52
  elif ext == ".doc":
53
+ # Direct conversion using docx2txt
54
+ text = docx2txt.process(file_obj.stream.read())[:MAX_TEXT]
 
 
55
  elif ext == ".txt":
56
+ text = file_obj.read().decode("utf-8")[:MAX_TEXT]
57
  except Exception as e:
58
  text = f"Error: {str(e)}"
59
 
60
  return text
61
 
62
+ # ... [Keep your existing extraction functions] ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  #####################################
65
  # Optimized Summarization
66
  #####################################
67
def summarize_resume_text(resume_text):
    """Faster summarization with input truncation"""
    start_time = time.time()

    # Summarize only the first 1 KiB of the resume.
    head = resume_text[:1024]
    base_summary = models['summarizer'](
        head,
        max_length=150,
        truncation=True,
    )[0]['summary_text']

    # Fan the independent extractors out across a thread pool.
    with concurrent.futures.ThreadPoolExecutor() as pool:
        pending_name = pool.submit(extract_name, resume_text[:200])
        pending_age = pool.submit(extract_age, resume_text)
        pending_industry = pool.submit(extract_industry, resume_text, base_summary)

        name = pending_name.result()
        age = pending_age.result()
        industry = pending_industry.result()
        skills, work = extract_skills_and_work(resume_text)  # Sequential

    return f"**Name**: {name}\n**Age**: {age}\n**Industry**: {industry}\n\n{base_summary}", 0.1
94
 
95
+ # ... [Keep your scoring and feedback functions] ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
#####################################
# Optimized Streamlit Interface
#####################################
st.title("Google Resume Analyzer")

# Initialize session state properly
if 'progress' not in st.session_state:
    st.session_state['progress'] = 0
if 'last_update' not in st.session_state:
    st.session_state['last_update'] = time.time()

uploaded_file = st.file_uploader("Upload your resume", type=["docx", "doc", "txt"])

if uploaded_file and st.button("Analyze"):
    progress_interval = 0.1

    with st.spinner():
        # Step 1: Text extraction
        st.session_state['progress'] = 33
        resume_text = extract_text_from_file(uploaded_file)
        if "Error" in resume_text:
            st.error(resume_text)
            st.session_state['progress'] = 100
            # The original used `continue`, which is a SyntaxError
            # outside a loop; st.stop() is the Streamlit way to abort
            # the remainder of this script run.
            st.stop()

        # Step 2: Summarization
        st.session_state['progress'] = 66
        summary, _ = summarize_resume_text(resume_text)

        # Step 3: Scoring
        st.session_state['progress'] = 100

        # Display results
        st.subheader("Analysis Complete!")
        st.markdown(summary)

        # Display scores
        # NOTE(review): assumes the scorer returns 3 values and that
        # show_score / generate_template_feedback are defined before
        # this top-level code executes — confirm definition order.
        overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
        show_score(overall_score)

        # Display feedback
        feedback, _ = generate_template_feedback(category_scores)
        st.markdown(feedback)

    # Progress bar implementation
    if st.session_state['progress'] < 100:
        st.progress(st.session_state['progress'], 100)
        time.sleep(progress_interval)
147
+
148
def show_score(score):
    """Display score with appropriate formatting"""
    score_percent = int(score * 100)
    # Score bands from best to worst; the first threshold met wins.
    bands = (
        (0.85, st.success, "🌟"),
        (0.70, st.success, "βœ…"),
        (0.50, st.warning, "⚠️"),
    )
    for threshold, render, emoji in bands:
        if score >= threshold:
            render(f"**Match Score**: {score_percent}% {emoji}")
            return
    st.error(f"**Match Score**: {score_percent}% πŸ”")
159
 
160
+ # ... [Keep your remaining functions] ...