Spaces: Sleeping

bsiddhharth committed · Commit 09a1406 · Parent(s): 44af2f1

Updated app.py, requirements.txt, and .gitignore; added new scripts for CV analysis and job scraping

Files changed:
- .gitignore +2 -1
- app.py +5 -2
- cv_analyzer_search.py +343 -0
- python_jobspy.py +47 -0
- requirements.txt +41 -18
- resume_advance_analysis.py +201 -0
.gitignore
CHANGED
@@ -10,7 +10,8 @@ __pycache__/
 
 # Ignore specific file (like extraction.pydantic)
 extraction_pydantic.py
-cv_quest.py
+cv_quest(with main).py
 logger.py
+cv_job_reco2.py
 
 app.log
app.py
CHANGED
@@ -2,6 +2,7 @@
 import streamlit as st
 import cv_question
 import cv_short
+import cv_analyzer_search
 from logger import setup_logger
 
 # def initialize_session_state():
@@ -32,7 +33,6 @@ def main():
     # Setup logger for app
     app_logger = setup_logger('app_logger', 'app.log')
 
-    # Initialize session state
     # initialize_session_state()
 
     # Sidebar
@@ -46,7 +46,7 @@ def main():
         app_logger.info("Session state reset")
 
     # Navigation
-    page = st.sidebar.radio("Go to", ["CV Shortlisting", "Interview Questions"])
+    page = st.sidebar.radio("Go to", ["CV Shortlisting", "Interview Questions", "CV Analyser + JobSearch"])
     app_logger.info(f"Page selected: {page}")
 
     try:
@@ -62,6 +62,9 @@ def main():
         # else:
             app_logger.info("Navigating to Interview Questions")
             cv_question.create_interview_questions_page()
+
+        elif page == "CV Analyser + JobSearch":
+            cv_analyzer_search.Job_assistant()
 
     except Exception as e:
         app_logger.error(f"Error occurred: {e}")
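Taken together, these app.py changes import the new module and register a third page in the sidebar. A minimal sketch of the resulting dispatch, assuming a create_cv_shortlisting_page entry point in cv_short (only the cv_question and cv_analyzer_search calls are confirmed by the diff):

import streamlit as st
import cv_question
import cv_short
import cv_analyzer_search

def main():
    # Sidebar navigation now offers three pages.
    page = st.sidebar.radio("Go to", ["CV Shortlisting", "Interview Questions", "CV Analyser + JobSearch"])
    try:
        if page == "CV Shortlisting":
            cv_short.create_cv_shortlisting_page()  # assumed entry point, not shown in the diff
        elif page == "Interview Questions":
            cv_question.create_interview_questions_page()
        elif page == "CV Analyser + JobSearch":
            cv_analyzer_search.Job_assistant()      # new page added in this commit
    except Exception as e:
        st.error(f"Error occurred: {e}")

if __name__ == "__main__":
    main()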
cv_analyzer_search.py
ADDED
@@ -0,0 +1,343 @@
import streamlit as st
import pandas as pd
from langchain_groq import ChatGroq
from groq import Groq
from jobspy import scrape_jobs
from resume_advance_analysis import *
from extraction import *
# (
#     cv,
#     extract_cv_data,
#     process_file,             # File processing function
#     initialize_llm,           # LLM initialization function
#     display_candidates_info   # Candidate info display function
# )
from typing import List, Dict, Any
import json
import re
import os
import logging


os.environ['GROQ_API_KEY'] = os.getenv("GROQ_API_KEY")
groq_api_key = os.getenv("GROQ_API_KEY")

# Configure logging
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class JobSuggestionEngine:
    def __init__(self):

        # self.llm = ChatGroq(
        #     groq_api_key=groq_api_key,
        #     model_name="llama-3.1-70b-versatile",
        #     temperature=0.7,
        #     max_tokens=4096
        # )
        self.client = Groq(api_key=groq_api_key)

    def _extract_json(self, text: str) -> Dict[str, Any]:
        """
        Extracting JSON from LLM
        """
        try:
            logger.debug("Extracting JSON from LLM response")
            # Clean and extract JSON
            json_match = re.search(r'\{.*\}', text, re.DOTALL)
            if json_match:
                return json.loads(json_match.group(0))
            return {}

        except Exception as e:
            st.error(f"JSON Extraction Error: {e}")
            logger.error(f"JSON Extraction Error: {e}")
            return {}

    def generate_job_suggestions(self, resume_data: cv) -> List[Dict[str, str]]:

        logger.info("Generating job suggestions based on resume")

        prompt = f"""Based on the following resume details, provide job suggestions:

        Resume Details:
        - Skills: {', '.join(resume_data.skills or [])}
        - Certifications: {', '.join(resume_data.certifications or [])}
        - Years of Experience: {resume_data.years_of_exp or 0}

        Tasks:
        1. Suggest most potential 3 job roles that match the profile
        2. Include job role, brief description, and why it's suitable
        3. Respond in strict JSON format

        Required JSON Structure:
        {{
            "job_suggestions": [
                {{
                    "role": "Job Role",
                    "description": "Brief job description",
                    "suitability_reason": "Why this role matches the resume"
                }}
            ]
        }}


        """
        try:

            logger.debug(f"Calling Groq API with prompt: {prompt[:100]}...")  # start of api call

            # Make the API call to the Groq client for chat completions
            chat_completion = self.client.chat.completions.create(
                messages=[
                    {"role": "system", "content": "You are a career advisor generating job suggestions based on resume details."},
                    {"role": "user", "content": prompt}
                ],
                model="llama3-8b-8192",  # Replace with the correct model name if needed
                temperature=0.7,  # Adjust temperature for randomness
                max_tokens=1024,  # Limit the number of tokens
                top_p=1,
                stop=None,
                stream=False
            )

            # Extract and parse the JSON response from the completion
            response_text = chat_completion.choices[0].message.content
            suggestions_data = self._extract_json(response_text)

            logger.info(f"Job suggestions generated: {len(suggestions_data.get('job_suggestions', []))} found")

            # Return job suggestions, defaulting to an empty list if not found
            return suggestions_data.get('job_suggestions', [])

        except Exception as e:
            st.error(f"Job Suggestion Error: {e}")
            logger.error(f"Job Suggestion Error: {e}")
            return []

def Job_assistant():
    st.title("📄 Job Suggestion & Search Assistant")

    # Tabs for different functionalities
    tab1, tab2 = st.tabs(["Resume Analysis", "Direct Job Search"])

    with tab1:
        st.header("Resume Analysis & Job Suggestions")

        # File Upload
        uploaded_resume = st.file_uploader(
            "Upload Resume",
            type=['pdf', 'txt'],
            help="Upload your resume in PDF or TXT format"
        )

        # # Initialize LLM
        # try:
        #     llm = initialize_llm()
        #     logger.info("LLM initialized successfully")
        # except Exception as e:
        #     st.error(f"LLM Initialization Error: {e}")
        #     logger.error(f"LLM Initialization Error: {e}")
        #     st.stop()

        if uploaded_resume:
            # Process Resume
            with st.spinner("Analyzing Resume..."):
                try:
                    # Extract resume text
                    resume_text = process_file(uploaded_resume)
                    logger.info("Resume extracted successfully")

                    # Extract structured CV data
                    candidates = extract_cv_data(resume_text)

                    if not candidates:
                        st.error("Could not extract resume data")
                        logger.error("No candidates extracted from resume")
                        st.stop()

                    # Display extracted candidate information
                    st.subheader("Resume Analysis")
                    display_candidates_info(candidates)

                    resume_data = candidates[0]

                except Exception as e:
                    st.error(f"Resume Processing Error: {e}")
                    logger.error(f"Resume Processing Error: {e}")
                    st.stop()

            # Initialize Job Suggestion Engine
            suggestion_engine = JobSuggestionEngine()
            logger.info("Job_Suggestion_Engine initialized")

            # Generate Job Suggestions
            job_suggestions = suggestion_engine.generate_job_suggestions(resume_data)
            logger.info(f"Generated {len(job_suggestions)} job suggestions")

            # Display Job Suggestions
            st.header("🎯 Job Suggestions")
            for suggestion in job_suggestions:
                with st.expander(f"{suggestion.get('role', 'Unnamed Role')}"):
                    st.write(f"**Description:** {suggestion.get('description', 'No description')}")
                    st.write(f"**Suitability:** {suggestion.get('suitability_reason', 'Not specified')}")

            try:
                # Extract resume text
                resume_text = process_file(uploaded_resume)
                logger.info("Resume text extracted again for improvement suggestions")

                # Initialize Improvement Engine
                improvement_engine = ResumeImprovementEngine()

                # Generate Improvement Suggestions
                improvement_suggestions = improvement_engine.generate_resume_improvement_suggestions(resume_text)
                logger.info("Resume improvement suggestions generated")

                # Display Improvement Suggestions
                st.subheader("🔍 Comprehensive Resume Analysis")

                # Overall Assessment
                if improvement_suggestions.get('overall_assessment'):
                    with st.expander("📊 Overall Assessment"):
                        st.write("**Strengths:**")
                        for strength in improvement_suggestions['overall_assessment'].get('strengths', []):
                            st.markdown(f"- {strength}")

                        st.write("**Weaknesses:**")
                        for weakness in improvement_suggestions['overall_assessment'].get('weaknesses', []):
                            st.markdown(f"- {weakness}")

                # Section Recommendations
                if improvement_suggestions.get('section_recommendations'):
                    with st.expander("📝 Section-by-Section Recommendations"):
                        for section, details in improvement_suggestions['section_recommendations'].items():
                            st.subheader(f"{section.replace('_', ' ').title()} Section")
                            st.write(f"**Current Status:** {details.get('current_status', 'No assessment')}")

                            st.write("**Improvement Suggestions:**")
                            for suggestion in details.get('improvement_suggestions', []):
                                st.markdown(f"- {suggestion}")

                # Additional Insights
                st.subheader("✨ Additional Recommendations")

                # Writing Improvements
                if improvement_suggestions.get('writing_improvements'):
                    with st.expander("✍️ Writing & Formatting Advice"):
                        st.write("**Language Suggestions:**")
                        for lang_suggestion in improvement_suggestions['writing_improvements'].get('language_suggestions', []):
                            st.markdown(f"- {lang_suggestion}")

                        st.write("**Formatting Advice:**")
                        for format_advice in improvement_suggestions['writing_improvements'].get('formatting_advice', []):
                            st.markdown(f"- {format_advice}")

                # Additional Sections
                if improvement_suggestions.get('additional_sections_recommended'):
                    with st.expander("📋 Suggested Additional Sections"):
                        for section in improvement_suggestions['additional_sections_recommended']:
                            st.markdown(f"- {section}")

                # Keyword Optimization
                if improvement_suggestions.get('keyword_optimization'):
                    with st.expander("🔑 Keyword & ATS Optimization"):
                        st.write("**Missing Industry Keywords:**")
                        for keyword in improvement_suggestions['keyword_optimization'].get('missing_industry_keywords', []):
                            st.markdown(f"- {keyword}")

                        st.write(f"**ATS Compatibility Score:** {improvement_suggestions['keyword_optimization'].get('ats_compatibility_score', 'Not available')}")

                # Career Positioning
                if improvement_suggestions.get('career_positioning'):
                    with st.expander("🎯 Career Positioning"):
                        st.write("**Personal Branding Suggestions:**")
                        for branding_suggestion in improvement_suggestions['career_positioning'].get('personal_branding_suggestions', []):
                            st.markdown(f"- {branding_suggestion}")

                        st.write("**Skill Highlighting Recommendations:**")
                        for skill_suggestion in improvement_suggestions['career_positioning'].get('skill_highlighting_recommendations', []):
                            st.markdown(f"- {skill_suggestion}")

            except Exception as e:
                st.error(f"Resume Improvement Analysis Error: {e}")
                logger.error(f"Resume Improvement Analysis Error: {e}")


    with tab2:
        st.header("🔍 Direct Job Search")

        # Job Search Parameters
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            site_name = st.multiselect(
                "Select Job Sites",
                ["indeed", "linkedin", "zip_recruiter", "glassdoor", "google"],
                default=["indeed", "linkedin"]
            )

        with col2:
            search_term = st.text_input("Search Term", "software engineer")

        with col3:
            location = st.text_input("Location", "San Francisco, CA")

        with col4:
            results_wanted = st.number_input("Number of Results", min_value=1, max_value=100, value=20)

        # Additional parameters
        col5, col6 = st.columns(2)

        with col5:
            hours_old = st.number_input("Jobs Posted Within (hours)", min_value=1, max_value=168, value=72)

        with col6:
            country_indeed = st.text_input("Country (for Indeed)", "USA")

        # Search Button
        if st.button("Search Jobs"):
            with st.spinner("Searching Jobs..."):
                # Perform job search
                try:
                    logger.info(f"Performing job search with {search_term} in {location}")
                    jobs = scrape_jobs(
                        site_name=site_name,
                        search_term=search_term,
                        google_search_term=f"{search_term} jobs near {location}",
                        location=location,
                        results_wanted=results_wanted,
                        hours_old=hours_old,
                        country_indeed=country_indeed,
                    )

                    if len(jobs) > 0:
                        st.success(f"Found {len(jobs)} jobs")

                        jobs_filtered = jobs[['site', 'job_url', 'title', 'company', 'location', 'date_posted']]
                        # Display job data in a table
                        # st.dataframe(jobs)
                        st.dataframe(jobs_filtered)

                        # Option to download jobs
                        csv_file = jobs.to_csv(index=False)
                        st.download_button(
                            label="Download Jobs as CSV",
                            data=csv_file,
                            file_name='job_search_results.csv',
                            mime='text/csv'
                        )
                    else:
                        st.warning("No jobs found")

                except Exception as e:
                    st.error(f"Job Search Error: {e}")
                    logger.error(f"Job Search Error: {e}")



# if __name__ == "__main__":
#     main()
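The _extract_json helper above pulls a JSON object out of the model's free-form reply with a single greedy brace match. A minimal, self-contained sketch of that technique (the sample reply string is invented for the demo):

import json
import re

def extract_json(text: str) -> dict:
    # Greedy match from the first '{' to the last '}', with DOTALL so the
    # match can span newlines; any prose around the JSON object is discarded.
    json_match = re.search(r'\{.*\}', text, re.DOTALL)
    return json.loads(json_match.group(0)) if json_match else {}

reply = 'Sure, here you go:\n{"job_suggestions": [{"role": "Data Engineer"}]}\nGood luck!'
print(extract_json(reply))  # {'job_suggestions': [{'role': 'Data Engineer'}]}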
python_jobspy.py
ADDED
@@ -0,0 +1,47 @@

"""
Simple Working Version Of Job_Spy in Streamlit
"""

import csv
from jobspy import scrape_jobs
import streamlit as st
import pandas as pd

st.title("Job-Scrapper")

site_name = st.multiselect(
    "Select Job Sites", ["indeed", "linkedin", "zip_recruiter", "glassdoor", "google"], default=["indeed", "linkedin"]
)

search_term = st.text_input("Search Term", "software engineer")
location = st.text_input("Location", "San Francisco, CA")
results_wanted = st.number_input("Number of Results", min_value=1, max_value=100, value=20)
hours_old = st.number_input("How many hours old?", min_value=1, max_value=168, value=72)
country_indeed = st.text_input("Country (for Indeed)", "USA")

if st.button("scrape jobs"):
    jobs = scrape_jobs(
        site_name=site_name,
        search_term=search_term,
        google_search_term=f"{search_term} jobs near {location}",
        location=location,
        results_wanted=results_wanted,
        hours_old=hours_old,
        country_indeed=country_indeed,

        # linkedin_fetch_description=True  # gets more info such as description, direct job url (slower)
        # proxies=["208.195.175.46:65095", "208.195.175.45:65095", "localhost"],
    )

    if len(jobs) > 0:
        st.success(f"Found {len(jobs)} jobs")

        # Display job data in a table
        st.dataframe(jobs)

    else:
        st.warning("No jobs found")
    # print(f"Found {len(jobs)} jobs")
    # print(jobs.head())
    # jobs.to_csv("jobs.csv", quoting=csv.QUOTE_NONNUMERIC, escapechar="\\", index=False)  # to_excel
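The same scrape_jobs call runs outside Streamlit as well, which is handy for quick testing; a small sketch with arbitrary parameter values (mirroring the arguments used above):

from jobspy import scrape_jobs

# scrape_jobs returns a pandas DataFrame of postings.
jobs = scrape_jobs(
    site_name=["indeed", "linkedin"],
    search_term="software engineer",
    location="San Francisco, CA",
    results_wanted=10,
    hours_old=72,
    country_indeed="USA",
)
print(f"Found {len(jobs)} jobs")
print(jobs[["title", "company", "location"]].head())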
requirements.txt
CHANGED
@@ -1,18 +1,41 @@
-langchain
-python-dotenv
-ipykernel
-langchain-community
-streamlit
-pypdf
-pymupdf
-langchain-text-splitters
-langchain-openai
-chromadb
-sentence_transformers
-langchain_huggingface
-faiss-cpu
-langchain_chroma
-openai
-langchain-groq
-pdfplumber
-prettytable
+# langchain
+# python-dotenv
+# ipykernel
+# langchain-community
+# streamlit
+# pypdf
+# pymupdf
+# langchain-text-splitters
+# langchain-openai
+# chromadb
+# sentence_transformers
+# langchain_huggingface
+# faiss-cpu
+# langchain_chroma
+# openai
+# langchain-groq
+# pdfplumber
+# prettytable
+# python-jobspy
+# scikit-learn
+
+langchain==0.3.7
+python-dotenv==1.0.1
+ipykernel==6.29.5
+langchain-community==0.3.5
+streamlit==1.39.0
+pypdf==5.1.0
+PyMuPDF==1.24.13
+langchain-text-splitters==0.3.2
+langchain-openai==0.2.5
+chromadb==0.5.17
+sentence_transformers==3.2.1
+langchain-huggingface==0.1.2
+faiss-cpu==1.9.0
+langchain-chroma==0.1.4
+openai==1.53.0
+langchain-groq==0.2.1
+pdfplumber==0.11.4
+prettytable==3.12.0
+python-jobspy==1.1.75
+scikit-learn==1.5.2
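Since every dependency is now pinned to an exact version, the installed environment can be spot-checked against the pins from Python; a small sketch using only the standard library (package list abbreviated):

from importlib.metadata import version, PackageNotFoundError

# Spot-check a few of the pins from requirements.txt.
for pkg in ["langchain", "streamlit", "python-jobspy", "scikit-learn"]:
    try:
        print(f"{pkg}=={version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")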
resume_advance_analysis.py
ADDED
@@ -0,0 +1,201 @@
import streamlit as st
from typing import Any, Dict
import json
from groq import Groq
import re
import os
import logging

logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


os.environ['GROQ_API_KEY'] = os.getenv("GROQ_API_KEY")
groq_api_key = os.getenv("GROQ_API_KEY")

class ResumeImprovementEngine:
    def __init__(self):
        # self.llm = ChatGroq(
        #     groq_api_key=groq_api_key,
        #     model_name="llama-3.1-70b-versatile",
        #     temperature=0.7,
        #     max_tokens=4096
        # )
        self.client = Groq(api_key=groq_api_key)
        logger.info("ResumeImprovementEngine initialized with Groq API key.")

    def generate_resume_improvement_suggestions(self, resume_text: str) -> dict[str, Any]:
        """
        Generate comprehensive resume improvement suggestions

        Args:
            resume_text (str): Full text of the resume

        Returns:
            Dict containing detailed improvement suggestions
        """
        prompt = f"""Perform a comprehensive analysis of the following resume and provide detailed improvement suggestions:

        Resume Content:
        {resume_text}

        Tasks:
        1. Provide a structured analysis of resume strengths and weaknesses
        2. Offer specific, actionable improvement recommendations
        3. Suggest additional sections or content enhancements
        4. Provide writing and formatting advice
        5. Respond in detailed, structured JSON format

        Required JSON Structure:
        {{
            "overall_assessment": {{
                "strengths": ["Key strengths of the resume"],
                "weaknesses": ["Areas needing improvement"]
            }},
            "section_recommendations": {{
                "work_experience": {{
                    "current_status": "Assessment of current work experience section",
                    "improvement_suggestions": ["Specific improvements"]
                }},
                "education": {{
                    "current_status": "Assessment of education section",
                    "improvement_suggestions": ["Specific improvements"]
                }}
            }},
            "writing_improvements": {{
                "language_suggestions": ["Writing style improvements"],
                "formatting_advice": ["Formatting and layout suggestions"]
            }},
            "additional_sections_recommended": ["List of suggested new sections"],
            "keyword_optimization": {{
                "missing_industry_keywords": ["Keywords to add"],
                "ats_compatibility_score": "Numeric score or rating"
            }},
            "career_positioning": {{
                "personal_branding_suggestions": ["Ways to enhance personal brand"],
                "skill_highlighting_recommendations": ["How to better showcase skills"]
            }}
        }}
        """

        try:
            logger.info("Sending request to Groq for resume improvement.")
            # Make API call to generate improvement suggestions
            chat_completion = self.client.chat.completions.create(
                messages=[
                    {
                        "role": "system",
                        "content": "You are an expert resume consultant providing detailed, constructive feedback."
                    },
                    {
                        "role": "user",
                        "content": prompt
                    }
                ],
                model="llama3-groq-70b-8192-tool-use-preview",
                temperature=0.7,
                max_tokens=2048,
                top_p=1,
                stream=False
            )

            logger.info("Groq API response received.")

            # Extract and parse the JSON response
            response_text = chat_completion.choices[0].message.content
            suggestions = self._extract_json(response_text)

            logger.debug(f"Improvement suggestions received: {suggestions}")

            return suggestions

        except Exception as e:
            st.error(f"Resume Improvement Error: {e}")
            logger.error(f"Resume Improvement Error: {e}")
            return {}


    def _extract_json(self, text: str) -> dict[str, Any]:
        """
        Safely extract JSON from LLM response

        Args:
            text (str): LLM response text

        Returns:
            Dict of extracted JSON or empty dict
        """
        try:
            logger.debug("Extracting JSON from response text.")

            json_match = re.search(r'\{.*\}', text, re.DOTALL | re.MULTILINE)
            if json_match:
                return json.loads(json_match.group(0))

            logger.warning("No valid JSON found in response text.")

            return {}

        except Exception as e:
            st.error(f"JSON Extraction Error: {e}")
            logger.error(f"JSON Extraction Error: {e}")
            return {}




    # def _extract_json(self, text: str) -> Dict[str, Any]:
    #     """
    #     Safely extract JSON from LLM response with robust error handling

    #     Args:
    #         text (str): LLM response text

    #     Returns:
    #         Dict of extracted JSON or empty dict
    #     """
    #     try:
    #         logger.debug("Attempting to extract JSON from response text.")

    #         # Clean the text and remove any non-JSON characters
    #         # Remove text before first '{' and after last '}'
    #         cleaned_text = text.strip()
    #         first_brace = cleaned_text.find('{')
    #         last_brace = cleaned_text.rfind('}')

    #         if first_brace != -1 and last_brace != -1:
    #             cleaned_text = cleaned_text[first_brace:last_brace+1]

    #         # Extraction strategies
    #         extraction_strategies = [
    #             # Direct parsing of cleaned text
    #             lambda t: json.loads(t),

    #             # Remove non-printable characters and try parsing
    #             lambda t: json.loads(re.sub(r'[^\x20-\x7E\n]', '', t)),

    #             # Extract JSON within code block
    #             lambda t: json.loads(re.search(r'```json\n(.*?)```', t, re.DOTALL).group(1) if re.search(r'```json\n(.*?)```', t, re.DOTALL) else '')
    #         ]

    #         # Try each extraction strategy
    #         for strategy in extraction_strategies:
    #             try:
    #                 parsed_json = strategy(cleaned_text)

    #                 # Additional validation to ensure it's a dictionary
    #                 if isinstance(parsed_json, dict):
    #                     logger.info("Successfully extracted and parsed JSON.")
    #                     return parsed_json
    #             except (json.JSONDecodeError, AttributeError, IndexError):
    #                 continue

    #         # Detailed logging for troubleshooting
    #         logger.warning(f"Could not extract valid JSON. Raw text: {text}")
    #         return {}

    #     except Exception as e:
    #         # Log the full error details
    #         logger.error(f"JSON Extraction Error: {e}", exc_info=True)
    #         st.error(f"JSON Extraction Error: {e}")
    #         return {}
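For context, this is roughly how cv_analyzer_search.py drives the engine; a minimal standalone sketch (the sample resume text is invented, and GROQ_API_KEY is assumed to be set in the environment):

from resume_advance_analysis import ResumeImprovementEngine

engine = ResumeImprovementEngine()  # assumes GROQ_API_KEY is exported

sample_resume = "Jane Doe\nData Analyst, 3 years\nSkills: Python, SQL, Tableau"
suggestions = engine.generate_resume_improvement_suggestions(sample_resume)

# The engine returns the structured JSON described in the prompt above.
for strength in suggestions.get("overall_assessment", {}).get("strengths", []):
    print("strength:", strength)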