Spaces: Sleeping

bsiddhharth committed · Commit 09a1406 · Parent(s): 44af2f1

Updated app.py, requirements.txt, and .gitignore; added new scripts for CV analysis and job scraping

Files changed:
- .gitignore +2 -1
- app.py +5 -2
- cv_analyzer_search.py +343 -0
- python_jobspy.py +47 -0
- requirements.txt +41 -18
- resume_advance_analysis.py +201 -0
.gitignore
CHANGED
@@ -10,7 +10,8 @@ __pycache__/
 
 # Ignore specific file (like extraction.pydantic)
 extraction_pydantic.py
-cv_quest.py
+cv_quest(with main).py
 logger.py
+cv_job_reco2.py
 
 app.log
app.py
CHANGED
@@ -2,6 +2,7 @@
 import streamlit as st
 import cv_question
 import cv_short
+import cv_analyzer_search
 from logger import setup_logger
 
 # def initialize_session_state():
@@ -32,7 +33,6 @@ def main():
     # Setup logger for app
     app_logger = setup_logger('app_logger', 'app.log')
 
-    # Initialize session state
     # initialize_session_state()
 
     # Sidebar
@@ -46,7 +46,7 @@ def main():
         app_logger.info("Session state reset")
 
     # Navigation
-    page = st.sidebar.radio("Go to", ["CV Shortlisting", "Interview Questions"])
+    page = st.sidebar.radio("Go to", ["CV Shortlisting", "Interview Questions", "CV Analyser + JobSearch"])
     app_logger.info(f"Page selected: {page}")
 
     try:
@@ -62,6 +62,9 @@ def main():
         # else:
             app_logger.info("Navigating to Interview Questions")
             cv_question.create_interview_questions_page()
+
+        elif page == "CV Analyser + JobSearch":
+            cv_analyzer_search.Job_assistant()
 
     except Exception as e:
         app_logger.error(f"Error occurred: {e}")
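Taken together, these app.py changes import the new module and register a third page in the sidebar. A minimal sketch of the resulting dispatch, assuming a create_cv_shortlisting_page entry point in cv_short (only the cv_question and cv_analyzer_search calls are confirmed by the diff):

import streamlit as st
import cv_question
import cv_short
import cv_analyzer_search

def main():
    # Sidebar navigation now offers three pages.
    page = st.sidebar.radio("Go to", ["CV Shortlisting", "Interview Questions", "CV Analyser + JobSearch"])
    try:
        if page == "CV Shortlisting":
            cv_short.create_cv_shortlisting_page()  # assumed entry point, not shown in the diff
        elif page == "Interview Questions":
            cv_question.create_interview_questions_page()
        elif page == "CV Analyser + JobSearch":
            cv_analyzer_search.Job_assistant()      # new page added in this commit
    except Exception as e:
        st.error(f"Error occurred: {e}")

if __name__ == "__main__":
    main()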
cv_analyzer_search.py
ADDED
@@ -0,0 +1,343 @@
import streamlit as st
import pandas as pd
from langchain_groq import ChatGroq
from groq import Groq
from jobspy import scrape_jobs
from resume_advance_analysis import *
from extraction import *
# (
#     cv,
#     extract_cv_data,
#     process_file,             # File processing function
#     initialize_llm,           # LLM initialization function
#     display_candidates_info   # Candidate info display function
# )
from typing import List, Dict, Any
import json
import re
import os
import logging


os.environ['GROQ_API_KEY'] = os.getenv("GROQ_API_KEY")
groq_api_key = os.getenv("GROQ_API_KEY")

# Configure logging
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class JobSuggestionEngine:
    def __init__(self):

        # self.llm = ChatGroq(
        #     groq_api_key=groq_api_key,
        #     model_name="llama-3.1-70b-versatile",
        #     temperature=0.7,
        #     max_tokens=4096
        # )
        self.client = Groq(api_key=groq_api_key)

    def _extract_json(self, text: str) -> Dict[str, Any]:
        """
        Extracting JSON from LLM
        """
        try:
            logger.debug("Extracting JSON from LLM response")
            # Clean and extract JSON
            json_match = re.search(r'\{.*\}', text, re.DOTALL)
            if json_match:
                return json.loads(json_match.group(0))
            return {}

        except Exception as e:
            st.error(f"JSON Extraction Error: {e}")
            logger.error(f"JSON Extraction Error: {e}")
            return {}

    def generate_job_suggestions(self, resume_data: cv) -> List[Dict[str, str]]:

        logger.info("Generating job suggestions based on resume")

        prompt = f"""Based on the following resume details, provide job suggestions:

        Resume Details:
        - Skills: {', '.join(resume_data.skills or [])}
        - Certifications: {', '.join(resume_data.certifications or [])}
        - Years of Experience: {resume_data.years_of_exp or 0}

        Tasks:
        1. Suggest most potential 3 job roles that match the profile
        2. Include job role, brief description, and why it's suitable
        3. Respond in strict JSON format

        Required JSON Structure:
        {{
            "job_suggestions": [
                {{
                    "role": "Job Role",
                    "description": "Brief job description",
                    "suitability_reason": "Why this role matches the resume"
                }}
            ]
        }}


        """
        try:

            logger.debug(f"Calling Groq API with prompt: {prompt[:100]}...")  # start of api call

            # Make the API call to the Groq client for chat completions
            chat_completion = self.client.chat.completions.create(
                messages=[
                    {"role": "system", "content": "You are a career advisor generating job suggestions based on resume details."},
                    {"role": "user", "content": prompt}
                ],
                model="llama3-8b-8192",  # Replace with the correct model name if needed
                temperature=0.7,  # Adjust temperature for randomness
                max_tokens=1024,  # Limit the number of tokens
                top_p=1,
                stop=None,
                stream=False
            )

            # Extract and parse the JSON response from the completion
            response_text = chat_completion.choices[0].message.content
            suggestions_data = self._extract_json(response_text)

            logger.info(f"Job suggestions generated: {len(suggestions_data.get('job_suggestions', []))} found")

            # Return job suggestions, defaulting to an empty list if not found
            return suggestions_data.get('job_suggestions', [])

        except Exception as e:
            st.error(f"Job Suggestion Error: {e}")
            logger.error(f"Job Suggestion Error: {e}")
            return []

def Job_assistant():
    st.title("📄 Job Suggestion & Search Assistant")

    # Tabs for different functionalities
    tab1, tab2 = st.tabs(["Resume Analysis", "Direct Job Search"])

    with tab1:
        st.header("Resume Analysis & Job Suggestions")

        # File Upload
        uploaded_resume = st.file_uploader(
            "Upload Resume",
            type=['pdf', 'txt'],
            help="Upload your resume in PDF or TXT format"
        )

        # # Initialize LLM
        # try:
        #     llm = initialize_llm()
        #     logger.info("LLM initialized successfully")
        # except Exception as e:
        #     st.error(f"LLM Initialization Error: {e}")
        #     logger.error(f"LLM Initialization Error: {e}")
        #     st.stop()

        if uploaded_resume:
            # Process Resume
            with st.spinner("Analyzing Resume..."):
                try:
                    # Extract resume text
                    resume_text = process_file(uploaded_resume)
                    logger.info("Resume extracted successfully")

                    # Extract structured CV data
                    candidates = extract_cv_data(resume_text)

                    if not candidates:
                        st.error("Could not extract resume data")
                        logger.error("No candidates extracted from resume")
                        st.stop()

                    # Display extracted candidate information
                    st.subheader("Resume Analysis")
                    display_candidates_info(candidates)

                    resume_data = candidates[0]

                except Exception as e:
                    st.error(f"Resume Processing Error: {e}")
                    logger.error(f"Resume Processing Error: {e}")
                    st.stop()

            # Initialize Job Suggestion Engine
            suggestion_engine = JobSuggestionEngine()
            logger.info("Job_Suggestion_Engine initialized")

            # Generate Job Suggestions
            job_suggestions = suggestion_engine.generate_job_suggestions(resume_data)
            logger.info(f"Generated {len(job_suggestions)} job suggestions")

            # Display Job Suggestions
            st.header("🎯 Job Suggestions")
            for suggestion in job_suggestions:
                with st.expander(f"{suggestion.get('role', 'Unnamed Role')}"):
                    st.write(f"**Description:** {suggestion.get('description', 'No description')}")
                    st.write(f"**Suitability:** {suggestion.get('suitability_reason', 'Not specified')}")

            try:
                # Extract resume text
                resume_text = process_file(uploaded_resume)
                logger.info("Resume text extracted again for improvement suggestions")

                # Initialize Improvement Engine
                improvement_engine = ResumeImprovementEngine()

                # Generate Improvement Suggestions
                improvement_suggestions = improvement_engine.generate_resume_improvement_suggestions(resume_text)
                logger.info("Resume improvement suggestions generated")

                # Display Improvement Suggestions
                st.subheader("🔍 Comprehensive Resume Analysis")

                # Overall Assessment
                if improvement_suggestions.get('overall_assessment'):
                    with st.expander("📊 Overall Assessment"):
                        st.write("**Strengths:**")
                        for strength in improvement_suggestions['overall_assessment'].get('strengths', []):
                            st.markdown(f"- {strength}")

                        st.write("**Weaknesses:**")
                        for weakness in improvement_suggestions['overall_assessment'].get('weaknesses', []):
                            st.markdown(f"- {weakness}")

                # Section Recommendations
                if improvement_suggestions.get('section_recommendations'):
                    with st.expander("📝 Section-by-Section Recommendations"):
                        for section, details in improvement_suggestions['section_recommendations'].items():
                            st.subheader(f"{section.replace('_', ' ').title()} Section")
                            st.write(f"**Current Status:** {details.get('current_status', 'No assessment')}")

                            st.write("**Improvement Suggestions:**")
                            for suggestion in details.get('improvement_suggestions', []):
                                st.markdown(f"- {suggestion}")

                # Additional Insights
                st.subheader("✨ Additional Recommendations")

                # Writing Improvements
                if improvement_suggestions.get('writing_improvements'):
                    with st.expander("✍️ Writing & Formatting Advice"):
                        st.write("**Language Suggestions:**")
                        for lang_suggestion in improvement_suggestions['writing_improvements'].get('language_suggestions', []):
                            st.markdown(f"- {lang_suggestion}")

                        st.write("**Formatting Advice:**")
                        for format_advice in improvement_suggestions['writing_improvements'].get('formatting_advice', []):
                            st.markdown(f"- {format_advice}")

                # Additional Sections
                if improvement_suggestions.get('additional_sections_recommended'):
                    with st.expander("📋 Suggested Additional Sections"):
                        for section in improvement_suggestions['additional_sections_recommended']:
                            st.markdown(f"- {section}")

                # Keyword Optimization
                if improvement_suggestions.get('keyword_optimization'):
                    with st.expander("🔑 Keyword & ATS Optimization"):
                        st.write("**Missing Industry Keywords:**")
                        for keyword in improvement_suggestions['keyword_optimization'].get('missing_industry_keywords', []):
                            st.markdown(f"- {keyword}")

                        st.write(f"**ATS Compatibility Score:** {improvement_suggestions['keyword_optimization'].get('ats_compatibility_score', 'Not available')}")

                # Career Positioning
                if improvement_suggestions.get('career_positioning'):
                    with st.expander("🎯 Career Positioning"):
                        st.write("**Personal Branding Suggestions:**")
                        for branding_suggestion in improvement_suggestions['career_positioning'].get('personal_branding_suggestions', []):
                            st.markdown(f"- {branding_suggestion}")

                        st.write("**Skill Highlighting Recommendations:**")
                        for skill_suggestion in improvement_suggestions['career_positioning'].get('skill_highlighting_recommendations', []):
                            st.markdown(f"- {skill_suggestion}")

            except Exception as e:
                st.error(f"Resume Improvement Analysis Error: {e}")
                logger.error(f"Resume Improvement Analysis Error: {e}")


    with tab2:
        st.header("🔍 Direct Job Search")

        # Job Search Parameters
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            site_name = st.multiselect(
                "Select Job Sites",
                ["indeed", "linkedin", "zip_recruiter", "glassdoor", "google"],
                default=["indeed", "linkedin"]
            )

        with col2:
            search_term = st.text_input("Search Term", "software engineer")

        with col3:
            location = st.text_input("Location", "San Francisco, CA")

        with col4:
            results_wanted = st.number_input("Number of Results", min_value=1, max_value=100, value=20)

        # Additional parameters
        col5, col6 = st.columns(2)

        with col5:
            hours_old = st.number_input("Jobs Posted Within (hours)", min_value=1, max_value=168, value=72)

        with col6:
            country_indeed = st.text_input("Country (for Indeed)", "USA")

        # Search Button
        if st.button("Search Jobs"):
            with st.spinner("Searching Jobs..."):
                # Perform job search
                try:
                    logger.info(f"Performing job search with {search_term} in {location}")
                    jobs = scrape_jobs(
                        site_name=site_name,
                        search_term=search_term,
                        google_search_term=f"{search_term} jobs near {location}",
                        location=location,
                        results_wanted=results_wanted,
                        hours_old=hours_old,
                        country_indeed=country_indeed,
                    )

                    if len(jobs) > 0:
                        st.success(f"Found {len(jobs)} jobs")

                        jobs_filtered = jobs[['site', 'job_url', 'title', 'company', 'location', 'date_posted']]
                        # Display job data in a table
                        # st.dataframe(jobs)
                        st.dataframe(jobs_filtered)

                        # Option to download jobs
                        csv_file = jobs.to_csv(index=False)
                        st.download_button(
                            label="Download Jobs as CSV",
                            data=csv_file,
                            file_name='job_search_results.csv',
                            mime='text/csv'
                        )
                    else:
                        st.warning("No jobs found")

                except Exception as e:
                    st.error(f"Job Search Error: {e}")
                    logger.error(f"Job Search Error: {e}")



# if __name__ == "__main__":
#     main()
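The _extract_json helper above pulls a JSON object out of the model's free-form reply with a single greedy brace match. A minimal, self-contained sketch of that technique (the sample reply string is invented for the demo):

import json
import re

def extract_json(text: str) -> dict:
    # Greedy match from the first '{' to the last '}', with DOTALL so the
    # match can span newlines; any prose around the JSON object is discarded.
    json_match = re.search(r'\{.*\}', text, re.DOTALL)
    return json.loads(json_match.group(0)) if json_match else {}

reply = 'Sure, here you go:\n{"job_suggestions": [{"role": "Data Engineer"}]}\nGood luck!'
print(extract_json(reply))  # {'job_suggestions': [{'role': 'Data Engineer'}]}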
python_jobspy.py
ADDED
@@ -0,0 +1,47 @@

"""
Simple Working Version Of Job_Spy in Streamlit
"""

import csv
from jobspy import scrape_jobs
import streamlit as st
import pandas as pd

st.title("Job-Scrapper")

site_name = st.multiselect(
    "Select Job Sites", ["indeed", "linkedin", "zip_recruiter", "glassdoor", "google"], default=["indeed", "linkedin"]
)

search_term = st.text_input("Search Term", "software engineer")
location = st.text_input("Location", "San Francisco, CA")
results_wanted = st.number_input("Number of Results", min_value=1, max_value=100, value=20)
hours_old = st.number_input("How many hours old?", min_value=1, max_value=168, value=72)
country_indeed = st.text_input("Country (for Indeed)", "USA")

if st.button("scrape jobs"):
    jobs = scrape_jobs(
        site_name=site_name,
        search_term=search_term,
        google_search_term=f"{search_term} jobs near {location}",
        location=location,
        results_wanted=results_wanted,
        hours_old=hours_old,
        country_indeed=country_indeed,

        # linkedin_fetch_description=True  # gets more info such as description, direct job url (slower)
        # proxies=["208.195.175.46:65095", "208.195.175.45:65095", "localhost"],
    )

    if len(jobs) > 0:
        st.success(f"Found {len(jobs)} jobs")

        # Display job data in a table
        st.dataframe(jobs)

    else:
        st.warning("No jobs found")
    # print(f"Found {len(jobs)} jobs")
    # print(jobs.head())
    # jobs.to_csv("jobs.csv", quoting=csv.QUOTE_NONNUMERIC, escapechar="\\", index=False)  # to_excel
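The same scrape_jobs call runs outside Streamlit as well, which is handy for quick testing; a small sketch with arbitrary parameter values (mirroring the arguments used above):

from jobspy import scrape_jobs

# scrape_jobs returns a pandas DataFrame of postings.
jobs = scrape_jobs(
    site_name=["indeed", "linkedin"],
    search_term="software engineer",
    location="San Francisco, CA",
    results_wanted=10,
    hours_old=72,
    country_indeed="USA",
)
print(f"Found {len(jobs)} jobs")
print(jobs[["title", "company", "location"]].head())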
requirements.txt
CHANGED
@@ -1,18 +1,41 @@
-langchain
-python-dotenv
-ipykernel
-langchain-community
-streamlit
-pypdf
-pymupdf
-langchain-text-splitters
-langchain-openai
-chromadb
-sentence_transformers
-langchain_huggingface
-faiss-cpu
-langchain_chroma
-openai
-langchain-groq
-pdfplumber
-prettytable
+# langchain
+# python-dotenv
+# ipykernel
+# langchain-community
+# streamlit
+# pypdf
+# pymupdf
+# langchain-text-splitters
+# langchain-openai
+# chromadb
+# sentence_transformers
+# langchain_huggingface
+# faiss-cpu
+# langchain_chroma
+# openai
+# langchain-groq
+# pdfplumber
+# prettytable
+# python-jobspy
+# scikit-learn
+
+langchain==0.3.7
+python-dotenv==1.0.1
+ipykernel==6.29.5
+langchain-community==0.3.5
+streamlit==1.39.0
+pypdf==5.1.0
+PyMuPDF==1.24.13
+langchain-text-splitters==0.3.2
+langchain-openai==0.2.5
+chromadb==0.5.17
+sentence_transformers==3.2.1
+langchain-huggingface==0.1.2
+faiss-cpu==1.9.0
+langchain-chroma==0.1.4
+openai==1.53.0
+langchain-groq==0.2.1
+pdfplumber==0.11.4
+prettytable==3.12.0
+python-jobspy==1.1.75
+scikit-learn==1.5.2
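Since every dependency is now pinned to an exact version, the installed environment can be spot-checked against the pins from Python; a small sketch using only the standard library (package list abbreviated):

from importlib.metadata import version, PackageNotFoundError

# Spot-check a few of the pins from requirements.txt.
for pkg in ["langchain", "streamlit", "python-jobspy", "scikit-learn"]:
    try:
        print(f"{pkg}=={version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")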
resume_advance_analysis.py
ADDED
@@ -0,0 +1,201 @@
import streamlit as st
from typing import Any, Dict
import json
from groq import Groq
import re
import os
import logging

logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


os.environ['GROQ_API_KEY'] = os.getenv("GROQ_API_KEY")
groq_api_key = os.getenv("GROQ_API_KEY")

class ResumeImprovementEngine:
    def __init__(self):
        # self.llm = ChatGroq(
        #     groq_api_key=groq_api_key,
        #     model_name="llama-3.1-70b-versatile",
        #     temperature=0.7,
        #     max_tokens=4096
        # )
        self.client = Groq(api_key=groq_api_key)
        logger.info("ResumeImprovementEngine initialized with Groq API key.")

    def generate_resume_improvement_suggestions(self, resume_text: str) -> dict[str, Any]:
        """
        Generate comprehensive resume improvement suggestions

        Args:
            resume_text (str): Full text of the resume

        Returns:
            Dict containing detailed improvement suggestions
        """
        prompt = f"""Perform a comprehensive analysis of the following resume and provide detailed improvement suggestions:

        Resume Content:
        {resume_text}

        Tasks:
        1. Provide a structured analysis of resume strengths and weaknesses
        2. Offer specific, actionable improvement recommendations
        3. Suggest additional sections or content enhancements
        4. Provide writing and formatting advice
        5. Respond in detailed, structured JSON format

        Required JSON Structure:
        {{
            "overall_assessment": {{
                "strengths": ["Key strengths of the resume"],
                "weaknesses": ["Areas needing improvement"]
            }},
            "section_recommendations": {{
                "work_experience": {{
                    "current_status": "Assessment of current work experience section",
                    "improvement_suggestions": ["Specific improvements"]
                }},
                "education": {{
                    "current_status": "Assessment of education section",
                    "improvement_suggestions": ["Specific improvements"]
                }}
            }},
            "writing_improvements": {{
                "language_suggestions": ["Writing style improvements"],
                "formatting_advice": ["Formatting and layout suggestions"]
            }},
            "additional_sections_recommended": ["List of suggested new sections"],
            "keyword_optimization": {{
                "missing_industry_keywords": ["Keywords to add"],
                "ats_compatibility_score": "Numeric score or rating"
            }},
            "career_positioning": {{
                "personal_branding_suggestions": ["Ways to enhance personal brand"],
                "skill_highlighting_recommendations": ["How to better showcase skills"]
            }}
        }}
        """

        try:
            logger.info("Sending request to Groq for resume improvement.")
            # Make API call to generate improvement suggestions
            chat_completion = self.client.chat.completions.create(
                messages=[
                    {
                        "role": "system",
                        "content": "You are an expert resume consultant providing detailed, constructive feedback."
                    },
                    {
                        "role": "user",
                        "content": prompt
                    }
                ],
                model="llama3-groq-70b-8192-tool-use-preview",
                temperature=0.7,
                max_tokens=2048,
                top_p=1,
                stream=False
            )

            logger.info("Groq API response received.")

            # Extract and parse the JSON response
            response_text = chat_completion.choices[0].message.content
            suggestions = self._extract_json(response_text)

            logger.debug(f"Improvement suggestions received: {suggestions}")

            return suggestions

        except Exception as e:
            st.error(f"Resume Improvement Error: {e}")
            logger.error(f"Resume Improvement Error: {e}")
            return {}


    def _extract_json(self, text: str) -> dict[str, Any]:
        """
        Safely extract JSON from LLM response

        Args:
            text (str): LLM response text

        Returns:
            Dict of extracted JSON or empty dict
        """
        try:
            logger.debug("Extracting JSON from response text.")

            json_match = re.search(r'\{.*\}', text, re.DOTALL | re.MULTILINE)
            if json_match:
                return json.loads(json_match.group(0))

            logger.warning("No valid JSON found in response text.")

            return {}

        except Exception as e:
            st.error(f"JSON Extraction Error: {e}")
            logger.error(f"JSON Extraction Error: {e}")
            return {}




    # def _extract_json(self, text: str) -> Dict[str, Any]:
    #     """
    #     Safely extract JSON from LLM response with robust error handling

    #     Args:
    #         text (str): LLM response text

    #     Returns:
    #         Dict of extracted JSON or empty dict
    #     """
    #     try:
    #         logger.debug("Attempting to extract JSON from response text.")

    #         # Clean the text and remove any non-JSON characters
    #         # Remove text before first '{' and after last '}'
    #         cleaned_text = text.strip()
    #         first_brace = cleaned_text.find('{')
    #         last_brace = cleaned_text.rfind('}')

    #         if first_brace != -1 and last_brace != -1:
    #             cleaned_text = cleaned_text[first_brace:last_brace+1]

    #         # Extraction strategies
    #         extraction_strategies = [
    #             # Direct parsing of cleaned text
    #             lambda t: json.loads(t),

    #             # Remove non-printable characters and try parsing
    #             lambda t: json.loads(re.sub(r'[^\x20-\x7E\n]', '', t)),

    #             # Extract JSON within code block
    #             lambda t: json.loads(re.search(r'```json\n(.*?)```', t, re.DOTALL).group(1) if re.search(r'```json\n(.*?)```', t, re.DOTALL) else '')
    #         ]

    #         # Try each extraction strategy
    #         for strategy in extraction_strategies:
    #             try:
    #                 parsed_json = strategy(cleaned_text)

    #                 # Additional validation to ensure it's a dictionary
    #                 if isinstance(parsed_json, dict):
    #                     logger.info("Successfully extracted and parsed JSON.")
    #                     return parsed_json
    #             except (json.JSONDecodeError, AttributeError, IndexError):
    #                 continue

    #         # Detailed logging for troubleshooting
    #         logger.warning(f"Could not extract valid JSON. Raw text: {text}")
    #         return {}

    #     except Exception as e:
    #         # Log the full error details
    #         logger.error(f"JSON Extraction Error: {e}", exc_info=True)
    #         st.error(f"JSON Extraction Error: {e}")
    #         return {}
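For context, this is roughly how cv_analyzer_search.py drives the engine; a minimal standalone sketch (the sample resume text is invented, and GROQ_API_KEY is assumed to be set in the environment):

from resume_advance_analysis import ResumeImprovementEngine

engine = ResumeImprovementEngine()  # assumes GROQ_API_KEY is exported

sample_resume = "Jane Doe\nData Analyst, 3 years\nSkills: Python, SQL, Tableau"
suggestions = engine.generate_resume_improvement_suggestions(sample_resume)

# The engine returns the structured JSON described in the prompt above.
for strength in suggestions.get("overall_assessment", {}).get("strengths", []):
    print("strength:", strength)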