Spaces:

Anupam251272
/

catcey-recruitai

Build error

App Files Files Community

Anupam251272 committed on Dec 19, 2024

Commit

a0f700b

verified ·

1 Parent(s): 1827717

Create app.py

Browse files

Files changed (1) hide show

app.py +255 -0

app.py ADDED Viewed

	@@ -0,0 +1,255 @@

+import torch
+import transformers
+import gradio as gr
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.metrics.pairwise import cosine_similarity
+from linkedin_api import Linkedin
+import os
+from dotenv import load_dotenv
+import re
+from typing import List, Dict
+from linkedin_v2 import linkedin
+from transformers import AutoTokenizer, AutoModel
+class LinkedInResumeScreeningApp:
+    def __init__(self):
+        # Load LinkedIn credentials from environment variables
+        load_dotenv()
+        self.linkedin_client_id = os.getenv('77sd5p8vsgyf2w')
+        self.linkedin_client_secret = os.getenv('WPL_AP1.q9WCdX7Yf1z6YmEy.guv26Q==')
+        self.linkedin_redirect_uri = os.getenv('https://www.linkedin.com/in/anupam-joshi-980840290/')
+        # Debug logging
+        print("LinkedIn Credentials Status:")
+        print(f"Client ID exists: {bool(self.linkedin_client_id)}")
+        print(f"Client Secret exists: {bool(self.linkedin_client_secret)}")
+        print(f"Redirect URI exists: {bool(self.linkedin_redirect_uri)}")
+        # We also need an access token
+        self.access_token = os.getenv('AQVF8Ec70U1Qyajgk1Czv8Mk0WD8fs-PtWhPgFbxfGoUTFMy3XT8OmYkzx3riZNeIf-HJGMubPz6RVABfYV1JkGCHxUOfvddCsYHcwHLJ9mzTHCNag8Knkrf3ywzp1GowBxaSssi6lgnC01VfyAaN_qiC1RlJwEya2gEHRDkEHaYbb70tSjSx2Zk1yFPUiXO-uvG0AsDR0mGuBtf0nzkbEYnzAsEuhtMnUo3NGZhc8tzNgCKga6t7NOJ5aha7XmWo3M-P0rruGUeiVsfuM_wF2NnGv1wqICUuUNZ1t4MZbOJzfEhrBgsqu7fKp5MvQD8oSvHzX75t3R8m_dg-oSfFCo7GKS7xg')
+        print(f"Access Token exists: {bool(self.access_token)}")
+        if not all([self.linkedin_client_id, self.linkedin_client_secret,
+                   self.linkedin_redirect_uri, self.access_token]):
+            print("WARNING: Some LinkedIn credentials are missing!")
+        # Demo mode for testing without LinkedIn API
+        self.demo_mode = not all([self.linkedin_client_id, self.linkedin_client_secret, self.linkedin_redirect_uri])
+        # Sample profiles for demo mode
+        self.demo_profiles = [
+            {
+                'name': 'John Smith',
+                'headline': 'Senior Data Scientist at Tech Corp',
+                'email': '[email protected]',
+                'profile_url': 'https://linkedin.com/in/john-smith',
+                'skills': ['Python', 'Machine Learning', 'TensorFlow', 'Deep Learning', 'NLP'],
+                'experience': 'Senior Data Scientist at Tech Corp: Leading ML projects\nData Scientist at AI Solutions: Developed predictive models'
+            },
+            {
+                'name': 'Sarah Johnson',
+                'headline': 'Full Stack Developer | React | Node.js',
+                'email': '[email protected]',
+                'profile_url': 'https://linkedin.com/in/sarah-johnson',
+                'skills': ['React', 'Node.js', 'TypeScript', 'AWS', 'Docker'],
+                'experience': 'Full Stack Developer at Web Solutions: Building scalable applications\nFrontend Developer at StartupX: Developed React applications'
+            },
+            {
+                'name': 'Michael Chen',
+                'headline': 'Machine Learning Engineer | AI Researcher',
+                'email': '[email protected]',
+                'profile_url': 'https://linkedin.com/in/michael-chen',
+                'skills': ['PyTorch', 'Computer Vision', 'Deep Learning', 'Python', 'MLOps'],
+                'experience': 'ML Engineer at AI Labs: Developing CV models\nResearch Scientist at Tech University: Published papers on deep learning'
+            }
+        ]
+        try:
+            # Initialize LinkedIn API client
+            self.authentication = linkedin.LinkedInAuthentication(
+                self.linkedin_client_id,
+                self.linkedin_client_secret,
+                self.linkedin_redirect_uri,
+                ['r_liteprofile', 'r_emailaddress', 'w_member_social']
+            )
+            if self.access_token:
+                self.authentication.token = self.access_token
+                print("Successfully set LinkedIn access token")
+            else:
+                print("No access token found - authentication will fail")
+            # Test the connection
+            self.linkedin = linkedin.LinkedInApplication(self.authentication)
+            print("LinkedIn API connection initialized successfully")
+        except Exception as e:
+            print(f"Error initializing LinkedIn API: {str(e)}")
+            print("Falling back to demo mode")
+        # Load access token if available
+        self.access_token = os.getenv('LINKEDIN_ACCESS_TOKEN')
+        if self.access_token:
+            self.authentication.token = self.access_token
+        self.linkedin = linkedin.LinkedInApplication(self.authentication)
+        # Load pre-trained model for embedding generation
+        self.tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
+        self.model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
+        # Initialize cache for LinkedIn profiles
+        self.profile_cache = {}
+    def search_linkedin_profiles(self, keywords: str, limit: int = 20) -> List[Dict]:
+        """
+        Search LinkedIn for profiles matching the given keywords.
+        Falls back to demo profiles if LinkedIn API is not configured.
+        """
+        if self.demo_mode:
+            print("Running in demo mode with sample profiles")
+            return self.demo_profiles
+        try:
+            # Search for people on LinkedIn using the v2 API
+            search_params = {
+                'keywords': keywords,
+                'count': limit,
+                'facet': 'network,|S,F'
+            }
+            search_results = self.linkedin.search_profile(
+                selectors=[
+                    'id', 'first-name', 'last-name', 'headline',
+                    'public-profile-url', 'email-address'
+                ],
+                params=search_params
+            )
+            profiles = []
+            for profile in search_results.get('people', {}).get('values', []):
+                # Format profile data
+                formatted_profile = {
+                    'name': f"{profile.get('firstName', '')} {profile.get('lastName', '')}",
+                    'headline': profile.get('headline', ''),
+                    'email': profile.get('emailAddress', ''),
+                    'profile_url': profile.get('publicProfileUrl', ''),
+                    'skills': self._get_profile_skills(profile.get('id')),
+                    'experience': self._get_profile_experience(profile.get('id'))
+                }
+                profiles.append(formatted_profile)
+            return profiles
+        except Exception as e:
+            print(f"Error searching LinkedIn profiles: {str(e)}")
+            return []
+    def _get_profile_skills(self, profile_id: str) -> List[str]:
+        """
+        Get skills for a specific profile
+        """
+        try:
+            skills = self.linkedin.get_profile(
+                profile_id,
+                selectors=['skills']
+            )
+            return [skill['name'] for skill in skills.get('skills', {}).get('values', [])]
+        except:
+            return []
+    def _get_profile_experience(self, profile_id: str) -> str:
+        """
+        Get formatted experience for a specific profile
+        """
+        try:
+            experience = self.linkedin.get_profile(
+                profile_id,
+                selectors=['positions']
+            )
+            formatted = []
+            for position in experience.get('positions', {}).get('values', []):
+                company = position.get('company', {}).get('name', '')
+                title = position.get('title', '')
+                description = position.get('summary', '')
+                formatted.append(f"{title} at {company}: {description}")
+            return '\n'.join(formatted)
+        except:
+            return ''
+    def screen_candidates(self, job_description: str, keywords: str, limit: int = 10):
+        """
+        Screen candidates from LinkedIn based on job description
+        """
+        # Search LinkedIn for matching profiles
+        profiles = self.search_linkedin_profiles(keywords, limit)
+        if not profiles:
+            return [{"Error": "No profiles found or LinkedIn API access error"}]
+        # Generate embeddings for job description and profiles
+        job_embed = self.generate_embeddings([job_description])
+        profile_texts = [
+            f"{p['headline']}\n{p['experience']}\n{' '.join(p['skills'])}"
+            for p in profiles
+        ]
+        profile_embeds = self.generate_embeddings(profile_texts)
+        # Calculate similarities
+        similarities = cosine_similarity(job_embed, profile_embeds)[0]
+        # Sort profiles by similarity
+        sorted_indices = similarities.argsort()[::-1]
+        # Format results
+        results = []
+        for idx in sorted_indices:
+            profile = profiles[idx]
+            results.append({
+                'Name': profile['name'],
+                'Email': profile['email'],
+                'Headline': profile['headline'],
+                'Profile URL': profile['profile_url'],
+                'Similarity Score': f"{similarities[idx]*100:.2f}%"
+            })
+        return results
+    def generate_embeddings(self, texts):
+        # Generate embeddings for given texts
+        with torch.no_grad():
+            inputs = self.tokenizer(texts, padding=True, truncation=True, return_tensors='pt', max_length=512)
+            outputs = self.model(**inputs)
+            embeddings = self._mean_pooling(outputs, inputs['attention_mask'])
+        return embeddings
+    def _mean_pooling(self, model_output, attention_mask):
+        token_embeddings = model_output[0]
+        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+    def gradio_interface(self):
+        # Create Gradio interface
+        iface = gr.Interface(
+            fn=self.screen_candidates,
+            inputs=[
+                gr.Textbox(label="Job Description"),
+                gr.Textbox(label="LinkedIn Search Keywords"),
+                gr.Slider(minimum=5, maximum=50, value=10, step=5, label="Number of candidates to screen")
+            ],
+            outputs=gr.JSON(label="Matching Candidates"),
+            title="AI Resume Screening with LinkedIn Integration",
+            description="Search LinkedIn profiles and match candidates to job descriptions using AI embeddings."
+        )
+        return iface
+# Initialize and launch the app
+# Runs only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    # Constructing the app reads the LinkedIn credentials and loads the embedding model
+    app = LinkedInResumeScreeningApp()
+    # Build the Gradio UI around app.screen_candidates, then start serving it
+    demo = app.gradio_interface()
+    demo.launch(debug=True)