Spaces:
Build error
Build error
Anupam251272
commited on
Commit
•
a0f700b
1
Parent(s):
1827717
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,255 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import transformers
|
3 |
+
import gradio as gr
|
4 |
+
import pandas as pd
|
5 |
+
import numpy as np
|
6 |
+
from sklearn.model_selection import train_test_split
|
7 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
8 |
+
from linkedin_api import Linkedin
|
9 |
+
import os
|
10 |
+
from dotenv import load_dotenv
|
11 |
+
import re
|
12 |
+
from typing import List, Dict
|
13 |
+
from linkedin_v2 import linkedin
|
14 |
+
from transformers import AutoTokenizer, AutoModel
|
15 |
+
|
16 |
+
|
17 |
+
class LinkedInResumeScreeningApp:
    """Rank LinkedIn profiles against a job description and expose it via Gradio.

    Profiles come either from the LinkedIn client or, in demo mode, from a
    small built-in list. Each profile's headline, experience and skills are
    embedded with a MiniLM sentence-embedding model and compared to the job
    description by cosine similarity.
    """

    # Checkpoint used for both the tokenizer and the embedding model.
    EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'

    def __init__(self):
        """Read credentials from the environment, set up the client and the model.

        Credentials are looked up by variable NAME (optionally populated from a
        ``.env`` file by ``load_dotenv``); secret values must never be written
        into this file. When the client id, client secret or redirect URI is
        missing, or the LinkedIn client cannot be constructed, the app runs in
        demo mode.
        """
        # Load LinkedIn credentials from environment variables
        load_dotenv()
        self.linkedin_client_id = os.getenv('LINKEDIN_CLIENT_ID')
        self.linkedin_client_secret = os.getenv('LINKEDIN_CLIENT_SECRET')
        self.linkedin_redirect_uri = os.getenv('LINKEDIN_REDIRECT_URI')

        # Debug logging (reports presence only, never the values themselves)
        print("LinkedIn Credentials Status:")
        print(f"Client ID exists: {bool(self.linkedin_client_id)}")
        print(f"Client Secret exists: {bool(self.linkedin_client_secret)}")
        print(f"Redirect URI exists: {bool(self.linkedin_redirect_uri)}")

        # We also need an access token
        self.access_token = os.getenv('LINKEDIN_ACCESS_TOKEN')
        print(f"Access Token exists: {bool(self.access_token)}")

        app_credentials = [
            self.linkedin_client_id,
            self.linkedin_client_secret,
            self.linkedin_redirect_uri,
        ]
        if not all(app_credentials + [self.access_token]):
            print("WARNING: Some LinkedIn credentials are missing!")

        # Demo mode for testing without LinkedIn API
        self.demo_mode = not all(app_credentials)

        # Sample profiles for demo mode (e-mail addresses are placeholders)
        self.demo_profiles = [
            {
                'name': 'John Smith',
                'headline': 'Senior Data Scientist at Tech Corp',
                'email': 'john.smith@example.com',
                'profile_url': 'https://linkedin.com/in/john-smith',
                'skills': ['Python', 'Machine Learning', 'TensorFlow', 'Deep Learning', 'NLP'],
                'experience': 'Senior Data Scientist at Tech Corp: Leading ML projects\nData Scientist at AI Solutions: Developed predictive models'
            },
            {
                'name': 'Sarah Johnson',
                'headline': 'Full Stack Developer | React | Node.js',
                'email': 'sarah.johnson@example.com',
                'profile_url': 'https://linkedin.com/in/sarah-johnson',
                'skills': ['React', 'Node.js', 'TypeScript', 'AWS', 'Docker'],
                'experience': 'Full Stack Developer at Web Solutions: Building scalable applications\nFrontend Developer at StartupX: Developed React applications'
            },
            {
                'name': 'Michael Chen',
                'headline': 'Machine Learning Engineer | AI Researcher',
                'email': 'michael.chen@example.com',
                'profile_url': 'https://linkedin.com/in/michael-chen',
                'skills': ['PyTorch', 'Computer Vision', 'Deep Learning', 'Python', 'MLOps'],
                'experience': 'ML Engineer at AI Labs: Developing CV models\nResearch Scientist at Tech University: Published papers on deep learning'
            }
        ]

        # Always defined so later attribute access cannot raise AttributeError,
        # even when the client was never (or unsuccessfully) initialised.
        self.authentication = None
        self.linkedin = None
        if not self.demo_mode:
            self._init_linkedin_client()

        # Load pre-trained model for embedding generation
        self.tokenizer = AutoTokenizer.from_pretrained(self.EMBEDDING_MODEL_NAME)
        self.model = AutoModel.from_pretrained(self.EMBEDDING_MODEL_NAME)

        # Initialize cache for LinkedIn profiles
        self.profile_cache = {}

    def _init_linkedin_client(self):
        """Build the LinkedIn client; switch to demo mode if construction fails."""
        try:
            # Initialize LinkedIn API client
            self.authentication = linkedin.LinkedInAuthentication(
                self.linkedin_client_id,
                self.linkedin_client_secret,
                self.linkedin_redirect_uri,
                ['r_liteprofile', 'r_emailaddress', 'w_member_social']
            )

            if self.access_token:
                # NOTE(review): the client library may expect `token` to be an
                # AccessToken-style object rather than a raw string - confirm
                # against the linkedin_v2 package before relying on this.
                self.authentication.token = self.access_token
                print("Successfully set LinkedIn access token")
            else:
                print("No access token found - authentication will fail")

            self.linkedin = linkedin.LinkedInApplication(self.authentication)
            print("LinkedIn API connection initialized successfully")
        except Exception as e:
            print(f"Error initializing LinkedIn API: {str(e)}")
            print("Falling back to demo mode")
            # Make the announced fallback real: without this the app would
            # later try to use a client that does not exist.
            self.authentication = None
            self.linkedin = None
            self.demo_mode = True

    def search_linkedin_profiles(self, keywords: str, limit: int = 20) -> List[Dict]:
        """
        Search LinkedIn for profiles matching the given keywords.
        Falls back to demo profiles if LinkedIn API is not configured.

        Args:
            keywords: Search terms forwarded to the LinkedIn client.
            limit: Maximum number of profiles to return. Coerced to a
                non-negative int because UI sliders may deliver floats.

        Returns:
            A list of dicts with the keys ``name``, ``headline``, ``email``,
            ``profile_url``, ``skills`` and ``experience``. An empty list is
            returned when the search raises.
        """
        limit = max(int(limit), 0)

        if self.demo_mode:
            print("Running in demo mode with sample profiles")
            return self.demo_profiles[:limit]

        try:
            # Search for people on LinkedIn using the v2 API
            search_params = {
                'keywords': keywords,
                'count': limit,
                'facet': 'network,|S,F'
            }

            search_results = self.linkedin.search_profile(
                selectors=[
                    'id', 'first-name', 'last-name', 'headline',
                    'public-profile-url', 'email-address'
                ],
                params=search_params
            )

            profiles = []
            for profile in search_results.get('people', {}).get('values', []):
                profile_id = profile.get('id')
                full_name = f"{profile.get('firstName', '')} {profile.get('lastName', '')}"
                # Format profile data
                profiles.append({
                    # strip() avoids a lone space when both name parts are missing
                    'name': full_name.strip(),
                    'headline': profile.get('headline', ''),
                    'email': profile.get('emailAddress', ''),
                    'profile_url': profile.get('publicProfileUrl', ''),
                    'skills': self._get_profile_skills(profile_id),
                    'experience': self._get_profile_experience(profile_id)
                })

            return profiles
        except Exception as e:
            print(f"Error searching LinkedIn profiles: {str(e)}")
            return []

    def _get_profile_skills(self, profile_id: str) -> List[str]:
        """
        Get skills for a specific profile

        Best-effort: returns an empty list if the lookup raises.
        """
        try:
            skills = self.linkedin.get_profile(
                profile_id,
                selectors=['skills']
            )
            return [skill['name'] for skill in skills.get('skills', {}).get('values', [])]
        except Exception as e:
            # Deliberately non-fatal: one bad profile must not abort the search.
            print(f"Error fetching skills for profile {profile_id}: {str(e)}")
            return []

    def _get_profile_experience(self, profile_id: str) -> str:
        """
        Get formatted experience for a specific profile

        Returns one "<title> at <company>: <summary>" line per position,
        joined by newlines. Best-effort: returns '' if the lookup raises.
        """
        try:
            experience = self.linkedin.get_profile(
                profile_id,
                selectors=['positions']
            )

            formatted = []
            for position in experience.get('positions', {}).get('values', []):
                company = position.get('company', {}).get('name', '')
                title = position.get('title', '')
                description = position.get('summary', '')
                formatted.append(f"{title} at {company}: {description}")

            return '\n'.join(formatted)
        except Exception as e:
            # Deliberately non-fatal: one bad profile must not abort the search.
            print(f"Error fetching experience for profile {profile_id}: {str(e)}")
            return ''

    def screen_candidates(self, job_description: str, keywords: str, limit: int = 10):
        """
        Screen candidates from LinkedIn based on job description

        Args:
            job_description: Free text the candidates are compared against.
            keywords: Search terms used to fetch candidate profiles.
            limit: Maximum number of profiles to fetch.

        Returns:
            A list of result dicts (``Name``, ``Email``, ``Headline``,
            ``Profile URL``, ``Similarity Score``) ordered from most to least
            similar, or a single-element list holding an ``Error`` entry when
            no profiles were found.
        """
        # Search LinkedIn for matching profiles
        profiles = self.search_linkedin_profiles(keywords, limit)

        if not profiles:
            return [{"Error": "No profiles found or LinkedIn API access error"}]

        # Generate embeddings for job description and profiles
        job_embed = self.generate_embeddings([job_description])

        profile_texts = [
            f"{p['headline']}\n{p['experience']}\n{' '.join(p['skills'])}"
            for p in profiles
        ]
        profile_embeds = self.generate_embeddings(profile_texts)

        # Calculate similarities (row 0 = the single job description)
        similarities = cosine_similarity(job_embed, profile_embeds)[0]

        # Sort profiles by similarity, best match first
        sorted_indices = similarities.argsort()[::-1]

        # Format results
        results = []
        for idx in sorted_indices:
            profile = profiles[idx]
            results.append({
                'Name': profile['name'],
                'Email': profile['email'],
                'Headline': profile['headline'],
                'Profile URL': profile['profile_url'],
                'Similarity Score': f"{similarities[idx]*100:.2f}%"
            })

        return results

    def generate_embeddings(self, texts):
        """Return one mean-pooled embedding per entry of ``texts``.

        Inputs are padded to a common length and truncated to at most 512
        tokens; the forward pass runs under ``torch.no_grad()``.
        """
        with torch.no_grad():
            inputs = self.tokenizer(texts, padding=True, truncation=True, return_tensors='pt', max_length=512)
            outputs = self.model(**inputs)
            embeddings = self._mean_pooling(outputs, inputs['attention_mask'])
        return embeddings

    def _mean_pooling(self, model_output, attention_mask):
        """Average the token embeddings of each sequence, ignoring masked positions.

        ``model_output[0]`` is taken as the per-token embeddings. Positions
        whose ``attention_mask`` entry is 0 contribute nothing to the sum, and
        the divisor is clamped so an all-zero mask cannot divide by zero.
        """
        token_embeddings = model_output[0]
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

    def gradio_interface(self):
        """Build and return the Gradio interface wrapping ``screen_candidates``."""
        iface = gr.Interface(
            fn=self.screen_candidates,
            inputs=[
                gr.Textbox(label="Job Description"),
                gr.Textbox(label="LinkedIn Search Keywords"),
                gr.Slider(minimum=5, maximum=50, value=10, step=5, label="Number of candidates to screen")
            ],
            outputs=gr.JSON(label="Matching Candidates"),
            title="AI Resume Screening with LinkedIn Integration",
            description="Search LinkedIn profiles and match candidates to job descriptions using AI embeddings."
        )
        return iface
|
250 |
+
|
251 |
+
# Script entry point: build the screening app and serve its Gradio UI.
if __name__ == "__main__":
    # `demo` is kept as a module-level name; construction and interface
    # creation are chained because the app object is not needed afterwards.
    demo = LinkedInResumeScreeningApp().gradio_interface()
    demo.launch(debug=True)
|