app.py
CHANGED
@@ -12,6 +12,7 @@ from langchain.chat_models import ChatOpenAI
 from langchain.chains.question_answering import load_qa_chain
 from selenium import webdriver
 from selenium.webdriver.common.by import By
+from selenium.common.exceptions import NoSuchElementException
 import warnings
 warnings.filterwarnings('ignore')
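The new import matters because `find_element` raises `NoSuchElementException` when a selector matches nothing; the old code's bare `except:` swallowed every failure indiscriminately. A minimal sketch of the pattern this import enables (the URL and selector below are placeholders, not taken from the diff):

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.common.exceptions import NoSuchElementException

    driver = webdriver.Chrome()
    driver.get('https://example.com')
    try:
        # Click the button only if it is present in the DOM.
        driver.find_element(by=By.CSS_SELECTOR, value='button.show-more').click()
    except NoSuchElementException:
        # The element is missing on this page; continue without clicking.
        pass
    finally:
        driver.quit()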
@@ -121,165 +122,238 @@ class resume_analyzer:
         return response


-class linkedin_scrap:
-
-    def
+class linkedin_scraper:
+
+    def webdriver_setup():
+
+        options = webdriver.ChromeOptions()
+        options.add_argument('--headless')
+        options.add_argument('--no-sandbox')
+        options.add_argument('--disable-dev-shm-usage')
+
+        driver = webdriver.Chrome(options=options)
+        driver.maximize_window()
+        return driver
+
+
+    def get_userinput():
+
+        add_vertical_space(2)
+        with st.form(key='linkedin_scarp'):
+
+            add_vertical_space(1)
+            col1, col2 = st.columns([0.7, 0.3], gap='medium')
+            with col1:
+                job_title = st.text_input(label='Job Title')
+                job_title = job_title.split()
+            with col2:
+                job_count = st.number_input(label='Job Count', min_value=1, value=1, step=1)
+
+            # Submit Button
+            add_vertical_space(1)
+            submit = st.form_submit_button(label='Submit')
+            add_vertical_space(1)
+
+        return job_title, job_count, submit
+
+
+    def build_url(job_title):

         b = []
-        for i in
+        for i in job_title:
             x = i.split()
             y = '%20'.join(x)
             b.append(y)
-        job_title = '%2C%20'.join(b)

+        job_title = '%2C%20'.join(b)
         link = f"https://in.linkedin.com/jobs/search?keywords={job_title}&location=India&locationId=&geoId=102713980&f_TPR=r604800&position=1&pageNum=0"

+        return link
+
+
+    def link_open_scrolldown(driver, link, job_count):
+
+        # Open the Link in LinkedIn
         driver.get(link)
         driver.implicitly_wait(10)

+        # Scroll Down the Page
+        for i in range(0, job_count):
             driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
+            driver.implicitly_wait(5)

+            # Click on the See More Jobs Button if Present
             try:
                 x = driver.find_element(by=By.CSS_SELECTOR, value="button[aria-label='See more jobs']").click()
+                driver.implicitly_wait(5)
             except:
                 pass


-    def
-
-        return
-
-    def company_location(driver):
-
-        location = driver.find_elements(by=By.CSS_SELECTOR, value='span[class="job-search-card__location"]')
-
-        return company_location
-
-    def job_title(driver):
+    def job_title_filter(scrap_job_title, user_job_title_input):
+
+        # Convert the User Job Titles to Lower Case and Split into Words
+        user_input = []
+        for i in [i.lower() for i in user_job_title_input]:
+            user_input.extend(i.split())
+
+        # Convert the Scraped Job Title to Lower Case and Split into Words
+        scrap_title = [i.lower() for i in scrap_job_title.split()]
+
+        # Identify the Words Present in Both Lists
+        matched_words = list(set(user_input).intersection(set(scrap_title)))
+
+        # Return the Job Title if More Than 1 Word Matches, Else Return NaN
+        return scrap_job_title if len(matched_words) > 1 else np.nan
+
+
+    def scrap_company_data(driver, job_title_input, job_count):
+
+        # Scraping the Company Data
+        company = driver.find_elements(by=By.CSS_SELECTOR, value='h4[class="base-search-card__subtitle"]')
+        company_name = [i.text for i in company]
+
+        location = driver.find_elements(by=By.CSS_SELECTOR, value='span[class="job-search-card__location"]')
+        company_location = [i.text for i in location]

         title = driver.find_elements(by=By.CSS_SELECTOR, value='h3[class="base-search-card__title"]')
-
-        job_title = []
-
-        for i in title:
-            job_title.append(i.text)
-
-        return job_title
-
-
-    def job_url(driver):
+        job_title = [i.text for i in title]

         url = driver.find_elements(by=By.XPATH, value='//a[contains(@href, "/jobs/")]')
-
-        url_list = [i.get_attribute('href') for i in url]
-
-        job_url = []
-
-        for url in url_list:
-            job_url.append(url)
-
-        return job_url
-
-
-    def job_title_filter(x, user_job_title):
-
-        s = [i.lower() for i in user_job_title]
-        suggestion = []
-        for i in s:
-            suggestion.extend(i.split())
-
-        s = x.split()
-        a = [i.lower() for i in s]
-
-        intersection = list(set(suggestion).intersection(set(a)))
-        return x if len(intersection) > 1 else np.nan
-
-
-        time.sleep(2)
-
-            value='div[class="show-more-less-html__markup relative overflow-hidden"]')
-        driver.implicitly_wait(4)
-
-        for j in description:
-            return j.text
-
-
-    def data_scrap(driver, user_job_title):
-
-        df['Location'] = pd.DataFrame(linkedin_scrap.company_location(driver))
-        df['Website URL'] = pd.DataFrame(linkedin_scrap.job_url(driver))
-
-        # job title filter based on user input
-        df['Job Title'] = df['Job Title'].apply(lambda x: linkedin_scrap.job_title_filter(x, user_job_title))
-        df = df.dropna()
-        df.reset_index(drop=True, inplace=True)
-        df = df.iloc[:10, :]
-
-        # make a list after filter
+        website_url = [i.get_attribute('href') for i in url]
+
+        # Combine All the Data into a Single Dataframe
+        df = pd.DataFrame(company_name, columns=['Company Name'])
+        df['Job Title'] = pd.DataFrame(job_title)
+        df['Location'] = pd.DataFrame(company_location)
+        df['Website URL'] = pd.DataFrame(website_url)
+
+        # Filter the Scraped Job Titles Based on User Input
+        df['Job Title'] = df['Job Title'].apply(lambda x: linkedin_scraper.job_title_filter(x, job_title_input))
+
+        # Drop Null Values and Reset Index
+        df = df.dropna()
+        df.reset_index(drop=True, inplace=True)
+
+        # Keep Only the First job_count Rows
+        df = df.iloc[:job_count, :]
+
+        return df
+
+
+    def scrap_job_description(driver, df):
+
+        # Get the URLs into a List
         website_url = df['Website URL'].tolist()
-
-        #
+
+        # Scrap the Job Description
         job_description = []
         for i in range(0, len(website_url)):
-
+            # Open the URL
+            driver.get(website_url[i])
+            driver.implicitly_wait(10)
+            time.sleep(1)
+
+            try:
+                # Click on the Show More Button
+                driver.find_element(by=By.CSS_SELECTOR, value='button[data-tracking-control-name="public_jobs_show-more-html-btn"]').click()
+                driver.implicitly_wait(10)
+                time.sleep(1)
+
+            except NoSuchElementException:
+                # Reload the URL and Retry the Show More Button
+                driver.get(website_url[i])
+                driver.implicitly_wait(10)
+                time.sleep(1)
+
+                driver.find_element(by=By.CSS_SELECTOR, value='button[data-tracking-control-name="public_jobs_show-more-html-btn"]').click()
+                driver.implicitly_wait(10)
+                time.sleep(1)
+
+            # Get the Job Description
+            description = driver.find_elements(by=By.CSS_SELECTOR, value='div[class="show-more-less-html__markup relative overflow-hidden"]')
+            driver.implicitly_wait(10)
+            data = [i.text for i in description][0]
+
+            if len(data.strip()) > 0:
                 job_description.append(data)
             else:
                 job_description.append('Description Not Available')

+        # Add the Job Description to the Dataframe
         df['Job Description'] = pd.DataFrame(job_description, columns=['Description'])
         df = df.dropna()
         df.reset_index(drop=True, inplace=True)
         return df


-    def
-
-        options.add_argument('--disable-dev-shm-usage')
-
-        return final_df
+    def display_data_userinterface(df_final):
+
+        # Display the Data in the User Interface
+        add_vertical_space(1)
+        for i in range(0, len(df_final)):
+
+            st.write(f"Company Name : {df_final.iloc[i,0]}")
+            st.write(f"Job Title : {df_final.iloc[i,1]}")
+            st.write(f"Location : {df_final.iloc[i,2]}")
+            st.write(f"Website URL : {df_final.iloc[i,3]}")
+
+            with st.expander(label='Job Description'):
+                st.write(df_final.iloc[i, 4])
+            add_vertical_space(3)
+
+
+    def main():
+
+        # Initially set driver to None
+        # driver = None
+
+        # try:
+        job_title_input, job_count, submit = linkedin_scraper.get_userinput()
+        add_vertical_space(2)
+
+        if submit:
+            if job_title_input != '':
+
+                with st.spinner('Webdriver Setup Initializing...'):
+                    driver = linkedin_scraper.webdriver_setup()
+
+                with st.spinner('Build URL and Open Link...'):
+
+                    # Build the URL Based on the User Job Title Input
+                    link = linkedin_scraper.build_url(job_title_input)
+
+                    # Open the Link in LinkedIn and Scroll Down the Page
+                    linkedin_scraper.link_open_scrolldown(driver, link, job_count)
+
+                with st.spinner('Scraping Company Data...'):
+                    df = linkedin_scraper.scrap_company_data(driver, job_title_input, job_count)
+
+                with st.spinner('Scraping Job Description Data...'):
+                    df_final = linkedin_scraper.scrap_job_description(driver, df)
+
+                # Display the Data in the User Interface
+                linkedin_scraper.display_data_userinterface(df_final)
+
+            # If the User Clicks Submit and the Job Title is Empty
+            elif job_title_input == '':
+                st.markdown(f'<h5 style="text-align: center;color: orange;">Job Title is Empty</h5>',
+                            unsafe_allow_html=True)
+
+        # except Exception as e:
+        #     add_vertical_space(2)
+        #     st.markdown(f'<h5 style="text-align: center;color: orange;">{e}</h5>', unsafe_allow_html=True)
+
+        # finally:
+        #     if driver:
+        #         driver.quit()


+# Streamlit Configuration Setup
 streamlit_config()
 add_vertical_space(1)
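To see what `build_url` and `job_title_filter` actually produce, here is a small standalone sketch that mirrors their logic outside Selenium; the sample titles and the `title_filter` helper name are illustrative, not part of the commit:

    import numpy as np

    # Mirrors linkedin_scraper.build_url: URL-encode each title with %20
    # and join multiple titles with %2C%20 (an encoded ", ").
    titles = ['Data Scientist', 'Machine Learning Engineer']
    encoded = '%2C%20'.join('%20'.join(t.split()) for t in titles)
    print(encoded)
    # Data%20Scientist%2C%20Machine%20Learning%20Engineer

    # Mirrors linkedin_scraper.job_title_filter: keep a scraped title only
    # if it shares more than one word with the user's input.
    def title_filter(scraped, user_titles):
        wanted = {w for t in user_titles for w in t.lower().split()}
        matched = wanted.intersection(scraped.lower().split())
        return scraped if len(matched) > 1 else np.nan

    print(title_filter('Senior Data Scientist', titles))  # kept: 'data' and 'scientist' match
    print(title_filter('Data Analyst', titles))           # nan: only 'data' matches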
@@ -310,9 +384,8 @@ if option == 'Summary':
             st.write(result_summary)

     except Exception as e:
-
-
-        st.warning(e)
+        add_vertical_space(2)
+        st.markdown(f'<h5 style="text-align: center;color: orange;">{e}</h5>', unsafe_allow_html=True)


 elif option == 'Strength':
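The same two added lines repeat verbatim in this hunk and in the Strength, Weakness, and Job Titles hunks below. A small helper, sketched here under a hypothetical name that is not in the commit, would keep the four branches in sync:

    import streamlit as st
    from streamlit_extras.add_vertical_space import add_vertical_space

    # Hypothetical helper, not part of this commit: one place to style errors.
    def show_error(e: Exception):
        add_vertical_space(2)
        st.markdown(f'<h5 style="text-align: center;color: orange;">{e}</h5>',
                    unsafe_allow_html=True)

Each `except Exception as e:` block would then reduce to a single `show_error(e)` call.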
@@ -337,9 +410,8 @@ elif option == 'Strength':
             st.write(result_strength)

     except Exception as e:
-
-
-        st.warning(e)
+        add_vertical_space(2)
+        st.markdown(f'<h5 style="text-align: center;color: orange;">{e}</h5>', unsafe_allow_html=True)


 elif option == 'Weakness':
@@ -364,9 +436,8 @@ elif option == 'Weakness':
             st.write(result_weakness)

     except Exception as e:
-
-
-        st.warning(e)
+        add_vertical_space(2)
+        st.markdown(f'<h5 style="text-align: center;color: orange;">{e}</h5>', unsafe_allow_html=True)


 elif option == 'Job Titles':
@@ -390,47 +461,14 @@ elif option == 'Job Titles':
             st.write(result_suggestion)

     except Exception as e:
-
-
-        st.warning(e)
+        add_vertical_space(2)
+        st.markdown(f'<h5 style="text-align: center;color: orange;">{e}</h5>', unsafe_allow_html=True)


 elif option == 'Linkedin Jobs':

-    try:
-
-        user_input_job_title = st.text_input(label='Enter Job Titles (with comma separated):')
-        submit = st.button('Submit')
-
-        if submit and len(user_input_job_title) > 0:
-
-            user_job_title = user_input_job_title.split(',')
-
-            df = linkedin_scrap.main(user_job_title)
-
-            l = len(df['Company Name'])
-            for i in range(0, l):
-                st.write(f"Company Name : {df.iloc[i,0]}")
-                st.write(f"Job Title : {df.iloc[i,1]}")
-                st.write(f"Location : {df.iloc[i,2]}")
-                st.write(f"Website URL : {df.iloc[i,3]}")
-                with st.expander(label='Job Desription'):
-                    st.write(df.iloc[i, 4])
-                st.write('')
-                st.write('')
-
-        elif submit and len(user_input_job_title) == 0:
-            col1, col2 = st.columns(2)
-            with col1:
-                st.info('Please Enter the Job Titles')
-
-    except:
-        st.write('')
-        st.info("This feature is currently not working in the deployed Streamlit application due to a 'selenium.common.exceptions.WebDriverException' error.")
-        st.write('')
-
-        st.write(
-            "Please use the local Streamlit application for a smooth experience: [http://localhost:8501](http://localhost:8501)")
+    add_vertical_space(2)
+    linkedin_scraper.main()


 elif option == 'Exit':
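One loose end the commit leaves visible: the `try/finally` around `main()` is commented out, so the headless Chrome process is never closed when a scrape fails. A hedged sketch of the cleanup those comments point toward, assuming the same `webdriver_setup` options:

    from selenium import webdriver

    def run_with_cleanup():
        driver = None
        try:
            options = webdriver.ChromeOptions()
            options.add_argument('--headless')
            options.add_argument('--no-sandbox')
            options.add_argument('--disable-dev-shm-usage')
            driver = webdriver.Chrome(options=options)
            # ... scraping steps go here ...
        finally:
            # Always release the browser, even when scraping raises.
            if driver:
                driver.quit()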
|