Spaces:
Running
Running
Upload app.py
Browse files
app.py
CHANGED
@@ -12,6 +12,7 @@ from langchain.chat_models import ChatOpenAI
|
|
12 |
from langchain.chains.question_answering import load_qa_chain
|
13 |
from selenium import webdriver
|
14 |
from selenium.webdriver.common.by import By
|
|
|
15 |
import warnings
|
16 |
warnings.filterwarnings('ignore')
|
17 |
|
@@ -334,7 +335,7 @@ class linkedin_scraper:
|
|
334 |
col1,col2,col3 = st.columns([0.5,0.3,0.2], gap='medium')
|
335 |
with col1:
|
336 |
job_title_input = st.text_input(label='Job Title')
|
337 |
-
job_title_input = job_title_input.split()
|
338 |
with col2:
|
339 |
job_location = st.text_input(label='Job Location', value='India')
|
340 |
with col3:
|
@@ -362,17 +363,39 @@ class linkedin_scraper:
|
|
362 |
return link
|
363 |
|
364 |
|
365 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
366 |
|
|
|
|
|
|
|
367 |
# Open the Link in LinkedIn
|
368 |
-
|
369 |
-
driver.implicitly_wait(10)
|
370 |
|
371 |
# Scroll Down the Page
|
372 |
for i in range(0,job_count):
|
|
|
|
|
|
|
|
|
373 |
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
|
374 |
-
driver.implicitly_wait(
|
375 |
-
|
376 |
# Click on See More Jobs Button if Present
|
377 |
try:
|
378 |
x = driver.find_element(by=By.CSS_SELECTOR, value="button[aria-label='See more jobs']").click()
|
@@ -383,23 +406,23 @@ class linkedin_scraper:
|
|
383 |
|
384 |
def job_title_filter(scrap_job_title, user_job_title_input):
|
385 |
|
386 |
-
# User Job Title Convert Lower Case
|
387 |
-
user_input = []
|
388 |
-
for i in [i.lower() for i in user_job_title_input]:
|
389 |
-
user_input.extend(i.split())
|
390 |
|
391 |
-
# scraped Job Title Convert Lower Case
|
392 |
-
scrap_title = [i.lower() for i in scrap_job_title
|
393 |
|
394 |
-
#
|
395 |
-
|
|
|
|
|
|
|
396 |
|
397 |
-
# Return Job Title if
|
398 |
-
if
|
399 |
-
return scrap_job_title
|
400 |
-
|
401 |
else:
|
402 |
-
return
|
403 |
|
404 |
|
405 |
def scrap_company_data(driver, job_title_input, job_location):
|
@@ -428,7 +451,7 @@ class linkedin_scraper:
|
|
428 |
|
429 |
# Return Location if User Job Location in Scraped Location else return NaN
|
430 |
df['Location'] = df['Location'].apply(lambda x: x if job_location.lower() in x.lower() else np.nan)
|
431 |
-
|
432 |
# Drop Null Values and Reset Index
|
433 |
df = df.dropna()
|
434 |
df.reset_index(drop=True, inplace=True)
|
@@ -445,10 +468,8 @@ class linkedin_scraper:
|
|
445 |
job_description, description_count = [], 0
|
446 |
for i in range(0, len(website_url)):
|
447 |
try:
|
448 |
-
# Open the
|
449 |
-
|
450 |
-
driver.implicitly_wait(5)
|
451 |
-
time.sleep(1)
|
452 |
|
453 |
# Click on Show More Button
|
454 |
driver.find_element(by=By.CSS_SELECTOR, value='button[data-tracking-control-name="public_jobs_show-more-html-btn"]').click()
|
@@ -563,7 +584,7 @@ add_vertical_space(5)
|
|
563 |
|
564 |
with st.sidebar:
|
565 |
|
566 |
-
add_vertical_space(
|
567 |
|
568 |
option = option_menu(menu_title='', options=['Summary', 'Strength', 'Weakness', 'Job Titles', 'Linkedin Jobs'],
|
569 |
icons=['house-fill', 'database-fill', 'pass-fill', 'list-ul', 'linkedin'])
|
|
|
12 |
from langchain.chains.question_answering import load_qa_chain
|
13 |
from selenium import webdriver
|
14 |
from selenium.webdriver.common.by import By
|
15 |
+
from selenium.webdriver.common.keys import Keys
|
16 |
import warnings
|
17 |
warnings.filterwarnings('ignore')
|
18 |
|
|
|
335 |
col1,col2,col3 = st.columns([0.5,0.3,0.2], gap='medium')
|
336 |
with col1:
|
337 |
job_title_input = st.text_input(label='Job Title')
|
338 |
+
job_title_input = job_title_input.split(',')
|
339 |
with col2:
|
340 |
job_location = st.text_input(label='Job Location', value='India')
|
341 |
with col3:
|
|
|
363 |
return link
|
364 |
|
365 |
|
366 |
+
def open_link(driver, link, max_attempts=5):
    """Open ``link`` in the browser and retry until the page has loaded.

    The page counts as loaded when it contains at least one element matching
    the ``switcher-tabs__placeholder-text`` span selector used below.

    Args:
        driver: Selenium WebDriver instance used to load the page.
        link: URL to open.
        max_attempts: Maximum number of times the page is (re)loaded before
            giving up. Bounded so a page that never shows the marker element
            cannot hang the caller forever.

    Raises:
        NoSuchElementException: If the marker element is still missing after
            ``max_attempts`` loads (the same exception type a failed
            ``find_element`` lookup raises).
    """
    # Local import keeps this block self-contained; selenium is already a
    # dependency of this file.
    from selenium.common.exceptions import NoSuchElementException

    loaded_marker = 'span[class="switcher-tabs__placeholder-text m-auto"]'

    for _ in range(max_attempts):
        # Open the Link
        driver.get(link)
        driver.implicitly_wait(10)
        time.sleep(3)

        # Check the Page Loaded Correctly and Stop Retrying.
        # find_elements() returns an empty list when nothing matches, whereas
        # find_element() raises, which previously made the retry unreachable.
        if driver.find_elements(by=By.CSS_SELECTOR, value=loaded_marker):
            return

        # Page not Loaded Properly: the next iteration opens the page again.

    raise NoSuchElementException(
        f'Page did not load correctly after {max_attempts} attempts: {link}'
    )
|
384 |
|
385 |
+
|
386 |
+
def link_open_scrolldown(driver, link, job_count):
|
387 |
+
|
388 |
# Open the Link in LinkedIn
|
389 |
+
linkedin_scraper.open_link(driver, link)
|
|
|
390 |
|
391 |
# Scroll Down the Page
|
392 |
for i in range(0,job_count):
|
393 |
+
# Simulate clicking the Page Up button
|
394 |
+
body = driver.find_element(by=By.TAG_NAME, value='body')
|
395 |
+
body.send_keys(Keys.PAGE_UP)
|
396 |
+
# Scroll down the Page to End
|
397 |
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
|
398 |
+
driver.implicitly_wait(2)
|
|
|
399 |
# Click on See More Jobs Button if Present
|
400 |
try:
|
401 |
x = driver.find_element(by=By.CSS_SELECTOR, value="button[aria-label='See more jobs']").click()
|
|
|
406 |
|
407 |
def job_title_filter(scrap_job_title, user_job_title_input):
    """Keep a scraped job title only if it matches one of the user's titles.

    A user title matches when every whitespace-separated word of it occurs,
    case-insensitively and as a substring, in the scraped title.

    Args:
        scrap_job_title: Job title text scraped from a listing.
        user_job_title_input: Iterable of job title strings entered by the user.

    Returns:
        ``scrap_job_title`` unchanged when at least one user title matches,
        otherwise ``np.nan``.
    """
    # A missing or non-text scraped title can never match.
    if not isinstance(scrap_job_title, str):
        return np.nan

    # Scraped Job Title Convert into Lower Case
    scrap_title = scrap_job_title.lower().strip()

    # User Job Titles Convert into Lower Case Words
    entries = [i.lower().split() for i in user_job_title_input]

    # Drop blank entries (e.g. from a trailing comma): all() over an empty
    # word list is True, so a blank entry would match every scraped title.
    keyword_groups = [words for words in entries if words]

    # Only blank entries were given: nothing to filter on, keep the title.
    if entries and not keyword_groups:
        return scrap_job_title

    # Return Job Title if Any User Job Title is in the scraped Job Title
    if any(all(word in scrap_title for word in words) for words in keyword_groups):
        return scrap_job_title

    return np.nan
|
426 |
|
427 |
|
428 |
def scrap_company_data(driver, job_title_input, job_location):
|
|
|
451 |
|
452 |
# Return Location if User Job Location in Scraped Location else return NaN
|
453 |
df['Location'] = df['Location'].apply(lambda x: x if job_location.lower() in x.lower() else np.nan)
|
454 |
+
|
455 |
# Drop Null Values and Reset Index
|
456 |
df = df.dropna()
|
457 |
df.reset_index(drop=True, inplace=True)
|
|
|
468 |
job_description, description_count = [], 0
|
469 |
for i in range(0, len(website_url)):
|
470 |
try:
|
471 |
+
# Open the Link in LinkedIn
|
472 |
+
linkedin_scraper.open_link(driver, website_url[i])
|
|
|
|
|
473 |
|
474 |
# Click on Show More Button
|
475 |
driver.find_element(by=By.CSS_SELECTOR, value='button[data-tracking-control-name="public_jobs_show-more-html-btn"]').click()
|
|
|
584 |
|
585 |
with st.sidebar:
|
586 |
|
587 |
+
add_vertical_space(4)
|
588 |
|
589 |
option = option_menu(menu_title='', options=['Summary', 'Strength', 'Weakness', 'Job Titles', 'Linkedin Jobs'],
|
590 |
icons=['house-fill', 'database-fill', 'pass-fill', 'list-ul', 'linkedin'])
|