gopiashokan commited on
Commit
e7d876e
·
verified ·
1 Parent(s): f15cd8a

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -25
app.py CHANGED
@@ -12,6 +12,7 @@ from langchain.chat_models import ChatOpenAI
12
  from langchain.chains.question_answering import load_qa_chain
13
  from selenium import webdriver
14
  from selenium.webdriver.common.by import By
 
15
  import warnings
16
  warnings.filterwarnings('ignore')
17
 
@@ -334,7 +335,7 @@ class linkedin_scraper:
334
  col1,col2,col3 = st.columns([0.5,0.3,0.2], gap='medium')
335
  with col1:
336
  job_title_input = st.text_input(label='Job Title')
337
- job_title_input = job_title_input.split()
338
  with col2:
339
  job_location = st.text_input(label='Job Location', value='India')
340
  with col3:
@@ -362,17 +363,39 @@ class linkedin_scraper:
362
  return link
363
 
364
 
365
- def link_open_scrolldown(driver, link, job_count):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
 
 
 
 
367
  # Open the Link in LinkedIn
368
- driver.get(link)
369
- driver.implicitly_wait(10)
370
 
371
  # Scroll Down the Page
372
  for i in range(0,job_count):
 
 
 
 
373
  driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
374
- driver.implicitly_wait(5)
375
-
376
  # Click on See More Jobs Button if Present
377
  try:
378
  x = driver.find_element(by=By.CSS_SELECTOR, value="button[aria-label='See more jobs']").click()
@@ -383,23 +406,23 @@ class linkedin_scraper:
383
 
384
  def job_title_filter(scrap_job_title, user_job_title_input):
385
 
386
- # User Job Title Convert Lower Case and Split into List
387
- user_input = []
388
- for i in [i.lower() for i in user_job_title_input]:
389
- user_input.extend(i.split())
390
 
391
- # scraped Job Title Convert Lower Case and Split into List
392
- scrap_title = [i.lower() for i in scrap_job_title.split()]
393
 
394
- # Identify Same Words in Both Lists
395
- matched_words = list(set(user_input).intersection(set(scrap_title)))
 
 
 
396
 
397
- # Return Job Title if there are more than 1 matched word else return NaN
398
- if len(user_input) > 1:
399
- return scrap_job_title if len(matched_words) > 1 else np.nan
400
-
401
  else:
402
- return scrap_job_title if len(matched_words) == 1 else np.nan
403
 
404
 
405
  def scrap_company_data(driver, job_title_input, job_location):
@@ -428,7 +451,7 @@ class linkedin_scraper:
428
 
429
  # Return Location if User Job Location in Scraped Location else return NaN
430
  df['Location'] = df['Location'].apply(lambda x: x if job_location.lower() in x.lower() else np.nan)
431
-
432
  # Drop Null Values and Reset Index
433
  df = df.dropna()
434
  df.reset_index(drop=True, inplace=True)
@@ -445,10 +468,8 @@ class linkedin_scraper:
445
  job_description, description_count = [], 0
446
  for i in range(0, len(website_url)):
447
  try:
448
- # Open the URL
449
- driver.get(website_url[i])
450
- driver.implicitly_wait(5)
451
- time.sleep(1)
452
 
453
  # Click on Show More Button
454
  driver.find_element(by=By.CSS_SELECTOR, value='button[data-tracking-control-name="public_jobs_show-more-html-btn"]').click()
@@ -563,7 +584,7 @@ add_vertical_space(5)
563
 
564
  with st.sidebar:
565
 
566
- add_vertical_space(3)
567
 
568
  option = option_menu(menu_title='', options=['Summary', 'Strength', 'Weakness', 'Job Titles', 'Linkedin Jobs'],
569
  icons=['house-fill', 'database-fill', 'pass-fill', 'list-ul', 'linkedin'])
 
12
  from langchain.chains.question_answering import load_qa_chain
13
  from selenium import webdriver
14
  from selenium.webdriver.common.by import By
15
+ from selenium.webdriver.common.keys import Keys
16
  import warnings
17
  warnings.filterwarnings('ignore')
18
 
 
335
  col1,col2,col3 = st.columns([0.5,0.3,0.2], gap='medium')
336
  with col1:
337
  job_title_input = st.text_input(label='Job Title')
338
+ job_title_input = job_title_input.split(',')
339
  with col2:
340
  job_location = st.text_input(label='Job Location', value='India')
341
  with col3:
 
363
  return link
364
 
365
 
366
+ def open_link(driver, link):
367
+
368
+ while True:
369
+
370
+ # Open the Link
371
+ driver.get(link)
372
+ driver.implicitly_wait(10)
373
+ time.sleep(3)
374
+
375
+ # Check the Page Loaded Correctly and Break the Loop
376
+ if driver.find_element(by=By.CSS_SELECTOR, value='span[class="switcher-tabs__placeholder-text m-auto"]'):
377
+ break
378
+
379
+ # Page not Loaded Properly, Again Open the Page
380
+ else:
381
+ driver.get(link)
382
+ driver.implicitly_wait(10)
383
+ time.sleep(3)
384
 
385
+
386
+ def link_open_scrolldown(driver, link, job_count):
387
+
388
  # Open the Link in LinkedIn
389
+ linkedin_scraper.open_link(driver, link)
 
390
 
391
  # Scroll Down the Page
392
  for i in range(0,job_count):
393
+ # Simulate clicking the Page Up button
394
+ body = driver.find_element(by=By.TAG_NAME, value='body')
395
+ body.send_keys(Keys.PAGE_UP)
396
+ # Scroll down the Page to the End
397
  driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
398
+ driver.implicitly_wait(2)
 
399
  # Click on See More Jobs Button if Present
400
  try:
401
  x = driver.find_element(by=By.CSS_SELECTOR, value="button[aria-label='See more jobs']").click()
 
406
 
407
  def job_title_filter(scrap_job_title, user_job_title_input):
408
 
409
+ # Convert the User Job Titles into Lower Case
410
+ user_input = [i.lower().strip() for i in user_job_title_input]
 
 
411
 
412
+ # Convert the Scraped Job Title into Lower Case
413
+ scrap_title = [i.lower().strip() for i in [scrap_job_title]]
414
 
415
+ # Verify Whether Any User Job Title Appears in the Scraped Job Title
416
+ confirmation_count = 0
417
+ for i in user_input:
418
+ if all(j in scrap_title[0] for j in i.split()):
419
+ confirmation_count += 1
420
 
421
+ # Return Job Title if confirmation_count greater than 0 else return NaN
422
+ if confirmation_count > 0:
423
+ return scrap_job_title
 
424
  else:
425
+ return np.nan
426
 
427
 
428
  def scrap_company_data(driver, job_title_input, job_location):
 
451
 
452
  # Return Location if User Job Location in Scraped Location else return NaN
453
  df['Location'] = df['Location'].apply(lambda x: x if job_location.lower() in x.lower() else np.nan)
454
+
455
  # Drop Null Values and Reset Index
456
  df = df.dropna()
457
  df.reset_index(drop=True, inplace=True)
 
468
  job_description, description_count = [], 0
469
  for i in range(0, len(website_url)):
470
  try:
471
+ # Open the Link in LinkedIn
472
+ linkedin_scraper.open_link(driver, website_url[i])
 
 
473
 
474
  # Click on Show More Button
475
  driver.find_element(by=By.CSS_SELECTOR, value='button[data-tracking-control-name="public_jobs_show-more-html-btn"]').click()
 
584
 
585
  with st.sidebar:
586
 
587
+ add_vertical_space(4)
588
 
589
  option = option_menu(menu_title='', options=['Summary', 'Strength', 'Weakness', 'Job Titles', 'Linkedin Jobs'],
590
  icons=['house-fill', 'database-fill', 'pass-fill', 'list-ul', 'linkedin'])