charulp2499 committed on
Commit
8308e31
·
verified ·
1 Parent(s): 69c661e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -27
app.py CHANGED
@@ -1,9 +1,9 @@
1
  import gradio as gr
2
  import logging
3
  from linkedin_jobs_scraper import LinkedinScraper
4
- from linkedin_jobs_scraper.events import Events, EventData
5
  from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
6
- from linkedin_jobs_scraper.filters import TimeFilters
7
  import pandas as pd
8
 
9
  # Configure logging
@@ -21,80 +21,93 @@ def on_data(data: EventData):
21
  'Location': data.location,
22
  'Job Link': data.link,
23
  'Description Length': len(data.description),
 
24
  })
25
 
26
  def on_end():
27
  print("[ON_END] Scraping completed.")
28
 
29
- # LinkedIn Scraper function with error handling
30
- def scrape_jobs(query, locations):
31
  global job_data
32
  try:
33
  job_data = []
34
-
35
  scraper = LinkedinScraper(
36
  chrome_executable_path=None,
37
  chrome_binary_location=None,
38
  chrome_options=None,
39
  headless=True,
40
- max_workers=5,
41
  slow_mo=0.8,
42
  page_load_timeout=60,
43
  )
44
-
45
- # Catching the exception for missing chrome and notify the user
46
  scraper.on(Events.DATA, on_data)
47
  scraper.on(Events.END, on_end)
48
 
 
 
 
 
 
 
 
49
  queries = [
50
  Query(
51
  query=query,
52
  options=QueryOptions(
53
  locations=locations.split(','),
54
  apply_link=True,
55
- skip_promoted_jobs=True,
56
  page_offset=0,
57
  limit=100,
58
  filters=QueryFilters(
59
- time=TimeFilters.DAY, # Specify desired time filter
 
60
  ),
61
  ),
62
  ),
63
  ]
64
 
65
  scraper.run(queries)
66
-
67
- # Convert to DataFrame and return
68
  if job_data:
69
- df = pd.DataFrame(job_data)
70
- return df, "Scraping successful"
 
71
  else:
72
- return pd.DataFrame(), "No jobs found"
 
73
  except Exception as e:
74
  # Handle errors gracefully
75
- message = f"Error occurred: {str(e)}"
76
  return None, message
77
 
78
- # Gradio interface
79
- def gradio_interface(query, locations):
80
- df, message = scrape_jobs(query, locations)
81
- return df, message
82
 
83
- # Gradio app layout
84
  iface = gr.Interface(
85
  fn=gradio_interface,
86
  inputs=[
87
  gr.Textbox(label="Job Query", placeholder="e.g., Data Scientist", value="Unity developers"),
88
- gr.Textbox(label="Locations (comma-separated)", placeholder="e.g., United States", value="United States"),
 
 
 
 
 
 
89
  ],
90
  outputs=[
91
- gr.Dataframe(label="Job Results", headers=["Date", "Title", "Company", "Location", "Job Link"], interactive=True),
92
  gr.Textbox(label="Message"),
93
  ],
94
- title="LinkedIn Job Scraper",
95
- description="Scrape LinkedIn for jobs based on query and locations.",
96
  )
97
 
98
- # Launch app
99
  if __name__ == "__main__":
100
- iface.launch()
 
1
  import gradio as gr
2
  import logging
3
  from linkedin_jobs_scraper import LinkedinScraper
4
+ from linkedin_jobs_scraper.events import Events, EventData, EventMetrics
5
  from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
6
+ from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, OnSiteOrRemoteFilters
7
  import pandas as pd
8
 
9
  # Configure logging
 
21
  'Location': data.location,
22
  'Job Link': data.link,
23
  'Description Length': len(data.description),
24
+ 'Description': data.description,
25
  })
26
 
def on_end():
    """Report on stdout that the scraper has finished.

    Takes no arguments and returns ``None``; it is registered as the
    handler for ``Events.END`` inside ``scrape_jobs``.
    """
    print("[ON_END] Scraping completed.")
29
 
30
# Scraper function
def scrape_jobs(query: str, locations: str, time_filter: str):
    """Scrape LinkedIn job postings for *query* and return them as a table.

    Args:
        query: Free-text job search string.
        locations: Comma-separated location names. Whitespace around each
            name is ignored, so "United States, India" and
            "United States,India" are equivalent.
        time_filter: Label chosen in the UI. "From Last 24 Hours" selects
            ``TimeFilters.DAY``; any other value selects
            ``TimeFilters.MONTH``.

    Returns:
        A ``(df, message)`` tuple:

        * a DataFrame built from the collected rows and a success message,
        * an empty DataFrame and ``'-'`` when nothing was collected,
        * ``None`` and an error message when anything raised.

        The function never propagates an exception to its caller.
    """
    # NOTE(review): rows are gathered through the module-level ``job_data``
    # list (presumably filled by the DATA handler), so overlapping calls
    # would share it -- confirm before serving concurrent users.
    global job_data
    try:
        job_data = []

        scraper = LinkedinScraper(
            chrome_executable_path=None,
            chrome_binary_location=None,
            chrome_options=None,
            headless=True,
            max_workers=10,
            slow_mo=0.8,
            page_load_timeout=60,
        )

        scraper.on(Events.DATA, on_data)
        scraper.on(Events.END, on_end)

        # Only the 24-hour label narrows the search; every other value
        # (including unexpected ones) means "past month".
        if time_filter == "From Last 24 Hours":
            selected_time = TimeFilters.DAY
        else:
            selected_time = TimeFilters.MONTH

        # Strip the whitespace left by "A, B" style input so each location
        # is passed as a clean name. Empty fragments are dropped unless that
        # would leave nothing, in which case blank input behaves as before.
        stripped = [part.strip() for part in locations.split(',')]
        location_names = [name for name in stripped if name] or stripped

        queries = [
            Query(
                query=query,
                options=QueryOptions(
                    locations=location_names,
                    apply_link=True,
                    skip_promoted_jobs=False,
                    page_offset=0,
                    limit=100,
                    filters=QueryFilters(
                        time=selected_time,
                    ),
                ),
            ),
        ]

        scraper.run(queries)

        if job_data:
            df = pd.DataFrame(job_data)
            message = f"Jobs ({len(job_data)}) data successfully scraped."
            return df, message
        else:
            return pd.DataFrame(), '-'

    except Exception as e:
        # Top-level boundary for the UI: keep the traceback in the log and
        # hand the user a short message instead of crashing the app.
        logging.exception("Scraping failed")
        message = f"An error occurred during scraping: {e}"
        return None, message
86
 
87
def gradio_interface(query, locations, time_filter):
    """Gradio callback: run the scraper and pass its result straight through.

    Args:
        query: Job search text from the "Job Query" textbox.
        locations: Comma-separated locations from the locations textbox.
        time_filter: Label selected in the "Time Filter" dropdown.

    Returns:
        The ``(df, message)`` pair produced by ``scrape_jobs``, which Gradio
        maps onto the results table and the message textbox.
    """
    return scrape_jobs(query, locations, time_filter)
 
90
 
91
# App layout: two textboxes and a time-filter dropdown feed gradio_interface;
# its (table, message) result fills the two outputs below.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Job Query", placeholder="e.g., Data Scientist", value="Unity developers"),
        gr.Textbox(
            label="Locations (comma-separated)",
            placeholder="e.g., United States, India",
            value="United States, United Kingdom, Canada, Germany, India",
        ),
        gr.Dropdown(
            label="Time Filter",
            choices=["From Past Month", "From Last 24 Hours"],
            value="From Past Month",  # Default option
            type="value",
        ),
    ],
    outputs=[
        # NOTE(review): these headers do not match the row keys visible in
        # the DATA handler ('Location', 'Job Link', 'Description Length',
        # 'Description') -- confirm which columns the table should advertise.
        gr.Dataframe(label="Job Results", headers=['Date', 'Company', 'ApplyLink'], interactive=True),
        gr.Textbox(label="Message"),
    ],
    title="Job Scraper",
    description="Enter a job query and locations to scrape job postings and display the results in a table.",
)

# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()