charulp2499 committed
Commit 52c07e0 · verified · 1 Parent(s): 8ea77e6

Update app.py

Files changed (1)
  app.py: +24 −76
app.py CHANGED
@@ -1,9 +1,8 @@
-import gradio as gr
 import logging
 from linkedin_jobs_scraper import LinkedinScraper
-from linkedin_jobs_scraper.events import Events, EventData, EventMetrics
+from linkedin_jobs_scraper.events import Events, EventData
 from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
-from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, OnSiteOrRemoteFilters
+from linkedin_jobs_scraper.filters import TimeFilters
 import pandas as pd
 
 # Configure logging
@@ -19,38 +18,30 @@ def on_data(data: EventData):
         'Title': data.title,
         'Company': data.company,
         'Location': data.location,
-        # 'Company Link': data.company_link,
         'Job Link': data.link,
-        # 'Insights': data.insights,
         'Description Length': len(data.description),
     })
 
 def on_end():
     print("[ON_END] Scraping completed.")
 
-# LinkedIn Scraper function
-def scrape_jobs(query, locations, time_filter):
+# LinkedIn Scraper function with error handling
+def scrape_jobs(query, locations):
     global job_data
     try:
         job_data = []
-
-        if time_filter == "From Past Month":
-            time_filter = TimeFilters.MONTH
-        elif time_filter == "From Last 24 Hours":
-            time_filter = TimeFilters.DAY
-        else:
-            time_filter = TimeFilters.MONTH
-
+
         scraper = LinkedinScraper(
             chrome_executable_path=None,
             chrome_binary_location=None,
             chrome_options=None,
             headless=True,
-            max_workers=10,
+            max_workers=5,
             slow_mo=0.8,
             page_load_timeout=60,
         )
-
+
+        # Catching the exception for missing chrome and notify the user
         scraper.on(Events.DATA, on_data)
         scraper.on(Events.END, on_end)
 
@@ -60,13 +51,11 @@ def scrape_jobs(query, locations, time_filter):
             options=QueryOptions(
                 locations=locations.split(','),
                 apply_link=True,
-                skip_promoted_jobs=False,
+                skip_promoted_jobs=True,
                 page_offset=0,
                 limit=100,
                 filters=QueryFilters(
-                    # relevance=RelevanceFilters.RECENT,
-                    time=time_filter,
-                    # on_site_or_remote=OnSiteOrRemoteFilters.REMOTE,
+                    time=TimeFilters.DAY,  # Specify desired time filter
                 ),
             ),
         ),
@@ -75,77 +64,36 @@ def scrape_jobs(query, locations, time_filter):
         scraper.run(queries)
 
         # Convert to DataFrame and return
-        # Save the job data to a CSV file after scraping ends
-        # if job_data:
-        #     # Save the job data to a CSV file
-        #     file_name = "jobs_data.csv"
-        #     df = pd.DataFrame(job_data)
-        #     df.to_csv(file_name, index=False)
-        #     message = f"Jobs data saved to {file_name}"
-        #     return file_name, message  # Return the CSV file path and success message
-        # else:
-        #     message = "No job data found for the given query and locations."
-        #     return None, message
         if job_data:
-            df = pd.DataFrame(job_data)
-            message = f"Jobs ({len(job_data)}) data successfully scraped."
-            return df, message  # Return DataFrame and message
+            df = pd.DataFrame(job_data)
+            return df, "Scraping successful"
         else:
-            return pd.DataFrame(), '-'
-
+            return pd.DataFrame(), "No jobs found"
     except Exception as e:
         # Handle errors gracefully
-        message = f"An error occurred during scraping: {e}"
+        message = f"Error occurred: {str(e)}"
         return None, message
 
+# Gradio interface
+def gradio_interface(query, locations):
+    df, message = scrape_jobs(query, locations)
+    return df, message
 
-# Define Gradio interface
-# def gradio_interface(query, locations):
-#     csv_data, message = scrape_jobs(query, locations)
-#     if csv_data:
-#         return csv_data, message
-#     else:
-#         return None, "No results to display."
-
-def gradio_interface(query, locations, time_filter):
-    df, message = scrape_jobs(query, locations, time_filter)
-    return df, message
-
-# # Gradio app layout
-# iface = gr.Interface(
-#     fn=gradio_interface,
-#     inputs=[
-#         gr.Textbox(label="Job Query", placeholder="e.g., Data Scientist", value="Unity developers"),
-#         gr.Textbox(label="Locations (comma-separated)", placeholder="e.g., United States, India", value="United States, India"),
-#     ],
-#     outputs=[
-#         gr.File(label="Download CSV"),
-#         gr.Textbox(label="Message"),
-#     ],
-#     title="LinkedIn Job Scraper",
-#     description="Enter the job query and locations to scrape LinkedIn job postings. Outputs a downloadable CSV file.",
-# )
-
+# Gradio app layout
 iface = gr.Interface(
     fn=gradio_interface,
     inputs=[
         gr.Textbox(label="Job Query", placeholder="e.g., Data Scientist", value="Unity developers"),
-        gr.Textbox(label="Locations (comma-separated)", placeholder="e.g., United States, India", value="United States, United Kingdom, Canada, Germany, India"),
-        gr.Dropdown(
-            label="Time Filter",
-            choices=["From Past Month", "From Last 24 Hours"],  # The options the user can select
-            value="From Past Month",  # Default option
-            type="value",
-        ),
+        gr.Textbox(label="Locations (comma-separated)", placeholder="e.g., United States", value="United States"),
    ],
     outputs=[
-        gr.Dataframe(label="Job Results", headers=['Date','Company', 'ApplyLink'], interactive=True),
+        gr.Dataframe(label="Job Results", headers=["Date", "Title", "Company", "Location", "Job Link"], interactive=True),
         gr.Textbox(label="Message"),
     ],
-    title="Job Scraper",
-    description="Enter a job query and locations to scrape job postings and display the results in a table.",
+    title="LinkedIn Job Scraper",
+    description="Scrape LinkedIn for jobs based on query and locations.",
 )
 
 # Launch app
 if __name__ == "__main__":
-    iface.launch()
+    iface.launch()
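
For reference, below is a sketch of how app.py plausibly reads after this commit, reconstructed from the hunks above; it is not the verbatim file. The logging configuration, the job_data buffer, the top of on_data (including the 'Date' field implied by the results table headers), and the Query wrapper around QueryOptions are elided in the diff and are filled in here as labeled assumptions. Note also that the diff deletes `import gradio as gr` without re-adding it, even though the new code still calls gr.Interface; the sketch restores that import so it runs.

# Reconstructed post-commit app.py -- a sketch, not the verbatim file.
# Lines not visible in the diff hunks above are marked "assumption".
import gradio as gr  # assumption: restored; the diff drops this line but gr.Interface still needs it
import logging
from linkedin_jobs_scraper import LinkedinScraper
from linkedin_jobs_scraper.events import Events, EventData
from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
from linkedin_jobs_scraper.filters import TimeFilters
import pandas as pd

# Configure logging
logging.basicConfig(level=logging.INFO)  # assumption: actual config is elided in the diff

job_data = []  # assumption: module-level buffer that on_data appends to

def on_data(data: EventData):
    # Collect one row per scraped job posting
    job_data.append({
        'Date': data.date,  # assumption: implied by the "Date" column of the results table
        'Title': data.title,
        'Company': data.company,
        'Location': data.location,
        'Job Link': data.link,
        'Description Length': len(data.description),
    })

def on_end():
    print("[ON_END] Scraping completed.")

# LinkedIn Scraper function with error handling
def scrape_jobs(query, locations):
    global job_data
    try:
        job_data = []

        scraper = LinkedinScraper(
            chrome_executable_path=None,
            chrome_binary_location=None,
            chrome_options=None,
            headless=True,
            max_workers=5,
            slow_mo=0.8,
            page_load_timeout=60,
        )

        scraper.on(Events.DATA, on_data)
        scraper.on(Events.END, on_end)

        # assumption: the Query wrapper is elided in the diff; only the
        # QueryOptions/QueryFilters arguments appear in the hunks above
        queries = [
            Query(
                query=query,
                options=QueryOptions(
                    locations=locations.split(','),
                    apply_link=True,
                    skip_promoted_jobs=True,
                    page_offset=0,
                    limit=100,
                    filters=QueryFilters(
                        time=TimeFilters.DAY,  # Specify desired time filter
                    ),
                ),
            ),
        ]

        scraper.run(queries)

        if job_data:
            df = pd.DataFrame(job_data)
            return df, "Scraping successful"
        else:
            return pd.DataFrame(), "No jobs found"
    except Exception as e:
        # Handle errors gracefully
        message = f"Error occurred: {str(e)}"
        return None, message

# Gradio interface
def gradio_interface(query, locations):
    df, message = scrape_jobs(query, locations)
    return df, message

# Gradio app layout
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Job Query", placeholder="e.g., Data Scientist", value="Unity developers"),
        gr.Textbox(label="Locations (comma-separated)", placeholder="e.g., United States", value="United States"),
    ],
    outputs=[
        gr.Dataframe(label="Job Results", headers=["Date", "Title", "Company", "Location", "Job Link"], interactive=True),
        gr.Textbox(label="Message"),
    ],
    title="LinkedIn Job Scraper",
    description="Scrape LinkedIn for jobs based on query and locations.",
)

# Launch app
if __name__ == "__main__":
    iface.launch()

Net effect of the commit, as far as the hunks show: the UI-driven time filter is replaced by a hard-coded TimeFilters.DAY, max_workers drops from 10 to 5, promoted jobs are now skipped, the inputs shrink to two textboxes with a single default location, and the large blocks of commented-out CSV/Interface code are removed.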