charulp2499 committed on
Commit
7b0faa9
·
verified ·
1 Parent(s): 8641940

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +150 -150
app.py CHANGED
@@ -1,151 +1,151 @@
1
- import gradio as gr
2
- import logging
3
- from linkedin_jobs_scraper import LinkedinScraper
4
- from linkedin_jobs_scraper.events import Events, EventData, EventMetrics
5
- from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
6
- from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, OnSiteOrRemoteFilters
7
- import pandas as pd
8
-
9
- # Configure logging
10
- logging.basicConfig(level=logging.INFO)
11
-
12
- # Initialize job data storage
13
- job_data = []
14
-
15
- # Event Handlers
16
- def on_data(data: EventData):
17
- job_data.append({
18
- 'Date Posted': data.date,
19
- 'Title': data.title,
20
- 'Company': data.company,
21
- 'Location': data.location,
22
- # 'Company Link': data.company_link,
23
- 'Job Link': data.link,
24
- # 'Insights': data.insights,
25
- 'Description Length': len(data.description),
26
- })
27
-
28
- def on_end():
29
- print("[ON_END] Scraping completed.")
30
-
31
- # LinkedIn Scraper function
32
- def scrape_jobs(query, locations, time_filter):
33
- global job_data
34
- try:
35
- job_data = []
36
-
37
- if time_filter == "From Past Month":
38
- time_filter = TimeFilters.MONTH
39
- elif time_filter == "From Last 24 Hours":
40
- time_filter = TimeFilters.DAY
41
- else:
42
- time_filter = TimeFilters.MONTH
43
-
44
- scraper = LinkedinScraper(
45
- chrome_executable_path=None,
46
- chrome_binary_location=None,
47
- chrome_options=None,
48
- headless=True,
49
- max_workers=10,
50
- slow_mo=0.8,
51
- page_load_timeout=60,
52
- )
53
-
54
- scraper.on(Events.DATA, on_data)
55
- scraper.on(Events.END, on_end)
56
-
57
- queries = [
58
- Query(
59
- query=query,
60
- options=QueryOptions(
61
- locations=locations.split(','),
62
- apply_link=True,
63
- skip_promoted_jobs=False,
64
- page_offset=0,
65
- limit=100,
66
- filters=QueryFilters(
67
- # relevance=RelevanceFilters.RECENT,
68
- time=time_filter,
69
- # on_site_or_remote=OnSiteOrRemoteFilters.REMOTE,
70
- ),
71
- ),
72
- ),
73
- ]
74
-
75
- scraper.run(queries)
76
-
77
- # Convert to DataFrame and return
78
- # Save the job data to a CSV file after scraping ends
79
- # if job_data:
80
- # # Save the job data to a CSV file
81
- # file_name = "jobs_data.csv"
82
- # df = pd.DataFrame(job_data)
83
- # df.to_csv(file_name, index=False)
84
- # message = f"Jobs data saved to {file_name}"
85
- # return file_name, message # Return the CSV file path and success message
86
- # else:
87
- # message = "No job data found for the given query and locations."
88
- # return None, message
89
- if job_data:
90
- df = pd.DataFrame(job_data)
91
- message = f"Jobs ({len(job_data)}) data successfully scraped."
92
- return df, message # Return DataFrame and message
93
- else:
94
- return pd.DataFrame(),
95
-
96
- except Exception as e:
97
- # Handle errors gracefully
98
- message = f"An error occurred during scraping: {e}"
99
- return None, message
100
-
101
-
102
- # Define Gradio interface
103
- # def gradio_interface(query, locations):
104
- # csv_data, message = scrape_jobs(query, locations)
105
- # if csv_data:
106
- # return csv_data, message
107
- # else:
108
- # return None, "No results to display."
109
-
110
- def gradio_interface(query, locations, time_filter):
111
- df, message = scrape_jobs(query, locations, time_filter)
112
- return df, message
113
-
114
- # # Gradio app layout
115
- # iface = gr.Interface(
116
- # fn=gradio_interface,
117
- # inputs=[
118
- # gr.Textbox(label="Job Query", placeholder="e.g., Data Scientist", value="Unity developers"),
119
- # gr.Textbox(label="Locations (comma-separated)", placeholder="e.g., United States, India", value="United States, India"),
120
- # ],
121
- # outputs=[
122
- # gr.File(label="Download CSV"),
123
- # gr.Textbox(label="Message"),
124
- # ],
125
- # title="LinkedIn Job Scraper",
126
- # description="Enter the job query and locations to scrape LinkedIn job postings. Outputs a downloadable CSV file.",
127
- # )
128
-
129
- iface = gr.Interface(
130
- fn=gradio_interface,
131
- inputs=[
132
- gr.Textbox(label="Job Query", placeholder="e.g., Data Scientist", value="Unity developers"),
133
- gr.Textbox(label="Locations (comma-separated)", placeholder="e.g., United States, India", value="United States, United Kingdom, Canada, Germany, India"),
134
- gr.Dropdown(
135
- label="Time Filter",
136
- choices=["From Past Month", "From Last 24 Hours"], # The options the user can select
137
- value="From Past Month", # Default option
138
- type="value",
139
- ),
140
- ],
141
- outputs=[
142
- gr.Dataframe(label="Job Results", headers=['Date','Company', 'ApplyLink'], interactive=True),
143
- gr.Textbox(label="Message"),
144
- ],
145
- title="Job Scraper",
146
- description="Enter a job query and locations to scrape job postings and display the results in a table.",
147
- )
148
-
149
- # Launch app
150
- if __name__ == "__main__":
151
  iface.launch()
 
1
+ import gradio as gr
2
+ import logging
3
+ from linkedin_jobs_scraper import LinkedinScraper
4
+ from linkedin_jobs_scraper.events import Events, EventData, EventMetrics
5
+ from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
6
+ from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, OnSiteOrRemoteFilters
7
+ import pandas as pd
8
+
9
# Configure the root logger to emit INFO-level records and above.
logging.basicConfig(level=logging.INFO)

# Module-level accumulator for scraped rows: on_data appends one dict per
# posting, and scrape_jobs rebinds it to a fresh list at the start of a run.
job_data = []
14
+
15
+ # Event Handlers
16
def on_data(data: EventData):
    """Append one scraped job posting to the module-level ``job_data`` list.

    Registered by ``scrape_jobs`` as the ``Events.DATA`` callback.

    Args:
        data: Event payload; its ``date``, ``title``, ``company``,
            ``location``, ``link`` and ``description`` attributes are read.
    """
    # Only the description's length is stored, not its text.
    # 'Company Link' (data.company_link) and 'Insights' (data.insights) are
    # currently left out of the row.
    row = {
        'Date Posted': data.date,
        'Title': data.title,
        'Company': data.company,
        'Location': data.location,
        'Job Link': data.link,
        'Description Length': len(data.description),
    }
    job_data.append(row)
27
+
28
def on_end():
    """Print a completion notice to stdout; used as the ``Events.END`` callback."""
    completion_notice = "[ON_END] Scraping completed."
    print(completion_notice)
30
+
31
+ # LinkedIn Scraper function
32
def scrape_jobs(query, locations, time_filter):
    """Run one LinkedIn job search and return the results as a table.

    Args:
        query: Search text passed to ``Query``.
        locations: Comma-separated location names. Each entry is stripped of
            surrounding whitespace before being passed to ``QueryOptions``.
        time_filter: UI label. ``"From Last 24 Hours"`` selects
            ``TimeFilters.DAY``; any other value selects ``TimeFilters.MONTH``.

    Returns:
        Always a two-item tuple ``(table, message)``:

        * rows were scraped -> ``(DataFrame built from job_data, success text)``
        * nothing was scraped -> ``(empty DataFrame, "no data" text)``
        * an exception occurred -> ``(None, error text)``
    """
    global job_data
    try:
        # Rebind to a fresh list so rows from an earlier search are not
        # carried over; on_data appends to this same module-level name.
        # NOTE(review): this is shared module state, so overlapping calls
        # would write into the same list.
        job_data = []

        # Only the 24-hour label maps to DAY; every other value (including
        # "From Past Month") uses MONTH.
        if time_filter == "From Last 24 Hours":
            time_filter = TimeFilters.DAY
        else:
            time_filter = TimeFilters.MONTH

        scraper = LinkedinScraper(
            chrome_executable_path=None,
            chrome_binary_location=None,
            chrome_options=None,
            headless=True,
            max_workers=10,
            slow_mo=0.8,
            page_load_timeout=60,
        )
        scraper.on(Events.DATA, on_data)
        scraper.on(Events.END, on_end)

        search = Query(
            query=query,
            options=QueryOptions(
                # "United States, India" -> ["United States", "India"]
                # (a plain split would leave the leading space on " India").
                locations=[place.strip() for place in locations.split(',')],
                apply_link=True,
                skip_promoted_jobs=False,
                page_offset=0,
                limit=100,
                filters=QueryFilters(time=time_filter),
            ),
        )
        scraper.run([search])

        if not job_data:
            # Return a full (table, message) pair here too, so callers can
            # always unpack two values.
            return pd.DataFrame(), "No job data found for the given query and locations."

        df = pd.DataFrame(job_data)
        message = f"Jobs ({len(job_data)}) data successfully scraped."
        return df, message

    except Exception as e:
        # Top-level boundary for the UI callback: keep the traceback in the
        # log and report the failure as text instead of raising.
        logging.exception("Scraping failed")
        message = f"An error occurred during scraping: {e}"
        return None, message
100
+
101
+
102
+ # Define Gradio interface
103
+ # def gradio_interface(query, locations):
104
+ # csv_data, message = scrape_jobs(query, locations)
105
+ # if csv_data:
106
+ # return csv_data, message
107
+ # else:
108
+ # return None, "No results to display."
109
+
110
def gradio_interface(query, locations, time_filter):
    """Gradio callback: run ``scrape_jobs`` and return ``(table, message)``.

    Args:
        query: Job search text from the "Job Query" textbox.
        locations: Comma-separated locations from the locations textbox.
        time_filter: Selected label from the "Time Filter" dropdown.

    Returns:
        A two-item tuple matching the interface's two outputs: the results
        table (or ``None``) and a status message.
    """
    result = tuple(scrape_jobs(query, locations, time_filter))
    # Index instead of tuple-unpacking: unpacking into three names raised
    # ValueError on every call, and a result shorter than two items (a table
    # with no message) should yield a fallback message rather than an error.
    df = result[0] if result else None
    if len(result) > 1:
        message = result[1]
    else:
        message = "No job data found for the given query and locations."
    return df, message
113
+
114
+ # # Gradio app layout
115
+ # iface = gr.Interface(
116
+ # fn=gradio_interface,
117
+ # inputs=[
118
+ # gr.Textbox(label="Job Query", placeholder="e.g., Data Scientist", value="Unity developers"),
119
+ # gr.Textbox(label="Locations (comma-separated)", placeholder="e.g., United States, India", value="United States, India"),
120
+ # ],
121
+ # outputs=[
122
+ # gr.File(label="Download CSV"),
123
+ # gr.Textbox(label="Message"),
124
+ # ],
125
+ # title="LinkedIn Job Scraper",
126
+ # description="Enter the job query and locations to scrape LinkedIn job postings. Outputs a downloadable CSV file.",
127
+ # )
128
+
129
# Gradio app layout: three inputs (query, locations, time filter) feed
# gradio_interface, whose (table, message) result fills the two outputs.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Job Query", placeholder="e.g., Data Scientist", value="Unity developers"),
        gr.Textbox(
            label="Locations (comma-separated)",
            placeholder="e.g., United States, India",
            value="United States, United Kingdom, Canada, Germany, India",
        ),
        gr.Dropdown(
            label="Time Filter",
            choices=["From Past Month", "From Last 24 Hours"],  # The options the user can select
            value="From Past Month",  # Default option
            type="value",
        ),
    ],
    outputs=[
        gr.Dataframe(
            label="Job Results",
            # Headers mirror the keys on_data writes for each row, so the
            # declared headers match the returned DataFrame's columns.
            headers=['Date Posted', 'Title', 'Company', 'Location', 'Job Link', 'Description Length'],
            interactive=True,
        ),
        gr.Textbox(label="Message"),
    ],
    title="Job Scraper",
    description="Enter a job query and locations to scrape job postings and display the results in a table.",
)
148
+
149
+ # Launch app
150
# Launch the interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    iface.launch()