charulp2499 committed
Commit 8641940 · verified · Parent: c409c0d

Upload 2 files

Files changed (2):
  1. app.py +151 -0
  2. requirements.txt +3 -0
app.py ADDED

import gradio as gr
import logging
from linkedin_jobs_scraper import LinkedinScraper
from linkedin_jobs_scraper.events import Events, EventData, EventMetrics
from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, OnSiteOrRemoteFilters
import pandas as pd

# Configure logging
logging.basicConfig(level=logging.INFO)

# Initialize job data storage
job_data = []

# Event handlers
def on_data(data: EventData):
    job_data.append({
        'Date Posted': data.date,
        'Title': data.title,
        'Company': data.company,
        'Location': data.location,
        # 'Company Link': data.company_link,
        'Job Link': data.link,
        # 'Insights': data.insights,
        'Description Length': len(data.description),
    })

def on_end():
    print("[ON_END] Scraping completed.")

# LinkedIn scraper function
def scrape_jobs(query, locations, time_filter):
    global job_data
    try:
        job_data = []

        # Map the dropdown label to the scraper's TimeFilters enum
        if time_filter == "From Past Month":
            time_filter = TimeFilters.MONTH
        elif time_filter == "From Last 24 Hours":
            time_filter = TimeFilters.DAY
        else:
            time_filter = TimeFilters.MONTH

        scraper = LinkedinScraper(
            chrome_executable_path=None,
            chrome_binary_location=None,
            chrome_options=None,
            headless=True,
            max_workers=10,
            slow_mo=0.8,
            page_load_timeout=60,
        )

        scraper.on(Events.DATA, on_data)
        scraper.on(Events.END, on_end)

        queries = [
            Query(
                query=query,
                options=QueryOptions(
                    # Strip stray whitespace around comma-separated locations
                    locations=[loc.strip() for loc in locations.split(',')],
                    apply_link=True,
                    skip_promoted_jobs=False,
                    page_offset=0,
                    limit=100,
                    filters=QueryFilters(
                        # relevance=RelevanceFilters.RECENT,
                        time=time_filter,
                        # on_site_or_remote=OnSiteOrRemoteFilters.REMOTE,
                    ),
                ),
            ),
        ]

        scraper.run(queries)

        # Convert the collected rows to a DataFrame and return it with a status message
        if job_data:
            df = pd.DataFrame(job_data)
            message = f"Jobs ({len(job_data)}) data successfully scraped."
            return df, message
        else:
            return pd.DataFrame(), "No job data found for the given query and locations."

    except Exception as e:
        # Handle errors gracefully
        message = f"An error occurred during scraping: {e}"
        return None, message


# Define Gradio interface
def gradio_interface(query, locations, time_filter):
    df, message = scrape_jobs(query, locations, time_filter)
    return df, message

# Gradio app layout
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Job Query", placeholder="e.g., Data Scientist", value="Unity developers"),
        gr.Textbox(label="Locations (comma-separated)", placeholder="e.g., United States, India", value="United States, United Kingdom, Canada, Germany, India"),
        gr.Dropdown(
            label="Time Filter",
            choices=["From Past Month", "From Last 24 Hours"],  # Options the user can select
            value="From Past Month",  # Default option
            type="value",
        ),
    ],
    outputs=[
        # Headers match the keys emitted by on_data
        gr.Dataframe(
            label="Job Results",
            headers=['Date Posted', 'Title', 'Company', 'Location', 'Job Link', 'Description Length'],
            interactive=True,
        ),
        gr.Textbox(label="Message"),
    ],
    title="Job Scraper",
    description="Enter a job query and locations to scrape job postings and display the results in a table.",
)

# Launch app
if __name__ == "__main__":
    iface.launch()
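
For a downloadable-CSV variant of the interface (wiring a gr.File output alongside or in place of the table), a minimal sketch; the save_jobs_csv helper name and the jobs_data.csv filename are illustrative assumptions, not part of the app:

import pandas as pd

def save_jobs_csv(job_data, file_name="jobs_data.csv"):
    # Persist scraped rows to CSV; returns (path, message) so the path can
    # feed a gr.File output and the message a gr.Textbox.
    if not job_data:
        return None, "No job data found for the given query and locations."
    df = pd.DataFrame(job_data)
    df.to_csv(file_name, index=False)
    return file_name, f"Jobs data saved to {file_name}"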
requirements.txt ADDED

linkedin-jobs-scraper
gradio
pandas
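
A quick way to exercise scrape_jobs without the UI, assuming the requirements above are installed and a working Chrome setup is available for linkedin-jobs-scraper (the query, locations, and time filter below are arbitrary examples):

# Hypothetical smoke test; imports the function from the app.py above.
from app import scrape_jobs

df, message = scrape_jobs("Data Scientist", "United States, India", "From Last 24 Hours")
print(message)
if df is not None:
    print(df.head())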