charulp2499 committed
Commit 8641940 · verified · Parent: c409c0d

Upload 2 files

Files changed (2):
  1. app.py +151 -0
  2. requirements.txt +3 -0
app.py ADDED

import gradio as gr
import logging
from linkedin_jobs_scraper import LinkedinScraper
from linkedin_jobs_scraper.events import Events, EventData, EventMetrics
from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, OnSiteOrRemoteFilters
import pandas as pd

# Configure logging
logging.basicConfig(level=logging.INFO)

# Initialize job data storage
job_data = []

# Event handlers
def on_data(data: EventData):
    job_data.append({
        'Date Posted': data.date,
        'Title': data.title,
        'Company': data.company,
        'Location': data.location,
        # 'Company Link': data.company_link,
        'Job Link': data.link,
        # 'Insights': data.insights,
        'Description Length': len(data.description),
    })

def on_end():
    print("[ON_END] Scraping completed.")

# LinkedIn scraper function
def scrape_jobs(query, locations, time_filter):
    global job_data
    try:
        job_data = []

        # Map the dropdown label to the scraper's TimeFilters enum
        if time_filter == "From Past Month":
            time_filter = TimeFilters.MONTH
        elif time_filter == "From Last 24 Hours":
            time_filter = TimeFilters.DAY
        else:
            time_filter = TimeFilters.MONTH

        scraper = LinkedinScraper(
            chrome_executable_path=None,
            chrome_binary_location=None,
            chrome_options=None,
            headless=True,
            max_workers=10,
            slow_mo=0.8,
            page_load_timeout=60,
        )

        scraper.on(Events.DATA, on_data)
        scraper.on(Events.END, on_end)

        queries = [
            Query(
                query=query,
                options=QueryOptions(
                    # Strip stray whitespace around comma-separated locations
                    locations=[loc.strip() for loc in locations.split(',')],
                    apply_link=True,
                    skip_promoted_jobs=False,
                    page_offset=0,
                    limit=100,
                    filters=QueryFilters(
                        # relevance=RelevanceFilters.RECENT,
                        time=time_filter,
                        # on_site_or_remote=OnSiteOrRemoteFilters.REMOTE,
                    ),
                ),
            ),
        ]

        scraper.run(queries)

        # Convert the collected rows to a DataFrame and return it with a status message
        if job_data:
            df = pd.DataFrame(job_data)
            message = f"Jobs ({len(job_data)}) data successfully scraped."
            return df, message
        else:
            return pd.DataFrame(), "No job data found for the given query and locations."

    except Exception as e:
        # Handle errors gracefully
        message = f"An error occurred during scraping: {e}"
        return None, message


# Define Gradio interface
def gradio_interface(query, locations, time_filter):
    df, message = scrape_jobs(query, locations, time_filter)
    return df, message

# Gradio app layout
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Job Query", placeholder="e.g., Data Scientist", value="Unity developers"),
        gr.Textbox(label="Locations (comma-separated)", placeholder="e.g., United States, India", value="United States, United Kingdom, Canada, Germany, India"),
        gr.Dropdown(
            label="Time Filter",
            choices=["From Past Month", "From Last 24 Hours"],  # Options the user can select
            value="From Past Month",  # Default option
            type="value",
        ),
    ],
    outputs=[
        # Headers match the keys emitted by on_data
        gr.Dataframe(
            label="Job Results",
            headers=['Date Posted', 'Title', 'Company', 'Location', 'Job Link', 'Description Length'],
            interactive=True,
        ),
        gr.Textbox(label="Message"),
    ],
    title="Job Scraper",
    description="Enter a job query and locations to scrape job postings and display the results in a table.",
)

# Launch app
if __name__ == "__main__":
    iface.launch()
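
For a downloadable-CSV variant of the interface (wiring a gr.File output alongside or in place of the table), a minimal sketch; the save_jobs_csv helper name and the jobs_data.csv filename are illustrative assumptions, not part of the app:

import pandas as pd

def save_jobs_csv(job_data, file_name="jobs_data.csv"):
    # Persist scraped rows to CSV; returns (path, message) so the path can
    # feed a gr.File output and the message a gr.Textbox.
    if not job_data:
        return None, "No job data found for the given query and locations."
    df = pd.DataFrame(job_data)
    df.to_csv(file_name, index=False)
    return file_name, f"Jobs data saved to {file_name}"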
requirements.txt ADDED

linkedin-jobs-scraper
gradio
pandas
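
A quick way to exercise scrape_jobs without the UI, assuming the requirements above are installed and a working Chrome setup is available for linkedin-jobs-scraper (the query, locations, and time filter below are arbitrary examples):

# Hypothetical smoke test; imports the function from the app.py above.
from app import scrape_jobs

df, message = scrape_jobs("Data Scientist", "United States, India", "From Last 24 Hours")
print(message)
if df is not None:
    print(df.head())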