import logging

import gradio as gr
import pandas as pd
from linkedin_jobs_scraper import LinkedinScraper
from linkedin_jobs_scraper.events import Events, EventData
from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters
# Configure logging
logging.basicConfig(
    filename="job_scraper.log",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# Initialize job data storage
job_data = []
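# Note: job_data is module-level shared state. Gradio can serve concurrent
# requests, so two simultaneous scrapes could interleave their results; that
# is acceptable for a single-user Space, but a per-request list would be
# safer under real concurrency.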
# Event Handlers
def on_data(data: EventData):
    # Collect one row per scraped job posting
    job_data.append({
        'Date Posted': data.date,
        'Title': data.title,
        'Company': data.company,
        'Location': data.location,
        'Job Link': data.link,
        'Description Length': len(data.description),
        'Description': data.description,
    })
def on_end():
    logging.info("[ON_END] Scraping completed.")
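# The linkedin-jobs-scraper library also emits an ERROR event for per-query
# failures (e.g. blocked or timed-out pages) that would otherwise pass
# silently. A minimal handler sketch:
def on_error(error):
    logging.error(f"[ON_ERROR] {error}")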
# Scraper function
def scrape_jobs(query, locations, time_filter):
    global job_data
    try:
        job_data = []
        scraper = LinkedinScraper(
            chrome_executable_path=None,  # None: use the chromedriver found on PATH
            chrome_binary_location=None,
            chrome_options=None,
            headless=True,
            max_workers=5,  # One Chrome instance per concurrent query
            slow_mo=0.8,  # Delay between requests to reduce rate-limiting
            page_load_timeout=100,
        )

        scraper.on(Events.DATA, on_data)
        scraper.on(Events.END, on_end)
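        # Register the error handler sketched above
        scraper.on(Events.ERROR, on_error)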
        # Map the dropdown label to the library's TimeFilters enum;
        # anything unrecognized falls back to the past month.
        if time_filter == "From Last 24 Hours":
            time_filter = TimeFilters.DAY
        else:
            time_filter = TimeFilters.MONTH
        queries = [
            Query(
                query=query,
                options=QueryOptions(
                    # Strip whitespace so "United States, India" parses cleanly
                    locations=[loc.strip() for loc in locations.split(',')],
                    apply_link=True,
                    skip_promoted_jobs=False,
                    page_offset=0,
                    limit=100,
                    filters=QueryFilters(
                        # relevance=RelevanceFilters.RECENT,
                        time=time_filter,
                    ),
                ),
            ),
        ]
        scraper.run(queries)

        if job_data:
            df = pd.DataFrame(job_data)
            message = f"Successfully scraped {len(job_data)} jobs."
            logging.info(message)
            return df, message
        else:
            logging.warning("No job data found.")
            return pd.DataFrame(), 'No jobs found.'
    except Exception as e:
        logging.error(f"An error occurred during scraping: {e}", exc_info=True)
        message = f"An error occurred during scraping: {e}. Please check the logs for more details."
        return pd.DataFrame(), message
# App Layout
iface = gr.Interface(
    fn=scrape_jobs,
    inputs=[
        gr.Textbox(label="Job Query", placeholder="e.g., Data Scientist", value="Blockchain developers"),
        gr.Textbox(label="Locations (comma-separated)", placeholder="e.g., United States, India", value="United States, United Kingdom, India"),
        gr.Dropdown(
            label="Time Filter",
            choices=["From Past Month", "From Last 24 Hours"],
            value="From Last 24 Hours",  # Default option
            type="value",
        ),
    ],
    outputs=[
        # The returned DataFrame supplies its own column headers
        gr.Dataframe(label="Job Results", interactive=False),
        gr.Textbox(label="Message"),
    ],
    title="Job Scraper",
    description="Enter a job query and locations to scrape LinkedIn job postings and display the results in a table.",
)
if __name__ == "__main__":
    iface.launch()
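# To run locally (assuming the dependencies are installed):
#   pip install gradio pandas linkedin-jobs-scraper
#   python app.py  # or whatever this file is named
# The scraper drives headless Chrome, so a Chrome/Chromium binary and a
# matching chromedriver must be available on the machine.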