# JobScrapper / app.py
# Hugging Face file-viewer residue (not part of the program): uploaded by
# charulp2499, commit "Update app.py" (f095e3e, verified), file size 3.86 kB.
import gradio as gr
import logging
from linkedin_jobs_scraper import LinkedinScraper
from linkedin_jobs_scraper.events import Events, EventData, EventMetrics
from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, OnSiteOrRemoteFilters
import pandas as pd
# Logging setup: INFO and above go to job_scraper.log, each line prefixed
# with a timestamp and the level name.
logging.basicConfig(
    filename="job_scraper.log",
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Module-level buffer of scraped job records; on_data appends to it and
# scrape_jobs rebinds it to a fresh list at the start of every run.
job_data = []
# Event Handlers
def on_data(data: EventData):
    """Store one scraped job posting in the module-level ``job_data`` list.

    Registered as the ``Events.DATA`` handler in ``scrape_jobs``. The record
    keeps the posting's date, title, company, location, link and description,
    plus the description's length.
    """
    record = {
        'Date Posted': data.date,
        'Title': data.title,
        'Company': data.company,
        'Location': data.location,
        'Job Link': data.link,
    }
    # Read the description once and reuse it for both derived columns.
    description = data.description
    record['Description Length'] = len(description)
    record['Description'] = description
    job_data.append(record)
def on_end() -> None:
    """Log that scraping has finished (handler for ``Events.END``)."""
    completion_note = "[ON_END] Scraping completed."
    logging.info(completion_note)
# Scraper function
def _split_locations(locations):
    """Turn a comma-separated string into a list of clean location names.

    Whitespace around every entry is removed and blank entries are dropped,
    so ``"United States, India"`` becomes ``["United States", "India"]``
    instead of ``["United States", " India"]``. When nothing usable remains
    the result is ``[""]``, which is what a bare ``"".split(",")`` produced
    before, so blank input behaves as it always did.
    """
    stripped = (part.strip() for part in locations.split(","))
    cleaned = [part for part in stripped if part]
    return cleaned or [""]


def scrape_jobs(query, locations, time_filter):
    """Scrape LinkedIn job postings and return them as a table plus a status.

    Args:
        query: Search text passed to the scraper's ``Query``.
        locations: Comma-separated location names; surrounding whitespace and
            empty entries are ignored.
        time_filter: UI label. ``"From Last 24 Hours"`` selects
            ``TimeFilters.DAY``; any other value selects ``TimeFilters.MONTH``.

    Returns:
        A ``(table, message)`` tuple:
        * ``(DataFrame, "Jobs (N) data successfully scraped.")`` when at least
          one posting was collected,
        * ``(empty DataFrame, 'No jobs found.')`` when none were,
        * ``(None, error message)`` when any exception was raised.

    Side effects:
        Rebinds the module-level ``job_data`` list, which ``on_data`` fills
        while the scraper runs, and writes progress/errors to the log.
    """
    global job_data
    try:
        # Start every run from an empty buffer so results never accumulate
        # across calls.
        job_data = []
        scraper = LinkedinScraper(
            chrome_executable_path=None,
            chrome_binary_location=None,
            chrome_options=None,
            headless=True,
            max_workers=5,
            slow_mo=0.8,
            page_load_timeout=100,
        )
        scraper.on(Events.DATA, on_data)
        scraper.on(Events.END, on_end)

        # Only the 24-hour label maps to DAY; everything else (including the
        # "From Past Month" label and unknown values) falls back to MONTH.
        if time_filter == "From Last 24 Hours":
            time_filter = TimeFilters.DAY
        else:
            time_filter = TimeFilters.MONTH

        queries = [
            Query(
                query=query,
                options=QueryOptions(
                    locations=_split_locations(locations),
                    apply_link=True,
                    skip_promoted_jobs=False,
                    page_offset=0,
                    limit=100,
                    filters=QueryFilters(
                        # relevance=RelevanceFilters.RECENT,
                        time=time_filter,
                    ),
                ),
            ),
        ]
        scraper.run(queries)

        if not job_data:
            logging.warning("No job data found.")
            return pd.DataFrame(), 'No jobs found.'

        df = pd.DataFrame(job_data)
        message = f"Jobs ({len(job_data)}) data successfully scraped."
        logging.info(message)
        return df, message
    except Exception as e:
        # Boundary handler for the UI: log the full traceback and surface a
        # readable message instead of letting the callback crash.
        logging.exception("An error occurred during scraping: %s", e)
        message = f"An error occurred during scraping: {e}. Please check the logs for more details."
        return None, message
def gradio_interface(query, locations, time_filter):
    """Gradio callback: run ``scrape_jobs`` and return its table and message.

    The two returned values feed the interface's two output components, in
    that order.
    """
    results_table, status_text = scrape_jobs(query, locations, time_filter)
    return results_table, status_text
# App Layout
# One form with three inputs (query, locations, time filter) and two outputs
# (results table, status message), wired to gradio_interface.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Job Query", placeholder="e.g., Data Scientist", value="Blockchain developers"),
        gr.Textbox(label="Locations (comma-separated)", placeholder="e.g., United States, India", value="United States, United Kingdom, India"),
        gr.Dropdown(
            label="Time Filter",
            choices=["From Past Month", "From Last 24 Hours"],
            value="From Last 24 Hours",  # Default option
            type="value",
        ),
    ],
    outputs=[
        gr.Dataframe(
            label="Job Results",
            # Keep these in step with the keys on_data stores for each job so
            # the empty table shows the same columns as a populated one.
            headers=[
                'Date Posted',
                'Title',
                'Company',
                'Location',
                'Job Link',
                'Description Length',
                'Description',
            ],
            interactive=True,
        ),
        gr.Textbox(label="Message"),
    ],
    title="Job Scraper",
    description="Enter a job query and locations to scrape job postings and display the results in a table.",
)
# Launch the Gradio app only when this file is run as a script, so importing
# the module does not start a server.
if __name__ == "__main__":
    iface.launch()