import gradio as gr
import logging
from linkedin_jobs_scraper import LinkedinScraper
from linkedin_jobs_scraper.events import Events, EventData, EventMetrics
from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, OnSiteOrRemoteFilters
import pandas as pd
# Configure logging
logging.basicConfig(filename="job_scraper.log", level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
# Initialize job data storage
job_data = []
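# Note: this module-level list is shared with the event handlers below and
# reset at the start of each scrape_jobs call, so concurrent requests would
# interleave their results.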
# Event Handlers
def on_data(data: EventData):
    job_data.append({
        'Date Posted': data.date,
        'Title': data.title,
        'Company': data.company,
        'Location': data.location,
        'Job Link': data.link,
        'Description Length': len(data.description),
        'Description': data.description,
    })
def on_end():
    logging.info("[ON_END] Scraping completed.")
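# Added sketch: linkedin_jobs_scraper also emits Events.ERROR; logging it
# keeps scraper failures visible in job_scraper.log instead of passing
# silently. Registered below alongside the other handlers.
def on_error(error):
    logging.error("[ON_ERROR] %s", error)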
# Scraper function
def scrape_jobs(query, locations, time_filter):
    global job_data
    try:
        job_data = []  # Reset results from any previous run
        scraper = LinkedinScraper(
            chrome_executable_path=None,  # Use the chromedriver found on PATH
            chrome_binary_location=None,
            chrome_options=None,
            headless=True,
            max_workers=5,
            slow_mo=0.8,  # Delay between requests (seconds) to avoid throttling
            page_load_timeout=100,
        )
        scraper.on(Events.DATA, on_data)
        scraper.on(Events.END, on_end)
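        # Register the error handler sketched above.
        scraper.on(Events.ERROR, on_error)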
        # Map the dropdown label to the library's time filter; default to past month
        if time_filter == "From Past Month":
            time_filter = TimeFilters.MONTH
        elif time_filter == "From Last 24 Hours":
            time_filter = TimeFilters.DAY
        else:
            time_filter = TimeFilters.MONTH
        queries = [
            Query(
                query=query,
                options=QueryOptions(
                    # Strip whitespace so "United States, India" parses cleanly
                    locations=[loc.strip() for loc in locations.split(',')],
                    apply_link=True,
                    skip_promoted_jobs=False,
                    page_offset=0,
                    limit=100,
                    filters=QueryFilters(
                        # relevance=RelevanceFilters.RECENT,
                        time=time_filter,
                    ),
                ),
            ),
        ]
        scraper.run(queries)
        if job_data:
            df = pd.DataFrame(job_data)
            message = f"Successfully scraped {len(job_data)} jobs."
            logging.info(message)
            return df, message
        else:
            logging.warning("No job data found.")
            return pd.DataFrame(), 'No jobs found.'
    except Exception as e:
        # Log the full traceback and surface a readable message in the UI
        logging.error(f"An error occurred during scraping: {e}", exc_info=True)
        message = f"An error occurred during scraping: {e}. Please check the logs for more details."
        return pd.DataFrame(), message
def gradio_interface(query, locations, time_filter):
    df, message = scrape_jobs(query, locations, time_filter)
    return df, message
# App Layout
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Job Query", placeholder="e.g., Data Scientist", value="Blockchain developers"),
        gr.Textbox(label="Locations (comma-separated)", placeholder="e.g., United States, India", value="United States, United Kingdom, India"),
        gr.Dropdown(
            label="Time Filter",
            choices=["From Past Month", "From Last 24 Hours"],
            value="From Last 24 Hours",  # Default option
            type="value",
        ),
    ],
    outputs=[
        # Headers match the columns built in on_data
        gr.Dataframe(
            label="Job Results",
            headers=['Date Posted', 'Title', 'Company', 'Location', 'Job Link', 'Description Length', 'Description'],
            interactive=True,
        ),
        gr.Textbox(label="Message"),
    ],
    title="Job Scraper",
    description="Enter a job query and locations to scrape job postings and display the results in a table.",
)
if __name__ == "__main__":
    iface.launch()
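# Usage note (assuming a local run with Gradio defaults): `python app.py`
# serves the app at http://127.0.0.1:7860; pass share=True to iface.launch()
# for a temporary public link. For long scrapes, calling iface.queue() before
# launch avoids request timeouts.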