File size: 3,858 Bytes
69c661e
7b0faa9
 
3474389
7b0faa9
3474389
7b0faa9
 
 
3474389
7b0faa9
 
 
 
 
 
 
 
 
 
 
 
 
8308e31
7b0faa9
 
 
3474389
7b0faa9
8308e31
 
7b0faa9
 
 
8308e31
7b0faa9
 
 
 
 
2d97236
795f17e
2d97236
7b0faa9
8308e31
7b0faa9
 
 
8308e31
 
 
 
 
 
 
7b0faa9
 
 
 
 
 
8308e31
7b0faa9
 
 
8308e31
 
7b0faa9
 
 
 
 
 
8308e31
7b0faa9
8308e31
 
3474389
8308e31
7b0faa9
3474389
 
8308e31
7b0faa9
3474389
 
 
7b0faa9
 
8308e31
 
 
7b0faa9
3474389
7b0faa9
 
 
f095e3e
2d97236
8308e31
 
 
2d97236
8308e31
 
7b0faa9
 
8308e31
7b0faa9
 
8308e31
 
7b0faa9
 
 
3474389
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import gradio as gr
import logging
from linkedin_jobs_scraper import LinkedinScraper
from linkedin_jobs_scraper.events import Events, EventData, EventMetrics
from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, OnSiteOrRemoteFilters
import pandas as pd

# Configure logging
# Root-logger configuration: writes to job_scraper.log in the current working
# directory; library loggers (e.g. the scraper's) propagate here as well.
logging.basicConfig(filename="job_scraper.log", level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Initialize job data storage
# Module-level accumulator: filled by on_data() during a scrape and reset at
# the start of each scrape_jobs() call.
job_data: list = []

# Event Handlers
def on_data(data: EventData):
    """Append one scraped job posting to the module-level ``job_data`` list.

    Fired by the scraper for each job result; ``data`` carries the posting's
    fields (date, title, company, location, link, description).
    """
    description = data.description
    record = {
        'Date Posted': data.date,
        'Title': data.title,
        'Company': data.company,
        'Location': data.location,
        'Job Link': data.link,
        'Description Length': len(description),
        'Description': description,
    }
    job_data.append(record)

def on_end():
    """Record that the scraping run finished (handler for the END event)."""
    logging.log(logging.INFO, "[ON_END] Scraping completed.")

# Scraper function
def scrape_jobs(query, locations, time_filter):
    """Scrape LinkedIn job postings and return ``(DataFrame, message)``.

    Args:
        query: Free-text job search query (e.g. "Data Scientist").
        locations: Comma-separated location names; surrounding whitespace
            around each name is ignored.
        time_filter: UI label, "From Past Month" or "From Last 24 Hours".
            Any other value falls back to the past-month filter.

    Returns:
        tuple[pandas.DataFrame, str]: the scraped rows (an empty DataFrame
        when nothing was found or an error occurred) and a human-readable
        status message.
    """
    global job_data
    try:
        job_data = []  # reset the module-level accumulator for this run

        scraper = LinkedinScraper(
            chrome_executable_path=None,
            chrome_binary_location=None,
            chrome_options=None,
            headless=True,
            max_workers=5,
            slow_mo=0.8,  # throttle requests; presumably to avoid rate-limiting — TODO confirm
            page_load_timeout=100,
        )

        scraper.on(Events.DATA, on_data)
        scraper.on(Events.END, on_end)

        # Map the UI label to the scraper's enum; unrecognized labels
        # silently fall back to MONTH (matches the original behavior).
        time_filter = {
            "From Past Month": TimeFilters.MONTH,
            "From Last 24 Hours": TimeFilters.DAY,
        }.get(time_filter, TimeFilters.MONTH)

        queries = [
            Query(
                query=query,
                options=QueryOptions(
                    # FIX: strip whitespace and drop empty entries so
                    # "United States, India" does not search " India".
                    locations=[loc.strip() for loc in locations.split(',') if loc.strip()],
                    apply_link=True,
                    skip_promoted_jobs=False,
                    page_offset=0,
                    limit=100,
                    filters=QueryFilters(
                        # relevance=RelevanceFilters.RECENT,
                        time=time_filter,
                    ),
                ),
            ),
        ]

        scraper.run(queries)

        if job_data:
            df = pd.DataFrame(job_data)
            message = f"Jobs ({len(job_data)}) data successfully scraped."
            logging.info(message)
            return df, message
        else:
            logging.warning("No job data found.")
            return pd.DataFrame(), 'No jobs found.'

    except Exception as e:
        # Boundary handler: log the full traceback, surface a friendly message.
        logging.error(f"An error occurred during scraping: {e}", exc_info=True)
        message = f"An error occurred during scraping: {e}. Please check the logs for more details."
        # FIX: return an empty DataFrame instead of None so the Gradio
        # Dataframe output always receives a consistent type.
        return pd.DataFrame(), message

def gradio_interface(query, locations, time_filter):
    """Gradio callback: delegate straight to scrape_jobs and pass its
    (DataFrame, message) result through unchanged."""
    return scrape_jobs(query, locations, time_filter)

# App Layout
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Job Query", placeholder="e.g., Data Scientist", value="Blockchain developers"),
        gr.Textbox(label="Locations (comma-separated)", placeholder="e.g., United States, India", value="United States, United Kingdom, India"),
        gr.Dropdown(
            label="Time Filter",
            choices=["From Past Month", "From Last 24 Hours"],
            value="From Last 24 Hours",  # Default option
            type="value",
        ),
    ],
    outputs=[
        # FIX: headers now match the columns actually produced by on_data();
        # the previous list ('Date', 'Company', 'ApplyLink') named columns
        # that do not exist in the scraped DataFrame.
        gr.Dataframe(
            label="Job Results",
            headers=['Date Posted', 'Title', 'Company', 'Location', 'Job Link', 'Description Length', 'Description'],
            interactive=True,
        ),
        gr.Textbox(label="Message"),
    ],
    title="Job Scraper",
    description="Enter a job query and locations to scrape job postings and display the results in a table.",
)

if __name__ == "__main__":
    iface.launch()