brackozi committed on
Commit
4b926ea
·
1 Parent(s): d3880f8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import pandas as pd
4
+ import gradio as gr
5
+
6
def scrape_jobs(link, timeout=10):
    """Fetch a job-listings page and extract every posting found on it.

    Each posting is looked up as a ``<div class="job-listing">`` and its
    fields are read from an ``<h2 class="job-title">``, a
    ``<span class="job-location">`` and a ``<div class="job-description">``
    inside that div.

    Args:
        link: URL of the page to scrape.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of dicts with the keys ``'Title'``, ``'Location'`` and
        ``'Description'``, one per listing, in document order. A field
        whose element is missing from a listing is an empty string.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    # requests has no default timeout; without one an unresponsive host
    # would block the caller forever.
    response = requests.get(link, timeout=timeout)
    # Fail loudly on an HTTP error instead of parsing the error page and
    # silently reporting zero jobs.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')

    def field_text(listing, tag, css_class):
        # find() returns None when the element is absent; fall back to ''
        # so one incomplete listing does not abort the whole scrape.
        node = listing.find(tag, class_=css_class)
        return node.text.strip() if node is not None else ''

    return [
        {
            'Title': field_text(listing, 'h2', 'job-title'),
            'Location': field_text(listing, 'span', 'job-location'),
            'Description': field_text(listing, 'div', 'job-description'),
        }
        for listing in soup.find_all('div', class_='job-listing')
    ]
33
+
34
def export_to_excel(jobs, filename):
    """Write job records to an Excel file.

    Args:
        jobs: Records to export; passed straight to ``pd.DataFrame``.
        filename: Destination handed to ``DataFrame.to_excel``.
    """
    # index=False keeps the DataFrame's row index out of the spreadsheet.
    pd.DataFrame(jobs).to_excel(filename, index=False)
40
+
41
def job_listing_scraper(link):
    """Scrape the page at *link*, save the results, and report the outcome.

    Args:
        link: URL passed on to ``scrape_jobs``.

    Returns:
        A status message naming the Excel file the listings were written to.
    """
    output_file = "job_listings.xlsx"
    # Scrape and export in one step; the file name is fixed.
    export_to_excel(scrape_jobs(link), output_file)
    return f"Job listings scraped successfully! Saved to {output_file}"
46
+
47
# Build the web UI: one text box in (the URL), one text box out (the status
# message returned by job_listing_scraper).
interface = gr.Interface(
    job_listing_scraper,
    "text",
    "text",
    title="Job Listing Scraper",
    description="Enter the link to the carrier website and click 'Submit' to scrape job listings and save them to an Excel file.",
    examples=[["https://example.com/carrier"]],
)

# Start the Gradio app.
interface.launch()