srinuksv committed on
Commit
22571b0
·
verified ·
1 Parent(s): f80c39f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -0
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ import re
4
+ from fastapi import FastAPI
5
+ from fastapi.responses import HTMLResponse
6
+ from fastapi.staticfiles import StaticFiles
7
+ from selenium import webdriver
8
+ from selenium.webdriver.chrome.service import Service
9
+ from selenium.webdriver.common.by import By
10
+ from webdriver_manager.chrome import ChromeDriverManager
11
+ from selenium.webdriver.chrome.options import Options
12
+
13
# ASGI application object; the route handlers in this file register on it.
app = FastAPI()

# Publish the local "static" directory under the /static URL prefix.
app.mount(path="/static", app=StaticFiles(directory="static"), name="static")
17
+
18
def _parse_job_tile(job):
    """Extract the fields of a single job-tile element into a dict.

    Raises whatever Selenium raises when a mandatory element is missing;
    the caller decides whether to skip the tile.
    """
    from selenium.common.exceptions import NoSuchElementException

    # NOTE(review): the "job-pubilshed-date" spelling is kept exactly as in
    # the original selector; it must match the remote markup, so verify
    # against the live page before "correcting" it.
    posted_date = job.find_element(
        By.CSS_SELECTOR, 'small[data-test="job-pubilshed-date"]'
    ).text.strip()
    title_element = job.find_element(By.CSS_SELECTOR, 'h2.job-tile-title > a')
    title = title_element.text.strip()
    link = title_element.get_attribute('href')
    description = job.find_element(
        By.CSS_SELECTOR, 'div[data-test="JobTileDetails"] > div > div > p'
    ).text.strip()

    job_info = job.find_element(By.CSS_SELECTOR, 'ul.job-tile-info-list')
    job_type = job_info.find_element(
        By.CSS_SELECTOR, 'li[data-test="job-type-label"]'
    ).text.strip()
    experience_level = job_info.find_element(
        By.CSS_SELECTOR, 'li[data-test="experience-level"]'
    ).text.strip()

    # Budget: prefer the fixed-price element, fall back to the duration label.
    # Only a missing element triggers the fallback, so interrupts and
    # unrelated failures are no longer silently swallowed.
    try:
        budget = job_info.find_element(
            By.CSS_SELECTOR, 'li[data-test="is-fixed-price"]'
        ).text.strip()
    except NoSuchElementException:
        budget = job_info.find_element(
            By.CSS_SELECTOR, 'li[data-test="duration-label"]'
        ).text.strip()

    return {
        'title': title,
        'date': posted_date,
        'link': link,
        'description': description,
        'job_type': job_type,
        'experience_level': experience_level,
        'budget': budget,
    }


def scrape_upwork_data(search_query, num_jobs, page):
    """Scrape one page of Upwork job-search results with headless Chrome.

    Args:
        search_query: Free-text search term. It is URL-encoded before being
            placed in the ``q`` query parameter, so spaces, ``&`` and ``#``
            no longer corrupt the request URL.
        num_jobs: Maximum number of listings to return from this page.
            ``None`` means "no limit".
        page: Result page number to request.

    Returns:
        A list of dicts with the keys ``title``, ``date``, ``link``,
        ``description``, ``job_type``, ``experience_level`` and ``budget``.
        Tiles that fail to parse are reported on stdout and skipped.
    """
    from urllib.parse import quote_plus

    options = Options()
    options.add_argument("--headless")  # Run in headless mode for faster scraping
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=options)

    job_listings = []
    try:
        encoded_query = quote_plus(str(search_query))
        url = (
            'https://www.upwork.com/nx/search/jobs?amount=500-&hourly_rate=25-'
            '&location=Americas,Europe,Australia%20and%20New%20Zealand,Canada,'
            'India,Switzerland,United%20States'
            f'&per_page=50&q={encoded_query}&sort=recency&t=0,1&page={page}'
        )
        driver.get(url)

        time.sleep(5)  # Wait for the page to load
        jobs = driver.find_elements(By.CSS_SELECTOR, 'article[data-test="JobTile"]')

        for job in jobs:
            # Honor the requested limit instead of ignoring the parameter.
            if num_jobs is not None and len(job_listings) >= num_jobs:
                break
            try:
                job_listings.append(_parse_job_tile(job))
            except Exception as e:
                # Best effort: one malformed tile must not abort the page.
                print(f'Error parsing job listing: {e}')
    finally:
        # Always release the browser process, even if scraping failed.
        driver.quit()

    return job_listings
67
+
68
@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve the landing page with the job-search form.

    The form sends ``query`` and ``num_jobs`` as GET parameters to ``/jobs``.
    """
    landing_page = """
    <html>
        <head>
            <title>Upwork Job Listings</title>
        </head>
        <body>
            <h1>Welcome to Upwork Job Scraper</h1>
            <form action="/jobs" method="get">
                <input type="text" name="query" placeholder="Search Query" required>
                <input type="number" name="num_jobs" value="50" min="1" max="100" required>
                <button type="submit">Search Jobs</button>
            </form>
        </body>
    </html>
    """
    return landing_page
85
+
86
@app.get("/jobs", response_class=HTMLResponse)
async def get_jobs(query: str, num_jobs: int = 50):
    """Scrape Upwork for ``query`` and render the listings as an HTML fragment.

    Args:
        query: Search term forwarded to ``scrape_upwork_data``.
        num_jobs: Maximum number of listings to render (default 50).

    Returns:
        An ``HTMLResponse`` containing a heading followed by one ``<div>``
        per job. All scraped values are HTML-escaped before insertion.
    """
    import asyncio
    from html import escape

    def esc(value):
        # Scraped text is untrusted third-party content; escape it (quotes
        # included, because the link lands inside an attribute).
        return escape("" if value is None else str(value), quote=True)

    limit = max(num_jobs, 0)
    loop = asyncio.get_running_loop()

    jobs = []
    for page in range(1, 3):  # Change to however many pages you want to scrape
        remaining = limit - len(jobs)
        if remaining <= 0:
            break
        # Selenium scraping blocks for seconds; run it in the default thread
        # pool so the event loop keeps serving other requests.
        job_listings = await loop.run_in_executor(
            None, scrape_upwork_data, query, remaining, page
        )
        jobs.extend(job_listings)
    jobs = jobs[:limit]

    # Generate HTML output for jobs; collect parts and join once instead of
    # repeated string concatenation.
    parts = ["<h2>Job Listings</h2>"]
    for job in jobs:
        parts.append(f"""
        <div>
            <h3><a href="{esc(job['link'])}">{esc(job['title'])}</a></h3>
            <p>Posted Date: {esc(job['date'])}</p>
            <p>Type: {esc(job['job_type'])}</p>
            <p>Experience Level: {esc(job['experience_level'])}</p>
            <p>Budget: {esc(job['budget'])}</p>
            <p>Description: {esc(job['description'])}</p>
        </div>
        <hr>
        """)
    return HTMLResponse(content="".join(parts))
108
+
109
if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces, port 7860.
    import uvicorn

    uvicorn.run(app=app, host="0.0.0.0", port=7860)