Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import time
|
2 |
+
import pandas as pd
|
3 |
+
import re
|
4 |
+
from fastapi import FastAPI
|
5 |
+
from fastapi.responses import HTMLResponse
|
6 |
+
from fastapi.staticfiles import StaticFiles
|
7 |
+
from selenium import webdriver
|
8 |
+
from selenium.webdriver.chrome.service import Service
|
9 |
+
from selenium.webdriver.common.by import By
|
10 |
+
from webdriver_manager.chrome import ChromeDriverManager
|
11 |
+
from selenium.webdriver.chrome.options import Options
|
12 |
+
|
13 |
+
# FastAPI application instance; routes are registered on it below.
app = FastAPI()

# Serve static files: expose the local ./static directory under the /static
# URL prefix (presumably for CSS/JS assets — the directory must exist).
app.mount("/static", StaticFiles(directory="static"), name="static")
|
17 |
+
|
18 |
+
def scrape_upwork_data(search_query, num_jobs, page):
    """Scrape one results page of Upwork job listings with headless Chrome.

    Args:
        search_query: Text placed into the Upwork search ``q`` parameter.
        num_jobs: Maximum number of listings to return from this page.
            (Previously this parameter was accepted but silently ignored.)
        page: 1-based results-page index to fetch.

    Returns:
        A list of dicts with keys ``title``, ``date``, ``link``,
        ``description``, ``job_type``, ``experience_level`` and ``budget``.
        Listings whose markup cannot be parsed are skipped with a log line.
    """
    options = Options()
    options.add_argument("--headless")  # Run in headless mode for faster scraping
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=options)

    job_listings = []
    try:
        url = f'https://www.upwork.com/nx/search/jobs?amount=500-&hourly_rate=25-&location=Americas,Europe,Australia%20and%20New%20Zealand,Canada,India,Switzerland,United%20States&per_page=50&q={search_query}&sort=recency&t=0,1&page={page}'
        driver.get(url)

        time.sleep(5)  # Crude wait for the page to render (no explicit waits used)
        jobs = driver.find_elements(By.CSS_SELECTOR, 'article[data-test="JobTile"]')

        for job in jobs:
            # Honor the caller's requested cap instead of ignoring it.
            if len(job_listings) >= num_jobs:
                break
            try:
                # NOTE(review): "pubilshed" looks misspelled but appears to be the
                # site's own data-test attribute — do not "fix" the selector.
                posted_date = job.find_element(By.CSS_SELECTOR, 'small[data-test="job-pubilshed-date"]').text.strip()
                title_element = job.find_element(By.CSS_SELECTOR, 'h2.job-tile-title > a')
                title = title_element.text.strip()
                link = title_element.get_attribute('href')
                description = job.find_element(By.CSS_SELECTOR, 'div[data-test="JobTileDetails"] > div > div > p').text.strip()

                job_info = job.find_element(By.CSS_SELECTOR, 'ul.job-tile-info-list')
                job_type = job_info.find_element(By.CSS_SELECTOR, 'li[data-test="job-type-label"]').text.strip()
                experience_level = job_info.find_element(By.CSS_SELECTOR, 'li[data-test="experience-level"]').text.strip()

                # Budget is either a fixed price or, failing that, a duration label.
                try:
                    budget = job_info.find_element(By.CSS_SELECTOR, 'li[data-test="is-fixed-price"]').text.strip()
                except NoSuchElementException:
                    # Was a bare ``except:`` — narrowed so real errors still surface.
                    budget = job_info.find_element(By.CSS_SELECTOR, 'li[data-test="duration-label"]').text.strip()

                job_listings.append({
                    'title': title,
                    'date': posted_date,
                    'link': link,
                    'description': description,
                    'job_type': job_type,
                    'experience_level': experience_level,
                    'budget': budget
                })

            except Exception as e:
                # Best-effort: skip listings whose markup doesn't match the selectors.
                print(f'Error parsing job listing: {e}')

    finally:
        # Always release the browser, even when navigation or parsing fails.
        driver.quit()

    return job_listings
|
67 |
+
|
68 |
+
@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve the landing page: a minimal search form that submits to GET /jobs.

    Returns:
        Raw HTML (rendered as-is via the HTMLResponse response class).
    """
    return """
    <html>
        <head>
            <title>Upwork Job Listings</title>
        </head>
        <body>
            <h1>Welcome to Upwork Job Scraper</h1>
            <form action="/jobs" method="get">
                <input type="text" name="query" placeholder="Search Query" required>
                <input type="number" name="num_jobs" value="50" min="1" max="100" required>
                <button type="submit">Search Jobs</button>
            </form>
        </body>
    </html>
    """
|
85 |
+
|
86 |
+
@app.get("/jobs", response_class=HTMLResponse)
async def get_jobs(query: str, num_jobs: int = 50):
    """Scrape Upwork for *query* and render the results as an HTML list.

    Args:
        query: Search text forwarded to Upwork.
        num_jobs: Maximum number of listings to render (previously accepted
            but never enforced — the page count alone decided the total).

    Returns:
        HTMLResponse with one <div> per job listing.
    """
    jobs = []
    for page in range(1, 3):  # Change to however many pages you want to scrape
        if len(jobs) >= num_jobs:
            break
        jobs.extend(scrape_upwork_data(query, num_jobs, page))

    # Build the page in a list and join once (avoids quadratic ``+=``), and
    # escape every scraped field: the job data is untrusted remote content and
    # interpolating it raw into HTML would allow markup/script injection.
    parts = ["<h2>Job Listings</h2>"]
    for job in jobs[:num_jobs]:
        parts.append(f"""
        <div>
            <h3><a href="{html.escape(job['link'], quote=True)}">{html.escape(job['title'])}</a></h3>
            <p>Posted Date: {html.escape(job['date'])}</p>
            <p>Type: {html.escape(job['job_type'])}</p>
            <p>Experience Level: {html.escape(job['experience_level'])}</p>
            <p>Budget: {html.escape(job['budget'])}</p>
            <p>Description: {html.escape(job['description'])}</p>
        </div>
        <hr>
        """)
    return HTMLResponse(content="".join(parts))
|
108 |
+
|
109 |
+
# Script entry point: run the ASGI app directly with uvicorn.
if __name__ == "__main__":
    import uvicorn
    # Bind on all interfaces; port 7860 is presumably chosen for a hosted
    # environment's default port — confirm against the deployment target.
    uvicorn.run(app, host="0.0.0.0", port=7860)
|