|
import streamlit as st |
|
import pandas as pd |
|
import requests |
|
from bs4 import BeautifulSoup |
|
from google.oauth2.service_account import Credentials |
|
import gspread |
|
import plotly.express as px |
|
|
|
|
|
# Page heading shown at the top of the Streamlit app.
st.title("Booking.com 多項目數據抓取和視覺化展示")
|
|
|
|
|
# Booking.com property pages (zh-tw locale) to scrape, in display order.
urls = [
    "https://www.booking.com/hotel/it/appartamento-via-genova-roma.zh-tw.html",
    "https://www.booking.com/hotel/it/giulia-39-s-coliseum.zh-tw.html",
    "https://www.booking.com/hotel/it/le-stelle-dell-esquilino.zh-tw.html",
    "https://www.booking.com/hotel/it/retro-flat-termini.zh-tw.html",
    "https://www.booking.com/hotel/it/domus-fanti.zh-tw.html",
    "https://www.booking.com/hotel/it/radiant-retreat-in-esquilino.zh-tw.html",
    "https://www.booking.com/hotel/it/over-the-roof-top-roma.zh-tw.html",
    "https://www.booking.com/hotel/it/rome-sweet-home-roma1234.zh-tw.html",
    "https://www.booking.com/hotel/it/visione-guest-house-via-delle-fratte-36.zh-tw.html",
    "https://www.booking.com/hotel/it/appartamento-roma-centro-roma5.zh-tw.html",
]
|
|
|
|
|
# HTTP headers sent with every page request. The User-Agent is a desktop
# Chrome string -- presumably so the site serves its regular HTML page
# (not verified here).
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/87.0.4280.88 Safari/537.36"
    ),
}
|
|
|
|
|
def fetch_booking_data(url, headers, timeout=10):
    """Scrape the property title and review score from one hotel page.

    Args:
        url: Address of the page to download.
        headers: HTTP headers forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before giving up, so that a
            single stalled page cannot block the whole app.

    Returns:
        A dict with the keys "標題" (title text) and "評分" (rating text), or
        ``None`` when the page could not be downloaded or the expected
        elements were not found. Failures are reported with ``st.error``
        instead of being raised.
    """
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Turn 4xx/5xx answers into an explicit error instead of trying to
        # parse an error page.
        response.raise_for_status()

        soup = BeautifulSoup(response.content, "html.parser")
        title_tag = soup.find("h2", class_="d2fee87262 pp-header__title")
        rating_tag = soup.find("div", class_="bcdcb105b3 f45d8e4c32 df64fda51b")

        # find() returns None when nothing matches; say which element is
        # missing rather than failing with an AttributeError on None.
        if title_tag is None:
            raise ValueError("title element not found on page")
        if rating_tag is None:
            raise ValueError("rating element not found on page")

        return {"標題": title_tag.text.strip(), "評分": rating_tag.text.strip()}
    except Exception as e:
        # UI boundary: show the problem for this URL and let the caller
        # continue with the remaining URLs.
        st.error(f"Error fetching data from {url}: {e}")
        return None
|
|
|
|
|
# Scrape every configured page; pages that failed (None result) are skipped.
# NOTE(review): Streamlit re-executes the script on each widget interaction,
# so this loop appears to re-download all pages on every rerun -- consider
# caching the results.
data = []
for url in urls:
    result = fetch_booking_data(url, headers)
    if result:
        data.append(result)
|
|
|
|
|
# Show the scraped rows as a table, or a warning when nothing was scraped.
# `df` is only created when at least one page was scraped successfully.
if data:
    df = pd.DataFrame(data)
    st.write("抓取到的數據:", df)
else:
    st.warning("無法從任何 URL 抓取數據。")
|
|
|
|
|
def upload_to_google_sheet(df, spreadsheet_url, creds_file):
    """Replace the contents of a spreadsheet's first worksheet with ``df``.

    Args:
        df: pandas DataFrame to upload; its column names become the first row.
        spreadsheet_url: URL of the target Google spreadsheet.
        creds_file: Service-account key, given as a filesystem path, an
            already parsed key dict, or a readable file-like object that
            holds the key JSON (bytes or text).

    Returns:
        None. Success and failure are both reported through Streamlit
        (``st.success`` / ``st.error``); exceptions are not propagated.
    """
    import json  # local import: only needed to parse in-memory key files

    try:
        scope = ['https://www.googleapis.com/auth/spreadsheets']

        if isinstance(creds_file, dict):
            creds = Credentials.from_service_account_info(creds_file, scopes=scope)
        elif hasattr(creds_file, "read"):
            # In-memory upload: there is no path to open, so parse the
            # content directly. Rewind first in case it was read before.
            if hasattr(creds_file, "seek"):
                creds_file.seek(0)
            key_info = json.loads(creds_file.read())
            creds = Credentials.from_service_account_info(key_info, scopes=scope)
        else:
            creds = Credentials.from_service_account_file(creds_file, scopes=scope)

        # Build the payload before touching the sheet. NaN is not valid
        # JSON, so missing values are sent as empty cells.
        cells = df.astype(object).where(df.notna(), "")
        rows = [cells.columns.tolist()] + cells.values.tolist()

        gs = gspread.authorize(creds)
        sheet = gs.open_by_url(spreadsheet_url)
        worksheet = sheet.get_worksheet(0)
        worksheet.clear()
        worksheet.update(rows)
        st.success("Data uploaded to Google Sheets successfully!")
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        st.error(f"Error uploading data: {e}")
|
|
|
|
|
# Target spreadsheet; the text box is pre-filled with a default sheet URL.
spreadsheet_url = st.text_input(
    "Google Sheets URL",
    "https://docs.google.com/spreadsheets/d/1iOzoii9bVAmqlcqnseoqjZBkBuaFcpbIvUxZeRJ2kmk/edit?gid=0#gid=0",
)
# Upload widget for the Google API key file; restricted to .json files.
creds_file = st.file_uploader("上傳 Google API 金鑰檔案", type=["json"])
|
|
|
|
|
# Upload only when the button was pressed on this run and rows were scraped.
if st.button("上傳數據至 Google Sheets") and data:
    if creds_file is not None:
        import os
        import tempfile

        # The uploaded key lives in memory; its `.name` is just the original
        # filename, not a path that can be opened here. Write the bytes to a
        # temporary file so the uploader receives a real path.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tmp:
            tmp.write(creds_file.getvalue())
        try:
            upload_to_google_sheet(df, spreadsheet_url, tmp.name)
        finally:
            # Do not leave the private key on disk.
            os.remove(tmp.name)
    else:
        st.error("請上傳 Google API 金鑰檔案")
|
|
|
|
|
# Bar chart of ratings per property. Guard on `data` (as the upload step
# does): `df` exists only when `data` is non-empty, so testing `df` directly
# would raise NameError when every page failed to scrape.
if data:
    # Ratings are scraped as text; values that are not numbers become NaN.
    df['評分'] = pd.to_numeric(df['評分'], errors='coerce')

    fig = px.bar(
        df,
        x='標題',
        y='評分',
        title="Booking.com 評分比較",
        labels={'標題': '酒店標題', '評分': '評分'},
    )
    fig.update_layout(xaxis_title="酒店標題", yaxis_title="評分", xaxis_tickangle=-45)
    st.plotly_chart(fig)
else:
    st.warning("無可用數據繪製圖表")
|
|