# Streamlit app: scrape the title and rating of a fixed list of Booking.com
# listings, show them in a table, optionally upload them to a Google Sheet,
# and plot the numeric ratings as a bar chart.

import json  # used to parse the uploaded service-account JSON key

import gspread
import pandas as pd
import plotly.express as px
import requests
import streamlit as st
from bs4 import BeautifulSoup
from google.oauth2.service_account import Credentials

# Streamlit UI title
st.title("Booking.com 多項目數據抓取和視覺化展示")

# Combined list of listing URLs to scrape
urls = [
    "https://www.booking.com/hotel/it/appartamento-via-genova-roma.zh-tw.html",
    "https://www.booking.com/hotel/it/giulia-39-s-coliseum.zh-tw.html",
    "https://www.booking.com/hotel/it/le-stelle-dell-esquilino.zh-tw.html",
    "https://www.booking.com/hotel/it/retro-flat-termini.zh-tw.html",
    "https://www.booking.com/hotel/it/domus-fanti.zh-tw.html",
    "https://www.booking.com/hotel/it/radiant-retreat-in-esquilino.zh-tw.html",
    "https://www.booking.com/hotel/it/over-the-roof-top-roma.zh-tw.html",
    "https://www.booking.com/hotel/it/rome-sweet-home-roma1234.zh-tw.html",
    "https://www.booking.com/hotel/it/visione-guest-house-via-delle-fratte-36.zh-tw.html",
    "https://www.booking.com/hotel/it/appartamento-roma-centro-roma5.zh-tw.html",
    "https://www.booking.com/hotel/us/loews-royal-pacific-resort-at-universal.zh-tw.html",
    "https://www.booking.com/hotel/us/buena-vista-suites.zh-tw.html",
    "https://www.booking.com/hotel/us/ramada-international-drive.zh-tw.html",
    "https://www.booking.com/hotel/us/lake-buena-vista-14651-chelonia-parkway.zh-tw.html",
    "https://www.booking.com/hotel/it/trevi-apartment-roma1.zh-tw.html",
    "https://www.booking.com/hotel/it/white-flat-colosseo.zh-tw.html",
    "https://www.booking.com/hotel/it/via-cavour-238.zh-tw.html",
    "https://www.booking.com/hotel/it/modern-apartment-near-the-vatican-roma.zh-tw.html",
    "https://www.booking.com/hotel/it/apt-prati-lt-vatican-and-center.zh-tw.html",
    "https://www.booking.com/hotel/it/lion-99.zh-tw.html",
    "https://www.booking.com/hotel/it/sweet-dream-cavour-roma1.zh-tw.html",
    "https://www.booking.com/hotel/it/cavour-1.zh-tw.html",
    "https://www.booking.com/hotel/it/eufonia-vinyl-apartment.zh-tw.html",
    "https://www.booking.com/hotel/it/panisperna-apartment.zh-tw.html",
    "https://www.booking.com/hotel/it/navona-panoramic-penthouse.zh-tw.html",
]

# User-Agent header sent with every scraping request
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
}

# Upper bound for a single HTTP request so one stalled connection cannot
# hang the whole app.
REQUEST_TIMEOUT_SECONDS = 15

# How long a successfully scraped listing is reused before being re-fetched.
SCRAPE_CACHE_TTL_SECONDS = 600


def _element_text(soup, tag, css_class):
    """Return the stripped text of the first ``tag`` whose class is ``css_class``.

    Raises:
        ValueError: if no such element exists, so the caller can report which
            selector failed instead of an opaque ``NoneType`` attribute error.
    """
    node = soup.find(tag, class_=css_class)
    if node is None:
        raise ValueError(f"<{tag} class='{css_class}'> not found in page")
    return node.text.strip()


# Streamlit re-executes the script on every widget interaction; caching keeps
# each rerun from downloading every listing again. Exceptions propagate out of
# the cached function and are not stored, so failed URLs are retried.
@st.cache_data(ttl=SCRAPE_CACHE_TTL_SECONDS, show_spinner=False)
def _scrape_listing(url, headers):
    """Download one listing page and return its title and rating text.

    Raises:
        requests.RequestException: on network errors, timeouts, or an HTTP
            error status.
        ValueError: if the title or rating element is missing from the page.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")
    # NOTE(review): these class names look auto-generated and may change when
    # the site is redeployed -- confirm they still match if scraping fails.
    return {
        "標題": _element_text(soup, "h2", "d2fee87262 pp-header__title"),
        "評分": _element_text(soup, "div", "bcdcb105b3 f45d8e4c32 df64fda51b"),
    }


# Data-fetching function
def fetch_booking_data(url, headers):
    """Scrape a single listing, reporting failures in the UI.

    Args:
        url: Listing page URL.
        headers: HTTP headers to send with the request.

    Returns:
        A dict with keys ``"標題"`` (title) and ``"評分"`` (raw rating text),
        or ``None`` if the page could not be fetched or parsed; in that case
        an error box is shown via ``st.error``.
    """
    try:
        return _scrape_listing(url, headers)
    except Exception as e:
        # UI boundary: one bad listing must not take down the whole page.
        st.error(f"Error fetching data from {url}: {e}")
        return None


# Fetch the data and display it
st.header("抓取數據")
data = []
for url in urls:
    result = fetch_booking_data(url, headers)
    if result:
        data.append(result)

# Convert the scraped rows to a DataFrame and show it
if data:
    df = pd.DataFrame(data)
    st.write("抓取到的數據:", df)
else:
    st.warning("無法從任何 URL 抓取數據。")


# Upload-to-Google-Sheets function
def upload_to_google_sheet(df, spreadsheet_url, creds_file_content):
    """Replace the first worksheet of a spreadsheet with the contents of ``df``.

    Args:
        df: DataFrame to upload; the header row is written first.
        spreadsheet_url: URL of the target Google Sheets document.
        creds_file_content: Service-account key as JSON text or bytes.

    Returns:
        None. Success or failure is reported via ``st.success`` / ``st.error``;
        no exception escapes this function.
    """
    try:
        # Parse the key content into a dict
        creds_dict = json.loads(creds_file_content)

        # Build the credentials
        scope = ['https://www.googleapis.com/auth/spreadsheets']
        creds = Credentials.from_service_account_info(creds_dict, scopes=scope)

        # Authorize and open the spreadsheet
        gs = gspread.authorize(creds)
        sheet = gs.open_by_url(spreadsheet_url)
        worksheet = sheet.get_worksheet(0)

        # Clear the worksheet, then write header + rows.
        # NOTE: if the update call fails, the worksheet has already been cleared.
        worksheet.clear()
        worksheet.update([df.columns.values.tolist()] + df.values.tolist())

        st.success("數據已成功上傳到 Google Sheets!")
    except Exception as e:
        # UI boundary: surface any credential / API failure to the user.
        st.error(f"數據上傳失敗:{e}")


# Upload to Google Sheets
st.header("上傳到 Google Sheets")
spreadsheet_url = st.text_input(
    "Google Sheets URL",
    "https://docs.google.com/spreadsheets/d/1iOzoii9bVAmqlcqnseoqjZBkBuaFcpbIvUxZeRJ2kmk/edit?gid=0#gid=0",
)
creds_file = st.file_uploader("上傳 Google API 金鑰檔案", type=["json"])

if st.button("上傳數據至 Google Sheets") and data:
    if creds_file is not None:
        # getvalue() returns the full content regardless of the current stream
        # position, so a second click does not read an exhausted buffer.
        creds_content = creds_file.getvalue()
        upload_to_google_sheet(df, spreadsheet_url, creds_content)
    else:
        st.error("請上傳 Google API 金鑰檔案")

# Data visualization
st.header("數據視覺化")
if data:
    # Extract the numeric rating. The fractional part is optional so that an
    # integer score (e.g. "10") is kept instead of becoming NaN.
    df['數字評分'] = (
        df['評分'].str.extract(r'評分:(\d+(?:\.\d+)?)', expand=False).astype(float)
    )

    # Draw the bar chart with Plotly
    fig = px.bar(
        df,
        x='標題',
        y='數字評分',
        title="Booking.com 評分比較",
        labels={'標題': '酒店標題', '數字評分': '評分'},
        text='數字評分',
    )
    fig.update_layout(xaxis_title="酒店標題", yaxis_title="評分", xaxis_tickangle=-45)
    st.plotly_chart(fig)
else:
    st.warning("無可用數據繪製圖表")