File size: 5,273 Bytes
963e1ff 736153f 963e1ff d76b50b 963e1ff f573f99 d76b50b 6f6173d 86f7cfa d76b50b 736153f 963e1ff d76b50b 963e1ff 86f7cfa 736153f d76b50b 736153f d76b50b 86f7cfa 6f6173d 963e1ff 736153f 963e1ff 736153f 963e1ff 736153f 963e1ff 736153f 963e1ff 736153f 963e1ff 86f7cfa 736153f 963e1ff d76b50b 6f6173d 963e1ff d76b50b 963e1ff 86f7cfa 736153f f573f99 d76b50b f573f99 d76b50b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
import streamlit as st
import pandas as pd
import requests
from bs4 import BeautifulSoup
from google.oauth2.service_account import Credentials
import gspread
import plotly.express as px
import json # 用於解析 JSON 文件
# Page title displayed at the top of the Streamlit app.
APP_TITLE = "Booking.com 多項目數據抓取和視覺化展示"
st.title(APP_TITLE)
# Combined list of Booking.com listing pages to scrape (zh-tw locale).
# Every URL shares the same prefix and suffix, so only the
# "<country>/<hotel-slug>" part is spelled out per entry.
_BOOKING_URL_TEMPLATE = "https://www.booking.com/hotel/{}.zh-tw.html"
_HOTEL_PATHS = (
    "it/appartamento-via-genova-roma",
    "it/giulia-39-s-coliseum",
    "it/le-stelle-dell-esquilino",
    "it/retro-flat-termini",
    "it/domus-fanti",
    "it/radiant-retreat-in-esquilino",
    "it/over-the-roof-top-roma",
    "it/rome-sweet-home-roma1234",
    "it/visione-guest-house-via-delle-fratte-36",
    "it/appartamento-roma-centro-roma5",
    "us/loews-royal-pacific-resort-at-universal",
    "us/buena-vista-suites",
    "us/ramada-international-drive",
    "us/lake-buena-vista-14651-chelonia-parkway",
    "it/trevi-apartment-roma1",
    "it/white-flat-colosseo",
    "it/via-cavour-238",
    "it/modern-apartment-near-the-vatican-roma",
    "it/apt-prati-lt-vatican-and-center",
    "it/lion-99",
    "it/sweet-dream-cavour-roma1",
    "it/cavour-1",
    "it/eufonia-vinyl-apartment",
    "it/panisperna-apartment",
    "it/navona-panoramic-penthouse",
)
urls = [_BOOKING_URL_TEMPLATE.format(path) for path in _HOTEL_PATHS]
# HTTP headers sent with every scrape request; only a browser-like
# User-Agent is set.
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/87.0.4280.88 Safari/537.36"
)
headers = {"User-Agent": _USER_AGENT}
# Scrape the title and rating text of a single Booking.com listing page.
def fetch_booking_data(url, headers, timeout=10):
    """Fetch one listing page and extract its title and rating text.

    Args:
        url: Address of the listing page to download.
        headers: HTTP headers passed through to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a single stalled connection would block the app.

    Returns:
        ``{"標題": title, "評分": rating}`` with whitespace-stripped text, or
        ``None`` when the request fails or an expected element is absent.
        In the failure case the reason is reported through ``st.error``.
    """
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Surface 4xx/5xx responses as a clear HTTP error instead of trying
        # to parse an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")
        title_tag = soup.find("h2", class_="d2fee87262 pp-header__title")
        rating_tag = soup.find("div", class_="bcdcb105b3 f45d8e4c32 df64fda51b")

        # soup.find returns None when nothing matches; report which element
        # is missing rather than failing on ``None.text``.
        missing = [
            label
            for label, tag in (("title", title_tag), ("rating", rating_tag))
            if tag is None
        ]
        if missing:
            st.error(
                f"Error fetching data from {url}: "
                f"missing {' and '.join(missing)} element in page"
            )
            return None

        return {"標題": title_tag.text.strip(), "評分": rating_tag.text.strip()}
    except Exception as e:
        # Best-effort scraping: one bad page must not stop the other URLs.
        st.error(f"Error fetching data from {url}: {e}")
        return None
# Scrape every configured URL and show the results.
st.header("抓取數據")
# fetch_booking_data returns None on failure; keep only successful records.
data = [
    record
    for record in (fetch_booking_data(link, headers) for link in urls)
    if record
]
# Show the scraped records as a table, or warn when nothing was fetched.
if not data:
    st.warning("無法從任何 URL 抓取數據。")
else:
    df = pd.DataFrame(data)
    st.write("抓取到的數據:", df)
# Upload a DataFrame to the first worksheet of a Google Sheets document.
def upload_to_google_sheet(df, spreadsheet_url, creds_file_content):
    """Replace the first worksheet's contents with the rows of ``df``.

    Args:
        df: DataFrame to upload; its column names become the header row.
        spreadsheet_url: URL of the target Google Sheets document.
        creds_file_content: JSON text (``str`` or ``bytes``) of a Google
            service-account key.

    Returns:
        None. Success is reported with ``st.success`` and any failure with
        ``st.error``; exceptions are not propagated to the caller.
    """
    try:
        # Parse the key file content into a dict.
        creds_dict = json.loads(creds_file_content)
        # Build service-account credentials limited to the Sheets scope.
        scope = ['https://www.googleapis.com/auth/spreadsheets']
        creds = Credentials.from_service_account_info(creds_dict, scopes=scope)
        # Authorize and open the first worksheet of the spreadsheet.
        gs = gspread.authorize(creds)
        sheet = gs.open_by_url(spreadsheet_url)
        worksheet = sheet.get_worksheet(0)
        # Build the payload BEFORE clearing so a conversion error cannot
        # leave the sheet empty. Missing values (NaN/None) are not valid
        # JSON for the API, so they are sent as empty cells.
        cells = df.astype(object).where(df.notna(), "")
        rows = [df.columns.values.tolist()] + cells.values.tolist()
        # Clear the old contents and write the new rows.
        worksheet.clear()
        worksheet.update(rows)
        st.success("數據已成功上傳到 Google Sheets!")
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        st.error(f"數據上傳失敗:{e}")
# Upload-to-Google-Sheets controls: target URL, key file and trigger button.
st.header("上傳到 Google Sheets")
spreadsheet_url = st.text_input("Google Sheets URL", "https://docs.google.com/spreadsheets/d/1iOzoii9bVAmqlcqnseoqjZBkBuaFcpbIvUxZeRJ2kmk/edit?gid=0#gid=0")
creds_file = st.file_uploader("上傳 Google API 金鑰檔案", type=["json"])
if st.button("上傳數據至 Google Sheets"):
    if not data:
        # Previously a click with no scraped data did nothing and gave no
        # feedback; tell the user why nothing was uploaded.
        st.warning("沒有可上傳的數據。")
    elif creds_file is None:
        st.error("請上傳 Google API 金鑰檔案")
    else:
        # getvalue() returns the whole file regardless of the current
        # stream position, unlike read(), which returns nothing once the
        # stream has been consumed.
        creds_content = creds_file.getvalue()
        upload_to_google_sheet(df, spreadsheet_url, creds_content)
# Data visualization: bar chart comparing the scraped ratings.
st.header("數據視覺化")
if data:
    # Extract the numeric score from the rating text. The decimal part is
    # optional so that whole-number scores (e.g. "10") are not dropped;
    # expand=False yields a Series suitable for a single-column assignment.
    # Rows whose text does not match become NaN.
    df['數字評分'] = (
        df['評分']
        .str.extract(r'評分:(\d+(?:\.\d+)?)', expand=False)
        .astype(float)
    )
    # Draw the bar chart with Plotly, labelling each bar with its score.
    fig = px.bar(df, x='標題', y='數字評分', title="Booking.com 評分比較",
                 labels={'標題': '酒店標題', '數字評分': '評分'},
                 text='數字評分')
    # Tilt the x-axis labels so long hotel titles stay readable.
    fig.update_layout(xaxis_title="酒店標題", yaxis_title="評分", xaxis_tickangle=-45)
    st.plotly_chart(fig)
else:
    st.warning("無可用數據繪製圖表")
|