File size: 5,273 Bytes
963e1ff
 
 
 
 
 
 
736153f
963e1ff
 
d76b50b
963e1ff
f573f99
d76b50b
 
 
 
 
 
 
 
 
 
6f6173d
 
 
 
86f7cfa
 
 
 
 
 
 
 
 
 
 
 
d76b50b
 
 
 
 
 
 
736153f
963e1ff
 
 
 
 
 
 
 
d76b50b
963e1ff
 
86f7cfa
736153f
d76b50b
 
 
 
 
 
736153f
d76b50b
 
 
 
 
 
86f7cfa
6f6173d
963e1ff
736153f
 
 
963e1ff
736153f
 
963e1ff
 
 
736153f
963e1ff
 
736153f
963e1ff
736153f
963e1ff
86f7cfa
736153f
963e1ff
 
 
d76b50b
 
6f6173d
 
963e1ff
d76b50b
963e1ff
86f7cfa
736153f
f573f99
 
 
d76b50b
f573f99
 
 
d76b50b
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import streamlit as st
import pandas as pd
import requests
from bs4 import BeautifulSoup
from google.oauth2.service_account import Credentials
import gspread
import plotly.express as px
import json  # 用於解析 JSON 文件

# Streamlit UI title
st.title("Booking.com 多項目數據抓取和視覺化展示")

# Combined list of Booking.com property pages (zh-tw pages) to scrape
urls = [
    "https://www.booking.com/hotel/it/appartamento-via-genova-roma.zh-tw.html",
    "https://www.booking.com/hotel/it/giulia-39-s-coliseum.zh-tw.html",
    "https://www.booking.com/hotel/it/le-stelle-dell-esquilino.zh-tw.html",
    "https://www.booking.com/hotel/it/retro-flat-termini.zh-tw.html",
    "https://www.booking.com/hotel/it/domus-fanti.zh-tw.html",
    "https://www.booking.com/hotel/it/radiant-retreat-in-esquilino.zh-tw.html",
    "https://www.booking.com/hotel/it/over-the-roof-top-roma.zh-tw.html",
    "https://www.booking.com/hotel/it/rome-sweet-home-roma1234.zh-tw.html",
    "https://www.booking.com/hotel/it/visione-guest-house-via-delle-fratte-36.zh-tw.html",
    "https://www.booking.com/hotel/it/appartamento-roma-centro-roma5.zh-tw.html",
    "https://www.booking.com/hotel/us/loews-royal-pacific-resort-at-universal.zh-tw.html",
    "https://www.booking.com/hotel/us/buena-vista-suites.zh-tw.html",
    "https://www.booking.com/hotel/us/ramada-international-drive.zh-tw.html",
    "https://www.booking.com/hotel/us/lake-buena-vista-14651-chelonia-parkway.zh-tw.html",
    "https://www.booking.com/hotel/it/trevi-apartment-roma1.zh-tw.html",
    "https://www.booking.com/hotel/it/white-flat-colosseo.zh-tw.html",
    "https://www.booking.com/hotel/it/via-cavour-238.zh-tw.html",
    "https://www.booking.com/hotel/it/modern-apartment-near-the-vatican-roma.zh-tw.html",
    "https://www.booking.com/hotel/it/apt-prati-lt-vatican-and-center.zh-tw.html",
    "https://www.booking.com/hotel/it/lion-99.zh-tw.html",
    "https://www.booking.com/hotel/it/sweet-dream-cavour-roma1.zh-tw.html",
    "https://www.booking.com/hotel/it/cavour-1.zh-tw.html",
    "https://www.booking.com/hotel/it/eufonia-vinyl-apartment.zh-tw.html",
    "https://www.booking.com/hotel/it/panisperna-apartment.zh-tw.html",
    "https://www.booking.com/hotel/it/navona-panoramic-penthouse.zh-tw.html"
]

# Request headers carrying a desktop-Chrome User-Agent string; passed to
# requests.get for every page fetched
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
}

# Fetch one Booking.com page and extract its title and rating text
def fetch_booking_data(url, headers):
    """Scrape the title and rating text from a single Booking.com page.

    Args:
        url: Address of the property page to download.
        headers: HTTP headers sent with the request (e.g. a User-Agent).

    Returns:
        A dict with the keys "標題" (title) and "評分" (rating text), or
        None when the page could not be fetched or parsed. Failures are
        reported through ``st.error`` rather than raised to the caller.
    """
    try:
        # A timeout keeps one unresponsive server from hanging the whole app.
        response = requests.get(url, headers=headers, timeout=15)
        # Treat HTTP error responses as failures instead of parsing an
        # error page as if it were a property page.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        # Check each lookup explicitly so a layout change produces a
        # readable message instead of an AttributeError on None.
        title_tag = soup.find("h2", class_="d2fee87262 pp-header__title")
        if title_tag is None:
            raise ValueError("title element not found in page")
        rating_tag = soup.find("div", class_="bcdcb105b3 f45d8e4c32 df64fda51b")
        if rating_tag is None:
            raise ValueError("rating element not found in page")

        return {"標題": title_tag.text.strip(), "評分": rating_tag.text.strip()}
    except Exception as e:
        # Best-effort scraping: report the problem and let the caller skip
        # this URL.
        st.error(f"Error fetching data from {url}: {e}")
        return None

# Scrape every configured URL and keep only the successful results
st.header("抓取數據")
scraped = (fetch_booking_data(page_url, headers) for page_url in urls)
data = [row for row in scraped if row]

# Show the results as a table, or warn when nothing could be scraped
if not data:
    st.warning("無法從任何 URL 抓取數據。")
else:
    df = pd.DataFrame(data)
    st.write("抓取到的數據:", df)

# Upload a DataFrame to the first worksheet of a Google Sheets document
def upload_to_google_sheet(df, spreadsheet_url, creds_file_content):
    """Replace the first worksheet's contents with the rows of ``df``.

    Args:
        df: DataFrame to upload; its column names become the header row.
        spreadsheet_url: URL of the target Google Sheets document.
        creds_file_content: Service-account key as JSON text or bytes.

    The outcome is reported through ``st.success`` / ``st.error``;
    exceptions are not propagated to the caller.
    """
    try:
        # Parse the key file contents into a dict
        creds_dict = json.loads(creds_file_content)
        # Build service-account credentials limited to the Sheets scope
        scopes = ['https://www.googleapis.com/auth/spreadsheets']
        creds = Credentials.from_service_account_info(creds_dict, scopes=scopes)

        # Build the payload before touching the sheet, so a conversion
        # problem cannot leave the worksheet cleared but never refilled.
        # Missing values become empty cells because NaN is not valid JSON.
        cells = df.astype(object).where(df.notna(), "")
        rows = [cells.columns.values.tolist()] + cells.values.tolist()

        # Authorize and open the first worksheet of the spreadsheet
        gs = gspread.authorize(creds)
        sheet = gs.open_by_url(spreadsheet_url)
        worksheet = sheet.get_worksheet(0)

        # Clear the old contents, then write header row plus data rows
        worksheet.clear()
        worksheet.update(rows)
        st.success("數據已成功上傳到 Google Sheets!")
    except Exception as e:
        st.error(f"數據上傳失敗:{e}")

# Upload section: target spreadsheet URL, service-account key file, and
# the button that triggers the upload
st.header("上傳到 Google Sheets")
spreadsheet_url = st.text_input(
    "Google Sheets URL",
    "https://docs.google.com/spreadsheets/d/1iOzoii9bVAmqlcqnseoqjZBkBuaFcpbIvUxZeRJ2kmk/edit?gid=0#gid=0",
)
creds_file = st.file_uploader("上傳 Google API 金鑰檔案", type=["json"])

upload_requested = st.button("上傳數據至 Google Sheets")
if upload_requested and data:
    if creds_file is None:
        st.error("請上傳 Google API 金鑰檔案")
    else:
        # Pass the contents of the uploaded key file to the uploader
        upload_to_google_sheet(df, spreadsheet_url, creds_file.read())

# Visualization: bar chart comparing the numeric ratings of the scraped pages
st.header("數據視覺化")
if data:
    # Pull the number out of the rating text. The fractional part is optional
    # so whole-number ratings (e.g. "10") are captured instead of becoming
    # NaN; expand=False yields a Series, matching the single column assigned.
    df['數字評分'] = (
        df['評分']
        .str.extract(r'評分:(\d+(?:\.\d+)?)', expand=False)
        .astype(float)
    )
    # Draw the bar chart with Plotly
    fig = px.bar(df, x='標題', y='數字評分', title="Booking.com 評分比較",
                 labels={'標題': '酒店標題', '數字評分': '評分'},
                 text='數字評分')
    fig.update_layout(xaxis_title="酒店標題", yaxis_title="評分", xaxis_tickangle=-45)
    st.plotly_chart(fig)
else:
    st.warning("無可用數據繪製圖表")