Update app.py
Browse files
app.py
CHANGED
@@ -5,11 +5,12 @@ from bs4 import BeautifulSoup
|
|
5 |
from google.oauth2.service_account import Credentials
|
6 |
import gspread
|
7 |
import plotly.express as px
|
|
|
8 |
|
9 |
# Streamlit UI 標題
|
10 |
st.title("Booking.com 多項目數據抓取和視覺化展示")
|
11 |
|
12 |
-
#
|
13 |
urls = [
|
14 |
"https://www.booking.com/hotel/it/appartamento-via-genova-roma.zh-tw.html",
|
15 |
"https://www.booking.com/hotel/it/giulia-39-s-coliseum.zh-tw.html",
|
@@ -32,7 +33,7 @@ headers = {
|
|
32 |
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
|
33 |
}
|
34 |
|
35 |
-
#
|
36 |
def fetch_booking_data(url, headers):
|
37 |
try:
|
38 |
response = requests.get(url, headers=headers)
|
@@ -45,38 +46,44 @@ def fetch_booking_data(url, headers):
|
|
45 |
return None
|
46 |
|
47 |
# 逐一處理 URL 並收集數據
|
|
|
48 |
data = []
|
49 |
for url in urls:
|
50 |
result = fetch_booking_data(url, headers)
|
51 |
if result:
|
52 |
data.append(result)
|
53 |
|
54 |
-
# 將數據轉換為 DataFrame
|
55 |
if data:
|
56 |
df = pd.DataFrame(data)
|
57 |
st.write("抓取到的數據:", df)
|
58 |
else:
|
59 |
st.warning("無法從任何 URL 抓取數據。")
|
60 |
|
61 |
-
#
|
62 |
def upload_to_google_sheet(df, spreadsheet_url, creds_file_content):
|
63 |
try:
|
|
|
|
|
|
|
64 |
scope = ['https://www.googleapis.com/auth/spreadsheets']
|
65 |
-
creds = Credentials.from_service_account_info(
|
|
|
66 |
gs = gspread.authorize(creds)
|
67 |
sheet = gs.open_by_url(spreadsheet_url)
|
68 |
worksheet = sheet.get_worksheet(0)
|
|
|
69 |
worksheet.clear()
|
70 |
worksheet.update([df.columns.values.tolist()] + df.values.tolist())
|
71 |
-
st.success("
|
72 |
except Exception as e:
|
73 |
-
st.error(f"
|
74 |
|
75 |
# Google Sheets URL 和金鑰文件上傳
|
|
|
76 |
spreadsheet_url = st.text_input("Google Sheets URL", "https://docs.google.com/spreadsheets/d/1iOzoii9bVAmqlcqnseoqjZBkBuaFcpbIvUxZeRJ2kmk/edit?gid=0#gid=0")
|
77 |
creds_file = st.file_uploader("上傳 Google API 金鑰檔案", type=["json"])
|
78 |
|
79 |
-
# 上傳至 Google Sheets
|
80 |
if st.button("上傳數據至 Google Sheets") and data:
|
81 |
if creds_file is not None:
|
82 |
creds_content = creds_file.read() # 讀取上傳的文件內容
|
@@ -84,11 +91,11 @@ if st.button("上傳數據至 Google Sheets") and data:
|
|
84 |
else:
|
85 |
st.error("請上傳 Google API 金鑰檔案")
|
86 |
|
87 |
-
#
|
|
|
88 |
if not df.empty:
|
89 |
# 確保評分可以轉換為數字
|
90 |
df['評分'] = pd.to_numeric(df['評分'], errors='coerce')
|
91 |
-
|
92 |
# 使用 Plotly 繪製條形圖
|
93 |
fig = px.bar(df, x='標題', y='評分', title="Booking.com 評分比較", labels={'標題': '酒店標題', '評分': '評分'})
|
94 |
fig.update_layout(xaxis_title="酒店標題", yaxis_title="評分", xaxis_tickangle=-45)
|
|
|
5 |
from google.oauth2.service_account import Credentials
|
6 |
import gspread
|
7 |
import plotly.express as px
|
8 |
+
import json # 用於解析 JSON 文件
|
9 |
|
10 |
# Streamlit UI 標題
|
11 |
st.title("Booking.com 多項目數據抓取和視覺化展示")
|
12 |
|
13 |
+
# 定義 URL 列表
|
14 |
urls = [
|
15 |
"https://www.booking.com/hotel/it/appartamento-via-genova-roma.zh-tw.html",
|
16 |
"https://www.booking.com/hotel/it/giulia-39-s-coliseum.zh-tw.html",
|
|
|
33 |
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
|
34 |
}
|
35 |
|
36 |
+
# 定義抓取數據函數
|
37 |
def fetch_booking_data(url, headers):
|
38 |
try:
|
39 |
response = requests.get(url, headers=headers)
|
|
|
46 |
return None
|
47 |
|
48 |
# 逐一處理 URL 並收集數據
|
49 |
+
st.header("抓取數據")
|
50 |
# Fetch each URL in turn and keep only the results that came back truthy.
data = []
for url in urls:
    result = fetch_booking_data(url, headers)
    if result:
        data.append(result)

# Always build the DataFrame, even when nothing was fetched: later top-level
# code reads `df.empty` unconditionally, and leaving `df` unbound on the
# failure path would end the script with a NameError instead of the warning.
df = pd.DataFrame(data)
if data:
    st.write("抓取到的數據:", df)
else:
    st.warning("無法從任何 URL 抓取數據。")
|
62 |
|
63 |
+
# 定義上傳 Google Sheets 函數
|
64 |
def upload_to_google_sheet(df, spreadsheet_url, creds_file_content):
    """Replace the first worksheet of a Google Sheet with the contents of ``df``.

    Args:
        df: DataFrame to upload; its column names become the header row.
        spreadsheet_url: URL of the target spreadsheet.
        creds_file_content: JSON text (``str`` or ``bytes``) of a service
            account key, parsed with ``json.loads``.

    Returns:
        None. The outcome is reported through ``st.success`` / ``st.error``;
        any exception raised along the way is caught and displayed rather
        than propagated.
    """
    try:
        # Parse the uploaded key file into the dict form the credentials API takes.
        creds_dict = json.loads(creds_file_content)
        scope = ['https://www.googleapis.com/auth/spreadsheets']
        creds = Credentials.from_service_account_info(creds_dict, scopes=scope)

        # Authorize and open the first worksheet of the target spreadsheet.
        gs = gspread.authorize(creds)
        sheet = gs.open_by_url(spreadsheet_url)
        worksheet = sheet.get_worksheet(0)

        # NaN is not valid JSON, so a frame with missing values would make the
        # update request fail. Cast to object first so None survives in
        # numeric columns, then swap every missing value for None.
        cells = df.astype(object).where(df.notna(), None)
        rows = [cells.columns.values.tolist()] + cells.values.tolist()

        # Build the payload before clearing, so a conversion problem cannot
        # leave the sheet wiped with nothing written back.
        worksheet.clear()
        worksheet.update(rows)
        st.success("數據已成功上傳到 Google Sheets!")
    except Exception as e:
        # UI boundary: surface any failure to the user instead of crashing.
        st.error(f"數據上傳失敗:{e}")
|
81 |
|
82 |
# Google Sheets URL 和金鑰文件上傳
|
83 |
+
st.header("上傳到 Google Sheets")
|
84 |
spreadsheet_url = st.text_input("Google Sheets URL", "https://docs.google.com/spreadsheets/d/1iOzoii9bVAmqlcqnseoqjZBkBuaFcpbIvUxZeRJ2kmk/edit?gid=0#gid=0")
|
85 |
creds_file = st.file_uploader("上傳 Google API 金鑰檔案", type=["json"])
|
86 |
|
|
|
87 |
if st.button("上傳數據至 Google Sheets") and data:
|
88 |
if creds_file is not None:
|
89 |
creds_content = creds_file.read() # 讀取上傳的文件內容
|
|
|
91 |
else:
|
92 |
st.error("請上傳 Google API 金鑰檔案")
|
93 |
|
94 |
+
# 顯示動態圖表
|
95 |
+
st.header("數據視覺化")
|
96 |
if not df.empty:
|
97 |
# 確保評分可以轉換為數字
|
98 |
df['評分'] = pd.to_numeric(df['評分'], errors='coerce')
|
|
|
99 |
# 使用 Plotly 繪製條形圖
|
100 |
fig = px.bar(df, x='標題', y='評分', title="Booking.com 評分比較", labels={'標題': '酒店標題', '評分': '評分'})
|
101 |
fig.update_layout(xaxis_title="酒店標題", yaxis_title="評分", xaxis_tickangle=-45)
|