"""Streamlit app that scrapes restaurant pages and charts restaurants per district.

The list of page URLs is read from the '網址' column of a Google Sheet
exported as CSV. Each page is fetched and parsed for its title, address,
phone number and description; the results are shown as a table, a bar
chart of restaurants per district, and (on button press) a pie chart.
"""
import requests
from bs4 import BeautifulSoup
import pandas as pd
import plotly.graph_objects as go
import streamlit as st

# Upper bound (seconds) for each page request; without a timeout a single
# unresponsive server would block the app indefinitely.
REQUEST_TIMEOUT_SECONDS = 10

# Fixed column set so the DataFrame keeps its schema even when no page
# could be scraped (an empty list would otherwise yield no columns at all).
RESTAURANT_COLUMNS = ['Title', 'Address', 'Phone', 'Description']


def _text_or_na(tag):
    """Return the stripped text of *tag*, or 'N/A' when the tag was not found."""
    return tag.text.strip() if tag else 'N/A'


def _phone_or_na(tag):
    """Return the phone number from a ``tel:`` link, or 'N/A' when unavailable.

    'N/A' is returned both when the anchor was not found and when it has
    no ``href`` attribute.
    """
    href = tag.get('href') if tag else None
    return href.replace('tel:', '') if href is not None else 'N/A'


def _scrape_restaurant(url):
    """Fetch one restaurant page and return its details as a dict.

    Args:
        url: Address of the restaurant page to fetch.

    Returns:
        A dict with the keys listed in ``RESTAURANT_COLUMNS``; any field
        whose element is missing from the page is 'N/A'.

    Raises:
        requests.RequestException: on connection errors, timeouts, invalid
            URLs, or a non-success HTTP status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    # Treat HTTP error pages as failures instead of parsing them into a
    # row made entirely of 'N/A' values.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    return {
        'Title': _text_or_na(
            soup.find('h1', class_='restaurant-details__heading--title')
        ),
        'Address': _text_or_na(
            soup.find('li', class_='restaurant-details__heading--address')
        ),
        'Phone': _phone_or_na(soup.find('a', {'data-event': 'CTA_tel'})),
        'Description': _text_or_na(
            soup.find('div', class_='restaurant-details__description--text')
        ),
    }


# Application title.
st.title("餐廳資料抓取與分析")

# Read the list of URLs from the Google Sheet (CSV export).
sheet_id = "1W20lawjiQtEpljUKoEaMVPDlSdnhvJLPUy2jk5xao_w"
urls_df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")

# Blank cells are read as NaN; drop them so they are never requested.
urls = urls_df['網址'].dropna().tolist()

# NOTE(review): Streamlit reruns the script on every widget interaction, so
# pressing the button at the bottom repeats the whole scrape. Consider
# caching this step (e.g. st.cache_data) if the installed Streamlit
# version provides it.
df_list = []
for url in urls:
    try:
        df_list.append(_scrape_restaurant(url))
    except requests.RequestException as exc:
        # One unreachable page should not abort the whole run: report it
        # and continue with the remaining URLs.
        st.warning(f"無法抓取 {url}:{exc}")

# Combine all scraped records into a single DataFrame.
df = pd.DataFrame(df_list, columns=RESTAURANT_COLUMNS)

# Show the scraped data.
st.subheader("抓取的餐廳資料")
st.dataframe(df)

# Extract the district from each address and count restaurants per district.
# expand=False makes extract() return a Series for the single capture group.
# NOTE(review): `\w+` is greedy and also matches CJK characters, so any word
# characters directly preceding the district name (e.g. a city name) end up
# in the label as well - confirm this is intended.
df['Area'] = df['Address'].str.extract(r'(\w+區)', expand=False)
area_counts = df['Area'].value_counts()

# Bar chart: number of restaurants per district.
fig_bar = go.Figure(data=[go.Bar(x=area_counts.index, y=area_counts.values)])
fig_bar.update_layout(title='每個區的商家數量', xaxis_title='區域', yaxis_title='商家數量')
st.plotly_chart(fig_bar)

# Pie chart: share of restaurants per district, shown only on request.
fig_pie = go.Figure(data=[go.Pie(labels=area_counts.index, values=area_counts.values)])
fig_pie.update_layout(title='每個區的商家數量比例')

if st.button('顯示每個區的商家數量比例圓餅圖'):
    st.plotly_chart(fig_pie)