import requests
from bs4 import BeautifulSoup
import pandas as pd
import folium
from folium.plugins import MarkerCluster, HeatMap
import plotly.graph_objects as go
import plotly.express as px
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderInsufficientPrivileges
import re
import streamlit as st
import time
# Page header: title plus a one-line description of what the app shows.
st.title("米其林餐廳指南爬蟲與分析")
st.write("提取餐廳數據,可視化區域分佈,並在地圖上顯示位置和推薦度熱力圖。")

# Load the restaurant table from the Google Sheets CSV export endpoint.
sheet_id = "1xUfnD1WCF5ldqECI8YXIko1gCpaDDCwTztL17kjI42U"
sheet_csv_url = "https://docs.google.com/spreadsheets/d/" + sheet_id + "/export?format=csv"
df = pd.read_csv(sheet_csv_url)

# Shared Nominatim client used by the geocoding helper further down.
geolocator = Nominatim(user_agent="my_unique_app/3.0")
# Extract the region (區域) prefix from an address string.
def extract_region(address):
    """Return the leading part of *address* up to its first region marker.

    The address is scanned for a district marker (區) first; when one is
    present the text from the start of the address through that character is
    returned (e.g. "台南市中西區"). If there is no district marker, the text
    through the first county/city marker (縣 or 市) is returned instead.

    The previous pattern ``(.*?)區|縣|市`` parsed as three separate
    alternatives, so an address without 區 yielded just the single character
    "縣" or "市". The fallback below returns the full prefix instead.

    Args:
        address: The raw address value. Non-string values (for example the
            NaN pandas uses for empty cells) are treated as having no region.

    Returns:
        The matched region prefix, or "Unknown" when nothing matches.
    """
    if not isinstance(address, str):
        return "Unknown"

    # Patterns are tried in priority order: district first, then county/city.
    for pattern in (r'.*?區', r'.*?[縣市]'):
        match = re.search(pattern, address)
        if match:
            return match.group(0)
    return "Unknown"
# Geocode a district to (latitude, longitude); results are cached by Streamlit.
@st.cache_data
def get_lat_lon(district):
    """Look up the coordinates of a district via the module-level geocoder.

    The query sent to the geocoder is "台南市" followed by *district*.

    Args:
        district: Region text to geocode.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``(None, None)`` when the
        geocoder finds nothing or the request fails.
    """
    # Local import so this block does not depend on a change to the file's
    # import section; GeopyError is the base class of geopy's exceptions.
    from geopy.exc import GeopyError

    try:
        location = geolocator.geocode(f"台南市{district}")
    except GeocoderInsufficientPrivileges:
        st.error("地理編碼器遇到權限問題,請稍後再試。")
        return None, None
    except GeopyError as exc:
        # Timeouts, quota errors or an unavailable service should not take
        # the whole app down; report the failure and carry on without
        # coordinates for this district.
        st.warning(f"無法取得「{district}」的座標:{exc}")
        return None, None
    finally:
        # Throttle after every request (not only successful ones) to avoid
        # rate limiting by the geocoding service.
        time.sleep(1)

    # NOTE(review): the (None, None) failure result is cached as well, so a
    # transient failure persists until the Streamlit cache is cleared.
    if location:
        return location.latitude, location.longitude
    return None, None
# Derive each restaurant's region from its address, then geocode the region.
df['Region'] = df['地址'].apply(extract_region)

# Each element of `coords` is a (latitude, longitude) pair. The columns are
# built from explicit lists rather than `zip(*coords)` because unpacking an
# empty zip raises ValueError when the sheet has no rows.
coords = df['Region'].apply(get_lat_lon)
df['Latitude'] = [lat for lat, _ in coords]
df['Longitude'] = [lon for _, lon in coords]

# Display the DataFrame as a table at the top
st.subheader("餐廳數據")
st.dataframe(df)
# Aggregate per region: restaurant count and mean recommendation score.
region_group = (
    df.groupby("Region")
    .agg({'Store Name': 'count', '推薦度': 'mean'})
    .reset_index()
    .rename(columns={'Store Name': 'Count', '推薦度': 'Avg_Recommendation'})
)

# A constant 'Total' column acts as the single root level of the hierarchy
# consumed by the Sunburst chart.
region_group['Total'] = 'All Regions'
hierarchical_data = region_group[['Total', 'Region', 'Count']]
# Plot interactive Sunburst chart: one root node with one sector per region.
region_labels = hierarchical_data['Region'].tolist()
region_counts = hierarchical_data['Count'].tolist()
region_parents = hierarchical_data['Total'].tolist()
# Every row carries the same root label; fall back to the literal when the
# table is empty so the root node still exists.
root_label = region_parents[0] if region_parents else 'All Regions'

# labels / parents / values must be parallel lists of equal length: the n
# regions followed by exactly one root entry (whose parent is ''). Appending
# the whole 'Total' column to the labels would add the root n times.
sunburst = go.Figure(go.Sunburst(
    labels=region_labels + [root_label],
    parents=region_parents + [''],
    values=region_counts + [sum(region_counts)],
    branchvalues="total",
    # Plotly hover text uses <br> for line breaks; a raw newline inside the
    # single-quoted literal is a syntax error.
    hovertemplate='%{label}<br>餐廳數量: %{value}',
    maxdepth=2,
))
sunburst.update_layout(
    title="餐廳分佈(點擊可放大查看)",
    title_x=0.5,
    title_font=dict(size=24, family="Arial"),
    height=600,
    margin=dict(t=50, b=50, l=0, r=0)
)
# Add a "reset view" button that makes every trace visible again and
# restores the chart title.
sunburst.update_layout(
    updatemenus=[{
        'type': 'buttons',
        'showactive': False,
        'buttons': [{
            'label': '重置視圖',
            'method': 'update',
            'args': [{'visible': [True] * len(sunburst.data)},
                     {'title': '餐廳分佈(點擊可放大查看)'}]
        }]
    }]
)
st.subheader("餐廳分佈(Sunburst 圖)")
st.plotly_chart(sunburst, use_container_width=True)
# Plot bar chart of restaurant counts per region with custom colors and labels.
# The Set2 palette is a fixed-length list; cycle it so every bar gets an
# explicit color even when there are more regions than palette entries.
palette = px.colors.qualitative.Set2
bar_colors = [palette[i % len(palette)] for i in range(len(region_group))]

bar_chart = go.Figure(go.Bar(
    x=region_group["Region"],
    y=region_group["Count"],
    text=region_group["Count"],  # show the count on each bar
    textposition='auto',
    marker=dict(color=bar_colors)
))
bar_chart.update_layout(
    title="各區域餐廳數量",
    title_x=0.5,
    title_font=dict(size=24, family="Arial"),
    height=400,
    margin=dict(t=50, b=50, l=50, r=50),
    xaxis_title="區域",
    yaxis_title="餐廳數量",
    xaxis=dict(tickangle=-45)  # tilt region names so they do not overlap
)
st.subheader("各區域餐廳數量(條形圖)")
st.plotly_chart(bar_chart)
# Relationship between recommendation score and geographic location.
st.header("推薦度與地理位置的關聯性")

# Per-region analysis: bar chart of the average recommendation score.
avg_chart_options = dict(
    x="Region",
    y="Avg_Recommendation",
    title="不同區域的平均推薦度比較",
    color_discrete_sequence=['#66CDAA'],
)
fig_bar = px.bar(region_group, **avg_chart_options)
st.plotly_chart(fig_bar)
# Display a map using Folium: clustered restaurant markers plus a heatmap
# weighted by recommendation score.
st.subheader("餐廳位置地圖(含推薦度熱力圖)")

# Create map centered around Tainan
m = folium.Map(location=[23.0, 120.2], zoom_start=12)

# Add marker cluster to the map
marker_cluster = MarkerCluster().add_to(m)

# Each heatmap entry is [latitude, longitude, weight].
heat_data = []
for _, row in df.iterrows():
    # Rows that could not be geocoded have no position to draw.
    if pd.isnull(row["Latitude"]) or pd.isnull(row["Longitude"]):
        continue
    folium.Marker(
        location=[row["Latitude"], row["Longitude"]],
        popup=f"{row['Store Name']} (推薦度: {row['推薦度']})",
        tooltip=row["地址"]
    ).add_to(marker_cluster)
    # Keep the marker, but leave rows with a missing score out of the
    # heatmap: folium's HeatMap rejects data containing NaN.
    if pd.notnull(row["推薦度"]):
        heat_data.append([row["Latitude"], row["Longitude"], row["推薦度"]])

# Add heatmap layer
HeatMap(heat_data, radius=15, blur=10, max_zoom=1, name="推薦度熱力圖").add_to(m)

# Add layer control
folium.LayerControl().add_to(m)

# Display the map in Streamlit by embedding its rendered HTML
st.components.v1.html(m._repr_html_(), height=600)
# Save the DataFrame to CSV; "utf-8-sig" writes a UTF-8 byte-order mark.
csv_file = "restaurants_data.csv"
df.to_csv(csv_file, encoding="utf-8-sig", index=False)

# Read the bytes back inside a context manager so the file handle is closed
# promptly instead of being left open.
with open(csv_file, "rb") as csv_handle:
    csv_bytes = csv_handle.read()

# Display download button for the CSV
st.download_button(
    label="下載餐廳數據 CSV 檔案",
    data=csv_bytes,
    file_name=csv_file,
    mime="text/csv"
)