# Spaces: Sleeping
# (page-capture residue kept as a comment; it is not part of the program)
import re
import time

import folium
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
import streamlit as st
from bs4 import BeautifulSoup
from folium.plugins import HeatMap, MarkerCluster
from geopy.exc import GeocoderInsufficientPrivileges, GeopyError
from geopy.geocoders import Nominatim
# --- Page header -------------------------------------------------------------
st.title("米其林餐廳指南爬蟲與分析")
st.write("提取餐廳數據,可視化區域分佈,並在地圖上顯示位置和推薦度熱力圖。")

# --- Data source -------------------------------------------------------------
# The restaurant table is read from a Google Sheet through its CSV export URL.
sheet_id = "1xUfnD1WCF5ldqECI8YXIko1gCpaDDCwTztL17kjI42U"
sheet_csv_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv"
df = pd.read_csv(sheet_csv_url)

# --- Geocoder ----------------------------------------------------------------
# One shared Nominatim client, identified by the user agent string below.
geolocator = Nominatim(user_agent="my_unique_app/3.0")
# Function to extract region (區域) from the address using regex | |
# Patterns are tried in order: a district (區) is preferred; a county/city
# (縣/市) prefix is only used when the address names no district.
_REGION_PATTERNS = (
    re.compile(r'.*?區'),
    re.compile(r'.*?[縣市]'),
)


def extract_region(address):
    """Return the leading region portion of *address*.

    The result is the text from the start of the address up to and including
    the first ``區``.  When the address contains no ``區``, the text up to and
    including the first ``縣`` or ``市`` is returned instead.

    The original pattern ``(.*?)區|縣|市`` was parsed as three separate
    alternatives, so an address without ``區`` produced the bare character
    ``縣`` or ``市``.  Addresses that do contain ``區`` yield the same result
    as before.

    Args:
        address: The address text.  Non-string values (for example the NaN
            that pandas uses for an empty cell) are accepted.

    Returns:
        The matched region string, or ``"Unknown"`` when nothing matches or
        *address* is not a string.
    """
    if not isinstance(address, str):
        return "Unknown"

    for pattern in _REGION_PATTERNS:
        match = pattern.search(address)
        if match:
            return match.group(0)
    return "Unknown"
# Function to get latitude and longitude with caching | |
# Results of earlier lookups, keyed by the district string that was queried.
# Failed lookups are stored too, so a district is requested at most once for
# the lifetime of this dictionary.
_GEOCODE_CACHE = {}


def get_lat_lon(district):
    """Return ``(latitude, longitude)`` for a district of Tainan.

    The district is prefixed with ``台南市`` and sent to the module-level
    ``geolocator``.  Each distinct district triggers at most one request;
    repeated calls are answered from ``_GEOCODE_CACHE``.

    Args:
        district: District name to look up.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``(None, None)`` when the
        geocoder finds nothing or the lookup fails.
    """
    if district in _GEOCODE_CACHE:
        return _GEOCODE_CACHE[district]

    coords = (None, None)
    try:
        location = geolocator.geocode(f"台南市{district}")
    except GeocoderInsufficientPrivileges:
        st.error("地理編碼器遇到權限問題,請稍後再試。")
    except GeopyError as exc:
        # Timeouts, quota errors and similar failures should degrade to
        # "no coordinates" for this district instead of aborting the page.
        st.warning(f"無法取得「{district}」的座標:{exc}")
    else:
        if location:
            coords = (location.latitude, location.longitude)
    finally:
        # Pause after every real request, whether or not it succeeded, to
        # avoid rate limiting.
        time.sleep(1)

    _GEOCODE_CACHE[district] = coords
    return coords
# Derive a region for every row, then attach coordinates for that region.
df['Region'] = df['地址'].apply(extract_region)

# Geocode each distinct region once and reuse the result for all of its rows;
# every lookup is a network request followed by a delay.
region_coords = {region: get_lat_lon(region) for region in df['Region'].unique()}

# Building the columns from plain lists also works for an empty DataFrame,
# where unpacking ``zip(*...)`` into two names would raise.
df['Latitude'] = [region_coords[region][0] for region in df['Region']]
df['Longitude'] = [region_coords[region][1] for region in df['Region']]
# Show the full restaurant table first.
st.subheader("餐廳數據")
st.dataframe(df)

# Per-region summary: number of restaurants (row count of 'Store Name') and
# mean recommendation score.
region_group = (
    df.groupby("Region")
    .agg(
        Count=('Store Name', 'count'),
        Avg_Recommendation=('推薦度', 'mean'),
    )
    .reset_index()
)

# Every region hangs under a single root node in the Sunburst hierarchy.
region_group['Total'] = 'All Regions'
hierarchical_data = region_group[['Total', 'Region', 'Count']]
# Interactive Sunburst chart: one root node with one child per region.
# labels, parents and values must line up one-to-one, so the root label is
# appended exactly once (the original appended the whole 'Total' column,
# repeating the root once per region).
region_labels = hierarchical_data['Region'].tolist()
region_parents = hierarchical_data['Total'].tolist()
root_label = region_parents[0] if region_parents else 'All Regions'

sunburst = go.Figure(go.Sunburst(
    labels=region_labels + [root_label],
    parents=region_parents + [''],  # the root itself has no parent
    values=hierarchical_data['Count'].tolist() + [hierarchical_data['Count'].sum()],
    branchvalues="total",  # the root value is the sum of its children
    hovertemplate='<b>%{label}</b><br>餐廳數量: %{value}<extra></extra>',
    maxdepth=2,
))
sunburst.update_layout(
    title="餐廳分佈(點擊可放大查看)",
    title_x=0.5,
    title_font=dict(size=24, family="Arial"),
    height=600,
    margin=dict(t=50, b=50, l=0, r=0)
)

# Add a layout button that makes every trace visible and restores the title.
# This is a Plotly "updatemenus" button, not custom JavaScript.
sunburst.update_layout(
    updatemenus=[{
        'type': 'buttons',
        'showactive': False,
        'buttons': [{
            'label': '重置視圖',
            'method': 'update',
            'args': [{'visible': [True] * len(sunburst.data)},
                     {'title': '餐廳分佈(點擊可放大查看)'}]
        }]
    }]
)

st.subheader("餐廳分佈(Sunburst 圖)")
st.plotly_chart(sunburst, use_container_width=True)
# Bar chart of restaurant counts per region, one colour per bar.
# The Set2 palette is a fixed-length list, so it is cycled to give every bar
# an explicit colour even when there are more regions than palette entries.
palette = px.colors.qualitative.Set2
bar_colors = [palette[i % len(palette)] for i in range(len(region_group))]

bar_chart = go.Figure(go.Bar(
    x=region_group["Region"],
    y=region_group["Count"],
    text=region_group["Count"],  # show the count on each bar
    textposition='auto',
    marker=dict(color=bar_colors)
))
bar_chart.update_layout(
    title="各區域餐廳數量",
    title_x=0.5,
    title_font=dict(size=24, family="Arial"),
    height=400,
    margin=dict(t=50, b=50, l=50, r=50),
    xaxis_title="區域",
    yaxis_title="餐廳數量",
    xaxis=dict(tickangle=-45)  # slant the region names
)

st.subheader("各區域餐廳數量(條形圖)")
st.plotly_chart(bar_chart)
# Relationship between recommendation score and location.
st.header("推薦度與地理位置的關聯性")

# Per-region analysis: compare the mean recommendation score across regions.
fig_bar = px.bar(
    data_frame=region_group,
    x="Region",
    y="Avg_Recommendation",
    title="不同區域的平均推薦度比較",
    color_discrete_sequence=['#66CDAA'],
)
st.plotly_chart(fig_bar)
# Folium map with clustered restaurant markers and a recommendation heat map.
st.subheader("餐廳位置地圖(含推薦度熱力圖)")

# Start the map at the fixed centre point used for Tainan.
m = folium.Map(location=[23.0, 120.2], zoom_start=12)

# Markers are added to a cluster layer rather than directly to the map.
marker_cluster = MarkerCluster().add_to(m)

# Heat-map rows are [latitude, longitude, weight], weighted by 推薦度.
heat_data = []
for index, row in df.iterrows():
    # Rows whose region could not be geocoded have no coordinates; skip them.
    if pd.isnull(row["Latitude"]) or pd.isnull(row["Longitude"]):
        continue

    folium.Marker(
        location=[row["Latitude"], row["Longitude"]],
        popup=f"{row['Store Name']} (推薦度: {row['推薦度']})",
        tooltip=row["地址"]
    ).add_to(marker_cluster)

    # A missing score would put NaN into the heat-map data, which HeatMap
    # rejects; such rows still get a marker but carry no heat weight.
    if pd.notnull(row["推薦度"]):
        heat_data.append([row["Latitude"], row["Longitude"], row["推薦度"]])

# Add the heat-map layer.
HeatMap(heat_data, radius=15, blur=10, max_zoom=1, name="推薦度熱力圖").add_to(m)

# Layer control is added after the HeatMap layer above.
folium.LayerControl().add_to(m)

# Render the map's HTML inside the Streamlit page.
st.components.v1.html(m._repr_html_(), height=600)
# Save the DataFrame to CSV with the "utf-8-sig" encoding.
csv_file = "restaurants_data.csv"
df.to_csv(csv_file, encoding="utf-8-sig", index=False)

# Read the bytes back inside a context manager so the file handle is closed
# promptly (the original ``open(...).read()`` never closed it).
with open(csv_file, "rb") as csv_handle:
    csv_bytes = csv_handle.read()

# Offer the CSV as a download.
st.download_button(
    label="下載餐廳數據 CSV 檔案",
    data=csv_bytes,
    file_name=csv_file,
    mime="text/csv"
)