Spaces:
Sleeping
Sleeping
File size: 5,419 Bytes
3b45348 8567ba1 3b45348 8567ba1 3b45348 723bba7 3b45348 723bba7 3b45348 723bba7 3b45348 723bba7 3b45348 723bba7 3b45348 723bba7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
import requests
from bs4 import BeautifulSoup
import pandas as pd
import folium
from folium.plugins import MarkerCluster, HeatMap
import plotly.graph_objects as go
import plotly.express as px
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderInsufficientPrivileges
import re
import streamlit as st
import time
# Page header: app title followed by a one-line description.
st.title("米其林餐廳指南爬蟲與分析")
st.write("提取餐廳數據,可視化區域分佈,並在地圖上顯示位置和推薦度熱力圖。")

# Load the restaurant table from the Google Sheets CSV export endpoint.
sheet_id = "1xUfnD1WCF5ldqECI8YXIko1gCpaDDCwTztL17kjI42U"
csv_export_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv"
df = pd.read_csv(csv_export_url)

# Shared Nominatim geocoder used by the geocoding helper below.
geolocator = Nominatim(user_agent="my_unique_app/3.0")
# Function to extract region (區域) from the address using regex
def extract_region(address):
    """Return the leading part of *address* up to its administrative marker.

    The district marker ``區`` is preferred: when present, the result is the
    text from the start of the address through the first ``區``.  When the
    address has no ``區``, the text through the first ``縣`` or ``市`` is
    returned instead.

    Args:
        address: Address text.  Non-string values (for example NaN coming
            from an empty spreadsheet cell) are tolerated.

    Returns:
        The matched prefix, or ``"Unknown"`` when the input is not a string
        or contains none of the markers.
    """
    if not isinstance(address, str):
        return "Unknown"
    # Each alternative carries its own lazy prefix; a bare ``縣|市`` branch
    # would match only the single marker character instead of the name.
    match = re.search(r'.*?區|.*?[縣市]', address)
    return match.group(0) if match else "Unknown"
# Function to get latitude and longitude with caching
@st.cache_data
def get_lat_lon(district):
    """Geocode a Tainan district name to a ``(latitude, longitude)`` pair.

    The query sent to the module-level ``geolocator`` is ``district``
    prefixed with ``台南市``.

    Args:
        district: Region text to look up.

    Returns:
        ``(latitude, longitude)`` of the geocoder's result, or
        ``(None, None)`` when nothing is found or the geocoding service
        reports an error.

    NOTE(review): the function is wrapped in ``st.cache_data``, so a failed
    lookup's ``(None, None)`` is presumably cached as well -- confirm whether
    failed lookups should be retried on later runs.
    """
    # Imported locally so this block does not depend on the file header.
    from geopy.exc import GeocoderServiceError

    try:
        location = geolocator.geocode(f"台南市{district}")
    except GeocoderInsufficientPrivileges:
        st.error("地理編碼器遇到權限問題,請稍後再試。")
        return None, None
    except GeocoderServiceError:
        # Timeouts, an unavailable service, etc. should not take the whole
        # page down; the row simply ends up without coordinates.
        st.warning(f"無法取得「{district}」的座標,已略過。")
        return None, None
    finally:
        # Delay after every request (hit, miss or error) to avoid rate
        # limiting; cached calls never reach this point.
        time.sleep(1)

    if location is None:
        return None, None
    return location.latitude, location.longitude
# Derive a region label for every row from its address column.
df['Region'] = df['地址'].apply(extract_region)

# Geocode each row's region and split the (lat, lon) pairs into two columns.
# Building the columns from a list (instead of unpacking zip(*...)) also
# works when the sheet has zero rows, where the unpacking raises ValueError.
coordinates = [get_lat_lon(region) for region in df['Region']]
df['Latitude'] = [lat for lat, _ in coordinates]
df['Longitude'] = [lon for _, lon in coordinates]

# Display the DataFrame as a table at the top of the page.
st.subheader("餐廳數據")
st.dataframe(df)
# Per-region summary: number of named stores and mean recommendation score.
region_group = (
    df.groupby("Region")
    .agg(
        Count=('Store Name', 'count'),
        Avg_Recommendation=('推薦度', 'mean'),
    )
    .reset_index()
)

# Attach a constant root label so every region hangs under one parent node,
# then keep only the columns the Sunburst chart needs.
region_group = region_group.assign(Total='All Regions')
hierarchical_data = region_group.loc[:, ['Total', 'Region', 'Count']]
# Interactive Sunburst chart: one leaf per region under a single root node.
# labels, parents and values must line up index by index, so the root is
# appended exactly once to each list.
sunburst_regions = hierarchical_data['Region'].tolist()
sunburst_parents = hierarchical_data['Total'].tolist()
# Reuse the root label stored in the data; fall back to the literal when
# there are no regions at all.
sunburst_root = sunburst_parents[0] if sunburst_parents else 'All Regions'

sunburst = go.Figure(go.Sunburst(
    labels=sunburst_regions + [sunburst_root],
    parents=sunburst_parents + [''],
    values=hierarchical_data['Count'].tolist() + [hierarchical_data['Count'].sum()],
    branchvalues="total",
    hovertemplate='<b>%{label}</b><br>餐廳數量: %{value}<extra></extra>',
    maxdepth=2,
))
sunburst.update_layout(
    title="餐廳分佈(點擊可放大查看)",
    title_x=0.5,
    title_font=dict(size=24, family="Arial"),
    height=600,
    margin=dict(t=50, b=50, l=0, r=0),
)

# Add a Plotly "reset view" button. Its 'update' action marks every trace
# visible and sets the layout title again.
sunburst.update_layout(
    updatemenus=[{
        'type': 'buttons',
        'showactive': False,
        'buttons': [{
            'label': '重置視圖',
            'method': 'update',
            'args': [{'visible': [True] * len(sunburst.data)},
                     {'title': '餐廳分佈(點擊可放大查看)'}],
        }],
    }]
)
st.subheader("餐廳分佈(Sunburst 圖)")
st.plotly_chart(sunburst, use_container_width=True)
# Bar chart of restaurant counts per region, with custom colors and labels.
# The Set2 palette has a fixed number of entries, so it is cycled to give
# every bar a color even when there are more regions than palette entries.
bar_palette = px.colors.qualitative.Set2
bar_colors = [
    bar_palette[position % len(bar_palette)]
    for position in range(len(region_group))
]
bar_chart = go.Figure(go.Bar(
    x=region_group["Region"],
    y=region_group["Count"],
    text=region_group["Count"],
    textposition='auto',
    marker=dict(color=bar_colors),
))
bar_chart.update_layout(
    title="各區域餐廳數量",
    title_x=0.5,
    title_font=dict(size=24, family="Arial"),
    height=400,
    margin=dict(t=50, b=50, l=50, r=50),
    xaxis_title="區域",
    yaxis_title="餐廳數量",
    xaxis=dict(tickangle=-45),
)
st.subheader("各區域餐廳數量(條形圖)")
st.plotly_chart(bar_chart)
# Section: relationship between recommendation score and location.
st.header("推薦度與地理位置的關聯性")

# Per-region analysis: bar chart of the mean recommendation score.
fig_bar = px.bar(
    region_group,
    x="Region",
    y="Avg_Recommendation",
    title="不同區域的平均推薦度比較",
    color_discrete_sequence=['#66CDAA'],
)
st.plotly_chart(fig_bar)
# Map section: clustered restaurant markers plus a recommendation heatmap.
st.subheader("餐廳位置地圖(含推薦度熱力圖)")

# Create the map centered around Tainan.
m = folium.Map(location=[23.0, 120.2], zoom_start=12)

# Markers are added to a cluster layer rather than directly to the map.
marker_cluster = MarkerCluster().add_to(m)

# Collect [lat, lon, weight] triples for the heatmap while placing markers.
heat_data = []
for _, row in df.iterrows():
    latitude, longitude = row["Latitude"], row["Longitude"]
    # Rows that could not be geocoded have no position to draw.
    if pd.isnull(latitude) or pd.isnull(longitude):
        continue

    folium.Marker(
        location=[latitude, longitude],
        popup=f"{row['Store Name']} (推薦度: {row['推薦度']})",
        tooltip=row["地址"],
    ).add_to(marker_cluster)

    # HeatMap rejects NaN values, so a row without a recommendation score
    # keeps its marker but is left out of the heat layer.
    if pd.notnull(row["推薦度"]):
        heat_data.append([latitude, longitude, row["推薦度"]])

# Add the heatmap layer.
HeatMap(heat_data, radius=15, blur=10, max_zoom=1, name="推薦度熱力圖").add_to(m)

# Add the layer control so the layers can be toggled.
folium.LayerControl().add_to(m)

# Render the folium map inside Streamlit as raw HTML.
st.components.v1.html(m._repr_html_(), height=600)
# Save the DataFrame to CSV; "utf-8-sig" writes a BOM before the data.
csv_file = "restaurants_data.csv"
df.to_csv(csv_file, encoding="utf-8-sig", index=False)

# Read the file back through a context manager so the handle is closed
# promptly instead of being left open.
with open(csv_file, "rb") as csv_handle:
    csv_bytes = csv_handle.read()

# Offer the saved CSV through a download button.
st.download_button(
    label="下載餐廳數據 CSV 檔案",
    data=csv_bytes,
    file_name=csv_file,
    mime="text/csv",
)