File size: 5,419 Bytes
3b45348
 
 
 
8567ba1
3b45348
8567ba1
3b45348
723bba7
3b45348
 
723bba7
3b45348
 
723bba7
 
3b45348
 
 
723bba7
3b45348
 
723bba7
3b45348
 
 
 
 
 
 
 
 
723bba7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import requests
from bs4 import BeautifulSoup
import pandas as pd
import folium
from folium.plugins import MarkerCluster, HeatMap
import plotly.graph_objects as go
import plotly.express as px
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderInsufficientPrivileges
import re
import streamlit as st
import time

# Page header: the app title followed by a one-line summary of the page.
st.title("米其林餐廳指南爬蟲與分析")
st.write("提取餐廳數據,可視化區域分佈,並在地圖上顯示位置和推薦度熱力圖。")

# Load the restaurant table through the Google Sheets CSV export endpoint.
sheet_id = "1xUfnD1WCF5ldqECI8YXIko1gCpaDDCwTztL17kjI42U"
export_url = "https://docs.google.com/spreadsheets/d/" + sheet_id + "/export?format=csv"
df = pd.read_csv(export_url)

# Shared Nominatim geocoder instance used by the lookups further down.
geolocator = Nominatim(user_agent="my_unique_app/3.0")

# Extract the administrative region (district, else county/city) from an address.
def extract_region(address):
    """Return the leading part of *address* up to its first region marker.

    The text up to and including the first ``區`` (district) is preferred.
    If the address has no ``區``, the text up to and including the first
    ``縣`` (county) or ``市`` (city) is returned instead.

    Args:
        address: The address text. Non-string values (for example the NaN
            that pandas produces for an empty cell) are accepted.

    Returns:
        The matched prefix, or ``"Unknown"`` when *address* is not a string
        or contains none of the markers.
    """
    # Empty spreadsheet cells arrive as float NaN; re.search would raise on them.
    if not isinstance(address, str):
        return "Unknown"

    # The former single pattern r'(.*?)區|縣|市' parsed as three alternatives
    # ("(.*?)區", "縣", "市"), so an address without 區 yielded a bare "市" or
    # "縣" character. Two explicit searches keep the 區 result unchanged and
    # return a meaningful prefix for the fallback.
    district = re.search(r'.*?區', address)
    if district:
        return district.group(0)

    county_or_city = re.search(r'.*?[縣市]', address)
    if county_or_city:
        return county_or_city.group(0)

    return "Unknown"

# Geocode a district name to coordinates; results are memoized by Streamlit.
@st.cache_data
def get_lat_lon(district):
    """Return ``(latitude, longitude)`` for *district*, or ``(None, None)``.

    The query sent to the module-level ``geolocator`` is the district text
    prefixed with "台南市". ``st.cache_data`` memoizes the result per distinct
    argument, so repeated districts do not trigger repeated lookups.

    Args:
        district: Region text to look up.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``(None, None)`` when the
        geocoder finds nothing or the lookup fails with a service error.
    """
    # Local import keeps this block self-contained; the file-level import
    # only brings in GeocoderInsufficientPrivileges.
    from geopy.exc import GeocoderServiceError

    try:
        location = geolocator.geocode(f"台南市{district}")
    except GeocoderInsufficientPrivileges:
        st.error("地理編碼器遇到權限問題,請稍後再試。")
        return None, None
    except GeocoderServiceError as exc:
        # Timeouts, an unavailable service or rate limiting should degrade to
        # "no coordinates" rather than abort the whole page.
        st.error(f"地理編碼失敗({district}):{exc}")
        return None, None
    finally:
        # Pause after every request, successful or not, so that failed
        # lookups do not hit the service back-to-back.
        time.sleep(1)

    if not location:
        return None, None
    return location.latitude, location.longitude

# Derive each restaurant's region from its address column.
df['Region'] = df['地址'].apply(extract_region)

# Look up coordinates row by row and split the (lat, lon) pairs into two
# columns. Building the columns from a list avoids the
# `a, b = zip(*pairs)` unpacking, which raises ValueError when the sheet
# has no rows.
region_coords = [get_lat_lon(region) for region in df['Region']]
df['Latitude'] = [lat for lat, _ in region_coords]
df['Longitude'] = [lon for _, lon in region_coords]

# Display the DataFrame as a table at the top
st.subheader("餐廳數據")
st.dataframe(df)

# Per-region summary: number of restaurants and mean recommendation score.
region_group = (
    df.groupby("Region")
    .agg(
        Count=('Store Name', 'count'),
        Avg_Recommendation=('推薦度', 'mean'),
    )
    .reset_index()
)

# Build a two-level hierarchy for the Sunburst chart: one root node with one
# child per region.
root_label = 'All Regions'
region_group['Total'] = root_label  # parent label shared by every region row
hierarchical_data = region_group[['Total', 'Region', 'Count']]

# labels / parents / values are parallel arrays: one entry per region followed
# by a single entry for the root. The root label is appended exactly once;
# appending the whole 'Total' column would add one root label per region and
# leave `labels` longer than `parents` and `values`.
sunburst = go.Figure(go.Sunburst(
    labels=hierarchical_data['Region'].tolist() + [root_label],
    parents=hierarchical_data['Total'].tolist() + [''],
    values=hierarchical_data['Count'].tolist() + [hierarchical_data['Count'].sum()],
    branchvalues="total",  # the root value is the sum of its children
    hovertemplate='<b>%{label}</b><br>餐廳數量: %{value}<extra></extra>',
    maxdepth=2,
))

sunburst.update_layout(
    title="餐廳分佈(點擊可放大查看)",
    title_x=0.5,
    title_font=dict(size=24, family="Arial"),
    height=600,
    margin=dict(t=50, b=50, l=0, r=0)
)

# Add a "reset view" button. This is a Plotly layout updatemenu (no custom
# JavaScript): clicking it re-applies trace visibility and the chart title.
sunburst.update_layout(
    updatemenus=[{
        'type': 'buttons',
        'showactive': False,
        'buttons': [{
            'label': '重置視圖',
            'method': 'update',
            'args': [{'visible': [True] * len(sunburst.data)},
                     {'title': '餐廳分佈(點擊可放大查看)'}]
        }]
    }]
)

st.subheader("餐廳分佈(Sunburst 圖)")
st.plotly_chart(sunburst, use_container_width=True)

# Bar chart of restaurant counts per region, one colour per bar.
# The Set2 palette is a fixed-length list, so it is cycled to produce exactly
# one colour for every bar regardless of how many regions there are.
palette = px.colors.qualitative.Set2
bar_colors = [palette[i % len(palette)] for i in range(len(region_group))]

bar_chart = go.Figure(go.Bar(
    x=region_group["Region"],
    y=region_group["Count"],
    text=region_group["Count"],  # show the count on each bar
    textposition='auto',
    marker=dict(color=bar_colors)
))

bar_chart.update_layout(
    title="各區域餐廳數量",
    title_x=0.5,
    title_font=dict(size=24, family="Arial"),
    height=400,
    margin=dict(t=50, b=50, l=50, r=50),
    xaxis_title="區域",
    yaxis_title="餐廳數量",
    xaxis=dict(tickangle=-45)  # tilt region names so they do not overlap
)
st.subheader("各區域餐廳數量(條形圖)")
st.plotly_chart(bar_chart)

# Section: relationship between recommendation score and location.
st.header("推薦度與地理位置的關聯性")

# Per-region comparison of the mean recommendation score.
avg_bar_options = dict(
    x="Region",
    y="Avg_Recommendation",
    title="不同區域的平均推薦度比較",
    color_discrete_sequence=['#66CDAA'],
)
fig_bar = px.bar(region_group, **avg_bar_options)
st.plotly_chart(fig_bar)

# Display a map using Folium
st.subheader("餐廳位置地圖(含推薦度熱力圖)")

# Create the map at a fixed starting centre and zoom level.
m = folium.Map(location=[23.0, 120.2], zoom_start=12)

# Markers are grouped into a cluster layer.
marker_cluster = MarkerCluster().add_to(m)

# Heatmap rows are [latitude, longitude, weight], weighted by recommendation.
heat_data = []

for _, row in df.iterrows():
    lat, lon = row["Latitude"], row["Longitude"]
    # Rows whose region could not be geocoded have no position to plot.
    if pd.isnull(lat) or pd.isnull(lon):
        continue

    folium.Marker(
        location=[lat, lon],
        popup=f"{row['Store Name']} (推薦度: {row['推薦度']})",
        tooltip=row["地址"]
    ).add_to(marker_cluster)

    # HeatMap rejects NaN values, so a restaurant with a missing score keeps
    # its marker but is left out of the heat layer.
    weight = row["推薦度"]
    if pd.notnull(weight):
        heat_data.append([lat, lon, weight])

# Add heatmap layer
HeatMap(heat_data, radius=15, blur=10, max_zoom=1, name="推薦度熱力圖").add_to(m)

# Add layer control
folium.LayerControl().add_to(m)

# Render the map's HTML representation inside the Streamlit page.
st.components.v1.html(m._repr_html_(), height=600)

# Save the DataFrame to CSV; "utf-8-sig" writes a BOM at the start of the file.
csv_file = "restaurants_data.csv"
df.to_csv(csv_file, encoding="utf-8-sig", index=False)

# Read the bytes back through a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector.
with open(csv_file, "rb") as csv_handle:
    csv_bytes = csv_handle.read()

# Display download button for the CSV
st.download_button(
    label="下載餐廳數據 CSV 檔案",
    data=csv_bytes,
    file_name=csv_file,
    mime="text/csv"
)