Roberta2024 committed on
Commit
723bba7
1 Parent(s): 8567ba1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -154
app.py CHANGED
@@ -6,25 +6,21 @@ from folium.plugins import MarkerCluster, HeatMap
6
  import plotly.graph_objects as go
7
  import plotly.express as px
8
  from geopy.geocoders import Nominatim
 
9
  import re
10
  import streamlit as st
 
11
 
12
  # Streamlit title and description
13
- st.title("米其林餐廳指南爬蟲")
14
- st.write("Extract restaurant data, visualize with a Sunburst chart and bar chart, and display locations on a map with heatmap.")
15
 
16
  # Read data from Google Sheets
17
  sheet_id = "1xUfnD1WCF5ldqECI8YXIko1gCpaDDCwTztL17kjI42U"
18
- df1 = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")
19
-
20
- # Convert "網址" column to a Python list
21
- urls = df1["網址"].tolist()
22
-
23
- # Create a DataFrame to store all restaurant data
24
- df = pd.DataFrame(columns=["Store Name", "Address", "Phone", "Latitude", "Longitude", "Region"])
25
 
26
  # Initialize Nominatim geocoder
27
- geolocator = Nominatim(user_agent="my_app")
28
 
29
  # Function to extract region (區域) from the address using regex
30
  def extract_region(address):
@@ -34,147 +30,138 @@ def extract_region(address):
34
  else:
35
  return "Unknown"
36
 
37
- # Function to fetch and parse data
38
- def fetch_data():
39
- global df
40
- # Progress bar in Streamlit
41
- progress_bar = st.progress(0)
42
- total_urls = len(urls)
43
-
44
- # Iterate through each URL
45
- for idx, url in enumerate(urls):
46
- response = requests.get(url)
47
- soup = BeautifulSoup(response.content, "html.parser")
48
-
49
- try:
50
- store_name = soup.find("h2", class_="restaurant-details__heading--title").text.strip()
51
- except AttributeError:
52
- store_name = None
53
-
54
- try:
55
- address = soup.find("li", class_="restaurant-details__heading--address").text.strip()
56
- region = extract_region(address)
57
- except AttributeError:
58
- address = None
59
- region = "Unknown"
60
-
61
- # Try to extract phone number
62
- try:
63
- phone = soup.find("a", {"data-event": "CTA_tel"}).get("href").replace("tel:", "")
64
- except AttributeError:
65
- phone = None
66
-
67
- try:
68
- location = geolocator.geocode(address)
69
- if location:
70
- latitude = location.latitude
71
- longitude = location.longitude
72
- else:
73
- latitude = None
74
- longitude = None
75
- except:
76
- latitude = None
77
- longitude = None
78
-
79
- new_row = pd.DataFrame({
80
- "Store Name": [store_name],
81
- "Address": [address],
82
- "Phone": [phone],
83
- "Latitude": [latitude],
84
- "Longitude": [longitude],
85
- "Region": [region]
86
- })
87
-
88
- df = pd.concat([df, new_row], ignore_index=True)
89
-
90
- # Update progress bar
91
- progress_bar.progress((idx + 1) / total_urls)
92
-
93
- # Button to trigger data fetching
94
- if st.button("爬取餐廳資料"):
95
- fetch_data()
96
-
97
- # Save the DataFrame to CSV with UTF-8 encoding, including latitude and longitude
98
- csv_file = "restaurants_data.csv"
99
- df.to_csv(csv_file, encoding="utf-8-sig", index=False)
100
-
101
- # Display the DataFrame as a table at the top
102
- st.subheader("Restaurant Data")
103
- st.dataframe(df)
104
-
105
- # Display download button for the CSV
106
- st.download_button(
107
- label="Download restaurant data as CSV",
108
- data=open(csv_file, "rb").read(),
109
- file_name=csv_file,
110
- mime="text/csv"
111
- )
112
-
113
- # Group the data by region and sum the number of restaurants
114
- region_group = df.groupby("Region").size().reset_index(name='Count')
115
-
116
- # Plot Sunburst chart
117
- sunburst = px.sunburst(
118
- region_group,
119
- path=['Region'],
120
- values='Count',
121
- title="Restaurant Distribution by Region",
122
- )
123
-
124
- sunburst.update_layout(
125
- title_x=0.5,
126
- title_font=dict(size=24, family="Arial"),
127
- height=600,
128
- margin=dict(t=50, b=50, l=50, r=50)
129
- )
130
- st.subheader("Restaurant Distribution by Region (Sunburst Chart)")
131
- st.plotly_chart(sunburst)
132
-
133
- # Plot bar chart with custom colors and labels
134
- bar_chart = go.Figure(go.Bar(
135
- x=region_group["Region"],
136
- y=region_group["Count"],
137
- text=region_group["Count"],
138
- textposition='auto',
139
- marker=dict(color=px.colors.qualitative.Set2)
140
- ))
141
-
142
- bar_chart.update_layout(
143
- title="Restaurant Count by Region",
144
- title_x=0.5,
145
- title_font=dict(size=24, family="Arial"),
146
- height=400,
147
- margin=dict(t=50, b=50, l=50, r=50),
148
- xaxis_title="Region",
149
- yaxis_title="Number of Restaurants",
150
- xaxis=dict(tickangle=-45)
151
- )
152
- st.subheader("Restaurant Count by Region (Bar Chart)")
153
- st.plotly_chart(bar_chart)
154
-
155
- # Display a map using Folium
156
- st.subheader("Restaurant Locations Map with Heatmap")
157
-
158
- # Create map centered around Tainan
159
- m = folium.Map(location=[23.0, 120.2], zoom_start=12)
160
-
161
- # Add marker cluster to the map
162
- marker_cluster = MarkerCluster().add_to(m)
163
-
164
- # Prepare data for heatmap
165
- heat_data = []
166
-
167
- for index, row in df.iterrows():
168
- if pd.notnull(row["Latitude"]) and pd.notnull(row["Longitude"]):
169
- folium.Marker(
170
- location=[row["Latitude"], row["Longitude"]],
171
- popup=f"{row['Store Name']} ({row['Phone']})",
172
- tooltip=row["Address"]
173
- ).add_to(marker_cluster)
174
- heat_data.append([row["Latitude"], row["Longitude"]])
175
-
176
- # Add heatmap layer
177
- HeatMap(heat_data).add_to(m)
178
-
179
- # Display the map in Streamlit
180
- st.components.v1.html(m._repr_html_(), height=600)
 
6
  import plotly.graph_objects as go
7
  import plotly.express as px
8
  from geopy.geocoders import Nominatim
9
+ from geopy.exc import GeocoderInsufficientPrivileges
10
  import re
11
  import streamlit as st
12
+ import time
13
 
14
  # Streamlit title and description
15
st.title("米其林餐廳指南爬蟲與分析")
st.write("提取餐廳數據,可視化區域分佈,並在地圖上顯示位置和推薦度熱力圖。")

# Load the restaurant sheet through Google Sheets' CSV export endpoint.
sheet_id = "1xUfnD1WCF5ldqECI8YXIko1gCpaDDCwTztL17kjI42U"
sheet_csv_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv"
df = pd.read_csv(sheet_csv_url)

# Nominatim geocoder shared by the lookups further down in the script.
geolocator = Nominatim(user_agent="my_unique_app/3.0")
24
 
25
  # Function to extract region (區域) from the address using regex
26
  def extract_region(address):
 
30
  else:
31
  return "Unknown"
32
 
33
# Function to get latitude and longitude with caching
@st.cache_data
def get_lat_lon(district):
    """Geocode a Tainan district name to a ``(latitude, longitude)`` pair.

    The query sent to the geocoder is ``台南市`` followed by ``district``.
    The function is wrapped in ``st.cache_data``, so repeated calls with the
    same district reuse the stored result instead of hitting the network.
    NOTE: failed lookups return normally and are therefore cached as well.

    Args:
        district: District name, as produced by ``extract_region``.

    Returns:
        ``(latitude, longitude)`` when the geocoder finds a match, otherwise
        ``(None, None)``. The result is always a 2-tuple so callers can
        unpack it unconditionally.
    """
    # Imported here so the block does not rely on a module-level import the
    # file does not already have; geopy itself is already a dependency.
    from geopy.exc import GeopyError

    # extract_region() falls back to "Unknown" when no district is found;
    # geocoding that placeholder (or a missing value) would only waste a
    # request and could pin the row to an unrelated location.
    if not isinstance(district, str) or not district.strip() or district == "Unknown":
        return None, None

    try:
        location = geolocator.geocode(f"台南市{district}")
    except GeocoderInsufficientPrivileges:
        st.error("地理編碼器遇到權限問題,請稍後再試。")
        return None, None
    except GeopyError as exc:
        # Timeouts, quota errors, unavailable service, etc. should degrade
        # to "no coordinates" rather than crash the whole page.
        st.warning(f"無法取得 {district} 的座標: {exc}")
        return None, None
    finally:
        # Throttle after every request (hit, miss or error) to avoid rate
        # limiting, not only after successful lookups.
        time.sleep(1)

    if location is None:
        return None, None
    return location.latitude, location.longitude
44
+
45
# Derive the district of each restaurant from its address column.
df['Region'] = df['地址'].apply(extract_region)

# Geocode every row's region. The pairs are collected in a list and split
# with comprehensions instead of `zip(*...)`, because unpacking an empty
# zip raises ValueError when the sheet has no rows.
coordinates = [get_lat_lon(region) for region in df['Region']]
df['Latitude'] = [lat for lat, _ in coordinates]
df['Longitude'] = [lon for _, lon in coordinates]

# Display the DataFrame as a table at the top
st.subheader("餐廳數據")
st.dataframe(df)
52
+
53
# Per-region summary: number of restaurants and mean recommendation score.
# Named aggregation sets the output column names directly, so no positional
# rename is needed afterwards.
region_group = (
    df.groupby("Region")
    .agg(
        Count=('Store Name', 'count'),
        Avg_Recommendation=('推薦度', 'mean'),
    )
    .reset_index()
)

# Create hierarchical data for the Sunburst chart: one root node with every
# region as a direct child.
root_label = 'All Regions'
region_group['Total'] = root_label  # Add a root level
hierarchical_data = region_group[['Total', 'Region', 'Count']]

region_labels = hierarchical_data['Region'].tolist()
region_counts = hierarchical_data['Count'].tolist()

# labels / parents / values must be parallel lists of equal length:
# N region nodes followed by exactly one root node. The root label is
# appended once, not once per region.
sunburst = go.Figure(go.Sunburst(
    labels=region_labels + [root_label],
    parents=[root_label] * len(region_labels) + [''],
    # With branchvalues="total" the root's value is the sum of its children.
    values=region_counts + [sum(region_counts)],
    branchvalues="total",
    hovertemplate='<b>%{label}</b><br>餐廳數量: %{value}<extra></extra>',
    maxdepth=2,
))

sunburst.update_layout(
    title="餐廳分佈(點擊可放大查看)",
    title_x=0.5,
    title_font=dict(size=24, family="Arial"),
    height=600,
    margin=dict(t=50, b=50, l=0, r=0)
)

# Add a plotly "reset view" button (a layout updatemenu, not custom
# JavaScript) that re-applies trace visibility and the chart title.
sunburst.update_layout(
    updatemenus=[{
        'type': 'buttons',
        'showactive': False,
        'buttons': [{
            'label': '重置視圖',
            'method': 'update',
            'args': [{'visible': [True] * len(sunburst.data)},
                     {'title': '餐廳分佈(點擊可放大查看)'}]
        }]
    }]
)

st.subheader("餐廳分佈(Sunburst 圖)")
st.plotly_chart(sunburst, use_container_width=True)
95
+
96
# Bar chart of restaurant counts per region, one bar per region with the
# count printed on the bar.
bar_x = region_group["Region"]
bar_y = region_group["Count"]

bar_chart = go.Figure()
bar_chart.add_trace(
    go.Bar(
        x=bar_x,
        y=bar_y,
        text=bar_y,
        textposition='auto',
        marker={"color": px.colors.qualitative.Set2},
    )
)

# Layout options gathered in one mapping; key order matches the order in
# which the options are applied.
count_layout = {
    "title": "各區域餐廳數量",
    "title_x": 0.5,
    "title_font": {"size": 24, "family": "Arial"},
    "height": 400,
    "margin": {"t": 50, "b": 50, "l": 50, "r": 50},
    "xaxis_title": "區域",
    "yaxis_title": "餐廳數量",
    "xaxis": {"tickangle": -45},
}
bar_chart.update_layout(**count_layout)

st.subheader("各區域餐廳數量(條形圖)")
st.plotly_chart(bar_chart)

# Relationship between recommendation score and location.
st.header("推薦度與地理位置的關聯性")

# Average recommendation score per region.
fig_bar = px.bar(
    data_frame=region_group,
    x="Region",
    y="Avg_Recommendation",
    title="不同區域的平均推薦度比較",
    color_discrete_sequence=['#66CDAA'],
)
st.plotly_chart(fig_bar)
126
+
127
# Display a map using Folium
st.subheader("餐廳位置地圖(含推薦度熱力圖)")

# Create map centered around Tainan
m = folium.Map(location=[23.0, 120.2], zoom_start=12)

# Markers go into a cluster layer so overlapping points stay readable.
marker_cluster = MarkerCluster().add_to(m)

# Heatmap rows are [lat, lon, weight], weighted by the recommendation score.
heat_data = []

# Only rows that were geocoded successfully can be placed on the map.
located = df.dropna(subset=["Latitude", "Longitude"])
for index, row in located.iterrows():
    folium.Marker(
        location=[row["Latitude"], row["Longitude"]],
        popup=f"{row['Store Name']} (推薦度: {row['推薦度']})",
        tooltip=row["地址"]
    ).add_to(marker_cluster)

    # HeatMap rejects data containing NaN, so a restaurant without a
    # recommendation score keeps its marker but is left out of the heat
    # layer. The whole row is skipped so every entry stays the same length.
    if pd.notnull(row["推薦度"]):
        heat_data.append([row["Latitude"], row["Longitude"], row["推薦度"]])

# Add heatmap layer
HeatMap(heat_data, radius=15, blur=10, max_zoom=1, name="推薦度熱力圖").add_to(m)

# Add layer control
folium.LayerControl().add_to(m)

# Display the map in Streamlit
st.components.v1.html(m._repr_html_(), height=600)
156
+
157
# Save the DataFrame to CSV; "utf-8-sig" writes a UTF-8 byte-order mark at
# the start of the file.
csv_file = "restaurants_data.csv"
df.to_csv(csv_file, encoding="utf-8-sig", index=False)

# Read the bytes back inside a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector.
with open(csv_file, "rb") as csv_handle:
    csv_bytes = csv_handle.read()

# Display download button for the CSV
st.download_button(
    label="下載餐廳數據 CSV 檔案",
    data=csv_bytes,
    file_name=csv_file,
    mime="text/csv",
)