Roberta2024 committed on
Commit
f2b678e
1 Parent(s): 71a36bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -98
app.py CHANGED
@@ -1,29 +1,30 @@
 
 
1
  import pandas as pd
2
  import folium
3
- from folium.plugins import MarkerCluster, HeatMap
4
  import plotly.graph_objects as go
5
- import plotly.express as px
6
  from geopy.geocoders import Nominatim
7
- from geopy.exc import GeocoderInsufficientPrivileges
8
  import re
9
  import streamlit as st
10
- import time
11
 
12
  # Streamlit title and description
13
- st.title("米其林餐廳指南分析")
14
- st.write("分析餐廳數據,可視化區域分佈,並在地圖上顯示位置和餐廳數量熱力圖。")
15
 
16
  # Read data from Google Sheets
17
  sheet_id = "1xUfnD1WCF5ldqECI8YXIko1gCpaDDCwTztL17kjI42U"
18
- df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")
19
 
20
- # Print column names and first few rows
21
- st.write("資料框的列名:", df.columns.tolist())
22
- st.write("資料預覽:")
23
- st.dataframe(df.head())
 
24
 
25
  # Initialize Nominatim geocoder
26
- geolocator = Nominatim(user_agent="my_unique_app/3.0")
27
 
28
  # Function to extract region (區域) from the address using regex
29
  def extract_region(address):
@@ -33,61 +34,108 @@ def extract_region(address):
33
  else:
34
  return "Unknown"
35
 
36
- # Function to get latitude and longitude with caching
37
- @st.cache_data
38
- def get_lat_lon(district):
39
- try:
40
- location = geolocator.geocode(f"台南市{district}")
41
- if location:
42
- time.sleep(1) # Delay to avoid rate limiting
43
- return location.latitude, longitude
44
- except GeocoderInsufficientPrivileges:
45
- st.error("地理編碼器遇到權限問題,請稍後再試。")
46
- return None, None
47
-
48
- # Assuming we have a column that represents the region or can be used to derive it
49
- # If we don't have such a column, we'll need to skip this part
50
- if '區域' in df.columns:
51
- region_column = '區域'
52
- elif '地址' in df.columns:
53
- df['區域'] = df['地址'].apply(extract_region)
54
- region_column = '區域'
55
- else:
56
- st.error("無法找到區域資訊,某些分析將無法進行。")
57
- region_column = None
58
-
59
- # Group the data by region and count the number of restaurants
60
- if region_column:
61
- region_group = df.groupby(region_column).size().reset_index(name='Count')
62
-
63
- # Create hierarchical data for the Sunburst chart
64
- region_group['Total'] = 'All Regions' # Add a root level
65
- hierarchical_data = region_group[['Total', region_column, 'Count']]
66
-
67
- # Plot interactive Sunburst chart
68
- sunburst = go.Figure(go.Sunburst(
69
- labels=hierarchical_data[region_column].tolist() + hierarchical_data['Total'].tolist(),
70
- parents=hierarchical_data['Total'].tolist() + [''],
71
- values=hierarchical_data['Count'].tolist() + [hierarchical_data['Count'].sum()],
72
- branchvalues="total",
73
- hovertemplate='<b>%{label}</b><br>餐廳數量: %{value}<extra></extra>',
74
- maxdepth=2,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  ))
76
 
77
- sunburst.update_layout(
78
- title="餐廳分佈(點擊可放大查看)",
79
  title_x=0.5,
80
  title_font=dict(size=24, family="Arial"),
81
  height=600,
82
- margin=dict(t=50, b=50, l=0, r=0)
83
  )
84
-
85
- st.subheader("餐廳分佈(Sunburst 圖)")
86
- st.plotly_chart(sunburst, use_container_width=True)
87
 
88
  # Plot bar chart with custom colors and labels
89
  bar_chart = go.Figure(go.Bar(
90
- x=region_group[region_column],
91
  y=region_group["Count"],
92
  text=region_group["Count"],
93
  textposition='auto',
@@ -95,61 +143,33 @@ if region_column:
95
  ))
96
 
97
  bar_chart.update_layout(
98
- title="各區域餐廳數量",
99
  title_x=0.5,
100
  title_font=dict(size=24, family="Arial"),
101
  height=400,
102
  margin=dict(t=50, b=50, l=50, r=50),
103
- xaxis_title="區域",
104
- yaxis_title="餐廳數量",
105
  xaxis=dict(tickangle=-45)
106
  )
107
- st.subheader("各區域餐廳數量(條形圖)")
108
  st.plotly_chart(bar_chart)
109
 
110
- # Display a map using Folium if we have latitude and longitude
111
- if '緯度' in df.columns and '經度' in df.columns:
112
- st.subheader("餐廳位置地圖(含數量熱力圖)")
113
 
114
- # Create map centered around the mean latitude and longitude
115
- center_lat = df['緯度'].mean()
116
- center_lon = df['經度'].mean()
117
- m = folium.Map(location=[center_lat, center_lon], zoom_start=12)
118
 
119
  # Add marker cluster to the map
120
  marker_cluster = MarkerCluster().add_to(m)
121
-
122
- # Prepare data for heatmap
123
- heat_data = []
124
-
125
  for index, row in df.iterrows():
126
- if pd.notnull(row["緯度"]) and pd.notnull(row["經度"]):
127
  folium.Marker(
128
- location=[row["緯度"], row["經度"]],
129
- popup=f"{row.get('店名', 'Unknown')}",
130
- tooltip=row.get('地址', 'Unknown')
131
  ).add_to(marker_cluster)
132
- heat_data.append([row["緯度"], row["經度"], 1]) # Weight of 1 for each restaurant
133
-
134
- # Add heatmap layer
135
- HeatMap(heat_data, radius=15, blur=10, max_zoom=1, name="餐廳數量熱力圖").add_to(m)
136
-
137
- # Add layer control
138
- folium.LayerControl().add_to(m)
139
 
140
  # Display the map in Streamlit
141
  st.components.v1.html(m._repr_html_(), height=600)
142
- else:
143
- st.error("無法顯示地圖,因為缺少緯度和經度資訊。")
144
-
145
- # Save the DataFrame to CSV with UTF-8 encoding
146
- csv_file = "restaurants_data.csv"
147
- df.to_csv(csv_file, encoding="utf-8-sig", index=False)
148
-
149
- # Display download button for the CSV
150
- st.download_button(
151
- label="下載餐廳數據 CSV 檔案",
152
- data=open(csv_file, "rb").read(),
153
- file_name=csv_file,
154
- mime="text/csv"
155
- )
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
  import pandas as pd
4
  import folium
5
+ from folium.plugins import MarkerCluster
6
  import plotly.graph_objects as go
7
+ import plotly.express as px # Add this import
8
  from geopy.geocoders import Nominatim
 
9
  import re
10
  import streamlit as st
 
11
 
12
  # Streamlit title and description
13
+ st.title("米其林餐廳指南爬蟲")
14
+ st.write("Extract restaurant data, visualize with a pie chart and bar chart, and display locations on a map.")
15
 
16
  # Read data from Google Sheets
17
  sheet_id = "1xUfnD1WCF5ldqECI8YXIko1gCpaDDCwTztL17kjI42U"
18
+ df1 = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")
19
 
20
+ # Convert "網址" column to a Python list
21
+ urls = df1["網址"].tolist()
22
+
23
+ # Create a DataFrame to store all restaurant data
24
+ df = pd.DataFrame(columns=["Store Name", "Address", "Phone", "Latitude", "Longitude", "Region"])
25
 
26
  # Initialize Nominatim geocoder
27
+ geolocator = Nominatim(user_agent="my_app")
28
 
29
  # Function to extract region (區域) from the address using regex
30
  def extract_region(address):
 
34
  else:
35
  return "Unknown"
36
 
37
+ # Function to fetch and parse data
38
+ def fetch_data():
39
+ global df
40
+ # Progress bar in Streamlit
41
+ progress_bar = st.progress(0)
42
+ total_urls = len(urls)
43
+
44
+ # Iterate through each URL
45
+ for idx, url in enumerate(urls):
46
+ response = requests.get(url)
47
+ soup = BeautifulSoup(response.content, "html.parser")
48
+
49
+ try:
50
+ store_name = soup.find("h2", class_="restaurant-details__heading--title").text.strip()
51
+ except AttributeError:
52
+ store_name = None
53
+
54
+ try:
55
+ address = soup.find("li", class_="restaurant-details__heading--address").text.strip()
56
+ region = extract_region(address)
57
+ except AttributeError:
58
+ address = None
59
+ region = "Unknown"
60
+
61
+ # Try to extract phone number
62
+ try:
63
+ phone = soup.find("a", {"data-event": "CTA_tel"}).get("href").replace("tel:", "")
64
+ except AttributeError:
65
+ phone = None
66
+
67
+ try:
68
+ location = geolocator.geocode(address)
69
+ if location:
70
+ latitude = location.latitude
71
+ longitude = location.longitude
72
+ else:
73
+ latitude = None
74
+ longitude = None
75
+ except:
76
+ latitude = None
77
+ longitude = None
78
+
79
+ new_row = pd.DataFrame({
80
+ "Store Name": [store_name],
81
+ "Address": [address],
82
+ "Phone": [phone],
83
+ "Latitude": [latitude],
84
+ "Longitude": [longitude],
85
+ "Region": [region]
86
+ })
87
+
88
+ df = pd.concat([df, new_row], ignore_index=True)
89
+
90
+ # Update progress bar
91
+ progress_bar.progress((idx + 1) / total_urls)
92
+
93
+ # Button to trigger data fetching
94
+ if st.button("爬取餐廳資料"):
95
+ fetch_data()
96
+
97
+ # Save the DataFrame to CSV with UTF-8 encoding, including latitude and longitude
98
+ csv_file = "restaurants_data.csv"
99
+ df.to_csv(csv_file, encoding="utf-8-sig", index=False)
100
+
101
+ # Display the DataFrame as a table at the top
102
+ st.subheader("Restaurant Data")
103
+ st.dataframe(df)
104
+
105
+ # Display download button for the CSV
106
+ st.download_button(
107
+ label="Download restaurant data as CSV",
108
+ data=open(csv_file, "rb").read(),
109
+ file_name=csv_file,
110
+ mime="text/csv"
111
+ )
112
+
113
+ # Group the data by region and count the number of restaurants
114
+ region_group = df.groupby("Region").size().reset_index(name='Count')
115
+
116
+ # Plot enlarged pie chart with custom colors and labels
117
+ pie_chart = go.Figure(go.Pie(
118
+ labels=region_group["Region"],
119
+ values=region_group["Count"],
120
+ textinfo="label+percent",
121
+ hoverinfo="label+value",
122
+ textfont=dict(size=18),
123
+ marker=dict(colors=px.colors.qualitative.Set3, line=dict(color='#000000', width=2))
124
  ))
125
 
126
+ pie_chart.update_layout(
127
+ title="Restaurant Distribution by Region",
128
  title_x=0.5,
129
  title_font=dict(size=24, family="Arial"),
130
  height=600,
131
+ margin=dict(t=50, b=50, l=50, r=50)
132
  )
133
+ st.subheader("Restaurant Distribution by Region (Enlarged Pie Chart)")
134
+ st.plotly_chart(pie_chart)
 
135
 
136
  # Plot bar chart with custom colors and labels
137
  bar_chart = go.Figure(go.Bar(
138
+ x=region_group["Region"],
139
  y=region_group["Count"],
140
  text=region_group["Count"],
141
  textposition='auto',
 
143
  ))
144
 
145
  bar_chart.update_layout(
146
+ title="Restaurant Count by Region",
147
  title_x=0.5,
148
  title_font=dict(size=24, family="Arial"),
149
  height=400,
150
  margin=dict(t=50, b=50, l=50, r=50),
151
+ xaxis_title="Region",
152
+ yaxis_title="Number of Restaurants",
153
  xaxis=dict(tickangle=-45)
154
  )
155
+ st.subheader("Restaurant Count by Region (Bar Chart)")
156
  st.plotly_chart(bar_chart)
157
 
158
+ # Display a map using Folium
159
+ st.subheader("Restaurant Locations Map")
 
160
 
161
+ # Create map centered around Tainan
162
+ m = folium.Map(location=[23.0, 120.2], zoom_start=12)
 
 
163
 
164
  # Add marker cluster to the map
165
  marker_cluster = MarkerCluster().add_to(m)
 
 
 
 
166
  for index, row in df.iterrows():
167
+ if pd.notnull(row["Latitude"]) and pd.notnull(row["Longitude"]):
168
  folium.Marker(
169
+ location=[row["Latitude"], row["Longitude"]],
170
+ popup=f"{row['Store Name']} ({row['Phone']})",
171
+ tooltip=row["Address"]
172
  ).add_to(marker_cluster)
 
 
 
 
 
 
 
173
 
174
  # Display the map in Streamlit
175
  st.components.v1.html(m._repr_html_(), height=600)