"""Streamlit app that charts and maps restaurant data from a Google Sheet.

The script reads a sheet exported as CSV, derives a region string from each
address, geocodes each distinct region with Nominatim, and renders a
Sunburst chart, two bar charts, a Folium map (marker cluster plus a heatmap
weighted by the recommendation score) and a CSV download button.
"""
import re
import time

import folium
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
import streamlit as st
from bs4 import BeautifulSoup
from folium.plugins import HeatMap, MarkerCluster
from geopy.exc import GeocoderInsufficientPrivileges, GeocoderServiceError
from geopy.geocoders import Nominatim

# Streamlit title and description
st.title("米其林餐廳指南爬蟲與分析")
st.write("提取餐廳數據,可視化區域分佈,並在地圖上顯示位置和推薦度熱力圖。")

# Read data from Google Sheets
sheet_id = "1xUfnD1WCF5ldqECI8YXIko1gCpaDDCwTztL17kjI42U"
df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")

# Initialize Nominatim geocoder
geolocator = Nominatim(user_agent="my_unique_app/3.0")

# Placeholder region for addresses that cannot be parsed.
UNKNOWN_REGION = "Unknown"
# City name prepended to geocoding queries that do not already contain it.
CITY_PREFIX = "台南市"
# Seconds to wait after every geocoding request to avoid rate limiting.
GEOCODE_DELAY_SECONDS = 1

# Text up to and including the first district marker.
_DISTRICT_PATTERN = re.compile(r".*?區")
# Fallback: text up to and including the first county/city marker.
_CITY_PATTERN = re.compile(r".*?[縣市]")


def extract_region(address):
    """Return the leading part of *address* that identifies its region.

    The result is the text up to and including the first "區"; when the
    address has no "區", the text up to and including the first "縣" or "市"
    is returned instead. For example "台南市中西區民族路" yields
    "台南市中西區" and "台南市民族路" yields "台南市".

    Args:
        address: Address string; non-string values (e.g. NaN from an empty
            cell) are accepted.

    Returns:
        The region string, or "Unknown" when *address* is not a string or
        contains none of the markers.
    """
    if not isinstance(address, str):
        return UNKNOWN_REGION
    # The previous pattern r'(.*?)區|縣|市' parsed as three alternatives, so an
    # address without "區" produced a lone "縣"/"市" character as its region.
    match = _DISTRICT_PATTERN.search(address) or _CITY_PATTERN.search(address)
    return match.group(0) if match else UNKNOWN_REGION


@st.cache_data
def get_lat_lon(district):
    """Geocode *district* with Nominatim and return ``(latitude, longitude)``.

    Results are cached by Streamlit, so each distinct district triggers at
    most one request per cache lifetime.

    Args:
        district: Region string as produced by ``extract_region``.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``(None, None)`` when the
        district is unknown, no match is found, or the geocoding service
        reports an error.
    """
    # Do not spend a request on the placeholder or on non-string values.
    if not isinstance(district, str) or not district or district == UNKNOWN_REGION:
        return None, None

    # Avoid duplicating the city name when the region already contains it.
    query = district if CITY_PREFIX in district else f"{CITY_PREFIX}{district}"
    try:
        location = geolocator.geocode(query)
    except GeocoderInsufficientPrivileges:
        st.error("地理編碼器遇到權限問題,請稍後再試。")
        return None, None
    except GeocoderServiceError as exc:
        # Covers the other geopy service failures (timeouts, unavailable
        # service, quota) so one bad lookup does not abort the whole app.
        st.warning(f"無法取得「{district}」的座標: {exc}")
        return None, None
    finally:
        # Throttle after every request, not only after successful ones.
        time.sleep(GEOCODE_DELAY_SECONDS)

    if location is None:
        return None, None
    return location.latitude, location.longitude


# Apply geocoding to the dataframe. Each distinct region is looked up once;
# building the columns from a mapping also works when the sheet has no rows,
# where unpacking ``zip(*...)`` would raise ValueError.
df['Region'] = df['地址'].apply(extract_region)
region_coordinates = {region: get_lat_lon(region) for region in df['Region'].unique()}
df['Latitude'] = [region_coordinates[region][0] for region in df['Region']]
df['Longitude'] = [region_coordinates[region][1] for region in df['Region']]

# Display the DataFrame as a table at the top
st.subheader("餐廳數據")
st.dataframe(df)

# Group the data by region: restaurant count and mean recommendation score
region_group = df.groupby("Region").agg({'Store Name': 'count', '推薦度': 'mean'}).reset_index()
region_group.columns = ['Region', 'Count', 'Avg_Recommendation']

# Create hierarchical data for the Sunburst chart
region_group['Total'] = 'All Regions'  # Add a root level
hierarchical_data = region_group[['Total', 'Region', 'Count']]

# Plot interactive Sunburst chart. The root node is appended exactly once so
# that labels, parents and values all have one entry per region plus one.
sunburst = go.Figure(go.Sunburst(
    labels=hierarchical_data['Region'].tolist() + ['All Regions'],
    parents=hierarchical_data['Total'].tolist() + [''],
    values=hierarchical_data['Count'].tolist() + [hierarchical_data['Count'].sum()],
    branchvalues="total",
    # NOTE(review): the line break inside this template was lost in the
    # source text; "<br>" is Plotly's line-break markup - confirm.
    hovertemplate='%{label}<br>餐廳數量: %{value}',
    maxdepth=2,
))

sunburst.update_layout(
    title="餐廳分佈(點擊可放大查看)",
    title_x=0.5,
    title_font=dict(size=24, family="Arial"),
    height=600,
    margin=dict(t=50, b=50, l=0, r=0)
)

# Add a button that makes every trace visible and restores the chart title
sunburst.update_layout(
    updatemenus=[{
        'type': 'buttons',
        'showactive': False,
        'buttons': [{
            'label': '重置視圖',
            'method': 'update',
            'args': [{'visible': [True] * len(sunburst.data)},
                     {'title': '餐廳分佈(點擊可放大查看)'}]
        }]
    }]
)

st.subheader("餐廳分佈(Sunburst 圖)")
st.plotly_chart(sunburst, use_container_width=True)

# Plot bar chart with custom colors and labels. The palette is repeated so
# every bar gets a colour even when there are more regions than colours.
palette = px.colors.qualitative.Set2
bar_colors = [palette[i % len(palette)] for i in range(len(region_group))]
bar_chart = go.Figure(go.Bar(
    x=region_group["Region"],
    y=region_group["Count"],
    text=region_group["Count"],
    textposition='auto',
    marker=dict(color=bar_colors)
))

bar_chart.update_layout(
    title="各區域餐廳數量",
    title_x=0.5,
    title_font=dict(size=24, family="Arial"),
    height=400,
    margin=dict(t=50, b=50, l=50, r=50),
    xaxis_title="區域",
    yaxis_title="餐廳數量",
    xaxis=dict(tickangle=-45)
)

st.subheader("各區域餐廳數量(條形圖)")
st.plotly_chart(bar_chart)

# Relationship between recommendation score and location
st.header("推薦度與地理位置的關聯性")

# Average recommendation score per region
fig_bar = px.bar(region_group, x="Region", y="Avg_Recommendation",
                 title="不同區域的平均推薦度比較",
                 color_discrete_sequence=['#66CDAA'])
st.plotly_chart(fig_bar)

# Display a map using Folium
st.subheader("餐廳位置地圖(含推薦度熱力圖)")

# Create map centered around Tainan
m = folium.Map(location=[23.0, 120.2], zoom_start=12)

# Add marker cluster to the map
marker_cluster = MarkerCluster().add_to(m)

# Prepare data for heatmap; only rows with coordinates are plotted
heat_data = []
for _, row in df.dropna(subset=["Latitude", "Longitude"]).iterrows():
    latitude, longitude = row["Latitude"], row["Longitude"]
    folium.Marker(
        location=[latitude, longitude],
        popup=f"{row['Store Name']} (推薦度: {row['推薦度']})",
        tooltip=row["地址"]
    ).add_to(marker_cluster)
    # A missing score cannot be used as a heatmap weight, so the row keeps
    # its marker but is left out of the heat layer.
    if pd.notnull(row["推薦度"]):
        heat_data.append([latitude, longitude, row["推薦度"]])

# Add heatmap layer (skipped when there are no weighted points)
if heat_data:
    HeatMap(heat_data, radius=15, blur=10, max_zoom=1, name="推薦度熱力圖").add_to(m)

# Add layer control
folium.LayerControl().add_to(m)

# Display the map in Streamlit
st.components.v1.html(m._repr_html_(), height=600)

# Save the DataFrame to CSV; "utf-8-sig" writes a BOM at the start of the file
csv_file = "restaurants_data.csv"
df.to_csv(csv_file, encoding="utf-8-sig", index=False)

# Read the file back through a context manager so the handle is closed
with open(csv_file, "rb") as csv_handle:
    csv_bytes = csv_handle.read()

# Display download button for the CSV
st.download_button(
    label="下載餐廳數據 CSV 檔案",
    data=csv_bytes,
    file_name=csv_file,
    mime="text/csv"
)