"""Streamlit app that scrapes Michelin Guide restaurant pages and maps them.

The list of restaurant page URLs is read from a Google Sheet. When the user
presses the button, every page is downloaded and parsed, its address is
geocoded through Nominatim, and the result is shown as a table, a CSV
download, per-region charts, and several maps.
"""
import re

import folium
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
import streamlit as st
from bs4 import BeautifulSoup
from folium.plugins import HeatMap, MarkerCluster
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

# Column layout shared by the module-level DataFrame and every scraped row.
COLUMNS = ["Store Name", "Address", "Phone", "Latitude", "Longitude", "Region"]

# Upper bound (seconds) for a single restaurant page download.
REQUEST_TIMEOUT_SECONDS = 15

# Streamlit title and description
st.title("米其林餐廳指南爬蟲")
st.write("Extract restaurant data, visualize with charts, and display locations on maps.")

# Read data from Google Sheets
sheet_id = "1xUfnD1WCF5ldqECI8YXIko1gCpaDDCwTztL17kjI42U"
df1 = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")

# Convert the "網址" column to a list of URLs. Blank spreadsheet cells are read
# as NaN, which is not a fetchable URL, so they are dropped here.
urls = [u for u in df1["網址"].dropna().astype(str).str.strip() if u]

# DataFrame that accumulates all restaurant data (filled by fetch_data).
df = pd.DataFrame(columns=COLUMNS)

# Nominatim geocoder. An explicit timeout replaces geopy's short default.
geolocator = Nominatim(user_agent="my_app", timeout=10)
# The public Nominatim service allows at most one request per second, so the
# geocode call is throttled instead of being fired back-to-back in the loop.
_geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

# Region patterns, tried in this order: up to the first "區", otherwise up to
# the first "縣" or "市".
_DISTRICT_RE = re.compile(r".*?區")
_COUNTY_CITY_RE = re.compile(r".*?[縣市]")


def extract_region(address):
    """Extract the region (區域) from an address string.

    Args:
        address: The address text, or ``None``/empty when it is unknown.

    Returns:
        The address text up to and including the first "區". When the address
        contains no "區", the text up to and including the first "縣" or "市".
        ``"Unknown"`` when none of these characters is present or the address
        is empty.
    """
    if not address:
        return "Unknown"
    # The previous single pattern r'(.*?)區|縣|市' parsed as three separate
    # alternatives, so an address without "區" yielded only the bare character
    # "縣" or "市". Two explicit patterns keep the "區" result and return a
    # meaningful prefix in the fallback case.
    match = _DISTRICT_RE.search(address) or _COUNTY_CITY_RE.search(address)
    return match.group(0) if match else "Unknown"


def _parse_restaurant(html):
    """Parse one restaurant page into a row dict (coordinates left empty)."""
    soup = BeautifulSoup(html, "html.parser")

    name_tag = soup.find("h2", class_="restaurant-details__heading--title")
    address_tag = soup.find("li", class_="restaurant-details__heading--address")
    phone_tag = soup.find("a", {"data-event": "CTA_tel"})

    store_name = name_tag.text.strip() if name_tag is not None else None
    address = address_tag.text.strip() if address_tag is not None else None

    # A phone link without an href is treated the same as a missing link.
    href = phone_tag.get("href") if phone_tag is not None else None
    phone = href.replace("tel:", "") if href else None

    return {
        "Store Name": store_name,
        "Address": address,
        "Phone": phone,
        "Latitude": None,
        "Longitude": None,
        "Region": extract_region(address),
    }


def _geocode_address(address):
    """Return ``(latitude, longitude)`` for an address, or ``(None, None)``."""
    if not address:
        # Nothing to look up; avoid sending an empty query to the geocoder.
        return None, None
    try:
        location = _geocode(address)
    except Exception:
        # Geocoding is best-effort: a failed lookup must not abort the scrape.
        # Unlike a bare ``except``, this lets KeyboardInterrupt and SystemExit
        # propagate.
        return None, None
    if location is None:
        return None, None
    return location.latitude, location.longitude


def fetch_data():
    """Download, parse and geocode every URL in ``urls``.

    Progress is reported through a Streamlit progress bar. A page that cannot
    be downloaded is reported with a warning and skipped; the remaining URLs
    are still processed.

    Side effects:
        Appends the scraped rows to the module-level ``df``.

    Returns:
        The updated module-level DataFrame.
    """
    global df

    progress_bar = st.progress(0)
    total_urls = len(urls)
    rows = []

    with requests.Session() as session:
        for idx, url in enumerate(urls):
            try:
                response = session.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
                response.raise_for_status()
            except requests.RequestException as exc:
                st.warning(f"Skipping {url}: {exc}")
            else:
                row = _parse_restaurant(response.content)
                row["Latitude"], row["Longitude"] = _geocode_address(row["Address"])
                rows.append(row)

            # Update progress bar
            progress_bar.progress((idx + 1) / total_urls)

    if rows:
        # Build the frame once instead of concatenating inside the loop, which
        # copies the whole DataFrame on every iteration.
        new_rows = pd.DataFrame(rows, columns=COLUMNS)
        df = new_rows if df.empty else pd.concat([df, new_rows], ignore_index=True)
    return df


def _render_results(data):
    """Render the table, CSV download, charts and maps for scraped data."""
    # Save the DataFrame to CSV with UTF-8 encoding, including latitude and
    # longitude, then read it back for the download button.
    csv_file = "restaurants_data.csv"
    data.to_csv(csv_file, encoding="utf-8-sig", index=False)
    with open(csv_file, "rb") as fh:
        csv_bytes = fh.read()

    # Display the DataFrame as a table at the top
    st.subheader("Restaurant Data")
    st.dataframe(data)

    st.download_button(
        label="Download restaurant data as CSV",
        data=csv_bytes,
        file_name=csv_file,
        mime="text/csv",
    )

    # Number of restaurants per region
    region_group = data.groupby("Region").size().reset_index(name="Count")

    # Enlarged pie chart with custom colors and labels
    pie_chart = go.Figure(go.Pie(
        labels=region_group["Region"],
        values=region_group["Count"],
        textinfo="label+percent",
        hoverinfo="label+value",
        textfont=dict(size=18),
        marker=dict(
            colors=px.colors.qualitative.Set3,
            line=dict(color='#000000', width=2),
        ),
    ))
    pie_chart.update_layout(
        title="Restaurant Distribution by Region",
        title_x=0.5,
        title_font=dict(size=24, family="Arial"),
        height=600,
        margin=dict(t=50, b=50, l=50, r=50),
    )
    st.subheader("Restaurant Distribution by Region (Enlarged Pie Chart)")
    st.plotly_chart(pie_chart)

    # Bar chart with custom colors and labels. The palette is cycled so that
    # every bar gets a color even when there are more regions than colors.
    palette = px.colors.qualitative.Set2
    bar_colors = [palette[i % len(palette)] for i in range(len(region_group))]
    bar_chart = go.Figure(go.Bar(
        x=region_group["Region"],
        y=region_group["Count"],
        text=region_group["Count"],
        textposition='auto',
        marker=dict(color=bar_colors),
    ))
    bar_chart.update_layout(
        title="Restaurant Count by Region",
        title_x=0.5,
        title_font=dict(size=24, family="Arial"),
        height=400,
        margin=dict(t=50, b=50, l=50, r=50),
        xaxis_title="Region",
        yaxis_title="Number of Restaurants",
        xaxis=dict(tickangle=-45),
    )
    st.subheader("Restaurant Count by Region (Bar Chart)")
    st.plotly_chart(bar_chart)

    # Rows with usable coordinates. The columns are coerced to numeric because
    # they hold a mix of floats and None, and the map center must not be NaN.
    coords = data[["Latitude", "Longitude"]].apply(pd.to_numeric, errors="coerce")
    located = data.assign(
        Latitude=coords["Latitude"],
        Longitude=coords["Longitude"],
    ).dropna(subset=["Latitude", "Longitude"])
    has_locations = not located.empty
    if has_locations:
        center = [located["Latitude"].mean(), located["Longitude"].mean()]

    # Marker map using Folium
    st.subheader("Restaurant Locations Map")
    if has_locations:
        m = folium.Map(location=center, zoom_start=10)
        marker_cluster = MarkerCluster().add_to(m)
        for _, row in located.iterrows():
            folium.Marker(
                location=[row["Latitude"], row["Longitude"]],
                popup=f"{row['Store Name']} ({row['Phone']})",
                tooltip=row["Address"],
            ).add_to(marker_cluster)
        st.components.v1.html(m._repr_html_(), height=600)
    else:
        st.info("No restaurant could be geocoded, so the map is not shown.")

    # Heatmap section
    st.header("餐廳分布熱力圖")
    if has_locations:
        heat_data = located[["Latitude", "Longitude"]].values.tolist()
        heatmap = folium.Map(location=center, zoom_start=10)
        HeatMap(heat_data).add_to(heatmap)
        st.components.v1.html(heatmap._repr_html_(), height=600)
    else:
        st.info("No restaurant could be geocoded, so the heatmap is not shown.")

    # Regional restaurant count analysis
    st.header("各區域餐廳數量分析")
    fig_bar = px.bar(
        region_group,
        x='Region',
        y='Count',
        title="各區域餐廳數量比較",
        color='Count',
        color_continuous_scale=px.colors.sequential.Viridis,
    )
    st.plotly_chart(fig_bar)

    # Scatter map of the individual restaurant locations
    if has_locations:
        fig_scatter = px.scatter_mapbox(
            located,
            lat="Latitude",
            lon="Longitude",
            hover_name="Store Name",
            hover_data=["Address", "Phone"],
            zoom=10,
            height=600,
            title="餐廳位置分布圖",
        )
        fig_scatter.update_layout(mapbox_style="open-street-map")
        st.plotly_chart(fig_scatter)


# Button to trigger data fetching
if st.button("爬取餐廳資料"):
    fetch_data()
    if df.empty:
        st.warning("No restaurant data could be fetched.")
    else:
        _render_results(df)