# M_Restru / app.py
# Roberta2024's picture
# Update app.py
# f2b678e verified
# raw
# history blame
# 5.68 kB
import requests
from bs4 import BeautifulSoup
import pandas as pd
import folium
from folium.plugins import MarkerCluster
import plotly.graph_objects as go
import plotly.express as px # Add this import
from geopy.geocoders import Nominatim
import re
import streamlit as st
# Streamlit page title and short description shown at the top of the app.
st.title("米其林餐廳指南爬蟲")
st.write("Extract restaurant data, visualize with a pie chart and bar chart, and display locations on a map.")

# Read the list of restaurant page URLs from a Google Sheet via its CSV export.
sheet_id = "1xUfnD1WCF5ldqECI8YXIko1gCpaDDCwTztL17kjI42U"
df1 = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")

# Convert the "網址" column to a Python list. Blank cells are read by pandas
# as NaN; drop them so the scraper never tries to request a non-URL value.
urls = df1["網址"].dropna().tolist()

# DataFrame that accumulates one row per scraped restaurant.
df = pd.DataFrame(columns=["Store Name", "Address", "Phone", "Latitude", "Longitude", "Region"])

# Nominatim geocoder used to turn addresses into latitude/longitude.
geolocator = Nominatim(user_agent="my_app")
# Function to extract region (區域) from the address using regex
def extract_region(address):
    """Return the leading administrative part of *address*.

    The result is the text from the start of the address through the first
    "區". When the address contains no "區", it is the text through the
    first "縣" or "市" instead (e.g. "嘉義縣民雄鄉..." -> "嘉義縣").

    Args:
        address: The address string to inspect. ``None``, empty strings and
            non-string values are accepted.

    Returns:
        The matched prefix, or "Unknown" when nothing matches or the input
        is not a usable string.
    """
    if not isinstance(address, str) or not address:
        return "Unknown"

    # The previous pattern r'(.*?)區|縣|市' parsed as three alternatives
    # ("(.*?)區", "縣", "市"), so addresses without "區" yielded only the
    # bare character "縣" or "市". Search in two explicit steps instead.
    match = re.search(r'.*?區', address) or re.search(r'.*?[縣市]', address)
    return match.group(0) if match else "Unknown"
def _scrape_restaurant(url):
    """Download one restaurant page and return its fields as a row dict.

    Fields that cannot be found in the page are stored as ``None`` (the
    region as "Unknown"). Raises ``requests.RequestException`` when the
    page cannot be downloaded.
    """
    # A timeout keeps one unresponsive server from hanging the whole run.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, "html.parser")

    # ``soup.find`` returns None when the element is missing, which makes the
    # chained attribute access raise AttributeError.
    try:
        store_name = soup.find("h2", class_="restaurant-details__heading--title").text.strip()
    except AttributeError:
        store_name = None

    try:
        address = soup.find("li", class_="restaurant-details__heading--address").text.strip()
        region = extract_region(address)
    except AttributeError:
        address = None
        region = "Unknown"

    # Try to extract phone number
    try:
        phone = soup.find("a", {"data-event": "CTA_tel"}).get("href").replace("tel:", "")
    except AttributeError:
        phone = None

    latitude = None
    longitude = None
    if address:  # nothing to geocode without an address
        try:
            location = geolocator.geocode(address)
        except Exception:
            # Geocoding is best-effort: any geocoder failure leaves the
            # coordinates empty instead of aborting the scrape.
            location = None
        if location:
            latitude = location.latitude
            longitude = location.longitude

    return {
        "Store Name": store_name,
        "Address": address,
        "Phone": phone,
        "Latitude": latitude,
        "Longitude": longitude,
        "Region": region,
    }


# Function to fetch and parse data
def fetch_data():
    """Scrape every URL in ``urls`` and append the results to the global ``df``.

    One row is added per page that could be downloaded. Pages that fail to
    download are reported with ``st.warning`` and skipped. A Streamlit
    progress bar is advanced after every URL, whether it succeeded or not.
    """
    global df

    # Progress bar in Streamlit
    progress_bar = st.progress(0)
    total_urls = len(urls)

    # Collect rows in a list and build the DataFrame once at the end;
    # concatenating inside the loop copies the whole frame on every iteration.
    rows = []
    for idx, url in enumerate(urls):
        try:
            rows.append(_scrape_restaurant(url))
        except requests.RequestException as exc:
            st.warning(f"Could not fetch {url}: {exc}")

        # Update progress bar
        progress_bar.progress((idx + 1) / total_urls)

    if rows:
        new_rows = pd.DataFrame(rows, columns=df.columns)
        df = pd.concat([df, new_rows], ignore_index=True)
# Button to trigger data fetching.
# NOTE(review): the original indentation was lost; this assumes every statement
# after the button belongs to the `if` body (export, charts and map are only
# meaningful once data has been scraped) - confirm against the deployed app.
if st.button("爬取餐廳資料"):
    fetch_data()

    # Save the DataFrame to CSV with UTF-8 encoding, including latitude and longitude
    csv_file = "restaurants_data.csv"
    df.to_csv(csv_file, encoding="utf-8-sig", index=False)

    # Display the DataFrame as a table at the top
    st.subheader("Restaurant Data")
    st.dataframe(df)

    # Read the saved file back through a context manager so the file handle
    # is closed deterministically instead of being left to garbage collection.
    with open(csv_file, "rb") as csv_handle:
        csv_bytes = csv_handle.read()

    # Display download button for the CSV
    st.download_button(
        label="Download restaurant data as CSV",
        data=csv_bytes,
        file_name=csv_file,
        mime="text/csv"
    )

    # Group the data by region and count the number of restaurants in each
    region_group = df.groupby("Region").size().reset_index(name='Count')

    # Plot enlarged pie chart with custom colors and labels
    pie_chart = go.Figure(go.Pie(
        labels=region_group["Region"],
        values=region_group["Count"],
        textinfo="label+percent",
        hoverinfo="label+value",
        textfont=dict(size=18),
        marker=dict(colors=px.colors.qualitative.Set3, line=dict(color='#000000', width=2))
    ))
    pie_chart.update_layout(
        title="Restaurant Distribution by Region",
        title_x=0.5,
        title_font=dict(size=24, family="Arial"),
        height=600,
        margin=dict(t=50, b=50, l=50, r=50)
    )
    st.subheader("Restaurant Distribution by Region (Enlarged Pie Chart)")
    st.plotly_chart(pie_chart)

    # Plot bar chart with custom colors and labels
    bar_chart = go.Figure(go.Bar(
        x=region_group["Region"],
        y=region_group["Count"],
        text=region_group["Count"],
        textposition='auto',
        marker=dict(color=px.colors.qualitative.Set2)
    ))
    bar_chart.update_layout(
        title="Restaurant Count by Region",
        title_x=0.5,
        title_font=dict(size=24, family="Arial"),
        height=400,
        margin=dict(t=50, b=50, l=50, r=50),
        xaxis_title="Region",
        yaxis_title="Number of Restaurants",
        xaxis=dict(tickangle=-45)
    )
    st.subheader("Restaurant Count by Region (Bar Chart)")
    st.plotly_chart(bar_chart)

    # Display a map using Folium
    st.subheader("Restaurant Locations Map")

    # Create map centered around Tainan
    m = folium.Map(location=[23.0, 120.2], zoom_start=12)

    # Add marker cluster to the map
    marker_cluster = MarkerCluster().add_to(m)

    # Only rows with both coordinates can be placed on the map.
    for index, row in df.iterrows():
        if pd.notnull(row["Latitude"]) and pd.notnull(row["Longitude"]):
            folium.Marker(
                location=[row["Latitude"], row["Longitude"]],
                popup=f"{row['Store Name']} ({row['Phone']})",
                tooltip=row["Address"]
            ).add_to(marker_cluster)

    # Display the map in Streamlit by embedding the rendered Folium HTML
    st.components.v1.html(m._repr_html_(), height=600)