import requests
from bs4 import BeautifulSoup
import pandas as pd
import folium
from folium.plugins import MarkerCluster, HeatMap
from geopy.geocoders import Nominatim
import re
import streamlit as st

# Streamlit title and description
st.title("Restaurant Data Extractor")
st.write("Extracting restaurant data and displaying it on a map.")

# Read the source data from a publicly shared Google Sheet as CSV
sheet_id = "1xUfnD1WCF5ldqECI8YXIko1gCpaDDCwTztL17kjI42U"
df1 = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")

# Convert "網址" column to a Python list
urls = df1["網址"].tolist()

# Collect one record per restaurant; the full DataFrame is built after the loop
rows = []

# Initialize Nominatim geocoder
geolocator = Nominatim(user_agent="my_app")
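
# Nominatim's public service asks clients for at most one request per second;
# geopy's RateLimiter throttles the geocoder accordingly (one way to respect
# the usage policy; tune min_delay_seconds for a self-hosted instance)
from geopy.extra.rate_limiter import RateLimiter
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)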

# Function to extract region (區域) from the address using regex
def extract_region(address):
    # Match the address prefix up to the first 區; fall back to 縣, then 市
    match = re.search(r'.*?區|.*?縣|.*?市', address)
    if match:
        return match.group(0)
    return "Unknown"

# Progress bar in Streamlit
progress_bar = st.progress(0)
total_urls = len(urls)

# Iterate through each URL
for idx, url in enumerate(urls):
    # Fetch the page (a timeout keeps one dead link from hanging the whole run)
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.content, "html.parser")

    try:
        store_name = soup.find("h2", class_="restaurant-details__heading--title").text.strip()
    except AttributeError:
        store_name = None

    try:
        address = soup.find("li", class_="restaurant-details__heading--address").text.strip()
        region = extract_region(address)
    except AttributeError:
        address = None
        region = "Unknown"

    # Geocode the address; missing or unresolvable addresses get no coordinates
    latitude = longitude = None
    if address:
        try:
            location = geocode(address)
            if location:
                latitude = location.latitude
                longitude = location.longitude
        except Exception:
            pass

    # Collect the scraped fields; the DataFrame is assembled once after the
    # loop, which avoids the cost of pd.concat on every iteration
    rows.append({
        "Store Name": store_name,
        "Address": address,
        "Latitude": latitude,
        "Longitude": longitude,
        "Region": region,
    })

    # Update progress bar
    progress_bar.progress((idx + 1) / total_urls)

# Build the DataFrame and save it to CSV; utf-8-sig adds a BOM so Excel
# displays the Chinese text correctly
df = pd.DataFrame(rows, columns=["Store Name", "Address", "Latitude", "Longitude", "Region"])
csv_file = "restaurants_data.csv"
df.to_csv(csv_file, encoding="utf-8-sig", index=False)

# Display a download button for the CSV file
st.write(f"Data saved to {csv_file}")
with open(csv_file, "rb") as f:
    st.download_button(
        label="Download restaurant data as CSV",
        data=f.read(),
        file_name=csv_file,
        mime="text/csv"
    )

# Display a map using Folium in Streamlit
st.subheader("Restaurant Locations Map")

# Create a map centered on Tainan
m = folium.Map(location=[23.0, 120.2], zoom_start=12)

# Add marker cluster to the map
marker_cluster = MarkerCluster().add_to(m)
for index, row in df.iterrows():
    if pd.notnull(row["Latitude"]) and pd.notnull(row["Longitude"]):
        folium.Marker(
            location=[row["Latitude"], row["Longitude"]],
            popup=row["Store Name"],
            tooltip=row["Address"]
        ).add_to(marker_cluster)
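
# Optional: overlay a density heat map from the same coordinates, using the
# HeatMap plugin imported above (uncomment to enable):
# heat_data = df.dropna(subset=["Latitude", "Longitude"])[["Latitude", "Longitude"]].values.tolist()
# HeatMap(heat_data).add_to(m)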

# Display the map in Streamlit
st.components.v1.html(m._repr_html_(), height=600)
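
# Alternatively, the third-party streamlit-folium package can render the map
# natively (optional dependency, not used here):
#   from streamlit_folium import st_folium
#   st_folium(m, height=600)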

# Optional: Display the DataFrame as a table
st.subheader("Restaurant Data")
st.dataframe(df)