Roberta2024 committed on
Commit
a11ef3f
·
verified ·
1 Parent(s): c698eaf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -81
app.py CHANGED
@@ -2,7 +2,7 @@ import requests
2
  from bs4 import BeautifulSoup
3
  import pandas as pd
4
  import folium
5
- from folium.plugins import MarkerCluster, HeatMap
6
  import plotly.graph_objects as go
7
  from geopy.geocoders import Nominatim
8
  import re
@@ -10,7 +10,7 @@ import streamlit as st
10
 
11
  # Streamlit title and description
12
  st.title("Restaurant Data Extractor")
13
- st.write("Extracting restaurant data and displaying it on a map.")
14
 
15
  # Read data from Google Sheets
16
  sheet_id = "1xUfnD1WCF5ldqECI8YXIko1gCpaDDCwTztL17kjI42U"
@@ -33,84 +33,113 @@ def extract_region(address):
33
  else:
34
  return "Unknown"
35
 
36
- # Progress bar in Streamlit
37
- progress_bar = st.progress(0)
38
- total_urls = len(urls)
39
-
40
- # Iterate through each URL
41
- for idx, url in enumerate(urls):
42
- response = requests.get(url)
43
- soup = BeautifulSoup(response.content, "html.parser")
44
-
45
- try:
46
- store_name = soup.find("h2", class_="restaurant-details__heading--title").text.strip()
47
- except AttributeError:
48
- store_name = None
49
-
50
- try:
51
- address = soup.find("li", class_="restaurant-details__heading--address").text.strip()
52
- region = extract_region(address)
53
- except AttributeError:
54
- address = None
55
- region = "Unknown"
56
-
57
- try:
58
- location = geolocator.geocode(address)
59
- if location:
60
- latitude = location.latitude
61
- longitude = location.longitude
62
- else:
 
 
 
 
 
 
63
  latitude = None
64
  longitude = None
65
- except:
66
- latitude = None
67
- longitude = None
68
-
69
- new_row = pd.DataFrame({
70
- "Store Name": [store_name],
71
- "Address": [address],
72
- "Latitude": [latitude],
73
- "Longitude": [longitude],
74
- "Region": [region]
75
- })
76
-
77
- df = pd.concat([df, new_row], ignore_index=True)
78
-
79
- # Update progress bar
80
- progress_bar.progress((idx + 1) / total_urls)
81
-
82
- # Save the DataFrame to CSV with UTF-8 encoding
83
- csv_file = "restaurants_data.csv"
84
- df.to_csv(csv_file, encoding="utf-8-sig", index=False)
85
-
86
- # Display a download button for the CSV file
87
- st.write(f"Data saved to {csv_file}")
88
- st.download_button(
89
- label="Download restaurant data as CSV",
90
- data=open(csv_file, "rb").read(),
91
- file_name=csv_file,
92
- mime="text/csv"
93
- )
94
-
95
- # Display a map using Folium in Streamlit
96
- st.subheader("Restaurant Locations Map")
97
-
98
- # Create map centered around Tainan
99
- m = folium.Map(location=[23.0, 120.2], zoom_start=12)
100
-
101
- # Add marker cluster to the map
102
- marker_cluster = MarkerCluster().add_to(m)
103
- for index, row in df.iterrows():
104
- if pd.notnull(row["Latitude"]) and pd.notnull(row["Longitude"]):
105
- folium.Marker(
106
- location=[row["Latitude"], row["Longitude"]],
107
- popup=row["Store Name"],
108
- tooltip=row["Address"]
109
- ).add_to(marker_cluster)
110
-
111
- # Display the map in Streamlit
112
- st.components.v1.html(m._repr_html_(), height=600)
113
-
114
- # Optional: Display the DataFrame as a table
115
- st.subheader("Restaurant Data")
116
- st.dataframe(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from bs4 import BeautifulSoup
3
  import pandas as pd
4
  import folium
5
+ from folium.plugins import MarkerCluster
6
  import plotly.graph_objects as go
7
  from geopy.geocoders import Nominatim
8
  import re
 
10
 
11
  # Streamlit title and description
12
  st.title("Restaurant Data Extractor")
13
+ st.write("Extract restaurant data, visualize with a pie chart and bar chart, and display locations on a map.")
14
 
15
  # Read data from Google Sheets
16
  sheet_id = "1xUfnD1WCF5ldqECI8YXIko1gCpaDDCwTztL17kjI42U"
 
33
  else:
34
  return "Unknown"
35
 
36
+ # Function to fetch and parse data
37
+ def fetch_data():
38
+ global df
39
+ # Progress bar in Streamlit
40
+ progress_bar = st.progress(0)
41
+ total_urls = len(urls)
42
+
43
+ # Iterate through each URL
44
+ for idx, url in enumerate(urls):
45
+ response = requests.get(url)
46
+ soup = BeautifulSoup(response.content, "html.parser")
47
+
48
+ try:
49
+ store_name = soup.find("h2", class_="restaurant-details__heading--title").text.strip()
50
+ except AttributeError:
51
+ store_name = None
52
+
53
+ try:
54
+ address = soup.find("li", class_="restaurant-details__heading--address").text.strip()
55
+ region = extract_region(address)
56
+ except AttributeError:
57
+ address = None
58
+ region = "Unknown"
59
+
60
+ try:
61
+ location = geolocator.geocode(address)
62
+ if location:
63
+ latitude = location.latitude
64
+ longitude = location.longitude
65
+ else:
66
+ latitude = None
67
+ longitude = None
68
+ except:
69
  latitude = None
70
  longitude = None
71
+
72
+ new_row = pd.DataFrame({
73
+ "Store Name": [store_name],
74
+ "Address": [address],
75
+ "Latitude": [latitude],
76
+ "Longitude": [longitude],
77
+ "Region": [region]
78
+ })
79
+
80
+ df = pd.concat([df, new_row], ignore_index=True)
81
+
82
+ # Update progress bar
83
+ progress_bar.progress((idx + 1) / total_urls)
84
+
85
+ # Button to trigger data fetching
86
+ if st.button("Fetch Restaurant Data"):
87
+ fetch_data()
88
+
89
+ # Save the DataFrame to CSV with UTF-8 encoding
90
+ csv_file = "restaurants_data.csv"
91
+ df.to_csv(csv_file, encoding="utf-8-sig", index=False)
92
+
93
+ # Display download button for the CSV
94
+ st.download_button(
95
+ label="Download restaurant data as CSV",
96
+ data=open(csv_file, "rb").read(),
97
+ file_name=csv_file,
98
+ mime="text/csv"
99
+ )
100
+
101
+ # Group the data by region
102
+ region_group = df.groupby("Region").size().reset_index(name='Count')
103
+
104
+ # Plot pie chart
105
+ pie_chart = go.Figure(go.Pie(
106
+ labels=region_group["Region"],
107
+ values=region_group["Count"],
108
+ hoverinfo="label+percent",
109
+ textinfo="value+percent",
110
+ ))
111
+ st.subheader("Restaurant Distribution by Region (Pie Chart)")
112
+ st.plotly_chart(pie_chart)
113
+
114
+ # Plot bar chart
115
+ bar_chart = go.Figure(go.Bar(
116
+ x=region_group["Region"],
117
+ y=region_group["Count"],
118
+ text=region_group["Count"],
119
+ textposition='auto'
120
+ ))
121
+ st.subheader("Restaurant Count by Region (Bar Chart)")
122
+ st.plotly_chart(bar_chart)
123
+
124
+ # Display a map using Folium
125
+ st.subheader("Restaurant Locations Map")
126
+
127
+ # Create map centered around Tainan
128
+ m = folium.Map(location=[23.0, 120.2], zoom_start=12)
129
+
130
+ # Add marker cluster to the map
131
+ marker_cluster = MarkerCluster().add_to(m)
132
+ for index, row in df.iterrows():
133
+ if pd.notnull(row["Latitude"]) and pd.notnull(row["Longitude"]):
134
+ folium.Marker(
135
+ location=[row["Latitude"], row["Longitude"]],
136
+ popup=row["Store Name"],
137
+ tooltip=row["Address"]
138
+ ).add_to(marker_cluster)
139
+
140
+ # Display the map in Streamlit
141
+ st.components.v1.html(m._repr_html_(), height=600)
142
+
143
+ # Optional: Display the DataFrame as a table
144
+ st.subheader("Restaurant Data")
145
+ st.dataframe(df)