Rozeeeee committed on
Commit
6a3e14d
·
verified ·
1 Parent(s): 363aee0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -41
app.py CHANGED
@@ -3,7 +3,8 @@ from bs4 import BeautifulSoup
3
  import pandas as pd
4
  import plotly.graph_objects as go
5
  import streamlit as st
6
- from geopy.geocoders import Nominatim # New import for geocoding
 
7
 
8
  # 設定應用標題
9
  st.title("餐廳資料抓取與分析")
@@ -18,41 +19,51 @@ urls = urls_df['網址'].tolist()
18
  # 初始化一個空的 DataFrame 列表來儲存所有資料
19
  df_list = []
20
 
21
- # 初始化地理定位器
22
- geolocator = Nominatim(user_agent="restaurant_locator") # Initialize geolocator
23
 
24
  # 迭代每個網址並抓取資料
25
  for url in urls:
26
- response = requests.get(url)
27
- soup = BeautifulSoup(response.content, 'html.parser')
28
-
29
- # 解析並抓取所需資料
30
- title_tag = soup.find('h1', class_='restaurant-details__heading--title')
31
- title = title_tag.text.strip() if title_tag else 'N/A'
32
-
33
- address_tag = soup.find('li', class_='restaurant-details__heading--address')
34
- address = address_tag.text.strip() if address_tag else 'N/A'
35
-
36
- phone_tag = soup.find('a', {'data-event': 'CTA_tel'})
37
- phone = phone_tag['href'].replace('tel:', '') if phone_tag else 'N/A'
38
-
39
- description_tag = soup.find('div', class_='restaurant-details__description--text')
40
- description = description_tag.text.strip() if description_tag else 'N/A'
41
-
42
- # Geocode address to get latitude and longitude
43
- location = geolocator.geocode(address) if address != 'N/A' else None
44
- lat = location.latitude if location else None
45
- lon = location.longitude if location else None
46
-
47
- # 將抓取的資料新增到列表中
48
- df_list.append({
49
- 'Title': title,
50
- 'Address': address,
51
- 'Phone': phone,
52
- 'Description': description,
53
- 'Latitude': lat,
54
- 'Longitude': lon
55
- })
 
 
 
 
 
 
 
 
 
 
56
 
57
  # 使用 pd.DataFrame() 將所有資料合併成一個 DataFrame
58
  df = pd.DataFrame(df_list)
@@ -80,11 +91,3 @@ fig_pie.update_layout(title='每個區的商家數量比例')
80
  if st.button('顯示每個區的商家數量比例圓餅圖'):
81
  st.plotly_chart(fig_pie)
82
 
83
- # 顯示地圖
84
- st.subheader("餐廳地圖")
85
- # Drop rows with missing coordinates
86
- df_map = df.dropna(subset=['Latitude', 'Longitude'])
87
-
88
- # 顯示地圖
89
- st.map(df_map[['Latitude', 'Longitude']])
90
-
 
3
  import pandas as pd
4
  import plotly.graph_objects as go
5
  import streamlit as st
6
+ from geopy.geocoders import Nominatim
7
+ from geopy.exc import GeocoderServiceError
8
 
9
  # 設定應用標題
10
  st.title("餐廳資料抓取與分析")
 
19
  # 初始化一個空的 DataFrame 列表來儲存所有資料
20
  df_list = []
21
 
22
+ # 初始化 geolocator
23
+ geolocator = Nominatim(user_agent="restaurant_data_app")
24
 
25
  # 迭代每個網址並抓取資料
26
  for url in urls:
27
+ try:
28
+ response = requests.get(url)
29
+ response.raise_for_status()
30
+ soup = BeautifulSoup(response.content, 'html.parser')
31
+
32
+ # 解析並抓取所需資料
33
+ title_tag = soup.find('h1', class_='restaurant-details__heading--title')
34
+ title = title_tag.text.strip() if title_tag else 'N/A'
35
+
36
+ address_tag = soup.find('li', class_='restaurant-details__heading--address')
37
+ address = address_tag.text.strip() if address_tag else 'N/A'
38
+
39
+ phone_tag = soup.find('a', {'data-event': 'CTA_tel'})
40
+ phone = phone_tag['href'].replace('tel:', '') if phone_tag else 'N/A'
41
+
42
+ description_tag = soup.find('div', class_='restaurant-details__description--text')
43
+ description = description_tag.text.strip() if description_tag else 'N/A'
44
+
45
+ # Geocoding the address
46
+ try:
47
+ location = geolocator.geocode(address)
48
+ latitude = location.latitude if location else 'N/A'
49
+ longitude = location.longitude if location else 'N/A'
50
+ except GeocoderServiceError:
51
+ latitude = 'N/A'
52
+ longitude = 'N/A'
53
+
54
+ # 將抓取的資料新增到列表中
55
+ df_list.append({
56
+ 'Title': title,
57
+ 'Address': address,
58
+ 'Phone': phone,
59
+ 'Description': description,
60
+ 'Latitude': latitude,
61
+ 'Longitude': longitude
62
+ })
63
+ except requests.HTTPError as http_err:
64
+ st.error(f"HTTP error occurred for URL {url}: {http_err}")
65
+ except Exception as err:
66
+ st.error(f"An error occurred for URL {url}: {err}")
67
 
68
  # 使用 pd.DataFrame() 將所有資料合併成一個 DataFrame
69
  df = pd.DataFrame(df_list)
 
91
  if st.button('顯示每個區的商家數量比例圓餅圖'):
92
  st.plotly_chart(fig_pie)
93