Rozeeeee committed on
Commit
b84601b
·
verified ·
1 Parent(s): c3ba72b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -31
app.py CHANGED
@@ -3,8 +3,6 @@ from bs4 import BeautifulSoup
3
  import pandas as pd
4
  import plotly.graph_objects as go
5
  import streamlit as st
6
- from geopy.geocoders import Nominatim
7
- from geopy.exc import GeocoderServiceError
8
 
9
  # 設定應用標題
10
  st.title("餐廳資料抓取與分析")
@@ -19,51 +17,32 @@ urls = urls_df['網址'].tolist()
19
  # 初始化一個空的 DataFrame 列表來儲存所有資料
20
  df_list = []
21
 
22
- # 初始化 geolocator
23
- geolocator = Nominatim(user_agent="restaurant_data_app")
24
-
25
  # 迭代每個網址並抓取資料
26
  for url in urls:
27
  try:
28
  response = requests.get(url)
29
- response.raise_for_status()
30
  soup = BeautifulSoup(response.content, 'html.parser')
31
 
32
  # 解析並抓取所需資料
33
  title_tag = soup.find('h1', class_='restaurant-details__heading--title')
34
  title = title_tag.text.strip() if title_tag else 'N/A'
35
-
36
  address_tag = soup.find('li', class_='restaurant-details__heading--address')
37
  address = address_tag.text.strip() if address_tag else 'N/A'
38
-
39
  phone_tag = soup.find('a', {'data-event': 'CTA_tel'})
40
  phone = phone_tag['href'].replace('tel:', '') if phone_tag else 'N/A'
41
-
42
  description_tag = soup.find('div', class_='restaurant-details__description--text')
43
  description = description_tag.text.strip() if description_tag else 'N/A'
44
 
45
- # Geocoding the address
46
- try:
47
- location = geolocator.geocode(address)
48
- latitude = location.latitude if location else 'N/A'
49
- longitude = location.longitude if location else 'N/A'
50
- except GeocoderServiceError:
51
- latitude = 'N/A'
52
- longitude = 'N/A'
53
-
54
  # 將抓取的資料新增到列表中
55
- df_list.append({
56
- 'Title': title,
57
- 'Address': address,
58
- 'Phone': phone,
59
- 'Description': description,
60
- 'Latitude': latitude,
61
- 'Longitude': longitude
62
- })
63
- except requests.HTTPError as http_err:
64
- st.error(f"HTTP error occurred for URL {url}: {http_err}")
65
- except Exception as err:
66
- st.error(f"An error occurred for URL {url}: {err}")
67
 
68
  # 使用 pd.DataFrame() 將所有資料合併成一個 DataFrame
69
  df = pd.DataFrame(df_list)
@@ -90,4 +69,3 @@ fig_pie.update_layout(title='每個區的商家數量比例')
90
  # 按鈕來顯示圓餅圖
91
  if st.button('顯示每個區的商家數量比例圓餅圖'):
92
  st.plotly_chart(fig_pie)
93
-
 
3
  import pandas as pd
4
  import plotly.graph_objects as go
5
  import streamlit as st
 
 
6
 
7
  # 設定應用標題
8
  st.title("餐廳資料抓取與分析")
 
17
  # 初始化一個空的 DataFrame 列表來儲存所有資料
18
  df_list = []
19
 
 
 
 
20
  # 迭代每個網址並抓取資料
21
  for url in urls:
22
  try:
23
  response = requests.get(url)
24
+ response.raise_for_status() # Raises an HTTPError for bad responses
25
  soup = BeautifulSoup(response.content, 'html.parser')
26
 
27
  # 解析並抓取所需資料
28
  title_tag = soup.find('h1', class_='restaurant-details__heading--title')
29
  title = title_tag.text.strip() if title_tag else 'N/A'
30
+
31
  address_tag = soup.find('li', class_='restaurant-details__heading--address')
32
  address = address_tag.text.strip() if address_tag else 'N/A'
33
+
34
  phone_tag = soup.find('a', {'data-event': 'CTA_tel'})
35
  phone = phone_tag['href'].replace('tel:', '') if phone_tag else 'N/A'
36
+
37
  description_tag = soup.find('div', class_='restaurant-details__description--text')
38
  description = description_tag.text.strip() if description_tag else 'N/A'
39
 
 
 
 
 
 
 
 
 
 
40
  # 將抓取的資料新增到列表中
41
+ df_list.append({'Title': title, 'Address': address, 'Phone': phone, 'Description': description})
42
+
43
+ except requests.exceptions.HTTPError as e:
44
+ print(f"HTTP error occurred for URL {url}: {e}")
45
+ continue # Skip to the next URL if there's an error
 
 
 
 
 
 
 
46
 
47
  # 使用 pd.DataFrame() 將所有資料合併成一個 DataFrame
48
  df = pd.DataFrame(df_list)
 
69
  # 按鈕來顯示圓餅圖
70
  if st.button('顯示每個區的商家數量比例圓餅圖'):
71
  st.plotly_chart(fig_pie)