Roberta2024 committed on
Commit
e62c753
·
verified ·
1 Parent(s): 7002add

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -50
app.py CHANGED
@@ -9,8 +9,9 @@ from streamlit_folium import st_folium
9
  from geopy.geocoders import Nominatim
10
  from geopy.exc import GeocoderTimedOut, GeocoderServiceError
11
  import time
 
12
 
13
- # Function to set background image
14
  def set_background(png_file):
15
  with open(png_file, "rb") as f:
16
  data = f.read()
@@ -27,28 +28,30 @@ def set_background(png_file):
27
  unsafe_allow_html=True
28
  )
29
 
30
- # Set the background image
31
  set_background('CAT.png')
32
 
33
- # Title of the app
34
  st.title("寵物醫院評分查詢")
35
 
36
- # User input for minimum rating
37
  min_rating = st.slider("請輸入想查詢的最低評分:", 1.0, 5.0, 3.5)
38
 
39
- # List of URLs to scrape
40
  urls = [
41
- # (List of URLs)
 
 
42
  ]
43
 
44
- # Create an empty list to store the extracted data
45
  data_list = []
46
 
47
- # Initialize the geolocator
48
  geolocator = Nominatim(user_agent="geoapiExercises")
49
- geocode_cache = {} # Simple in-memory cache
50
 
51
- # Function to geocode an address with retry logic and caching
52
  def geocode_address(address, retries=5, delay=5):
53
  if address in geocode_cache:
54
  return geocode_cache[address]
@@ -60,31 +63,28 @@ def geocode_address(address, retries=5, delay=5):
60
  geocode_cache[address] = location
61
  return location
62
  except (GeocoderTimedOut, GeocoderServiceError) as e:
63
- st.warning(f"Geocoding error: {e}. Retrying...")
64
  time.sleep(delay)
65
- except GeocoderServiceError as e:
66
- st.error(f"Service error: {e}")
67
- break
68
 
69
- st.warning(f"Failed to geocode address: {address}")
70
  return None
71
 
72
- # Scrape data when the button is pressed
73
  if st.button('開始爬取資料'):
74
  st.write("正在爬取資料,請稍候...")
75
 
76
- # Loop through each URL and extract the data
77
  for url in urls:
78
  response = requests.get(url)
79
  soup = BeautifulSoup(response.content, 'html.parser')
80
 
81
- # Extract data
82
  title = soup.find('h1', class_='t-intro__title').get_text(strip=True)
83
  phone = soup.find('a', class_='t-font-large').get_text(strip=True)
84
  address = soup.find('a', class_='t-font-medium').get_text(strip=True)
85
  rating = float(soup.find('span', class_='t-intro__recommand').get_text(strip=True))
86
 
87
- # Append the data to the list if rating meets the threshold
88
  if rating >= min_rating:
89
  location = geocode_address(address)
90
  if location:
@@ -97,65 +97,50 @@ if st.button('開始爬取資料'):
97
  "緯度": location.latitude
98
  })
99
 
100
- # If data was scraped successfully
101
  if data_list:
102
  df1 = pd.DataFrame(data_list)
103
 
104
- # Extract the region from the address (assuming region is part of the address)
105
  df1['區域'] = df1['地址'].apply(lambda x: x.split()[0])
106
 
107
- # Group by region and merge hospitals in the same region
108
  grouped_df = df1.groupby('區域').agg({
109
  '標題': lambda x: ' | '.join(x),
110
  '手機': lambda x: ' | '.join(x),
111
  '地址': lambda x: ' | '.join(x),
112
- '評分': 'mean' # Aggregation for average rating
113
  }).reset_index()
114
 
115
- # Display the dataframe
116
  st.dataframe(df1)
117
 
118
- # Display Plotly bar chart
119
  bar_fig = px.bar(grouped_df, x='區域', y='評分', title="各區域寵物醫院統計", labels={'評分':'平均評分', '區域':'區域'})
120
  st.plotly_chart(bar_fig)
121
 
122
- # Display Plotly pie chart
123
  pie_fig = px.pie(grouped_df, names='區域', values='評分', title="各區域寵物醫院比例")
124
  st.plotly_chart(pie_fig)
125
 
126
- # Display the map
127
  if st.button('顯示地圖'):
128
- # Create a folium map centered around the average location
129
  map_center = [df1['緯度'].mean(), df1['經度'].mean()]
130
  pet_map = folium.Map(location=map_center, zoom_start=12)
131
 
132
- # Add markers for each hospital
 
 
 
133
  for index, row in df1.iterrows():
134
  folium.Marker(
135
  location=[row['緯度'], row['經度']],
136
  popup=f"{row['標題']} (評分: {row['評分']})",
137
  tooltip=row['標題']
138
- ).add_to(pet_map)
139
 
140
- # Render the map using streamlit_folium
141
  st_folium(pet_map, width=700, height=500)
142
 
143
- # Sending notification to LINE
144
- if st.button('發送前五筆資料到Line'):
145
- msg = df1[:5].to_string(index=False)
146
-
147
- token = "E0yvdJqy8zwCCvBtMiR0j3CXNi9xZaXh8g1FrPBmv79" # Replace with your LINE Notify token
148
-
149
- # Send message to LINE
150
- def send_line_notify(token, msg):
151
- headers = {
152
- "Authorization": "Bearer " + token,
153
- "Content-Type": "application/x-www-form-urlencoded"
154
- }
155
- params = {"message": msg}
156
- r = requests.post("https://notify-api.line.me/api/notify", headers=headers, params=params)
157
-
158
- send_line_notify(token, msg)
159
- st.success('資料已成功發送到 Line!')
160
- else:
161
- st.warning('沒有符合條件的資料。')
 
9
  from geopy.geocoders import Nominatim
10
  from geopy.exc import GeocoderTimedOut, GeocoderServiceError
11
  import time
12
+ from folium.plugins import MarkerCluster # 新增此行用於標記聚合
13
 
14
+ # 設定背景圖片的函數
15
  def set_background(png_file):
16
  with open(png_file, "rb") as f:
17
  data = f.read()
 
28
  unsafe_allow_html=True
29
  )
30
 
31
+ # 設定背景圖片
32
  set_background('CAT.png')
33
 
34
+ # App 的標題
35
  st.title("寵物醫院評分查詢")
36
 
37
+ # 用戶輸入的最低評分
38
  min_rating = st.slider("請輸入想查詢的最低評分:", 1.0, 5.0, 3.5)
39
 
40
+ # 要爬取的 URL 列表
41
  urls = [
42
+ "https://www.tw-animal.com/pet/171211/c000196.html",
43
+ "https://www.tw-animal.com/pet/171211/c000186.html",
44
+ # ... 其他 URL ...
45
  ]
46
 
47
+ # 存放提取數據的空列表
48
  data_list = []
49
 
50
+ # 初始化地理編碼器
51
  geolocator = Nominatim(user_agent="geoapiExercises")
52
+ geocode_cache = {} # 簡單的內存緩存
53
 
54
+ # 用於地理編碼地址的函數,帶有重試和緩存
55
  def geocode_address(address, retries=5, delay=5):
56
  if address in geocode_cache:
57
  return geocode_cache[address]
 
63
  geocode_cache[address] = location
64
  return location
65
  except (GeocoderTimedOut, GeocoderServiceError) as e:
66
+ st.warning(f"地理編碼錯誤: {e}. 重試中...")
67
  time.sleep(delay)
 
 
 
68
 
69
+ st.warning(f"無法地理編碼地址: {address}")
70
  return None
71
 
72
+ # 當按下「開始爬取資料」按鈕時執行
73
  if st.button('開始爬取資料'):
74
  st.write("正在爬取資料,請稍候...")
75
 
76
+ # 迴圈遍歷每個 URL 並提取數據
77
  for url in urls:
78
  response = requests.get(url)
79
  soup = BeautifulSoup(response.content, 'html.parser')
80
 
81
+ # 提取數據
82
  title = soup.find('h1', class_='t-intro__title').get_text(strip=True)
83
  phone = soup.find('a', class_='t-font-large').get_text(strip=True)
84
  address = soup.find('a', class_='t-font-medium').get_text(strip=True)
85
  rating = float(soup.find('span', class_='t-intro__recommand').get_text(strip=True))
86
 
87
+ # 如果評分達到門檻,將數據添加到列表
88
  if rating >= min_rating:
89
  location = geocode_address(address)
90
  if location:
 
97
  "緯度": location.latitude
98
  })
99
 
100
+ # 如果成功爬取到數據
101
  if data_list:
102
  df1 = pd.DataFrame(data_list)
103
 
104
+ # 從地址中提取區域(假設區域是地址的一部分)
105
  df1['區域'] = df1['地址'].apply(lambda x: x.split()[0])
106
 
107
+ # 按區域分組,合併同區域的醫院
108
  grouped_df = df1.groupby('區域').agg({
109
  '標題': lambda x: ' | '.join(x),
110
  '手機': lambda x: ' | '.join(x),
111
  '地址': lambda x: ' | '.join(x),
112
+ '評分': 'mean' # 平均評分
113
  }).reset_index()
114
 
115
+ # 顯示數據表格
116
  st.dataframe(df1)
117
 
118
+ # 顯示 Plotly 柱狀圖
119
  bar_fig = px.bar(grouped_df, x='區域', y='評分', title="各區域寵物醫院統計", labels={'評分':'平均評分', '區域':'區域'})
120
  st.plotly_chart(bar_fig)
121
 
122
+ # 顯示 Plotly 圓餅圖
123
  pie_fig = px.pie(grouped_df, names='區域', values='評分', title="各區域寵物醫院比例")
124
  st.plotly_chart(pie_fig)
125
 
126
+ # 顯示地圖
127
  if st.button('顯示地圖'):
128
+ # 創建一個 Folium 地圖,集中在平均位置
129
  map_center = [df1['緯度'].mean(), df1['經度'].mean()]
130
  pet_map = folium.Map(location=map_center, zoom_start=12)
131
 
132
+ # 創建一個標記聚合器
133
+ marker_cluster = MarkerCluster().add_to(pet_map)
134
+
135
+ # 為每家醫院添加標記
136
  for index, row in df1.iterrows():
137
  folium.Marker(
138
  location=[row['緯度'], row['經度']],
139
  popup=f"{row['標題']} (評分: {row['評分']})",
140
  tooltip=row['標題']
141
+ ).add_to(marker_cluster) # 添加到標記聚合器中
142
 
143
+ # 使用 streamlit_folium 渲染地圖
144
  st_folium(pet_map, width=700, height=500)
145
 
146
+