"""Streamlit app: scrape pet-hospital pages, filter by rating, chart and map them."""

import base64
import time

import folium
import pandas as pd
import plotly.express as px
import requests
import streamlit as st
from bs4 import BeautifulSoup
from folium.plugins import MarkerCluster  # marker clustering on the map
from geopy.exc import GeocoderServiceError, GeocoderTimedOut
from geopy.geocoders import Nominatim
from streamlit_folium import st_folium

# Seconds to wait for a hospital page before giving up on it.
REQUEST_TIMEOUT = 10


def set_background(png_file):
    """Use a local PNG file as the page background of the app.

    The image is embedded as a base64 data URI inside a ``<style>`` element.
    If the file cannot be read, a warning is shown and the app continues
    without a background instead of crashing at start-up.

    Args:
        png_file: Path to the PNG image.
    """
    try:
        with open(png_file, "rb") as f:
            encoded = base64.b64encode(f.read()).decode()
    except OSError as exc:
        st.warning(f"無法載入背景圖片: {exc}")
        return

    # NOTE(review): the markup passed to st.markdown was empty in this file, so
    # the encoded image was never used. `.stApp` is assumed to be the app's
    # root container selector -- confirm against the Streamlit version in use.
    css = (
        "<style>"
        ".stApp {"
        f'background-image: url("data:image/png;base64,{encoded}");'
        "background-size: cover;"
        "}"
        "</style>"
    )
    st.markdown(css, unsafe_allow_html=True)


# Apply the background image.
set_background('CAT.png')

# App title.
st.title("寵物醫院評分查詢")

# Minimum rating chosen by the user.
min_rating = st.slider("請輸入想查詢的最低評分:", 1.0, 5.0, 3.5)

# URLs to scrape.
urls = [
    "https://www.tw-animal.com/pet/171211/c000196.html",
    "https://www.tw-animal.com/pet/171211/c000186.html",
    # ... other URLs ...
]

# Geocoder used to turn addresses into coordinates.
geolocator = Nominatim(user_agent="geoapiExercises")

# Streamlit re-executes this script on every interaction, so a plain module
# level dict would be emptied each time. Keep the cache in session state.
if "geocode_cache" not in st.session_state:
    st.session_state["geocode_cache"] = {}
geocode_cache = st.session_state["geocode_cache"]


def geocode_address(address, retries=5, delay=5):
    """Geocode ``address`` with caching and retry on transient errors.

    Args:
        address: Address string to look up.
        retries: Maximum number of lookup attempts.
        delay: Seconds to sleep between attempts after a timeout or
            service error.

    Returns:
        The location object returned by the geocoder, or ``None`` when the
        address has no match or every attempt failed.
    """
    if address in geocode_cache:
        return geocode_cache[address]

    for attempt in range(retries):
        try:
            location = geolocator.geocode(address)
        except (GeocoderTimedOut, GeocoderServiceError) as e:
            st.warning(f"地理編碼錯誤: {e}. 重試中...")
            # No point in sleeping after the final attempt.
            if attempt < retries - 1:
                time.sleep(delay)
            continue

        if location:
            geocode_cache[address] = location
            return location
        # The service answered but found nothing: repeating the same query
        # would only add load on the geocoder without changing the result.
        break

    st.warning(f"無法地理編碼地址: {address}")
    return None


def _extract_text(soup, tag, css_class):
    """Return the stripped text of the first matching element, or ``None``."""
    node = soup.find(tag, class_=css_class)
    return node.get_text(strip=True) if node is not None else None


def scrape_hospital(url):
    """Fetch one hospital page and extract its title, phone, address and rating.

    Args:
        url: Page URL to fetch.

    Returns:
        A dict with the keys ``標題``, ``手機``, ``地址`` and ``評分``, or
        ``None`` when the page could not be fetched or parsed (a warning is
        shown in that case so one bad page does not abort the whole run).
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        st.warning(f"無法取得頁面 {url}: {exc}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    title = _extract_text(soup, 'h1', 't-intro__title')
    phone = _extract_text(soup, 'a', 't-font-large')
    address = _extract_text(soup, 'a', 't-font-medium')
    rating_text = _extract_text(soup, 'span', 't-intro__recommand')

    if None in (title, phone, address, rating_text):
        st.warning(f"頁面格式不符,已略過: {url}")
        return None

    try:
        rating = float(rating_text)
    except ValueError:
        st.warning(f"無法解析評分,已略過: {url}")
        return None

    return {"標題": title, "手機": phone, "地址": address, "評分": rating}


# Rows from the most recent scrape. They live in session state because a
# button is only True during the single rerun triggered by its click; without
# persistence the results would vanish on the next interaction.
data_list = st.session_state.get("data_list", [])

# Run the scrape when the button is pressed.
if st.button('開始爬取資料'):
    st.write("正在爬取資料,請稍候...")

    data_list = []
    for url in urls:
        record = scrape_hospital(url)
        # Keep only hospitals that parsed correctly and meet the threshold.
        if record is None or record["評分"] < min_rating:
            continue

        location = geocode_address(record["地址"])
        if location:
            record["經度"] = location.longitude
            record["緯度"] = location.latitude
            data_list.append(record)

    # The stored rows reflect the slider value at scrape time; moving the
    # slider afterwards has no effect until the scrape is run again.
    st.session_state["data_list"] = data_list

# Show results whenever scraped rows are available.
if data_list:
    df1 = pd.DataFrame(data_list)

    # Region = first whitespace-separated token of the address.
    # NOTE(review): an address without whitespace yields the whole address as
    # its region -- confirm the address format on the scraped pages.
    df1['區域'] = df1['地址'].apply(lambda x: x.split()[0])

    # Merge hospitals that share a region.
    grouped_df = df1.groupby('區域').agg({
        '標題': lambda x: ' | '.join(x),
        '手機': lambda x: ' | '.join(x),
        '地址': lambda x: ' | '.join(x),
        '評分': 'mean',  # average rating
    }).reset_index()

    # Number of hospitals per region, used for the share-of-hospitals pie.
    grouped_df['醫院數'] = grouped_df['區域'].map(df1['區域'].value_counts())

    # Data table.
    st.dataframe(df1)

    # Bar chart of the average rating per region.
    bar_fig = px.bar(
        grouped_df,
        x='區域',
        y='評分',
        title="各區域寵物醫院統計",
        labels={'評分': '平均評分', '區域': '區域'},
    )
    st.plotly_chart(bar_fig)

    # Pie chart of each region's share of hospitals. The slices are sized by
    # hospital count; sizing them by mean rating would not show a share.
    pie_fig = px.pie(
        grouped_df,
        names='區域',
        values='醫院數',
        title="各區域寵物醫院比例",
    )
    st.plotly_chart(pie_fig)

    # Remember the click: rendering directly under `if st.button(...)` would
    # make the map disappear on the rerun caused by any later interaction.
    if st.button('顯示地圖'):
        st.session_state["show_map"] = True

    if st.session_state.get("show_map"):
        # Centre the map on the mean coordinate of all hospitals.
        map_center = [df1['緯度'].mean(), df1['經度'].mean()]
        pet_map = folium.Map(location=map_center, zoom_start=12)

        # Cluster nearby markers.
        marker_cluster = MarkerCluster().add_to(pet_map)

        # One marker per hospital, added to the cluster.
        for _, row in df1.iterrows():
            folium.Marker(
                location=[row['緯度'], row['經度']],
                popup=f"{row['標題']} (評分: {row['評分']})",
                tooltip=row['標題'],
            ).add_to(marker_cluster)

        # Render the map through streamlit_folium.
        st_folium(pet_map, width=700, height=500)