"""Streamlit app: scrape pet-clinic pages, filter by rating, chart and export.

The script fetches a fixed list of clinic detail pages, extracts the title,
phone, address and rating from each, keeps clinics whose rating meets the
user-selected threshold, and shows them as a table and a per-district bar
chart. Results can be downloaded as CSV or forwarded to LINE Notify.

Streamlit re-executes this whole script on every widget interaction, so the
crawl results are stored in ``st.session_state`` to survive later reruns
(e.g. when the download or LINE button is pressed).
"""

import base64
import mimetypes

import pandas as pd
import plotly.express as px
import requests
import streamlit as st
from bs4 import BeautifulSoup

# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 10
LINE_NOTIFY_URL = "https://notify-api.line.me/api/notify"

# Keys under which crawl results are kept in st.session_state across reruns.
STATE_DF = 'clinic_df'
STATE_MIN_RATING = 'clinic_min_rating'


def set_background(image_file):
    """Use a local image as the app background.

    The image is read from disk, base64-encoded and injected as a CSS
    ``background-image`` data URI.

    Args:
        image_file: Path of the image file to embed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_file, "rb") as image_handle:
        encoded_string = base64.b64encode(image_handle.read()).decode()
    mime_type = mimetypes.guess_type(image_file)[0] or "image/png"
    # Built without leading indentation so Markdown treats it as an HTML
    # block rather than an indented code block.
    # NOTE(review): `.stApp` is the commonly used root selector for Streamlit
    # apps - confirm it matches the Streamlit version in use.
    css = (
        "<style>\n"
        ".stApp {\n"
        f'    background-image: url("data:{mime_type};base64,{encoded_string}");\n'
        "    background-size: cover;\n"
        "}\n"
        "</style>"
    )
    st.markdown(css, unsafe_allow_html=True)


def fetch_clinic(url):
    """Download one clinic page and extract its fields.

    Args:
        url: Address of the clinic detail page.

    Returns:
        A dict with the keys '標題', '手機', '地址', '評分' and '區'.

    Raises:
        requests.RequestException: On network errors, timeouts or HTTP
            error statuses.
        ValueError: If an expected element is missing from the page or the
            rating text is not a number.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')

    tags = {
        'title': soup.find('h1', class_='t-intro__title'),
        'phone': soup.find(
            'a', href=lambda href: href and href.startswith('tel:')
        ),
        'address': soup.find('a', class_='t-font-medium'),
        'rating': soup.find('span', class_='t-intro__recommand'),
    }
    missing = [name for name, tag in tags.items() if tag is None]
    if missing:
        raise ValueError(f"missing page elements: {', '.join(missing)}")

    address = tags['address'].get_text(strip=True)
    # District = everything up to and including the first '區' character.
    district = address.split('區')[0] + '區' if '區' in address else '其他'
    return {
        '標題': tags['title'].get_text(strip=True),
        '手機': tags['phone'].get_text(strip=True),
        '地址': address,
        '評分': float(tags['rating'].get_text(strip=True)),
        '區': district,
    }


def send_line_notify(token, msg):
    """Post a message to LINE Notify and return the HTTP status code.

    Args:
        token: LINE Notify access token.
        msg: Text of the message to send.

    Returns:
        The HTTP status code of the response.

    Raises:
        requests.RequestException: On network errors or timeouts.
    """
    # NOTE(review): LINE has announced the end of the LINE Notify service -
    # confirm this endpoint is still available before relying on it.
    headers = {
        "Authorization": "Bearer " + token,
        "Content-Type": "application/x-www-form-urlencoded"
    }
    # The message goes in the form-encoded body (matching the Content-Type
    # header), not in the URL query string.
    r = requests.post(
        LINE_NOTIFY_URL,
        headers=headers,
        data={"message": msg},
        timeout=REQUEST_TIMEOUT,
    )
    return r.status_code


# Set the background image
set_background('ddog.png')

# Streamlit app title
st.title('寵物診所資訊爬蟲')

# List of clinic page URLs
urls = [
    'https://www.tw-animal.com/pet/171211/c000196.html',
    'https://www.tw-animal.com/pet/171211/c000186.html',
    'https://www.tw-animal.com/pet/171211/c000081.html',
    'https://www.tw-animal.com/pet/171211/c000848.html',
    'https://www.tw-animal.com/pet/171211/c000045.html',
    'https://www.tw-animal.com/pet/171211/c001166.html',
    'https://www.tw-animal.com/pet/171211/c000773.html',
    'https://www.tw-animal.com/pet/171211/c001038.html',
    'https://www.tw-animal.com/pet/171211/c000741.html',
    'https://www.tw-animal.com/pet/171211/c001451.html',
    'https://www.tw-animal.com/pet/171211/c000102.html',
    'https://www.tw-animal.com/pet/171211/c000757.html',
    'https://www.tw-animal.com/pet/171211/c000703.html',
    'https://www.tw-animal.com/pet/171211/c000481.html',
    'https://www.tw-animal.com/pet/171211/c000971.html',
    'https://www.tw-animal.com/pet/171211/c000187.html',
    'https://www.tw-animal.com/pet/171211/c001357.html',
    'https://www.tw-animal.com/pet/171211/c001065.html',
    'https://www.tw-animal.com/pet/171211/c000165.html',
    'https://www.tw-animal.com/pet/171211/c000217.html',
    'https://www.tw-animal.com/pet/171211/c000802.html',
    'https://www.tw-animal.com/pet/171211/c001034.html',
    'https://www.tw-animal.com/pet/171211/c001453.html'
]

# Let the user choose the minimum rating threshold
min_rating = st.slider("請選擇想篩選的最低評分", 0.0, 5.0, 4.5, 0.1)

# Start scraping when the user presses the "start crawl" button
if st.button('開始爬蟲'):
    all_data = []
    progress_bar = st.progress(0)
    status_text = st.empty()

    # Visit every URL and collect its data
    for i, url in enumerate(urls):
        progress = int((i + 1) / len(urls) * 100)
        progress_bar.progress(progress)
        status_text.text(f'正在處理第 {i+1} 個網址,共 {len(urls)} 個')

        try:
            clinic = fetch_clinic(url)
        except (requests.RequestException, ValueError) as exc:
            # One unreachable or malformed page must not abort the crawl.
            st.warning(f'無法處理 {url}:{exc}')
            continue

        # Keep only the clinics that meet the rating threshold
        if clinic['評分'] >= min_rating:
            all_data.append(clinic)

    # Clear the progress bar and status message
    progress_bar.empty()
    status_text.empty()

    # Persist the results (and the threshold they were filtered with) so
    # they survive the reruns triggered by the buttons below.
    st.session_state[STATE_DF] = pd.DataFrame(all_data)
    st.session_state[STATE_MIN_RATING] = min_rating

# Results of the most recent crawl, or None if no crawl has run yet
df = st.session_state.get(STATE_DF)

if df is not None:
    # If any clinic matched, show the table and the chart
    if not df.empty:
        st.dataframe(df)

        # Bar chart of the number of clinics per district
        district_counts = df['區'].value_counts().reset_index()
        district_counts.columns = ['區', '診所數量']
        fig = px.bar(district_counts, x='區', y='診所數量', title='各區寵物診所數量')
        st.plotly_chart(fig)

        # Offer the results as a CSV download
        csv = df.to_csv(index=False)
        st.download_button(
            label="下載 CSV 檔案",
            data=csv,
            file_name="pet_clinics.csv",
            mime="text/csv",
        )
    else:
        st.write(
            f"沒有找到評分大於或等於 {st.session_state[STATE_MIN_RATING]} 的資料。"
        )

# LINE Notify section
st.header('傳送至 LINE Notify')
# Mask the token so it is not shown in clear text on screen
token = st.text_input("請輸入 LINE Notify 權杖", type="password")

if st.button('傳送至 LINE'):
    if df is not None and not df.empty:
        msg = df.to_string(index=False)
        try:
            status_code = send_line_notify(token, msg)
        except requests.RequestException as exc:
            st.error(f'傳送失敗:{exc}')
        else:
            if status_code == 200:
                st.success('成功傳送至 LINE Notify!')
            else:
                st.error('傳送失敗,請檢查您的權杖是否正確。')
    else:
        st.warning('沒有資料可以傳送,請先執行爬蟲。')