# Spaces: Sleeping  (hosting-page status banner captured with the source; not part of the program)
import base64
import mimetypes

import pandas as pd
import plotly.express as px
import requests
import streamlit as st
from bs4 import BeautifulSoup
# Place the background image directly behind the title banner.
def set_background(image_file):
    """Inject CSS that uses *image_file* as the ``.title-section`` background.

    The file is read, base64-encoded and embedded as a ``data:`` URI so the
    page needs no separate static-file route for the image.

    Args:
        image_file: Path to the image on disk.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Use a distinct name for the handle so the path parameter is not shadowed.
    with open(image_file, "rb") as image_handle:
        encoded_string = base64.b64encode(image_handle.read()).decode()

    # Derive the MIME type from the file name instead of assuming PNG;
    # fall back to the previous hard-coded value when it cannot be guessed.
    mime_type = mimetypes.guess_type(image_file)[0] or "image/png"

    st.markdown(
        f"""
        <style>
        .title-section {{
            background-image: url(data:{mime_type};base64,{encoded_string});
            background-size: cover;
            background-position: center;
            padding: 100px;
            text-align: center;
        }}
        </style>
        """,
        unsafe_allow_html=True
    )
# Install the banner background, then render the page title inside that banner.
set_background('ddog.png')
st.markdown(
    '<div class="title-section"><h1>寵物診所資訊爬蟲</h1></div>',
    unsafe_allow_html=True,
)

# Clinic pages to scrape.
urls = [
    'https://www.tw-animal.com/pet/171211/c000196.html',
    'https://www.tw-animal.com/pet/171211/c000186.html',
    # remaining URLs...
]

# Minimum rating a clinic must have to be kept; chosen by the user.
min_rating = st.slider(
    "請選擇想篩選的最低評分",
    min_value=0.0,
    max_value=5.0,
    value=4.5,
    step=0.1,
)
# Streamlit re-executes the whole script on every widget interaction, so the
# scrape result is kept in st.session_state under these keys; a plain variable
# set inside one button branch would be gone by the time another button is
# clicked.
_RESULT_KEY = 'clinic_df'
_THRESHOLD_KEY = 'clinic_min_rating'
# Seconds to wait on any single HTTP request before giving up.
_REQUEST_TIMEOUT = 10


def _fetch_clinic(url):
    """Download one clinic page and extract its fields.

    Args:
        url: Address of the clinic page.

    Returns:
        A dict with the keys '標題', '手機', '地址', '評分' and '區', or
        ``None`` when the request fails, an expected element is missing, or
        the rating text is not a number.
    """
    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException:
        return None

    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')

    title_tag = soup.find('h1', class_='t-intro__title')
    phone_tag = soup.find('a', href=lambda href: href and href.startswith('tel:'))
    address_tag = soup.find('a', class_='t-font-medium')
    rating_tag = soup.find('span', class_='t-intro__recommand')
    # find() returns None when nothing matches; treat the page as unusable
    # rather than raising AttributeError on .get_text().
    if any(tag is None for tag in (title_tag, phone_tag, address_tag, rating_tag)):
        return None

    try:
        rating = float(rating_tag.get_text(strip=True))
    except ValueError:
        return None

    address = address_tag.get_text(strip=True)
    # The district is everything up to and including the first '區'.
    district = address.split('區')[0] + '區' if '區' in address else '其他'

    return {
        '標題': title_tag.get_text(strip=True),
        '手機': phone_tag.get_text(strip=True),
        '地址': address,
        '評分': rating,
        '區': district,
    }


def send_line_notify(token, msg):
    """POST *msg* to the LINE Notify endpoint using *token*.

    Args:
        token: LINE Notify access token.
        msg: Text to send.

    Returns:
        The HTTP status code of the response, or ``None`` when the request
        could not be completed (connection error, timeout, ...).
    """
    # NOTE(review): LINE has announced end-of-service for LINE Notify — confirm
    # this endpoint is still reachable before relying on it.
    # NOTE(review): the API is believed to cap messages at 1000 characters, so
    # a large table may be rejected — confirm and truncate if needed.
    headers = {
        "Authorization": "Bearer " + token,
        "Content-Type": "application/x-www-form-urlencoded"
    }
    try:
        # Send the message in the form-encoded body (data=), matching the
        # declared Content-Type, instead of in the URL query string.
        r = requests.post(
            "https://notify-api.line.me/api/notify",
            headers=headers,
            data={"message": msg},
            timeout=_REQUEST_TIMEOUT,
        )
    except requests.RequestException:
        return None
    return r.status_code


# Start scraping when the user presses the button.
if st.button('開始爬蟲'):
    all_data = []
    failed_count = 0
    progress_bar = st.progress(0)
    status_text = st.empty()

    # Visit every URL, keeping only clinics that meet the rating threshold.
    for i, url in enumerate(urls):
        progress = int((i + 1) / len(urls) * 100)
        progress_bar.progress(progress)
        status_text.text(f'正在處理第 {i+1} 個網址,共 {len(urls)} 個')

        record = _fetch_clinic(url)
        if record is None:
            failed_count += 1
            continue
        if record['評分'] >= min_rating:
            all_data.append(record)

    # Clear the progress bar and the status message.
    progress_bar.empty()
    status_text.empty()

    if failed_count:
        st.warning(f'有 {failed_count} 個網址無法擷取或解析,已略過。')

    # Persist the result (and the threshold it was filtered with) so it is
    # still available on the rerun triggered by any later button click.
    st.session_state[_RESULT_KEY] = pd.DataFrame(all_data)
    st.session_state[_THRESHOLD_KEY] = min_rating

# Show the most recent scrape result, if there is one.
if _RESULT_KEY in st.session_state:
    df = st.session_state[_RESULT_KEY]
    if not df.empty:
        st.dataframe(df)

        # Bar chart of the number of clinics per district.
        district_counts = df['區'].value_counts().reset_index()
        district_counts.columns = ['區', '診所數量']
        fig = px.bar(district_counts, x='區', y='診所數量', title='各區寵物診所數量')
        st.plotly_chart(fig)

        # Offer the table as a CSV download.
        csv = df.to_csv(index=False)
        st.download_button(
            label="下載 CSV 檔案",
            data=csv,
            file_name="pet_clinics.csv",
            mime="text/csv",
        )
    else:
        st.write(f"沒有找到評分大於或等於 {st.session_state[_THRESHOLD_KEY]} 的資料。")

# LINE Notify section
st.header('傳送至 LINE Notify')
token = st.text_input("請輸入 LINE Notify 權杖")

if st.button('傳送至 LINE'):
    stored_df = st.session_state.get(_RESULT_KEY)
    if stored_df is None or stored_df.empty:
        st.warning('沒有資料可以傳送,請先執行爬蟲。')
    elif not token.strip():
        # Avoid a pointless request with an empty bearer token.
        st.warning('請先輸入 LINE Notify 權杖。')
    else:
        status_code = send_line_notify(token.strip(), stored_df.to_string(index=False))
        if status_code == 200:
            st.success('成功傳送至 LINE Notify!')
        else:
            st.error('傳送失敗,請檢查您的權杖是否正確。')