Spaces:
Sleeping
Sleeping
File size: 5,352 Bytes
5b22f4e 53a8979 5b22f4e 53a8979 5b22f4e 53a8979 5b22f4e 53a8979 5b22f4e 53a8979 5b22f4e 53a8979 5b22f4e 53a8979 5b22f4e 53a8979 5b22f4e 53a8979 5b22f4e 53a8979 5b22f4e 53a8979 5b22f4e 53a8979 5b22f4e 53a8979 5b22f4e 53a8979 5b22f4e 53a8979 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import base64
import plotly.express as px
def set_background(image_file):
    """Use a local image as the full-page background of the Streamlit app.

    The file is read, base64-encoded and injected as a CSS ``data:`` URI on
    the ``.stApp`` element through ``st.markdown``.

    Args:
        image_file: Path of the image file to embed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Function-scope import so the file's top-level import list is untouched.
    import mimetypes

    # Derive the MIME type from the file name instead of always claiming PNG;
    # fall back to PNG when the type is unknown or not an image.
    guessed_type = mimetypes.guess_type(image_file)[0]
    if guessed_type and guessed_type.startswith("image/"):
        mime_type = guessed_type
    else:
        mime_type = "image/png"

    # A separate handle name keeps the ``image_file`` path argument intact.
    with open(image_file, "rb") as image_handle:
        encoded_string = base64.b64encode(image_handle.read()).decode()

    # The CSS is kept flush-left inside the string so that Markdown does not
    # interpret indented lines as a code block.
    st.markdown(
        f"""
<style>
.stApp {{
background-image: url(data:{mime_type};base64,{encoded_string});
background-size: cover;
background-position: center top;
padding-top: 100px;
}}
</style>
""",
        unsafe_allow_html=True
    )
# Paint the page background from the bundled image.
set_background('ddog.png')

# Heading shown at the top of the Streamlit app.
st.title('寵物診所資訊爬蟲')

# Clinic pages to scrape. Every page sits in the same directory and differs
# only by its page id, so the full URLs are assembled from the two parts.
_CLINIC_BASE_URL = 'https://www.tw-animal.com/pet/171211'
_CLINIC_PAGE_IDS = (
    'c000196',
    'c000186',
    'c000081',
    'c000848',
    'c000045',
    'c001166',
    'c000773',
    'c001038',
    'c000741',
    'c001451',
    'c000102',
    'c000757',
    'c000703',
    'c000481',
    'c000971',
    'c000187',
    'c001357',
    'c001065',
    'c000165',
    'c000217',
    'c000802',
    'c001034',
    'c001453',
)
urls = [f'{_CLINIC_BASE_URL}/{page_id}.html' for page_id in _CLINIC_PAGE_IDS]

# Let the user pick the minimum rating a clinic needs to be listed.
min_rating = st.slider(
    "請選擇想篩選的最低評分",
    min_value=0.0,
    max_value=5.0,
    value=4.5,
    step=0.1,
)
# Session-state key holding the most recent scrape result. Streamlit re-runs
# the whole script on every widget interaction, so a variable assigned inside
# one button's branch no longer exists on the run triggered by another button.
RESULT_KEY = 'clinic_df'
# Seconds to wait for a single HTTP request before giving up.
REQUEST_TIMEOUT = 10


def _node_text(node):
    """Return the stripped text of a parsed HTML node, or None if it is missing."""
    return None if node is None else node.get_text(strip=True)


def _parse_clinic(html):
    """Extract one clinic record from the HTML of a clinic page.

    Args:
        html: The page source as text.

    Returns:
        A dict with the keys '標題', '手機', '地址', '評分' and '區', or None
        when a required element is absent or the rating is not a number.
    """
    soup = BeautifulSoup(html, 'html.parser')
    title = _node_text(soup.find('h1', class_='t-intro__title'))
    phone = _node_text(
        soup.find('a', href=lambda href: href and href.startswith('tel:'))
    )
    address = _node_text(soup.find('a', class_='t-font-medium'))
    rating_text = _node_text(soup.find('span', class_='t-intro__recommand'))
    if None in (title, phone, address, rating_text):
        return None
    try:
        rating = float(rating_text)
    except ValueError:
        return None
    # The district is everything up to and including the first '區'.
    district = address.split('區')[0] + '區' if '區' in address else '其他'
    return {
        '標題': title,
        '手機': phone,
        '地址': address,
        '評分': rating,
        '區': district,
    }


def send_line_notify(token, msg):
    """Post a message to the LINE Notify API.

    Args:
        token: LINE Notify access token.
        msg: Text of the message to send.

    Returns:
        The HTTP status code of the response, or None when the request could
        not be completed (connection error, timeout, ...).
    """
    # NOTE(review): LINE has announced the end of the LINE Notify service and
    # the API documents a maximum message length -- confirm the endpoint is
    # still available and that a large table fits into a single message.
    headers = {
        "Authorization": "Bearer " + token,
        "Content-Type": "application/x-www-form-urlencoded"
    }
    # The message goes in the form-encoded body, matching the Content-Type,
    # rather than in the URL query string.
    payload = {
        "message": msg
    }
    try:
        r = requests.post(
            "https://notify-api.line.me/api/notify",
            headers=headers,
            data=payload,
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException:
        return None
    return r.status_code


# Start scraping when the user presses the button.
if st.button('開始爬蟲'):
    all_data = []
    failed_urls = []
    progress_bar = st.progress(0)
    status_text = st.empty()
    # Visit every URL and collect the clinic data.
    for i, url in enumerate(urls):
        progress = int((i + 1) / len(urls) * 100)
        progress_bar.progress(progress)
        status_text.text(f'正在處理第 {i+1} 個網址,共 {len(urls)} 個')
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException:
            # One unreachable page must not abort the whole run.
            failed_urls.append(url)
            continue
        response.encoding = 'utf-8'
        record = _parse_clinic(response.text)
        if record is None:
            failed_urls.append(url)
            continue
        # Keep only clinics that meet the rating threshold.
        if record['評分'] >= min_rating:
            all_data.append(record)
    # Convert to a DataFrame and keep it for later reruns (e.g. the LINE button).
    df = pd.DataFrame(all_data)
    st.session_state[RESULT_KEY] = df
    if failed_urls:
        st.warning(f'有 {len(failed_urls)} 個網址無法讀取或解析,已略過。')
    # Show the table and chart when at least one clinic matched.
    if not df.empty:
        st.dataframe(df)
        # Bar chart of the number of clinics per district.
        district_counts = df['區'].value_counts().reset_index()
        district_counts.columns = ['區', '診所數量']
        fig = px.bar(district_counts, x='區', y='診所數量', title='各區寵物診所數量')
        st.plotly_chart(fig)
        # Offer the result as a CSV download.
        csv = df.to_csv(index=False)
        st.download_button(
            label="下載 CSV 檔案",
            data=csv,
            file_name="pet_clinics.csv",
            mime="text/csv",
        )
    else:
        st.write(f"沒有找到評分大於或等於 {min_rating} 的資料。")
    # Clear the progress bar and the status message.
    progress_bar.empty()
    status_text.empty()

# LINE Notify section
st.header('傳送至 LINE Notify')
token = st.text_input("請輸入 LINE Notify 權杖")
if st.button('傳送至 LINE'):
    # Read the result saved by the scrape step; this run of the script did not
    # execute the scrape branch, so there is no local ``df`` to rely on.
    saved_df = st.session_state.get(RESULT_KEY)
    if saved_df is None or saved_df.empty:
        st.warning('沒有資料可以傳送,請先執行爬蟲。')
    elif not token.strip():
        # Without a token the request can only fail; say so up front.
        st.warning('請先輸入 LINE Notify 權杖。')
    else:
        msg = saved_df.to_string(index=False)
        status_code = send_line_notify(token.strip(), msg)
        if status_code == 200:
            st.success('成功傳送至 LINE Notify!')
        else:
            st.error('傳送失敗,請檢查您的權杖是否正確。')
|