# Extracted from a web file viewer (Space status: Sleeping; file size: 5,154 bytes; ref 5b22f4e).
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import base64
def set_background(image_file):
    """Use a local image as the full-page background of the Streamlit app.

    The file is read, base64-encoded and embedded as a data URI inside a
    ``<style>`` rule targeting ``.stApp``; the rule is injected with
    ``st.markdown(..., unsafe_allow_html=True)``.

    Args:
        image_file: Path of the image file to embed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Function-scope import keeps this block self-contained.
    import mimetypes

    # The handle gets its own name so the path parameter is not shadowed.
    with open(image_file, "rb") as image_handle:
        encoded_string = base64.b64encode(image_handle.read()).decode()

    # Derive the MIME type from the file extension; keep the previously
    # hard-coded PNG type as the fallback for unknown or non-image guesses.
    guessed_type = mimetypes.guess_type(str(image_file))[0]
    if guessed_type and guessed_type.startswith("image/"):
        mime_type = guessed_type
    else:
        mime_type = "image/png"

    # Build the markup flush-left: leading indentation inside a Markdown
    # string could be interpreted as an indented code block.
    css = "\n".join(
        [
            "<style>",
            ".stApp {",
            f"background-image: url(data:{mime_type};base64,{encoded_string});",
            "background-size: cover;",
            "}",
            "</style>",
        ]
    )
    st.markdown(css, unsafe_allow_html=True)
# Page chrome: background image first, then the app title.
BACKGROUND_IMAGE = 'ddog.png'
APP_TITLE = '寵物診所資訊爬蟲'

set_background(BACKGROUND_IMAGE)
st.title(APP_TITLE)
# Clinic detail pages to scrape. Every page shares the same path prefix and
# differs only in its page id, so the list is generated from the ids.
_CLINIC_URL_TEMPLATE = 'https://www.tw-animal.com/pet/171211/{}.html'
_CLINIC_PAGE_IDS = (
    'c000196',
    'c000186',
    'c000081',
    'c000848',
    'c000045',
    'c001166',
    'c000773',
    'c001038',
    'c000741',
    'c001451',
    'c000102',
    'c000757',
    'c000703',
    'c000481',
    'c000971',
    'c000187',
    'c001357',
    'c001065',
    'c000165',
    'c000217',
    'c000802',
    'c001034',
    'c001453',
)
urls = [_CLINIC_URL_TEMPLATE.format(page_id) for page_id in _CLINIC_PAGE_IDS]
# Let the user choose the minimum rating a clinic needs to be listed.
min_rating = st.slider("請選擇想篩選的最低評分", 0.0, 5.0, 4.5, 0.1)

# Seconds to wait on any single HTTP request before giving up, so one
# unresponsive server cannot hang the whole app run.
REQUEST_TIMEOUT_SECONDS = 10

# st.session_state keys. Streamlit reruns the whole script on every widget
# interaction, and a button is only True during the rerun triggered by its
# own click, so results must be kept in session state to outlive that rerun.
_STATE_RESULTS = 'clinic_results'
_STATE_THRESHOLD = 'clinic_results_min_rating'
_STATE_FAILURES = 'clinic_results_failures'


def _required_text(tag, description):
    """Return the stripped text of *tag*; raise ValueError if *tag* is None."""
    if tag is None:
        raise ValueError(f"missing {description}")
    return tag.get_text(strip=True)


def scrape_clinic(url):
    """Fetch one clinic page and extract its title, phone, address and rating.

    Args:
        url: Address of the clinic detail page.

    Returns:
        A dict with the keys '標題', '手機', '地址' (strings) and '評分' (float).

    Raises:
        requests.RequestException: On a network error, timeout or HTTP
            error status.
        ValueError: If an expected element is absent from the page or the
            rating text is not a number.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')

    title = _required_text(
        soup.find('h1', class_='t-intro__title'), 'title element'
    )
    phone = _required_text(
        soup.find('a', href=lambda href: href and href.startswith('tel:')),
        'phone link',
    )
    address = _required_text(
        soup.find('a', class_='t-font-medium'), 'address link'
    )
    rating = float(
        _required_text(
            soup.find('span', class_='t-intro__recommand'), 'rating element'
        )
    )
    return {'標題': title, '手機': phone, '地址': address, '評分': rating}


def send_line_notify(token, msg):
    """POST *msg* to the LINE Notify endpoint and return the HTTP status code.

    Args:
        token: LINE Notify access token, sent as a Bearer credential.
        msg: Text of the notification.

    Returns:
        The integer HTTP status code of the response.

    Raises:
        requests.RequestException: On a network error or timeout.
    """
    # NOTE(review): LINE announced the end of the LINE Notify service
    # (March 31, 2025) - confirm this endpoint is still reachable.
    # NOTE(review): the API is believed to cap a message at 1000 characters,
    # so a large table may be rejected - confirm and split if needed.
    headers = {
        "Authorization": "Bearer " + token,
        "Content-Type": "application/x-www-form-urlencoded",
    }
    # Send the message as the form-encoded body (matching the declared
    # Content-Type) instead of the URL query string, which has length limits.
    r = requests.post(
        "https://notify-api.line.me/api/notify",
        headers=headers,
        data={"message": msg},
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    return r.status_code


if st.button('開始爬蟲'):
    # Rows for clinics that meet the threshold, and notes on pages that failed.
    all_data = []
    failures = []
    # Progress feedback while the pages are fetched.
    progress_bar = st.progress(0)
    status_text = st.empty()
    total = len(urls)
    for position, url in enumerate(urls, start=1):
        progress_bar.progress(int(position / total * 100))
        status_text.text(f'正在處理第 {position} 個網址,共 {total} 個')
        try:
            clinic = scrape_clinic(url)
        except (requests.RequestException, ValueError) as exc:
            # Skip the broken page and keep going rather than abort the run.
            failures.append(f"{url} ({exc})")
            continue
        # Keep only clinics rated at or above the chosen threshold.
        if clinic['評分'] >= min_rating:
            all_data.append(clinic)
    # Remove the progress widgets once every page has been handled.
    progress_bar.empty()
    status_text.empty()
    # Persist the outcome so later reruns (download / send) can still see it.
    st.session_state[_STATE_RESULTS] = pd.DataFrame(all_data)
    st.session_state[_STATE_THRESHOLD] = min_rating
    st.session_state[_STATE_FAILURES] = failures

# Results of the most recent scrape, or None if none has run this session.
df = st.session_state.get(_STATE_RESULTS)
if df is not None:
    failures = st.session_state.get(_STATE_FAILURES, [])
    if failures:
        st.warning(
            "以下網址無法處理,已略過:\n\n"
            + "\n".join(f"- {item}" for item in failures)
        )
    if not df.empty:
        st.dataframe(df)
        # Encode with a UTF-8 BOM so spreadsheet applications detect the
        # encoding and display the Chinese column names correctly.
        csv = df.to_csv(index=False).encode('utf-8-sig')
        st.download_button(
            label="下載 CSV 檔案",
            data=csv,
            file_name="pet_clinics.csv",
            mime="text/csv",
        )
    else:
        # Report the threshold that was in effect when the scrape ran.
        used_threshold = st.session_state.get(_STATE_THRESHOLD, min_rating)
        st.write(f"沒有找到評分大於或等於 {used_threshold} 的資料。")

# LINE Notify section
st.header('傳送至 LINE Notify')
# Mask the token on screen; it is a credential.
token = st.text_input("請輸入 LINE Notify 權杖", type="password")
if st.button('傳送至 LINE'):
    if df is None or df.empty:
        st.warning('沒有資料可以傳送,請先執行爬蟲。')
    elif not token.strip():
        st.warning('請先輸入 LINE Notify 權杖。')
    else:
        msg = df.to_string(index=False)
        try:
            status_code = send_line_notify(token.strip(), msg)
        except requests.RequestException as exc:
            st.error(f'傳送失敗:{exc}')
        else:
            if status_code == 200:
                st.success('成功傳送至 LINE Notify!')
            else:
                st.error('傳送失敗,請檢查您的權杖是否正確。')