File size: 5,352 Bytes
5b22f4e
 
 
 
53a8979
 
5b22f4e
 
 
 
 
 
 
 
 
 
53a8979
 
5b22f4e
 
 
 
 
 
53a8979
5b22f4e
 
53a8979
5b22f4e
 
 
 
 
 
53a8979
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b22f4e
 
 
 
 
53a8979
5b22f4e
 
 
 
 
53a8979
5b22f4e
 
 
 
 
 
 
 
 
 
 
 
 
53a8979
 
 
5b22f4e
53a8979
5b22f4e
 
 
 
 
53a8979
 
5b22f4e
 
53a8979
5b22f4e
 
53a8979
5b22f4e
 
53a8979
 
 
 
 
 
 
 
 
5b22f4e
 
 
 
 
 
 
 
 
 
53a8979
5b22f4e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53a8979
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import base64
import plotly.express as px

def set_background(image_file):
    """Use a local image file as the page background of the Streamlit app.

    The file is read as raw bytes, base64-encoded, and embedded as a
    ``data:`` URL inside a ``<style>`` block that targets ``.stApp``.
    The URL always declares the payload as ``image/png``.

    Args:
        image_file: Path of the image file to embed.
    """
    with open(image_file, "rb") as handle:
        raw_bytes = handle.read()
    payload = base64.b64encode(raw_bytes).decode()

    # Doubled braces are literal CSS braces inside the f-string.
    css = f"""
        <style>
        .stApp {{
            background-image: url(data:image/png;base64,{payload});
            background-size: cover;
            background-position: center top;
            padding-top: 100px;
        }}
        </style>
        """
    st.markdown(css, unsafe_allow_html=True)

# Apply the page background image.
set_background("ddog.png")

# Page title of the Streamlit app.
st.title("寵物診所資訊爬蟲")

# Clinic detail pages to scrape.
urls = [
    "https://www.tw-animal.com/pet/171211/c000196.html",
    "https://www.tw-animal.com/pet/171211/c000186.html",
    "https://www.tw-animal.com/pet/171211/c000081.html",
    "https://www.tw-animal.com/pet/171211/c000848.html",
    "https://www.tw-animal.com/pet/171211/c000045.html",
    "https://www.tw-animal.com/pet/171211/c001166.html",
    "https://www.tw-animal.com/pet/171211/c000773.html",
    "https://www.tw-animal.com/pet/171211/c001038.html",
    "https://www.tw-animal.com/pet/171211/c000741.html",
    "https://www.tw-animal.com/pet/171211/c001451.html",
    "https://www.tw-animal.com/pet/171211/c000102.html",
    "https://www.tw-animal.com/pet/171211/c000757.html",
    "https://www.tw-animal.com/pet/171211/c000703.html",
    "https://www.tw-animal.com/pet/171211/c000481.html",
    "https://www.tw-animal.com/pet/171211/c000971.html",
    "https://www.tw-animal.com/pet/171211/c000187.html",
    "https://www.tw-animal.com/pet/171211/c001357.html",
    "https://www.tw-animal.com/pet/171211/c001065.html",
    "https://www.tw-animal.com/pet/171211/c000165.html",
    "https://www.tw-animal.com/pet/171211/c000217.html",
    "https://www.tw-animal.com/pet/171211/c000802.html",
    "https://www.tw-animal.com/pet/171211/c001034.html",
    "https://www.tw-animal.com/pet/171211/c001453.html",
]

# Minimum rating threshold chosen by the user (0.0-5.0, default 4.5).
min_rating = st.slider(
    "請選擇想篩選的最低評分",
    min_value=0.0,
    max_value=5.0,
    value=4.5,
    step=0.1,
)

# Seconds to wait for any single HTTP request before giving up.
_REQUEST_TIMEOUT = 10
# Column order of the result table; also used to build an empty frame.
_CLINIC_COLUMNS = ['標題', '手機', '地址', '評分', '區']
# session_state key under which the scraped (unfiltered) table is kept.
# Streamlit re-executes the whole script on every widget interaction, so a
# plain local variable would be gone by the time another button is clicked.
_RESULTS_KEY = 'clinic_results'
# NOTE(review): LINE announced the end of the LINE Notify service
# (2025-03-31) - confirm this endpoint is still usable before relying on it.
_LINE_NOTIFY_URL = "https://notify-api.line.me/api/notify"
# Maximum characters per message; believed to be 1000 per the LINE Notify
# API documentation - TODO confirm.
_LINE_MESSAGE_LIMIT = 1000


def _parse_clinic(html):
    """Extract one clinic record from the HTML of a clinic page.

    Args:
        html: Page source as text.

    Returns:
        A dict keyed by the names in ``_CLINIC_COLUMNS``; the rating is a
        float, every other value is a string.

    Raises:
        ValueError: An expected element is missing from the page, or the
            rating text is not a number.
    """
    soup = BeautifulSoup(html, 'html.parser')
    nodes = {
        '標題': soup.find('h1', class_='t-intro__title'),
        '手機': soup.find('a', href=lambda href: href and href.startswith('tel:')),
        '地址': soup.find('a', class_='t-font-medium'),
        '評分': soup.find('span', class_='t-intro__recommand'),
    }
    missing = [field for field, node in nodes.items() if node is None]
    if missing:
        raise ValueError(f"missing page elements: {', '.join(missing)}")

    record = {field: node.get_text(strip=True) for field, node in nodes.items()}
    record['評分'] = float(record['評分'])

    # District = everything up to and including the first '區' of the address.
    address = record['地址']
    record['區'] = address.split('區')[0] + '區' if '區' in address else '其他'
    return record


def _fetch_clinic(url):
    """Download one clinic page and return its parsed record.

    Raises:
        requests.RequestException: Network failure, timeout or HTTP error.
        ValueError: The page does not have the expected structure.
    """
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = 'utf-8'
    return _parse_clinic(response.text)


def _split_message(text, limit):
    """Split ``text`` into chunks of at most ``limit`` characters.

    Chunks break on line boundaries where possible; a single line longer
    than ``limit`` is cut hard.
    """
    chunks = []
    current = ''
    for line in text.splitlines():
        while len(line) > limit:
            if current:
                chunks.append(current)
                current = ''
            chunks.append(line[:limit])
            line = line[limit:]
        candidate = f'{current}\n{line}' if current else line
        if len(candidate) > limit:
            chunks.append(current)
            current = line
        else:
            current = candidate
    if current:
        chunks.append(current)
    return chunks


def send_line_notify(token, msg):
    """Post one message to LINE Notify and return the HTTP status code.

    Args:
        token: LINE Notify access token.
        msg: Message text to send.

    Raises:
        requests.RequestException: The request could not be completed.
    """
    headers = {
        "Authorization": "Bearer " + token,
        "Content-Type": "application/x-www-form-urlencoded"
    }
    # The message goes in the form body (data=), not in the URL query string.
    r = requests.post(
        _LINE_NOTIFY_URL,
        headers=headers,
        data={"message": msg},
        timeout=_REQUEST_TIMEOUT,
    )
    return r.status_code


# Scrape every clinic page when the user presses the start button.
if st.button('開始爬蟲'):
    records = []
    failures = []
    progress_bar = st.progress(0)
    status_text = st.empty()

    for i, url in enumerate(urls):
        progress = int((i + 1) / len(urls) * 100)
        progress_bar.progress(progress)
        status_text.text(f'正在處理第 {i+1} 個網址,共 {len(urls)} 個')

        # One bad page must not abort the whole run: skip it and report it.
        try:
            records.append(_fetch_clinic(url))
        except (requests.RequestException, ValueError) as exc:
            failures.append(f'{url} ({exc})')

    # Clear the progress bar and status message.
    progress_bar.empty()
    status_text.empty()

    if failures:
        st.warning(
            '以下網址無法取得資料,已略過:\n\n'
            + '\n'.join(f'- {item}' for item in failures)
        )

    # Keep the unfiltered table so later reruns (slider moves, download or
    # LINE button clicks) can still see it without scraping again.
    st.session_state[_RESULTS_KEY] = pd.DataFrame(
        records, columns=_CLINIC_COLUMNS
    ).astype({'評分': float})

# Show whatever has been scraped so far, filtered by the current threshold.
df = None
if _RESULTS_KEY in st.session_state:
    all_clinics = st.session_state[_RESULTS_KEY]
    df = all_clinics[all_clinics['評分'] >= min_rating].reset_index(drop=True)

    if not df.empty:
        st.dataframe(df)

        # Bar chart of the number of clinics per district.
        district_counts = df['區'].value_counts().reset_index()
        district_counts.columns = ['區', '診所數量']

        fig = px.bar(district_counts, x='區', y='診所數量', title='各區寵物診所數量')
        st.plotly_chart(fig)

        # CSV download; the UTF-8 BOM lets Excel detect the encoding of the
        # Chinese text.
        csv = df.to_csv(index=False).encode('utf-8-sig')
        st.download_button(
            label="下載 CSV 檔案",
            data=csv,
            file_name="pet_clinics.csv",
            mime="text/csv",
        )
    else:
        st.write(f"沒有找到評分大於或等於 {min_rating} 的資料。")

# LINE Notify section.
st.header('傳送至 LINE Notify')
# The token is a secret, so the input is masked.
token = st.text_input("請輸入 LINE Notify 權杖", type="password")
if st.button('傳送至 LINE'):
    if df is None or df.empty:
        st.warning('沒有資料可以傳送,請先執行爬蟲。')
    elif not token.strip():
        st.warning('請先輸入 LINE Notify 權杖。')
    else:
        msg = df.to_string(index=False)
        try:
            # Send in chunks so a long table stays within the message limit;
            # stop at the first rejected chunk.
            status_code = 200
            for chunk in _split_message(msg, _LINE_MESSAGE_LIMIT):
                status_code = send_line_notify(token.strip(), chunk)
                if status_code != 200:
                    break
        except requests.RequestException as exc:
            st.error(f'傳送失敗,無法連線至 LINE Notify:{exc}')
        else:
            if status_code == 200:
                st.success('成功傳送至 LINE Notify!')
            else:
                st.error('傳送失敗,請檢查您的權杖是否正確。')