File size: 5,154 Bytes
5b22f4e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import base64

# Function to set the app background image
def set_background(image_file):
    """Set the Streamlit app background to the given image file.

    Args:
        image_file: Path to the image on disk. The data URI below assumes a
            PNG; other formats will usually still render but are mislabeled.
    """
    # Use a distinct name for the file handle so it does not shadow the
    # `image_file` parameter (the original reused the same name).
    with open(image_file, "rb") as fh:
        encoded_string = base64.b64encode(fh.read()).decode()
    # Inject CSS that paints the whole app container with the image,
    # embedded inline as a base64 data URI.
    st.markdown(
        f"""
        <style>
        .stApp {{
            background-image: url(data:image/png;base64,{encoded_string});
            background-size: cover;
        }}
        </style>
        """,
        unsafe_allow_html=True
    )

# Set the background image
set_background('ddog.png')

# Streamlit app title ("Pet clinic info crawler")
st.title('寵物診所資訊爬蟲')

# List of clinic detail-page URLs to scrape (all under tw-animal.com/pet/171211)
urls = [
    'https://www.tw-animal.com/pet/171211/c000196.html',
    'https://www.tw-animal.com/pet/171211/c000186.html',
    'https://www.tw-animal.com/pet/171211/c000081.html',
    'https://www.tw-animal.com/pet/171211/c000848.html',
    'https://www.tw-animal.com/pet/171211/c000045.html',
    'https://www.tw-animal.com/pet/171211/c001166.html',
    'https://www.tw-animal.com/pet/171211/c000773.html',
    'https://www.tw-animal.com/pet/171211/c001038.html',
    'https://www.tw-animal.com/pet/171211/c000741.html',
    'https://www.tw-animal.com/pet/171211/c001451.html',
    'https://www.tw-animal.com/pet/171211/c000102.html',
    'https://www.tw-animal.com/pet/171211/c000757.html',
    'https://www.tw-animal.com/pet/171211/c000703.html',
    'https://www.tw-animal.com/pet/171211/c000481.html',
    'https://www.tw-animal.com/pet/171211/c000971.html',
    'https://www.tw-animal.com/pet/171211/c000187.html',
    'https://www.tw-animal.com/pet/171211/c001357.html',
    'https://www.tw-animal.com/pet/171211/c001065.html',
    'https://www.tw-animal.com/pet/171211/c000165.html',
    'https://www.tw-animal.com/pet/171211/c000217.html',
    'https://www.tw-animal.com/pet/171211/c000802.html',
    'https://www.tw-animal.com/pet/171211/c001034.html',
    'https://www.tw-animal.com/pet/171211/c001453.html'
]

# Let the user pick the minimum rating threshold: range 0.0–5.0,
# default 4.5, step 0.1
min_rating = st.slider("請選擇想篩選的最低評分", 0.0, 5.0, 4.5, 0.1)

if st.button('開始爬蟲'):
    # Accumulates one dict per clinic that meets the rating threshold.
    all_data = []

    # Progress bar and status line so the user can see scraping progress.
    progress_bar = st.progress(0)
    status_text = st.empty()

    # Visit every clinic page in turn.
    for i, url in enumerate(urls):
        # Update the progress UI before fetching.
        progress = int((i + 1) / len(urls) * 100)
        progress_bar.progress(progress)
        status_text.text(f'正在處理第 {i+1} 個網址,共 {len(urls)} 個')

        try:
            # Fetch the page; the timeout keeps one dead host from
            # hanging the whole app (the original had no timeout).
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            response.encoding = 'utf-8'

            # Parse the page.
            soup = BeautifulSoup(response.text, 'html.parser')

            # Locate title, phone, address and rating. Any selector may
            # miss (find() returns None); skip such pages instead of
            # crashing on the chained .get_text() call.
            title_tag = soup.find('h1', class_='t-intro__title')
            phone_tag = soup.find('a', href=lambda href: href and href.startswith('tel:'))
            address_tag = soup.find('a', class_='t-font-medium')
            rating_tag = soup.find('span', class_='t-intro__recommand')
            if not all([title_tag, phone_tag, address_tag, rating_tag]):
                st.warning(f'頁面結構不符,略過:{url}')
                continue

            # ValueError here (non-numeric rating text) is caught below.
            rating = float(rating_tag.get_text(strip=True))
        except (requests.RequestException, ValueError) as e:
            # Network failure or unparsable rating: report it and keep
            # processing the remaining URLs.
            st.warning(f'處理 {url} 時發生錯誤:{e}')
            continue

        # Keep only entries at or above the user-selected threshold.
        if rating >= min_rating:
            all_data.append({
                '標題': title_tag.get_text(strip=True),
                '手機': phone_tag.get_text(strip=True),
                '地址': address_tag.get_text(strip=True),
                '評分': rating
            })

    # Collect all qualifying rows into a DataFrame.
    df = pd.DataFrame(all_data)

    # Show results (or a message if nothing matched).
    if not df.empty:
        # Display the filtered DataFrame.
        st.dataframe(df)

        # Offer the results as a CSV download.
        csv = df.to_csv(index=False)
        st.download_button(
            label="下載 CSV 檔案",
            data=csv,
            file_name="pet_clinics.csv",
            mime="text/csv",
        )
    else:
        st.write(f"沒有找到評分大於或等於 {min_rating} 的資料。")

    # Clear the progress bar and status text.
    progress_bar.empty()
    status_text.empty()

# LINE Notify section
st.header('傳送至 LINE Notify')
token = st.text_input("請輸入 LINE Notify 權杖")

if st.button('傳送至 LINE'):
    # NOTE(review): `df` only exists during the same script run in which the
    # crawl button was pressed; Streamlit reruns the script on every widget
    # interaction, so this check usually fails after clicking this button.
    # Persisting df in st.session_state would make this reliable — TODO confirm.
    if 'df' in locals() and not df.empty:
        msg = df.to_string(index=False)

        def send_line_notify(token, msg):
            """POST msg to the LINE Notify API; return the HTTP status code."""
            headers = {
                "Authorization": "Bearer " + token,
                "Content-Type": "application/x-www-form-urlencoded"
            }
            # The API expects the message as a form-encoded request body,
            # matching the Content-Type header above. The original sent it
            # via `params=` (URL query string) instead of `data=`.
            payload = {
                "message": msg
            }
            r = requests.post(
                "https://notify-api.line.me/api/notify",
                headers=headers,
                data=payload,
                timeout=10,  # don't hang the UI on a slow/unreachable API
            )
            return r.status_code

        # Send the notification and report the outcome.
        status_code = send_line_notify(token, msg)
        if status_code == 200:
            st.success('成功傳送至 LINE Notify!')
        else:
            st.error('傳送失敗,請檢查您的權杖是否正確。')
    else:
        st.warning('沒有資料可以傳送,請先執行爬蟲。')