Roberta2024 committed on
Commit
5b22f4e
1 Parent(s): 42d0384

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +146 -0
app.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import pandas as pd
5
+ import base64
6
+
7
+ # Function to set background image
8
def set_background(image_file):
    """Use a local image file as the full-page background of the app.

    The file is read in binary mode, base64-encoded, and embedded as a
    ``data:`` URI inside a ``<style>`` rule for the ``.stApp`` selector,
    which is injected through ``st.markdown``.

    Args:
        image_file: Path of the image file to embed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import mimetypes

    # Derive the MIME type from the file extension so non-PNG images are
    # labelled correctly; fall back to PNG, the previously hard-coded value.
    mime_type = mimetypes.guess_type(image_file)[0] or "image/png"

    # Use a distinct name for the handle so the path parameter is not rebound.
    with open(image_file, "rb") as handle:
        encoded_string = base64.b64encode(handle.read()).decode()

    st.markdown(
        f"""
        <style>
        .stApp {{
            background-image: url(data:{mime_type};base64,{encoded_string});
            background-size: cover;
        }}
        </style>
        """,
        unsafe_allow_html=True,
    )
22
+
23
# Apply the page background.
set_background('ddog.png')

# Heading displayed at the top of the app.
st.title('寵物診所資訊爬蟲')

# Clinic detail pages to scrape: every page lives under the same directory
# and differs only in its page id.
_CLINIC_PAGE = 'https://www.tw-animal.com/pet/171211/{}.html'
urls = [
    _CLINIC_PAGE.format(page_id)
    for page_id in (
        'c000196',
        'c000186',
        'c000081',
        'c000848',
        'c000045',
        'c001166',
        'c000773',
        'c001038',
        'c000741',
        'c001451',
        'c000102',
        'c000757',
        'c000703',
        'c000481',
        'c000971',
        'c000187',
        'c001357',
        'c001065',
        'c000165',
        'c000217',
        'c000802',
        'c001034',
        'c001453',
    )
]

# Minimum rating chosen by the user (0.0 to 5.0 in 0.1 steps, default 4.5).
min_rating = st.slider(
    "請選擇想篩選的最低評分",
    min_value=0.0,
    max_value=5.0,
    value=4.5,
    step=0.1,
)
58
+
59
# Session-state key holding every clinic record from the most recent scrape.
# Streamlit re-executes the script on each widget interaction, so plain
# variables assigned under one button are gone by the time another button is
# clicked; session state is what carries the results across those reruns.
_RESULTS_KEY = 'clinic_records'
# Seconds to wait on each HTTP call before giving up.
_HTTP_TIMEOUT = 10
# NOTE(review): LINE announced the end of the Notify service (2025-03-31);
# confirm this endpoint is still usable.
_LINE_NOTIFY_URL = "https://notify-api.line.me/api/notify"


def _scrape_clinic(url):
    """Download one clinic page and extract title, phone, address and rating.

    Args:
        url: Address of the clinic detail page.

    Returns:
        A dict with the keys '標題', '手機', '地址' (strings) and '評分'
        (float), or None when any of the four elements is missing from the
        page or the rating text is not a number.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    response = requests.get(url, timeout=_HTTP_TIMEOUT)
    response.raise_for_status()
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')

    tags = {
        '標題': soup.find('h1', class_='t-intro__title'),
        '手機': soup.find('a', href=lambda href: href and href.startswith('tel:')),
        '地址': soup.find('a', class_='t-font-medium'),
        '評分': soup.find('span', class_='t-intro__recommand'),
    }
    # find() returns None for a missing element; bail out instead of letting
    # .get_text() raise AttributeError and abort the whole run.
    if any(tag is None for tag in tags.values()):
        return None

    record = {field: tag.get_text(strip=True) for field, tag in tags.items()}
    try:
        record['評分'] = float(record['評分'])
    except ValueError:
        return None
    return record


def send_line_notify(token, msg):
    """Send ``msg`` through LINE Notify and return the HTTP status code.

    Args:
        token: LINE Notify access token, sent as a Bearer credential.
        msg: Text of the notification.

    Returns:
        The integer HTTP status code of the response.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    response = requests.post(
        _LINE_NOTIFY_URL,
        headers={"Authorization": "Bearer " + token},
        # Send the message as a form body rather than in the query string;
        # requests sets the form-urlencoded Content-Type automatically.
        data={"message": msg},
        timeout=_HTTP_TIMEOUT,
    )
    return response.status_code


if st.button('開始爬蟲'):
    scraped = []

    # Progress feedback while the pages are fetched one by one.
    progress_bar = st.progress(0)
    status_text = st.empty()

    for position, url in enumerate(urls, start=1):
        progress_bar.progress(int(position / len(urls) * 100))
        status_text.text(f'正在處理第 {position} 個網址,共 {len(urls)} 個')

        # One bad page must not discard the pages that were scraped fine.
        try:
            record = _scrape_clinic(url)
        except requests.RequestException as exc:
            st.warning(f'無法取得 {url}:{exc}')
            continue
        if record is None:
            st.warning(f'無法解析 {url},已略過。')
            continue
        scraped.append(record)

    # Remove the progress widgets once the loop is done.
    progress_bar.empty()
    status_text.empty()

    # Keep every record (unfiltered) so later reruns can still use them.
    st.session_state[_RESULTS_KEY] = scraped

# Rebuild the filtered table on every rerun from the stored records, so it
# survives clicks on other widgets and always matches the current threshold.
scraped = st.session_state.get(_RESULTS_KEY)
df = pd.DataFrame(
    [row for row in (scraped or []) if row['評分'] >= min_rating]
)

if scraped is not None:
    if not df.empty:
        st.dataframe(df)

        # Offer the filtered table as a CSV download. The UTF-8 BOM lets
        # spreadsheet applications detect the encoding of the Chinese text.
        st.download_button(
            label="下載 CSV 檔案",
            data=df.to_csv(index=False).encode('utf-8-sig'),
            file_name="pet_clinics.csv",
            mime="text/csv",
        )
    else:
        st.write(f"沒有找到評分大於或等於 {min_rating} 的資料。")

# LINE Notify section
st.header('傳送至 LINE Notify')
token = st.text_input("請輸入 LINE Notify 權杖", type="password")

if st.button('傳送至 LINE'):
    if df.empty:
        st.warning('沒有資料可以傳送,請先執行爬蟲。')
    elif not token.strip():
        st.warning('請先輸入 LINE Notify 權杖。')
    else:
        try:
            status_code = send_line_notify(token.strip(), df.to_string(index=False))
        except requests.RequestException as exc:
            st.error(f'傳送失敗:{exc}')
        else:
            if status_code == 200:
                st.success('成功傳送至 LINE Notify!')
            elif status_code == 401:
                st.error('傳送失敗,請檢查您的權杖是否正確。')
            else:
                # Not an auth problem; surface the status instead of
                # blaming the token.
                st.error(f'傳送失敗(HTTP {status_code})。')