Create app.py
app.py
ADDED
@@ -0,0 +1,131 @@
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import plotly.express as px
import base64

# Function to set background image
def set_background(png_file):
    with open(png_file, "rb") as f:
        data = f.read()
    encoded = base64.b64encode(data).decode()
    st.markdown(
        f"""
        <style>
        .stApp {{
            background: url(data:image/png;base64,{encoded});
            background-size: cover;
        }}
        </style>
        """,
        unsafe_allow_html=True
    )

# Send message to LINE via LINE Notify
def send_line_notify(token, msg):
    headers = {
        "Authorization": "Bearer " + token,
        "Content-Type": "application/x-www-form-urlencoded"
    }
    params = {"message": msg}
    requests.post("https://notify-api.line.me/api/notify", headers=headers, params=params)

# Set the background image
set_background('CAT.png')

# Title of the app
st.title("寵物醫院評分查詢")

# User input for minimum rating
min_rating = st.slider("請輸入想查詢的最低評分:", 1.0, 5.0, 3.5)

# List of URLs to scrape
urls = [
    "https://www.tw-animal.com/pet/171211/c000196.html",
    "https://www.tw-animal.com/pet/171211/c000186.html",
    "https://www.tw-animal.com/pet/171211/c000081.html",
    "https://www.tw-animal.com/pet/171211/c001166.html",
    "https://www.tw-animal.com/pet/171211/c000773.html",
    "https://www.tw-animal.com/pet/171211/c001038.html",
    "https://www.tw-animal.com/pet/171211/c000741.html",
    "https://www.tw-animal.com/pet/171211/c001451.html",
    "https://www.tw-animal.com/pet/171211/c000102.html",
    "https://www.tw-animal.com/pet/171211/c000757.html",
    "https://www.tw-animal.com/pet/171211/c000703.html",
    "https://www.tw-animal.com/pet/171211/c000481.html",
    "https://www.tw-animal.com/pet/171211/c000971.html",
    "https://www.tw-animal.com/pet/171211/c000187.html",
    "https://www.tw-animal.com/pet/171211/c001357.html",
    "https://www.tw-animal.com/pet/171211/c001065.html",
    "https://www.tw-animal.com/pet/171211/c000165.html",
    "https://www.tw-animal.com/pet/171211/c001138.html",
    "https://www.tw-animal.com/pet/171211/c000484.html",
    "https://www.tw-animal.com/pet/171211/c001089.html",
    "https://www.tw-animal.com/pet/171211/c001252.html"
]

# Scrape data when the button is pressed. The results are stored in
# st.session_state so they survive the rerun triggered by the LINE button
# below (a st.button nested inside another button's branch never fires,
# because the outer button reads False again on the rerun).
if st.button('開始爬取資料'):
    st.write("正在爬取資料,請稍候...")

    # Create an empty list to store the extracted data
    data_list = []

    # Loop through each URL and extract the data
    for url in urls:
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

            # Extract data
            title = soup.find('h1', class_='t-intro__title').get_text(strip=True)
            phone = soup.find('a', class_='t-font-large').get_text(strip=True)
            address = soup.find('a', class_='t-font-medium').get_text(strip=True)
            rating = float(soup.find('span', class_='t-intro__recommand').get_text(strip=True))
        except (requests.RequestException, AttributeError, ValueError):
            # Skip pages that fail to load or no longer match the expected markup
            continue

        # Append the data to the list if rating meets the threshold
        if rating >= min_rating:
            data_list.append({
                "標題": title,
                "手機": phone,
                "地址": address,
                "評分": rating
            })

    if data_list:
        st.session_state['df1'] = pd.DataFrame(data_list)
    else:
        st.session_state.pop('df1', None)
        st.warning('沒有符合條件的資料。')

# If data was scraped successfully
if 'df1' in st.session_state:
    df1 = st.session_state['df1']

    # Extract the region from the address (assuming the region is the first
    # whitespace-separated token of the address)
    df1['區域'] = df1['地址'].apply(lambda x: x.split()[0])

    # Group by region and merge hospitals in the same region
    grouped_df = df1.groupby('區域').agg({
        '標題': lambda x: ' | '.join(x),
        '手機': lambda x: ' | '.join(x),
        '地址': lambda x: ' | '.join(x),
        '評分': 'mean'  # Aggregation for average rating
    }).reset_index()

    # Display the dataframe
    st.dataframe(df1)

    # Display Plotly bar chart
    bar_fig = px.bar(grouped_df, x='區域', y='評分', title="各區域寵物醫院統計",
                     labels={'評分': '平均評分', '區域': '區域'})
    st.plotly_chart(bar_fig)

    # Display Plotly pie chart
    pie_fig = px.pie(grouped_df, names='區域', values='評分', title="各區域寵物醫院比例")
    st.plotly_chart(pie_fig)

    # Sending notification to LINE
    if st.button('發送前五筆資料到Line'):
        msg = df1[:5].to_string(index=False)
        token = "DzMHaXosskpjtGuIjuB7NIcQ5TIoptLz7l7VYzV3Wp4"  # Replace with your LINE Notify token
        send_line_notify(token, msg)
        st.success('資料已成功發送到 Line!')
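Running the Space locally needs streamlit, requests, beautifulsoup4, pandas, and plotly installed, plus a CAT.png in the working directory; start it with `streamlit run app.py`. Because the scraper depends on specific CSS class names, a quick standalone check of one listing page can save a full run. The sketch below is hypothetical (the file name check_selectors.py and the single test URL are illustrative); the class names are the ones app.py relies on and may break if tw-animal.com changes its markup.

# check_selectors.py — minimal sketch to verify the CSS classes app.py scrapes.
# The URL and script name are illustrative, not part of the app itself.
import requests
from bs4 import BeautifulSoup

url = "https://www.tw-animal.com/pet/171211/c000196.html"
soup = BeautifulSoup(requests.get(url, timeout=10).content, "html.parser")

for label, tag, cls in [
    ("title", "h1", "t-intro__title"),
    ("phone", "a", "t-font-large"),
    ("address", "a", "t-font-medium"),
    ("rating", "span", "t-intro__recommand"),
]:
    node = soup.find(tag, class_=cls)
    print(label, "->", node.get_text(strip=True) if node else "NOT FOUND")

If any field prints NOT FOUND, the corresponding soup.find call in app.py would raise AttributeError for that page (which the try/except above now skips rather than crashing the app).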