import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import plotly.express as px
import base64
import time  # used for retry backoff in geocode_address
import folium
from streamlit_folium import st_folium
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
# Function to set background image
def set_background(png_file):
    with open(png_file, "rb") as f:
        data = f.read()
    encoded = base64.b64encode(data).decode()
    st.markdown(
        f"""
        <style>
        .stApp {{
            background: url(data:image/png;base64,{encoded});
            background-size: cover;
        }}
        </style>
        """,
        unsafe_allow_html=True
    )
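# The image is inlined as a base64 data URI, so Streamlit does not need to
# serve it as a separate static file; note that large images inflate the
# page payload accordingly.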
# Set the background image
set_background('CAT.png')
# Title of the app
st.title("寵物醫院評分查詢")  # "Pet hospital rating lookup"
# User input for minimum rating
min_rating = st.slider("請輸入想查詢的最低評分:", 1.0, 5.0, 3.5)  # "Enter the minimum rating to search for:"
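# (the positional arguments map to st.slider's min_value=1.0, max_value=5.0
# and default value=3.5)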
# List of URLs to scrape
urls = [
"https://www.tw-animal.com/pet/171211/c000196.html",
"https://www.tw-animal.com/pet/171211/c000186.html",
"https://www.tw-animal.com/pet/171211/c000186.html",
"https://www.tw-animal.com/pet/171211/c000081.html",
"https://www.tw-animal.com/pet/171211/c001166.html",
"https://www.tw-animal.com/pet/171211/c000773.html",
"https://www.tw-animal.com/pet/171211/c001038.html",
"https://www.tw-animal.com/pet/171211/c000741.html",
"https://www.tw-animal.com/pet/171211/c001451.html",
"https://www.tw-animal.com/pet/171211/c000102.html",
"https://www.tw-animal.com/pet/171211/c000757.html",
"https://www.tw-animal.com/pet/171211/c000703.html",
"https://www.tw-animal.com/pet/171211/c000481.html",
"https://www.tw-animal.com/pet/171211/c000971.html",
"https://www.tw-animal.com/pet/171211/c000187.html",
"https://www.tw-animal.com/pet/171211/c001357.html",
"https://www.tw-animal.com/pet/171211/c001065.html",
"https://www.tw-animal.com/pet/171211/c000165.html",
"https://www.tw-animal.com/pet/171211/c001138.html",
"https://www.tw-animal.com/pet/171211/c000484.html",
"https://www.tw-animal.com/pet/171211/c001089.html",
"https://www.tw-animal.com/pet/171211/c001252.html"
]
# Create an empty list to store the extracted data
data_list = []
# Initialize the geolocator (Nominatim's usage policy requires an identifying
# user agent; the generic "geoapiExercises" string is commonly rejected)
geolocator = Nominatim(user_agent="pet_hospital_rating_app")

# Function to geocode an address, retrying a bounded number of times on
# timeout instead of recursing without limit
def geocode_address(address, retries=3):
    for _ in range(retries):
        try:
            return geolocator.geocode(address)
        except GeocoderTimedOut:
            time.sleep(1)  # brief backoff before retrying
    return None
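# Alternative sketch: geopy ships a RateLimiter helper that enforces
# Nominatim's ~1 request/second policy and bounded retries in one place;
# it could replace the manual loop above:
# from geopy.extra.rate_limiter import RateLimiter
# geocode_address = RateLimiter(geolocator.geocode, min_delay_seconds=1,
#                               max_retries=3, swallow_exceptions=True)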
# Scrape data when the button is pressed
if st.button('開始爬取資料'):  # "Start scraping"
    st.write("正在爬取資料,請稍候...")  # "Scraping, please wait..."

    # Loop through each URL and extract the data
    for url in urls:
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract data; skip a page whose expected elements are missing or
        # whose rating is not numeric
        try:
            title = soup.find('h1', class_='t-intro__title').get_text(strip=True)
            phone = soup.find('a', class_='t-font-large').get_text(strip=True)
            address = soup.find('a', class_='t-font-medium').get_text(strip=True)
            rating = float(soup.find('span', class_='t-intro__recommand').get_text(strip=True))
        except (AttributeError, ValueError):
            st.warning(f"無法解析頁面: {url}")  # "Could not parse page"
            continue

        # Append the data to the list if the rating meets the threshold
        # (columns: 標題=name, 手機=phone, 地址=address, 評分=rating,
        #  經度=longitude, 緯度=latitude)
        if rating >= min_rating:
            location = geocode_address(address)
            if location:
                data_list.append({
                    "標題": title,
                    "手機": phone,
                    "地址": address,
                    "評分": rating,
                    "經度": location.longitude,
                    "緯度": location.latitude
                })
            else:
                st.warning(f"無法獲取經緯度: {address}")  # "Could not geocode address"
    # If data was scraped successfully, keep the results in session state so
    # they survive the rerun Streamlit performs for every later button click
    # (with plain nesting, the buttons below would reset the scrape and the
    # map would never render)
    if data_list:
        st.session_state['df1'] = pd.DataFrame(data_list)
    else:
        st.warning('沒有符合條件的資料。')  # "No data matched the criteria."

if 'df1' in st.session_state:
    df1 = st.session_state['df1']

    # Extract the region from the address (assuming the region is the first
    # whitespace-separated token of the address)
    df1['區域'] = df1['地址'].apply(lambda x: x.split()[0])

    # Group by region and merge hospitals in the same region
    grouped_df = df1.groupby('區域').agg({
        '標題': lambda x: ' | '.join(x),
        '手機': lambda x: ' | '.join(x),
        '地址': lambda x: ' | '.join(x),
        '評分': 'mean'  # average rating per region
    }).reset_index()

    # Display the dataframe
    st.dataframe(df1)

    # Display Plotly bar chart
    bar_fig = px.bar(grouped_df, x='區域', y='評分', title="各區域寵物醫院統計",
                     labels={'評分': '平均評分', '區域': '區域'})
    st.plotly_chart(bar_fig)

    # Display Plotly pie chart
    pie_fig = px.pie(grouped_df, names='區域', values='評分', title="各區域寵物醫院比例")
    st.plotly_chart(pie_fig)
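    # Note: the pie slices are sized by each region's average rating; if the
    # intent is "share of hospitals per region", a per-region count may fit
    # better (hypothetical alternative; the count column name varies across
    # pandas versions):
    # counts = df1['區域'].value_counts().reset_index()
    # pie_fig = px.pie(counts, names='區域', values='count', title="各區域寵物醫院比例")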
    # Display the map
    if st.button('顯示地圖'):  # "Show map"
        # Create a folium map centered around the average location
        map_center = [df1['緯度'].mean(), df1['經度'].mean()]
        pet_map = folium.Map(location=map_center, zoom_start=12)

        # Add markers for each hospital
        for index, row in df1.iterrows():
            folium.Marker(
                location=[row['緯度'], row['經度']],
                popup=f"{row['標題']} (評分: {row['評分']})",
                tooltip=row['標題']
            ).add_to(pet_map)

        # Render the map using streamlit_folium
        st_folium(pet_map, width=700, height=500)
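        # Sketch: with many hospitals, folium.plugins.MarkerCluster (bundled
        # with folium) can group nearby pins so dense areas stay readable:
        # from folium.plugins import MarkerCluster
        # cluster = MarkerCluster().add_to(pet_map)
        # ...then call .add_to(cluster) on each Marker instead of pet_map.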
    # Sending notification to LINE
    if st.button('發送前五筆資料到Line'):  # "Send the first five rows to LINE"
        msg = df1[:5].to_string(index=False)
        token = "YOUR_LINE_NOTIFY_TOKEN"  # Replace with your LINE Notify token; never commit a real one

        # Send message to LINE and return the HTTP response
        def send_line_notify(token, msg):
            headers = {
                "Authorization": "Bearer " + token,
                "Content-Type": "application/x-www-form-urlencoded"
            }
            data = {"message": msg}
            return requests.post("https://notify-api.line.me/api/notify",
                                 headers=headers, data=data)

        r = send_line_notify(token, msg)
        if r.status_code == 200:
            st.success('資料已成功發送到 Line!')  # "Data sent to LINE successfully!"
        else:
            st.error(f'發送失敗 (HTTP {r.status_code})')  # "Send failed"
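    # Sketch: in deployment the token is better loaded from Streamlit's
    # secrets store than hard-coded (assumes a .streamlit/secrets.toml with a
    # line_notify_token entry):
    # token = st.secrets["line_notify_token"]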