Spaces:

Rozeeeee
/

ttttttttttttttt

Sleeping

App Files Files Community

Rozeeeee committed on Sep 3, 2024

Commit

68ed0b7

verified ·

1 Parent(s): fcfadd9

Update app.py

Browse files

Files changed (1) hide show

app.py +142 -62

app.py CHANGED Viewed

@@ -1,70 +1,150 @@
-from flask import Flask, render_template, request
-import requests
 from bs4 import BeautifulSoup
 import pandas as pd
 import plotly.graph_objects as go
-import os
-app = Flask(__name__)
-# Function to scrape restaurant data from URLs
-def scrape_data(urls):
-    df_list = []
-    for url in urls:
-        response = requests.get(url)
-        soup = BeautifulSoup(response.content, 'html.parser')
-        title_tag = soup.find('h1', class_='restaurant-details__heading--title')
-        title = title_tag.text.strip() if title_tag else 'N/A'
-        address_tag = soup.find('li', class_='restaurant-details__heading--address')
-        address = address_tag.text.strip() if address_tag else 'N/A'
-        phone_tag = soup.find('a', {'data-event': 'CTA_tel'})
-        phone = phone_tag['href'].replace('tel:', '') if phone_tag else 'N/A'
-        description_tag = soup.find('div', class_='restaurant-details__description--text')
-        description = description_tag.text.strip() if description_tag else 'N/A'
-        lat = 'N/A'
-        lon = 'N/A'
-        df_list.append({
-            'Title': title,
-            'Address': address,
-            'Phone': phone,
-            'Description': description,
-            'Latitude': lat,
-            'Longitude': lon
-        })
-    return pd.DataFrame(df_list)
-@app.route('/')
-def home():
-    return render_template('index.html')
-@app.route('/scrape', methods=['POST'])
-def scrape():
-    sheet_id = request.form['sheet_id']
-    urls_df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")
-    urls = urls_df['網址'].tolist()
-    df = scrape_data(urls)
-    # Generate bar chart
-    df['Area'] = df['Address'].str.extract(r'(\w+區)')
-    area_counts = df['Area'].value_counts()
-    fig_bar = go.Figure(data=[go.Bar(x=area_counts.index, y=area_counts.values)])
-    fig_bar.update_layout(title='每個區的商家數量', xaxis_title='區域', yaxis_title='商家數量')
-    bar_chart = fig_bar.to_html(full_html=False)
-    # Generate pie chart
-    fig_pie = go.Figure(data=[go.Pie(labels=area_counts.index, values=area_counts.values)])
-    fig_pie.update_layout(title='每個區的商家數量比例')
-    pie_chart = fig_pie.to_html(full_html=False)
-    return render_template('results.html', tables=[df.to_html(classes='data')], bar_chart=bar_chart, pie_chart=pie_chart)
-if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 8080)), debug=True)

+# Streamlit app: scrape the restaurant pages listed in a Google Sheet, show
+# the scraped rows, and chart how many restaurants fall in each district.
+import requests
 from bs4 import BeautifulSoup
 import pandas as pd
 import plotly.graph_objects as go
+import streamlit as st
+# Page title.
+st.title("餐廳資料抓取與分析")
+# Read the restaurant URLs from the Google Sheet's CSV export.
+sheet_id = "1W20lawjiQtEpljUKoEaMVPDlSdnhvJLPUy2jk5xao_w"
+urls_df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")
+# The URLs are taken from the sheet's '網址' column.
+urls = urls_df['網址'].tolist()
+# One dict per scraped restaurant; converted to a DataFrame after the loop.
+df_list = []
+# Fetch every page and extract the fields of interest.
+for url in urls:
+    response = requests.get(url)
+    soup = BeautifulSoup(response.content, 'html.parser')
+    # Each field falls back to 'N/A' when its element is absent from the page.
+    title_tag = soup.find('h1', class_='restaurant-details__heading--title')
+    title = title_tag.text.strip() if title_tag else 'N/A'
+    address_tag = soup.find('li', class_='restaurant-details__heading--address')
+    address = address_tag.text.strip() if address_tag else 'N/A'
+    phone_tag = soup.find('a', {'data-event': 'CTA_tel'})
+    phone = phone_tag['href'].replace('tel:', '') if phone_tag else 'N/A'
+    description_tag = soup.find('div', class_='restaurant-details__description--text')
+    description = description_tag.text.strip() if description_tag else 'N/A'
+    # Coordinates are not scraped from the page. They must still be assigned
+    # before use, otherwise the dict below raises NameError; 'N/A' keeps the
+    # Latitude/Longitude columns present with a placeholder value.
+    lat = 'N/A'
+    lon = 'N/A'
+    # Record this restaurant's row.
+    df_list.append({
+        'Title': title,
+        'Address': address,
+        'Phone': phone,
+        'Description': description,
+        'Latitude': lat,
+        'Longitude': lon
+    })
+# Combine all rows into a single DataFrame.
+df = pd.DataFrame(df_list)
+# Show the scraped data.
+st.subheader("抓取的餐廳資料")
+st.dataframe(df)
+# Count restaurants per district: the regex captures the run of word
+# characters ending in '區' from each address.
+df['Area'] = df['Address'].str.extract(r'(\w+區)')
+area_counts = df['Area'].value_counts()
+# Bar chart of restaurants per district, always displayed.
+fig_bar = go.Figure(data=[go.Bar(x=area_counts.index, y=area_counts.values)])
+fig_bar.update_layout(title='每個區的商家數量', xaxis_title='區域', yaxis_title='商家數量')
+st.plotly_chart(fig_bar)
+# Pie chart of the same counts, displayed only when the button is pressed.
+fig_pie = go.Figure(data=[go.Pie(labels=area_counts.index, values=area_counts.values)])
+fig_pie.update_layout(title='每個區的商家數量比例')
+if st.button('顯示每個區的商家數量比例圓餅圖'):
+    st.plotly_chart(fig_pie)