Rozeeeee committed on
Commit
68ed0b7
·
verified ·
1 Parent(s): fcfadd9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +142 -62
app.py CHANGED
@@ -1,70 +1,150 @@
1
- from flask import Flask, render_template, request
2
- import requests
3
  from bs4 import BeautifulSoup
4
  import pandas as pd
5
  import plotly.graph_objects as go
6
- import os
7
-
8
- app = Flask(__name__)
9
-
10
- # Function to scrape restaurant data from URLs
11
- def scrape_data(urls):
12
- df_list = []
13
- for url in urls:
14
- response = requests.get(url)
15
- soup = BeautifulSoup(response.content, 'html.parser')
16
-
17
- title_tag = soup.find('h1', class_='restaurant-details__heading--title')
18
- title = title_tag.text.strip() if title_tag else 'N/A'
19
-
20
- address_tag = soup.find('li', class_='restaurant-details__heading--address')
21
- address = address_tag.text.strip() if address_tag else 'N/A'
22
-
23
- phone_tag = soup.find('a', {'data-event': 'CTA_tel'})
24
- phone = phone_tag['href'].replace('tel:', '') if phone_tag else 'N/A'
25
-
26
- description_tag = soup.find('div', class_='restaurant-details__description--text')
27
- description = description_tag.text.strip() if description_tag else 'N/A'
28
-
29
- lat = 'N/A'
30
- lon = 'N/A'
31
-
32
- df_list.append({
33
- 'Title': title,
34
- 'Address': address,
35
- 'Phone': phone,
36
- 'Description': description,
37
- 'Latitude': lat,
38
- 'Longitude': lon
39
- })
40
-
41
- return pd.DataFrame(df_list)
42
-
43
- @app.route('/')
44
- def home():
45
- return render_template('index.html')
46
-
47
- @app.route('/scrape', methods=['POST'])
48
- def scrape():
49
- sheet_id = request.form['sheet_id']
50
- urls_df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")
51
- urls = urls_df['網址'].tolist()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
- df = scrape_data(urls)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
- # Generate bar chart
56
- df['Area'] = df['Address'].str.extract(r'(\w+區)')
57
- area_counts = df['Area'].value_counts()
58
- fig_bar = go.Figure(data=[go.Bar(x=area_counts.index, y=area_counts.values)])
59
- fig_bar.update_layout(title='每個區的商家數量', xaxis_title='區域', yaxis_title='商家數量')
60
- bar_chart = fig_bar.to_html(full_html=False)
61
 
62
- # Generate pie chart
63
- fig_pie = go.Figure(data=[go.Pie(labels=area_counts.index, values=area_counts.values)])
64
- fig_pie.update_layout(title='每個區的商家數量比例')
65
- pie_chart = fig_pie.to_html(full_html=False)
66
 
67
- return render_template('results.html', tables=[df.to_html(classes='data')], bar_chart=bar_chart, pie_chart=pie_chart)
 
 
68
 
69
- if __name__ == '__main__':
70
- app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 8080)), debug=True)
 
 
1
+ import requests
 
2
  from bs4 import BeautifulSoup
3
  import pandas as pd
4
  import plotly.graph_objects as go
5
+ import streamlit as st
6
+
7
+ # 設定應用標題
8
+ st.title("餐廳資料抓取與分析")
9
+
10
+ # 從 Google 試算表中讀取 URLs
11
+ sheet_id = "1W20lawjiQtEpljUKoEaMVPDlSdnhvJLPUy2jk5xao_w"
12
+ urls_df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")
13
+
14
+ # 將 URLs 轉換為列表
15
+ urls = urls_df['網址'].tolist()
16
+
17
+ # 初始化一個空的 DataFrame 列表來儲存所有資料
18
+ df_list = []
19
+
20
+
21
+ # 迭代每個網址並抓取資料
22
+ for url in urls:
23
+ response = requests.get(url)
24
+ soup = BeautifulSoup(response.content, 'html.parser')
25
+
26
+ # 解析並抓取所需資料
27
+ title_tag = soup.find('h1', class_='restaurant-details__heading--title')
28
+ title = title_tag.text.strip() if title_tag else 'N/A'
29
+
30
+ address_tag = soup.find('li', class_='restaurant-details__heading--address')
31
+ address = address_tag.text.strip() if address_tag else 'N/A'
32
+
33
+ phone_tag = soup.find('a', {'data-event': 'CTA_tel'})
34
+ phone = phone_tag['href'].replace('tel:', '') if phone_tag else 'N/A'
35
+
36
+ description_tag = soup.find('div', class_='restaurant-details__description--text')
37
+ description = description_tag.text.strip() if description_tag else 'N/A'
38
+
39
+
40
+
41
+ # 將抓取的資料新增到列表中
42
+ df_list.append({
43
+ 'Title': title,
44
+ 'Address': address,
45
+ 'Phone': phone,
46
+ 'Description': description,
47
+ 'Latitude': lat,
48
+ 'Longitude': lon
49
+ })
50
+
51
+ # 使用 pd.DataFrame() 將所有資料合併成一個 DataFrame
52
+ df = pd.DataFrame(df_list)
53
+
54
+ # 顯示抓取的資料
55
+ st.subheader("抓取的餐廳資料")
56
+ st.dataframe(df)
57
+
58
+ # 統計每個區的商家數量
59
+ df['Area'] = df['Address'].str.extract(r'(\w+區)') # 提取區域
60
+ area_counts = df['Area'].value_counts() # 統計各區的商家數量
61
+
62
+ # 繪製柱狀圖
63
+ fig_bar = go.Figure(data=[go.Bar(x=area_counts.index, y=area_counts.values)])
64
+ fig_bar.update_layout(title='每個區的商家數量', xaxis_title='區域', yaxis_title='商家數量')
65
+
66
+ # 顯示柱狀圖
67
+ st.plotly_chart(fig_bar)
68
+
69
+ # 繪製圓餅圖
70
+ fig_pie = go.Figure(data=[go.Pie(labels=area_counts.index, values=area_counts.values)])
71
+ fig_pie.update_layout(title='每個區的商家數量比例')
72
+
73
+ # 按鈕來顯示圓餅圖
74
+ if st.button('顯示每個區的商家數量比例圓餅圖'):
75
+ st.plotly_chart(fig_pie)NameError: name 'lat' is not defined
76
+ import requests
77
+ from bs4 import BeautifulSoup
78
+ import pandas as pd
79
+ import plotly.graph_objects as go
80
+ import streamlit as st
81
+
82
+ # 設定應用標題
83
+ st.title("餐廳資料抓取與分析")
84
+
85
+ # 從 Google 試算表中讀取 URLs
86
+ sheet_id = "1W20lawjiQtEpljUKoEaMVPDlSdnhvJLPUy2jk5xao_w"
87
+ urls_df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")
88
+
89
+ # 將 URLs 轉換為列表
90
+ urls = urls_df['網址'].tolist()
91
+
92
+ # 初始化一個空的 DataFrame 列表來儲存所有資料
93
+ df_list = []
94
+
95
+
96
+ # 迭代每個網址並抓取資料
97
+ for url in urls:
98
+ response = requests.get(url)
99
+ soup = BeautifulSoup(response.content, 'html.parser')
100
+
101
+ # 解析並抓取所需資料
102
+ title_tag = soup.find('h1', class_='restaurant-details__heading--title')
103
+ title = title_tag.text.strip() if title_tag else 'N/A'
104
+
105
+ address_tag = soup.find('li', class_='restaurant-details__heading--address')
106
+ address = address_tag.text.strip() if address_tag else 'N/A'
107
+
108
+ phone_tag = soup.find('a', {'data-event': 'CTA_tel'})
109
+ phone = phone_tag['href'].replace('tel:', '') if phone_tag else 'N/A'
110
 
111
+ description_tag = soup.find('div', class_='restaurant-details__description--text')
112
+ description = description_tag.text.strip() if description_tag else 'N/A'
113
+
114
+
115
+
116
+ # 將抓取的資料新增到列表中
117
+ df_list.append({
118
+ 'Title': title,
119
+ 'Address': address,
120
+ 'Phone': phone,
121
+ 'Description': description,
122
+ 'Latitude': lat,
123
+ 'Longitude': lon
124
+ })
125
+
126
+ # 使用 pd.DataFrame() 將所有資料合併成一個 DataFrame
127
+ df = pd.DataFrame(df_list)
128
+
129
+ # 顯示抓取的資料
130
+ st.subheader("抓取的餐廳資料")
131
+ st.dataframe(df)
132
+
133
+ # 統計每個區的商家數量
134
+ df['Area'] = df['Address'].str.extract(r'(\w+區)') # 提取區域
135
+ area_counts = df['Area'].value_counts() # 統計各區的商家數量
136
 
137
+ # 繪製柱狀圖
138
+ fig_bar = go.Figure(data=[go.Bar(x=area_counts.index, y=area_counts.values)])
139
+ fig_bar.update_layout(title='每個區的商家數量', xaxis_title='區域', yaxis_title='商家數量')
 
 
 
140
 
141
+ # 顯示柱狀圖
142
+ st.plotly_chart(fig_bar)
 
 
143
 
144
+ # 繪製圓餅圖
145
+ fig_pie = go.Figure(data=[go.Pie(labels=area_counts.index, values=area_counts.values)])
146
+ fig_pie.update_layout(title='每個區的商家數量比例')
147
 
148
+ # 按鈕來顯示圓餅圖
149
+ if st.button('顯示每個區的商家數量比例圓餅圖'):
150
+ st.plotly_chart(fig_pie)NameError: name 'lat' is not defined