Rozeeeee committed on
Commit
4726133
·
verified ·
1 Parent(s): f109089

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -30
app.py CHANGED
@@ -3,31 +3,17 @@ import requests
3
  from bs4 import BeautifulSoup
4
  import pandas as pd
5
  import plotly.graph_objects as go
6
- import plotly.io as pio
7
 
8
  app = Flask(__name__)
9
 
10
- @app.route('/')
11
- def home():
12
- # 設定應用標題
13
- title = "餐廳資料抓取與分析"
14
-
15
- # 從 Google 試算表中讀取 URLs
16
- sheet_id = "1W20lawjiQtEpljUKoEaMVPDlSdnhvJLPUy2jk5xao_w"
17
- urls_df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")
18
-
19
- # 將 URLs 轉換為列表
20
- urls = urls_df['網址'].tolist()
21
-
22
- # 初始化一個空的 DataFrame 列表來儲存所有資料
23
  df_list = []
24
-
25
- # 迭代每個網址並抓取資料
26
  for url in urls:
27
  response = requests.get(url)
28
  soup = BeautifulSoup(response.content, 'html.parser')
29
 
30
- # 解析並抓取所需資料
31
  title_tag = soup.find('h1', class_='restaurant-details__heading--title')
32
  title = title_tag.text.strip() if title_tag else 'N/A'
33
 
@@ -40,11 +26,9 @@ def home():
40
  description_tag = soup.find('div', class_='restaurant-details__description--text')
41
  description = description_tag.text.strip() if description_tag else 'N/A'
42
 
43
- # NOTE: Assuming latitude and longitude are not available from the current page content, you can omit them or fetch them if necessary
44
  lat = 'N/A'
45
  lon = 'N/A'
46
 
47
- # 將抓取的資料新增到列表中
48
  df_list.append({
49
  'Title': title,
50
  'Address': address,
@@ -54,25 +38,33 @@ def home():
54
  'Longitude': lon
55
  })
56
 
57
- # 使用 pd.DataFrame() 將所有資料合併成一個 DataFrame
58
- df = pd.DataFrame(df_list)
 
 
 
59
 
60
- # 統計每個區的商家數量
61
- df['Area'] = df['Address'].str.extract(r'(\w+區)') # 正确关闭字符串
 
 
 
 
 
62
 
63
- # 生成柱狀圖
64
- area_counts = df['Area'].value_counts() # 統計各區的商家數量
 
65
  fig_bar = go.Figure(data=[go.Bar(x=area_counts.index, y=area_counts.values)])
66
  fig_bar.update_layout(title='每個區的商家數量', xaxis_title='區域', yaxis_title='商家數量')
67
- bar_chart = pio.to_html(fig_bar, full_html=False)
68
 
69
- # 生成圓餅圖
70
  fig_pie = go.Figure(data=[go.Pie(labels=area_counts.index, values=area_counts.values)])
71
  fig_pie.update_layout(title='每個區的商家數量比例')
72
- pie_chart = pio.to_html(fig_pie, full_html=False)
73
 
74
- # 渲染模板,顯示結果
75
  return render_template('results.html', tables=[df.to_html(classes='data')], bar_chart=bar_chart, pie_chart=pie_chart)
76
 
77
  if __name__ == '__main__':
78
- app.run(host='0.0.0.0', port=8080, debug=True)
 
3
  from bs4 import BeautifulSoup
4
  import pandas as pd
5
  import plotly.graph_objects as go
6
+ import os
7
 
8
  app = Flask(__name__)
9
 
10
+ # Function to scrape restaurant data from URLs
11
+ def scrape_data(urls):
 
 
 
 
 
 
 
 
 
 
 
12
  df_list = []
 
 
13
  for url in urls:
14
  response = requests.get(url)
15
  soup = BeautifulSoup(response.content, 'html.parser')
16
 
 
17
  title_tag = soup.find('h1', class_='restaurant-details__heading--title')
18
  title = title_tag.text.strip() if title_tag else 'N/A'
19
 
 
26
  description_tag = soup.find('div', class_='restaurant-details__description--text')
27
  description = description_tag.text.strip() if description_tag else 'N/A'
28
 
 
29
  lat = 'N/A'
30
  lon = 'N/A'
31
 
 
32
  df_list.append({
33
  'Title': title,
34
  'Address': address,
 
38
  'Longitude': lon
39
  })
40
 
41
+ return pd.DataFrame(df_list)
42
+
43
+ @app.route('/')
44
+ def home():
45
+ return render_template('index.html')
46
 
47
+ @app.route('/scrape', methods=['POST'])
48
+ def scrape():
49
+ sheet_id = request.form['sheet_id']
50
+ urls_df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")
51
+ urls = urls_df['網址'].tolist()
52
+
53
+ df = scrape_data(urls)
54
 
55
+ # Generate bar chart
56
+ df['Area'] = df['Address'].str.extract(r'(\w+區)')
57
+ area_counts = df['Area'].value_counts()
58
  fig_bar = go.Figure(data=[go.Bar(x=area_counts.index, y=area_counts.values)])
59
  fig_bar.update_layout(title='每個區的商家數量', xaxis_title='區域', yaxis_title='商家數量')
60
+ bar_chart = fig_bar.to_html(full_html=False)
61
 
62
+ # Generate pie chart
63
  fig_pie = go.Figure(data=[go.Pie(labels=area_counts.index, values=area_counts.values)])
64
  fig_pie.update_layout(title='每個區的商家數量比例')
65
+ pie_chart = fig_pie.to_html(full_html=False)
66
 
 
67
  return render_template('results.html', tables=[df.to_html(classes='data')], bar_chart=bar_chart, pie_chart=pie_chart)
68
 
69
  if __name__ == '__main__':
70
+ app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 8080)), debug=True)