mistpe committed on
Commit
fa8566a
·
verified ·
1 Parent(s): 92d60db

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -45
app.py CHANGED
@@ -1,24 +1,61 @@
1
  from bs4 import BeautifulSoup
2
  from flask import Flask, request, jsonify
3
  import requests
4
-
5
- from flask_cors import CORS
 
6
  from apscheduler.schedulers.background import BackgroundScheduler
7
  from datetime import datetime
8
 
9
  app = Flask(__name__)
10
- CORS(app)
11
 
12
- # 缓存数据及更新时间
13
  cached_results_soaring = []
14
  cached_results_hot = []
15
  cached_results_newSongs = []
16
  cached_results_popular = []
17
  last_update_time = None
18
 
19
- def song(url):
20
- """从 QQ 音乐榜单页面爬取歌曲信息"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
 
 
 
 
22
  headers = {
23
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
24
  "Referer": "https://y.qq.com/",
@@ -27,35 +64,24 @@ def song(url):
27
 
28
  results = []
29
  try:
30
-
31
- # 发送请求
32
  response = requests.get(url, headers=headers)
33
  response.encoding = 'utf-8'
34
-
35
- # 使用 BeautifulSoup 解析页面
36
  soup = BeautifulSoup(response.text, 'html.parser')
37
-
38
- # 查找所有歌曲列表项
39
  song_items = soup.select("ul.songlist__list li")
40
 
41
  for item in song_items[:10]:
42
  try:
43
- # 提取歌曲名称
44
  song_name_span = item.select_one("span.songlist__songname_txt")
45
  song_name = song_name_span.text.strip()
46
 
47
- # 提取歌手名称
48
  artist_div = item.select_one("div.songlist__artist")
49
  artist_name = artist_div.text.strip()
50
 
51
- # 提取 mid
52
  song_link = song_name_span.find('a', class_='')
53
-
54
  mid = "default_mid"
55
  if song_link and song_link.get('href'):
56
  mid = song_link['href'].split('/')[-1]
57
 
58
- # 提取图片链接
59
  img_link = song_name_span.find('a', class_='songlist__cover')
60
  if img_link and img_link.get('href'):
61
  imgsrc = img_link['href'].split('/')[-1]
@@ -74,58 +100,50 @@ def song(url):
74
  "muId": 1
75
  })
76
  except Exception as e:
77
- print(f"处理歌曲项时出错: {e}")
78
  continue
79
 
80
  except Exception as e:
81
- print(f"爬取过程中出错: {e}")
82
  return []
83
 
84
- # 更新缓存数据及更新时间
85
  if results:
86
- print(f"数据已更新:{last_update_time}")
87
  return results
88
 
89
-
90
  def schedule_music_fetch():
91
- """设置每天早上 8 点定时任务"""
92
  scheduler = BackgroundScheduler()
93
-
94
 
95
  def run_tasks():
96
- global cached_results_soaring, cached_results_hot, cached_results_newSongs, cached_results_popular, last_update_time
97
- # 依次运行每个任务
98
  cached_results_soaring = song("https://y.qq.com/n/ryqq/toplist/62")
99
  cached_results_hot = song("https://y.qq.com/n/ryqq/toplist/26")
100
  cached_results_newSongs = song("https://y.qq.com/n/ryqq/toplist/27")
101
  cached_results_popular = song("https://y.qq.com/n/ryqq/toplist/4")
102
- last_update_time = datetime.now()
103
-
104
 
105
- # 设置定时任务,每天 8 点执行 run_tasks
106
  scheduler.add_job(run_tasks, 'cron', hour=8, minute=0)
107
-
108
  scheduler.start()
109
- print("定时任务已启动,爬取时间为每天早上 8 点。")
110
 
111
  @app.route('/star')
112
  def star():
113
- global cached_results_soaring, cached_results_hot, cached_results_newSongs, cached_results_popular, last_update_time
114
  cached_results_soaring = song("https://y.qq.com/n/ryqq/toplist/62")
115
  cached_results_hot = song("https://y.qq.com/n/ryqq/toplist/26")
116
  cached_results_newSongs = song("https://y.qq.com/n/ryqq/toplist/27")
117
  cached_results_popular = song("https://y.qq.com/n/ryqq/toplist/4")
118
- last_update_time = datetime.now()
119
  return jsonify({
120
- "data": "数据已更新",
121
- "updata time": last_update_time
122
  })
123
 
124
  @app.route('/fetch_music_soaring', methods=['GET'])
125
  def fetch_music_route_soaring():
126
- """提供缓存的 QQ 音乐榜单数据"""
127
  if not cached_results_soaring:
128
- return jsonify({"message": "数据尚未准备好,请先访问/star 更新数据"}), 503
129
 
130
  return jsonify({
131
  "last_update_time": last_update_time.strftime("%Y-%m-%d %H:%M:%S"),
@@ -134,9 +152,8 @@ def fetch_music_route_soaring():
134
 
135
  @app.route('/fetch_music_hot', methods=['GET'])
136
  def fetch_music_route_hot():
137
- """提供缓存的 QQ 音乐榜单数据"""
138
  if not cached_results_hot:
139
- return jsonify({"message": "数据尚未准备好,请先访问/star 更新数据"}), 503
140
 
141
  return jsonify({
142
  "last_update_time": last_update_time.strftime("%Y-%m-%d %H:%M:%S"),
@@ -145,9 +162,8 @@ def fetch_music_route_hot():
145
 
146
  @app.route('/fetch_music_newSongs', methods=['GET'])
147
  def fetch_music_route_newSongs():
148
- """提供缓存的 QQ 音乐榜单数据"""
149
  if not cached_results_newSongs:
150
- return jsonify({"message": "数据尚未准备好,请先访问/star 更新数据"}), 503
151
 
152
  return jsonify({
153
  "last_update_time": last_update_time.strftime("%Y-%m-%d %H:%M:%S"),
@@ -156,15 +172,41 @@ def fetch_music_route_newSongs():
156
 
157
  @app.route('/fetch_music_popular', methods=['GET'])
158
  def fetch_music_route_popular():
159
- """提供缓存的 QQ 音乐榜单数据"""
160
  if not cached_results_popular:
161
- return jsonify({"message": "数据尚未准备好,请先访问/star 更新数据"}), 503
162
 
163
  return jsonify({
164
  "last_update_time": last_update_time.strftime("%Y-%m-%d %H:%M:%S"),
165
  "data": cached_results_popular
166
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  if __name__ == '__main__':
168
- # 启动 Flask 应用时初始化定时任务
169
  schedule_music_fetch()
170
- app.run(debug=True,host='0.0.0.0',port=5734)
 
1
  from bs4 import BeautifulSoup
2
  from flask import Flask, request, jsonify
3
  import requests
4
+ import re
5
+ import json
6
+ from flask_cors import CORS
7
  from apscheduler.schedulers.background import BackgroundScheduler
8
  from datetime import datetime
9
 
10
  app = Flask(__name__)
11
+ CORS(app)
12
 
13
+ # Cache data and update time
14
  cached_results_soaring = []
15
  cached_results_hot = []
16
  cached_results_newSongs = []
17
  cached_results_popular = []
18
  last_update_time = None
19
 
20
def parse_time(time_str):
    """Normalize an LRC-style timestamp to ``minutes:seconds.milliseconds``.

    Args:
        time_str: A timestamp such as ``"01:23.45"`` or ``"[01:23.45]"``.
            Surrounding square brackets are stripped. May be empty or None.

    Returns:
        The timestamp with its fractional part right-padded with zeros to
        three digits (``"01:23.45"`` -> ``"01:23.450"``, ``"01:23"`` ->
        ``"01:23.000"``). Returns ``"00:00.000"`` when the input is empty or
        does not consist of exactly two colon-separated parts.
    """
    default = "00:00.000"
    if not time_str:
        return default

    parts = time_str.strip('[]').split(':')
    if len(parts) != 2:
        return default

    minutes, seconds = parts
    # Split on the first dot only; a missing or short fraction is padded to
    # milliseconds, while fractions of three or more characters are kept as-is.
    whole, _, fraction = seconds.partition('.')
    return f"{minutes}:{whole}.{fraction.ljust(3, '0')}"
36
+
37
def convert_lyrics(input_str):
    """Convert LRC-formatted lyric text into a list of timed lines.

    Args:
        input_str: Lyric text with one ``[mm:ss.xx]text`` entry per line.

    Returns:
        ``{"lyric": [{"name": <text>, "time": <timestamp>}, ...]}`` in input
        order. Blank lines and lines that do not start with a timestamp tag
        (for example ``[ti:...]`` metadata) are skipped.
    """
    # Accept [mm:ss], [mm:ss.xx] and [mm:ss.xxx]; parse_time normalizes all of
    # these, so none of them should be dropped here. Compiled once, outside
    # the loop.
    timestamp_line = re.compile(r'\[(\d{1,3}:\d{2}(?:\.\d{2,3})?)\](.*)')

    lyrics = []
    for line in input_str.split('\n'):
        if not line.strip():
            continue

        match = timestamp_line.match(line)
        if not match:
            continue

        time_str, content = match.groups()
        lyrics.append({
            "name": content.strip(),
            "time": parse_time(time_str)
        })

    return {"lyric": lyrics}
56
+
57
+ def song(url):
58
+ """Crawl song information from QQ Music rankings page"""
59
  headers = {
60
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
61
  "Referer": "https://y.qq.com/",
 
64
 
65
  results = []
66
  try:
 
 
67
  response = requests.get(url, headers=headers)
68
  response.encoding = 'utf-8'
 
 
69
  soup = BeautifulSoup(response.text, 'html.parser')
 
 
70
  song_items = soup.select("ul.songlist__list li")
71
 
72
  for item in song_items[:10]:
73
  try:
 
74
  song_name_span = item.select_one("span.songlist__songname_txt")
75
  song_name = song_name_span.text.strip()
76
 
 
77
  artist_div = item.select_one("div.songlist__artist")
78
  artist_name = artist_div.text.strip()
79
 
 
80
  song_link = song_name_span.find('a', class_='')
 
81
  mid = "default_mid"
82
  if song_link and song_link.get('href'):
83
  mid = song_link['href'].split('/')[-1]
84
 
 
85
  img_link = song_name_span.find('a', class_='songlist__cover')
86
  if img_link and img_link.get('href'):
87
  imgsrc = img_link['href'].split('/')[-1]
 
100
  "muId": 1
101
  })
102
  except Exception as e:
103
+ print(f"Error processing song item: {e}")
104
  continue
105
 
106
  except Exception as e:
107
+ print(f"Error during crawling: {e}")
108
  return []
109
 
 
110
  if results:
111
+ print(f"Data updated: {last_update_time}")
112
  return results
113
 
 
114
  def schedule_music_fetch():
115
+ """Schedule daily task at 8 AM"""
116
  scheduler = BackgroundScheduler()
 
117
 
118
  def run_tasks():
119
+ global cached_results_soaring, cached_results_hot, cached_results_newSongs, cached_results_popular, last_update_time
 
120
  cached_results_soaring = song("https://y.qq.com/n/ryqq/toplist/62")
121
  cached_results_hot = song("https://y.qq.com/n/ryqq/toplist/26")
122
  cached_results_newSongs = song("https://y.qq.com/n/ryqq/toplist/27")
123
  cached_results_popular = song("https://y.qq.com/n/ryqq/toplist/4")
124
+ last_update_time = datetime.now()
 
125
 
 
126
  scheduler.add_job(run_tasks, 'cron', hour=8, minute=0)
 
127
  scheduler.start()
128
+ print("Scheduled task started, crawling time is 8 AM daily.")
129
 
130
  @app.route('/star')
131
  def star():
132
+ global cached_results_soaring, cached_results_hot, cached_results_newSongs, cached_results_popular, last_update_time
133
  cached_results_soaring = song("https://y.qq.com/n/ryqq/toplist/62")
134
  cached_results_hot = song("https://y.qq.com/n/ryqq/toplist/26")
135
  cached_results_newSongs = song("https://y.qq.com/n/ryqq/toplist/27")
136
  cached_results_popular = song("https://y.qq.com/n/ryqq/toplist/4")
137
+ last_update_time = datetime.now()
138
  return jsonify({
139
+ "data": "Data updated",
140
+ "update_time": last_update_time
141
  })
142
 
143
  @app.route('/fetch_music_soaring', methods=['GET'])
144
  def fetch_music_route_soaring():
 
145
  if not cached_results_soaring:
146
+ return jsonify({"message": "Data not ready, please visit /star first to update data"}), 503
147
 
148
  return jsonify({
149
  "last_update_time": last_update_time.strftime("%Y-%m-%d %H:%M:%S"),
 
152
 
153
  @app.route('/fetch_music_hot', methods=['GET'])
154
  def fetch_music_route_hot():
 
155
  if not cached_results_hot:
156
+ return jsonify({"message": "Data not ready, please visit /star first to update data"}), 503
157
 
158
  return jsonify({
159
  "last_update_time": last_update_time.strftime("%Y-%m-%d %H:%M:%S"),
 
162
 
163
  @app.route('/fetch_music_newSongs', methods=['GET'])
164
  def fetch_music_route_newSongs():
 
165
  if not cached_results_newSongs:
166
+ return jsonify({"message": "Data not ready, please visit /star first to update data"}), 503
167
 
168
  return jsonify({
169
  "last_update_time": last_update_time.strftime("%Y-%m-%d %H:%M:%S"),
 
172
 
173
  @app.route('/fetch_music_popular', methods=['GET'])
174
  def fetch_music_route_popular():
 
175
  if not cached_results_popular:
176
+ return jsonify({"message": "Data not ready, please visit /star first to update data"}), 503
177
 
178
  return jsonify({
179
  "last_update_time": last_update_time.strftime("%Y-%m-%d %H:%M:%S"),
180
  "data": cached_results_popular
181
  })
182
+
183
@app.route('/get_lyrics', methods=['GET'])
def get_lyrics():
    """Fetch the lyrics for a song and return them as timed lines.

    Query parameters:
        mid: The song identifier, forwarded to the upstream lyric endpoint
            as ``songmid``.

    Returns:
        200 with the ``convert_lyrics`` result on success;
        400 when ``mid`` is missing;
        404 when the upstream response contains no ``lyric`` field;
        500 when the upstream request fails or its payload is malformed.
    """
    mid = request.args.get('mid')
    if not mid:
        return jsonify({"error": "Missing mid parameter"}), 400

    target_url = "https://c.y.qq.com/lyric/fcgi-bin/fcg_query_lyric_new.fcg"
    # Let requests URL-encode the query so a crafted ``mid`` (e.g. containing
    # '&' or '#') cannot inject or truncate upstream parameters.
    query = {"songmid": mid, "format": "json", "nobase64": 1}
    headers = {
        "Referer": "https://y.qq.com/portal/player.html"
    }

    try:
        # Without a timeout a stalled upstream would block this worker
        # indefinitely; a timeout surfaces as a RequestException below.
        response = requests.get(target_url, params=query, headers=headers, timeout=10)
        response.raise_for_status()
        data = response.json()
        matched_lyrics = data.get('lyric')

        if not matched_lyrics:
            return jsonify({"error": f"No lyrics found for mid: {mid}"}), 404

        result_json = convert_lyrics(matched_lyrics)
        return jsonify(result_json)
    except requests.RequestException as e:
        return jsonify({"error": "Failed to get lyrics", "details": str(e)}), 500
    except (KeyError, ValueError, AttributeError):
        # AttributeError covers a JSON payload that is not an object, or a
        # ``lyric`` value that is not a string.
        return jsonify({"error": "Unexpected data format from server"}), 500
209
+
210
  if __name__ == '__main__':
 
211
  schedule_music_fetch()
212
+ app.run(debug=True, host='0.0.0.0', port=5734)