rein0421 committed on
Commit
074316c
·
verified ·
1 Parent(s): a8bae83

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -27
app.py CHANGED
@@ -1,44 +1,82 @@
1
  from flask import Flask, request, jsonify, send_from_directory
2
  import base64
3
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
# Flask application object that the route decorators below register on.
app = Flask(__name__)


@app.route('/')
def index():
    """Serve ``index.html`` from the "." directory as the landing page."""
    landing_page = "index.html"
    return send_from_directory(".", landing_page)
10
 
11
@app.route('/upload_audio', methods=['POST'])
def upload_audio():
    """Save a Base64-encoded recording posted as JSON to disk.

    The request body is expected to be a JSON object whose ``audio_data``
    key holds the Base64-encoded audio. The decoded bytes are written to
    ``/tmp/data/recorded_audio.wav``.

    Returns:
        A ``(response, status)`` pair: 200 with the saved file path, 400
        when the body is not JSON, has no ``audio_data`` or cannot be
        Base64-decoded, and 500 when storing the file fails.
    """
    # silent=True yields None for a missing or malformed JSON body, so the
    # client gets the 400 below instead of the catch-all 500.
    data = request.get_json(silent=True)
    if not data:
        return jsonify({"error": "JSONが送信されていません"}), 400

    # A JSON array or scalar has no ``.get``; treat it as "no audio data".
    audio_data = data.get('audio_data') if isinstance(data, dict) else None
    if not audio_data:
        return jsonify({"error": "音声データが送信されていません"}), 400

    # Decode the Base64 payload. binascii.Error is a ValueError subclass;
    # a non-string payload raises TypeError.
    try:
        audio_binary = base64.b64decode(audio_data)
    except (ValueError, TypeError) as decode_err:
        return jsonify({"error": "Base64デコードに失敗しました", "details": str(decode_err)}), 400

    try:
        # /tmp/data is used as the output directory (the original author's
        # note states that /tmp is writable).
        persist_dir = "/tmp/data"
        os.makedirs(persist_dir, exist_ok=True)

        filepath = os.path.join(persist_dir, "recorded_audio.wav")
        with open(filepath, 'wb') as f:
            f.write(audio_binary)
    except Exception as e:
        # Route boundary: log with traceback and answer in JSON.
        app.logger.exception("エラー: %s", str(e))
        return jsonify({"error": "サーバー内部エラー", "details": str(e)}), 500

    return jsonify({"message": "音声が正常に保存されました", "filepath": filepath}), 200
41
 
42
if __name__ == '__main__':
    # Listening port: the PORT environment variable, or 7860 when unset.
    port = int(os.environ.get("PORT", 7860))
    # The server binds to every interface, so the interactive debugger must
    # not be on by default; opt in explicitly with FLASK_DEBUG=1.
    debug = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(debug=debug, host="0.0.0.0", port=port)
 
1
  from flask import Flask, request, jsonify, send_from_directory
2
  import base64
3
  import os
4
+ import shutil
5
+ import numpy as np
6
+ from pyannote.audio import Model, Inference
7
+ from pydub import AudioSegment
8
+
9
# Forward the access token stored in the "HF" environment variable.
# os.environ only accepts strings, so assigning the result of .get() directly
# raises TypeError at import time when "HF" is not set; skip the export then.
hf_token = os.environ.get("HF")
if hf_token:
    os.environ["HUGGINGFACE_HUB_TOKEN"] = hf_token
    # NOTE(review): huggingface_hub documents HF_TOKEN as the variable it
    # reads; export it too (without overriding an existing value) -- confirm
    # whether the HUGGINGFACE_HUB_TOKEN name above is still needed.
    os.environ.setdefault("HF_TOKEN", hf_token)
10
+
11
# Load the pretrained "pyannote/embedding" model once at import time and wrap
# it in an Inference object, which the similarity helper calls with a file path.
model = Model.from_pretrained("pyannote/embedding")
inference = Inference(model)
14
+
15
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two vectors.

    Args:
        vec1: First vector (array or any sequence ``np.asarray`` accepts).
        vec2: Second vector with the same number of elements as ``vec1``.

    Returns:
        The dot product of the two L2-normalised vectors, or ``0.0`` when
        either vector has zero norm (the similarity is undefined there and
        a plain division would produce NaN).
    """
    vec1 = np.asarray(vec1, dtype=float)
    vec2 = np.asarray(vec2, dtype=float)
    norm1 = np.linalg.norm(vec1)
    norm2 = np.linalg.norm(vec2)
    if norm1 == 0 or norm2 == 0:
        return 0.0
    return np.dot(vec1 / norm1, vec2 / norm2)
19
+
20
def segment_audio(path, target_path='./setup_voice', seg_duration=1.0):
    """Split an audio file into consecutive fixed-length WAV segments.

    Segments are written to ``target_path`` as ``0.wav``, ``1.wav``, ... in
    playback order; the final segment is shorter when the audio length is
    not a multiple of ``seg_duration``.

    Args:
        path: Audio file to split.
        target_path: Directory receiving the segments (created if missing).
        seg_duration: Segment length in seconds.

    Returns:
        ``(target_path, duration_ms)`` where ``duration_ms`` is the length
        of the whole input in milliseconds.

    Raises:
        ValueError: If ``seg_duration`` is shorter than one millisecond.
    """
    seg_duration_ms = int(seg_duration * 1000)
    if seg_duration_ms <= 0:
        raise ValueError("seg_duration must be at least 0.001 seconds")

    # Load the input before touching the target directory so the cleanup
    # below can never remove the file that is about to be read.
    base_sound = AudioSegment.from_file(path)
    duration_ms = len(base_sound)

    os.makedirs(target_path, exist_ok=True)
    # Drop numbered segments left by an earlier call: a previous, longer
    # recording would otherwise leave extra "<n>.wav" files that callers
    # listing this directory mistake for part of the current input.
    for name in os.listdir(target_path):
        stem, ext = os.path.splitext(name)
        if ext == '.wav' and stem.isascii() and stem.isdigit():
            os.remove(os.path.join(target_path, name))

    for i, start in enumerate(range(0, duration_ms, seg_duration_ms)):
        end = min(start + seg_duration_ms, duration_ms)
        segment = base_sound[start:end]
        segment.export(os.path.join(target_path, f'{i}.wav'), format="wav")

    return target_path, duration_ms
33
+
34
def _mean_embedding(audio_path):
    """Return a single fixed-size embedding vector for ``audio_path``."""
    # assumes ``inference(...).data`` is array-like with one embedding per
    # row, i.e. (num_windows, dim) -- TODO confirm against the installed
    # pyannote.audio version.
    windows = np.asarray(inference(audio_path).data)
    # Averaging the rows gives a vector whose length does not depend on how
    # many windows the audio produced; flattening would instead yield
    # vectors of different lengths for files of different duration.
    return windows.reshape(-1, windows.shape[-1]).mean(axis=0)


def calculate_similarity(path1, path2):
    """Return the cosine similarity between the embeddings of two audio files.

    Args:
        path1: Path of the first audio file.
        path2: Path of the second audio file.

    Returns:
        The similarity as a Python ``float``.
    """
    return float(cosine_similarity(_mean_embedding(path1), _mean_embedding(path2)))
38
+
39
def process_audio(reference_path, input_path, output_folder='/tmp/data/matched_segments', seg_duration=1.0, threshold=0.5):
    """Measure how much of an input recording matches a reference voice.

    The input is split into ``seg_duration``-second segments, each segment
    is compared with the reference via ``calculate_similarity``, and
    segments scoring above ``threshold`` are copied to ``output_folder``.

    Args:
        reference_path: Audio file of the reference voice.
        input_path: Audio file to analyse.
        output_folder: Directory receiving the matching segments.
        seg_duration: Segment length in seconds.
        threshold: Similarity above which a segment counts as a match.

    Returns:
        ``(matched_time_ms, unmatched_time_ms)`` in milliseconds.
    """
    os.makedirs(output_folder, exist_ok=True)
    base_path, total_duration_ms = segment_audio(input_path, seg_duration=seg_duration)

    # Visit exactly the segments written for this input ("0.wav", "1.wav",
    # ...) instead of listing the directory: a listing also returns files
    # left by earlier runs and sorts names lexicographically.
    seg_duration_ms = int(seg_duration * 1000)
    segment_count = len(range(0, total_duration_ms, seg_duration_ms))

    matched_time_ms = 0
    for index in range(segment_count):
        segment_file = os.path.join(base_path, f'{index}.wav')
        similarity = calculate_similarity(segment_file, reference_path)
        if similarity > threshold:
            shutil.copy(segment_file, output_folder)
            matched_time_ms += len(AudioSegment.from_file(segment_file))

    # Guard against a millisecond of rounding drift making this negative.
    unmatched_time_ms = max(0, total_duration_ms - matched_time_ms)
    return matched_time_ms, unmatched_time_ms
53
 
54
# Flask application object that the route decorators below register on.
app = Flask(__name__)


@app.route('/')
def index():
    """Serve ``index.html`` from the "." directory as the landing page."""
    landing_page = 'index.html'
    return send_from_directory('.', landing_page)
59
 
60
@app.route('/upload_audio', methods=['POST'])
def upload_audio():
    """Score a posted recording against the reference voice.

    The request body is expected to be a JSON object whose ``audio_data``
    key holds the Base64-encoded audio. The decoded bytes are written to
    ``/tmp/data/recorded_audio.wav`` and compared with ``./sample.wav``
    through ``process_audio``.

    Returns:
        A ``(response, status)`` pair: 200 with ``{"rate": <percent>}``
        (share of the recording that matched the reference), 400 when the
        body is not a JSON object with ``audio_data`` or the payload cannot
        be Base64-decoded, and 500 for any failure while processing.
    """
    # silent=True yields None for a missing or malformed JSON body, so the
    # client gets a 400 instead of the catch-all 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'audio_data' not in data:
        return jsonify({"error": "音声データがありません"}), 400

    # Bad client input is a 400, not a server error. binascii.Error is a
    # ValueError subclass; a non-string payload raises TypeError.
    try:
        audio_binary = base64.b64decode(data['audio_data'])
    except (ValueError, TypeError) as decode_err:
        return jsonify({"error": "Base64デコードに失敗しました", "details": str(decode_err)}), 400

    try:
        # NOTE(review): every request writes to the same file, so
        # overlapping uploads can overwrite each other -- confirm whether
        # concurrent use is expected.
        audio_path = "/tmp/data/recorded_audio.wav"
        os.makedirs(os.path.dirname(audio_path), exist_ok=True)
        with open(audio_path, 'wb') as f:
            f.write(audio_binary)

        reference_audio = './sample.wav'  # reference voice
        matched_time, unmatched_time = process_audio(reference_audio, audio_path, threshold=0.1)
        total_time = matched_time + unmatched_time
        rate = (matched_time / total_time) * 100 if total_time > 0 else 0

        return jsonify({"rate": rate}), 200
    except Exception as e:
        # Route boundary: keep the JSON error shape but log the traceback
        # so failures are visible on the server side as well.
        app.logger.exception("サーバーエラー")
        return jsonify({"error": "サーバーエラー", "details": str(e)}), 500
 
80
 
81
if __name__ == '__main__':
    # Listening port: the PORT environment variable, or 7860 when unset.
    port = int(os.environ.get("PORT", 7860))
    # The server binds to every interface, so the interactive debugger must
    # not be on by default; opt in explicitly with FLASK_DEBUG=1. Debug mode
    # also starts the reloader, which would import this module (and load the
    # model) a second time.
    debug = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(debug=debug, host="0.0.0.0", port=port)