Spaces:

Justtalk
/

JusTalk

Running

App Files Files Community

rein0421 committed on Mar 15

Commit

397a06b

verified ·

1 Parent(s): 7b3969c

Update app.py

Browse files

Files changed (1) hide show

app.py +106 -41

app.py CHANGED Viewed

@@ -1,69 +1,134 @@
-from flask import Flask, request, jsonify, render_template
 import base64
 import os
 import string
 import random
 from datetime import datetime
 app = Flask(__name__)
-# トップページ（GET/POSTどちらにも対応）
 @app.route('/')
 @app.route('/index', methods=['GET', 'POST'])
 def index():
     return render_template('index.html')
-# フィードバック画面（GET/POSTどちらにも対応）
 @app.route('/feedback', methods=['GET', 'POST'])
 def feedback():
-    return render_template("feedback.html")
-# 会話詳細画面（GET/POSTどちらにも対応）
 @app.route('/talk_detail', methods=['GET', 'POST'])
 def talk_detail():
-    return render_template("talkDetail.html")
-# 音声データアップロード用エンドポイント
 @app.route('/upload_audio', methods=['POST'])
 def upload_audio():
     try:
         data = request.get_json()
-        if not data:
-            return jsonify({"error": "JSONが送信されていません"}), 400
-        audio_data = data.get('audio_data')
-        if not audio_data:
-            return jsonify({"error": "音声データが送信されていません"}), 400
-        # Base64デコード
-        try:
-            audio_binary = base64.b64decode(audio_data)
-        except Exception as decode_err:
-            return jsonify({"error": "Base64デコードに失敗しました", "details": str(decode_err)}), 400
-        # 書き込み用ディレクトリとして /tmp/data を使用（/tmp は書き込み可能）
-        persist_dir = "/tmp/data"
-        os.makedirs(persist_dir, exist_ok=True)
-        filepath = os.path.join(persist_dir, generate_filename(6))  # ここだけ変更しました
-        with open(filepath, 'wb') as f:
             f.write(audio_binary)
-        return jsonify({"message": "音声が正常に保存されました", "filepath": filepath}), 200
     except Exception as e:
-        app.logger.error("エラー: %s", str(e))
-        return jsonify({"error": "サーバー内部エラー", "details": str(e)}), 500
-def generate_random_string(length):
-    letters = string.ascii_letters + string.digits
-    return ''.join(random.choice(letters) for i in range(length))
-def generate_filename(random_length):
-    random_string = generate_random_string(random_length)
-    current_time = datetime.now().strftime("%Y%m%d%H%M%S")
-    filename = f"{current_time}_{random_string}.wav"
-    return filename
 if __name__ == '__main__':
     port = int(os.environ.get("PORT", 7860))

+from flask import Flask, request, jsonify, render_template, send_from_directory
 import base64
 import os
+import shutil
+import numpy as np
 import string
 import random
 from datetime import datetime
+from pyannote.audio import Model, Inference
+from pydub import AudioSegment
+# Read the Hugging Face access token from the HF environment variable.
+hf_token = os.environ.get("HF")
+if hf_token is None:
+    # The message names HF because that is the variable actually read above
+    # (it previously pointed at HUGGINGFACE_HUB_TOKEN, which is never consulted).
+    raise ValueError("環境変数 HF が設定されていません。")
+# Create the model cache directory under /tmp (chosen as a writable location).
+cache_dir = "/tmp/hf_cache"
+os.makedirs(cache_dir, exist_ok=True)
+# Load the pyannote embedding model and wrap it in an Inference object.
+model = Model.from_pretrained("pyannote/embedding", use_auth_token=hf_token, cache_dir=cache_dir)
+inference = Inference(model)
+def cosine_similarity(vec1, vec2):
+    """Return the cosine similarity of two 1-D vectors.
+
+    Args:
+        vec1: First vector (array-like).
+        vec2: Second vector (array-like) of the same length.
+
+    Returns:
+        The dot product of the two L2-normalised vectors, or 0.0 when either
+        vector has zero norm (the similarity is undefined there; returning 0.0
+        avoids a division by zero that would otherwise yield NaN).
+    """
+    vec1 = np.asarray(vec1)
+    vec2 = np.asarray(vec2)
+    norm1 = np.linalg.norm(vec1)
+    norm2 = np.linalg.norm(vec2)
+    if norm1 == 0 or norm2 == 0:
+        return 0.0
+    return np.dot(vec1 / norm1, vec2 / norm2)
+def segment_audio(path, target_path='/tmp/setup_voice', seg_duration=1.0):
+    """Split an audio file into consecutive fixed-length WAV segments.
+
+    Segments are written to ``target_path`` as ``0.wav``, ``1.wav``, ... in
+    playback order; the final segment is shorter when the total length is not
+    a multiple of ``seg_duration``. Numbered ``.wav`` files left in
+    ``target_path`` by an earlier call are deleted first, so the directory
+    only holds segments of the current file.
+
+    Args:
+        path: Audio file to split.
+        target_path: Directory that receives the segments (created if missing).
+        seg_duration: Segment length in seconds.
+
+    Returns:
+        A tuple ``(target_path, duration_ms)`` where ``duration_ms`` is the
+        total length of the source audio in milliseconds.
+
+    Raises:
+        ValueError: If ``seg_duration`` is shorter than one millisecond.
+    """
+    seg_duration_ms = int(seg_duration * 1000)
+    if seg_duration_ms <= 0:
+        # A zero step would make range() fail and a negative one would
+        # silently produce no segments; reject both explicitly.
+        raise ValueError("seg_duration must be at least 0.001 seconds")
+    # Load the source before touching target_path, in case it lives there.
+    base_sound = AudioSegment.from_file(path)
+    duration_ms = len(base_sound)
+    os.makedirs(target_path, exist_ok=True)
+    # Remove stale numbered segments from a previous (possibly longer) file;
+    # callers list this directory and would otherwise process them again.
+    for name in os.listdir(target_path):
+        stem, ext = os.path.splitext(name)
+        if ext == '.wav' and stem.isdigit():
+            os.remove(os.path.join(target_path, name))
+    for i, start in enumerate(range(0, duration_ms, seg_duration_ms)):
+        end = min(start + seg_duration_ms, duration_ms)
+        segment = base_sound[start:end]
+        segment.export(os.path.join(target_path, f'{i}.wav'), format="wav")
+    return target_path, duration_ms
+def calculate_similarity(path1, path2):
+    """Return the cosine similarity, as a float, of the embeddings of two audio files.
+
+    Each path is passed to the module-level ``inference`` object; the ``data``
+    of each result is flattened to one dimension before comparison.
+    """
+    first, second = inference(path1), inference(path2)
+    first_vector = first.data.flatten()
+    second_vector = second.data.flatten()
+    score = cosine_similarity(first_vector, second_vector)
+    return float(score)
+def process_audio(reference_path, input_path, output_folder='/tmp/data/matched_segments', seg_duration=1.0, threshold=0.5):
+    """Measure how much of the input recording matches the reference voice.
+
+    The input file is split into ``seg_duration``-second segments, each
+    segment is compared with the reference audio, and segments whose
+    similarity exceeds ``threshold`` are copied into ``output_folder``.
+    Files already present in ``output_folder`` are not removed.
+
+    Args:
+        reference_path: Audio file of the reference voice.
+        input_path: Audio file to analyse.
+        output_folder: Directory that receives matching segments (created if missing).
+        seg_duration: Segment length in seconds.
+        threshold: Similarity above which a segment counts as a match.
+
+    Returns:
+        A tuple ``(matched_time_ms, unmatched_time_ms)`` in milliseconds; the
+        two values add up to the total length of the input audio.
+    """
+    import tempfile  # local import: only this function needs it
+
+    os.makedirs(output_folder, exist_ok=True)
+    matched_time_ms = 0
+    # Segment into a private scratch directory instead of the shared default.
+    # Leftover segments from an earlier (longer) recording, or segments written
+    # by another request, would otherwise be listed below and counted, which
+    # can push matched time past the total and make unmatched time negative.
+    with tempfile.TemporaryDirectory(prefix='segments_') as scratch_dir:
+        segmented_path, total_duration_ms = segment_audio(
+            input_path, target_path=scratch_dir, seg_duration=seg_duration)
+        for file in sorted(os.listdir(segmented_path)):
+            segment_file = os.path.join(segmented_path, file)
+            similarity = calculate_similarity(segment_file, reference_path)
+            if similarity > threshold:
+                shutil.copy(segment_file, output_folder)
+                matched_time_ms += len(AudioSegment.from_file(segment_file))
+    unmatched_time_ms = total_duration_ms - matched_time_ms
+    return matched_time_ms, unmatched_time_ms
+def generate_random_string(length):
+    """Return ``length`` characters drawn at random from ASCII letters and digits."""
+    alphabet = string.ascii_letters + string.digits
+    picked = []
+    for _ in range(length):
+        picked.append(random.choice(alphabet))
+    return ''.join(picked)
+def generate_filename(random_length):
+    """Build a ``.wav`` file name from the current local time and a random suffix.
+
+    The name has the form ``<YYYYmmddHHMMSS>_<random>.wav`` where the random
+    part is ``random_length`` characters long.
+    """
+    suffix = generate_random_string(random_length)
+    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
+    return f"{timestamp}_{suffix}.wav"
+# Flask application instance; the route decorators below register handlers on it.
 app = Flask(__name__)
+# Top page (template: index.html)
 @app.route('/')
 @app.route('/index', methods=['GET', 'POST'])
 def index():
+    # Served at both '/' and '/index' ('/index' accepts GET and POST); always renders index.html.
     return render_template('index.html')
+# Feedback screen (template: feedback.html)
 @app.route('/feedback', methods=['GET', 'POST'])
 def feedback():
+    # Accepts GET and POST; either way the same template is rendered.
+    return render_template('feedback.html')
+# Conversation detail screen (template: talkDetail.html)
 @app.route('/talk_detail', methods=['GET', 'POST'])
 def talk_detail():
+    # Accepts GET and POST; either way the same template is rendered.
+    return render_template('talkDetail.html')
+# Audio upload & analysis endpoint
 @app.route('/upload_audio', methods=['POST'])
 def upload_audio():
+    """Decode an uploaded Base64 recording and report how much of it matches the reference voice.
+
+    Expects a JSON object with an ``audio_data`` key holding Base64-encoded audio.
+
+    Returns:
+        ``{"rate": <percent>}`` with status 200 on success, where the rate is
+        the matched share of the recording's duration (0 when the duration is 0);
+        ``{"error": ...}`` with status 400 for a missing, empty or undecodable
+        payload; ``{"error": ..., "details": ...}`` with status 500 when the
+        reference file is missing or the analysis fails.
+    """
     try:
+        # silent=True makes a malformed or non-JSON body yield None, so it is
+        # answered with 400 below instead of being caught by the generic
+        # handler at the bottom and reported as a 500.
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict) or 'audio_data' not in data:
+            return jsonify({"error": "音声データがありません"}), 400
+        # Decode the Base64 payload; a bad payload is a client error, so answer 400.
+        try:
+            audio_binary = base64.b64decode(data['audio_data'])
+        except (ValueError, TypeError) as decode_err:
+            return jsonify({"error": "Base64デコードに失敗しました", "details": str(decode_err)}), 400
+        if not audio_binary:
+            return jsonify({"error": "音声データがありません"}), 400
+        audio_dir = "/tmp/data"
+        os.makedirs(audio_dir, exist_ok=True)
+        # Fixed file name (switch to generate_filename() if unique names are needed)
+        audio_path = os.path.join(audio_dir, "recorded_audio.wav")
+        with open(audio_path, 'wb') as f:
             f.write(audio_binary)
+        # Reference recording: sample.wav resolved against the current working directory
+        reference_audio = os.path.abspath('./sample.wav')
+        if not os.path.exists(reference_audio):
+            return jsonify({"error": "参照音声ファイルが見つかりません", "details": reference_audio}), 500
+        # Compare each segment of the upload with the reference audio.
+        # The threshold is tunable (0.1 here).
+        matched_time, unmatched_time = process_audio(reference_audio, audio_path, threshold=0.1)
+        total_time = matched_time + unmatched_time
+        rate = (matched_time / total_time) * 100 if total_time > 0 else 0
+        return jsonify({"rate": rate}), 200
     except Exception as e:
+        # Log with traceback through the app logger rather than printing to stdout.
+        app.logger.exception("Error in /upload_audio")
+        return jsonify({"error": "サーバーエラー", "details": str(e)}), 500
 if __name__ == '__main__':
     port = int(os.environ.get("PORT", 7860))