Spaces:

Justtalk
/

JusTalk

Running

App Files Files Community

rein0421 committed on Mar 17

Commit

db2374b

verified ·

1 Parent(s): 4ea224b

Upload 2 files

Browse files

Files changed (2) hide show

app.py +39 -8
process.py +19 -12

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from flask import Flask, request, jsonify, render_template, send_from_directory
 import base64
 import os
 import shutil
 from process import AudioProcessor
@@ -24,11 +24,6 @@ def feedback():
 def talk_detail():
     return render_template('talkDetail.html')
-# 会話履歴画面（テンプレート: history.html）
-@app.route('/history', methods=['GET', 'POST'])
-def history():
-    return render_template('history.html')
 # 音声アップロード＆解析エンドポイント
 @app.route('/upload_audio', methods=['POST'])
 def upload_audio():
@@ -47,13 +42,13 @@ def upload_audio():
             f.write(audio_binary)
         # 参照音声ファイルのパスを指定（sample.wav を正しい場所に配置すること）
-        reference_audio = os.path.abspath('./sample.wav')
         if not os.path.exists(reference_audio):
             return jsonify({"error": "参照音声ファイルが見つかりません", "details": reference_audio}), 500
         # 音声解析：参照音声とアップロードされた音声との類似度をセグメント毎に計算
         # threshold の値は調整可能です（例: 0.1）
-        matched_time, unmatched_time = process.process_audio(reference_audio, audio_path, threshold=0.1)
         total_time = matched_time + unmatched_time
         rate = (matched_time / total_time) * 100 if total_time > 0 else 0
@@ -61,6 +56,42 @@ def upload_audio():
     except Exception as e:
         print("Error in /upload_audio:", str(e))
         return jsonify({"error": "サーバーエラー", "details": str(e)}), 500
 if __name__ == '__main__':
     port = int(os.environ.get("PORT", 7860))

 from flask import Flask, request, jsonify, render_template, send_from_directory
 import base64
+from pydub import AudioSegment  # 変換用にpydubをインポート
 import os
 import shutil
 from process import AudioProcessor
 def talk_detail():
     return render_template('talkDetail.html')
 # 音声アップロード＆解析エンドポイント
 @app.route('/upload_audio', methods=['POST'])
 def upload_audio():
             f.write(audio_binary)
         # 参照音声ファイルのパスを指定（sample.wav を正しい場所に配置すること）
+        reference_audio = os.path.abspath('/tmp/data/base_audio/recorded_base_audio.wav')
         if not os.path.exists(reference_audio):
             return jsonify({"error": "参照音声ファイルが見つかりません", "details": reference_audio}), 500
         # 音声解析：参照音声とアップロードされた音声との類似度をセグメント毎に計算
         # threshold の値は調整可能です（例: 0.1）
+        matched_time, unmatched_time = process.process_audio(reference_audio, audio_path, threshold=0.05)
         total_time = matched_time + unmatched_time
         rate = (matched_time / total_time) * 100 if total_time > 0 else 0
     except Exception as e:
         print("Error in /upload_audio:", str(e))
         return jsonify({"error": "サーバーエラー", "details": str(e)}), 500
+@app.route('/upload_base_audio', methods=['POST'])  # POST-only endpoint that stores the reference ("base") recording
+def upload_base_audio():  # returns a (JSON response, HTTP status) pair: 200 on success, 400 on missing data, 500 on any exception
+    try:
+        data = request.get_json()
+        if not data or 'audio_data' not in data:  # reject an empty body or a body without the 'audio_data' key
+            return jsonify({"error": "音声データがありません"}), 400
+        # Base64-decode the payload to obtain the raw audio bytes
+        audio_binary = base64.b64decode(data['audio_data'])
+        # Create the destination directory if it does not exist yet
+        audio_dir = "/tmp/data/base_audio"
+        os.makedirs(audio_dir, exist_ok=True)
+        # Save the bytes to a temporary file (the actual container format is assumed to be WebM or similar)
+        temp_audio_path = os.path.join(audio_dir, "temp_audio")
+        with open(temp_audio_path, 'wb') as f:
+            f.write(audio_binary)
+        # Convert the temporary file to WAV with pydub
+        # NOTE: WebM is assumed here; change the format argument to match the format actually uploaded
+        try:
+            audio = AudioSegment.from_file(temp_audio_path, format="webm")
+        except Exception as e:  # NOTE(review): the bound exception is never used in this handler
+            # If the format is unknown, let pydub auto-detect it (conversion may still fail)
+            audio = AudioSegment.from_file(temp_audio_path)
+        wav_audio_path = os.path.join(audio_dir, "recorded_base_audio.wav")  # same path that upload_audio reads as its reference audio
+        audio.export(wav_audio_path, format="wav")
+        # Delete the temporary file
+        os.remove(temp_audio_path)  # NOTE(review): not reached if loading or export raises, so temp_audio stays on disk in that case
+        return jsonify({"state": "Registration Success!"}), 200
+    except Exception as e:  # catch-all boundary: print the error and report it to the client as a 500
+        print("Error in /upload_base_audio:", str(e))
+        return jsonify({"error": "サーバーエラー", "details": str(e)}), 500
 if __name__ == '__main__':
     port = int(os.environ.get("PORT", 7860))

process.py CHANGED Viewed

@@ -11,7 +11,6 @@ class AudioProcessor():
     def __init__(self,cache_dir = "/tmp/hf_cache"):
         hf_token = os.environ.get("HF")
         if hf_token is None:
-            print('3')
             raise ValueError("HUGGINGFACE_HUB_TOKEN が設定されていません。")
         os.makedirs(cache_dir, exist_ok=True)
         # pyannote モデルの読み込み
@@ -24,10 +23,15 @@ class AudioProcessor():
         return np.dot(vec1, vec2)
     def segment_audio(self, path, target_path='/tmp/setup_voice', seg_duration=1.0):
-        """
-        音声を指定秒数ごとに分割し、短いセグメントには無音をパディングする。
-        """
-        os.makedirs(target_path, exist_ok=True)
         base_sound = AudioSegment.from_file(path)
         duration_ms = len(base_sound)
         seg_duration_ms = int(seg_duration * 1000)
@@ -50,13 +54,16 @@ class AudioProcessor():
         embedding2 = self.inference(path2)
         return float(self.cosine_similarity(embedding1.data.flatten(), embedding2.data.flatten()))
-    def process_audio(self,reference_path, input_path, output_folder='/tmp/data/matched_segments', seg_duration=1.0, threshold=0.5):
-        """
-        入力音声ファイルを seg_duration 秒ごとに分割し、各セグメントと参照音声の類似度を計算。
-        類似度が threshold を超えたセグメントを output_folder にコピーし、マッチした時間（ms）と
-        マッチしなかった時間（ms）を返す。
-        """
-        os.makedirs(output_folder, exist_ok=True)
         segmented_path, total_duration_ms = self.segment_audio(input_path, seg_duration=seg_duration)
         matched_time_ms = 0

     def __init__(self,cache_dir = "/tmp/hf_cache"):
         hf_token = os.environ.get("HF")
         if hf_token is None:
             raise ValueError("HUGGINGFACE_HUB_TOKEN が設定されていません。")
         os.makedirs(cache_dir, exist_ok=True)
         # pyannote モデルの読み込み
         return np.dot(vec1, vec2)
     def segment_audio(self, path, target_path='/tmp/setup_voice', seg_duration=1.0):
+        # 出力先ディレクトリが存在していれば中身をクリアする
+        if os.path.exists(target_path):
+            for file in os.listdir(target_path):
+                file_path = os.path.join(target_path, file)
+                if os.path.isfile(file_path):
+                    os.remove(file_path)
+        else:
+            os.makedirs(target_path, exist_ok=True)
         base_sound = AudioSegment.from_file(path)
         duration_ms = len(base_sound)
         seg_duration_ms = int(seg_duration * 1000)
         embedding2 = self.inference(path2)
         return float(self.cosine_similarity(embedding1.data.flatten(), embedding2.data.flatten()))
+    def process_audio(self, reference_path, input_path, output_folder='/tmp/data/matched_segments', seg_duration=1.0, threshold=0.5):
+        # 出力先ディレクトリの中身をクリアする
+        if os.path.exists(output_folder):
+            for file in os.listdir(output_folder):
+                file_path = os.path.join(output_folder, file)
+                if os.path.isfile(file_path):
+                    os.remove(file_path)
+        else:
+            os.makedirs(output_folder, exist_ok=True)
         segmented_path, total_duration_ms = self.segment_audio(input_path, seg_duration=seg_duration)
         matched_time_ms = 0