"""Flask service that scores how much of an uploaded recording matches a reference voice.

The uploaded audio is cut into fixed-length segments, each segment is embedded
with the ``pyannote/embedding`` model, and the share of segment time whose
cosine similarity to the reference embedding exceeds a threshold is returned.
"""
import base64
import os
import re
import shutil

import numpy as np
from flask import Flask, request, jsonify, send_from_directory
from pyannote.audio import Model, Inference
from pydub import AudioSegment

# The token is read from ``HF``; ``HUGGINGFACE_HUB_TOKEN`` is accepted as a
# fallback because the original error message told users to set that name.
hf_token = os.environ.get("HF") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
if not hf_token:
    raise ValueError("環境変数 HF または HUGGINGFACE_HUB_TOKEN が設定されていません。")

# Use a writable cache directory for the downloaded model files.
cache_dir = "/tmp/hf_cache"
os.makedirs(cache_dir, exist_ok=True)

# Load the model with the auth token and the cache directory given above.
model = Model.from_pretrained("pyannote/embedding", use_auth_token=hf_token, cache_dir=cache_dir)
inference = Inference(model)

# Default directory that receives the exported segments.
DEFAULT_SEGMENT_DIR = '/tmp/setup_voice'

# File names produced by the segment exporter: "<index>.wav".
_SEGMENT_FILE_RE = re.compile(r"\d+\.wav")


def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two 1-D vectors.

    Returns 0.0 when either vector has zero norm instead of dividing by zero
    (which would produce NaN).
    """
    norm1 = np.linalg.norm(vec1)
    norm2 = np.linalg.norm(vec2)
    if norm1 == 0 or norm2 == 0:
        return 0.0
    return np.dot(vec1 / norm1, vec2 / norm2)


def _clear_segment_files(directory):
    """Delete leftover ``<index>.wav`` files in *directory*; other files are kept."""
    for name in os.listdir(directory):
        candidate = os.path.join(directory, name)
        if _SEGMENT_FILE_RE.fullmatch(name) and os.path.isfile(candidate):
            os.remove(candidate)


def _export_segments(path, target_path, seg_duration):
    """Split the audio at *path* into ``<index>.wav`` files inside *target_path*.

    Returns ``(segments, duration_ms)`` where ``segments`` is a list of
    ``(segment_path, segment_length_ms)`` in playback order and ``duration_ms``
    is the length of the whole recording.

    Raises:
        ValueError: if *seg_duration* is shorter than one millisecond.
    """
    seg_duration_ms = int(seg_duration * 1000)
    if seg_duration_ms <= 0:
        raise ValueError("seg_duration must be at least 0.001 seconds")

    os.makedirs(target_path, exist_ok=True)
    # Load the input before clearing, in case it lives inside target_path.
    base_sound = AudioSegment.from_file(path)
    # Segments left over from an earlier, longer recording must not survive:
    # anything listing this directory would treat them as part of this input.
    _clear_segment_files(target_path)

    duration_ms = len(base_sound)
    segments = []
    for i, start in enumerate(range(0, duration_ms, seg_duration_ms)):
        end = min(start + seg_duration_ms, duration_ms)
        segment = base_sound[start:end]
        segment_path = os.path.join(target_path, f'{i}.wav')
        segment.export(segment_path, format="wav")
        segments.append((segment_path, len(segment)))
    return segments, duration_ms


def segment_audio(path, target_path=DEFAULT_SEGMENT_DIR, seg_duration=1.0):
    """Split the audio into pieces of *seg_duration* seconds.

    Segments are written to *target_path* as ``0.wav``, ``1.wav``, ...; stale
    files with such names are removed first. Returns
    ``(target_path, duration_ms)`` with the total input length in milliseconds.
    """
    _, duration_ms = _export_segments(path, target_path, seg_duration)
    return target_path, duration_ms


def _embed(path):
    """Return one 1-D embedding vector for the audio file at *path*.

    NOTE(review): per the pyannote/embedding model card, sliding-window
    inference yields an (N x D) feature object exposing ``.data`` and
    whole-file inference yields a plain array -- confirm for the installed
    version. Rows are averaged so files with different window counts give
    vectors of equal length; for a single window this equals the flattened
    output.
    """
    output = inference(path)
    raw = output if isinstance(output, np.ndarray) else output.data
    values = np.atleast_1d(np.asarray(raw))
    return values.reshape(-1, values.shape[-1]).mean(axis=0)


def calculate_similarity(path1, path2):
    """Return the cosine similarity (as ``float``) between two audio files' embeddings."""
    return float(cosine_similarity(_embed(path1), _embed(path2)))


def process_audio(reference_path, input_path, output_folder='/tmp/data/matched_segments', seg_duration=1.0, threshold=0.5):
    """Measure how much of *input_path* sounds like *reference_path*.

    The input is segmented into *seg_duration*-second pieces; every piece whose
    similarity to the reference is strictly greater than *threshold* is copied
    into *output_folder* and its length counted as matched.

    Returns:
        ``(matched_time_ms, unmatched_time_ms)``; the two always sum to the
        total input duration.
    """
    os.makedirs(output_folder, exist_ok=True)
    # The reference embedding is the same for every segment: compute it once.
    reference_embedding = _embed(reference_path)
    # Drop matches left by a previous run so the folder reflects this input only.
    _clear_segment_files(output_folder)

    segments, total_duration_ms = _export_segments(input_path, DEFAULT_SEGMENT_DIR, seg_duration)

    matched_time_ms = 0
    for segment_file, segment_ms in segments:
        similarity = float(cosine_similarity(_embed(segment_file), reference_embedding))
        if similarity > threshold:
            shutil.copy(segment_file, output_folder)
            matched_time_ms += segment_ms

    unmatched_time_ms = total_duration_ms - matched_time_ms
    return matched_time_ms, unmatched_time_ms


app = Flask(__name__)


@app.route('/')
def index():
    """Serve ``index.html`` from the current working directory."""
    return send_from_directory('.', 'index.html')


@app.route('/upload_audio', methods=['POST'])
def upload_audio():
    """Score a base64-encoded recording against the reference voice.

    Expects a JSON object with an ``audio_data`` key holding base64 audio.
    Responds with ``{"rate": <percentage of matched time>}`` on success, 400
    for a missing or undecodable payload, and 500 for any other failure.
    """
    try:
        # silent=True: a malformed or non-JSON body is a client error (400),
        # not an exception to be reported as a server error.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'audio_data' not in data:
            return jsonify({"error": "音声データがありません"}), 400

        try:
            audio_binary = base64.b64decode(data['audio_data'])
        except (ValueError, TypeError) as e:
            return jsonify({"error": "音声データをデコードできません", "details": str(e)}), 400
        if not audio_binary:
            return jsonify({"error": "音声データがありません"}), 400

        audio_path = "/tmp/data/recorded_audio.wav"
        os.makedirs(os.path.dirname(audio_path), exist_ok=True)
        with open(audio_path, 'wb') as f:
            f.write(audio_binary)

        # Make sure the reference audio path is correct: either switch to an
        # absolute path for sample.wav or place the file in the right location.
        reference_audio = './sample.wav'
        if not os.path.exists(reference_audio):
            return jsonify({"error": "参照音声ファイルが見つかりません", "details": reference_audio}), 500

        matched_time, unmatched_time = process_audio(reference_audio, audio_path, threshold=0.1)
        total_time = matched_time + unmatched_time
        rate = (matched_time / total_time) * 100 if total_time > 0 else 0
        return jsonify({"rate": rate}), 200
    except Exception as e:
        # Top-level boundary: log with traceback and report a generic failure.
        app.logger.exception("Error in /upload_audio")
        return jsonify({"error": "サーバーエラー", "details": str(e)}), 500


if __name__ == '__main__':
    # The interactive debugger allows code execution, so it must be opted into
    # explicitly rather than enabled by default on a 0.0.0.0 listener.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)