# JusTalk_test / app.py
# rein0421's picture
# Update app.py
# 412e118 verified
from flask import Flask, request, jsonify, send_from_directory
import base64
import os
import shutil
import numpy as np
from pyannote.audio import Model, Inference
from pydub import AudioSegment
# Hugging Face access token, read from the "HF" environment variable and
# passed to Model.from_pretrained below.
hf_token = os.environ.get("HF")
if not hf_token:
    # An unset or empty token cannot authenticate, so fail fast at import
    # time. The message names the variable this code actually reads.
    raise ValueError("環境変数 HF (Hugging Face のアクセストークン) が設定されていません。")

# Use an explicitly created, writable directory as the model cache.
cache_dir = "/tmp/hf_cache"
os.makedirs(cache_dir, exist_ok=True)

# Load the embedding model with the auth token and cache directory, then wrap
# it in an Inference object that is called with audio file paths.
model = Model.from_pretrained("pyannote/embedding", use_auth_token=hf_token, cache_dir=cache_dir)
inference = Inference(model)
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two vectors.

    Args:
        vec1: First vector (NumPy array or any array-like of numbers).
        vec2: Second vector, with the same number of elements as ``vec1``.

    Returns:
        The dot product of the two L2-normalized vectors. When either
        vector has zero norm the cosine is undefined; ``0.0`` is returned
        in that case instead of NaN.
    """
    # asarray lets callers pass plain lists/tuples; arrays pass through as-is.
    vec1 = np.asarray(vec1)
    vec2 = np.asarray(vec2)

    norm1 = np.linalg.norm(vec1)
    norm2 = np.linalg.norm(vec2)
    if norm1 == 0 or norm2 == 0:
        # Avoid dividing by zero (which would yield NaN plus a RuntimeWarning).
        return 0.0

    return np.dot(vec1 / norm1, vec2 / norm2)
def segment_audio(path, target_path='/tmp/setup_voice', seg_duration=1.0):
    """Split an audio file into consecutive fixed-length WAV segments.

    Segments are written to ``target_path`` as ``0.wav``, ``1.wav``, ... in
    playback order; the final segment may be shorter than ``seg_duration``.
    Numbered ``.wav`` files already present in ``target_path`` are removed
    first so the directory only holds segments of the current input.

    Args:
        path: Path of the audio file to split.
        target_path: Directory that receives the segment files (created if
            missing).
        seg_duration: Segment length in seconds.

    Returns:
        A tuple ``(target_path, duration_ms)`` where ``duration_ms`` is the
        length of the whole input in milliseconds.

    Raises:
        ValueError: If ``seg_duration`` is shorter than one millisecond.
    """
    seg_duration_ms = int(seg_duration * 1000)
    if seg_duration_ms <= 0:
        # A zero step would make range() fail with an unrelated message and a
        # negative one would silently produce no segments at all.
        raise ValueError("seg_duration must be at least 0.001 seconds")

    # Load the input before touching the directory, in case the input itself
    # lives in target_path under a numbered name.
    base_sound = AudioSegment.from_file(path)
    duration_ms = len(base_sound)

    os.makedirs(target_path, exist_ok=True)
    # Drop segments left over from an earlier (possibly longer) input; callers
    # that list this directory would otherwise process them again.
    for name in os.listdir(target_path):
        stem, ext = os.path.splitext(name)
        stale = os.path.join(target_path, name)
        if ext == '.wav' and stem.isdigit() and os.path.isfile(stale):
            os.remove(stale)

    for i, start in enumerate(range(0, duration_ms, seg_duration_ms)):
        end = min(start + seg_duration_ms, duration_ms)
        segment = base_sound[start:end]
        segment.export(os.path.join(target_path, f'{i}.wav'), format="wav")

    return target_path, duration_ms
def calculate_similarity(path1, path2):
    """Return the cosine similarity between the embeddings of two audio files.

    Each file is passed to the module-level ``inference`` object. Its result
    may contain one embedding per analysis window, so the number of rows can
    depend on the audio length (NOTE(review): assumes ``.data`` is shaped
    ``(windows, dim)`` -- confirm against the pyannote version in use). The
    rows are averaged into a single fixed-size vector per file; flattening
    them instead would give vectors of different lengths for files of
    different durations and make the dot product fail. When a file yields a
    single row the averaged vector equals the flattened one.

    Args:
        path1: Path of the first audio file.
        path2: Path of the second audio file.

    Returns:
        The cosine similarity as a Python ``float``.
    """
    pooled = []
    for audio_path in (path1, path2):
        features = np.asarray(inference(audio_path).data)
        # Collapse to (rows, dim) and average over rows -> one vector of size dim.
        pooled.append(features.reshape(-1, features.shape[-1]).mean(axis=0))
    return float(cosine_similarity(pooled[0], pooled[1]))
def process_audio(reference_path, input_path, output_folder='/tmp/data/matched_segments', seg_duration=1.0, threshold=0.5):
    """Measure how much of an input recording matches a reference voice.

    The input is split into ``seg_duration``-second segments with
    ``segment_audio``; each segment is compared to the reference with
    ``calculate_similarity``. Segments whose similarity is strictly greater
    than ``threshold`` are copied into ``output_folder`` and their duration is
    counted as matched.

    Only the segments belonging to this input (``0.wav`` .. ``n-1.wav``) are
    visited, so files left in the segment directory by earlier calls cannot
    inflate the matched time. Durations are derived from the segment
    boundaries, which keeps ``matched + unmatched`` equal to the input length
    and avoids decoding every matched segment a second time.

    Args:
        reference_path: Audio file of the reference voice.
        input_path: Audio file to analyse.
        output_folder: Directory that receives copies of matched segments
            (created if missing; existing files are not removed).
        seg_duration: Segment length in seconds.
        threshold: Similarity above which a segment counts as matched.

    Returns:
        A tuple ``(matched_time_ms, unmatched_time_ms)``.

    Raises:
        ValueError: If ``seg_duration`` is shorter than one millisecond.
    """
    seg_duration_ms = int(seg_duration * 1000)
    if seg_duration_ms <= 0:
        raise ValueError("seg_duration must be at least 0.001 seconds")

    os.makedirs(output_folder, exist_ok=True)
    base_path, total_duration_ms = segment_audio(input_path, seg_duration=seg_duration)

    matched_time_ms = 0
    # Mirror segment_audio's numbering: segment i starts at i * seg_duration_ms.
    for index, start_ms in enumerate(range(0, total_duration_ms, seg_duration_ms)):
        segment_file = os.path.join(base_path, f'{index}.wav')
        similarity = calculate_similarity(segment_file, reference_path)
        if similarity > threshold:
            shutil.copy(segment_file, output_folder)
            # The last segment may be shorter than seg_duration_ms.
            matched_time_ms += min(seg_duration_ms, total_duration_ms - start_ms)

    unmatched_time_ms = total_duration_ms - matched_time_ms
    return matched_time_ms, unmatched_time_ms
# Flask application object; the @app.route decorators below register the
# request handlers on it.
app = Flask(__name__)
@app.route('/')
def index():
    """Serve ``index.html`` in response to requests for the site root."""
    directory, filename = '.', 'index.html'
    return send_from_directory(directory, filename)
@app.route('/upload_audio', methods=['POST'])
def upload_audio():
    """Score an uploaded recording against the reference voice.

    Expects a JSON object whose ``audio_data`` field holds the base64-encoded
    audio. The decoded audio is written to ``/tmp/data/recorded_audio.wav``
    and compared with ``./sample.wav`` via ``process_audio``.

    Returns:
        * ``200`` with ``{"rate": <percentage of matched time>}`` on success.
        * ``400`` with ``{"error": ...}`` when the body is not a JSON object,
          lacks ``audio_data``, or the audio cannot be decoded / is empty.
        * ``500`` with ``{"error": ..., "details": ...}`` when the reference
          file is missing or processing fails.
    """
    try:
        # silent=True returns None for a non-JSON body instead of raising, so
        # malformed client input is reported as 400 rather than as a 500 from
        # the blanket handler below.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'audio_data' not in data:
            return jsonify({"error": "音声データがありません"}), 400

        try:
            audio_binary = base64.b64decode(data['audio_data'])
        except (TypeError, ValueError):
            # binascii.Error (bad padding) is a ValueError; TypeError covers
            # non-string payloads such as numbers or nested objects.
            return jsonify({"error": "音声データのデコードに失敗しました"}), 400
        if not audio_binary:
            return jsonify({"error": "音声データがありません"}), 400

        audio_path = "/tmp/data/recorded_audio.wav"
        os.makedirs(os.path.dirname(audio_path), exist_ok=True)
        with open(audio_path, 'wb') as f:
            f.write(audio_binary)

        # Make sure the reference audio path is correct.
        # NOTE: this is resolved against the current working directory; switch
        # to an absolute path or place sample.wav accordingly.
        reference_audio = './sample.wav'
        if not os.path.exists(reference_audio):
            return jsonify({"error": "参照音声ファイルが見つかりません", "details": reference_audio}), 500

        matched_time, unmatched_time = process_audio(reference_audio, audio_path, threshold=0.1)
        total_time = matched_time + unmatched_time
        # Guard against a zero-length recording.
        rate = (matched_time / total_time) * 100 if total_time > 0 else 0
        return jsonify({"rate": rate}), 200
    except Exception as e:
        # Print the error for debugging (recommended only while debugging).
        # NOTE(review): "details" exposes internal error text to the client;
        # consider dropping it outside of development.
        print("Error in /upload_audio:", str(e))
        return jsonify({"error": "サーバーエラー", "details": str(e)}), 500
if __name__ == '__main__':
    # The server listens on all interfaces, and Flask's debug mode enables an
    # interactive debugger that can execute arbitrary code. Keep debug off
    # unless it is explicitly requested through the FLASK_DEBUG environment
    # variable.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)