Spaces:
Running
Running
File size: 3,925 Bytes
7475ce4 524d49f 074316c fd9ed96 8fa0d1d 074316c 1cfebfc 074316c 8ff5245 074316c 524d49f 834ef3b 074316c 524d49f 8ff5245 524d49f 7475ce4 074316c 524d49f 074316c 8ff5245 412e118 8ff5245 074316c 8ff5245 074316c 524d49f 8ff5245 074316c 524d49f 074316c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
from flask import Flask, request, jsonify, send_from_directory
import base64
import os
import shutil
import numpy as np
from pyannote.audio import Model, Inference
from pydub import AudioSegment
# Hugging Face access token used to download the embedding model.
# ``HF`` is the variable this app reads; ``HUGGINGFACE_HUB_TOKEN`` is accepted
# as a fallback because the error message below has always pointed users at
# that name. An empty value is treated the same as a missing one.
hf_token = os.environ.get("HF") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
if not hf_token:
    raise ValueError("環境変数 HF または HUGGINGFACE_HUB_TOKEN が設定されていません。")

# Use a writable cache directory for the downloaded model files.
cache_dir = "/tmp/hf_cache"
os.makedirs(cache_dir, exist_ok=True)

# Load the model with the auth token and cache directory, then wrap it in an
# Inference object that the similarity helpers call with audio file paths.
model = Model.from_pretrained("pyannote/embedding", use_auth_token=hf_token, cache_dir=cache_dir)
inference = Inference(model)
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two vectors.

    Each vector is scaled to unit length and the dot product of the results
    is returned.

    Args:
        vec1: First vector (array-like of numbers).
        vec2: Second vector; must be dot-compatible with ``vec1``.

    Returns:
        The cosine similarity, or ``0.0`` when either vector has zero norm
        (the similarity is undefined there; dividing would yield NaN).
    """
    vec1 = np.asarray(vec1)
    vec2 = np.asarray(vec2)
    norm1 = np.linalg.norm(vec1)
    norm2 = np.linalg.norm(vec2)
    # Guard against division by zero for all-zero vectors.
    if norm1 == 0 or norm2 == 0:
        return 0.0
    return np.dot(vec1 / norm1, vec2 / norm2)
def segment_audio(path, target_path='/tmp/setup_voice', seg_duration=1.0):
    """Split an audio file into consecutive fixed-length WAV segments.

    Segments are written to ``target_path`` as ``0.wav``, ``1.wav``, ... in
    playback order; the final segment may be shorter than ``seg_duration``.
    Numbered segment files left in ``target_path`` by an earlier, longer
    input are removed so the directory only holds segments of this input.

    Args:
        path: Audio file to split (read with ``AudioSegment.from_file``).
        target_path: Directory that receives the segments; created if missing.
        seg_duration: Segment length in seconds.

    Returns:
        Tuple ``(target_path, duration_ms)`` where ``duration_ms`` is the
        length of the source audio in milliseconds.

    Raises:
        ValueError: If ``seg_duration`` is shorter than one millisecond.
    """
    seg_duration_ms = int(seg_duration * 1000)
    if seg_duration_ms <= 0:
        # range() would reject a zero step with an opaque message and would
        # silently yield nothing for a negative one.
        raise ValueError("seg_duration must be at least 0.001 seconds")

    os.makedirs(target_path, exist_ok=True)
    base_sound = AudioSegment.from_file(path)
    duration_ms = len(base_sound)

    segment_count = 0
    for start in range(0, duration_ms, seg_duration_ms):
        end = min(start + seg_duration_ms, duration_ms)
        segment = base_sound[start:end]
        segment.export(os.path.join(target_path, f'{segment_count}.wav'), format="wav")
        segment_count += 1

    # Drop stale "<index>.wav" files with an index beyond what was just
    # written; callers list this directory and would otherwise process them.
    for name in os.listdir(target_path):
        stem, ext = os.path.splitext(name)
        if ext != '.wav' or not (stem.isascii() and stem.isdigit()):
            continue
        if name != f'{int(stem)}.wav' or int(stem) < segment_count:
            continue
        stale_file = os.path.join(target_path, name)
        if os.path.isfile(stale_file):
            os.remove(stale_file)

    return target_path, duration_ms
def calculate_similarity(path1, path2):
    """Return the cosine similarity between the embeddings of two audio files.

    Each path is passed to the module-level ``inference`` object; the
    ``.data`` of each result is flattened to one dimension before comparison.

    NOTE(review): this assumes both flattened embeddings have the same number
    of elements -- confirm for inputs of different durations.

    Args:
        path1: Path of the first audio file.
        path2: Path of the second audio file.

    Returns:
        The similarity as a Python ``float``.
    """
    flat_vectors = [
        inference(audio_path).data.flatten() for audio_path in (path1, path2)
    ]
    score = cosine_similarity(*flat_vectors)
    return float(score)
def process_audio(reference_path, input_path, output_folder='/tmp/data/matched_segments', seg_duration=1.0, threshold=0.5):
    """Measure how much of ``input_path`` resembles the reference audio.

    The input is split into ``seg_duration``-second segments. Each segment's
    embedding is compared with the reference embedding, and segments whose
    similarity exceeds ``threshold`` are copied into ``output_folder``.

    Args:
        reference_path: Audio file to compare against.
        input_path: Audio file to analyse.
        output_folder: Directory that receives copies of matching segments;
            created if missing. Files already there are left in place.
        seg_duration: Segment length in seconds.
        threshold: Similarity a segment must exceed to count as a match.

    Returns:
        Tuple ``(matched_time_ms, unmatched_time_ms)`` in milliseconds.
    """
    import tempfile  # local import so this block does not depend on file-level imports

    os.makedirs(output_folder, exist_ok=True)
    matched_time_ms = 0

    # Segment into a fresh per-call directory: a shared fixed directory keeps
    # segments from earlier (longer) inputs, which would be scored as part of
    # this input and inflate the matched time.
    with tempfile.TemporaryDirectory(prefix='segments_') as segment_dir:
        base_path, total_duration_ms = segment_audio(
            input_path, target_path=segment_dir, seg_duration=seg_duration
        )

        # The reference embedding is the same for every segment; compute it once.
        reference_vec = inference(reference_path).data.flatten()

        for file in sorted(os.listdir(base_path)):
            segment_file = os.path.join(base_path, file)
            segment_vec = inference(segment_file).data.flatten()
            similarity = float(cosine_similarity(segment_vec, reference_vec))
            if similarity > threshold:
                shutil.copy(segment_file, output_folder)
                matched_time_ms += len(AudioSegment.from_file(segment_file))

    unmatched_time_ms = total_duration_ms - matched_time_ms
    return matched_time_ms, unmatched_time_ms
# Flask application object that the route decorators below register on.
app = Flask(__name__)


@app.route('/')
def index():
    """Serve ``index.html`` as the landing page."""
    page_dir, page_name = '.', 'index.html'
    return send_from_directory(page_dir, page_name)
@app.route('/upload_audio', methods=['POST'])
def upload_audio():
    """Score an uploaded recording against the reference voice sample.

    Expects a JSON object with an ``audio_data`` field holding the
    base64-encoded audio. The decoded bytes are saved to
    ``/tmp/data/recorded_audio.wav`` and compared with ``./sample.wav``.

    Returns:
        ``({"rate": <percentage of matched time>}, 200)`` on success,
        a 400 response when the request body is missing, is not a JSON
        object, lacks ``audio_data`` or carries invalid base64, and a 500
        response when the reference file is missing or processing fails.
    """
    try:
        # silent=True makes a non-JSON or malformed body yield None, so it is
        # reported as a client error (400) below rather than a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'audio_data' not in data:
            return jsonify({"error": "音声データがありません"}), 400

        try:
            audio_binary = base64.b64decode(data['audio_data'])
        except (ValueError, TypeError) as e:
            # binascii.Error is a ValueError subclass; TypeError covers
            # non-string payloads. Both are the client's fault.
            return jsonify({"error": "音声データのデコードに失敗しました", "details": str(e)}), 400

        audio_path = "/tmp/data/recorded_audio.wav"
        os.makedirs(os.path.dirname(audio_path), exist_ok=True)
        with open(audio_path, 'wb') as f:
            f.write(audio_binary)

        # Make sure the reference audio exists before processing.
        # Change this to an absolute path to sample.wav, or place the file
        # where this relative path resolves.
        reference_audio = './sample.wav'
        if not os.path.exists(reference_audio):
            return jsonify({"error": "参照音声ファイルが見つかりません", "details": reference_audio}), 500

        matched_time, unmatched_time = process_audio(reference_audio, audio_path, threshold=0.1)
        total_time = matched_time + unmatched_time
        rate = (matched_time / total_time) * 100 if total_time > 0 else 0
        return jsonify({"rate": rate}), 200
    except Exception as e:
        # Top-level boundary: log with traceback and return a generic 500.
        # Returning the exception text is intended for debugging only.
        app.logger.exception("Error in /upload_audio")
        return jsonify({"error": "サーバーエラー", "details": str(e)}), 500
if __name__ == '__main__':
    # The server listens on all interfaces, and the Werkzeug interactive
    # debugger lets anyone who can reach it execute arbitrary code. Debug mode
    # is therefore off unless explicitly requested through FLASK_DEBUG.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)
|