File size: 3,925 Bytes
7475ce4
524d49f
 
074316c
 
 
 
 
fd9ed96
8fa0d1d
 
074316c
1cfebfc
 
 
 
 
 
074316c
 
 
 
 
 
 
8ff5245
074316c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524d49f
 
 
 
834ef3b
074316c
524d49f
8ff5245
 
524d49f
 
 
7475ce4
074316c
 
 
 
 
 
 
524d49f
074316c
8ff5245
412e118
8ff5245
 
 
074316c
8ff5245
 
074316c
 
524d49f
8ff5245
 
074316c
524d49f
 
074316c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
from flask import Flask, request, jsonify, send_from_directory
import base64
import os
import shutil
import numpy as np
from pyannote.audio import Model, Inference
from pydub import AudioSegment

# Hugging Face access token, read from the "HF" environment variable.
hf_token = os.environ.get("HF")
if not hf_token:
    # Name the variable that is actually read so the failure is actionable;
    # an empty value is treated the same as a missing one.
    raise ValueError("環境変数 HF (Hugging Face トークン) が設定されていません。")

# Point the model cache at a writable directory.
cache_dir = "/tmp/hf_cache"
os.makedirs(cache_dir, exist_ok=True)

# Load the embedding model, passing both the auth token and the cache directory.
model = Model.from_pretrained("pyannote/embedding", use_auth_token=hf_token, cache_dir=cache_dir)
# Shared inference wrapper used by calculate_similarity().
inference = Inference(model)

def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two vectors.

    Args:
        vec1: First vector (NumPy array or any array-like).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        The dot product of the two L2-normalised vectors. If either vector
        has zero norm the similarity is undefined; ``0.0`` is returned
        instead of propagating NaN from a division by zero.
    """
    vec1 = np.asarray(vec1)
    vec2 = np.asarray(vec2)
    norm1 = np.linalg.norm(vec1)
    norm2 = np.linalg.norm(vec2)
    # Guard against 0/0: an all-zero vector has no direction to compare.
    if norm1 == 0 or norm2 == 0:
        return 0.0
    return np.dot(vec1 / norm1, vec2 / norm2)

def segment_audio(path, target_path='/tmp/setup_voice', seg_duration=1.0):
    """Split an audio file into consecutive fixed-length WAV segments.

    Segments are written to ``target_path`` as ``0.wav``, ``1.wav``, ...;
    the last segment may be shorter than ``seg_duration``.

    Args:
        path: Audio file to split (any format ``AudioSegment.from_file`` reads).
        target_path: Directory that receives the segment files; created if
            missing. Numbered ``.wav`` files already in it are removed first.
        seg_duration: Segment length in seconds; must be at least 1 ms.

    Returns:
        Tuple ``(target_path, duration_ms)`` where ``duration_ms`` is the
        length of the source audio in milliseconds.

    Raises:
        ValueError: If ``seg_duration`` is shorter than one millisecond.
    """
    seg_duration_ms = int(seg_duration * 1000)
    if seg_duration_ms <= 0:
        # A zero step would make range() fail with an unrelated message and a
        # negative one would silently produce no segments.
        raise ValueError("seg_duration must be at least 0.001 seconds")

    # Load before touching target_path so a bad input leaves it untouched.
    base_sound = AudioSegment.from_file(path)
    duration_ms = len(base_sound)

    os.makedirs(target_path, exist_ok=True)
    # Drop numbered segments left by an earlier call: callers list this
    # directory, so leftovers from a longer recording would be treated as part
    # of the current one.
    for name in os.listdir(target_path):
        stem, ext = os.path.splitext(name)
        stale = os.path.join(target_path, name)
        if ext == '.wav' and stem.isdigit() and os.path.isfile(stale):
            os.remove(stale)

    for i, start in enumerate(range(0, duration_ms, seg_duration_ms)):
        end = min(start + seg_duration_ms, duration_ms)
        segment = base_sound[start:end]
        segment.export(os.path.join(target_path, f'{i}.wav'), format="wav")

    return target_path, duration_ms

def calculate_similarity(path1, path2):
    """Return the cosine similarity between the embeddings of two audio files.

    Each file is run through the module-level ``inference`` object, the
    resulting ``.data`` is flattened to one dimension, and the two vectors
    are compared with ``cosine_similarity``. The score is returned as a
    plain Python ``float``.

    NOTE(review): both flattened embeddings must have the same length for the
    dot product to work; whether that holds for inputs of different duration
    depends on how ``inference`` is configured — confirm.
    """
    # Embed path1 first, then path2.
    vectors = [inference(audio_path).data.flatten() for audio_path in (path1, path2)]
    score = cosine_similarity(vectors[0], vectors[1])
    return float(score)

def process_audio(reference_path, input_path, output_folder='/tmp/data/matched_segments', seg_duration=1.0, threshold=0.5):
    """Measure how much of an audio file matches a reference voice.

    The input is split into ``seg_duration``-second segments; each segment is
    compared with the reference via ``calculate_similarity``. Segments whose
    similarity exceeds ``threshold`` are copied to ``output_folder`` and their
    durations are summed.

    Args:
        reference_path: Audio file containing the reference voice.
        input_path: Audio file to analyse.
        output_folder: Directory receiving copies of the matching segments;
            created if missing.
        seg_duration: Segment length in seconds.
        threshold: A segment matches when its similarity is strictly greater
            than this value.

    Returns:
        Tuple ``(matched_time_ms, unmatched_time_ms)``.
    """
    import tempfile  # local import so this block does not rely on file-level imports

    os.makedirs(output_folder, exist_ok=True)

    matched_time_ms = 0
    # Segment into a private, per-call directory: a shared directory keeps
    # files from earlier (or concurrent) calls, which would then be scored as
    # if they belonged to this input.
    with tempfile.TemporaryDirectory(prefix='segments_') as work_dir:
        base_path, total_duration_ms = segment_audio(
            input_path, target_path=work_dir, seg_duration=seg_duration
        )

        # Only the numbered WAV files written by segment_audio, in play order.
        segment_names = sorted(
            (name for name in os.listdir(base_path)
             if name.endswith('.wav') and name[:-4].isdigit()),
            key=lambda name: int(name[:-4]),
        )
        for file in segment_names:
            segment_file = os.path.join(base_path, file)
            similarity = calculate_similarity(segment_file, reference_path)
            if similarity > threshold:
                shutil.copy(segment_file, output_folder)
                matched_time_ms += len(AudioSegment.from_file(segment_file))

    # Per-segment durations are rounded to whole milliseconds, so their sum can
    # exceed the total by a hair; never report a negative remainder.
    unmatched_time_ms = max(0, total_duration_ms - matched_time_ms)
    return matched_time_ms, unmatched_time_ms

# Flask application object; the @app.route decorators in this file register
# their handlers on it.
app = Flask(__name__)

@app.route('/')
def index():
    """Serve ``index.html`` from the '.' directory for the root URL."""
    page_dir, page_name = '.', 'index.html'
    return send_from_directory(page_dir, page_name)



@app.route('/upload_audio', methods=['POST'])
def upload_audio():
    """Score an uploaded recording against the reference voice sample.

    Expects a JSON object ``{"audio_data": "<base64-encoded audio>"}``. The
    decoded audio is written to ``/tmp/data/recorded_audio.wav`` and compared
    segment by segment with ``./sample.wav`` via ``process_audio``.

    Returns:
        ``({"rate": <matched share as a percentage>}, 200)`` on success;
        ``({"error": ...}, 400)`` when the body is not a JSON object, lacks
        ``audio_data``, or the value is not decodable, non-empty base64;
        ``({"error": ..., "details": ...}, 500)`` when the reference file is
        missing or processing fails.
    """
    # silent=True: a non-JSON or malformed body yields None instead of raising,
    # so it is reported as a client error rather than a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'audio_data' not in data:
        return jsonify({"error": "音声データがありません"}), 400

    try:
        audio_binary = base64.b64decode(data['audio_data'])
    except (ValueError, TypeError):
        # binascii.Error is a ValueError subclass; TypeError covers non-string values.
        return jsonify({"error": "音声データの形式が不正です"}), 400
    if not audio_binary:
        return jsonify({"error": "音声データがありません"}), 400

    try:
        audio_path = "/tmp/data/recorded_audio.wav"
        os.makedirs(os.path.dirname(audio_path), exist_ok=True)
        with open(audio_path, 'wb') as f:
            f.write(audio_binary)

        # Make sure the reference audio is where we expect it.
        # NOTE: relative path; switch to an absolute path or place sample.wav
        # in the working directory.
        reference_audio = './sample.wav'
        if not os.path.exists(reference_audio):
            return jsonify({"error": "参照音声ファイルが見つかりません", "details": reference_audio}), 500

        matched_time, unmatched_time = process_audio(reference_audio, audio_path, threshold=0.1)
        total_time = matched_time + unmatched_time
        rate = (matched_time / total_time) * 100 if total_time > 0 else 0

        return jsonify({"rate": rate}), 200
    except Exception as e:
        # Request boundary: log with traceback and report a generic failure.
        # NOTE(review): "details" exposes the raw exception text to the client;
        # consider dropping it outside of debugging.
        app.logger.exception("Error in /upload_audio")
        return jsonify({"error": "サーバーエラー", "details": str(e)}), 500

if __name__ == '__main__':
    # The Werkzeug interactive debugger permits arbitrary code execution, so it
    # must not be on unconditionally for a server bound to all interfaces.
    # Opt in explicitly (FLASK_DEBUG=1) for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true")
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)