JusTalk_test

Sleeping

App Files Files Community

JusTalk_test / app.py

rein0421

Update app.py

412e118 verified 4 months ago

raw

history blame contribute delete

3.93 kB

	from flask import Flask, request, jsonify, send_from_directory
	import base64
	import os
	import shutil
	import numpy as np
	from pyannote.audio import Model, Inference
	from pydub import AudioSegment

	# Hugging Face access token used to download the pyannote model; it is read
	# from the "HF" environment variable.
	hf_token = os.environ.get("HF")
	if not hf_token:
	    # An empty value is as unusable as a missing one, so fail fast with a
	    # message that names the variable this code actually reads.
	    raise ValueError("環境変数 HF が設定されていません。")

	# Point the model cache at a writable directory.
	cache_dir = "/tmp/hf_cache"
	os.makedirs(cache_dir, exist_ok=True)

	# Load the embedding model, passing the auth token and the cache directory.
	model = Model.from_pretrained("pyannote/embedding", use_auth_token=hf_token, cache_dir=cache_dir)
	# NOTE(review): Inference is built with its default settings, while
	# calculate_similarity flattens both outputs and takes their dot product.
	# Confirm the default window mode yields equal-length outputs for clips of
	# different durations.
	inference = Inference(model)

	def cosine_similarity(vec1, vec2):
	    """Return the cosine similarity of two vectors.

	    Args:
	        vec1: First vector (NumPy array of numbers).
	        vec2: Second vector, same length as ``vec1``.

	    Returns:
	        The dot product of the two L2-normalised vectors, or ``0.0`` when
	        either vector has zero norm (the similarity is undefined there and
	        dividing by the norm would otherwise produce NaN).
	    """
	    norm1 = np.linalg.norm(vec1)
	    norm2 = np.linalg.norm(vec2)
	    # Guard against division by zero for all-zero vectors.
	    if norm1 == 0 or norm2 == 0:
	        return 0.0
	    return np.dot(vec1 / norm1, vec2 / norm2)

	def segment_audio(path, target_path='/tmp/setup_voice', seg_duration=1.0):
	    """Split an audio file into consecutive fixed-length WAV segments.

	    Segments are written to ``target_path`` as ``0.wav``, ``1.wav``, ... in
	    order of their start time; the final segment may be shorter than
	    ``seg_duration``.

	    Args:
	        path: Audio file to split.
	        target_path: Directory that receives the segment files (created if
	            it does not exist).
	        seg_duration: Segment length in seconds; must be at least 0.001.

	    Returns:
	        Tuple ``(target_path, duration_ms)`` where ``duration_ms`` is
	        ``len()`` of the loaded source audio.

	    Raises:
	        ValueError: If ``seg_duration`` is shorter than one millisecond.
	    """
	    seg_duration_ms = int(seg_duration * 1000)
	    if seg_duration_ms < 1:
	        # A zero step would make range() fail and a negative one would
	        # silently produce no segments, so reject both up front.
	        raise ValueError("seg_duration must be at least 0.001 seconds")

	    # Load the source before touching the directory, in case the source
	    # file itself lives inside target_path.
	    base_sound = AudioSegment.from_file(path)
	    duration_ms = len(base_sound)

	    os.makedirs(target_path, exist_ok=True)
	    # Remove numbered segments left over from an earlier call; otherwise a
	    # shorter recording would leave stale files that callers listing this
	    # directory would mistake for part of the current audio.
	    for leftover in os.listdir(target_path):
	        stem, ext = os.path.splitext(leftover)
	        leftover_path = os.path.join(target_path, leftover)
	        if ext == '.wav' and stem.isdigit() and os.path.isfile(leftover_path):
	            os.remove(leftover_path)

	    for i, start in enumerate(range(0, duration_ms, seg_duration_ms)):
	        end = min(start + seg_duration_ms, duration_ms)
	        segment = base_sound[start:end]
	        segment.export(os.path.join(target_path, f'{i}.wav'), format="wav")

	    return target_path, duration_ms

	def calculate_similarity(path1, path2):
	    """Return the cosine similarity between the embeddings of two audio files.

	    Each path is passed to the module-level ``inference`` object; the
	    ``.data`` of each result is flattened before being compared with
	    ``cosine_similarity``. The result is converted to a plain ``float``.
	    """
	    # Embed path1 first, then path2, and flatten each result to 1-D.
	    flat_embeddings = [inference(audio).data.flatten() for audio in (path1, path2)]
	    score = cosine_similarity(*flat_embeddings)
	    return float(score)

	def process_audio(reference_path, input_path, output_folder='/tmp/data/matched_segments', seg_duration=1.0, threshold=0.5):
	    """Measure how much of the input audio resembles the reference audio.

	    The input is split with ``segment_audio``; every segment whose
	    similarity to ``reference_path`` is greater than ``threshold`` is copied
	    into ``output_folder`` and its length is added to the matched total.

	    Returns:
	        Tuple ``(matched_time_ms, unmatched_time_ms)``, where the unmatched
	        time is the total input duration minus the matched time.
	    """
	    os.makedirs(output_folder, exist_ok=True)
	    segments_dir, input_length_ms = segment_audio(input_path, seg_duration=seg_duration)

	    matched_ms = 0
	    for name in sorted(os.listdir(segments_dir)):
	        candidate = os.path.join(segments_dir, name)
	        score = calculate_similarity(candidate, reference_path)
	        # Written as "not (score > threshold)" so that a NaN score is
	        # skipped, exactly as a plain "score > threshold" test would do.
	        if not (score > threshold):
	            continue
	        shutil.copy(candidate, output_folder)
	        matched_ms += len(AudioSegment.from_file(candidate))

	    return matched_ms, input_length_ms - matched_ms

	# Flask application object; the route handlers below are registered on it.
	app = Flask(__name__)

	@app.route('/')
	def index():
	    """Serve ``index.html`` from the ``'.'`` directory for the root URL."""
	    page_name = 'index.html'
	    return send_from_directory('.', page_name)



	@app.route('/upload_audio', methods=['POST'])
	def upload_audio():
	    """Score an uploaded recording against the reference voice sample.

	    Expects a JSON object with an ``audio_data`` field holding the
	    base64-encoded audio. The decoded audio is saved to
	    ``/tmp/data/recorded_audio.wav`` and compared with ``./sample.wav``
	    through ``process_audio``.

	    Returns:
	        ``{"rate": <percentage of matched time>}`` with status 200 on
	        success; a JSON error with status 400 when the body is not a JSON
	        object, lacks ``audio_data``, or the value cannot be base64-decoded;
	        a JSON error with status 500 when the reference file is missing or
	        any other exception occurs.
	    """
	    try:
	        # silent=True returns None for a missing or malformed JSON body, so
	        # it is reported as a client error rather than reaching the
	        # catch-all handler below and becoming a 500.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict) or 'audio_data' not in data:
	            return jsonify({"error": "音声データがありません"}), 400

	        try:
	            audio_binary = base64.b64decode(data['audio_data'])
	        except (ValueError, TypeError):
	            # binascii.Error is a ValueError subclass; a non-string value
	            # raises TypeError. Both mean the client sent bad data.
	            return jsonify({"error": "音声データのデコードに失敗しました"}), 400

	        audio_path = "/tmp/data/recorded_audio.wav"
	        os.makedirs(os.path.dirname(audio_path), exist_ok=True)
	        with open(audio_path, 'wb') as f:
	            f.write(audio_binary)

	        # Make sure the reference audio path is correct: either change it to
	        # the absolute path of sample.wav or place the file in this location.
	        reference_audio = './sample.wav'
	        if not os.path.exists(reference_audio):
	            return jsonify({"error": "参照音声ファイルが見つかりません", "details": reference_audio}), 500

	        matched_time, unmatched_time = process_audio(reference_audio, audio_path, threshold=0.1)
	        total_time = matched_time + unmatched_time
	        # Avoid dividing by zero when the recording is empty.
	        rate = (matched_time / total_time) * 100 if total_time > 0 else 0

	        return jsonify({"rate": rate}), 200
	    except Exception as e:
	        # Top-level boundary for this endpoint: log the full traceback and
	        # answer with a JSON error instead of Flask's default error page.
	        app.logger.exception("Error in /upload_audio")
	        # NOTE(review): "details" exposes the raw exception text to the
	        # client; kept for compatibility with the existing response shape.
	        return jsonify({"error": "サーバーエラー", "details": str(e)}), 500

	if __name__ == '__main__':
	    # Debug mode enables the Werkzeug interactive debugger, which lets
	    # anyone who can reach the server run arbitrary code. Because the
	    # server binds to 0.0.0.0, debug is off unless explicitly requested
	    # with FLASK_DEBUG=1 (or true/yes/on) for local development.
	    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
	    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)