# Hugging Face Spaces app (page status banner from the scraped source converted to a comment)
import os | |
from faster_whisper import WhisperModel | |
from pydub import AudioSegment | |
import string | |
import random | |
from datetime import datetime | |
# Redirect Matplotlib's cache directory (Spaces home dir is read-only)
os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib" | |
# Redirect the Hugging Face cache directories
os.environ["HF_HOME"] = "/tmp/huggingface" | |
os.environ["HUGGINGFACE_HUB_CACHE"] = "/tmp/huggingface" | |
class TranscriptionMaker():
    """Transcribe directories of speaker-labelled WAV files with faster-whisper.

    Input files are expected to be named ``{speaker}_{YYYYmmddHHMMSS}.wav``.
    Consecutive files from the same speaker are merged into one audio file
    before transcription, and the result is written as a single text file.
    """

    def __init__(self, output_dir="/tmp/data/transcriptions"):
        """Load the Whisper model and prepare the transcription output directory.

        :param output_dir: directory the finished transcription file is written to.
        """
        self.model = WhisperModel("base", device="cpu", download_root="/tmp/huggingface")
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)

    def create_transcription(self, audio_directory):
        """Transcribe every WAV file under *audio_directory* into one text file.

        :param audio_directory: directory of ``{speaker}_{timestamp}.wav`` files.
        :returns: path of the written ``transcription.txt``.
        :raises ValueError: if *audio_directory* is not a directory.
        """
        conversation = []
        if not os.path.isdir(audio_directory):
            raise ValueError(f"The specified path is not a valid directory: {audio_directory}")
        audio_files = self.sort_audio_files_in_directory(audio_directory)
        merged_segments = self.combine_audio(audio_files)
        merged_audio_directory = self.save_marged_segments(
            merged_segments, output_directory='/tmp/data/transcription_audio')
        merged_files = self.sort_audio_files_in_directory(merged_audio_directory)
        for audio_file in merged_files:
            if os.path.splitext(audio_file)[-1].lower() != '.wav':
                continue
            audio_path = os.path.join(merged_audio_directory, audio_file)
            try:
                # transcribe() returns (segment_generator, info).  Unpack the
                # tuple properly and materialize the generator so it can be
                # sorted.  (The original `list((gen, info))` unpacking only
                # worked by accident.)
                segments, _info = self.model.transcribe(audio_path)
                segments = list(segments)
            except Exception as e:
                print(f"Error transcripting file {audio_path}: {e}")
                raise
            sorted_segments = sorted(segments, key=lambda s: s.start)
            combined_text = "".join(segment.text for segment in sorted_segments)
            speaker = os.path.basename(audio_file).split("_")[0]
            # Skip silence: Whisper may emit empty or whitespace-only text.
            if not combined_text.strip():
                continue
            conversation.append(f"{speaker}: {combined_text}<br>")
        # Write the conversation; the output filename is fixed as "transcription.txt".
        output_file = os.path.join(self.output_dir, "transcription.txt")
        print(conversation)
        try:
            with open(output_file, "w", encoding="utf-8") as f:
                f.writelines(conversation)
        except OSError as e:
            print(f"Error writing transcription file: {e}")
            raise
        return output_file

    def combine_audio(self, audio_files):
        """Group an ordered list of audio file paths into runs by speaker.

        The speaker is the filename part before the first underscore.

        :param audio_files: ordered list of file paths.
        :returns: list of ``(speaker, [paths])`` tuples, one per run.
        :raises ValueError: if *audio_files* is empty.  (The original bare
            ``raise`` produced an opaque RuntimeError.)
        """
        if not audio_files:
            raise ValueError("audio_files is empty")
        merged_segments = []
        current_speaker = None
        current_segment = []
        for segment in audio_files:
            speaker = os.path.basename(segment).split("_")[0]
            if speaker != current_speaker:
                # Speaker changed: flush the previous run and start a new one.
                if current_segment:
                    merged_segments.append((current_speaker, current_segment))
                current_speaker = speaker
                current_segment = [segment]
            else:
                # Same speaker: extend the current run.
                current_segment.append(segment)
        # Flush the final run.
        if current_segment:
            merged_segments.append((current_speaker, current_segment))
        return merged_segments

    def sort_audio_files_in_directory(self, directory):
        """Return full paths of the ``.wav`` files in *directory*, sorted by
        the timestamp embedded in their names (``{speaker}_{YYYYmmddHHMMSS}.wav``).

        NOTE(review): raises IndexError/ValueError for .wav files that do not
        follow the naming convention — assumed to be guaranteed by the callers.
        """
        audio_files = [f for f in os.listdir(directory) if f.endswith(".wav")]
        audio_files.sort(key=lambda x: datetime.strptime(x.split("_")[1].split(".")[0], "%Y%m%d%H%M%S"))
        return [os.path.join(directory, f) for f in audio_files]

    def save_marged_segments(self, merged_segments, output_directory='/tmp/data/conversations'):
        """Export each ``(speaker, segments)`` run as one merged WAV file.

        Files are named ``{speaker}_{YYYYmmddHHMMSS}.wav``.  Because a whole
        conversation is usually exported within the same wall-clock second,
        each file's timestamp is offset by its index — this keeps the names
        unique (the original code could overwrite files) and preserves the
        conversation order under sort_audio_files_in_directory().

        Method name keeps the historical 'marged' typo for caller compatibility.

        :returns: *output_directory*.
        :raises ValueError: if *merged_segments* is empty.  (The original bare
            ``raise`` produced an opaque RuntimeError.)
        """
        from datetime import timedelta  # local import keeps this fix self-contained
        if not merged_segments:
            print("merged_segmentsが見つかりませんでした。")
            raise ValueError("merged_segments is empty")
        conversation = []
        for speaker, segments in merged_segments:
            conversation.append((speaker, self.merge_segments(segments)))
        os.makedirs(output_directory, exist_ok=True)
        base_time = datetime.now()
        for i, (speaker, combined_audio) in enumerate(conversation):
            # Offset by i seconds: unique filenames in chronological order.
            current_time = (base_time + timedelta(seconds=i)).strftime("%Y%m%d%H%M%S")
            file_path = os.path.join(output_directory, f"{speaker}_{current_time}.wav")
            combined_audio.export(file_path, format="wav")
            print(f"Saved: {file_path}")
        return output_directory

    def merge_segments(self, segments):
        """Concatenate a list of segments into one AudioSegment.

        :param segments: file paths and/or AudioSegment objects.
        :raises ValueError: for any other element type.
        """
        combined = AudioSegment.empty()  # start from an empty AudioSegment
        for segment in segments:
            if isinstance(segment, str):
                # Path: load the audio from disk.
                audio = AudioSegment.from_file(segment)
            elif isinstance(segment, AudioSegment):
                # Already an AudioSegment: use it as-is.
                audio = segment
            else:
                raise ValueError("Invalid segment type. Must be file path or AudioSegment.")
            combined += audio
        return combined

    def generate_random_string(self, length):
        """Return a random alphanumeric string of *length* characters."""
        letters = string.ascii_letters + string.digits
        return ''.join(random.choice(letters) for _ in range(length))

    def generate_filename(self, random_length):
        """Return a timestamp-based WAV filename (``YYYYmmddHHMMSS.wav``).

        NOTE(review): *random_length* is currently unused and
        generate_random_string() is never called here; parameter kept for
        interface compatibility — confirm whether a random suffix was intended.
        """
        current_time = datetime.now().strftime("%Y%m%d%H%M%S")
        return f"{current_time}.wav"