import os
import random
import string
from datetime import datetime, timedelta

from faster_whisper import WhisperModel
from pydub import AudioSegment

# Redirect cache directories to /tmp so the app can run in sandboxed /
# read-only environments where only /tmp is writable.
os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"            # Matplotlib cache
os.environ["HF_HOME"] = "/tmp/huggingface"                # Hugging Face home
os.environ["HUGGINGFACE_HUB_CACHE"] = "/tmp/huggingface"  # HF hub cache


class TranscriptionMaker():
    """Builds a speaker-labelled transcription text file from a directory of
    per-utterance WAV files named ``<speaker>_<YYYYmmddHHMMSS>.wav``.
    """

    def __init__(self, output_dir="/tmp/data/transcriptions"):
        """Load the Whisper "base" model (CPU) and prepare *output_dir*,
        the directory the transcription file is written to.
        """
        self.model = WhisperModel("base", device="cpu", download_root="/tmp/huggingface")
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)

    def create_transcription(self, audio_directory):
        """Transcribe every WAV file under *audio_directory* and write the
        conversation to ``<output_dir>/transcription.txt``.

        Returns the path of the written file.
        Raises ValueError if *audio_directory* is not a directory.
        """
        conversation = []
        if not os.path.isdir(audio_directory):
            raise ValueError(f"The specified path is not a valid directory: {audio_directory}")
        # Sort utterances chronologically, merge consecutive same-speaker
        # files, and re-save them so each merged file is one speaker turn.
        audio_files = self.sort_audio_files_in_directory(audio_directory)
        merged_segments = self.combine_audio(audio_files)
        merged_audio_directory = self.save_marged_segments(
            merged_segments, output_directory='/tmp/data/transcription_audio')
        merged_files = self.sort_audio_files_in_directory(merged_audio_directory)
        for audio_file in merged_files:
            if os.path.splitext(audio_file)[-1].lower() != '.wav':
                continue
            audio_path = os.path.join(merged_audio_directory, audio_file)
            try:
                # transcribe() returns (segment generator, info).  The
                # original list()-ed the tuple and unpacked the two elements,
                # which worked only by accident; unpack it directly and
                # materialize the generator explicitly.
                segments, info = self.model.transcribe(audio_path)
                segments = list(segments)
            except Exception as e:
                print(f"Error transcribing file {audio_path}: {e}")
                raise
            sorted_segments = sorted(segments, key=lambda s: s.start)
            results = [
                {"start": segment.start, "end": segment.end, "text": segment.text}
                for segment in sorted_segments
            ]
            combined_text = "".join(result["text"] for result in results)
            # The filename prefix before "_" encodes the speaker id.
            speaker = os.path.basename(audio_file).split("_")[0]
            # Skip silent (empty-text) turns.
            if not combined_text:
                continue
            conversation.append(f"{speaker}: {combined_text}\n")
        # Write the conversation; the file name is "transcription.txt".
        output_file = os.path.join(self.output_dir, "transcription.txt")
        print(conversation)
        try:
            with open(output_file, "w", encoding="utf-8") as f:
                for result in conversation:
                    f.write(result)
        except OSError as e:
            print(f"Error writing transcription file: {e}")
            raise
        return output_file

    def combine_audio(self, audio_files):
        """Group the received audio files by consecutive speaker.

        Returns a list of ``(speaker, [file, ...])`` tuples preserving the
        input order; a new tuple starts whenever the speaker changes.
        Raises ValueError when *audio_files* is empty.
        """
        if not audio_files:
            # BUG FIX: a bare `raise` outside an except block raises
            # RuntimeError("No active exception to re-raise"); raise a
            # meaningful error instead.
            raise ValueError("audio_files is empty")
        merged_segments = []
        current_speaker = None
        current_segment = []
        for segment in audio_files:
            speaker = os.path.basename(segment).split("_")[0]
            if speaker != current_speaker:
                # Speaker changed: flush the accumulated run.
                if current_segment:
                    merged_segments.append((current_speaker, current_segment))
                current_speaker = speaker
                current_segment = [segment]
            else:
                # Same speaker: extend the current run.
                current_segment.append(segment)
        # Flush the final run.
        if current_segment:
            merged_segments.append((current_speaker, current_segment))
        return merged_segments

    def sort_audio_files_in_directory(self, directory):
        """Return the ``.wav`` files in *directory*, as full paths, sorted by
        the ``YYYYmmddHHMMSS`` timestamp embedded after the first ``_`` in
        each filename.
        """
        files = os.listdir(directory)
        audio_files = [f for f in files if f.endswith(".wav")]
        audio_files.sort(
            key=lambda x: datetime.strptime(x.split("_")[1].split(".")[0], "%Y%m%d%H%M%S"))
        return [os.path.join(directory, f) for f in audio_files]

    def save_marged_segments(self, merged_segments, output_directory='/tmp/data/conversations'):
        """Merge each speaker run into one WAV and save it under
        *output_directory* as ``<speaker>_<YYYYmmddHHMMSS>.wav``.

        Returns *output_directory*.
        Raises ValueError when *merged_segments* is empty.
        """
        if not merged_segments:
            print("merged_segmentsが見つかりませんでした。")
            # BUG FIX: bare `raise` here raised RuntimeError("No active
            # exception to re-raise"); raise a meaningful error instead.
            raise ValueError("merged_segments is empty")
        conversation = []
        for speaker, segments in merged_segments:
            combined_audio = self.merge_segments(segments)
            conversation.append((speaker, combined_audio))
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)
        base_time = datetime.now()
        for i, (speaker, combined_audio) in enumerate(conversation):
            # BUG FIX: the original stamped every file with datetime.now()
            # at second granularity, so files written within the same second
            # shared a name (silently overwriting each other) and the later
            # timestamp sort lost the conversation order.  Offsetting by the
            # index keeps filenames unique and monotonically ordered while
            # staying parseable by sort_audio_files_in_directory.
            stamp = (base_time + timedelta(seconds=i)).strftime("%Y%m%d%H%M%S")
            filename = f"{speaker}_{stamp}.wav"
            file_path = os.path.join(output_directory, filename)
            combined_audio.export(file_path, format="wav")
            print(f"Saved: {file_path}")
        return output_directory

    def merge_segments(self, segments):
        """Concatenate *segments* (file paths and/or AudioSegments) into a
        single AudioSegment, in order.

        Raises ValueError for any element that is neither.
        """
        combined = AudioSegment.empty()  # start from an empty AudioSegment
        for segment in segments:
            if isinstance(segment, str):
                # A file path: load it first.
                audio = AudioSegment.from_file(segment)
            elif isinstance(segment, AudioSegment):
                # Already an AudioSegment: use as-is.
                audio = segment
            else:
                raise ValueError("Invalid segment type. Must be file path or AudioSegment.")
            combined += audio
        return combined

    def generate_random_string(self, length):
        """Return a random alphanumeric string of *length* characters."""
        letters = string.ascii_letters + string.digits
        return ''.join(random.choice(letters) for i in range(length))

    def generate_filename(self, random_length):
        """Return a ``YYYYmmddHHMMSS.wav`` filename from the current time.

        NOTE(review): *random_length* is currently unused — presumably a
        random suffix was once intended; kept for interface compatibility.
        """
        current_time = datetime.now().strftime("%Y%m%d%H%M%S")
        filename = f"{current_time}.wav"
        return filename