Spaces:
Running
Running
File size: 6,458 Bytes
10d653d d8bbfec 10d653d d8bbfec 10d653d d8bbfec 10d653d d8bbfec 10d653d d8bbfec 917a40d d8bbfec 10d653d d8bbfec 10d653d d8bbfec 10d653d 0dbd483 d8bbfec 10d653d 3fbb133 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
import os
import random
import string
from datetime import datetime, timedelta

from faster_whisper import WhisperModel
from pydub import AudioSegment
# Redirect cache directories into /tmp (the only writable path on the Space).
os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"  # Matplotlib config/cache
for _hf_var in ("HF_HOME", "HUGGINGFACE_HUB_CACHE"):
    # Hugging Face hub caches (model downloads etc.)
    os.environ[_hf_var] = "/tmp/huggingface"
class TranscriptionMaker:
    """Transcribe a directory of speaker-tagged WAV files with faster-whisper.

    Input files are expected to be named ``<speaker>_<YYYYMMDDHHMMSS>.wav``.
    Consecutive files from the same speaker are merged into one audio file,
    each merged file is transcribed, and the conversation is written to a
    single ``transcription.txt`` with one ``speaker: text<br>`` entry per turn.
    """

    def __init__(self, output_dir="/tmp/data/transcriptions"):
        """Load the Whisper model and prepare the output directory.

        download_root keeps the model files under /tmp, matching the cache
        environment variables set at module import time.
        """
        self.model = WhisperModel("base", device="cpu", download_root="/tmp/huggingface")
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)

    def create_transcription(self, audio_directory):
        """Transcribe every WAV under *audio_directory*.

        Returns the path of the written ``transcription.txt``.
        Raises ValueError when *audio_directory* is not a directory, and
        re-raises any transcription or file-writing error after logging it.
        """
        conversation = []
        if not os.path.isdir(audio_directory):
            raise ValueError(f"The specified path is not a valid directory: {audio_directory}")
        audio_files = self.sort_audio_files_in_directory(audio_directory)
        merged_segments = self.combine_audio(audio_files)
        merged_audio_directory = self.save_marged_segments(
            merged_segments, output_directory='/tmp/data/transcription_audio'
        )
        merged_files = self.sort_audio_files_in_directory(merged_audio_directory)
        for audio_file in merged_files:
            if os.path.splitext(audio_file)[-1].lower() != '.wav':
                continue
            audio_path = os.path.join(merged_audio_directory, audio_file)
            try:
                # transcribe() returns (segment generator, info); materialize
                # the generator so it can be sorted.  (The original code
                # unpacked list((generator, info)), which only worked by
                # accident because the tuple has exactly two elements.)
                segments, info = self.model.transcribe(audio_path)
                segments = list(segments)
            except Exception as e:
                print(f"Error transcripting file {audio_path}: {e}")
                raise
            sorted_segments = sorted(segments, key=lambda s: s.start)
            results = [
                {"start": segment.start, "end": segment.end, "text": segment.text}
                for segment in sorted_segments
            ]
            combined_text = "".join(result["text"] for result in results)
            # The speaker name is the filename prefix before the first "_".
            speaker = os.path.basename(audio_file).split("_")[0]
            # Skip files that produced no text (silence).
            if not combined_text:
                continue
            conversation.append(f"{speaker}: {combined_text}<br>")
        # Write the conversation; the output name is fixed to "transcription.txt".
        output_file = os.path.join(self.output_dir, "transcription.txt")
        print(conversation)
        try:
            with open(output_file, "w", encoding="utf-8") as f:
                for result in conversation:
                    f.write(result)
        except OSError as e:
            print(f"Error writing transcription file: {e}")
            raise
        return output_file

    def combine_audio(self, audio_files):
        """Group consecutive files by speaker.

        Returns a list of ``(speaker, [file_path, ...])`` tuples in the input
        order.  Raises ValueError for empty input (the original bare ``raise``
        produced "RuntimeError: No active exception to re-raise").
        """
        if not audio_files:
            raise ValueError("audio_files must not be empty")
        merged_segments = []
        current_speaker = None
        current_segment = []
        for segment in audio_files:
            speaker = os.path.basename(segment).split("_")[0]
            if speaker != current_speaker:
                # Speaker changed: flush the previous run and start a new one.
                if current_segment:
                    merged_segments.append((current_speaker, current_segment))
                current_speaker = speaker
                current_segment = [segment]
            else:
                # Same speaker: extend the current run.
                current_segment.append(segment)
        # Flush the trailing run.
        if current_segment:
            merged_segments.append((current_speaker, current_segment))
        return merged_segments

    def sort_audio_files_in_directory(self, directory):
        """Return full paths of the directory's .wav files, oldest first.

        Filenames must look like ``<speaker>_<YYYYMMDDHHMMSS>.wav``; the
        filename itself is a secondary key so identical timestamps still sort
        deterministically (os.listdir order is arbitrary).
        """
        files = os.listdir(directory)
        audio_files = [f for f in files if f.endswith(".wav")]
        audio_files.sort(
            key=lambda x: (
                datetime.strptime(x.split("_")[1].split(".")[0], "%Y%m%d%H%M%S"),
                x,
            )
        )
        return [os.path.join(directory, f) for f in audio_files]

    def save_marged_segments(self, merged_segments, output_directory='/tmp/data/conversations'):
        """Export each (speaker, files) group as one WAV and return the directory.

        Raises ValueError for empty input (originally a bare ``raise``, which
        was a RuntimeError at runtime).
        """
        if not merged_segments:
            print("merged_segmentsが見つかりませんでした。")
            raise ValueError("merged_segments must not be empty")
        conversation = []
        for speaker, segments in merged_segments:
            combined_audio = self.merge_segments(segments)
            conversation.append((speaker, combined_audio))
        os.makedirs(output_directory, exist_ok=True)
        base_time = datetime.now()
        for i, (speaker, combined_audio) in enumerate(conversation):
            # Offset each file by i seconds from one base time so filenames are
            # unique and keep chronological order even when several files are
            # saved within the same wall-clock second (datetime.now() per file
            # produced colliding names that overwrote each other and broke the
            # later timestamp sort).
            current_time = (base_time + timedelta(seconds=i)).strftime("%Y%m%d%H%M%S")
            filename = f"{speaker}_{current_time}.wav"
            file_path = os.path.join(output_directory, filename)
            combined_audio.export(file_path, format="wav")
            print(f"Saved: {file_path}")
        return output_directory

    def merge_segments(self, segments):
        """Concatenate segments (file paths or AudioSegments) into one AudioSegment."""
        combined = AudioSegment.empty()  # start from an empty AudioSegment
        for segment in segments:
            if isinstance(segment, str):
                # A file path: load it first.
                audio = AudioSegment.from_file(segment)
            elif isinstance(segment, AudioSegment):
                # Already an AudioSegment: use as-is.
                audio = segment
            else:
                raise ValueError("Invalid segment type. Must be file path or AudioSegment.")
            combined += audio
        return combined

    def generate_random_string(self, length):
        """Return a random alphanumeric string of the given length."""
        letters = string.ascii_letters + string.digits
        return ''.join(random.choice(letters) for _ in range(length))

    def generate_filename(self, random_length):
        """Return a timestamp-based .wav filename.

        NOTE(review): *random_length* is accepted for interface compatibility
        but is not used — the name is the current time only.
        """
        current_time = datetime.now().strftime("%Y%m%d%H%M%S")
        filename = f"{current_time}.wav"
        return filename