Spaces:
Running
Running
File size: 3,372 Bytes
5acb820 3fbb133 89c5d41 5acb820 89c5d41 5acb820 89c5d41 5acb820 89c5d41 5acb820 3fbb133 5acb820 3fbb133 5acb820 a82832c 5acb820 a82832c 5acb820 34cbda6 30f99ab 5acb820 34cbda6 5acb820 34cbda6 5acb820 34cbda6 5acb820 34cbda6 5acb820 34cbda6 3fbb133 34cbda6 3fbb133 a82832c 3fbb133 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import os
from faster_whisper import WhisperModel
from pydub import AudioSegment
import string
import random
from datetime import datetime
# Redirect the Matplotlib and Hugging Face cache directories to /tmp.
# NOTE(review): these assignments run after faster_whisper has already been
# imported above; if that import chain reads HF_HOME / HUGGINGFACE_HUB_CACHE
# at import time, the values set here arrive too late -- confirm.
os.environ.update({
    "MPLCONFIGDIR": "/tmp/matplotlib",
    "HF_HOME": "/tmp/huggingface",
    "HUGGINGFACE_HUB_CACHE": "/tmp/huggingface",
})
class TranscriptionMaker():
    """Transcribe WAV files with a Whisper model and merge WAV segments.

    The transcription text is written to ``transcription.txt`` inside
    ``output_dir``; merged audio is written to a separate directory chosen
    per call to :meth:`merge_segments`.
    """

    def __init__(self, output_dir="/tmp/data/transcriptions"):
        """Load the "base" Whisper model on CPU and prepare the output directory.

        Args:
            output_dir: Directory that receives the transcription file. It is
                created if it does not exist.
        """
        self.model = WhisperModel("base", device="cpu", download_root="/tmp/huggingface")
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)

    def create_transcription(self, audio_directory):
        """Transcribe every ``.wav`` file in a directory into one text file.

        Files are processed in sorted name order; the extension match is
        case-insensitive. Within each file, segments are ordered by start
        time and each segment's text becomes one line of the output.

        Args:
            audio_directory: Directory containing the audio files.

        Returns:
            Path of the written ``transcription.txt`` file.

        Raises:
            ValueError: If ``audio_directory`` is not a directory.
            Exception: Any error raised by the model while transcribing is
                logged and re-raised.
            OSError: If the transcription file cannot be written (logged and
                re-raised).
        """
        if not os.path.isdir(audio_directory):
            raise ValueError(f"The specified path is not a valid directory: {audio_directory}")

        lines = []
        for audio_file in sorted(os.listdir(audio_directory)):
            if os.path.splitext(audio_file)[-1].lower() != '.wav':
                continue
            audio_path = os.path.join(audio_directory, audio_file)
            try:
                segments, _info = self.model.transcribe(audio_path)
                # The segments object is consumed lazily, so it is
                # materialized here: failures raised while iterating must be
                # reported by the handler below, not escape from later code.
                segments = list(segments)
            except Exception as e:
                print(f"Error transcripting file {audio_path}: {e}")
                raise
            for segment in sorted(segments, key=lambda s: s.start):
                lines.append(f"{segment.text}\n")

        # The output file name is always "transcription.txt".
        output_file = os.path.join(self.output_dir, "transcription.txt")
        try:
            with open(output_file, "w", encoding="utf-8") as f:
                f.writelines(lines)
        except OSError as e:
            print(f"Error writing transcription file: {e}")
            raise
        return output_file

    def merge_segments(self, segments_dir, output_dir="/tmp/data/merged_segment"):
        """Concatenate the ``.wav`` files of a directory into a single WAV file.

        Files are joined in sorted name order. The extension match is
        case-insensitive, consistent with :meth:`create_transcription`.
        When the directory holds zero or one matching file nothing is
        written and ``output_dir`` is returned as-is (it is still created).

        Args:
            segments_dir: Directory containing the audio segments.
            output_dir: Directory that receives the merged file; created if
                missing.

        Returns:
            ``output_dir`` (the directory, not the merged file path).
        """
        os.makedirs(output_dir, exist_ok=True)
        files = sorted(
            f for f in os.listdir(segments_dir) if f.lower().endswith('.wav')
        )
        if len(files) <= 1:
            print('No need to merge')
            return output_dir

        combined_audio = AudioSegment.empty()
        for file in files:
            combined_audio += AudioSegment.from_file(os.path.join(segments_dir, file))

        output_file = os.path.join(output_dir, self.generate_filename())
        combined_audio.export(output_file, format="wav")
        return output_dir

    def generate_filename(self):
        """Return a ``YYYYmmddHHMMSS.wav`` file name based on the current local time.

        The name has one-second resolution, so two calls within the same
        second return the same name.
        """
        current_time = datetime.now().strftime("%Y%m%d%H%M%S")
        return f"{current_time}.wav"