File size: 3,372 Bytes
5acb820
 
 
3fbb133
 
 
89c5d41
5acb820
 
 
 
 
89c5d41
5acb820
 
89c5d41
 
 
5acb820
89c5d41
 
5acb820
3fbb133
5acb820
3fbb133
5acb820
 
 
 
a82832c
5acb820
 
 
 
 
 
 
 
 
a82832c
 
5acb820
 
 
 
 
34cbda6
30f99ab
 
5acb820
 
 
 
 
 
 
 
 
34cbda6
 
5acb820
 
 
 
 
34cbda6
 
 
 
 
5acb820
 
34cbda6
 
 
5acb820
34cbda6
5acb820
34cbda6
3fbb133
 
34cbda6
3fbb133
a82832c
3fbb133
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
from faster_whisper import WhisperModel
from pydub import AudioSegment
import string
import random
from datetime import datetime

# Redirect the Matplotlib and Hugging Face cache directories to /tmp.
# NOTE(review): these are assigned after the third-party imports above; a
# library that reads its environment at import time may not pick them up --
# confirm the ordering is intentional.
os.environ.update(
    {
        # Matplotlib configuration/cache directory.
        "MPLCONFIGDIR": "/tmp/matplotlib",
        # Hugging Face home and hub cache directories.
        "HF_HOME": "/tmp/huggingface",
        "HUGGINGFACE_HUB_CACHE": "/tmp/huggingface",
    }
)

class TranscriptionMaker:
    """Transcribe WAV files with a Whisper model and merge WAV segments.

    The instance holds a ``WhisperModel`` ("base", CPU) and the directory
    into which ``transcription.txt`` is written.
    """

    def __init__(self, output_dir="/tmp/data/transcriptions"):
        """Load the model and make sure the output directory exists.

        Args:
            output_dir: Directory that receives the transcription file.
        """
        self.model = WhisperModel("base", device="cpu", download_root="/tmp/huggingface")
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)

    def create_transcription(self, audio_directory):
        """Transcribe every WAV file in a directory into one text file.

        Files are processed in sorted file-name order; within a file the
        segments are ordered by start time. One segment text is written per
        line to ``<output_dir>/transcription.txt`` (overwritten each call).

        Args:
            audio_directory: Directory containing the ``.wav`` files
                (extension matched case-insensitively; other files are
                skipped).

        Returns:
            Path of the written transcription file.

        Raises:
            ValueError: If ``audio_directory`` is not a directory.
            Exception: Any error raised while transcribing is logged and
                re-raised unchanged.
            OSError: If the transcription file cannot be written.
        """
        if not os.path.isdir(audio_directory):
            raise ValueError(f"The specified path is not a valid directory: {audio_directory}")

        lines = []
        for audio_file in sorted(os.listdir(audio_directory)):
            if os.path.splitext(audio_file)[-1].lower() != '.wav':
                continue
            audio_path = os.path.join(audio_directory, audio_file)
            try:
                segments, _info = self.model.transcribe(audio_path)
                # The segments are produced lazily, so consume them here:
                # otherwise a failure during transcription would surface
                # later, outside this handler, and never be logged.
                segments = list(segments)
            except Exception as e:
                print(f"Error transcripting file {audio_path}: {e}")
                raise
            for segment in sorted(segments, key=lambda s: s.start):
                lines.append(f"{segment.text}\n")

        # Write the result; the file name is always "transcription.txt".
        output_file = os.path.join(self.output_dir, "transcription.txt")
        try:
            with open(output_file, "w", encoding="utf-8") as f:
                f.writelines(lines)
        except OSError as e:
            print(f"Error writing transcription file: {e}")
            raise
        return output_file

    def merge_segments(self, segments_dir, output_dir="/tmp/data/merged_segment"):
        """Concatenate the WAV files of a directory into a single WAV file.

        Files are joined in sorted file-name order and exported to
        ``output_dir`` under a timestamp-based name. When the directory
        holds at most one WAV file nothing is exported.

        Args:
            segments_dir: Directory containing the segment ``.wav`` files
                (extension matched case-insensitively, consistent with
                ``create_transcription``).
            output_dir: Directory that receives the merged file; created if
                missing.

        Returns:
            ``output_dir`` (the directory, not the merged file path).
        """
        os.makedirs(output_dir, exist_ok=True)

        files = sorted(f for f in os.listdir(segments_dir) if f.lower().endswith('.wav'))

        if len(files) <= 1:
            print('No need to merge')
            return output_dir

        combined_audio = AudioSegment.empty()
        for file in files:
            combined_audio += AudioSegment.from_file(os.path.join(segments_dir, file))

        output_file = os.path.join(output_dir, self.generate_filename())
        combined_audio.export(output_file, format="wav")
        return output_dir

    def generate_filename(self):
        """Return a ``YYYYmmddHHMMSS.wav`` file name from the current local time.

        The name has one-second resolution, so two calls within the same
        second yield the same name.
        """
        current_time = datetime.now().strftime("%Y%m%d%H%M%S")
        return f"{current_time}.wav"