A-yum1 committed on
Commit
7375917
·
1 Parent(s): cb5ad90

Update transcription.py

Browse files
Files changed (1) hide show
  1. transcription.py +21 -25
transcription.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  from faster_whisper import WhisperModel
3
 
4
  class TranscriptionMaker():
5
- #書き起こしファイル(ファイル名_transcription.txt)を吐き出すディレクトリを指定
6
  def __init__(self,output_dir=os.path.abspath("/tmp/data/transcriptions")):
7
  self.model = WhisperModel("base", device="cpu")
8
  self.output_dir = output_dir
@@ -13,35 +13,31 @@ class TranscriptionMaker():
13
  print(f"Error creating directory {self.output_dir}: {e}")
14
  raise
15
 
16
- #音声ファイルのパスを受け取り、書き起こしファイルを作成する
17
- def create_transcription(self,audio_path):
18
- try:
19
- if not os.path.isfile(audio_path):
20
- raise FileNotFoundError(f"The specified audio file does not exist: {audio_path}")
21
-
22
- segments, info = self.model.transcribe(audio_path)
23
- results = []
 
24
 
 
25
  for segment in segments:
26
  results.append({
27
  "start": segment.start,
28
  "end": segment.end,
29
  "text": segment.text
30
  })
31
-
32
- #ファイルの書き込み
33
- output_file=os.path.join(self.output_dir,os.path.basename(audio_path)+"_transcription.txt")
34
- try:
35
- with open(output_file,"w",encoding="utf-8") as f:
36
- for result in results:
37
- f.write(f"[{result['start']:.2f}s - {result['end']:.2f}s] {result['text']}\n")
38
- except OSError as e:
39
- print(f"Error writing transcription file: {e}")
40
- raise
41
- return output_file
42
- except FileNotFoundError as e:
43
- print(f"Error: {e}")
44
  raise
45
- except Exception as e:
46
- print(f"An unexpected error occurred: {e}")
47
- raise
 
2
  from faster_whisper import WhisperModel
3
 
4
  class TranscriptionMaker():
5
+ #書き起こしファイルを吐き出すディレクトリを指定
6
  def __init__(self,output_dir=os.path.abspath("/tmp/data/transcriptions")):
7
  self.model = WhisperModel("base", device="cpu")
8
  self.output_dir = output_dir
 
13
  print(f"Error creating directory {self.output_dir}: {e}")
14
  raise
15
 
16
+ #音声ファイルのディレクトリを受け取り、書き起こしファイルを作成する
17
def create_transcription(self, audio_directory):
    """Transcribe every ``.wav`` file in a directory into one text file.

    Each ``.wav`` file directly inside ``audio_directory`` (extension
    matched case-insensitively) is passed to ``self.model.transcribe``,
    and the text of every returned segment is written, one segment per
    line, to ``<directory name>_transcription.txt`` inside
    ``self.output_dir``.

    Args:
        audio_directory: Path of the directory holding the audio files.
            A trailing path separator is tolerated.

    Returns:
        The path of the transcription file that was written.

    Raises:
        OSError: If the directory cannot be listed or the output file
            cannot be written (the write error is printed, then re-raised).
    """
    texts = []
    # os.listdir() returns entries in arbitrary order; sort so the combined
    # transcript is deterministic and follows the file naming order.
    for audio_file in sorted(os.listdir(audio_directory)):
        if os.path.splitext(audio_file)[1].lower() != ".wav":
            continue
        audio_path = os.path.join(audio_directory, audio_file)
        # Skip sub-directories that merely happen to be named "*.wav".
        if not os.path.isfile(audio_path):
            continue

        # transcribe() yields a (segments, info) pair; only the segment
        # text ends up in the output file.
        segments, _info = self.model.transcribe(audio_path)
        texts.extend(segment.text for segment in segments)

    # normpath() drops a trailing separator, which would otherwise make
    # basename() return "" and name the file just "_transcription.txt".
    directory_name = os.path.basename(os.path.normpath(audio_directory))
    output_file = os.path.join(
        self.output_dir, directory_name + "_transcription.txt"
    )
    try:
        with open(output_file, "w", encoding="utf-8") as f:
            f.writelines(f"{text}\n" for text in texts)
    except OSError as e:
        print(f"Error writing transcription file: {e}")
        raise
    return output_file
43
+