File size: 1,536 Bytes
cf72e88
 
 
 
 
b9adb22
 
 
 
cf72e88
 
b9adb22
cf72e88
 
 
 
b9adb22
cf72e88
 
 
b9adb22
cf72e88
 
 
 
 
 
 
 
 
 
 
 
b9adb22
cf72e88
 
 
b9adb22
cf72e88
 
 
 
 
 
b9adb22
cf72e88
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import os
from pathlib import Path
import torch


def srt_create(whisper_model, path: str, series: str, part: int, filename: str, **kwargs) -> str:
    """Transcribe an audio file and write word-level .srt and .ass subtitles.

    Both files are written to ``<path>/<series>/`` and are named
    ``<series>_<part>.srt`` and ``<series>_<part>.ass``, where spaces in
    ``series`` have been replaced by underscores. The output folder is
    created up front if it does not exist, so a bad location fails before
    the transcription is run rather than after it.

    Args:
        whisper_model: Object exposing ``transcribe(filename, regroup=...,
            fp16=...)``. The returned result must provide ``split_by_gap``,
            ``split_by_length``, ``merge_by_gap``, ``to_srt_vtt`` and
            ``to_ass``.
        path: Base output folder.
        series: Series name; used for both the sub-folder and the file names.
        part: Part number appended to the file names.
        filename: Audio file handed to ``whisper_model.transcribe``.
        **kwargs: Optional settings read by this function:
            ``font`` (default ``'Arial'``), ``sub_position`` (default ``5``),
            ``font_size`` (default ``21``), ``max_characters``, ``max_words``
            and ``font_color`` (each ``None`` when omitted).

    Returns:
        The .ass file path exactly as composed from ``path`` (it is not
        converted to an absolute path, unlike the path used for writing).
    """
    # Replace whitespaces with underscores for series name
    series = series.replace(' ', '_')

    # Compose the srt and ass file paths inside the series folder
    srt_path = f"{path}{os.sep}{series}{os.sep}"
    srt_filename = f"{srt_path}{series}_{part}.srt"
    ass_filename = f"{srt_path}{series}_{part}.ass"

    # Get the absolute paths used for writing
    absolute_srt_path = Path(srt_filename).absolute()
    absolute_ass_path = Path(ass_filename).absolute()

    # Both files share one folder; make sure it exists before doing the
    # expensive transcription so a missing folder cannot fail the write.
    absolute_ass_path.parent.mkdir(parents=True, exist_ok=True)

    # Subtitle style dict, forwarded to to_ass() as keyword arguments
    word_dict = {
        'Fontname': kwargs.get('font', 'Arial'),
        'Alignment': kwargs.get('sub_position', 5),
        'BorderStyle': '1',
        'Outline': '1',
        'Shadow': '2',
        'Blur': '21',
        'Fontsize': kwargs.get('font_size', 21),
        'MarginL': '0',
        'MarginR': '0',
    }

    max_characters = kwargs.get('max_characters')
    max_words = kwargs.get('max_words')
    font_color = kwargs.get('font_color')

    # Transcribe the audio file; fp16 is requested only when CUDA is available
    transcribe = whisper_model.transcribe(
        filename, regroup=True, fp16=torch.cuda.is_available())

    # Regroup the segments. The chain's return value is discarded and the
    # exports below use `transcribe` itself.
    # NOTE(review): this relies on these methods modifying the result in
    # place - confirm against the transcription library.
    transcribe.split_by_gap(0.5).split_by_length(
        max_characters).merge_by_gap(0.15, max_words=max_words)

    # Export both subtitle formats with word-level timing
    transcribe.to_srt_vtt(str(absolute_srt_path), word_level=True)
    transcribe.to_ass(str(absolute_ass_path), word_level=True,
                      highlight_color=font_color, **word_dict)

    return ass_filename