File size: 4,543 Bytes
b8f59b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import gradio as gr
import torch
import time
import os
from src.transcription_utils import transcribe, language_options, model_options, ModelManager

class TranscriptionApp:
    def __init__(self):
        """

        Initializes an instance with a ModelManager for managing AI models,

        sets default device and model based on CUDA availability, 

        and prepares a Gradio app and outputs dictionary for UI interactions and storing results.

        """
        self.model_manager = ModelManager()
        self.default_device = "cuda" if torch.cuda.is_available() else "cpu"
        self.default_model = "Large-v2" if torch.cuda.is_available() else "Medium"
        self.app = gr.Blocks()
        self.outputs = {}  
        self.last_transcription_time = 0  

        # Crear carpeta Temp si no existe
        if not os.path.exists('Temp'):
            os.makedirs('Temp')

    def start_transcription(self, file, device, language, model):
        """Start transcription process."""
        start_time = time.time()

        try:
            results = transcribe(file, device, language, model, self.model_manager)
        except ValueError as e:
            return str(e), 0  

        end_time = time.time()
        self.last_transcription_time = round(end_time - start_time, 1)  

        if results:
            json_output, txt_path, vtt_path, srt_path = results
            self.outputs = {
                'TXT': txt_path,
                'SRT': srt_path,
                'JSON': json_output,
                'VTT': vtt_path                
            }
            return self.update_output_text('TXT'), self.last_transcription_time
        return "No transcription available.", self.last_transcription_time


    def update_output_text(self, format_choice):
        """Update the text area based on the format choice."""
        if format_choice and self.outputs.get(format_choice):
            file_path = self.outputs[format_choice]
            try:
                with open(file_path, 'r', encoding='utf-8') as file:
                    return file.read()
            except FileNotFoundError:
                return "File not found."
        return "No file available or format not selected."

    # User interface for the transcription kit using Gradio
    def setup_ui(self):
        with self.app:
            gr.Markdown("# Kit Transcriptor Whisperx")
            gr.Markdown("❤️ Follow us on [YouTube](https://www.youtube.com/channel/UC_YzjCh-CSSCSGANvt5wBNQ?sub_confirmation=1), [GitHub](https://github.com/rgcodeai) 🌐 More on [Mister Contenidos](https://mistercontenidos.com)")  
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Supported Formats: Audio (mp3, wav) and Video (mp4, avi, mov, flv)")
                    file_input = gr.File(label="Upload your multimedia file", type="filepath")
                    device_dropdown = gr.Dropdown(label="Select device", choices=["cuda", "cpu"], value=self.default_device)
                    model_dropdown = gr.Dropdown(label="Select model", choices=list(model_options.keys()), value=self.default_model)
                    language_dropdown = gr.Dropdown(label="Select language", choices=list(language_options.keys()), value="Identify")
                    transcribe_button = gr.Button("Start Transcription")
                    
                with gr.Column():
                    transcription_time_display = gr.Textbox(label="Last Transcription Time (seconds)", interactive=False, lines=1)
                    format_choice = gr.Radio(['TXT', 'SRT', 'VTT', 'JSON'], label="Select format to view:", value='TXT')
                    output_text = gr.Textbox(label="File Content", interactive=False, lines=10)
                    download_button = gr.Button("Download Transcription")
                    format_choice.change(fn=self.update_output_text, inputs=format_choice, outputs=output_text, queue=True)
                    download_button.click(fn=lambda x: self.outputs.get(x), inputs=format_choice, outputs=gr.File())

            transcribe_button.click(fn=self.start_transcription, inputs=[file_input, device_dropdown, language_dropdown, model_dropdown], outputs=[output_text, transcription_time_display])

    def launch(self):
        """Launch the transcription application."""
        self.setup_ui()
        self.app.launch()


if __name__ == '__main__':
    app = TranscriptionApp()
    app.launch()