rgcodeai committed on
Commit
7279c42
·
verified ·
1 Parent(s): a9746d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -91
app.py CHANGED
@@ -1,92 +1,92 @@
1
- import gradio as gr
2
- import torch
3
- import time
4
- import os
5
- from src.transcription_utils import transcribe, language_options, model_options, ModelManager
6
-
7
class TranscriptionApp:
    """Gradio front end that runs `transcribe` and shows/downloads its output files.

    Attributes set in ``__init__``:
        model_manager: ``ModelManager`` instance handed to every ``transcribe`` call.
        default_device: ``"cuda"`` when ``torch.cuda.is_available()``, else ``"cpu"``.
        default_model: ``"Large-v2"`` when CUDA is available, else ``"Medium"``.
        app: the ``gr.Blocks`` container the UI is built into.
        outputs: maps a format name (``'TXT'``, ``'SRT'``, ``'JSON'``, ``'VTT'``)
            to the value returned by ``transcribe`` for that format.
        last_transcription_time: duration of the last completed run, in seconds.

    NOTE(review): ``outputs`` lives on the instance, so it appears to be shared
    by every browser session served by this process -- confirm whether
    per-session state (e.g. ``gr.State``) is needed.
    """

    def __init__(self):
        """Create the model manager, pick defaults from CUDA availability, and prepare the UI container."""
        self.model_manager = ModelManager()
        cuda_available = torch.cuda.is_available()
        self.default_device = "cuda" if cuda_available else "cpu"
        self.default_model = "Large-v2" if cuda_available else "Medium"
        self.app = gr.Blocks()
        self.outputs = {}
        self.last_transcription_time = 0

        # Create the Temp folder if it does not exist. exist_ok avoids the
        # check-then-create race of os.path.exists() + os.makedirs().
        os.makedirs('Temp', exist_ok=True)

    def start_transcription(self, file, device, language, model):
        """Run a transcription and return ``(text_for_display, elapsed_seconds)``.

        Args:
            file: value of the upload component (created with ``type="filepath"``).
            device: selected device name from the device dropdown.
            language: selected key of ``language_options``.
            model: selected key of ``model_options``.

        Returns:
            A 2-tuple: the TXT output content (or a message) and the elapsed
            time in seconds rounded to one decimal. When ``transcribe`` raises
            ``ValueError`` the message is ``str(error)`` and the time is ``0``.
        """
        # Forget the previous run first so a failed or empty run cannot leave
        # stale files behind for the viewer and the download button.
        self.outputs = {}

        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments.
        start_time = time.perf_counter()

        try:
            results = transcribe(file, device, language, model, self.model_manager)
        except ValueError as e:
            return str(e), 0

        self.last_transcription_time = round(time.perf_counter() - start_time, 1)

        if not results:
            return "No transcription available.", self.last_transcription_time

        json_output, txt_path, vtt_path, srt_path = results
        # NOTE(review): json_output is opened and offered for download like the
        # other entries, so it is presumably a path too -- confirm in transcribe().
        self.outputs = {
            'TXT': txt_path,
            'SRT': srt_path,
            'JSON': json_output,
            'VTT': vtt_path,
        }
        return self.update_output_text('TXT'), self.last_transcription_time

    def update_output_text(self, format_choice):
        """Return the content of the stored output file for ``format_choice``.

        Returns a fixed message when no format is given or nothing is stored
        for it, ``"File not found."`` when the stored path does not exist, and
        a ``"Could not read file: ..."`` message for any other read or decode
        failure, so the UI callback never raises on a bad path.
        """
        file_path = self.outputs.get(format_choice) if format_choice else None
        if not file_path:
            return "No file available or format not selected."

        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read()
        except FileNotFoundError:
            return "File not found."
        except (OSError, UnicodeDecodeError) as exc:
            # e.g. the path is a directory, is unreadable, or is not UTF-8 text.
            return f"Could not read file: {exc}"

    # User interface for the transcription kit using Gradio
    def setup_ui(self):
        """Build all widgets inside ``self.app`` and wire their event handlers."""
        with self.app:
            gr.Markdown("# Kit Transcriptor Whisperx")
            gr.Markdown("❤️ Follow us on [YouTube](https://www.youtube.com/channel/UC_YzjCh-CSSCSGANvt5wBNQ?sub_confirmation=1), [GitHub](https://github.com/rgcodeai) 🌐 More on [Mister Contenidos](https://mistercontenidos.com)")
            with gr.Row():
                # Left column: inputs and the start button.
                with gr.Column():
                    gr.Markdown("### Supported Formats: Audio (mp3, wav) and Video (mp4, avi, mov, flv)")
                    file_input = gr.File(label="Upload your multimedia file", type="filepath")
                    device_dropdown = gr.Dropdown(label="Select device", choices=["cuda", "cpu"], value=self.default_device)
                    model_dropdown = gr.Dropdown(label="Select model", choices=list(model_options.keys()), value=self.default_model)
                    language_dropdown = gr.Dropdown(label="Select language", choices=list(language_options.keys()), value="Identify")
                    transcribe_button = gr.Button("Start Transcription")

                # Right column: timing, format selector, content viewer, download.
                with gr.Column():
                    transcription_time_display = gr.Textbox(label="Last Transcription Time (seconds)", interactive=False, lines=1)
                    format_choice = gr.Radio(['TXT', 'SRT', 'VTT', 'JSON'], label="Select format to view:", value='TXT')
                    output_text = gr.Textbox(label="File Content", interactive=False, lines=10)
                    download_button = gr.Button("Download Transcription")
                    format_choice.change(fn=self.update_output_text, inputs=format_choice, outputs=output_text, queue=True)
                    # Hands the stored value for the selected format to a File component.
                    download_button.click(fn=lambda x: self.outputs.get(x), inputs=format_choice, outputs=gr.File())

                transcribe_button.click(fn=self.start_transcription, inputs=[file_input, device_dropdown, language_dropdown, model_dropdown], outputs=[output_text, transcription_time_display])

    def launch(self):
        """Launch the transcription application."""
        self.setup_ui()
        self.app.launch()
88
-
89
-
90
# Script entry point: build the application object, then build and serve its UI.
if __name__ == "__main__":
    app = TranscriptionApp()
    app.launch()
 
1
+ import gradio as gr
2
+ import torch
3
+ import time
4
+ import os
5
+ from src.transcription_utils import transcribe, language_options, model_options, ModelManager
6
+
7
class TranscriptionApp:
    """Gradio front end that runs `transcribe` and shows/downloads its output files.

    Attributes set in ``__init__``:
        model_manager: ``ModelManager`` instance handed to every ``transcribe`` call.
        default_device: ``"cuda"`` when ``torch.cuda.is_available()``, else ``"cpu"``.
        default_model: ``"Large-v2"`` when CUDA is available, else ``"Medium"``.
        app: the ``gr.Blocks`` container the UI is built into.
        outputs: maps a format name (``'TXT'``, ``'SRT'``, ``'JSON'``, ``'VTT'``)
            to the value returned by ``transcribe`` for that format.
        last_transcription_time: duration of the last completed run, in seconds.

    NOTE(review): ``outputs`` lives on the instance, so it appears to be shared
    by every browser session served by this process -- confirm whether
    per-session state (e.g. ``gr.State``) is needed.
    """

    def __init__(self):
        """Create the model manager, pick defaults from CUDA availability, and prepare the UI container."""
        self.model_manager = ModelManager()
        cuda_available = torch.cuda.is_available()
        self.default_device = "cuda" if cuda_available else "cpu"
        self.default_model = "Large-v2" if cuda_available else "Medium"
        self.app = gr.Blocks()
        self.outputs = {}
        self.last_transcription_time = 0

        # Create the Temp folder if it does not exist. exist_ok avoids the
        # check-then-create race of os.path.exists() + os.makedirs().
        os.makedirs('Temp', exist_ok=True)

    def start_transcription(self, file, device, language, model):
        """Run a transcription and return ``(text_for_display, elapsed_seconds)``.

        Args:
            file: value of the upload component (created with ``type="filepath"``).
            device: selected device name from the device dropdown.
            language: selected key of ``language_options``.
            model: selected key of ``model_options``.

        Returns:
            A 2-tuple: the TXT output content (or a message) and the elapsed
            time in seconds rounded to one decimal. When ``transcribe`` raises
            ``ValueError`` the message is ``str(error)`` and the time is ``0``.
        """
        # Forget the previous run first so a failed or empty run cannot leave
        # stale files behind for the viewer and the download button.
        self.outputs = {}

        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments.
        start_time = time.perf_counter()

        try:
            results = transcribe(file, device, language, model, self.model_manager)
        except ValueError as e:
            return str(e), 0

        self.last_transcription_time = round(time.perf_counter() - start_time, 1)

        if not results:
            return "No transcription available.", self.last_transcription_time

        json_output, txt_path, vtt_path, srt_path = results
        # NOTE(review): json_output is opened and offered for download like the
        # other entries, so it is presumably a path too -- confirm in transcribe().
        self.outputs = {
            'TXT': txt_path,
            'SRT': srt_path,
            'JSON': json_output,
            'VTT': vtt_path,
        }
        return self.update_output_text('TXT'), self.last_transcription_time

    def update_output_text(self, format_choice):
        """Return the content of the stored output file for ``format_choice``.

        Returns a fixed message when no format is given or nothing is stored
        for it, ``"File not found."`` when the stored path does not exist, and
        a ``"Could not read file: ..."`` message for any other read or decode
        failure, so the UI callback never raises on a bad path.
        """
        file_path = self.outputs.get(format_choice) if format_choice else None
        if not file_path:
            return "No file available or format not selected."

        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read()
        except FileNotFoundError:
            return "File not found."
        except (OSError, UnicodeDecodeError) as exc:
            # e.g. the path is a directory, is unreadable, or is not UTF-8 text.
            return f"Could not read file: {exc}"

    # User interface for the transcription kit using Gradio
    def setup_ui(self):
        """Build all widgets inside ``self.app`` and wire their event handlers."""
        with self.app:
            gr.Markdown("# Kit Transcriptor Whisperx")
            gr.Markdown("❤️ Follow us on [YouTube](https://www.youtube.com/channel/UC_YzjCh-CSSCSGANvt5wBNQ?sub_confirmation=1), [GitHub](https://github.com/rgcodeai) 🌐 More on [Mister Contenidos](https://mistercontenidos.com)")
            with gr.Row():
                # Left column: inputs and the start button.
                with gr.Column():
                    gr.Markdown("### Supported Formats: Audio (mp3, wav) and Video (mp4, avi, mov, flv)")
                    file_input = gr.File(label="Upload your multimedia file", type="filepath")
                    device_dropdown = gr.Dropdown(label="Select device", choices=["cuda", "cpu"], value=self.default_device)
                    model_dropdown = gr.Dropdown(label="Select model", choices=list(model_options.keys()), value=self.default_model)
                    language_dropdown = gr.Dropdown(label="Select language", choices=list(language_options.keys()), value="Identify")
                    transcribe_button = gr.Button("Start Transcription")

                # Right column: timing, format selector, content viewer, download.
                with gr.Column():
                    transcription_time_display = gr.Textbox(label="Last Transcription Time (seconds)", interactive=False, lines=1)
                    format_choice = gr.Radio(['TXT', 'SRT', 'VTT', 'JSON'], label="Select format to view:", value='TXT')
                    output_text = gr.Textbox(label="File Content", interactive=False, lines=10)
                    download_button = gr.Button("Download Transcription")
                    format_choice.change(fn=self.update_output_text, inputs=format_choice, outputs=output_text)
                    # Hands the stored value for the selected format to a File component.
                    download_button.click(fn=lambda x: self.outputs.get(x), inputs=format_choice, outputs=gr.File())

                transcribe_button.click(fn=self.start_transcription, inputs=[file_input, device_dropdown, language_dropdown, model_dropdown], outputs=[output_text, transcription_time_display])

    def launch(self):
        """Launch the transcription application."""
        self.setup_ui()
        self.app.launch()
88
+
89
+
90
# Script entry point: build the application object, then build and serve its UI.
if __name__ == "__main__":
    app = TranscriptionApp()
    app.launch()