Spaces:
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,28 +1,76 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
from faster_whisper import WhisperModel
|
3 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
model_size = "medium"
|
7 |
model = WhisperModel(model_size)
|
8 |
|
9 |
-
segments, info = model.transcribe(
|
10 |
segments = list(segments) # The transcription will actually run here.
|
11 |
-
|
12 |
wordlevel_info = []
|
|
|
13 |
for segment in segments:
|
14 |
for word in segment.words:
|
15 |
-
wordlevel_info.append({'word':word.word,'start':word.start,'end':word.end})
|
|
|
|
|
|
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
|
21 |
-
|
|
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
output_text = gr.outputs.Textbox(label="Transcription")
|
26 |
|
27 |
-
|
28 |
-
gr.Interface(transcribe_audio, inputs=audiofile_input, outputs=output_text).launch()
|
|
|
|
|
|
|
1 |
import json
|
2 |
+
import gradio as gr
|
3 |
+
from faster_whisper import WhisperModel # Assuming you have installed this library
|
4 |
+
|
5 |
+
def split_text_into_lines(data, max_chars=30, max_duration=2.5, max_gap=1.5):
    """Group word-level timestamps into subtitle lines.

    A line is flushed (including the word just added) as soon as any limit is
    exceeded: the joined text's character count, the summed per-word duration,
    or the silence gap between the previous word's end and the current word's
    start. Any trailing partial line is flushed at the end.

    Args:
        data: list of dicts with 'word', 'start', 'end' keys (times in seconds).
        max_chars: flush when the joined line text grows past this length.
        max_duration: flush when the summed word durations exceed this (seconds).
        max_gap: flush when the silence before the current word exceeds this (seconds).

    Returns:
        list of dicts, one per subtitle line:
        {'word': joined text, 'start': first word start, 'end': last word end,
         'textcontents': the word dicts making up the line}.
    """

    def _make_line(words):
        # One subtitle record spanning the first word's start to the last word's end.
        return {
            "word": " ".join(item["word"] for item in words),
            "start": words[0]["start"],
            "end": words[-1]["end"],
            "textcontents": words,
        }

    subtitles = []
    line = []
    line_duration = 0

    for idx, word_data in enumerate(data):
        line.append(word_data)
        line_duration += word_data["end"] - word_data["start"]

        joined = " ".join(item["word"] for item in line)

        duration_exceeded = line_duration > max_duration
        chars_exceeded = len(joined) > max_chars
        # NOTE(review): when a long gap occurs, the word AFTER the gap is
        # flushed together with the old line rather than starting a new one --
        # preserved from the original behavior.
        maxgap_exceeded = (
            (word_data["start"] - data[idx - 1]["end"]) > max_gap if idx > 0 else False
        )

        if (duration_exceeded or chars_exceeded or maxgap_exceeded) and line:
            subtitles.append(_make_line(line))
            line = []
            line_duration = 0

    if line:  # flush any trailing partial line
        subtitles.append(_make_line(line))

    return subtitles
|
50 |
+
|
51 |
+
# Cached model instance: loading the "medium" Whisper weights is expensive,
# so do it once per process instead of once per request (the original
# re-instantiated WhisperModel on every call).
_WHISPER_MODEL = None


def transcribe_audio(audiofilename):
    """Transcribe an audio file and return line-level subtitle records.

    Args:
        audiofilename: path to an audio file readable by faster-whisper.

    Returns:
        The list of subtitle-line dicts produced by split_text_into_lines().
    """
    global _WHISPER_MODEL
    if _WHISPER_MODEL is None:
        _WHISPER_MODEL = WhisperModel("medium")

    segments, info = _WHISPER_MODEL.transcribe(audiofilename, word_timestamps=True)
    segments = list(segments)  # The transcription actually runs here (lazy generator).

    # Flatten per-segment word timestamps into one word-level list.
    wordlevel_info = [
        {"word": word.word, "start": word.start, "end": word.end}
        for segment in segments
        for word in segment.words
    ]

    return split_text_into_lines(wordlevel_info)
|
65 |
|
66 |
+
def audio_transcription(audiofile):
    """Gradio handler: transcribe the uploaded audio and return subtitle data.

    Robust to the shapes Gradio hands over depending on the Audio component's
    `type` setting: a plain filepath string, or a tempfile wrapper exposing
    `.name`. (The original unconditionally read `.name`, which crashes when a
    string path is supplied.)
    """
    filepath = audiofile if isinstance(audiofile, str) else audiofile.name
    return transcribe_audio(filepath)
|
69 |
|
70 |
+
# type="file" makes Gradio pass a tempfile wrapper whose .name is a real path
# on disk; the default type ("numpy") would pass a (sample_rate, data) tuple
# and crash audio_transcription at runtime.
inputs = gr.inputs.Audio(type="file", label="Upload Audio File")
# BUG FIX: the original `gr.outputs.Json` is not a Gradio attribute; the
# output class is spelled `JSON`.
outputs = gr.outputs.JSON(label="Transcription Output")

title = "Audio Transcription"
description = "Upload an audio file and get the transcription in JSON format."

demo = gr.Interface(
    fn=audio_transcription,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
)

if __name__ == "__main__":
    demo.launch()
|
|