# Spaces: Runtime error
import functools
import json

import gradio as gr
from faster_whisper import WhisperModel  # Assuming you have installed this library
def _make_subtitle_line(words, text):
    """Build one subtitle-line dict from a non-empty list of word dicts."""
    return {
        "word": text,
        "start": words[0]["start"],
        "end": words[-1]["end"],
        "textcontents": words,
    }


def split_text_into_lines(data, max_chars, max_duration, max_gap):
    """Group word-level timestamps into subtitle lines.

    A line is closed *before* a word whenever adding that word would make
    the line's text longer than ``max_chars``, would push the summed word
    durations past ``max_duration``, or when the pause between the line's
    last word and this word is longer than ``max_gap``. The word that
    triggers the break therefore starts the next line instead of being
    tacked onto the one it overflows.

    A single word that exceeds a limit on its own cannot be split and is
    emitted as a line by itself.

    Args:
        data: Sequence of dicts with ``"word"``, ``"start"`` and ``"end"``
            keys, in playback order.
        max_chars: Maximum length of a line's space-joined text.
        max_duration: Maximum sum of ``end - start`` over a line's words
            (pauses between words are not counted), in the same unit as
            the timestamps.
        max_gap: Longest pause allowed between two consecutive words of
            the same line, in the same unit as the timestamps.

    Returns:
        A list of dicts with keys ``"word"`` (the joined text), ``"start"``
        (start of the first word), ``"end"`` (end of the last word) and
        ``"textcontents"`` (the word dicts that make up the line).
    """
    subtitles = []
    line = []
    line_text = ""
    line_duration = 0

    for word_data in data:
        word = word_data["word"]
        word_duration = word_data["end"] - word_data["start"]

        if line:
            # Evaluate the limits as if the word were already on the line,
            # so the break happens before the limit is crossed.
            must_break = (
                word_data["start"] - line[-1]["end"] > max_gap
                or line_duration + word_duration > max_duration
                or len(line_text) + 1 + len(word) > max_chars
            )
            if must_break:
                subtitles.append(_make_subtitle_line(line, line_text))
                line = []
                line_duration = 0

        # Grow the text incrementally instead of re-joining the whole line.
        line_text = line_text + " " + word if line else word
        line.append(word_data)
        line_duration += word_duration

    # Flush whatever is left after the last word.
    if line:
        subtitles.append(_make_subtitle_line(line, line_text))
    return subtitles
@functools.lru_cache(maxsize=1)
def _load_whisper_model(model_size):
    """Return the WhisperModel for ``model_size``, constructing it only once."""
    return WhisperModel(model_size)


def transcribe_audio(audiofilename, max_chars, max_duration, max_gap):
    """Transcribe an audio file and group its words into subtitle lines.

    Args:
        audiofilename: Audio input handed to ``WhisperModel.transcribe``.
        max_chars: Forwarded to ``split_text_into_lines``.
        max_duration: Forwarded to ``split_text_into_lines``.
        max_gap: Forwarded to ``split_text_into_lines``.

    Returns:
        The list of subtitle-line dicts produced by
        ``split_text_into_lines`` from the word-level timestamps.
    """
    # Reuse one model instance across calls instead of rebuilding it for
    # every request.
    model = _load_whisper_model("medium")
    segments, _info = model.transcribe(audiofilename, word_timestamps=True)

    wordlevel_info = []
    # The transcription actually runs while `segments` is being consumed.
    for segment in segments:
        wordlevel_info.extend(
            {"word": word.word, "start": word.start, "end": word.end}
            for word in segment.words
        )

    return split_text_into_lines(wordlevel_info, max_chars, max_duration, max_gap)
def audio_transcription(audiofile, max_chars, max_duration, max_gap):
    """Transcribe ``audiofile`` and return the subtitle lines as JSON text.

    Args:
        audiofile: Audio input forwarded to ``transcribe_audio``.
        max_chars: Forwarded to ``transcribe_audio``.
        max_duration: Forwarded to ``transcribe_audio``.
        max_gap: Forwarded to ``transcribe_audio``.

    Returns:
        A JSON string, indented by 4 spaces, holding the list of subtitle
        lines returned by ``transcribe_audio``.
    """
    transcription = transcribe_audio(audiofile, max_chars, max_duration, max_gap)
    # ensure_ascii=False keeps non-ASCII transcripts readable in the output
    # instead of rendering them as \uXXXX escapes; the JSON stays valid.
    return json.dumps(transcription, indent=4, ensure_ascii=False)
# Gradio UI: one uploaded audio file (passed as type="filepath") plus the
# three numeric limits, in the order audio_transcription expects them.
# The result is shown as plain text.
iface = gr.Interface(
    fn=audio_transcription,
    inputs=[
        gr.Audio(sources="upload", type="filepath"),
        gr.Number(label="MaxChars"),
        gr.Number(label="MaxDuration"),
        gr.Number(label="MaxGap"),
    ],
    outputs="text",
    description="Upload an audio file and get its transcription in JSON format.",
)
iface.launch()