marquesafonso
committed on
Commit
•
55728fe
1
Parent(s):
27d74f3
add clean text transcription output
Browse files
- app.py +3 -2
- src/transcriber.py +3 -1
app.py
CHANGED
@@ -8,11 +8,12 @@ def main():
|
|
8 |
video_file = gr.File(file_types=["video"],type="filepath", label="Upload a video")
|
9 |
max_words_per_line = gr.Number(value=6, label="Max words per line")
|
10 |
task = gr.Dropdown(choices=["transcribe", "translate"], value="transcribe", label="Select Task")
|
11 |
-
text_output = gr.Textbox(label="Text transcription", show_copy_button=True)
|
12 |
srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
|
|
|
13 |
gr.Interface(transcriber,
|
14 |
inputs=[video_file, max_words_per_line, task],
|
15 |
-
outputs=[text_output, srt_file],
|
16 |
allow_flagging="never")
|
17 |
demo.launch()
|
18 |
|
|
|
8 |
video_file = gr.File(file_types=["video"],type="filepath", label="Upload a video")
|
9 |
max_words_per_line = gr.Number(value=6, label="Max words per line")
|
10 |
task = gr.Dropdown(choices=["transcribe", "translate"], value="transcribe", label="Select Task")
|
11 |
+
text_output = gr.Textbox(label="SRT Text transcription", show_copy_button=True)
|
12 |
srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
|
13 |
+
text_clean_output = gr.Textbox(label="Text transcription", show_copy_button=True)
|
14 |
gr.Interface(transcriber,
|
15 |
inputs=[video_file, max_words_per_line, task],
|
16 |
+
outputs=[text_output, srt_file, text_clean_output],
|
17 |
allow_flagging="never")
|
18 |
demo.launch()
|
19 |
|
src/transcriber.py
CHANGED
@@ -23,6 +23,7 @@ def convert_seconds_to_time(seconds):
|
|
23 |
def write_srt(segments, max_words_per_line, srt_path):
|
24 |
with open(srt_path, "w", encoding='utf-8') as file:
|
25 |
result = ''
|
|
|
26 |
line_counter = 1
|
27 |
for _, segment in enumerate(segments):
|
28 |
words_in_line = []
|
@@ -35,11 +36,12 @@ def write_srt(segments, max_words_per_line, srt_path):
|
|
35 |
end_time = convert_seconds_to_time(words_in_line[-1].end)
|
36 |
line_text = ' '.join([w.word.strip() for w in words_in_line])
|
37 |
result += f"{line_counter}\n{start_time} --> {end_time}\n{line_text}\n\n"
|
|
|
38 |
# Reset for the next line and increment line counter
|
39 |
line_counter += 1
|
40 |
words_in_line = [] # Reset words list for the next line
|
41 |
file.write(result)
|
42 |
-
return result, srt_path
|
43 |
|
44 |
def transcriber(video_input:gr.File,
|
45 |
max_words_per_line:int,
|
|
|
23 |
def write_srt(segments, max_words_per_line, srt_path):
|
24 |
with open(srt_path, "w", encoding='utf-8') as file:
|
25 |
result = ''
|
26 |
+
result_clean = []
|
27 |
line_counter = 1
|
28 |
for _, segment in enumerate(segments):
|
29 |
words_in_line = []
|
|
|
36 |
end_time = convert_seconds_to_time(words_in_line[-1].end)
|
37 |
line_text = ' '.join([w.word.strip() for w in words_in_line])
|
38 |
result += f"{line_counter}\n{start_time} --> {end_time}\n{line_text}\n\n"
|
39 |
+
result_clean += [line_text]
|
40 |
# Reset for the next line and increment line counter
|
41 |
line_counter += 1
|
42 |
words_in_line = [] # Reset words list for the next line
|
43 |
file.write(result)
|
44 |
+
return result, srt_path, " ".join(result_clean)
|
45 |
|
46 |
def transcriber(video_input:gr.File,
|
47 |
max_words_per_line:int,
|