Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,19 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
|
3 |
-
title = "
|
4 |
-
description = "Input your
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
-
def greet(name, intensity):
|
8 |
-
return "Hello, " + name + "!" * int(intensity)
|
9 |
|
10 |
-
|
11 |
-
fn=
|
12 |
-
inputs=
|
13 |
-
outputs=["
|
14 |
title = title,
|
15 |
description = description
|
16 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
demo.queue(max_size = 20)
|
18 |
|
19 |
-
demo.launch()
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""audio_transcript.ipynb
|
3 |
+
|
4 |
+
Automatically generated by Colab.
|
5 |
+
|
6 |
+
Original file is located at
|
7 |
+
https://colab.research.google.com/drive/1XRGgJvXg3QOPl2XecLjkwngRQRsv0g-6
|
8 |
+
|
9 |
+
# install packages
|
10 |
+
|
11 |
+
# !pip install --upgrade -q ipython-autotime
|
12 |
+
|
13 |
+
# %load_ext autotime
|
14 |
+
|
15 |
+
# Download youtube video
|
16 |
+
|
17 |
+
# !pip install -q pytube
|
18 |
+
|
19 |
+
# youtube video download function
|
20 |
+
|
21 |
+
import os
|
22 |
+
from pytube import YouTube
|
23 |
+
|
24 |
+
def progress_function(stream, chunk, bytes_remaining):
    """pytube on_progress callback: print how much of the stream is done."""
    downloaded = stream.filesize - bytes_remaining
    pct = downloaded / stream.filesize * 100
    print(f"Downloaded {pct}%")
|
29 |
+
|
30 |
+
def youtube_download(video_url):
    """Download the highest-resolution stream of a YouTube video into the
    current working directory and return pytube's default file name."""
    video = YouTube(video_url, on_progress_callback=progress_function)

    # Announce which video is being fetched before starting the transfer.
    print(f"Downloading video: {video.title}")

    best_stream = video.streams.get_highest_resolution()
    filename = best_stream.default_filename  # name pytube saves the file under
    best_stream.download()

    return filename
|
43 |
+
|
44 |
+
# use insanely-fast-whisper
|
45 |
+
# !pip install --upgrade -q transformers optimum accelerate pyannote.audio
|
46 |
+
|
47 |
+
import re
|
48 |
+
import json
|
49 |
+
import torch
|
50 |
+
from transformers import pipeline
|
51 |
+
from pyannote.audio import Pipeline
|
52 |
+
|
53 |
+
# transfer srt to plain text
|
54 |
+
|
55 |
+
import json
|
56 |
+
|
57 |
+
def seconds_to_hms(seconds):
    """Format a duration in seconds as a zero-padded HH:MM:SS string."""
    hours = seconds // 3600
    leftover = seconds % 3600
    minutes = leftover // 60
    secs = leftover % 60
    return "{:02d}:{:02d}:{:02d}".format(int(hours), int(minutes), int(secs))
|
62 |
+
|
63 |
+
def transcript_json2txt(segmented_transcript, file_path):
    """Render Whisper chunk dicts as plain-text dialogue and save it to disk.

    Parameters
    ----------
    segmented_transcript : list[dict]
        Chunks with a 'timestamp' (start, end) pair and optional
        'speaker' / 'text' keys (as produced by the ASR pipeline).
    file_path : str
        Path of the transcript .json file; the .txt is written alongside it.

    Returns
    -------
    str
        The full dialogue text that was written to the .txt file.
    """
    lines = []
    for dialogue in segmented_transcript:
        # Whisper occasionally emits a missing or None start timestamp; fall
        # back to 0 instead of crashing (the SRT writer guards None similarly).
        ts = dialogue.get('timestamp') or (0,)
        start = ts[0] if ts[0] is not None else 0
        start_time = seconds_to_hms(start)
        speaker = dialogue.get('speaker', "").replace("SPEAKER_", "speaker")
        text = dialogue.get('text', "").strip()
        lines.append(f"{start_time}, {speaker}: {text}\n\n")
    # join() instead of repeated += — linear instead of quadratic.
    dialogue_text = "".join(lines)

    # Show the first part of the generated dialogue for a quick sanity check.
    print("preview txt...")
    print('---------------------------------\n')
    print(dialogue_text[:500])  # first 500 characters only

    # Save the dialogue text next to the source .json.
    output_txt_file_path = file_path.replace('.json', '.txt')
    with open(output_txt_file_path, 'w', encoding="utf8") as file:
        file.write(dialogue_text)
    print(
        f"Voila!β¨ Your file has been transcribed go check it out over here π {output_txt_file_path}"
    )
    return dialogue_text
|
93 |
+
|
94 |
+
# transcript function
|
95 |
+
|
96 |
+
model_name = "openai/whisper-large-v3"
flash = False  # Set to True to use Flash Attention 2

# Hard-coding 'cuda:0' crashes at import time on CPU-only hosts (a likely
# cause of the Space's "Runtime error"); fall back to CPU — and to full
# precision, since float16 inference on CPU is poorly supported.
_device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
_dtype = torch.float16 if _device.startswith('cuda') else torch.float32

print('---------------------------------')
print('load pipe...')
print('---------------------------------')
# Initialize the ASR pipeline once at module load; transcript() reuses it.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model_name,
    torch_dtype=_dtype,
    # low_cpu_mem_usage=True,
    device=_device,
    model_kwargs={"use_flash_attention_2": flash},
)
|
110 |
+
|
111 |
+
def transcript(file_path, pipe=pipe):
    """Transcribe an audio/video file and write .json, .txt and .srt outputs.

    Parameters
    ----------
    file_path : str
        Path to a .mp4 / .wav / .mp3 file.
    pipe :
        ASR pipeline; defaults to the module-level instance.

    Returns
    -------
    tuple[str, str]
        (plain-text transcript, path of the generated .srt file).
    """
    # Raw string: '\.' in a plain literal is an invalid escape sequence.
    pattern = r'\.mp4|\.wav|\.mp3'
    transcript_path = re.sub(pattern, '.json', file_path)
    task = "transcribe"  # or "translate"
    batch_size = 24

    # Transcribe the audio.
    print('Transcribing...')
    print('---------------------------------\n')

    outputs = pipe(
        file_path,
        chunk_length_s=30,
        batch_size=batch_size,
        # generate_kwargs={"task": task, "language": language},
        generate_kwargs={"task": task},
        return_timestamps=True
    )

    # Save the raw pipeline output as JSON.
    print('Saving transcript...')
    print('---------------------------------\n')

    with open(transcript_path, "w", encoding="utf8") as fp:
        json.dump(outputs, fp, ensure_ascii=False)

    print(
        f"Voila!β¨ Your file has been transcribed go check it out over here π {transcript_path}"
    )

    # Save a plain-text version of the transcript.
    transcript_txt = transcript_json2txt(outputs['chunks'], transcript_path)

    def convert_to_srt_time(timestamp):
        # Convert seconds to the SRT "HH:MM:SS,mmm" time format.
        hours = int(timestamp // 3600)
        minutes = int((timestamp % 3600) // 60)
        seconds = int(timestamp % 60)
        milliseconds = int((timestamp % 1) * 1000)
        return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}"

    # Build the SRT body; a chunk with a None end timestamp gets start + 1s.
    srt_content = ""
    for index, entry in enumerate(outputs['chunks']):
        try:
            start_time = convert_to_srt_time(entry['timestamp'][0])
            end_time = convert_to_srt_time(
                entry['timestamp'][1]
                if entry['timestamp'][1] is not None
                else entry['timestamp'][0] + 1
            )
            srt_content += f"{index + 1}\n{start_time} --> {end_time}\n{entry['text']}\n\n"
        except Exception as e:
            # Best-effort: report malformed chunks but keep going.
            print(e)
            print(entry)

    # Save the SRT content next to the .json transcript.
    srt_file_path = transcript_path.replace('.json', '.srt')
    with open(srt_file_path, 'w', encoding="utf8") as file:
        file.write(srt_content)

    print(
        f"Voila!β¨ Your file has been transcribed go check it out over here π {srt_file_path}"
    )
    return transcript_txt, srt_file_path
|
182 |
+
|
183 |
+
# youtube transcript function
|
184 |
+
|
185 |
+
def transcript_youtube(url):
    """Download a YouTube video, transcribe it, and return the results.

    Returns a (first 500 characters of the transcript, local video path,
    .srt file path) triple for the Gradio outputs.
    """
    # Fetch the video into the working directory.
    local_name = youtube_download(url)
    local_path = os.path.join(os.getcwd(), local_name)

    full_text, srt_path = transcript(local_path)
    preview = full_text[:500]
    return preview, local_path, srt_path
|
191 |
+
|
192 |
+
# test youtube transcript
|
193 |
+
|
194 |
+
# url = "https://www.youtube.com/watch?v=2UP7pfGVm0Y&t=252s&ab_channel=TheTEFLOrg"
|
195 |
+
# transcript_youtube(url)
|
196 |
+
|
197 |
+
# gradio interface
|
198 |
+
|
199 |
+
# !pip install --upgrade -q gradio
|
200 |
+
|
201 |
import gradio as gr
|
202 |
|
203 |
+
# UI copy shared by the audio- and file-upload interfaces defined below.
title = "Fastly audio transcript"
description = "Input your audio or record your audio"
|
205 |
+
|
206 |
+
|
207 |
+
def audio_func(audio_file):
    """Placeholder handler: echo back the received audio file path."""
    prefix = "This is the audio file path: "
    return prefix + str(audio_file)
|
209 |
+
def file_func(file_path):
    """Placeholder handler: echo back the received file path."""
    return "This is the file path: {}".format(file_path)
|
211 |
+
|
212 |
+
# Shared Gradio input widgets; both hand the handler a filesystem path.
audio_input = gr.Audio(type='filepath')
file_input = gr.File(type="filepath")
|
214 |
|
215 |
+
# Tab that takes a YouTube URL and returns the transcript preview, the
# downloaded video file, and the generated .srt subtitle file.
youtube_interface = gr.Interface(
    fn=transcript_youtube,
    inputs=gr.Textbox(label="youtube video", info="Input a youtube video url"),
    outputs=[
        gr.Textbox(label="Transcript preview", lines=3),
        gr.File(label="Download Video"),
        gr.File(label="Srt file"),
    ],
    title="Fastly Youtube Video Transcript",  # fixed typo: was "Transcrip"
    description="Transcript Any Youtube video in Seconds!!!",
)
|
226 |
|
|
|
|
|
227 |
|
228 |
+
# Microphone/upload audio tab backed by the placeholder audio_func.
# NOTE(review): not added to the TabbedInterface below — currently unused;
# confirm whether it should be exposed.
audio_interface = gr.Interface(
    fn=audio_func,
    inputs=audio_input,
    outputs=[gr.Textbox(label="Greeting",lines=3)],
    title = title,
    description = description
)
|
235 |
+
|
236 |
+
# File-upload tab backed by the placeholder file_func.
# NOTE(review): not added to the TabbedInterface below — currently unused;
# confirm whether it should be exposed.
file_interface = gr.Interface(
    fn=file_func,
    inputs=file_input,
    outputs=[gr.Textbox(label="Greeting",lines=3)],
    title = title,
    description = description
)
|
243 |
+
|
244 |
+
# Only the YouTube tab is exposed; audio_interface/file_interface are defined
# above but left out — NOTE(review): presumably intentional, confirm.
demo = gr.TabbedInterface([youtube_interface], ["Transcript youtube video"])
# Queue requests so long transcriptions don't time out concurrent users.
demo.queue(max_size = 20)

# share=True opens a public tunnel link in addition to the local server.
demo.launch(share = True)
|
248 |
+
|