liyaoshi committed
Commit 954b6ac · verified · 1 Parent(s): 79af996

Update app.py

Files changed (1)
  1. app.py +238 -9
app.py CHANGED
@@ -1,19 +1,248 @@
  import gradio as gr

- title = "Space demo for gradio: Greeting"
- description = "Input your name"


- def greet(name, intensity):
-     return "Hello, " + name + "!" * int(intensity)

- demo = gr.Interface(
-     fn=greet,
-     inputs=["text", "slider"],
-     outputs=["text"],
      title = title,
      description = description
  )
  demo.queue(max_size = 20)

- demo.launch()
+ # -*- coding: utf-8 -*-
+ """audio_transcript.ipynb
+
+ Automatically generated by Colab.
+
+ Original file is located at
+     https://colab.research.google.com/drive/1XRGgJvXg3QOPl2XecLjkwngRQRsv0g-6
+ """
+ # install packages
+
+ # !pip install --upgrade -q ipython-autotime
+
+ # %load_ext autotime
+
+ # Download youtube video
+
+ # !pip install -q pytube
+
+ # youtube video download function
+
+ import os
+ from pytube import YouTube
+
+ def progress_function(stream, chunk, bytes_remaining):
+     total_size = stream.filesize
+     bytes_downloaded = total_size - bytes_remaining
+     percentage_of_completion = bytes_downloaded / total_size * 100
+     print(f"Downloaded {percentage_of_completion}%")
+
+ def youtube_download(video_url):
+     yt = YouTube(video_url, on_progress_callback=progress_function)
+
+     # get video title
+     video_title = yt.title
+     print(f"Downloading video: {video_title}")
+
+     stream = yt.streams.get_highest_resolution()
+     # get video default name
+     default_filename = stream.default_filename
+     stream.download()
+
+     return default_filename
+
+ # use insanely-fast-whisper
+ # !pip install --upgrade -q transformers optimum accelerate pyannote.audio
+
+ import re
+ import json
+ import torch
+ from transformers import pipeline
+ from pyannote.audio import Pipeline
+
+ # convert transcript json to plain text
+
+ import json
+
+ def seconds_to_hms(seconds):
+     # Simple conversion of seconds to HH:MM:SS format
+     hours, remainder = divmod(seconds, 3600)
+     minutes, seconds = divmod(remainder, 60)
+     return f"{int(hours):02}:{int(minutes):02}:{int(seconds):02}"
+
+ def transcript_json2txt(segmented_transcript,file_path):
+
+     # with open(file_path, 'r') as file:
+     #     formatted_dialogue = json.load(file)
+     # Generating the dialogue text
+     formatted_dialogue = segmented_transcript
+     dialogue_text = ""
+     for dialogue in formatted_dialogue:
+         # Converting start time to HH:MM:SS format
+         start_time = seconds_to_hms(dialogue['timestamp'][0])
+         speaker = dialogue.get('speaker',"").replace("SPEAKER_", "speaker")  # Formatting speaker name
+         text = dialogue.get('text',"").strip()  # Removing any leading/trailing whitespaces from the text
+
+         # Constructing each dialogue entry
+         dialogue_text += f"{start_time}, {speaker}: {text}\n\n"
+
+     # Checking the first part of the generated dialogue text
+
+     print("preview txt...")
+     print('---------------------------------\n')
+     print(dialogue_text[:500])  # Displaying the first 500 characters for review
+
+     # Save the dialogue text to a file
+     output_txt_file_path = file_path.replace('.json','.txt')
+     with open(output_txt_file_path, 'w',encoding="utf8") as file:
+         file.write(dialogue_text)
+     print(
+         f"Voila!✨ Your file has been transcribed go check it out over here 👉 {output_txt_file_path}"
+     )
+     return dialogue_text
+
+ # transcript function
+
+ model_name = "openai/whisper-large-v3"
+ flash = False  # Set to True to use Flash Attention 2
+ print('---------------------------------')
+ print('load pipe...')
+ print('---------------------------------')
+ # Initialize the pipeline
+ pipe = pipeline(
+     "automatic-speech-recognition",
+     model=model_name,
+     torch_dtype=torch.float16,
+     # low_cpu_mem_usage=True,
+     device='cuda:0',
+     model_kwargs={"use_flash_attention_2": flash},
+ )
+
+ def transcript(file_path,pipe = pipe):
+     pattern = r'\.mp4|\.wav|\.mp3'
+     transcript_path = re.sub(pattern,'.json',file_path)
+     device_id = "0"  # or "mps" for Macs with Apple Silicon
+     device = "cuda"  # or "mps" for Macs with Apple Silicon
+     task = "transcribe"  # or "translate"
+     language = 'Chinese'  # Whisper auto-detects the language
+     batch_size = 24
+     timestamp = "chunk"  # or "word"
+     diarization_model = "pyannote/speaker-diarization-3.1"
+
+     # Transcribe the audio
+     print('Transcribing...')
+     print('---------------------------------\n')
+
+     outputs = pipe(
+         file_path,
+         chunk_length_s=30,
+         batch_size=batch_size,
+         # generate_kwargs={"task": task, "language": language},
+         generate_kwargs={"task": task},
+         return_timestamps=True
+     )
+
+     # Save or display the output
+     print('Saving transcript...')
+     print('---------------------------------\n')
+
+     with open(transcript_path, "w", encoding="utf8") as fp:
+         json.dump(outputs, fp, ensure_ascii=False)
+
+     print(
+         f"Voila!✨ Your file has been transcribed go check it out over here 👉 {transcript_path}"
+     )
+
+     # save to transcript txt file
+     transcript_txt = transcript_json2txt(outputs['chunks'],transcript_path)
+     transcript_txt_path = transcript_path.replace('.json','.txt')
+
+     # save to srt file
+
+     # Function to convert time in seconds to SRT time format
+     def convert_to_srt_time(timestamp):
+         hours = int(timestamp // 3600)
+         minutes = int((timestamp % 3600) // 60)
+         seconds = int(timestamp % 60)
+         milliseconds = int((timestamp % 1) * 1000)
+         return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}"
+
+     # Creating the SRT content
+     srt_content = ""
+     for index, entry in enumerate(outputs['chunks']):
+         try:
+             start_time = convert_to_srt_time(entry['timestamp'][0])
+             end_time = convert_to_srt_time(entry['timestamp'][1] if entry['timestamp'][1] is not None else entry['timestamp'][0] + 1)
+             srt_content += f"{index + 1}\n{start_time} --> {end_time}\n{entry['text']}\n\n"
+         except Exception as e:
+             print(e)
+             print(entry)
+
+     # Saving the SRT content to a file
+     srt_file_path = transcript_path.replace('.json','.srt')
+     # srt_file_path = '/kaggle/working/6-revolution_transcript.srt'
+     with open(srt_file_path, 'w',encoding="utf8") as file:
+         file.write(srt_content)
+
+     print(
+         f"Voila!✨ Your file has been transcribed go check it out over here 👉 {srt_file_path}"
+     )
+     return transcript_txt,srt_file_path
+
+ # youtube transcript function
+
+ def transcript_youtube(url):
+     # download youtube video
+     default_filename = youtube_download(url)
+     file_path = os.path.join(os.getcwd(),default_filename)
+     transcript_txt,srt_file_path = transcript(file_path)
+     return transcript_txt[:500],file_path,srt_file_path
+
+ # test youtube transcript
+
+ # url = "https://www.youtube.com/watch?v=2UP7pfGVm0Y&t=252s&ab_channel=TheTEFLOrg"
+ # transcript_youtube(url)
+
+ # gradio interface
+
+ # !pip install --upgrade -q gradio
+
  import gradio as gr

+ title = "Fastly audio transcript"
+ description = "Input your audio or record your audio"
+
+
+ def audio_func(audio_file):
+     return f"This is the audio file path: {audio_file}"
+ def file_func(file_path):
+     return f"This is the file path: {file_path}"
+
+ audio_input = gr.Audio(type='filepath')
+ file_input = gr.File(type="filepath")

+ youtube_interface = gr.Interface(
+     fn = transcript_youtube,
+     inputs = gr.Textbox(label="youtube video", info="Input a youtube video url"),
+     outputs = [
+         gr.Textbox(label="Transcript preview", lines=3),
+         gr.File(label="Download Video"),
+         gr.File(label="Srt file")
+     ],
+     title = "Fastly Youtube Video Transcript",
+     description = "Transcribe any Youtube video in seconds!!!"
+ )

+ audio_interface = gr.Interface(
+     fn=audio_func,
+     inputs=audio_input,
+     outputs=[gr.Textbox(label="Greeting",lines=3)],
      title = title,
      description = description
  )
+
+ file_interface = gr.Interface(
+     fn=file_func,
+     inputs=file_input,
+     outputs=[gr.Textbox(label="Greeting",lines=3)],
+     title = title,
+     description = description
+ )
+
+ demo = gr.TabbedInterface([youtube_interface], ["Transcript youtube video"])
  demo.queue(max_size = 20)

+ demo.launch(share = True)