mrmuminov committed on
Commit
31a57d8
·
verified ·
1 Parent(s): 9e4dfaa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -74
app.py CHANGED
@@ -23,22 +23,14 @@ device = 0 if torch.cuda.is_available() else "cpu"
23
  pipe = pipeline(
24
  task="automatic-speech-recognition",
25
  model=MODEL_NAME,
26
- chunk_length_s=30,
27
  device=device,
 
 
 
 
28
  )
29
 
30
- # Extract YouTube Video ID
31
- def _extract_yt_video_id(yt_url):
32
- parsed_url = urlparse(yt_url)
33
- return parse_qs(parsed_url.query).get("v", [""])[0]
34
-
35
- # Embed YouTube Video in HTML
36
- def _return_yt_html_embed(yt_url):
37
- video_id = _extract_yt_video_id(yt_url)
38
- if not video_id:
39
- raise gr.Error("Invalid YouTube URL. Please check and try again.")
40
- return f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"></iframe> </center>'
41
-
42
  # Transcription function (Fix applied)
43
  def transcribe(audio_file, task):
44
  if audio_file is None:
@@ -72,58 +64,10 @@ def transcribe(audio_file, task):
72
  inputs,
73
  batch_size=BATCH_SIZE,
74
  generate_kwargs=generate_kwargs,
75
- return_timestamps="word"
76
  )
77
 
78
  return result["text"]
79
- # Download YouTube audio
80
- def download_yt_audio(yt_url, filename):
81
- ydl_opts = {
82
- "format": "bestaudio/best",
83
- "outtmpl": filename,
84
- "postprocessors": [
85
- {"key": "FFmpegExtractAudio", "preferredcodec": "mp3", "preferredquality": "192"}
86
- ],
87
- }
88
-
89
- with youtube_dl.YoutubeDL(ydl_opts) as ydl:
90
- try:
91
- info = ydl.extract_info(yt_url, download=False)
92
- file_length_s = info.get("duration", 0) # Duration in seconds
93
- if file_length_s > YT_LENGTH_LIMIT_S:
94
- raise gr.Error(f"Maximum YouTube length is 1 hour. Your video is {file_length_s // 3600}h {file_length_s % 3600 // 60}m {file_length_s % 60}s.")
95
- ydl.download([yt_url])
96
- except youtube_dl.utils.DownloadError as err:
97
- raise gr.Error(str(err))
98
-
99
- # YouTube transcription function
100
- def yt_transcribe(yt_url, task, max_filesize=75.0):
101
- html_embed_str = _return_yt_html_embed(yt_url)
102
-
103
- with tempfile.TemporaryDirectory() as tmpdirname:
104
- filepath = os.path.join(tmpdirname, "audio.mp3")
105
- download_yt_audio(yt_url, filepath)
106
-
107
- if os.path.getsize(filepath) > max_filesize * 1024 * 1024:
108
- raise gr.Error(f"File too large! Max allowed size is {max_filesize}MB.")
109
-
110
- with open(filepath, "rb") as f:
111
- inputs = ffmpeg_read(f.read(), pipe.feature_extractor.sampling_rate)
112
-
113
- inputs = {
114
- "array": inputs,
115
- "sampling_rate": pipe.feature_extractor.sampling_rate,
116
- "attention_mask": torch.ones(len(inputs), dtype=torch.long),
117
- }
118
-
119
- text = pipe(
120
- {"input_features": inputs},
121
- batch_size=BATCH_SIZE,
122
- generate_kwargs={"task": task, "forced_decoder_ids": None},
123
- return_timestamps=True
124
- )["text"]
125
-
126
- return html_embed_str, text
127
 
128
  # Gradio UI
129
  demo = gr.Blocks()
@@ -140,18 +84,6 @@ file_transcribe = gr.Interface(
140
  flagging_mode="never",
141
  )
142
 
143
- yt_transcribe = gr.Interface(
144
- fn=yt_transcribe,
145
- inputs=[
146
- gr.Textbox(lines=1, placeholder="Paste YouTube URL here", label="YouTube URL"),
147
- gr.Radio(["transcribe", "translate"], label="Task")
148
- ],
149
- outputs=["html", "text"],
150
- title="Whisper Large V3: Transcribe YouTube",
151
- description="Whisper Large V3 fine-tuned for Uzbek language by Dataprizma",
152
- flagging_mode="never",
153
- )
154
-
155
  with demo:
156
  gr.TabbedInterface([file_transcribe], ["Audio file"])
157
 
 
23
  pipe = pipeline(
24
  task="automatic-speech-recognition",
25
  model=MODEL_NAME,
26
+ chunk_length_s=9,
27
  device=device,
28
+ model_kwargs={
29
+ # "torch_dtype": torch.float16,
30
+ "attn_implementation": "eager"
31
+ },
32
  )
33
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  # Transcription function (Fix applied)
35
  def transcribe(audio_file, task):
36
  if audio_file is None:
 
64
  inputs,
65
  batch_size=BATCH_SIZE,
66
  generate_kwargs=generate_kwargs,
67
+ return_timestamps=False
68
  )
69
 
70
  return result["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  # Gradio UI
73
  demo = gr.Blocks()
 
84
  flagging_mode="never",
85
  )
86
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  with demo:
88
  gr.TabbedInterface([file_transcribe], ["Audio file"])
89