whisper-asr-uz

Running

App Files Community

mrmuminov committed on Apr 30

Commit

31a57d8

verified ·

1 Parent(s): 9e4dfaa

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -74

app.py CHANGED Viewed

@@ -23,22 +23,14 @@ device = 0 if torch.cuda.is_available() else "cpu"
 pipe = pipeline(
     task="automatic-speech-recognition",
     model=MODEL_NAME,
-    chunk_length_s=30,
     device=device,
 )
-# Extract YouTube Video ID
-def _extract_yt_video_id(yt_url):
-    parsed_url = urlparse(yt_url)
-    return parse_qs(parsed_url.query).get("v", [""])[0]
-# Embed YouTube Video in HTML
-def _return_yt_html_embed(yt_url):
-    video_id = _extract_yt_video_id(yt_url)
-    if not video_id:
-        raise gr.Error("Invalid YouTube URL. Please check and try again.")
-    return f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"></iframe> </center>'
 # Transcription function (Fix applied)
 def transcribe(audio_file, task):
     if audio_file is None:
@@ -72,58 +64,10 @@ def transcribe(audio_file, task):
         inputs,
         batch_size=BATCH_SIZE,
         generate_kwargs=generate_kwargs,
-        return_timestamps="word"
     )
     return result["text"]
-# Download YouTube audio
-def download_yt_audio(yt_url, filename):
-    ydl_opts = {
-        "format": "bestaudio/best",
-        "outtmpl": filename,
-        "postprocessors": [
-            {"key": "FFmpegExtractAudio", "preferredcodec": "mp3", "preferredquality": "192"}
-        ],
-    }
-    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-        try:
-            info = ydl.extract_info(yt_url, download=False)
-            file_length_s = info.get("duration", 0)  # Duration in seconds
-            if file_length_s > YT_LENGTH_LIMIT_S:
-                raise gr.Error(f"Maximum YouTube length is 1 hour. Your video is {file_length_s // 3600}h {file_length_s % 3600 // 60}m {file_length_s % 60}s.")
-            ydl.download([yt_url])
-        except youtube_dl.utils.DownloadError as err:
-            raise gr.Error(str(err))
-# YouTube transcription function
-def yt_transcribe(yt_url, task, max_filesize=75.0):
-    html_embed_str = _return_yt_html_embed(yt_url)
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        filepath = os.path.join(tmpdirname, "audio.mp3")
-        download_yt_audio(yt_url, filepath)
-        if os.path.getsize(filepath) > max_filesize * 1024 * 1024:
-            raise gr.Error(f"File too large! Max allowed size is {max_filesize}MB.")
-        with open(filepath, "rb") as f:
-            inputs = ffmpeg_read(f.read(), pipe.feature_extractor.sampling_rate)
-    inputs = {
-        "array": inputs,
-        "sampling_rate": pipe.feature_extractor.sampling_rate,
-        "attention_mask": torch.ones(len(inputs), dtype=torch.long),
-    }
-    text = pipe(
-        {"input_features": inputs},
-        batch_size=BATCH_SIZE,
-        generate_kwargs={"task": task, "forced_decoder_ids": None},
-        return_timestamps=True
-    )["text"]
-    return html_embed_str, text
 # Gradio UI
 demo = gr.Blocks()
@@ -140,18 +84,6 @@ file_transcribe = gr.Interface(
     flagging_mode="never",
 )
-yt_transcribe = gr.Interface(
-    fn=yt_transcribe,
-    inputs=[
-        gr.Textbox(lines=1, placeholder="Paste YouTube URL here", label="YouTube URL"),
-        gr.Radio(["transcribe", "translate"], label="Task")
-    ],
-    outputs=["html", "text"],
-    title="Whisper Large V3: Transcribe YouTube",
-    description="Whisper Large V3 fine-tuned for Uzbek language by Dataprizma",
-    flagging_mode="never",
-)
 with demo:
     gr.TabbedInterface([file_transcribe], ["Audio file"])

 pipe = pipeline(
     task="automatic-speech-recognition",
     model=MODEL_NAME,
+    chunk_length_s=9,
     device=device,
+    model_kwargs={
+#        "torch_dtype": torch.float16,
+        "attn_implementation": "eager"
+    },
 )
 # Transcription function (Fix applied)
 def transcribe(audio_file, task):
     if audio_file is None:
         inputs,
         batch_size=BATCH_SIZE,
         generate_kwargs=generate_kwargs,
+        return_timestamps=False
     )
     return result["text"]
 # Gradio UI
 demo = gr.Blocks()
     flagging_mode="never",
 )
 with demo:
     gr.TabbedInterface([file_transcribe], ["Audio file"])