Spaces:

akjedidtz
/

bo2

Configuration error

App Files Files Community

akjedidtz committed on Nov 11, 2024

Commit

9266b14

verified ·

1 Parent(s): 8c6eb23

Update app.py

Browse files

Files changed (1) hide show

app.py +136 -60

app.py CHANGED Viewed

@@ -1,64 +1,140 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    """Stream a chat completion for `message`, yielding the reply as it grows.
-
-    Args:
-        message: The new user message.
-        history: Earlier (user, assistant) turns; empty entries are skipped.
-        system_message: Content of the leading system message.
-        max_tokens: Forwarded to `client.chat_completion` as `max_tokens`.
-        temperature: Forwarded to `client.chat_completion`.
-        top_p: Forwarded to `client.chat_completion`.
-
-    Yields:
-        The accumulated response text after each streamed chunk.
-    """
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    # A separate loop name keeps the `message` parameter from being shadowed.
-    for chunk in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = chunk.choices[0].delta.content
-        # A streamed chunk can arrive without text; appending None would raise
-        # TypeError, so only real text is accumulated.
-        if token:
-            response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(  # chat UI whose handler is `respond`
-    respond,
-    additional_inputs=[  # extra controls, in the same order as respond's parameters after `history`
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
 )
-if __name__ == "__main__":  # launch only when executed as a script
-    demo.launch()

+Hugging Face's logo
+Hugging Face
+Search models, datasets, users...
+Models
+Datasets
+Spaces
+Posts
+Docs
+Solutions
+Pricing
+Spaces:
+KingNish
+/
+Realtime-whisper-large-v3-turbo
+like
+254
+App
+Files
+Community
+5
+Realtime-whisper-large-v3-turbo
+/
+app.py
+KingNish's picture
+KingNish
+Update app.py
+fc21d85
+verified
+about 1 month ago
+raw
+Copy download link
+history
+blame
+contribute
+delete
+5.6 kB
+import spaces
+import torch
 import gradio as gr
+import tempfile
+import os
+import uuid
+import scipy.io.wavfile
+import time
+import numpy as np
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, WhisperTokenizer, pipeline
+import subprocess
+# Install flash-attn when the app starts. `env` replaces the child's whole
+# environment rather than adding to it, so the parent's variables (PATH, HOME,
+# ...) are merged in; otherwise the shell may not find `pip` at all.
+subprocess.run(
+    "pip install flash-attn --no-build-isolation",
+    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+    shell=True,
+)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# NOTE(review): float16 and flash_attention_2 are requested even when `device`
+# falls back to "cpu" -- confirm the CPU path is actually meant to work.
+torch_dtype = torch.float16
+MODEL_NAME = "openai/whisper-large-v3-turbo"
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    MODEL_NAME, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, attn_implementation="flash_attention_2"
+)
+model.to(device)
+processor = AutoProcessor.from_pretrained(MODEL_NAME)
+tokenizer = WhisperTokenizer.from_pretrained(MODEL_NAME)
+# Speech-recognition pipeline shared by the transcription handlers below.
+pipe = pipeline(
+    task="automatic-speech-recognition",
+    model=model,
+    tokenizer=tokenizer,
+    feature_extractor=processor.feature_extractor,
+    chunk_length_s=10,
+    torch_dtype=torch_dtype,
+    device=device,
 )
+@spaces.GPU
+def transcribe(inputs, previous_transcription):
+    """Transcribe one audio chunk and append it to the running transcription.
+
+    Args:
+        inputs: A `(sample_rate, audio_data)` pair, as unpacked below.
+        previous_transcription: Text accumulated from earlier chunks.
+
+    Returns:
+        `(updated_transcription, latency)` where latency is the elapsed
+        seconds formatted with two decimals. On any failure the previous
+        transcription is returned unchanged together with "Error".
+    """
+    start_time = time.time()
+    # Write the scratch WAV to the temp directory rather than the working
+    # directory, under a unique name so concurrent calls never collide.
+    filename = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4().hex}.wav")
+    try:
+        sample_rate, audio_data = inputs
+        scipy.io.wavfile.write(filename, sample_rate, audio_data)
+        transcription = pipe(filename)["text"]
+        previous_transcription += transcription
+        latency = time.time() - start_time
+        return previous_transcription, f"{latency:.2f}"
+    except Exception as e:
+        # Best-effort handler: report the problem and keep the stream alive.
+        print(f"Error during Transcription: {e}")
+        return previous_transcription, "Error"
+    finally:
+        # This runs once per streamed chunk, so a file left behind each time
+        # would slowly fill the disk.
+        if os.path.exists(filename):
+            os.remove(filename)
+@spaces.GPU
+def translate_and_transcribe(inputs, previous_transcription, target_language):
+    """Run the pipeline's "translate" task on one audio chunk and append the text.
+
+    Args:
+        inputs: A `(sample_rate, audio_data)` pair, as unpacked below.
+        previous_transcription: Text accumulated from earlier chunks.
+        target_language: Forwarded as the `language` generation argument.
+            NOTE(review): confirm whether the model treats this as the source
+            or the output language for the translate task.
+
+    Returns:
+        `(updated_transcription, latency)` where latency is the elapsed
+        seconds formatted with two decimals. On any failure the previous
+        transcription is returned unchanged together with "Error".
+    """
+    start_time = time.time()
+    # Write the scratch WAV to the temp directory rather than the working
+    # directory, under a unique name so concurrent calls never collide.
+    filename = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4().hex}.wav")
+    try:
+        sample_rate, audio_data = inputs
+        scipy.io.wavfile.write(filename, sample_rate, audio_data)
+        translation = pipe(filename, generate_kwargs={"task": "translate", "language": target_language} )["text"]
+        previous_transcription += translation
+        latency = time.time() - start_time
+        return previous_transcription, f"{latency:.2f}"
+    except Exception as e:
+        # Best-effort handler: report the problem and keep the stream alive.
+        print(f"Error during Translation and Transcription: {e}")
+        return previous_transcription, "Error"
+    finally:
+        # Remove the scratch file on every path so repeated calls do not
+        # accumulate WAV files on disk.
+        if os.path.exists(filename):
+            os.remove(filename)
+def clear():
+    """Return the empty string that the clear button writes to the output box."""
+    cleared = ""
+    return cleared
+# Microphone UI: streamed audio goes to `transcribe`, which updates the
+# transcription textbox and the latency readout.
+with gr.Blocks() as microphone:
+    with gr.Column():
+        gr.Markdown(f"# Realtime Whisper Large V3 Turbo: \n Transcribe Audio in Realtime. This Demo uses the Checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers.\n Note: The first token takes about 5 seconds. After that, it works flawlessly.")
+        with gr.Row():
+            input_audio_microphone = gr.Audio(streaming=True)
+            output = gr.Textbox(label="Transcription", value="")
+            latency_textbox = gr.Textbox(label="Latency (seconds)", value="0.0", scale=0)
+        with gr.Row():
+            clear_button = gr.Button("Clear Output")
+        # The current textbox value is fed back in as `previous_transcription`,
+        # so each streamed chunk extends the text already shown.
+        input_audio_microphone.stream(transcribe, [input_audio_microphone, output], [output, latency_textbox], time_limit=45, stream_every=2, concurrency_limit=None)
+        clear_button.click(clear, outputs=[output])
+# The file never defines `demo`, so calling `demo.launch()` directly raised
+# NameError. Alias the only Blocks app so the name `demo` still exists.
+demo = microphone
+demo.launch()