kh-CHEUNG committed
Commit c46a1db
1 Parent(s): 03dad0c

Update app.py

Files changed (1)
  1. app.py +42 -5
app.py CHANGED
@@ -1,22 +1,43 @@
+import torch
+import spaces
+
 import gradio as gr
 from threading import Thread
 import re
-import time
+import time
+import tempfile
+import os
+
+from transformers import pipeline
+from transformers.pipelines.audio_utils import ffmpeg_read
+
 from PIL import Image
-import torch
-import spaces
 
 from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration, TextIteratorStreamer
 processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf")
 model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf", torch_dtype=torch.float16, low_cpu_mem_usage=True)
 model.to("cuda:0")
 
+ASR_MODEL_NAME = "openai/whisper-large-v3"
+ASR_BATCH_SIZE = 8
+ASR_CHUNK_LENGTH_S = 30
+TEMP_FILE_LIMIT_MB = 1000
+
 from huggingface_hub import InferenceClient
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
+device = 0 if torch.cuda.is_available() else "cpu"
+
+asr_pl = pipeline(
+    task="automatic-speech-recognition",
+    model=ASR_MODEL_NAME,
+    chunk_length_s=ASR_CHUNK_LENGTH_S,
+    device=device,
+)
+
 @spaces.GPU
 def respond(
     message,
@@ -50,10 +71,24 @@ def respond(
         response += token
         yield response
 
+@spaces.GPU
+def transcribe(inputs, task):
+    if inputs is None:
+        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
+
+    text = asr_pl(inputs, batch_size=ASR_BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
+    return text
+
+demo = gr.Blocks()
+
+transcribe_interface
+
+
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
-demo = gr.ChatInterface(
+
+chat_interface = gr.ChatInterface(
     respond,
     title="Enlight Innovations Limited -- Demo",
     description="This demo is designed to illustrate our basic idea and feasibility in implementation.",
@@ -71,6 +106,8 @@ demo = gr.ChatInterface(
     ],
 )
 
+with demo:
+    gr.TabbedInterface([transcribe_interface, chat_interface], ["Step 1: Transcribe", "Step 2: "])
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.queue().launch() #demo.launch()
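Note: the new code passes `transcribe_interface` to `gr.TabbedInterface`, but the hunk only contains the bare name `transcribe_interface` (new line 84), so the committed file never defines it and would raise a NameError on import. Below is a minimal sketch of what that definition might look like, assuming Gradio 4.x and the `transcribe(inputs, task)` function from this commit; the component choices, labels, and description are illustrative assumptions, not the committed code.

import gradio as gr  # already imported in app.py; repeated so the sketch is self-contained

# Hypothetical definition: the commit references transcribe_interface but its
# definition line is truncated. Everything below is an assumption.
transcribe_interface = gr.Interface(
    fn=transcribe,  # the @spaces.GPU transcribe() defined in this commit
    inputs=[
        # type="filepath" hands the ASR pipeline a path it can decode itself
        gr.Audio(sources=["upload", "microphone"], type="filepath", label="Audio input"),
        # forwarded to Whisper through generate_kwargs={"task": task}
        gr.Radio(["transcribe", "translate"], value="transcribe", label="Task"),
    ],
    outputs=gr.Textbox(label="Transcript"),
    title="Enlight Innovations Limited -- Demo",
    description="Step 1: Transcribe the uploaded or recorded audio.",
)

With a definition like this placed before the `with demo:` block, `gr.TabbedInterface` can render both tabs.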