kh-CHEUNG committed
Commit c46a1db
1 Parent(s): 03dad0c

Update app.py

Files changed (1)
  1. app.py +42 -5
app.py CHANGED
@@ -1,22 +1,43 @@
+import torch
+import spaces
+
 import gradio as gr
 from threading import Thread
 import re
-import time
+import time
+import tempfile
+import os
+
+from transformers import pipeline
+from transformers.pipelines.audio_utils import ffmpeg_read
+
 from PIL import Image
-import torch
-import spaces
 
 from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration, TextIteratorStreamer
 processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf")
 model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf", torch_dtype=torch.float16, low_cpu_mem_usage=True)
 model.to("cuda:0")
 
+ASR_MODEL_NAME = "openai/whisper-large-v3"
+ASR_BATCH_SIZE = 8
+ASR_CHUNK_LENGTH_S = 30
+TEMP_FILE_LIMIT_MB = 1000
+
 from huggingface_hub import InferenceClient
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
+device = 0 if torch.cuda.is_available() else "cpu"
+
+asr_pl = pipeline(
+    task="automatic-speech-recognition",
+    model=ASR_MODEL_NAME,
+    chunk_length_s=ASR_CHUNK_LENGTH_S,
+    device=device,
+)
+
 @spaces.GPU
 def respond(
     message,
@@ -50,10 +71,24 @@ def respond(
         response += token
         yield response
 
+@spaces.GPU
+def transcribe(inputs, task):
+    if inputs is None:
+        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
+
+    text = asr_pl(inputs, batch_size=ASR_BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
+    return text
+
+demo = gr.Blocks()
+
+transcribe_interface
+
+
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
-demo = gr.ChatInterface(
+
+chat_interface = gr.ChatInterface(
     respond,
     title="Enlight Innovations Limited -- Demo",
     description="This demo is designed to illustrate our basic idea and feasibility in implementation.",
@@ -71,6 +106,8 @@ demo = gr.ChatInterface(
     ],
 )
 
+with demo:
+    gr.TabbedInterface([transcribe_interface, chat_interface], ["Step 1: Transcribe", "Step 2: "])
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.queue().launch() #demo.launch()
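Note: the new code passes `transcribe_interface` to `gr.TabbedInterface`, but the hunk only contains the bare name `transcribe_interface` (new line 84), so the committed file never defines it and would raise a NameError on import. Below is a minimal sketch of what that definition might look like, assuming Gradio 4.x and the `transcribe(inputs, task)` function from this commit; the component choices, labels, and description are illustrative assumptions, not the committed code.

import gradio as gr  # already imported in app.py; repeated so the sketch is self-contained

# Hypothetical definition: the commit references transcribe_interface but its
# definition line is truncated. Everything below is an assumption.
transcribe_interface = gr.Interface(
    fn=transcribe,  # the @spaces.GPU transcribe() defined in this commit
    inputs=[
        # type="filepath" hands the ASR pipeline a path it can decode itself
        gr.Audio(sources=["upload", "microphone"], type="filepath", label="Audio input"),
        # forwarded to Whisper through generate_kwargs={"task": task}
        gr.Radio(["transcribe", "translate"], value="transcribe", label="Task"),
    ],
    outputs=gr.Textbox(label="Transcript"),
    title="Enlight Innovations Limited -- Demo",
    description="Step 1: Transcribe the uploaded or recorded audio.",
)

With a definition like this placed before the `with demo:` block, `gr.TabbedInterface` can render both tabs.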