Pijush2023 committed on
Commit 4d969d0 · verified · 1 Parent(s): 69ce2dd

Update app.py

Files changed (1)
  1. app.py +46 -0
app.py CHANGED
@@ -243,6 +243,47 @@ def insert_prompt(current_text, prompt):
     return prompt[0] if prompt else current_text
 
 
+# Define the ASR model with Whisper
+model_id = 'openai/whisper-small'
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
+processor = AutoProcessor.from_pretrained(model_id)
+
+pipe_asr = pipeline(
+    "automatic-speech-recognition",
+    model=model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    max_new_tokens=128,
+    chunk_length_s=15,
+    batch_size=16,
+    torch_dtype=torch_dtype,
+    device=device,
+    return_timestamps=True
+)
+
+# Define the transcription function for streaming audio
+def transcribe_function(stream, new_chunk):
+    try:
+        sr, y = new_chunk[0], new_chunk[1]
+    except TypeError:
+        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
+        return stream, "", None
+
+    y = y.astype(np.float32) / np.max(np.abs(y))
+
+    if stream is not None:
+        stream = np.concatenate([stream, y])
+    else:
+        stream = y
+
+    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
+    full_text = result.get("text", "")
+
+    return stream, full_text, full_text
+
+
 
 # Create the Gradio Blocks interface
 with gr.Blocks(theme="rawrsor1/Everforest") as demo:
@@ -250,6 +291,7 @@ with gr.Blocks(theme="rawrsor1/Everforest") as demo:
     with gr.Row():
         with gr.Column():
             question_input = gr.Textbox(label="Ask a Question", placeholder="Type your question here...")
+            audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy', every=0.1, label="Speak to Ask")
 
 
 
@@ -276,6 +318,10 @@ with gr.Blocks(theme="rawrsor1/Everforest") as demo:
     # Define interaction for hitting the Enter key
     question_input.submit(fn=add_message, inputs=[chatbot, question_input], outputs=[chatbot, question_input])\
         .then(fn=chat_with_bot, inputs=[chatbot], outputs=chatbot)
+
+    # Speech-to-Text functionality
+    state = gr.State()
+    audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, question_input])
 
 
     generate_audio_btn.click(fn=generate_audio_from_last_response, inputs=chatbot, outputs=audio_output)
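
Note on the added code: as committed, transcribe_function returns three values (stream, full_text, full_text) while the audio_input.stream(...) call lists only two output components, and the normalization divides by np.max(np.abs(y)), which is zero for an all-silent chunk. Below is a minimal standalone sketch of the same streaming-transcription step; the zero-peak guard and the two-value return are editorial assumptions, not part of the commit:

import numpy as np
import torch
from transformers import pipeline

# Build the ASR pipeline from the model id used in the commit.
pipe_asr = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    device="cuda:0" if torch.cuda.is_available() else "cpu",
)

def transcribe_function(stream, new_chunk):
    # Gradio streams microphone audio as (sample_rate, numpy_array) tuples.
    try:
        sr, y = new_chunk
    except (TypeError, ValueError):
        print(f"Unexpected chunk structure: {type(new_chunk)}")
        return stream, ""

    # Normalize to [-1, 1]; the zero-peak guard is an assumption, not in the commit.
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y / peak

    # Accumulate the waveform so each re-transcription sees all audio so far.
    stream = y if stream is None else np.concatenate([stream, y])

    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)

    # Two return values, matching outputs=[state, question_input].
    return stream, result.get("text", "")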
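
And the UI wiring in isolation, assuming the transcribe_function sketch above (component names follow the commit; the every=0.1 argument from the commit is omitted here):

import gradio as gr

with gr.Blocks() as demo:
    question_input = gr.Textbox(label="Ask a Question")
    # streaming=True makes the microphone component emit chunks while recording.
    audio_input = gr.Audio(sources=["microphone"], streaming=True, type="numpy", label="Speak to Ask")
    # gr.State() carries the accumulated waveform between chunks.
    state = gr.State()
    audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, question_input])

demo.launch()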