kamal45 committed
Commit c0d86c1 · verified · 1 Parent(s): 4c4f108

Update app.py

Files changed (1):
  1. app.py +15 -7
app.py CHANGED
@@ -1,10 +1,11 @@
-
 import os
 import torch
 import whisper
 from gtts import gTTS
 import gradio as gr
-from groq import Groq # Assuming Groq is installed and accessible
+from groq import Groq
+import numpy as np
+import io
 
 # Load the Whisper model
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -14,14 +15,21 @@ GROQ_API_KEY ="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"
 
 Client = Groq(api_key=GROQ_API_KEY)
 
-# Set your Groq API key
+# Set your Groq API key (replace with your actual key or set it in the environment)
 os.environ["GROQ_API_KEY"] = "your_groq_api_key_here"
 client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 
 # Function to transcribe audio using Whisper
-def transcribe(audio_path):
+def transcribe(audio_data):
     try:
+        # Convert numpy array (audio) to bytes and save it as a temporary file
+        audio_path = "temp_audio.wav"
+        with open(audio_path, "wb") as f:
+            f.write(audio_data)
+
+        # Transcribe the saved audio file
         result = model.transcribe(audio_path)
+        os.remove(audio_path)  # Clean up the temporary file
         return result["text"]
     except Exception as e:
         return f"Error during transcription: {e}"
@@ -48,8 +56,8 @@ def text_to_speech(text):
         return f"Error during text-to-speech conversion: {e}"
 
 # Combined function for processing audio input and generating audio output
-def process_audio(audio_path):
-    transcription = transcribe(audio_path)
+def process_audio(audio_data):
+    transcription = transcribe(audio_data)
     if "Error" in transcription:
         return transcription, None, None
 
@@ -68,7 +76,7 @@ with gr.Blocks() as app:
     gr.Markdown("## Real-Time Voice-to-Voice Chatbot")
     with gr.Row():
         with gr.Column():
-            audio_input = gr.Audio(type="filepath", label="Speak", interactive=True)
+            audio_input = gr.Audio(type="numpy", label="Speak", interactive=True)
         with gr.Column():
            transcription_output = gr.Textbox(label="Transcription (Text)", lines=2)
            response_output = gr.Textbox(label="Response (LLM Text)", lines=2)
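
Note on the new type="numpy" input: Gradio passes such a component's value to the callback as a (sample_rate, data) tuple of an int and a numpy array, not as raw bytes, so writing audio_data straight to disk does not produce a playable WAV. A minimal tuple-aware sketch of transcribe(), assuming scipy is available in the Space (the standard-library wave module would work too):

import os
import numpy as np
from scipy.io import wavfile  # assumption: scipy is installed in the Space

def transcribe(audio_data):
    try:
        # gr.Audio(type="numpy") delivers (sample_rate, np.ndarray)
        sample_rate, samples = audio_data
        audio_path = "temp_audio.wav"
        wavfile.write(audio_path, sample_rate, samples.astype(np.int16))
        result = model.transcribe(audio_path)  # `model` is the Whisper model loaded in app.py
        os.remove(audio_path)  # clean up the temporary file
        return result["text"]
    except Exception as e:
        return f"Error during transcription: {e}"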
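The text_to_speech() helper appears in the diff only through its error branch. With the gTTS import already present, a minimal body consistent with that branch could look like this (the response.mp3 filename is an assumption, not taken from the commit):

from gtts import gTTS

def text_to_speech(text):
    try:
        tts = gTTS(text=text, lang="en")
        audio_path = "response.mp3"  # assumed output filename
        tts.save(audio_path)
        return audio_path
    except Exception as e:
        return f"Error during text-to-speech conversion: {e}"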
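The LLM call itself is also outside the diff's context window; only the Groq client setup and the "Response (LLM Text)" output box are visible. A hedged sketch of that step using the `client` constructed above (the model name is an assumption, not taken from this commit):

def generate_response(prompt):
    completion = client.chat.completions.create(
        model="llama3-8b-8192",  # assumed model; any Groq-hosted chat model works
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content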