Voice-To-Voice_test

Sleeping

App Files Files Community

sal-maq committed on Aug 23, 2024

Commit

2bcdf1f

verified ·

1 Parent(s): e7619ed

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -70

app.py CHANGED Viewed

@@ -1,81 +1,63 @@
 import os
-import tempfile
-import numpy as np
 import gradio as gr
 import whisper
 from gtts import gTTS
 from groq import Groq
-import soundfile as sf
-# Set up the Groq client. The API key is read from the GROQ_API_KEY environment
-# variable (for example a Space secret); credentials must never be hard-coded
-# in source, because anything committed here is publicly readable.
-groq_client = Groq(api_key=os.environ.get('GROQ_API_KEY'))
-# Load the Whisper speech-to-text model once at import time so every request reuses it
-whisper_model = whisper.load_model("base")
-def process_audio(audio_file_path):
-    """Transcribe an audio file, generate a chat reply, and synthesize it as speech.
-
-    Args:
-        audio_file_path: Path to the uploaded audio file.
-
-    Returns:
-        A ``(response_text, response_audio_path)`` tuple. On any failure the
-        first element is an ``"Error: ..."`` message and the second is ``None``.
-    """
     try:
-        # Ensure audio_file_path is valid
-        if not audio_file_path:
-            raise ValueError("No audio file provided")
-        print(f"Received audio file path: {audio_file_path}")
-        # Transcribe straight from the uploaded path; copying the bytes into a
-        # second temporary file only leaked a file that was never deleted.
-        result = whisper_model.transcribe(audio_file_path)
-        user_text = result['text']
-        print(f"Transcribed text: {user_text}")
-        # Generate response using Llama 8b model with Groq API
-        chat_completion = groq_client.chat.completions.create(
-            messages=[
-                {
-                    "role": "user",
-                    "content": user_text,
-                }
-            ],
-            model="llama3-8b-8192",
         )
-        response_text = chat_completion.choices[0].message.content
-        print(f"Response text: {response_text}")
-        # Convert response text to speech using gTTS
-        tts = gTTS(text=response_text, lang='en')
-        # Reserve a unique output path; delete=False because the caller reads
-        # the file after this function returns.
-        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio_file:
-            response_audio_path = temp_audio_file.name
-        # Save only after the handle is closed so gTTS can reopen the path
-        tts.save(response_audio_path)
-        return response_text, response_audio_path
     except Exception as e:
-        # Top-level boundary: surface the failure as text instead of crashing the UI
-        return f"Error: {str(e)}", None
-# Create Gradio interface with updated layout
 with gr.Blocks() as demo:
     gr.Markdown(
         """
         <style>
         .gradio-container {
             font-family: Arial, sans-serif;
-            background-color: #e0f7fa;  /* Changed background color */
             border-radius: 10px;
             padding: 20px;
             box-shadow: 0 4px 12px rgba(0,0,0,0.2);
         }
         .gradio-input, .gradio-output {
             border-radius: 6px;
@@ -83,43 +65,43 @@ with gr.Blocks() as demo:
             padding: 10px;
         }
         .gradio-button {
-            background-color: #28a745;
             color: white;
             border-radius: 6px;
             border: none;
-            padding: 8px 16px;  /* Adjusted padding */
             font-size: 16px;  /* Adjusted font size */
         }
         .gradio-button:hover {
-            background-color: #218838;
         }
         .gradio-title {
-            font-size: 24px;
             font-weight: bold;
             margin-bottom: 20px;
         }
         .gradio-description {
-            font-size: 14px;
             margin-bottom: 20px;
-            color: #555;
         }
         </style>
         """
     )
-    gr.Markdown("# Voice-to-Voice Chatbot\nDeveloped by Salman Maqbool")
     gr.Markdown("Upload an audio file to interact with the voice-to-voice chatbot. The chatbot will transcribe the audio, generate a response, and provide a spoken reply.")
     with gr.Row():
         with gr.Column():
-            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
-            submit_button = gr.Button("Submit")
         with gr.Column():
-            response_text = gr.Textbox(label="Response Text", placeholder="Generated response will appear here")
-            response_audio = gr.Audio(label="Response Audio", type="filepath")
-    submit_button.click(process_audio, inputs=audio_input, outputs=[response_text, response_audio])
 # Launch the Gradio app
-demo.launch()

 import os
 import gradio as gr
 import whisper
 from gtts import gTTS
+import io
 from groq import Groq
+# Initialize the Groq client
+client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+# Load the Whisper model
+model = whisper.load_model("base")
+def process_audio(file_path):
+    """Transcribe an audio file, generate a chat reply, and synthesize it as speech.
+
+    Args:
+        file_path: Path to the uploaded audio file.
+
+    Returns:
+        A ``(response_message, response_audio_path)`` tuple. On any failure the
+        first element is an ``"An error occurred: ..."`` message and the second
+        is ``None``.
+    """
+    import tempfile  # local import: the module-level imports do not include it
     try:
+        # Fail with a clear message when nothing was uploaded
+        if not file_path:
+            raise ValueError("No audio file provided")
+        # Load the audio file
+        audio = whisper.load_audio(file_path)
+        # Transcribe the audio using Whisper
+        result = model.transcribe(audio)
+        text = result["text"]
+        # Generate a response using Groq
+        chat_completion = client.chat.completions.create(
+            messages=[{"role": "user", "content": text}],
+            model="llama3-8b-8192",  # Replace with the correct model if necessary
         )
+        # Access the response using dot notation
+        response_message = chat_completion.choices[0].message.content.strip()
+        # Convert the response text to speech
+        tts = gTTS(response_message)
+        # Write to a unique temporary file so concurrent requests cannot
+        # overwrite each other's audio (a fixed "response.mp3" would be shared).
+        # delete=False because the caller reads the file after we return.
+        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
+            tts.write_to_fp(audio_file)
+            response_audio_path = audio_file.name
+        # Return the response text and the path to the saved audio file
+        return response_message, response_audio_path
     except Exception as e:
+        # Top-level boundary: surface the failure as text instead of crashing the UI
+        return f"An error occurred: {e}", None
+# Create the Gradio interface with customized UI.
+# The components are bound to names so the Submit button can be wired to
+# process_audio below; without that wiring the button does nothing.
 with gr.Blocks() as demo:
     gr.Markdown(
         """
         <style>
         .gradio-container {
             font-family: Arial, sans-serif;
+            background-color: #f0f4c3;  /* Light green background color */
             border-radius: 10px;
             padding: 20px;
             box-shadow: 0 4px 12px rgba(0,0,0,0.2);
+            text-align: center;
         }
         .gradio-input, .gradio-output {
             border-radius: 6px;
             padding: 10px;
         }
         .gradio-button {
+            background-color: #ff7043;
             color: white;
             border-radius: 6px;
             border: none;
+            padding: 10px 20px;  /* Adjusted padding */
             font-size: 16px;  /* Adjusted font size */
+            cursor: pointer;
         }
         .gradio-button:hover {
+            background-color: #e64a19;
         }
         .gradio-title {
+            font-size: 28px;
             font-weight: bold;
             margin-bottom: 20px;
+            color: #37474f;
         }
         .gradio-description {
+            font-size: 16px;
             margin-bottom: 20px;
+            color: #616161;
         }
         </style>
         """
     )
+    gr.Markdown("# Voice-to-Voice Chatbot\nDeveloped by Salman Maqbool ❤️")
     gr.Markdown("Upload an audio file to interact with the voice-to-voice chatbot. The chatbot will transcribe the audio, generate a response, and provide a spoken reply.")
     with gr.Row():
         with gr.Column():
+            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
+            submit_button = gr.Button("Submit")
         with gr.Column():
+            response_text = gr.Textbox(label="Response Text", placeholder="The AI-generated response will appear here", lines=5)
+            response_audio = gr.Audio(label="Response Audio", type="filepath")
+    # Run the pipeline on click: uploaded file path in, reply text and reply audio path out
+    submit_button.click(process_audio, inputs=audio_input, outputs=[response_text, response_audio])
 # Launch the Gradio app
+demo.launch()