# NOTE: removed non-Python viewer artifacts (file size, commit hash,
# line-number gutter) that were accidentally captured with the source.
import gradio as gr
import openai
import os
from dotenv import load_dotenv
from pydub import AudioSegment
# Load environment variables from a local .env file, then configure the
# OpenAI client with the API key.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Shared system prompt for both conversation histories.
_SYSTEM_PROMPT = 'You are an AI assistant expert. Respond to all input in precise, crisp and easy to understand language.'

# Separate, independently-growing chat histories for the audio tab and the
# text tab; the handlers below append user/assistant turns in place.
audio_messages = [{"role": "system", "content": _SYSTEM_PROMPT}]
text_messages = [{"role": "system", "content": _SYSTEM_PROMPT}]
# (The original module-level `global user_text_input, ...` statement was a
# no-op — `global` has no effect at module scope — and has been removed.)
"""
It seems like the gr.Audio source is not generating a WAV file, which is required for the openai.Audio.transcribe() method to work.
To convert the audio file to WAV format, i have used a library like Pydub.
"""
def audio_transcribe(audio):
    """Transcribe a recorded clip with Whisper, get a chat reply, and return the transcript.

    Args:
        audio: Filepath to the recording produced by the gr.Audio component.

    Returns:
        A string with every non-system turn of the audio conversation,
        formatted as "role: content" separated by blank lines.
    """
    global audio_messages
    # The microphone recording may not be WAV; re-encode with pydub because
    # openai.Audio.transcribe requires a WAV file (see module note above).
    wav_path = "temp.wav"
    AudioSegment.from_file(audio).export(wav_path, format="wav")
    try:
        # Close the handle before deleting the file (required on Windows;
        # the original left the handle open and leaked the temp file if
        # transcription raised).
        with open(wav_path, "rb") as wav_file:
            transcript = openai.Audio.transcribe("whisper-1", wav_file)
    finally:
        os.remove(wav_path)

    # Append the transcribed user turn, ask ChatGPT, and record its reply.
    audio_messages.append({"role": "user", "content": transcript["text"]})  # type: ignore
    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=audio_messages)
    audio_messages.append(response["choices"][0]["message"])  # type: ignore

    # Render the visible (non-system) conversation for the output textbox.
    return "".join(
        f"{message['role']}: {message['content']}\n\n"
        for message in audio_messages
        if message['role'] != 'system'
    )
def text_transcribe(name):
    """Send a typed message to ChatGPT and return the running transcript.

    Args:
        name: The user's typed message from the text input box.

    Returns:
        A string with every non-system turn of the text conversation,
        formatted as "role: content" separated by blank lines.
    """
    global text_messages
    # NOTE(review): the original called user_text_input.update("") here, but
    # the returned update dict was discarded, so the textbox was never
    # actually cleared — the call was a no-op and has been removed. Clearing
    # the box would require adding it to the event's `outputs` and returning
    # an update value for it.

    # Append the user turn, ask ChatGPT, and record its reply.
    text_messages.append({"role": "user", "content": name})  # type: ignore
    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=text_messages)
    text_messages.append(response["choices"][0]["message"])  # type: ignore

    # Render the visible (non-system) conversation for the output textbox.
    return "".join(
        f"{message['role']}: {message['content']}\n\n"
        for message in text_messages
        if message['role'] != 'system'
    )
title = """<h1 align="center">Your Chat-GPT AI Assistant at your Service!! 😎 </h1>"""
with gr.Blocks(theme=gr.themes.Soft()) as demo:
gr.HTML(title)
with gr.Tab("Audio Input"):
with gr.Row():
user_audio_input = (gr.Audio(source="microphone", type="filepath", label="Speak Here"))
audio_input = user_audio_input
audio_output = gr.Textbox(label="AI Response", lines=20, placeholder="AI Response will be displayed here...")
with gr.Row():
audio_submit_button = gr.Button("Submit")
with gr.Tab("Text Input"):
with gr.Row():
user_text_input = (gr.Textbox(label="Type Here", lines=20, placeholder="Type your message here..."))
text_input = user_text_input
text_output = gr.Textbox(label="AI Response", lines=20, placeholder="AI Response will be displayed here...")
with gr.Row():
text_submit_button = gr.Button("Submit")
audio_submit_button.click(fn=audio_transcribe, inputs=audio_input, outputs=audio_output)
text_submit_button.click(fn=text_transcribe, inputs=text_input, outputs=text_output)
gr.Markdown("<center> Made with ❤️ by Tanish Gupta. Credits to 🤗 Spaces for Hosting this App </center>")
demo.launch()
|