akshayvkt's picture
Update app.py
bed9a71
raw
history blame
3.04 kB
import gradio as gr
import openai
import requests
import json
import os
openai.api_key = os.environ.get('OPENAI_API_KEY')
messages = [{"role": "system", "content": 'You are Steve Jobs. Respond to all input in 25 words or less.'}]
# Set up the API endpoint URL and headers
url = f"https://api.elevenlabs.io/v1/text-to-speech/{os.environ.get('voice_id')}/stream"
headers = {
"accept": "*/*",
"xi-api-key": os.environ.get('elevenlabs_api_key'),
"Content-Type": "application/json",
}
# Define a function to handle the Gradio input and generate the response
def transcribe(audio):
global messages
# Use OpenAI to transcribe the user's audio input
# API call 1
audio_file = open(audio, "rb")
transcript = openai.Audio.transcribe("whisper-1", audio_file)
# Append the user's message to the message history
messages.append({"role": "user", "content": transcript["text"]})
# Generate a response using OpenAI's chat API
#API call 2
response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
# Extract the system message from the API response and append it to the message history
system_message = response["choices"][0]["message"]
messages.append(system_message)
#API Call 3
# Use the voice synthesis API to generate an audio response from the system message
data = {
"text": system_message["content"],
"voice_settings": {
"stability": 0,
"similarity_boost": 0
}
}
response = requests.post(url, headers=headers, data=json.dumps(data), stream=True)
# Save the audio response to a file
if response.ok:
with open("output.wav", "wb") as f:
for chunk in response.iter_content(chunk_size=1024):
f.write(chunk)
else:
print(f"Error: {response.status_code} - {response.reason}")
# IPython.display.display(IPython.display.Audio('output.wav'))
# Generate a chat transcript for display in the Gradio UI
chat_transcript = ""
for message in messages:
if message['role'] != 'system':
chat_transcript += message['role'] + ": " + message['content'] + "\n\n"
return chat_transcript,'output.wav'
# css = """
# #col-container {max-width: 80%; margin-left: auto; margin-right: auto;}
# #header {text-align: center;}
# }
# """
# with gr.Blocks(css=css) as ui:
# with gr.Column(elem_id="col-container"):
# gr.Markdown("""## Talk to AI Steve Jobs: Audio-to-Text+Audio generation
# Powered by ChatGPT + Whisper + ElevenLabs + HuggingFace <br>
# <br>
# """,
# elem_id="header")
# Define the Gradio UI interface
# ui = gr.Interface(fn=transcribe, inputs=gr.Audio(source="microphone", type="filepath"), outputs="text")
ui = gr.Interface(fn=transcribe, inputs=gr.Audio(source="microphone", type="filepath"), outputs=['text','audio'])
ui.launch(debug=True)