import gradio as gr
import openai
import os
import requests
import json
messages = [{"role": "system", "content": 'You are Steve Jobs. Respond to all input in 25 words or less.'}]
# Set up the ElevenLabs API endpoint URL and headers
url = f"https://api.elevenlabs.io/v1/text-to-speech/{os.environ.get('voice_id')}/stream"
headers = {
    "accept": "*/*",
    "xi-api-key": os.environ.get('elevenlabs_api_key'),
    "Content-Type": "application/json",
}
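
# Configuration assumptions: voice_id and elevenlabs_api_key are read from
# environment variables (e.g. Hugging Face Space secrets), and the openai
# library falls back to the OPENAI_API_KEY environment variable when
# openai.api_key is not set explicitly.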

# Define a function to handle the Gradio input and generate the response
def transcribe(audio):
    global messages

    # API call 1: use OpenAI Whisper to transcribe the user's audio input
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)

    # Append the user's message to the message history
    messages.append({"role": "user", "content": transcript["text"]})
    # API call 2: generate a reply using OpenAI's chat API
    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)

    # Extract the assistant's message (role "assistant", not "system") from the
    # API response and append it to the message history
    assistant_message = response["choices"][0]["message"]
    messages.append(assistant_message)
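
    # Because the full messages list is re-sent on every call, the model keeps
    # conversational context across turns, but token usage grows with each one.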

    # API call 3: use the ElevenLabs voice-synthesis API to turn the reply into audio
    data = {
        "text": assistant_message["content"],
        "voice_settings": {
            "stability": 0,
            "similarity_boost": 0
        }
    }
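
    # Assumption: per the ElevenLabs docs, stability and similarity_boost range
    # from 0 to 1; 0 trades consistency for a more expressive delivery.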
    response = requests.post(url, headers=headers, data=json.dumps(data), stream=True)

    # Stream the audio response to a file; the Gradio audio output returned
    # below handles playback, so no notebook display call is needed here
    if response.ok:
        with open("output.wav", "wb") as f:
            for chunk in response.iter_content(chunk_size=1024):
                f.write(chunk)
    else:
        print(f"Error: {response.status_code} - {response.reason}")
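
    # Note: the stream endpoint typically returns MPEG audio, so "output.wav"
    # actually holds MP3 data; most players sniff the format rather than
    # trusting the extension.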

    # Generate a chat transcript (excluding the system prompt) for display in the Gradio UI
    chat_transcript = ""
    for message in messages:
        if message['role'] != 'system':
            chat_transcript += message['role'] + ": " + message['content'] + "\n\n"

    return chat_transcript, 'output.wav'

# Define the Gradio UI: microphone input, with text and audio outputs
ui = gr.Interface(fn=transcribe, inputs=gr.Audio(source="microphone", type="filepath"), outputs=['text', 'audio'])
ui.launch(share=True)
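
# Deployment note (a minimal sketch, assuming a Hugging Face Space): the Space
# would also need a requirements.txt along these lines, plus OPENAI_API_KEY,
# voice_id, and elevenlabs_api_key set as Space secrets:
#
#   gradio
#   openai
#   requests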