# Source: HuggingFace Spaces file viewer (akshayvkt — "Update app.py", commit e2c2451)
# Viewer chrome: raw | history | blame — 3.2 kB
import gradio as gr
import openai
import requests
import json
import os
# OpenAI credentials come from the environment; the key must be present
# before any openai.* call in transcribe() is made.
openai.api_key = os.environ.get('OPENAI_API_KEY')
# NOTE(review): gr.HTML is called at module level, outside any gr.Blocks
# context — presumably intended as a page header; confirm it actually
# renders, since standalone components are normally created inside a layout.
gr.HTML("""
<div style="text-align: center; max-width: 700px; margin: 0 auto;">
<div
style="
display: inline-flex;
align-items: center;
gap: 0.8rem;
font-size: 1.75rem;
"
>
<h1 style="font-weight: 900; margin-bottom: 7px; line-height: normal;">
Talk to AI Steve Jobs: Audio-to-Text+Audio generation
</h1>
</div>
</div>
<p>
Have a back-and-forth conversation with AI Steve Jobs, powered by ChatGPT + Whisper + ElevenLabs + HuggingFace
<br/>
<p/>
""")
# Conversation history seeded with the persona prompt.
# NOTE(review): module-level global — every visitor to this Space shares
# one history; confirm that is acceptable for this demo.
messages = [{"role": "system", "content": 'You are Steve Jobs. Respond to all input in 25 words or less.'}]
# Set up the API endpoint URL and headers
# ElevenLabs streaming text-to-speech endpoint; the voice id and API key
# are both read from the environment.
url = f"https://api.elevenlabs.io/v1/text-to-speech/{os.environ.get('voice_id')}/stream"
headers = {
"accept": "*/*",
"xi-api-key": os.environ.get('elevenlabs_api_key'),
"Content-Type": "application/json",
}
# Define a function to handle the Gradio input and generate the response
def transcribe(audio):
    """Run one conversation turn: transcribe audio, reply, synthesize speech.

    Parameters
    ----------
    audio : str
        Filesystem path to the recorded clip (Gradio ``type="filepath"``).

    Returns
    -------
    tuple[str, str]
        The running chat transcript (system prompt omitted) and the path
        of the synthesized reply audio file (``'output.wav'``).
    """
    global messages

    # API call 1: speech-to-text via Whisper.
    # Use a context manager so the handle is always closed — the original
    # opened the file and never closed it (resource leak).
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)

    # Append the user's message to the shared conversation history.
    messages.append({"role": "user", "content": transcript["text"]})

    # API call 2: chat completion conditioned on the whole history.
    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
    # Extract the assistant's message and keep it in the history so the
    # next turn has full context.
    system_message = response["choices"][0]["message"]
    messages.append(system_message)

    # API call 3: ElevenLabs streaming text-to-speech on the reply text.
    data = {
        "text": system_message["content"],
        "voice_settings": {
            "stability": 0,
            "similarity_boost": 0
        }
    }
    # Separate name: the original reused `response`, shadowing the chat reply.
    tts_response = requests.post(url, headers=headers, data=json.dumps(data), stream=True)

    # Save the audio response to a file, streaming in 1 KiB chunks.
    if tts_response.ok:
        with open("output.wav", "wb") as f:
            for chunk in tts_response.iter_content(chunk_size=1024):
                f.write(chunk)
    else:
        # Best-effort: report the failure and fall through; Gradio will
        # surface a stale or missing output.wav if synthesis failed.
        print(f"Error: {tts_response.status_code} - {tts_response.reason}")

    # Build the transcript shown in the UI (skip the system persona prompt).
    chat_transcript = ""
    for message in messages:
        if message['role'] != 'system':
            chat_transcript += message['role'] + ": " + message['content'] + "\n\n"

    return chat_transcript, 'output.wav'
# Define the Gradio UI interface
# ui = gr.Interface(fn=transcribe, inputs=gr.Audio(source="microphone", type="filepath"), outputs="text")
ui = gr.Interface(fn=transcribe, inputs=gr.Audio(source="microphone", type="filepath"), outputs=['text','audio'])
ui.launch(debug=True)