import gradio as gr
import openai
import requests
import json
import os

# NOTE: this script targets the pre-1.0 openai-python SDK (openai.Audio / openai.ChatCompletion)
# and the Gradio 3.x Audio component (the `source=` argument).
openai.api_key = os.environ.get('OPENAI_API_KEY')

messages = [{"role": "system",
             "content": 'You are Steve Jobs. Respond to all input in 25 words or less.'}]

# Set up the ElevenLabs API endpoint URL and headers
url = f"https://api.elevenlabs.io/v1/text-to-speech/{os.environ.get('voice_id')}/stream"
headers = {
    "accept": "*/*",
    "xi-api-key": os.environ.get('elevenlabs_api_key'),
    "Content-Type": "application/json",
}


# Define a function to handle the Gradio input and generate the response
def transcribe(audio):
    global messages

    # Use OpenAI to transcribe the user's audio input
    # API call 1
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)

    # Append the user's message to the message history
    messages.append({"role": "user", "content": transcript["text"]})

    # Generate a response using OpenAI's chat API
    # API call 2
    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)

    # Extract the assistant's message from the API response and append it to the message history
    system_message = response["choices"][0]["message"]
    messages.append(system_message)

    # API call 3
    # Use the ElevenLabs voice synthesis API to generate an audio response from the assistant's message
    data = {
        "text": system_message["content"],
        "voice_settings": {
            "stability": 0,
            "similarity_boost": 0
        }
    }
    response = requests.post(url, headers=headers, data=json.dumps(data), stream=True)

    # Save the audio response to a file
    # (the ElevenLabs stream is typically MPEG audio; .wav here is just the chosen filename)
    if response.ok:
        with open("output.wav", "wb") as f:
            for chunk in response.iter_content(chunk_size=1024):
                f.write(chunk)
    else:
        print(f"Error: {response.status_code} - {response.reason}")

    # IPython.display.display(IPython.display.Audio('output.wav'))

    # Generate a chat transcript for display in the Gradio UI,
    # labelling each turn by its speaker
    chat_transcript = ""
    for message in messages:
        if message['role'] != 'system':
            speaker = "You" if message['role'] == 'user' else "Steve Jobs"
            chat_transcript += speaker + ": " + message['content'] + "\n\n"

    return chat_transcript, 'output.wav'


# css = """
# #col-container {max-width: 80%; margin-left: auto; margin-right: auto;}
# #header {text-align: center;}
# """
# with gr.Blocks(css=css) as ui:
#     with gr.Column(elem_id="col-container"):
#         gr.Markdown("""## Talk to AI Steve Jobs: Audio-to-Text+Audio generation
#                     Powered by ChatGPT + Whisper + ElevenLabs + HuggingFace
#                     """,
#                     elem_id="header")

# Define the Gradio UI interface
# ui = gr.Interface(fn=transcribe, inputs=gr.Audio(source="microphone", type="filepath"), outputs="text")
ui = gr.Interface(fn=transcribe,
                  inputs=gr.Audio(source="microphone", type="filepath"),
                  outputs=['text', 'audio'])
ui.launch(debug=True)