# %% [markdown]
# # ChatBot app with Gradio

# %%
import os
from dotenv import load_dotenv, find_dotenv
import gradio as gr
import openai

# _ = load_dotenv(find_dotenv(filename="secrets.env", raise_error_if_not_found=False))

# Global variables
# ROOT_DIR = os.environ["ROOT_DIR"]
AUTH_USERNAME = os.environ["AUTH_USERNAME"]  # Gradio basic-auth credentials
AUTH_PASSWORD = os.environ["AUTH_PASSWORD"]

# Load credentials
openai.api_key = os.environ["OPENAI_API_KEY"]

# Default system instruction shown (and editable) in the app's advanced options.
SYSTEM_PROMPT = (
    "You are a helpful assistant and do your best to answer the user's questions. "
    "You do not make up answers."
)

# %% [markdown]
# ## Define and test the API calls

# %%
def APIcall(prompt: list, temperature=0.7, max_tokens=1024, model="GPT-3.5", stream=True):
    """Call the OpenAI chat-completions API and yield the assistant's answer.

    Args:
        prompt: complete message list ({"role": ..., "content": ...} dicts)
            as produced by formatPrompt.
        temperature: sampling temperature passed straight to the API.
        max_tokens: completion-length cap passed straight to the API.
        model: UI label; "GPT-3.5" maps to "gpt-3.5-turbo", anything else
            to "gpt-4-turbo-preview".
        stream: when True, yield text deltas as they arrive; when False,
            yield the complete answer once.

    Yields:
        str: non-empty text chunks of the reply (None deltas are skipped).
    """
    # Map the UI label onto the concrete API model name.
    model = "gpt-3.5-turbo" if model == "GPT-3.5" else "gpt-4-turbo-preview"

    # Make the API call with the given parameters.
    response = openai.chat.completions.create(
        model=model,
        messages=prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        stream=stream,
    )

    if stream:
        for chunk in response:
            delta = chunk.choices[0].delta.content
            # The terminating chunk carries content=None — skip it instead of
            # yielding None to the consumer.
            if delta is not None:
                yield delta
    else:
        # BUGFIX: the original computed the answer but never returned it
        # (and, being a generator, a `return` would be lost anyway) — yield
        # the full text so non-streaming callers receive it.
        yield response.choices[0].message.content

# %% [markdown]
# ## Building the ChatBot with Gradio

# %%
# Helper function: format the prompt to include history
def formatPrompt(newMsg: str, chatHistory, instruction):
    """Build the OpenAI messages list: system prompt, prior turns, new message.

    Args:
        newMsg: the message that still needs to be answered.
        chatHistory: list of [user_message, assistant_message] pairs.
        instruction: the system prompt.

    Returns:
        list[dict]: messages in OpenAI chat format.
    """
    # Start with the system prompt.
    messages = [{"role": "system", "content": instruction}]

    # Replay the conversation history, one user/assistant pair per turn.
    for userMsg, assistantMsg in chatHistory:
        messages.append({"role": "user", "content": userMsg})
        messages.append({"role": "assistant", "content": assistantMsg})

    # Finally add the message that needs to be answered.
    messages.append({"role": "user", "content": newMsg})
    return messages
# Define the response function (to get the answer as one block after generation).
def response(newMsg: str, chatHistory, instruction, temperature, max_tokens, model, stream=False):
    """Answer newMsg in one block (no streaming) and update the history.

    Returns:
        tuple: ("", updated chatHistory) so Gradio clears the textbox and
        refreshes the chatbot component.
    """
    prompt = formatPrompt(newMsg=newMsg, chatHistory=chatHistory, instruction=instruction)
    # BUGFIX: APIcall is a generator; the original appended the generator
    # object itself to the history instead of the answer text. Consume it
    # and join the chunks (skipping any None sentinel the stream may emit).
    answer = "".join(
        chunk
        for chunk in APIcall(prompt=prompt, temperature=temperature,
                             max_tokens=max_tokens, model=model)
        if chunk
    )
    chatHistory.append([newMsg, answer])
    return "", chatHistory


# Define the streamResponse function, to stream the results as they are generated.
def streamResponse(newMsg: str, chatHistory, instruction, temperature, max_tokens, model, stream=True):
    """Stream the answer chunk-by-chunk into the last history entry.

    Yields:
        tuple: ("", chatHistory) after each received chunk, so the chatbot
        component re-renders progressively.
    """
    # BUGFIX: build the prompt BEFORE appending the new turn — the original
    # appended [newMsg, ""] first, so the model received newMsg twice plus
    # an empty assistant message.
    prompt = formatPrompt(newMsg=newMsg, chatHistory=chatHistory, instruction=instruction)
    chatHistory.append([newMsg, ""])
    chunks = APIcall(prompt=prompt, temperature=temperature,
                     max_tokens=max_tokens, model=model)
    for chunk in chunks:
        # Skip empty/None deltas instead of aborting the whole stream
        # (the original `return` on the first None cut the stream short).
        if chunk:
            chatHistory[-1][1] += chunk
            yield "", chatHistory


# Build the app
with gr.Blocks(theme='Insuz/Mocha') as app:
    gr.Markdown("# Private GPT")
    # BUGFIX (text): closed the "(side note: ...)" parenthesis that the
    # original left dangling.
    gr.Markdown(
        "This chatbot is powered by the openAI GPT series."
        "\nThe default model is `GPT-3.5`, but `GPT-4` can be selected in the advanced options."
        "\nAs it uses the openAI API, user data is not used to train openAI models."
        "\n(side note: GPT-4 is currently 500 times more expensive than GPT-3.5)"
    )
    chatbot = gr.Chatbot()  # Associated variable: chatHistory
    msg = gr.Textbox(label="Message")
    with gr.Accordion(label="Advanced options", open=False):
        model = gr.Dropdown(
            choices=["GPT-3.5", "GPT-4"],
            value="GPT-3.5",
            multiselect=False,
            label="Model",
            info="Choose the model you want to chat with",
        )
        instruction = gr.Textbox(
            value=SYSTEM_PROMPT,
            label="System instructions",
            lines=2,
        )
        temperature = gr.Slider(
            minimum=0,
            maximum=2,
            step=0.1,
            value=0.7,
            label="Temperature",
            info="The higher, the more random the results will be",
        )
        max_token = gr.Slider(
            minimum=64,
            maximum=2048,
            step=64,
            value=1024,
            label="Max Token",
            info="Maximum number of token the model will take into consideration",
        )
    Button = gr.Button(value="Submit")
    # Both pressing Enter in the textbox and clicking Submit stream a reply.
    msg.submit(
        fn=streamResponse,
        inputs=[msg, chatbot, instruction, temperature, max_token, model],
        outputs=[msg, chatbot],
    )
    Button.click(
        fn=streamResponse,
        inputs=[msg, chatbot, instruction, temperature, max_token, model],
        outputs=[msg, chatbot],
    )

gr.close_all()
# queue() is required for generator (streaming) callbacks.
app.queue().launch(auth=(AUTH_USERNAME, AUTH_PASSWORD), share=True)
# %%