|
import openai |
|
import gradio as gr |
|
from gradio.components import Audio, Textbox |
|
import os |
|
import re |
|
import tiktoken |
|
from transformers import GPT2Tokenizer |
|
import whisper |
|
import pandas as pd |
|
from datetime import datetime, timezone, timedelta |
|
import notion_df |
|
import concurrent.futures |
|
import nltk |
|
from nltk.tokenize import sent_tokenize |
|
nltk.download('punkt') |
|
|
|
|
|
|
|
tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium') |
|
# OpenAI credentials are read from the environment
openai.api_key = os.environ["OPENAI_API_KEY"]
|
|
|
|
|
initmessage = 'You are a USMLE Tutor. ALWAYS respond with layered "bullet points" (lists rather than sentences), and include a fun mnemonic to memorize each list. You may answer with up to 1200 words if the user requests a longer response.'
|
initial_message = {"role": "system", "content": initmessage}
|
messages = [initial_message] |
|
messages_rev = [initial_message] |
|
|
|
|
|
answer_count = 0 |
|
|
|
|
|
API_KEY = os.environ["API_KEY"] |
|
|
|
def transcribe(audio, text): |
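    """Answer a spoken and/or typed question with gpt-3.5-turbo.

    Any recorded audio is transcribed with OpenAI's Whisper API and answered
    first; the typed text is then split into roughly 800-token chunks, each
    chunk is answered in turn, and the running transcript is uploaded to Notion.
    """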
|
global messages |
|
global answer_count |
|
messages = [initial_message] |
|
messages_rev = [initial_message] |
|
|
|
transcript = {'text': ''} |
|
input_text = [] |
|
|
|
counter = 0 |
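    # If the user recorded audio, transcribe it and answer it before handling the typed text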
|
|
|
if audio is not None: |
|
        with open(audio, "rb") as audio_file:
            # Send the recording to OpenAI's Whisper API for transcription
            transcript = openai.Audio.transcribe("whisper-1", audio_file, language="en")
|
messages.append({"role": "user", "content": transcript["text"]}) |
|
system_message = openai.ChatCompletion.create( |
|
model="gpt-3.5-turbo", |
|
messages=messages, |
|
max_tokens=2000 |
|
)["choices"][0]["message"] |
|
|
|
messages.append({"role": "system", "content": str(system_message['content'])}) |
|
messages_rev.append({"role": "system", "content": str(system_message['content'])}) |
|
|
|
|
|
chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages_rev if message['role'] != 'user']) |
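        # Timestamp the exchange (fixed UTC-4 offset) and archive it to a Notion page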
|
|
|
|
|
|
|
|
|
df = pd.DataFrame([chat_transcript]) |
|
|
|
now_et = datetime.now(timezone(timedelta(hours=-4))) |
|
|
|
published_date = now_et.strftime('%m-%d-%y %H:%M') |
|
notion_df.upload(df, 'https://www.notion.so/US-62e861a0b35f43da8ef9a7789512b8c2?pvs=4', title=str(published_date), api_key=API_KEY) |
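    # Split the typed text into sentences and pack them into chunks of roughly 800 GPT-2 tokens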
|
|
|
|
|
|
|
    sentences = sent_tokenize(text or "")  # tolerate an empty text box
|
|
|
|
|
subinput_tokens = [] |
|
buffer = [] |
|
for sentence in sentences: |
|
sentence_tokens = tokenizer.encode(sentence) |
|
if len(buffer) + len(sentence_tokens) > 800: |
|
subinput_tokens.append(buffer) |
|
buffer = [] |
|
buffer.extend(sentence_tokens) |
|
if buffer: |
|
subinput_tokens.append(buffer) |
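    # Answer each chunk in turn, re-sending the tutoring instructions before every chunk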
|
|
|
chat_transcript = '' |
|
|
|
for tokens in subinput_tokens: |
|
        messages.append({"role": "user", "content": initmessage})
|
|
|
subinput_text = tokenizer.decode(tokens) |
|
messages.append({"role": "user", "content": transcript["text"]+str(subinput_text)}) |
|
|
|
num_tokens = sum(len(tokenizer.encode(message["content"])) for message in messages) |
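        # If the accumulated conversation has grown too large for the model, archive it and start a fresh context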
|
if num_tokens > 2096: |
|
|
|
chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages if message['role'] != 'user']) |
|
|
|
chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n" |
|
|
|
|
|
now_et = datetime.now(timezone(timedelta(hours=-4))) |
|
|
|
published_date = now_et.strftime('%m-%d-%y %H:%M') |
|
            # The very first overflow is not archived; later ones are uploaded with a 'FULL' title
            if counter > 0:
                df = pd.DataFrame([chat_transcript])
                notion_df.upload(df, 'https://www.notion.so/US-62e861a0b35f43da8ef9a7789512b8c2?pvs=4', title=str(published_date + 'FULL'), api_key=API_KEY)
            counter += 1
            # Start a fresh context: the tutoring instructions plus the current chunk only
            messages = [{"role": "system", "content": initmessage},
                        {"role": "user", "content": subinput_text}]
            answer_count = 0
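        # Ask gpt-3.5-turbo to answer the current chunk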
|
|
|
|
|
|
|
system_message = openai.ChatCompletion.create( |
|
model="gpt-3.5-turbo", |
|
messages=messages, |
|
max_tokens=2000 |
|
)["choices"][0]["message"] |
|
|
|
messages.append({"role": "system", "content": str(system_message['content'])}) |
|
messages_rev.append({"role": "system", "content": str(system_message['content'])}) |
|
|
|
|
|
chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages_rev if message['role'] != 'user']) |
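    # Note the token usage and upload the transcript to Notion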
|
|
|
|
|
|
|
|
|
chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n" |
|
df = pd.DataFrame([chat_transcript]) |
|
|
|
now_et = datetime.now(timezone(timedelta(hours=-4))) |
|
|
|
published_date = now_et.strftime('%m-%d-%y %H:%M') |
|
notion_df.upload(df, 'https://www.notion.so/US-62e861a0b35f43da8ef9a7789512b8c2?pvs=4', title=str(published_date), api_key=API_KEY) |
|
|
|
|
|
return chat_transcript |
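# Gradio interface: microphone and text box in, a single text response out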
|
|
|
|
|
audio_input = Audio(source="microphone", type="filepath", label="Record your message") |
|
text_input = Textbox(label="Type your message", max_length=4096) |
|
output_text = Textbox(label="Response")
|
output_audio = Audio() |
|
|
|
|
|
iface = gr.Interface( |
|
fn=transcribe, |
|
inputs=[audio_input, text_input], |
|
outputs=[output_text], |
|
title="Hold On, Pain Ends (HOPE) 2", |
|
description="Talk to Your Nephrology Tutor HOPE", |
|
theme="compact", |
|
layout="vertical", |
|
    allow_flagging="never"
|
) |
|
|
|
|
|
iface.launch() |