import openai
import gradio as gr
from gradio.components import Audio, Textbox
import os
from transformers import GPT2Tokenizer
import pandas as pd
from datetime import datetime, timezone, timedelta
import notion_df
import nltk
from nltk.tokenize import sent_tokenize

nltk.download('punkt')

# Define the tokenizer and set the OpenAI API key
tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
openai.api_key = os.environ["OPENAI_API_KEY"]

# Define the system prompt and the initial messages list
initmessage = ('You are a USMLE Tutor. ALWAYS respond with layered "bullet points" '
               '(listing rather than sentences) and a fun mnemonic to memorize that list. '
               'You may answer with up to 1200 words if the user requests a longer response.')
initial_message = {"role": "system", "content": initmessage}
messages = [initial_message]
messages_rev = [initial_message]

# Define the answer counter
answer_count = 0

# Define the Notion API key and the target Notion page
API_KEY = os.environ["API_KEY"]
NOTION_URL = 'https://www.notion.so/US-My-04095f009651427bb8247b9e680b18e5?pvs=4'

# Eastern Time as a fixed UTC-4 offset (does not track DST)
EASTERN = timezone(timedelta(hours=-4))


def upload_to_notion(chat_transcript):
    """Upload the chat transcript to the Notion page, titled with the current ET time (MM-DD-YY HH:MM)."""
    df = pd.DataFrame([chat_transcript])
    published_date = datetime.now(EASTERN).strftime('%m-%d-%y %H:%M')
    notion_df.upload(df, NOTION_URL, title=str(published_date), api_key=API_KEY)


def transcribe(audio, text):
    global messages
    global answer_count

    messages = [initial_message]
    messages_rev = [initial_message]
    chat_transcript = ''
    transcript = {'text': ''}
    counter = 0

    # Transcribe the audio if provided
    if audio is not None:
        with open(audio, "rb") as audio_file:
            transcript = openai.Audio.transcribe("whisper-1", audio_file, language="en")
        messages.append({"role": "user", "content": transcript["text"]})

        # Generate the assistant reply using the OpenAI API
        system_message = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
            max_tokens=2000
        )["choices"][0]["message"]
        messages.append({"role": "system", "content": str(system_message['content'])})
        messages_rev.append({"role": "system", "content": str(system_message['content'])})

        # Concatenate the chat history and upload it to Notion
        chat_transcript = "\n\n".join(
            [f"[ANSWER {answer_count}]{message['role']}: {message['content']}"
             for message in messages_rev if message['role'] != 'user'])
        upload_to_notion(chat_transcript)

    if text is not None:
        # Split the input text into sentences
        sentences = sent_tokenize(text)

        # Group the sentences into sub-inputs of at most ~400 tokens each
        subinput_tokens = []
        buffer = []
        for sentence in sentences:
            sentence_tokens = tokenizer.encode(sentence)
            if buffer and len(buffer) + len(sentence_tokens) > 400:
                subinput_tokens.append(buffer)
                buffer = []
            buffer.extend(sentence_tokens)
        if buffer:
            subinput_tokens.append(buffer)

        chat_transcript = ""
        for tokens in subinput_tokens:
            # Decode the tokens back into text
            subinput_text = tokenizer.decode(tokens)
            messages.append({"role": "system", "content": initmessage})
            messages.append({"role": "user", "content": transcript["text"] + subinput_text})

            # If the conversation grows too long, archive it and restart the context
            num_tokens = sum(len(tokenizer.encode(message["content"])) for message in messages)
            if num_tokens > 1400:
                # Concatenate the chat history
                chat_transcript = "\n\n".join(
                    [f"[ANSWER {answer_count}]{message['role']}: {message['content']}"
                     for message in messages if message['role'] != 'user'])
                # Append the number of tokens used to the end of the chat transcript
                chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n"

                if counter > 0:
                    # Upload the chat transcript to Notion
                    upload_to_notion(chat_transcript)
                counter += 1

                # Restart the context with the system prompt and the current sub-input
                messages = [initial_message, {"role": "user", "content": subinput_text}]
                answer_count = 0

            # Generate the assistant reply using the OpenAI API
            system_message = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=messages,
                max_tokens=2000
            )["choices"][0]["message"]
            messages.append({"role": "system", "content": str(system_message['content'])})

            # Add the system message and the input text to the beginning of messages_rev
            messages_rev.insert(0, system_message)
            messages_rev.insert(0, {"role": "user", "content": subinput_text + transcript["text"]})

            # Concatenate the chat history, prefixed with the number of tokens used, and upload it
            chat_transcript = f"\n\nNumber of tokens used: {num_tokens}\n\n"
            chat_transcript += "\n\n".join(
                [f"[ANSWER {answer_count}]{message['role']}: {message['content']}"
                 for message in messages_rev if message['role'] != 'user'])
            upload_to_notion(chat_transcript)

    # Return the chat transcript
    return chat_transcript


# Define the input and output components for Gradio
audio_input = Audio(source="microphone", type="filepath", label="Record your message")
text_input = Textbox(label="Type your message")
output_text = Textbox(label="Response")

# Define the Gradio interface
iface = gr.Interface(
    fn=transcribe,
    inputs=[audio_input, text_input],
    outputs=[output_text],
    title="Hold On, Pain Ends (HOPE) 2",
    description="Talk to Your Nephrology Tutor HOPE",
    theme="compact",
    layout="vertical",
    allow_flagging="never"
)

# Run the Gradio interface
iface.launch()