import openai
import gradio as gr
from gradio.components import Audio, Textbox
import os
import re
from transformers import GPT2Tokenizer
import pandas as pd
from datetime import datetime, timezone, timedelta
import notion_df

# GPT-2 tokenizer, used only to count and truncate tokens locally before
# sending text to the OpenAI API.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')

# OpenAI key for ChatCompletion and Whisper transcription.
openai.api_key = os.environ["OPENAI_API_KEY"]

# System prompt that defines the tutor persona and the response format.
initial_message = {"role": "system", "content": 'You are a USMLE tutor. Always respond to every input with layered bullet points (lists rather than sentences) plus a fun mnemonic to memorize each list. You may answer with up to 1200 words if the user requests a longer response.'}
messages = [initial_message]

# Number of assistant answers produced in the current conversation.
answer_count = 0

# Notion integration token used by notion_df to upload chat transcripts.
API_KEY = os.environ["API_KEY"]
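
# Both keys are read from the environment. A minimal sketch of the expected
# shell setup (the values below are placeholders, not real keys):
#
#   export OPENAI_API_KEY="sk-..."
#   export API_KEY="secret_..."   # Notion integration token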


def transcribe(audio, text):
    """Transcribe the recorded audio (if any), combine it with the typed text,
    send the conversation to the chat model, and log the transcript."""
    global messages
    global answer_count

    # Transcribe the recorded audio with the Whisper API; fall back to an
    # empty string when no recording was made.
    transcribed_text = ""
    if audio is not None:
        with open(audio, "rb") as audio_file:
            transcript = openai.Audio.transcribe("whisper-1", audio_file, language="en")
        transcribed_text = transcript["text"]

    # Truncate the typed text to at most 1440 GPT-2 tokens, cutting on
    # sentence boundaries where possible.
    input_text = ""
    if text is not None:
        sentences = re.split("(?<=[.!?]) +", text)
        input_tokens = []
        for sentence in sentences:
            sentence_tokens = tokenizer.encode(sentence)
            if len(input_tokens) + len(sentence_tokens) < 1440:
                input_tokens.extend(sentence_tokens)
            else:
                # Keep only as many tokens as still fit under the limit.
                sentence_tokens = sentence_tokens[:1440 - len(input_tokens)]
                input_tokens.extend(sentence_tokens)
                break
        input_text = tokenizer.decode(input_tokens)

    # Add the combined audio + text input as the next user message.
    messages.append({"role": "user", "content": transcribed_text + input_text})

    # If the conversation has grown too long, archive the full transcript to
    # Notion and start over, keeping only the system prompt and the message
    # that was just asked.
    num_tokens = sum(len(tokenizer.encode(message["content"])) for message in messages)
    if num_tokens > 2096:
        chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages if message['role'] != 'system'])
        chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n"

        # Timestamp in US Eastern Time (fixed UTC-5 offset, no DST handling).
        now_et = datetime.now(timezone(timedelta(hours=-5)))
        published_date = now_et.strftime('%m-%d-%y %H:%M')

        df = pd.DataFrame([chat_transcript])
        notion_df.upload(df, 'https://www.notion.so/US-62e861a0b35f43da8ef9a7789512b8c2?pvs=4', title=str(published_date + 'FULL'), api_key=API_KEY)

        # Reset the conversation, carrying over the latest user message so it
        # still gets answered below.
        messages = [initial_message, messages[-1]]
        answer_count = 0
    else:
        answer_count += 1

    # Ask the chat model for the tutor's reply and add it to the history.
    assistant_message = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        max_tokens=2000
    )["choices"][0]["message"]
    messages.append(assistant_message)

    # Rebuild the transcript (now including the new reply) for logging.
    num_tokens = sum(len(tokenizer.encode(message["content"])) for message in messages)
    chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages if message['role'] != 'system'])
    chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n"

    # Append the transcript to a local log file.
    with open("conversation_history.txt", "a") as f:
        f.write(chat_transcript)

    # Upload the same transcript to Notion, titled with the current Eastern time.
    now_et = datetime.now(timezone(timedelta(hours=-5)))
    published_date = now_et.strftime('%m-%d-%y %H:%M')
    df = pd.DataFrame([chat_transcript])
    notion_df.upload(df, 'https://www.notion.so/US-62e861a0b35f43da8ef9a7789512b8c2?pvs=4', title=str(published_date), api_key=API_KEY)

    return assistant_message['content']
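
# Quick manual check (hypothetical example; assumes the environment variables
# are set and will hit both the OpenAI API and the Notion page):
#   print(transcribe(None, "Summarize the nephron segments."))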


# Gradio interface: microphone and text box in, the tutor's text reply out.
audio_input = Audio(source="microphone", type="filepath", label="Record your message")
text_input = Textbox(label="Type your message")
output_text = gr.outputs.Textbox(label="Response")

iface = gr.Interface(
    fn=transcribe,
    inputs=[audio_input, text_input],
    outputs=[output_text],
    title="Hold On, Pain Ends (HOPE)",
    description="Talk to Your Nephrology Tutor HOPE",
    theme="compact",
    layout="vertical",
    allow_flagging="never"
)

iface.launch()
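
# Note: iface.launch(share=True) would also create a temporary public Gradio
# link, useful when running this script on a remote machine.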