|
import concurrent.futures
import os
import re
import threading
from datetime import datetime, timezone, timedelta

import gradio as gr
import notion_df
import openai
import pandas as pd
import tiktoken
import whisper
from gradio.components import Audio, Textbox
from transformers import GPT2Tokenizer
|
|
|
|
|
# GPT-2 tokenizer; the request handler uses it to count and truncate tokens.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')

# The OpenAI key comes from the environment (KeyError if OAPI_KEY is unset).
# Despite its name, `model` simply holds that same key string.
model = os.environ["OAPI_KEY"]
openai.api_key = model
|
|
|
|
|
# System prompt sent as the first message of every conversation.
# Adjacent literals are concatenated by the parser into one string.
initialt = (
    'If user asked COLORIZE, dont need to do anything but present the input as it is with organized tabs (layers). You are a USMLE Tutor. Respond with ALWAYS layered "bullet points" (listing rather than sentences) '
    'to all input with a fun mneumonics to memorize that list. But you can answer up to 1200 words if the user requests longer response. '
    'You are going to keep answer and also challenge the student to learn USMLE anatomy, phsysiology, and pathology.'
)

initial_message = {"role": "system", "content": initialt}

# `messages` is the live conversation sent to the model; `messages_rev` is a
# separate list that the handler fills newest-first for the transcript log.
messages = [initial_message]
messages_rev = [initial_message]
|
|
|
|
|
# Counter used to label transcript entries; the handler increments it per
# request and sets it back to 0 when it resets the conversation history.
answer_count = 0

# Key passed to notion_df.upload (KeyError if NAPI_KEY is unset).
API_KEY = os.environ["NAPI_KEY"]

# spaCy pipeline used by colorize_text for POS / entity tagging.
# NOTE(review): `spacy` is referenced here but is not imported anywhere in
# the visible file, so this line raises NameError at import time until an
# `import spacy` is added to the top-of-file imports.
nlp = spacy.load("en_core_web_sm")
|
def process_nlp(system_message):
    """Return the colorized rendering of ``system_message['content']``."""
    return colorize_text(system_message['content'])
|
|
|
def colorize_text(text):
    """Return *text* as Markdown/HTML with each token styled by category.

    Every input line is run through the module-level spaCy pipeline ``nlp``.
    For each token the first matching rule wins:

    * named-entity token      -> Markdown bold
    * NOUN / VERB / ADJ / ADV -> colored ``<span>``
    * digit                   -> purple ``<span>``
    * quotation mark          -> teal ``<span>``
    * other punctuation       -> brown ``<span>``
    * anything else           -> emitted unchanged

    Each input line is terminated with ``<br>``.

    NOTE(review): token text is inserted without HTML escaping; confirm the
    rendering component sanitizes it if the text can contain markup.
    """
    pos_colors = {
        'NOUN': '#FF3300',
        'VERB': '#FFFF00',
        'ADJ': '#00CC00',
        'ADV': '#FF6600',
    }

    def span(color, content):
        return (
            f'<span style="color: {color}; background-color: transparent;">'
            f'{content}</span>'
        )

    pieces = []
    for line in text.split("\n"):
        for token in nlp(line):
            if token.ent_type_:
                # Keep trailing whitespace outside the markers: "**word **"
                # is not a valid closing delimiter, so it would not be bold.
                pieces.append(f'**{token.text}**{token.whitespace_}')
            elif token.pos_ in pos_colors:
                pieces.append(span(pos_colors[token.pos_], token.text_with_ws))
            elif token.is_digit:
                pieces.append(span('#9900CC', token.text_with_ws))
            elif token.is_quote:
                # Checked before is_punct: quotation marks are punctuation
                # too, so the quote color was never reached in the old order.
                pieces.append(span('#008080', token.text_with_ws))
            elif token.is_punct:
                pieces.append(span('#8B4513', token.text_with_ws))
            else:
                pieces.append(token.text_with_ws)
        pieces.append("<br>")

    # One join instead of repeated string concatenation.
    return "".join(pieces)
|
|
|
def colorize_and_update(system_message, submit_update):
    """Colorize the message content and pass it to *submit_update*.

    The callback is invoked as ``submit_update(None, colorized)``: the first
    (plain-text) slot is left as ``None`` and only the second slot is filled.
    """
    submit_update(None, colorize_text(system_message['content']))
|
|
|
def update_text_output(system_message, submit_update):
    """Pass the raw message content to *submit_update*.

    The callback is invoked as ``submit_update(content, None)``: only the
    first (plain-text) slot is filled, the second is left as ``None``.
    """
    plain_text = system_message['content']
    submit_update(plain_text, None)
|
|
|
# Notion page that receives every transcript upload.
_NOTION_PAGE_URL = 'https://www.notion.so/YENA-be569d0a40c940e7b6e0679318215790?pvs=4'
# Budget (in GPT-2 tokens) for the typed part of a single request.
_MAX_INPUT_TOKENS = 1440
# Conversation size (in GPT-2 tokens) above which the history is reset.
_MAX_HISTORY_TOKENS = 2096


def _clip_to_token_budget(text, budget=_MAX_INPUT_TOKENS):
    """Return *text* truncated sentence-by-sentence to fewer than *budget* tokens."""
    kept = []
    for index, sentence in enumerate(re.split(r"(?<=[.!?]) +", text)):
        # re.split consumes the spaces between sentences; put one back so
        # the decoded text does not run sentences together.
        piece = sentence if index == 0 else " " + sentence
        sentence_tokens = tokenizer.encode(piece)
        room = budget - len(kept)
        if len(sentence_tokens) < room:
            kept.extend(sentence_tokens)
        else:
            # Sentence does not fit: keep what fits, then stop.
            kept.extend(sentence_tokens[:room])
            break
    return tokenizer.decode(kept)


def _format_transcript(history, count, num_tokens):
    """Render the non-system messages of *history* as one labelled text block."""
    body = "\n\n".join(
        f"[ANSWER {count}]{message['role']}: {message['content']}"
        for message in history
        if message['role'] != 'system'
    )
    return body + f"\n\nNumber of tokens used: {num_tokens}\n\n"


def _upload_to_notion(chat_transcript, title_suffix=''):
    """Upload *chat_transcript* to Notion, titled with the current timestamp."""
    # NOTE(review): fixed UTC-4 offset; it does not follow daylight-saving
    # changes, so the timestamp is only Eastern time for part of the year.
    now_et = datetime.now(timezone(timedelta(hours=-4)))
    published_date = now_et.strftime('%m-%d-%y %H:%M')
    df = pd.DataFrame([chat_transcript])
    notion_df.upload(
        df,
        _NOTION_PAGE_URL,
        title=published_date + title_suffix,
        api_key=API_KEY,
    )


def transcribe(audio, text, submit_update=None):
    """Handle one chat turn: transcribe, query the model, log, and render.

    Args:
        audio: Path of a recorded audio file, or ``None``. When given, it is
            transcribed with the OpenAI ``whisper-1`` endpoint.
        text: Typed user input, or ``None``. It is truncated to the
            per-request token budget before use.
        submit_update: Optional callback ``f(plain_text, colorized)``. When
            provided it receives the plain reply immediately and the
            colorized reply from a background thread. The original body read
            this name without defining it, which raised ``NameError`` on
            every call; it is now an optional parameter.

    Returns:
        ``(reply_text, colorized_reply)``.

    Side effects:
        Mutates the module-level ``messages``, ``messages_rev`` and
        ``answer_count``; uploads the transcript to Notion on every call,
        plus a ``back_up`` copy when the history is reset.
    """
    global messages
    global answer_count

    transcript = {'text': ''}
    # Start as a string (not a list) so the concatenations below also work
    # when no text was supplied.
    input_text = ""

    if audio is not None:
        # Context manager closes the file handle the original leaked.
        with open(audio, "rb") as audio_file:
            transcript = openai.Audio.transcribe("whisper-1", audio_file, language="en")

    if text is not None:
        input_text = _clip_to_token_budget(text)

    user_content = transcript["text"] + input_text
    messages.append({"role": "user", "content": user_content})

    num_tokens = sum(len(tokenizer.encode(message["content"])) for message in messages)
    if num_tokens > _MAX_HISTORY_TOKENS:
        # History is too large: archive it, then restart from the system
        # prompt followed by the current user turn.
        _upload_to_notion(
            _format_transcript(messages, answer_count, num_tokens),
            title_suffix='back_up',
        )
        messages = [initial_message]
        messages.append({"role": "user", "content": initialt})
        answer_count = 0
        # Re-queue the full turn (audio transcript included), not only the
        # typed part, so an audio-only question is not lost on reset.
        messages.append({"role": "user", "content": user_content})
    else:
        answer_count += 1

    system_message = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        max_tokens=2000,
    )["choices"][0]["message"]

    if submit_update:
        update_text_output(system_message, submit_update)

    messages.append(system_message)

    # messages_rev is kept newest-first for the uploaded log.
    messages_rev.insert(0, system_message)
    messages_rev.insert(0, {"role": "user", "content": input_text + transcript["text"]})

    if submit_update:
        colorize_thread = threading.Thread(
            target=colorize_and_update,
            args=(system_message, submit_update),
        )
        colorize_thread.start()

    _upload_to_notion(_format_transcript(messages_rev, answer_count, num_tokens))

    reply = system_message['content']
    return reply, colorize_text(reply)
|
|
|
|
|
|
|
# Input widgets: a microphone recording (handed to the handler as a file
# path) and a free-text box.
audio_input = Audio(source="microphone", type="filepath", label="Record your message")
text_input = Textbox(label="Type your message", max_length=4096)

# Output widgets: the plain reply and its colorized rendering.
output_text = Textbox(label="Text Output")
# `Markdown` was never imported by name (NameError); use it through the
# already-imported `gr` namespace.
output_html = gr.Markdown()

# Wire the handler's two inputs and two return values to the widgets above.
iface = gr.Interface(
    fn=transcribe,
    inputs=[audio_input, text_input],
    outputs=[output_text, output_html],
    title="Hold On, Pain Ends (HOPE)",
    description="Talk to Your USMLE Tutor HOPE",
    theme="compact",
    layout="vertical",
    allow_flagging=False,
)

iface.launch()