|
import openai |
|
import gradio as gr |
|
from gradio.components import Audio, Textbox |
|
import os |
|
import re |
|
import tiktoken |
|
from transformers import GPT2Tokenizer |
|
import whisper |
|
import pandas as pd |
|
from datetime import datetime, timezone, timedelta |
|
import notion_df |
|
import concurrent.futures |
|
import nltk |
|
from nltk.tokenize import sent_tokenize |
|
nltk.download('punkt') |
|
|
|
|
|
|
|
tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium') |
|
# OpenAI credentials are read from the environment
openai.api_key = os.environ["OPENAI_API_KEY"]
|
|
|
|
|
initmessage = 'You are a USMLE Tutor. ALWAYS respond with layered "bullet points" (lists rather than sentences), and include a fun mnemonic to memorize each list. You may answer with up to 1200 words if the user requests a longer response.'
|
initial_message = {"role": "system", "content": initmessage}
|
messages = [initial_message] |
|
messages_rev = [initial_message] |
|
|
|
|
|
answer_count = 0 |
|
|
|
|
|
API_KEY = os.environ["API_KEY"] |
|
|
|
def transcribe(audio, text): |
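    """Answer a spoken and/or typed question with gpt-3.5-turbo.

    Any recorded audio is transcribed with OpenAI's Whisper API and answered
    first; the typed text is then split into roughly 800-token chunks, each
    chunk is answered in turn, and the running transcript is uploaded to Notion.
    """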
|
global messages |
|
global answer_count |
|
messages = [initial_message] |
|
messages_rev = [initial_message] |
|
|
|
transcript = {'text': ''} |
|
input_text = [] |
|
|
|
counter = 0 |
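    # If the user recorded audio, transcribe it and answer it before handling the typed text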
|
|
|
if audio is not None: |
|
        with open(audio, "rb") as audio_file:
            # Send the recording to OpenAI's Whisper API for transcription
            transcript = openai.Audio.transcribe("whisper-1", audio_file, language="en")
|
messages.append({"role": "user", "content": transcript["text"]}) |
|
system_message = openai.ChatCompletion.create( |
|
model="gpt-3.5-turbo", |
|
messages=messages, |
|
max_tokens=2000 |
|
)["choices"][0]["message"] |
|
|
|
messages.append({"role": "system", "content": str(system_message['content'])}) |
|
messages_rev.append({"role": "system", "content": str(system_message['content'])}) |
|
|
|
|
|
chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages_rev if message['role'] != 'user']) |
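        # Timestamp the exchange (fixed UTC-4 offset) and archive it to a Notion page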
|
|
|
|
|
|
|
|
|
df = pd.DataFrame([chat_transcript]) |
|
|
|
now_et = datetime.now(timezone(timedelta(hours=-4))) |
|
|
|
published_date = now_et.strftime('%m-%d-%y %H:%M') |
|
notion_df.upload(df, 'https://www.notion.so/US-62e861a0b35f43da8ef9a7789512b8c2?pvs=4', title=str(published_date), api_key=API_KEY) |
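    # Split the typed text into sentences and pack them into chunks of roughly 800 GPT-2 tokens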
|
|
|
|
|
|
|
    sentences = sent_tokenize(text or "")  # tolerate an empty text box
|
|
|
|
|
subinput_tokens = [] |
|
buffer = [] |
|
for sentence in sentences: |
|
sentence_tokens = tokenizer.encode(sentence) |
|
if len(buffer) + len(sentence_tokens) > 800: |
|
subinput_tokens.append(buffer) |
|
buffer = [] |
|
buffer.extend(sentence_tokens) |
|
if buffer: |
|
subinput_tokens.append(buffer) |
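    # Answer each chunk in turn, re-sending the tutoring instructions before every chunk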
|
|
|
chat_transcript = '' |
|
|
|
for tokens in subinput_tokens: |
|
        messages.append({"role": "user", "content": initmessage})
|
|
|
subinput_text = tokenizer.decode(tokens) |
|
messages.append({"role": "user", "content": transcript["text"]+str(subinput_text)}) |
|
|
|
num_tokens = sum(len(tokenizer.encode(message["content"])) for message in messages) |
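        # If the accumulated conversation has grown too large for the model, archive it and start a fresh context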
|
if num_tokens > 2096: |
|
|
|
chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages if message['role'] != 'user']) |
|
|
|
chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n" |
|
|
|
|
|
now_et = datetime.now(timezone(timedelta(hours=-4))) |
|
|
|
published_date = now_et.strftime('%m-%d-%y %H:%M') |
|
            # The very first overflow is not archived; later ones are uploaded with a 'FULL' title
            if counter > 0:
                df = pd.DataFrame([chat_transcript])
                notion_df.upload(df, 'https://www.notion.so/US-62e861a0b35f43da8ef9a7789512b8c2?pvs=4', title=str(published_date + 'FULL'), api_key=API_KEY)
            counter += 1
            # Start a fresh context: the tutoring instructions plus the current chunk only
            messages = [{"role": "system", "content": initmessage},
                        {"role": "user", "content": subinput_text}]
            answer_count = 0
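        # Ask gpt-3.5-turbo to answer the current chunk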
|
|
|
|
|
|
|
system_message = openai.ChatCompletion.create( |
|
model="gpt-3.5-turbo", |
|
messages=messages, |
|
max_tokens=2000 |
|
)["choices"][0]["message"] |
|
|
|
messages.append({"role": "system", "content": str(system_message['content'])}) |
|
messages_rev.append({"role": "system", "content": str(system_message['content'])}) |
|
|
|
|
|
chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages_rev if message['role'] != 'user']) |
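    # Note the token usage and upload the transcript to Notion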
|
|
|
|
|
|
|
|
|
chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n" |
|
df = pd.DataFrame([chat_transcript]) |
|
|
|
now_et = datetime.now(timezone(timedelta(hours=-4))) |
|
|
|
published_date = now_et.strftime('%m-%d-%y %H:%M') |
|
notion_df.upload(df, 'https://www.notion.so/US-62e861a0b35f43da8ef9a7789512b8c2?pvs=4', title=str(published_date), api_key=API_KEY) |
|
|
|
|
|
return chat_transcript |
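# Gradio interface: microphone and text box in, a single text response out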
|
|
|
|
|
audio_input = Audio(source="microphone", type="filepath", label="Record your message") |
|
text_input = Textbox(label="Type your message", max_length=4096) |
|
output_text = Textbox(label="Response")
|
output_audio = Audio() |
|
|
|
|
|
iface = gr.Interface( |
|
fn=transcribe, |
|
inputs=[audio_input, text_input], |
|
outputs=[output_text], |
|
title="Hold On, Pain Ends (HOPE) 2", |
|
description="Talk to Your Nephrology Tutor HOPE", |
|
theme="compact", |
|
layout="vertical", |
|
    allow_flagging="never"
|
) |
|
|
|
|
|
iface.launch() |