Spaces:

son9john
/

US

Runtime error

App Files Community

son9john committed on Mar 8, 2023

Commit

53c9ab2

1 Parent(s): a5b21b0

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -136

app.py CHANGED Viewed

@@ -1,31 +1,16 @@
-import openai, subprocess
 import gradio as gr
 from gradio.components import Audio, Textbox
 import os
 import re
-import tiktoken
 from transformers import GPT2Tokenizer
-tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
 import whisper
 import pandas as pd
-import os
 from datetime import datetime, timezone, timedelta
-# import dropbox
-# from notion_client import Client
 import notion_df
-API_KEY = os.environ["API_KEY"]
-# # Define your API key
-# my_API_KEY = os.environ["NOTION"]
-# notion = Client(auth=my_API_KEY)
-# # find the page you want to upload the file to
-# ACCESS_TOKEN = os.environ["ACCESS_TOKEN"]
-# dbx = dropbox.Dropbox(ACCESS_TOKEN)
 openai.api_key = os.environ["OPENAI_API_KEY"]
 initial_message = {"role": "system", "content": 'You are a USMLE Tutor. Respond with ALWAYS layered "bullet points" (listing rather than sentences) to all input with a fun mneumonics to memorize that list. But you can answer up to 1200 words if the user requests longer response.'}
 messages = [initial_message]
@@ -35,8 +20,6 @@ answer_count = 0
 # set up whisper model
 model = whisper.load_model("base")
 def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301"):
     """Returns the number of tokens used by a list of messages."""
     try:
@@ -58,175 +41,129 @@ def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301"):
 See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")
 def transcribe(audio, text):
     global messages
     global answer_count
-    transcript = None
     if audio is not None:
         audio_file = open(audio, "rb")
         transcript = openai.Audio.transcribe("whisper-1", audio_file, language="en")
-        # transcript = model.transcribe(audio_file, language="english")
         messages.append({"role": "user", "content": transcript["text"]})
-    if transcript is None:
         # Split the input text into sentences
         sentences = re.split("(?<=[.!?]) +", text)
-        # Initialize a list to store the tokens
-        input_tokens = []
-        # Add each sentence to the input_tokens list
-        for sentence in sentences:
-            # Tokenize the sentence using the GPT-2 tokenizer
-            sentence_tokens = tokenizer.encode(sentence)
-            # Check if adding the sentence would exceed the token limit
-            if len(input_tokens) + len(sentence_tokens) < 1440:
-                # Add the sentence tokens to the input_tokens list
-                input_tokens.extend(sentence_tokens)
-            else:
-                # If adding the sentence would exceed the token limit, truncate it
-                sentence_tokens = sentence_tokens[:1440-len(input_tokens)]
-                input_tokens.extend(sentence_tokens)
-                break
-        # Decode the input tokens into text
-        input_text = tokenizer.decode(input_tokens)
-        # Add the input text to the messages list
-        messages.append({"role": "user", "content": input_text})
-    # Get the current date and time in the local timezone
-    now_local = datetime.now()
-    # Create a timezone object for Eastern Time (ET)
-    et_tz = timezone(timedelta(hours=-5))
-    # Adjust the date and time to Eastern Time (ET)
-    now_et = now_local.astimezone(et_tz)
-    # Check if the accumulated tokens have exceeded 2096
     num_tokens = num_tokens_from_messages(messages)
     if num_tokens > 2096:
         # Concatenate the chat history
         chat_transcript = ""
         for message in messages:
             if message['role'] != 'system':
-                chat_transcript += f"[ANSWER {answer_count}]" + message['role'] + ": " + message['content'] + "\n\n"
         # Append the number of tokens used to the end of the chat transcript
-        chat_transcript_copy = chat_transcript
-        chat_transcript_copy += f"Number of tokens used: {num_tokens}\n\n"
-        # Get the current UTC time
-        utc_time = datetime.now(timezone.utc)
-        # Convert to Eastern Time Zone
-        eastern_time = utc_time + timedelta(hours=-5)
-        # Format as string (YY-MM-DD HH:MM)
-        published_date = eastern_time.strftime('%m-%d-%y %H:%M')
-        # string dataframe?
         df = pd.DataFrame([chat_transcript])
         notion_df.upload(df, 'https://www.notion.so/personal-5e3978680ca848bda844452129955138?pvs=4', title=str(published_date), api_key=API_KEY)
-    if num_tokens > 2200:
         # Reset the messages list and answer counter
         messages = [initial_message]
         answer_count = 0
-        input_text = 'Can you click the Submit button one more time? (say Yes)'
-        # Add the input text to the messages list
-        messages.append({"role": "user", "content": input_text})
     # Increment the answer counter
     answer_count += 1
-    # Add the answer counter to the system message
     system_message = openai.ChatCompletion.create(
         model="gpt-3.5-turbo",
         messages=messages,
         max_tokens=2000
     )["choices"][0]["message"]
     # Add the system message to the messages list
-    messages.append(system_message)
     # Concatenate the chat history
     chat_transcript = ""
     for message in messages:
         if message['role'] != 'system':
-            chat_transcript += f"[ANSWER {answer_count}]" + message['role'] + ": " + message['content'] + "\n\n"
-    # Append the number of tokens used to the end of the chat transcript
-    with open("conversation_history.txt", "a") as f:
-        f.write(chat_transcript)
-    chat_transcript_copy = chat_transcript
-    chat_transcript_copy += f"Number of tokens used: {num_tokens}\n\n"
-    filename = datetime.now().strftime("%m%d%y_%H:%M_conversation_history.txt")
-    # dbx.files_upload(chat_transcript_copy.encode('utf-8'), f'/{filename}', mode=dropbox.files.WriteMode.overwrite, autorename=False, client_modified=None, mute=False)
-    # dbx.files_upload(chat_transcript_copy.encode('utf-8'), '/conversation_history.txt', mode=dropbox.files.WriteMode.overwrite, autorename=False, client_modified=None, mute=False)
-    # Get the current UTC time
-    utc_time = datetime.now(timezone.utc)
-    # Convert to Eastern Time Zone
-    eastern_time = utc_time + timedelta(hours=-5)
-    # Format as string (YY-MM-DD HH:MM)
-    published_date = eastern_time.strftime('%m-%d-%y %H:%M')
-    # Get the current UTC time
-    utc_time = datetime.now(timezone.utc)
-    # Convert to Eastern Time Zone
-    eastern_time = utc_time + timedelta(hours=-5)
-    # Format as string (YY-MM-DD HH:MM)
-    published_date = eastern_time.strftime('%m-%d-%y %H:%M')
-    # string dataframe
-    df = pd.DataFrame([chat_transcript_copy])
     notion_df.upload(df, 'https://www.notion.so/personal-5e3978680ca848bda844452129955138?pvs=4', title=str(published_date), api_key=API_KEY)
-    return chat_transcript
 audio_input = Audio(source="microphone", type="filepath", label="Record your message")
 text_input = Textbox(label="Type your message", max_length=4096)
 output_text = gr.outputs.Textbox(label="Response")
-output_audio = Audio()
 iface = gr.Interface(
     fn=transcribe,
     inputs=[audio_input, text_input],
-    # outputs=(["audio", "text"]),
     outputs="text",
-    title="Your Excellence Never Abates (YENA)",
-    description="Talk to the AI Tutor YENA",
-    capture_session=True,
-    autoplay=True)
 # Launch Gradio interface
 iface.launch()
-# from transformers import pipeline, T5Tokenizer
-# import pyttsx3
-# import threading
-# import time
-# Set up speech engine
-# engine = pyttsx3.init()
-# def speak(text):
-#     # Get the current rate of the engine
-#     rate = engine.getProperty('rate')
-#     # Calculate the estimated time in seconds based on the length of the message and the current rate
-#     estimated_time = len(text) / (rate / 10)
-#     # Speak the text using the text-to-speech engine
-#     engine.say(text)
-#     engine.runAndWait()
-#     if engine._inLoop:
-#         # Wait for the speech engine to finish speaking
-#         time.sleep(estimated_time*1.5)
-#         engine.endLoop()

+import openai
 import gradio as gr
 from gradio.components import Audio, Textbox
 import os
 import re
 from transformers import GPT2Tokenizer
 import whisper
 import pandas as pd
 from datetime import datetime, timezone, timedelta
 import notion_df
 openai.api_key = os.environ["OPENAI_API_KEY"]
+tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
 initial_message = {"role": "system", "content": 'You are a USMLE Tutor. Respond with ALWAYS layered "bullet points" (listing rather than sentences) to all input with a fun mneumonics to memorize that list. But you can answer up to 1200 words if the user requests longer response.'}
 messages = [initial_message]
 # set up whisper model
 model = whisper.load_model("base")
 def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301"):
     """Returns the number of tokens used by a list of messages."""
     try:
 See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")
 def transcribe(audio, text):
     global messages
     global answer_count
     if audio is not None:
         audio_file = open(audio, "rb")
         transcript = openai.Audio.transcribe("whisper-1", audio_file, language="en")
         messages.append({"role": "user", "content": transcript["text"]})
+    if text is not None:
         # Split the input text into sentences
         sentences = re.split("(?<=[.!?]) +", text)
+        # Tokenize the sentences using the GPT-2 tokenizer
+        sentence_tokens = [tokenizer.encode(sentence) for sentence in sentences]
+        # Flatten the list of tokens
+        input_tokens = [token for sentence in sentence_tokens for token in sentence]
+        # Check if adding the input tokens would exceed the token limit
+        num_tokens = num_tokens_from_messages(messages)
+        if num_tokens + len(input_tokens) > 2200:
+            # Reset the messages list and answer counter
+            messages = [initial_message]
+            answer_count = 0
+            input_text = 'Can you click the Submit button one more time? (say Yes)'
+            messages.append({"role": "user", "content": input_text})
+        else:
+            # Add the input tokens to the messages list
+            input_text = tokenizer.decode(input_tokens)
+            messages.append({"role": "user", "content": input_text})
+    # Check if the accumulated tokens have exceeded the limit
     num_tokens = num_tokens_from_messages(messages)
     if num_tokens > 2096:
         # Concatenate the chat history
         chat_transcript = ""
         for message in messages:
             if message['role'] != 'system':
+                chat_transcript += f"[ANSWER {answer_count}]{message['role']}: {message['content']}\n\n"
         # Append the number of tokens used to the end of the chat transcript
+        chat_transcript += f"Number of tokens used: {num_tokens}\n\n"
+        # Get the current time in Eastern Time (ET)
+        now_et = datetime.now(timezone(timedelta(hours=-5)))
+        # Format the time as string (YY-MM-DD HH:MM)
+        published_date = now_et.strftime('%m-%d-%y %H:%M')
+        # Upload the chat transcript to Notion
         df = pd.DataFrame([chat_transcript])
         notion_df.upload(df, 'https://www.notion.so/personal-5e3978680ca848bda844452129955138?pvs=4', title=str(published_date), api_key=API_KEY)
         # Reset the messages list and answer counter
         messages = [initial_message]
         answer_count = 0
     # Increment the answer counter
     answer_count += 1
+    # Generate the system message using the OpenAI API
     system_message = openai.ChatCompletion.create(
         model="gpt-3.5-turbo",
         messages=messages,
         max_tokens=2000
     )["choices"][0]["message"]
     # Add the system message to the messages list
+    messages.append({"role": "system", "content": system_message})
     # Concatenate the chat history
     chat_transcript = ""
     for message in messages:
         if message['role'] != 'system':
+            chat_transcript += f"[ANSWER {answer_count}]{message['role']}: {message['content']}\n\n"
+    # Append the number of tokens used to the end of the chat transcript
+    num_tokens = num_tokens_from_messages(messages)
+    chat_transcript += f"Number of tokens used: {num_tokens}\n\n"
+    # Get the current time in Eastern Time (ET)
+    now_et = datetime.now(timezone(timedelta(hours=-5)))
+    # Format the time as string (YY-MM-DD HH:MM)
+    published_date = now_et.strftime('%m-%d-%y %H:%M')
+    # Upload the chat transcript to Notion
+    df = pd.DataFrame([chat_transcript])
     notion_df.upload(df, 'https://www.notion.so/personal-5e3978680ca848bda844452129955138?pvs=4', title=str(published_date), api_key=API_KEY)
+    # Reset the messages list and answer counter if the token limit is exceeded
+    if num_tokens > 2096:
+        messages = [initial_message]
+        answer_count = 0
+    else:
+        # Increment the answer counter
+        answer_count += 1
+    # Generate the system message using the OpenAI API
+    system_message = openai.Completion.create(
+        engine="text-davinci-002",
+        prompt=[{"text": f"{message['role']}: {message['content']}\n\n"} for message in messages],
+        temperature=0.7,
+        max_tokens=2000,
+        n=1,
+        stop=None,
+    )[0]["text"]
+    # Add the system message to the messages list
+    messages.append({"role": "system", "content": system_message})
 audio_input = Audio(source="microphone", type="filepath", label="Record your message")
 text_input = Textbox(label="Type your message", max_length=4096)
 output_text = gr.outputs.Textbox(label="Response")
 iface = gr.Interface(
     fn=transcribe,
     inputs=[audio_input, text_input],
     outputs="text",
+    title="YENA",
+    description="Tutor YENA")
 # Launch Gradio interface
 iface.launch()