Update app.py
app.py
CHANGED
@@ -35,7 +35,8 @@ def transcribe(audio, text):
    global messages
    global answer_count
    messages = [initial_message]
-   messages_rev = [initial_message]
+   messages_rev = [initial_message]
+   chat_transcript = ''

    transcript = {'text': ''}
    input_text = []
@@ -51,7 +52,7 @@ def transcribe(audio, text):
        messages=messages,
        max_tokens=2000
    )["choices"][0]["message"]
-
+
    messages.append({"role": "system", "content": str(system_message['content'])})
    messages_rev.append({"role": "system", "content": str(system_message['content'])})

@@ -66,80 +67,80 @@ def transcribe(audio, text):
    now_et = datetime.now(timezone(timedelta(hours=-4)))
    # Format the time as string (YY-MM-DD HH:MM)
    published_date = now_et.strftime('%m-%d-%y %H:%M')
-   notion_df.upload(df, 'https://www.notion.so/US-
-
+   notion_df.upload(df, 'https://www.notion.so/US-62e861a0b35f43da8ef9a7789512b8c2?pvs=4', title=str(published_date), api_key=API_KEY)

-
-
+   if text is not None:
+       # Split the input text into sentences
+       sentences = sent_tokenize(text)

-
-
-
-
-
-
+       # Split the input text into sub-input tokens based on the condition
+       subinput_tokens = []
+       buffer = []
+       for sentence in sentences:
+           sentence_tokens = tokenizer.encode(sentence)
+           if len(buffer) + len(sentence_tokens) > 800:
+               subinput_tokens.append(buffer)
+               buffer = []
+           buffer.extend(sentence_tokens)
+       if buffer:
            subinput_tokens.append(buffer)
-           buffer = []
-       buffer.extend(sentence_tokens)
-   if buffer:
-       subinput_tokens.append(buffer)

-
+       for tokens in subinput_tokens:
+           # Decode the tokens into text
+           subinput_text = tokenizer.decode(tokens)
+           messages.append({"role": "system", "content": initmessage})
+           messages.append({"role": "user", "content": transcript["text"]})
+
+           num_tokens = sum(len(tokenizer.encode(message["content"])) for message in messages)
+           if num_tokens > 1640:
+               # Concatenate the chat history
+               chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages if message['role'] != 'user'])
+               # Append the number of tokens used to the end of the chat transcript
+               chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n"

-
-
-
-
-
+               # Get the current time in Eastern Time (ET)
+               now_et = datetime.now(timezone(timedelta(hours=-5)))
+               # Format the time as string (YY-MM-DD HH:MM)
+               published_date = now_et.strftime('%m-%d-%y %H:%M')
+               if counter > 0:
+                   # Upload the chat transcript to Notion
+                   df = pd.DataFrame([chat_transcript])
+                   notion_df.upload(df, 'https://www.notion.so/US-62e861a0b35f43da8ef9a7789512b8c2?pvs=4', title=str(published_date), api_key=API_KEY)

-
-
-
-
-       # Append the number of tokens used to the end of the chat transcript
-       chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n"
+               counter += 1
+               messages = [{"role": "system", "content": initial_message}]
+               messages = [{"role": "user", "content": subinput_text}]
+               answer_count = 0

-       #
-
-
-
-
-
-
-       notion_df.upload(df, 'https://www.notion.so/US-Mine-922ea77abf8b4493bd63909b43bfbb2f?pvs=4', title=str(published_date+'FULL'), api_key=API_KEY)
-       counter += 1
-       messages = [{"role": "system", "content": initmessage}]
-       messages = [{"role": "user", "content": subinput_text}]
-       answer_count = 0
-
-       # Generate the system message using the OpenAI API
-       # with concurrent.futures.ThreadPoolExecutor() as executor:
-       system_message = openai.ChatCompletion.create(
-           model="gpt-3.5-turbo",
-           messages=messages,
-           max_tokens=2000
-       )["choices"][0]["message"]
-
-       messages.append({"role": "system", "content": str(system_message['content'])})
-       messages_rev.append({"role": "system", "content": str(system_message['content'])})
+           # Generate the system message using the OpenAI API
+           # with concurrent.futures.ThreadPoolExecutor() as executor:
+           system_message = openai.ChatCompletion.create(
+               model="gpt-3.5-turbo",
+               messages=messages,
+               max_tokens=2000
+           )["choices"][0]["message"]

+           messages.append({"role": "system", "content": str(system_message['content'])})
+           messages_rev.append({"role": "system", "content": str(system_message['content'])})
+           chat_transcript = f"\n\nNumber of tokens used: {num_tokens}\n\n"
+
    # Concatenate the chat history
-   chat_transcript
+   chat_transcript += "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages_rev if message['role'] != 'user'])
    # if not isinstance(messages[-1]['content'], str):
        # continue

    # Append the number of tokens used to the end of the chat transcript
-   chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n"
    df = pd.DataFrame([chat_transcript])
    # Get the current time in Eastern Time (ET)
    now_et = datetime.now(timezone(timedelta(hours=-4)))
    # Format the time as string (YY-MM-DD HH:MM)
    published_date = now_et.strftime('%m-%d-%y %H:%M')
    notion_df.upload(df, 'https://www.notion.so/US-62e861a0b35f43da8ef9a7789512b8c2?pvs=4', title=str(published_date), api_key=API_KEY)
-
+
    # Return the chat transcript
    return chat_transcript

+
# Define the input and output components for Gradio
audio_input = Audio(source="microphone", type="filepath", label="Record your message")
text_input = Textbox(label="Type your message", max_length=4096)
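The core of this change is that typed input is now split into sentence-aligned chunks of at most 800 tokens before each chunk is sent to the ChatCompletion API. Below is a minimal, self-contained sketch of just that chunking step. It is an illustration, not the app's code: it assumes nltk's sent_tokenize and tiktoken's gpt-3.5-turbo encoding stand in for whatever `tokenizer` is configured in app.py, and the helper name `split_into_subinputs` is hypothetical.

# Sketch of the sentence-based token chunking introduced in this commit.
# Assumptions: nltk for sentence splitting, tiktoken as the tokenizer.
import nltk
import tiktoken
from nltk.tokenize import sent_tokenize

nltk.download("punkt", quiet=True)  # sentence tokenizer model, downloaded once
tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo")

def split_into_subinputs(text: str, max_tokens: int = 800) -> list[str]:
    """Group consecutive sentences into chunks of roughly max_tokens tokens each."""
    subinput_tokens = []
    buffer = []
    for sentence in sent_tokenize(text):
        sentence_tokens = tokenizer.encode(sentence)
        # Start a new chunk when adding this sentence would exceed the budget
        # (the extra `buffer and` guard avoids emitting an empty first chunk).
        if buffer and len(buffer) + len(sentence_tokens) > max_tokens:
            subinput_tokens.append(buffer)
            buffer = []
        buffer.extend(sentence_tokens)
    if buffer:
        subinput_tokens.append(buffer)
    # Decode each token chunk back into text before it is sent to the API.
    return [tokenizer.decode(tokens) for tokens in subinput_tokens]

Budgeting the chunks by token count rather than by character count keeps each request comfortably inside the model's context window regardless of how the text happens to tokenize.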