Update app.py
Browse files
app.py
CHANGED
@@ -10,6 +10,13 @@ import pandas as pd
|
|
10 |
from datetime import datetime, timezone, timedelta
|
11 |
import notion_df
|
12 |
import concurrent.futures
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
# Define the tokenizer and model
|
15 |
tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
|
@@ -27,6 +34,50 @@ answer_count = 0
|
|
27 |
# Define the Notion API key
|
28 |
API_KEY = os.environ["API_KEY"]
|
29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
def transcribe(audio, text):
|
31 |
global messages
|
32 |
global answer_count
|
@@ -102,6 +153,9 @@ def transcribe(audio, text):
|
|
102 |
)["choices"][0]["message"]
|
103 |
# Wait for the completion of the OpenAI API call
|
104 |
|
|
|
|
|
|
|
105 |
# Add the system message to the messages list
|
106 |
messages.append(system_message)
|
107 |
|
@@ -110,7 +164,11 @@ def transcribe(audio, text):
|
|
110 |
# Add the input text to the messages list
|
111 |
messages_rev.insert(0, {"role": "user", "content": input_text + transcript["text"]})
|
112 |
|
113 |
-
|
|
|
|
|
|
|
|
|
114 |
# Concatenate the chat history
|
115 |
chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages_rev if message['role'] != 'system'])
|
116 |
|
@@ -127,8 +185,9 @@ def transcribe(audio, text):
|
|
127 |
df = pd.DataFrame([chat_transcript])
|
128 |
notion_df.upload(df, 'https://www.notion.so/US-62e861a0b35f43da8ef9a7789512b8c2?pvs=4', title=str(published_date), api_key=API_KEY)
|
129 |
|
130 |
-
# Return the chat transcript
|
131 |
-
return system_message['content']
|
|
|
132 |
|
133 |
# Define the input and output components for Gradio
|
134 |
audio_input = Audio(source="microphone", type="filepath", label="Record your message")
|
|
|
10 |
from datetime import datetime, timezone, timedelta
|
11 |
import notion_df
|
12 |
import concurrent.futures
|
13 |
+
import nltk
|
14 |
+
from nltk.tokenize import sent_tokenize
|
15 |
+
nltk.download('punkt')
|
16 |
+
import spacy
|
17 |
+
from spacy import displacy
|
18 |
+
from gradio import Markdown
|
19 |
+
import threading
|
20 |
|
21 |
# Define the tokenizer and model
|
22 |
tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
|
|
|
34 |
# Define the Notion API key
|
35 |
API_KEY = os.environ["API_KEY"]
|
36 |
|
37 |
+
|
38 |
+
nlp = spacy.load("en_core_web_sm")
|
39 |
+
def process_nlp(system_message):
    """Return the colorized markup for a chat message.

    Args:
        system_message: Mapping whose ``'content'`` entry holds the text
            to colorize.

    Returns:
        Whatever ``colorize_text`` produces for that text.
    """
    return colorize_text(system_message['content'])
|
43 |
+
|
44 |
+
def colorize_text(text):
    """Return *text* as Markdown/HTML with each token styled by its role.

    The text is split on newlines and every line is run through the
    module-level spaCy pipeline ``nlp``. For each token the first matching
    rule wins:

    1. token is part of a named entity -> wrapped in ``**`` (Markdown bold)
    2. part of speech is NOUN / VERB / ADJ / ADV -> colored ``<span>``
    3. all-digit token -> colored ``<span>``
    4. quotation mark -> colored ``<span>``
    5. any other punctuation -> colored ``<span>``
    6. anything else -> emitted unstyled

    Every input line is terminated with ``<br>`` in the output.

    Args:
        text: Plain text to colorize.

    Returns:
        A single string of markup.
    """
    # Local import keeps this block self-contained; html is stdlib.
    from html import escape

    def span(color, body):
        return (
            f'<span style="color: {color}; background-color: transparent;">'
            f'{body}</span>'
        )

    pos_colors = {
        'NOUN': '#FF3300',
        'VERB': '#FFFF00',
        'ADJ': '#00CC00',
        'ADV': '#FF6600',
    }

    pieces = []
    for line in text.split("\n"):
        for token in nlp(line):
            # Escape so "<" / "&" in the text cannot be read as markup.
            body = escape(token.text, quote=False)
            if token.ent_type_:
                styled = f'**{body}**'
            elif token.pos_ in pos_colors:
                styled = span(pos_colors[token.pos_], body)
            elif token.is_digit:
                styled = span('#9900CC', body)
            elif token.is_quote:
                # Checked before is_punct: quote characters are punctuation
                # too, so the reverse order would never reach this branch.
                styled = span('#008080', body)
            elif token.is_punct:
                styled = span('#8B4513', body)
            else:
                styled = body
            # Trailing whitespace goes outside the markup: "**word **" is
            # not rendered as bold because the closing "**" follows a space.
            pieces.append(styled)
            pieces.append(token.whitespace_)
        pieces.append("<br>")

    return "".join(pieces)
|
72 |
+
|
73 |
+
def colorize_and_update(system_message, submit_update):
    """Colorize a message's content and hand the result to a callback.

    Args:
        system_message: Mapping whose ``'content'`` entry holds the text.
        submit_update: Callable taking two positional arguments; it is
            called with ``None`` first and the colorized text second.
    """
    markup = colorize_text(system_message['content'])
    # The first slot is deliberately None; only the second output is supplied.
    submit_update(None, markup)
|
76 |
+
|
77 |
+
def update_text_output(system_message, submit_update):
    """Hand a message's raw content to a callback as its first output.

    Args:
        system_message: Mapping whose ``'content'`` entry holds the text.
        submit_update: Callable taking two positional arguments; it is
            called with the content first and ``None`` second.
    """
    plain_text = system_message['content']
    submit_update(plain_text, None)
|
79 |
+
|
80 |
+
|
81 |
def transcribe(audio, text):
|
82 |
global messages
|
83 |
global answer_count
|
|
|
153 |
)["choices"][0]["message"]
|
154 |
# Wait for the completion of the OpenAI API call
|
155 |
|
156 |
+
if submit_update: # Check if submit_update is not None
|
157 |
+
update_text_output(system_message, submit_update)
|
158 |
+
|
159 |
# Add the system message to the messages list
|
160 |
messages.append(system_message)
|
161 |
|
|
|
164 |
# Add the input text to the messages list
|
165 |
messages_rev.insert(0, {"role": "user", "content": input_text + transcript["text"]})
|
166 |
|
167 |
+
# Start a separate thread to process the colorization and update the Gradio interface
|
168 |
+
if submit_update: # Check if submit_update is not None
|
169 |
+
colorize_thread = threading.Thread(target=colorize_and_update, args=(system_message, submit_update))
|
170 |
+
colorize_thread.start()
|
171 |
+
|
172 |
# Concatenate the chat history
|
173 |
chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages_rev if message['role'] != 'system'])
|
174 |
|
|
|
185 |
df = pd.DataFrame([chat_transcript])
|
186 |
notion_df.upload(df, 'https://www.notion.so/US-62e861a0b35f43da8ef9a7789512b8c2?pvs=4', title=str(published_date), api_key=API_KEY)
|
187 |
|
188 |
+
# Return the assistant's reply as plain text and as colorized markup
|
189 |
+
return system_message['content'], colorize_text(system_message['content'])
|
190 |
+
|
191 |
|
192 |
# Define the input and output components for Gradio
|
193 |
audio_input = Audio(source="microphone", type="filepath", label="Record your message")
|