Update app.py
Browse files
app.py
CHANGED
@@ -16,7 +16,7 @@ tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
|
|
16 |
model = openai.api_key = os.environ["OAPI_KEY"]
|
17 |
|
18 |
# Define the initial message and messages list
|
19 |
-
initialt = 'If user asked COLORIZE,
|
20 |
to all input with a fun mneumonics to memorize that list. But you can answer up to 1200 words if the user requests longer response. \
|
21 |
You are going to keep answer and also challenge the student to learn USMLE anatomy, phsysiology, and pathology.'
|
22 |
initial_message = {"role": "system", "content": initialt}
|
@@ -29,6 +29,48 @@ answer_count = 0
|
|
29 |
# Define the Notion API key
|
30 |
API_KEY = os.environ["NAPI_KEY"]
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
def transcribe(audio, text):
|
33 |
global messages
|
34 |
global answer_count
|
@@ -104,42 +146,53 @@ def transcribe(audio, text):
|
|
104 |
max_tokens=2000
|
105 |
)["choices"][0]["message"]
|
106 |
# Wait for the completion of the OpenAI API call
|
107 |
-
|
|
|
|
|
|
|
108 |
# Add the system message to the messages list
|
109 |
messages.append(system_message)
|
110 |
-
|
111 |
# Add the system message to the beginning of the messages list
|
112 |
messages_rev.insert(0, system_message)
|
113 |
# Add the input text to the messages list
|
114 |
messages_rev.insert(0, {"role": "user", "content": input_text + transcript["text"]})
|
115 |
|
116 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages_rev if message['role'] != 'system'])
|
118 |
|
119 |
# chat_transcript_copy = chat_transcript
|
120 |
# Append the number of tokens used to the end of the chat transcript
|
121 |
chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n"
|
122 |
-
|
123 |
-
# Upload the chat transcript to Notion
|
124 |
now_et = datetime.now(timezone(timedelta(hours=-4)))
|
125 |
published_date = now_et.strftime('%m-%d-%y %H:%M')
|
126 |
df = pd.DataFrame([chat_transcript])
|
127 |
notion_df.upload(df, 'https://www.notion.so/YENA-be569d0a40c940e7b6e0679318215790?pvs=4', title=str(published_date), api_key=API_KEY)
|
128 |
|
129 |
-
# Return the chat transcript
|
130 |
-
return system_message['content']
|
131 |
-
|
|
|
132 |
# Define the input and output components for Gradio
|
133 |
audio_input = Audio(source="microphone", type="filepath", label="Record your message")
|
134 |
text_input = Textbox(label="Type your message", max_length=4096)
|
135 |
-
output_text =
|
136 |
-
|
137 |
|
138 |
# Define the Gradio interface
|
139 |
iface = gr.Interface(
|
140 |
fn=transcribe,
|
141 |
inputs=[audio_input, text_input],
|
142 |
-
outputs=[output_text],
|
143 |
title="Hold On, Pain Ends (HOPE)",
|
144 |
description="Talk to Your USMLE Tutor HOPE",
|
145 |
theme="compact",
|
|
|
16 |
model = openai.api_key = os.environ["OAPI_KEY"]
|
17 |
|
18 |
# Define the initial message and messages list
|
19 |
+
initialt = 'If user asked COLORIZE, dont need to do anything but present the input as it is with organized tabs (layers). You are a USMLE Tutor. Respond with ALWAYS layered "bullet points" (listing rather than sentences) \
|
20 |
to all input with a fun mneumonics to memorize that list. But you can answer up to 1200 words if the user requests longer response. \
|
21 |
You are going to keep answer and also challenge the student to learn USMLE anatomy, phsysiology, and pathology.'
|
22 |
initial_message = {"role": "system", "content": initialt}
|
|
|
29 |
# Define the Notion API key
|
30 |
API_KEY = os.environ["NAPI_KEY"]
|
31 |
|
32 |
+
nlp = spacy.load("en_core_web_sm")
|
33 |
+
def process_nlp(system_message):
    """Return the colorized rendering of a chat message's text.

    Args:
        system_message: Mapping with a ``'content'`` key holding the text
            to colorize.

    Returns:
        The markup string produced by ``colorize_text`` for that text.
    """
    return colorize_text(system_message['content'])
|
37 |
+
|
38 |
+
def colorize_text(text):
    """Return *text* as Markdown/HTML markup with tokens highlighted.

    Each line of *text* is tokenized with the module-level spaCy pipeline
    ``nlp``. For every token the first matching rule wins:

    1. part of a named entity  -> Markdown bold
    2. NOUN / VERB / ADJ / ADV -> colored ``<span>`` (see ``pos_colors``)
    3. digit                   -> purple ``<span>``
    4. quotation mark          -> teal ``<span>``
    5. other punctuation       -> brown ``<span>``
    6. anything else           -> emitted unchanged

    Every input line, including an empty one, is terminated with ``<br>``.

    Args:
        text: Plain text whose lines are separated by newline characters.

    Returns:
        One string containing the markup for all lines.
    """
    # Span color for each coarse part-of-speech tag that gets highlighted.
    pos_colors = {
        'NOUN': '#FF3300',
        'VERB': '#FFFF00',
        'ADJ': '#00CC00',
        'ADV': '#FF6600',
    }

    def span(color, body):
        # Single place that defines the inline-style markup for all colors.
        return (
            f'<span style="color: {color}; background-color: transparent;">'
            f'{body}</span>'
        )

    # Collect fragments and join once instead of growing a string with +=.
    pieces = []
    for line in text.split("\n"):
        for token in nlp(line):
            if token.ent_type_:
                # Trailing whitespace must stay outside the ** markers:
                # Markdown does not close emphasis on "**word **".
                pieces.append(f'**{token.text}**{token.whitespace_}')
            elif token.pos_ in pos_colors:
                pieces.append(span(pos_colors[token.pos_], token.text_with_ws))
            elif token.is_digit:
                pieces.append(span('#9900CC', token.text_with_ws))
            elif token.is_quote:
                # Checked before is_punct: quotation marks are punctuation
                # too, so the reverse order never reaches this color.
                pieces.append(span('#008080', token.text_with_ws))
            elif token.is_punct:
                pieces.append(span('#8B4513', token.text_with_ws))
            else:
                pieces.append(token.text_with_ws)
        pieces.append("<br>")

    return "".join(pieces)
|
66 |
+
|
67 |
+
def colorize_and_update(system_message, submit_update):
    """Colorize a message's text and pass the result to *submit_update*.

    Args:
        system_message: Mapping with a ``'content'`` key holding the text.
        submit_update: Callable taking two positional arguments; it is
            invoked once as ``submit_update(None, colorized_markup)``.
    """
    markup = colorize_text(system_message['content'])
    # First slot is left as None; the colorized markup is the second output.
    submit_update(None, markup)
|
70 |
+
|
71 |
+
def update_text_output(system_message, submit_update):
    """Pass a message's raw text to *submit_update* as the first output.

    Args:
        system_message: Mapping with a ``'content'`` key holding the text.
        submit_update: Callable taking two positional arguments; it is
            invoked once as ``submit_update(text, None)``.
    """
    plain_text = system_message['content']
    # Second slot is left as None; only the plain-text output is supplied.
    submit_update(plain_text, None)
|
73 |
+
|
74 |
def transcribe(audio, text):
|
75 |
global messages
|
76 |
global answer_count
|
|
|
146 |
max_tokens=2000
|
147 |
)["choices"][0]["message"]
|
148 |
# Wait for the completion of the OpenAI API call
|
149 |
+
|
150 |
+
if submit_update: # Check if submit_update is not None
|
151 |
+
update_text_output(system_message, submit_update)
|
152 |
+
|
153 |
# Add the system message to the messages list
|
154 |
messages.append(system_message)
|
155 |
+
|
156 |
# Add the system message to the beginning of the messages list
|
157 |
messages_rev.insert(0, system_message)
|
158 |
# Add the input text to the messages list
|
159 |
messages_rev.insert(0, {"role": "user", "content": input_text + transcript["text"]})
|
160 |
|
161 |
+
# Start a separate thread to process the colorization and update the Gradio interface
|
162 |
+
if submit_update: # Check if submit_update is not None
|
163 |
+
colorize_thread = threading.Thread(target=colorize_and_update, args=(system_message, submit_update))
|
164 |
+
colorize_thread.start()
|
165 |
+
|
166 |
+
# Return the system message immediately
|
167 |
+
chat_transcript = system_message['content']
|
168 |
+
|
169 |
+
# Concatenate the chat
|
170 |
chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages_rev if message['role'] != 'system'])
|
171 |
|
172 |
# chat_transcript_copy = chat_transcript
|
173 |
# Append the number of tokens used to the end of the chat transcript
|
174 |
chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n"
|
175 |
+
|
|
|
176 |
now_et = datetime.now(timezone(timedelta(hours=-4)))
|
177 |
published_date = now_et.strftime('%m-%d-%y %H:%M')
|
178 |
df = pd.DataFrame([chat_transcript])
|
179 |
notion_df.upload(df, 'https://www.notion.so/YENA-be569d0a40c940e7b6e0679318215790?pvs=4', title=str(published_date), api_key=API_KEY)
|
180 |
|
181 |
+
# Return the reply text and its colorized rendering
|
182 |
+
return system_message['content'], colorize_text(system_message['content'])
|
183 |
+
|
184 |
+
|
185 |
# Define the input and output components for Gradio
|
186 |
audio_input = Audio(source="microphone", type="filepath", label="Record your message")
|
187 |
text_input = Textbox(label="Type your message", max_length=4096)
|
188 |
+
output_text = Textbox(label="Text Output")
|
189 |
+
output_html = Markdown()
|
190 |
|
191 |
# Define the Gradio interface
|
192 |
iface = gr.Interface(
|
193 |
fn=transcribe,
|
194 |
inputs=[audio_input, text_input],
|
195 |
+
outputs=[output_text, output_html],
|
196 |
title="Hold On, Pain Ends (HOPE)",
|
197 |
description="Talk to Your USMLE Tutor HOPE",
|
198 |
theme="compact",
|