son9john committed on
Commit
53c9ab2
·
1 Parent(s): a5b21b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -136
app.py CHANGED
@@ -1,31 +1,16 @@
1
- import openai, subprocess
2
  import gradio as gr
3
  from gradio.components import Audio, Textbox
4
-
5
  import os
6
  import re
7
- import tiktoken
8
  from transformers import GPT2Tokenizer
9
- tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
10
  import whisper
11
  import pandas as pd
12
- import os
13
  from datetime import datetime, timezone, timedelta
14
- # import dropbox
15
- # from notion_client import Client
16
  import notion_df
17
 
18
- API_KEY = os.environ["API_KEY"]
19
- # # Define your API key
20
-
21
- # my_API_KEY = os.environ["NOTION"]
22
- # notion = Client(auth=my_API_KEY)
23
- # # find the page you want to upload the file to
24
-
25
- # ACCESS_TOKEN = os.environ["ACCESS_TOKEN"]
26
- # dbx = dropbox.Dropbox(ACCESS_TOKEN)
27
-
28
  openai.api_key = os.environ["OPENAI_API_KEY"]
 
29
 
30
  initial_message = {"role": "system", "content": 'You are a USMLE Tutor. Respond with ALWAYS layered "bullet points" (listing rather than sentences) to all input with a fun mneumonics to memorize that list. But you can answer up to 1200 words if the user requests longer response.'}
31
  messages = [initial_message]
@@ -35,8 +20,6 @@ answer_count = 0
35
  # set up whisper model
36
  model = whisper.load_model("base")
37
 
38
-
39
-
40
  def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301"):
41
  """Returns the number of tokens used by a list of messages."""
42
  try:
@@ -58,175 +41,129 @@ def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301"):
58
  See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")
59
 
60
  def transcribe(audio, text):
61
-
62
  global messages
63
  global answer_count
64
- transcript = None
65
-
66
  if audio is not None:
67
  audio_file = open(audio, "rb")
68
  transcript = openai.Audio.transcribe("whisper-1", audio_file, language="en")
69
- # transcript = model.transcribe(audio_file, language="english")
70
  messages.append({"role": "user", "content": transcript["text"]})
71
-
72
- if transcript is None:
73
  # Split the input text into sentences
74
  sentences = re.split("(?<=[.!?]) +", text)
75
-
76
- # Initialize a list to store the tokens
77
- input_tokens = []
78
-
79
- # Add each sentence to the input_tokens list
80
- for sentence in sentences:
81
- # Tokenize the sentence using the GPT-2 tokenizer
82
- sentence_tokens = tokenizer.encode(sentence)
83
- # Check if adding the sentence would exceed the token limit
84
- if len(input_tokens) + len(sentence_tokens) < 1440:
85
- # Add the sentence tokens to the input_tokens list
86
- input_tokens.extend(sentence_tokens)
87
- else:
88
- # If adding the sentence would exceed the token limit, truncate it
89
- sentence_tokens = sentence_tokens[:1440-len(input_tokens)]
90
- input_tokens.extend(sentence_tokens)
91
- break
92
- # Decode the input tokens into text
93
- input_text = tokenizer.decode(input_tokens)
94
-
95
- # Add the input text to the messages list
96
- messages.append({"role": "user", "content": input_text})
97
-
98
- # Get the current date and time in the local timezone
99
- now_local = datetime.now()
100
- # Create a timezone object for Eastern Time (ET)
101
- et_tz = timezone(timedelta(hours=-5))
102
- # Adjust the date and time to Eastern Time (ET)
103
- now_et = now_local.astimezone(et_tz)
104
-
105
- # Check if the accumulated tokens have exceeded 2096
106
  num_tokens = num_tokens_from_messages(messages)
107
  if num_tokens > 2096:
108
  # Concatenate the chat history
109
  chat_transcript = ""
110
  for message in messages:
111
  if message['role'] != 'system':
112
- chat_transcript += f"[ANSWER {answer_count}]" + message['role'] + ": " + message['content'] + "\n\n"
113
  # Append the number of tokens used to the end of the chat transcript
114
-
115
- chat_transcript_copy = chat_transcript
116
- chat_transcript_copy += f"Number of tokens used: {num_tokens}\n\n"
117
 
118
- # Get the current UTC time
119
- utc_time = datetime.now(timezone.utc)
120
- # Convert to Eastern Time Zone
121
- eastern_time = utc_time + timedelta(hours=-5)
122
- # Format as string (YY-MM-DD HH:MM)
123
- published_date = eastern_time.strftime('%m-%d-%y %H:%M')
124
- # string dataframe?
125
  df = pd.DataFrame([chat_transcript])
126
  notion_df.upload(df, 'https://www.notion.so/personal-5e3978680ca848bda844452129955138?pvs=4', title=str(published_date), api_key=API_KEY)
127
 
128
- if num_tokens > 2200:
129
  # Reset the messages list and answer counter
130
  messages = [initial_message]
131
  answer_count = 0
132
- input_text = 'Can you click the Submit button one more time? (say Yes)'
133
- # Add the input text to the messages list
134
- messages.append({"role": "user", "content": input_text})
135
 
136
  # Increment the answer counter
137
  answer_count += 1
138
- # Add the answer counter to the system message
 
139
  system_message = openai.ChatCompletion.create(
140
  model="gpt-3.5-turbo",
141
  messages=messages,
142
  max_tokens=2000
143
  )["choices"][0]["message"]
 
 
 
144
  # Add the system message to the messages list
145
- messages.append(system_message)
146
 
147
  # Concatenate the chat history
148
  chat_transcript = ""
149
  for message in messages:
150
  if message['role'] != 'system':
151
- chat_transcript += f"[ANSWER {answer_count}]" + message['role'] + ": " + message['content'] + "\n\n"
152
- # Append the number of tokens used to the end of the chat transcript
153
 
154
- with open("conversation_history.txt", "a") as f:
155
- f.write(chat_transcript)
156
-
157
- chat_transcript_copy = chat_transcript
158
- chat_transcript_copy += f"Number of tokens used: {num_tokens}\n\n"
159
- filename = datetime.now().strftime("%m%d%y_%H:%M_conversation_history.txt")
160
 
161
- # dbx.files_upload(chat_transcript_copy.encode('utf-8'), f'/{filename}', mode=dropbox.files.WriteMode.overwrite, autorename=False, client_modified=None, mute=False)
162
- # dbx.files_upload(chat_transcript_copy.encode('utf-8'), '/conversation_history.txt', mode=dropbox.files.WriteMode.overwrite, autorename=False, client_modified=None, mute=False)
 
 
163
 
164
- # Get the current UTC time
165
- utc_time = datetime.now(timezone.utc)
166
- # Convert to Eastern Time Zone
167
- eastern_time = utc_time + timedelta(hours=-5)
168
- # Format as string (YY-MM-DD HH:MM)
169
- published_date = eastern_time.strftime('%m-%d-%y %H:%M')
170
-
171
- # Get the current UTC time
172
- utc_time = datetime.now(timezone.utc)
173
- # Convert to Eastern Time Zone
174
- eastern_time = utc_time + timedelta(hours=-5)
175
- # Format as string (YY-MM-DD HH:MM)
176
- published_date = eastern_time.strftime('%m-%d-%y %H:%M')
177
- # string dataframe
178
- df = pd.DataFrame([chat_transcript_copy])
179
  notion_df.upload(df, 'https://www.notion.so/personal-5e3978680ca848bda844452129955138?pvs=4', title=str(published_date), api_key=API_KEY)
180
-
181
- return chat_transcript
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
 
184
  audio_input = Audio(source="microphone", type="filepath", label="Record your message")
185
  text_input = Textbox(label="Type your message", max_length=4096)
186
 
187
  output_text = gr.outputs.Textbox(label="Response")
188
- output_audio = Audio()
189
 
190
  iface = gr.Interface(
191
  fn=transcribe,
192
  inputs=[audio_input, text_input],
193
- # outputs=(["audio", "text"]),
194
  outputs="text",
195
- title="Your Excellence Never Abates (YENA)",
196
- description="Talk to the AI Tutor YENA",
197
- capture_session=True,
198
- autoplay=True)
199
-
200
 
201
  # Launch Gradio interface
202
  iface.launch()
203
 
204
-
205
-
206
-
207
- # from transformers import pipeline, T5Tokenizer
208
- # import pyttsx3
209
- # import threading
210
- # import time
211
-
212
-
213
-
214
-
215
- # Set up speech engine
216
- # engine = pyttsx3.init()
217
-
218
- # def speak(text):
219
- # # Get the current rate of the engine
220
- # rate = engine.getProperty('rate')
221
-
222
- # # Calculate the estimated time in seconds based on the length of the message and the current rate
223
- # estimated_time = len(text) / (rate / 10)
224
-
225
- # # Speak the text using the text-to-speech engine
226
-
227
- # engine.say(text)
228
- # engine.runAndWait()
229
- # if engine._inLoop:
230
- # # Wait for the speech engine to finish speaking
231
- # time.sleep(estimated_time*1.5)
232
- # engine.endLoop()
 
1
+ import openai
2
  import gradio as gr
3
  from gradio.components import Audio, Textbox
 
4
  import os
5
  import re
 
6
  from transformers import GPT2Tokenizer
 
7
  import whisper
8
  import pandas as pd
 
9
  from datetime import datetime, timezone, timedelta
 
 
10
  import notion_df
11
 
 
 
 
 
 
 
 
 
 
 
12
  openai.api_key = os.environ["OPENAI_API_KEY"]
13
+ tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
14
 
15
  initial_message = {"role": "system", "content": 'You are a USMLE Tutor. Respond with ALWAYS layered "bullet points" (listing rather than sentences) to all input with a fun mneumonics to memorize that list. But you can answer up to 1200 words if the user requests longer response.'}
16
  messages = [initial_message]
 
20
  # set up whisper model
21
  model = whisper.load_model("base")
22
 
 
 
23
  def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301"):
24
  """Returns the number of tokens used by a list of messages."""
25
  try:
 
41
  See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")
42
 
43
  def transcribe(audio, text):
 
44
  global messages
45
  global answer_count
46
+
 
47
  if audio is not None:
48
  audio_file = open(audio, "rb")
49
  transcript = openai.Audio.transcribe("whisper-1", audio_file, language="en")
 
50
  messages.append({"role": "user", "content": transcript["text"]})
51
+
52
+ if text is not None:
53
  # Split the input text into sentences
54
  sentences = re.split("(?<=[.!?]) +", text)
55
+
56
+ # Tokenize the sentences using the GPT-2 tokenizer
57
+ sentence_tokens = [tokenizer.encode(sentence) for sentence in sentences]
58
+
59
+ # Flatten the list of tokens
60
+ input_tokens = [token for sentence in sentence_tokens for token in sentence]
61
+
62
+ # Check if adding the input tokens would exceed the token limit
63
+ num_tokens = num_tokens_from_messages(messages)
64
+ if num_tokens + len(input_tokens) > 2200:
65
+ # Reset the messages list and answer counter
66
+ messages = [initial_message]
67
+ answer_count = 0
68
+ input_text = 'Can you click the Submit button one more time? (say Yes)'
69
+ messages.append({"role": "user", "content": input_text})
70
+ else:
71
+ # Add the input tokens to the messages list
72
+ input_text = tokenizer.decode(input_tokens)
73
+ messages.append({"role": "user", "content": input_text})
74
+
75
+ # Check if the accumulated tokens have exceeded the limit
 
 
 
 
 
 
 
 
 
 
76
  num_tokens = num_tokens_from_messages(messages)
77
  if num_tokens > 2096:
78
  # Concatenate the chat history
79
  chat_transcript = ""
80
  for message in messages:
81
  if message['role'] != 'system':
82
+ chat_transcript += f"[ANSWER {answer_count}]{message['role']}: {message['content']}\n\n"
83
  # Append the number of tokens used to the end of the chat transcript
84
+ chat_transcript += f"Number of tokens used: {num_tokens}\n\n"
 
 
85
 
86
+ # Get the current time in Eastern Time (ET)
87
+ now_et = datetime.now(timezone(timedelta(hours=-5)))
88
+ # Format the time as string (YY-MM-DD HH:MM)
89
+ published_date = now_et.strftime('%m-%d-%y %H:%M')
90
+
91
+ # Upload the chat transcript to Notion
 
92
  df = pd.DataFrame([chat_transcript])
93
  notion_df.upload(df, 'https://www.notion.so/personal-5e3978680ca848bda844452129955138?pvs=4', title=str(published_date), api_key=API_KEY)
94
 
 
95
  # Reset the messages list and answer counter
96
  messages = [initial_message]
97
  answer_count = 0
 
 
 
98
 
99
  # Increment the answer counter
100
  answer_count += 1
101
+
102
+ # Generate the system message using the OpenAI API
103
  system_message = openai.ChatCompletion.create(
104
  model="gpt-3.5-turbo",
105
  messages=messages,
106
  max_tokens=2000
107
  )["choices"][0]["message"]
108
+
109
+
110
+
111
  # Add the system message to the messages list
112
+ messages.append({"role": "system", "content": system_message})
113
 
114
  # Concatenate the chat history
115
  chat_transcript = ""
116
  for message in messages:
117
  if message['role'] != 'system':
118
+ chat_transcript += f"[ANSWER {answer_count}]{message['role']}: {message['content']}\n\n"
 
119
 
120
+ # Append the number of tokens used to the end of the chat transcript
121
+ num_tokens = num_tokens_from_messages(messages)
122
+ chat_transcript += f"Number of tokens used: {num_tokens}\n\n"
 
 
 
123
 
124
+ # Get the current time in Eastern Time (ET)
125
+ now_et = datetime.now(timezone(timedelta(hours=-5)))
126
+ # Format the time as string (YY-MM-DD HH:MM)
127
+ published_date = now_et.strftime('%m-%d-%y %H:%M')
128
 
129
+ # Upload the chat transcript to Notion
130
+ df = pd.DataFrame([chat_transcript])
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  notion_df.upload(df, 'https://www.notion.so/personal-5e3978680ca848bda844452129955138?pvs=4', title=str(published_date), api_key=API_KEY)
132
+
133
+ # Reset the messages list and answer counter if the token limit is exceeded
134
+ if num_tokens > 2096:
135
+ messages = [initial_message]
136
+ answer_count = 0
137
+ else:
138
+ # Increment the answer counter
139
+ answer_count += 1
140
+
141
+ # Generate the system message using the OpenAI API
142
+ system_message = openai.Completion.create(
143
+ engine="text-davinci-002",
144
+ prompt=[{"text": f"{message['role']}: {message['content']}\n\n"} for message in messages],
145
+ temperature=0.7,
146
+ max_tokens=2000,
147
+ n=1,
148
+ stop=None,
149
+ )[0]["text"]
150
+
151
+ # Add the system message to the messages list
152
+ messages.append({"role": "system", "content": system_message})
153
 
154
 
155
  audio_input = Audio(source="microphone", type="filepath", label="Record your message")
156
  text_input = Textbox(label="Type your message", max_length=4096)
157
 
158
  output_text = gr.outputs.Textbox(label="Response")
 
159
 
160
  iface = gr.Interface(
161
  fn=transcribe,
162
  inputs=[audio_input, text_input],
 
163
  outputs="text",
164
+ title="YENA",
165
+ description="Tutor YENA")
 
 
 
166
 
167
  # Launch Gradio interface
168
  iface.launch()
169