Update app.py
app.py
CHANGED
@@ -35,7 +35,8 @@ def transcribe(audio, text):
    global messages
    global answer_count
    messages = [initial_message]
-   messages_rev = [initial_message]
+   messages_rev = [initial_message]
+   chat_transcript = ''

    transcript = {'text': ''}
    input_text = []
@@ -51,7 +52,7 @@ def transcribe(audio, text):
        messages=messages,
        max_tokens=2000
    )["choices"][0]["message"]
-
+
    messages.append({"role": "system", "content": str(system_message['content'])})
    messages_rev.append({"role": "system", "content": str(system_message['content'])})

@@ -66,80 +67,80 @@ def transcribe(audio, text):
    now_et = datetime.now(timezone(timedelta(hours=-4)))
    # Format the time as string (YY-MM-DD HH:MM)
    published_date = now_et.strftime('%m-%d-%y %H:%M')
-   notion_df.upload(df, 'https://www.notion.so/US-
-
+   notion_df.upload(df, 'https://www.notion.so/US-62e861a0b35f43da8ef9a7789512b8c2?pvs=4', title=str(published_date), api_key=API_KEY)

-
-
+   if text is not None:
+       # Split the input text into sentences
+       sentences = sent_tokenize(text)

-
-
-
-
-
-
+       # Split the input text into sub-input tokens based on the condition
+       subinput_tokens = []
+       buffer = []
+       for sentence in sentences:
+           sentence_tokens = tokenizer.encode(sentence)
+           if len(buffer) + len(sentence_tokens) > 800:
+               subinput_tokens.append(buffer)
+               buffer = []
+           buffer.extend(sentence_tokens)
+       if buffer:
            subinput_tokens.append(buffer)
-           buffer = []
-       buffer.extend(sentence_tokens)
-   if buffer:
-       subinput_tokens.append(buffer)

-
+       for tokens in subinput_tokens:
+           # Decode the tokens into text
+           subinput_text = tokenizer.decode(tokens)
+           messages.append({"role": "system", "content": initmessage})
+           messages.append({"role": "user", "content": transcript["text"]})
+
+           num_tokens = sum(len(tokenizer.encode(message["content"])) for message in messages)
+           if num_tokens > 1640:
+               # Concatenate the chat history
+               chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages if message['role'] != 'user'])
+               # Append the number of tokens used to the end of the chat transcript
+               chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n"

-
-
-
-
-
+               # Get the current time in Eastern Time (ET)
+               now_et = datetime.now(timezone(timedelta(hours=-5)))
+               # Format the time as string (YY-MM-DD HH:MM)
+               published_date = now_et.strftime('%m-%d-%y %H:%M')
+               if counter > 0:
+                   # Upload the chat transcript to Notion
+                   df = pd.DataFrame([chat_transcript])
+                   notion_df.upload(df, 'https://www.notion.so/US-62e861a0b35f43da8ef9a7789512b8c2?pvs=4', title=str(published_date), api_key=API_KEY)

-
-
-
-
-       # Append the number of tokens used to the end of the chat transcript
-       chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n"
+               counter += 1
+               messages = [{"role": "system", "content": initial_message}]
+               messages = [{"role": "user", "content": subinput_text}]
+               answer_count = 0

-       #
-
-
-
-
-
-
-       notion_df.upload(df, 'https://www.notion.so/US-Mine-922ea77abf8b4493bd63909b43bfbb2f?pvs=4', title=str(published_date+'FULL'), api_key=API_KEY)
-       counter += 1
-       messages = [{"role": "system", "content": initmessage}]
-       messages = [{"role": "user", "content": subinput_text}]
-       answer_count = 0
-
-       # Generate the system message using the OpenAI API
-       # with concurrent.futures.ThreadPoolExecutor() as executor:
-       system_message = openai.ChatCompletion.create(
-           model="gpt-3.5-turbo",
-           messages=messages,
-           max_tokens=2000
-       )["choices"][0]["message"]
-
-       messages.append({"role": "system", "content": str(system_message['content'])})
-       messages_rev.append({"role": "system", "content": str(system_message['content'])})
+           # Generate the system message using the OpenAI API
+           # with concurrent.futures.ThreadPoolExecutor() as executor:
+           system_message = openai.ChatCompletion.create(
+               model="gpt-3.5-turbo",
+               messages=messages,
+               max_tokens=2000
+           )["choices"][0]["message"]

+           messages.append({"role": "system", "content": str(system_message['content'])})
+           messages_rev.append({"role": "system", "content": str(system_message['content'])})
+           chat_transcript = f"\n\nNumber of tokens used: {num_tokens}\n\n"
+
    # Concatenate the chat history
-   chat_transcript
+   chat_transcript += "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages_rev if message['role'] != 'user'])
    # if not isinstance(messages[-1]['content'], str):
        # continue

    # Append the number of tokens used to the end of the chat transcript
-   chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n"
    df = pd.DataFrame([chat_transcript])
    # Get the current time in Eastern Time (ET)
    now_et = datetime.now(timezone(timedelta(hours=-4)))
    # Format the time as string (YY-MM-DD HH:MM)
    published_date = now_et.strftime('%m-%d-%y %H:%M')
    notion_df.upload(df, 'https://www.notion.so/US-62e861a0b35f43da8ef9a7789512b8c2?pvs=4', title=str(published_date), api_key=API_KEY)
-
+
    # Return the chat transcript
    return chat_transcript

+
# Define the input and output components for Gradio
audio_input = Audio(source="microphone", type="filepath", label="Record your message")
text_input = Textbox(label="Type your message", max_length=4096)
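The core of this change is that typed input is now split into sentence-aligned chunks of at most 800 tokens before each chunk is sent to the ChatCompletion API. Below is a minimal, self-contained sketch of just that chunking step. It is an illustration, not the app's code: it assumes nltk's sent_tokenize and tiktoken's gpt-3.5-turbo encoding stand in for whatever `tokenizer` is configured in app.py, and the helper name `split_into_subinputs` is hypothetical.

# Sketch of the sentence-based token chunking introduced in this commit.
# Assumptions: nltk for sentence splitting, tiktoken as the tokenizer.
import nltk
import tiktoken
from nltk.tokenize import sent_tokenize

nltk.download("punkt", quiet=True)  # sentence tokenizer model, downloaded once
tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo")

def split_into_subinputs(text: str, max_tokens: int = 800) -> list[str]:
    """Group consecutive sentences into chunks of roughly max_tokens tokens each."""
    subinput_tokens = []
    buffer = []
    for sentence in sent_tokenize(text):
        sentence_tokens = tokenizer.encode(sentence)
        # Start a new chunk when adding this sentence would exceed the budget
        # (the extra `buffer and` guard avoids emitting an empty first chunk).
        if buffer and len(buffer) + len(sentence_tokens) > max_tokens:
            subinput_tokens.append(buffer)
            buffer = []
        buffer.extend(sentence_tokens)
    if buffer:
        subinput_tokens.append(buffer)
    # Decode each token chunk back into text before it is sent to the API.
    return [tokenizer.decode(tokens) for tokens in subinput_tokens]

Budgeting the chunks by token count rather than by character count keeps each request comfortably inside the model's context window regardless of how the text happens to tokenize.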