son9john committed on
Commit
7064432
·
1 Parent(s): b95e143

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -54
app.py CHANGED
@@ -35,7 +35,8 @@ def transcribe(audio, text):
35
  global messages
36
  global answer_count
37
  messages = [initial_message]
38
- messages_rev = [initial_message]
 
39
 
40
  transcript = {'text': ''}
41
  input_text = []
@@ -51,7 +52,7 @@ def transcribe(audio, text):
51
  messages=messages,
52
  max_tokens=2000
53
  )["choices"][0]["message"]
54
-
55
  messages.append({"role": "system", "content": str(system_message['content'])})
56
  messages_rev.append({"role": "system", "content": str(system_message['content'])})
57
 
@@ -66,80 +67,80 @@ def transcribe(audio, text):
66
  now_et = datetime.now(timezone(timedelta(hours=-4)))
67
  # Format the time as string (MM-DD-YY HH:MM)
68
  published_date = now_et.strftime('%m-%d-%y %H:%M')
69
- notion_df.upload(df, 'https://www.notion.so/US-Mine-922ea77abf8b4493bd63909b43bfbb2f?pvs=4', title=str(published_date), api_key=API_KEY)
70
-
71
 
72
- # Split the input text into sentences
73
- sentences = sent_tokenize(text)
 
74
 
75
- # Split the input text into sub-input tokens based on the condition
76
- subinput_tokens = []
77
- buffer = []
78
- for sentence in sentences:
79
- sentence_tokens = tokenizer.encode(sentence)
80
- if len(buffer) + len(sentence_tokens) > 800:
 
 
 
 
81
  subinput_tokens.append(buffer)
82
- buffer = []
83
- buffer.extend(sentence_tokens)
84
- if buffer:
85
- subinput_tokens.append(buffer)
86
 
87
- chat_transcript = ''
 
 
 
 
 
 
 
 
 
 
 
88
 
89
- for tokens in subinput_tokens:
90
- messages.append({"role": "user", "content": initmessage})
91
- # Decode the tokens into text
92
- subinput_text = tokenizer.decode(tokens)
93
- messages.append({"role": "user", "content": transcript["text"]})
 
 
 
94
 
95
- num_tokens = sum(len(tokenizer.encode(message["content"])) for message in messages)
96
- if num_tokens > 2096:
97
- # Concatenate the chat history
98
- chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages if message['role'] != 'user'])
99
- # Append the number of tokens used to the end of the chat transcript
100
- chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n"
101
 
102
- # Get the current time in Eastern Time (ET)
103
- now_et = datetime.now(timezone(timedelta(hours=-4)))
104
- # Format the time as string (YY-MM-DD HH:MM)
105
- published_date = now_et.strftime('%m-%d-%y %H:%M')
106
- if counter > 0:
107
- # Upload the chat transcript to Notion
108
- df = pd.DataFrame([chat_transcript])
109
- notion_df.upload(df, 'https://www.notion.so/US-Mine-922ea77abf8b4493bd63909b43bfbb2f?pvs=4', title=str(published_date+'FULL'), api_key=API_KEY)
110
- counter += 1
111
- messages = [{"role": "system", "content": initmessage}]
112
- messages = [{"role": "user", "content": subinput_text}]
113
- answer_count = 0
114
-
115
- # Generate the system message using the OpenAI API
116
- # with concurrent.futures.ThreadPoolExecutor() as executor:
117
- system_message = openai.ChatCompletion.create(
118
- model="gpt-3.5-turbo",
119
- messages=messages,
120
- max_tokens=2000
121
- )["choices"][0]["message"]
122
-
123
- messages.append({"role": "system", "content": str(system_message['content'])})
124
- messages_rev.append({"role": "system", "content": str(system_message['content'])})
125
 
 
 
 
 
126
  # Concatenate the chat history
127
- chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages_rev if message['role'] != 'user'])
128
  # if not isinstance(messages[-1]['content'], str):
129
  # continue
130
 
131
  # Append the number of tokens used to the end of the chat transcript
132
- chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n"
133
  df = pd.DataFrame([chat_transcript])
134
  # Get the current time in Eastern Time (ET)
135
  now_et = datetime.now(timezone(timedelta(hours=-4)))
136
  # Format the time as string (MM-DD-YY HH:MM)
137
  published_date = now_et.strftime('%m-%d-%y %H:%M')
138
  notion_df.upload(df, 'https://www.notion.so/US-62e861a0b35f43da8ef9a7789512b8c2?pvs=4', title=str(published_date), api_key=API_KEY)
139
-
140
  # Return the chat transcript
141
  return chat_transcript
142
 
 
143
  # Define the input and output components for Gradio
144
  audio_input = Audio(source="microphone", type="filepath", label="Record your message")
145
  text_input = Textbox(label="Type your message", max_length=4096)
 
35
  global messages
36
  global answer_count
37
  messages = [initial_message]
38
+ messages_rev = [initial_message]
39
+ chat_transcript = ''
40
 
41
  transcript = {'text': ''}
42
  input_text = []
 
52
  messages=messages,
53
  max_tokens=2000
54
  )["choices"][0]["message"]
55
+
56
  messages.append({"role": "system", "content": str(system_message['content'])})
57
  messages_rev.append({"role": "system", "content": str(system_message['content'])})
58
 
 
67
  now_et = datetime.now(timezone(timedelta(hours=-4)))
68
  # Format the time as string (MM-DD-YY HH:MM)
69
  published_date = now_et.strftime('%m-%d-%y %H:%M')
70
+ notion_df.upload(df, 'https://www.notion.so/US-62e861a0b35f43da8ef9a7789512b8c2?pvs=4', title=str(published_date), api_key=API_KEY)
 
71
 
72
+ if text is not None:
73
+ # Split the input text into sentences
74
+ sentences = sent_tokenize(text)
75
 
76
+ # Split the input text into sub-input tokens based on the condition
77
+ subinput_tokens = []
78
+ buffer = []
79
+ for sentence in sentences:
80
+ sentence_tokens = tokenizer.encode(sentence)
81
+ if len(buffer) + len(sentence_tokens) > 800:
82
+ subinput_tokens.append(buffer)
83
+ buffer = []
84
+ buffer.extend(sentence_tokens)
85
+ if buffer:
86
  subinput_tokens.append(buffer)
 
 
 
 
87
 
88
+ for tokens in subinput_tokens:
89
+ # Decode the tokens into text
90
+ subinput_text = tokenizer.decode(tokens)
91
+ messages.append({"role": "system", "content": initmessage})
92
+ messages.append({"role": "user", "content": transcript["text"]})
93
+
94
+ num_tokens = sum(len(tokenizer.encode(message["content"])) for message in messages)
95
+ if num_tokens > 1640:
96
+ # Concatenate the chat history
97
+ chat_transcript = "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages if message['role'] != 'user'])
98
+ # Append the number of tokens used to the end of the chat transcript
99
+ chat_transcript += f"\n\nNumber of tokens used: {num_tokens}\n\n"
100
 
101
+ # Get the current time in Eastern Time (ET)
102
+ now_et = datetime.now(timezone(timedelta(hours=-5)))
103
+ # Format the time as string (MM-DD-YY HH:MM)
104
+ published_date = now_et.strftime('%m-%d-%y %H:%M')
105
+ if counter > 0:
106
+ # Upload the chat transcript to Notion
107
+ df = pd.DataFrame([chat_transcript])
108
+ notion_df.upload(df, 'https://www.notion.so/US-62e861a0b35f43da8ef9a7789512b8c2?pvs=4', title=str(published_date), api_key=API_KEY)
109
 
110
+ counter += 1
111
+ messages = [{"role": "system", "content": initial_message}]
112
+ messages = [{"role": "user", "content": subinput_text}]
113
+ answer_count = 0
 
 
114
 
115
+ # Generate the system message using the OpenAI API
116
+ # with concurrent.futures.ThreadPoolExecutor() as executor:
117
+ system_message = openai.ChatCompletion.create(
118
+ model="gpt-3.5-turbo",
119
+ messages=messages,
120
+ max_tokens=2000
121
+ )["choices"][0]["message"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
+ messages.append({"role": "system", "content": str(system_message['content'])})
124
+ messages_rev.append({"role": "system", "content": str(system_message['content'])})
125
+ chat_transcript = f"\n\nNumber of tokens used: {num_tokens}\n\n"
126
+
127
  # Concatenate the chat history
128
+ chat_transcript += "\n\n".join([f"[ANSWER {answer_count}]{message['role']}: {message['content']}" for message in messages_rev if message['role'] != 'user'])
129
  # if not isinstance(messages[-1]['content'], str):
130
  # continue
131
 
132
  # Append the number of tokens used to the end of the chat transcript
 
133
  df = pd.DataFrame([chat_transcript])
134
  # Get the current time in Eastern Time (ET)
135
  now_et = datetime.now(timezone(timedelta(hours=-4)))
136
  # Format the time as string (MM-DD-YY HH:MM)
137
  published_date = now_et.strftime('%m-%d-%y %H:%M')
138
  notion_df.upload(df, 'https://www.notion.so/US-62e861a0b35f43da8ef9a7789512b8c2?pvs=4', title=str(published_date), api_key=API_KEY)
139
+
140
  # Return the chat transcript
141
  return chat_transcript
142
 
143
+
144
  # Define the input and output components for Gradio
145
  audio_input = Audio(source="microphone", type="filepath", label="Record your message")
146
  text_input = Textbox(label="Type your message", max_length=4096)