Div99 committed
Commit cb763d5 · Parent: 53b0871

Update app.py

Files changed (1): app.py (+258 −56)

app.py CHANGED
@@ -1,5 +1,6 @@
 import io
 import os
+import ssl
 from contextlib import closing
 from typing import Optional, Tuple
 import datetime
@@ -30,17 +31,31 @@ from openai.error import AuthenticationError, InvalidRequestError, RateLimitError
 from langchain.prompts import PromptTemplate
 
 from polly_utils import PollyVoiceData, NEURAL_ENGINE
+from azure_utils import AzureVoiceData
+
+# Pertains to question answering functionality
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.vectorstores.faiss import FAISS
+from langchain.docstore.document import Document
+from langchain.chains.question_answering import load_qa_chain
 
 news_api_key = os.environ["NEWS_API_KEY"]
 tmdb_bearer_token = os.environ["TMDB_BEARER_TOKEN"]
+openai_api_key = os.environ["OPENAI_API_KEY"]
 
 TOOLS_LIST = ['serpapi', 'wolfram-alpha', 'pal-math', 'pal-colored-objects', 'news-api', 'tmdb-api',
               'open-meteo-api']  # 'google-search'
 TOOLS_DEFAULT_LIST = ['serpapi', 'pal-math']
 BUG_FOUND_MSG = "Congratulations, you've found a bug in this application!"
-AUTH_ERR_MSG = "Please paste your OpenAI key. It is not necessary to hit a button or key after pasting it."
+# AUTH_ERR_MSG = "Please paste your OpenAI key from openai.com to use this application. It is not necessary to hit a button or key after pasting it."
+AUTH_ERR_MSG = "Please paste your OpenAI key from openai.com to use this application. "
 MAX_TOKENS = 512
 
+LOOPING_TALKING_HEAD = "videos/Masahiro.mp4"
+TALKING_HEAD_WIDTH = "192"
+MAX_TALKING_HEAD_TEXT_LENGTH = 155
+
 # Pertains to Express-inator functionality
 NUM_WORDS_DEFAULT = 0
 MAX_WORDS = 400
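The new imports above set up the commit's question-answering path: text is chunked, embedded into a FAISS index, and answered with a "stuff" QA chain. A minimal sketch of how these pieces compose, mirroring the calls this commit adds elsewhere in `app.py` (illustrative only; assumes `OPENAI_API_KEY` is set and a langchain version contemporary with this code):

```python
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.faiss import FAISS

# Chunk a source text and embed the chunks into an in-memory FAISS index.
source_text = "..."  # hypothetical pasted text
chunks = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_text(source_text)
docsearch = FAISS.from_texts(chunks, OpenAIEmbeddings())

# At question time, retrieve the most similar chunks and "stuff" them into one prompt.
question = "What does the text say about pricing?"  # hypothetical question
docs = docsearch.similarity_search(question)
qa_chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
print(qa_chain.run(input_documents=docs, question=question))
```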
@@ -51,11 +66,13 @@ LANG_LEVEL_DEFAULT = "N/A"
 TRANSLATE_TO_DEFAULT = "N/A"
 LITERARY_STYLE_DEFAULT = "N/A"
 PROMPT_TEMPLATE = PromptTemplate(
-    input_variables=["original_words", "num_words", "formality", "emotions", "lang_level", "translate_to", "literary_style"],
+    input_variables=["original_words", "num_words", "formality", "emotions", "lang_level", "translate_to",
+                     "literary_style"],
     template="Restate {num_words}{formality}{emotions}{lang_level}{translate_to}{literary_style}the following: \n{original_words}\n",
 )
 
 POLLY_VOICE_DATA = PollyVoiceData()
+AZURE_VOICE_DATA = AzureVoiceData()
 
 # Pertains to WHISPER functionality
 WHISPER_DETECT_LANG = "Detect language"
@@ -87,6 +104,29 @@ def transcribe(aud_inp, whisper_lang):
     return result_text
 
 
+# Temporarily address Wolfram Alpha SSL certificate issue
+ssl._create_default_https_context = ssl._create_unverified_context
+
+
+# TEMPORARY FOR TESTING
+def transcribe_dummy(aud_inp_tb, whisper_lang):
+    if aud_inp_tb is None:
+        return ""
+    # aud = whisper.load_audio(aud_inp)
+    # aud = whisper.pad_or_trim(aud)
+    # mel = whisper.log_mel_spectrogram(aud).to(WHISPER_MODEL.device)
+    # _, probs = WHISPER_MODEL.detect_language(mel)
+    # options = whisper.DecodingOptions()
+    # options = whisper.DecodingOptions(language="ja")
+    # result = whisper.decode(WHISPER_MODEL, mel, options)
+    result_text = "Whisper will detect language"
+    if whisper_lang != WHISPER_DETECT_LANG:
+        whisper_lang_code = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
+        result_text = f"Whisper will use lang code: {whisper_lang_code}"
+    print("result_text", result_text)
+    return aud_inp_tb
+
+
 # Pertains to Express-inator functionality
 def transform_text(desc, express_chain, num_words, formality,
                    anticipation_level, joy_level, trust_level,
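The SSL workaround in the hunk above disables certificate verification for every HTTPS connection the process makes, not just the Wolfram Alpha calls it targets. A narrower variant would scope the override to one call site; a sketch of that idea (not part of the commit):

```python
import ssl
from contextlib import contextmanager

@contextmanager
def unverified_https():
    # Swap in the unverified context only for the duration of the block,
    # then restore whatever was installed before.
    saved = ssl._create_default_https_context
    ssl._create_default_https_context = ssl._create_unverified_context
    try:
        yield
    finally:
        ssl._create_default_https_context = saved

# Hypothetical usage around the failing tool call:
# with unverified_https():
#     output = chain.run(input=inp)
```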
@@ -143,12 +183,15 @@ def transform_text(desc, express_chain, num_words, formality,
 
     translate_to_str = ""
     if translate_to != TRANSLATE_TO_DEFAULT:
-        translate_to_str = "translated to " + ("" if lang_level == TRANSLATE_TO_DEFAULT else lang_level + " level ") + translate_to + ", "
+        translate_to_str = "translated to " + (
+            "" if lang_level == TRANSLATE_TO_DEFAULT else lang_level + " level ") + translate_to + ", "
 
     literary_style_str = ""
     if literary_style != LITERARY_STYLE_DEFAULT:
         if literary_style == "Prose":
             literary_style_str = "as prose, "
+        if literary_style == "Story":
+            literary_style_str = "as a story, "
         elif literary_style == "Summary":
             literary_style_str = "as a summary, "
         elif literary_style == "Outline":
@@ -161,10 +204,14 @@
             literary_style_str = "as a haiku, "
         elif literary_style == "Limerick":
             literary_style_str = "as a limerick, "
+        elif literary_style == "Rap":
+            literary_style_str = "as a rap, "
         elif literary_style == "Joke":
             literary_style_str = "as a very funny joke with a setup and punchline, "
         elif literary_style == "Knock-knock":
             literary_style_str = "as a very funny knock-knock joke, "
+        elif literary_style == "FAQ":
+            literary_style_str = "as a FAQ with several questions and answers, "
 
     formatted_prompt = PROMPT_TEMPLATE.format(
         original_words=desc,
@@ -200,6 +247,7 @@
 def load_chain(tools_list, llm):
     chain = None
     express_chain = None
+    memory = None
     if llm:
         print("\ntools_list", tools_list)
         tool_names = tools_list
@@ -209,21 +257,35 @@
 
         chain = initialize_agent(tools, llm, agent="conversational-react-description", verbose=True, memory=memory)
         express_chain = LLMChain(llm=llm, prompt=PROMPT_TEMPLATE, verbose=True)
-
-    return chain, express_chain
+    return chain, express_chain, memory
 
 
 def set_openai_api_key(api_key):
     """Set the api key and return chain.
     If no api_key, then None is returned.
     """
-    if api_key and api_key.startswith("sk-") and len(api_key) > 50:
-        os.environ["OPENAI_API_KEY"] = api_key
+    # if api_key and api_key.startswith("sk-") and len(api_key) > 50:
+    if "OPENAI_API_KEY" not in os.environ:
+        print("OpenAI Key Not found")
+    else:
+        # os.environ["OPENAI_API_KEY"] = api_key
+        # print("\n\n ++++++++++++++ Setting OpenAI API key ++++++++++++++ \n\n")
+        # print(str(datetime.datetime.now()) + ": Before OpenAI, OPENAI_API_KEY length: " + str(
+        #     len(os.environ["OPENAI_API_KEY"])))
         llm = OpenAI(temperature=0, max_tokens=MAX_TOKENS)
-        chain, express_chain = load_chain(TOOLS_DEFAULT_LIST, llm)
-        os.environ["OPENAI_API_KEY"] = ""
-        return chain, express_chain, llm
-    return None, None, None
+        # print(str(datetime.datetime.now()) + ": After OpenAI, OPENAI_API_KEY length: " + str(
+        #     len(os.environ["OPENAI_API_KEY"])))
+        chain, express_chain, memory = load_chain(TOOLS_DEFAULT_LIST, llm)
+
+        # Pertains to question answering functionality
+        embeddings = OpenAIEmbeddings()
+        qa_chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
+
+        # print(str(datetime.datetime.now()) + ": After load_chain, OPENAI_API_KEY length: " + str(
+        #     len(os.environ["OPENAI_API_KEY"])))
+        # os.environ["OPENAI_API_KEY"] = ""
+        return chain, express_chain, llm, embeddings, qa_chain, memory
+    return None, None, None, None, None, None
 
 
 def run_chain(chain, inp, capture_hidden_text):
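`set_openai_api_key` now returns a six-tuple on both paths, and the wiring at the bottom of the file maps those values positionally onto six `gr.State` outputs. Gradio binds return values to outputs strictly by position, so the two lists must stay in sync; a minimal sketch of the pattern (hypothetical names):

```python
import gradio as gr

def init_all():
    # Build the real objects here; None placeholders keep the arity stable.
    return None, None, None, None, None, None  # chain, express_chain, llm, embeddings, qa_chain, memory

with gr.Blocks() as demo:
    chain_s, express_s, llm_s = gr.State(), gr.State(), gr.State()
    embeddings_s, qa_s, memory_s = gr.State(), gr.State(), gr.State()
    # Each returned value lands in the output state at the same index.
    demo.load(init_all, inputs=[], outputs=[chain_s, express_s, llm_s, embeddings_s, qa_s, memory_s])
```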
@@ -238,7 +300,8 @@
         try:
             output = chain.run(input=inp)
         except AuthenticationError as ae:
-            error_msg = AUTH_ERR_MSG
+            error_msg = AUTH_ERR_MSG + str(datetime.datetime.now()) + ". " + str(ae)
+            print("error_msg", error_msg)
         except RateLimitError as rle:
             error_msg = "\n\nRateLimitError: " + str(rle)
         except ValueError as ve:
@@ -275,7 +338,8 @@
         try:
             output = chain.run(input=inp)
         except AuthenticationError as ae:
-            output = AUTH_ERR_MSG
+            output = AUTH_ERR_MSG + str(datetime.datetime.now()) + ". " + str(ae)
+            print("output", output)
         except RateLimitError as rle:
             output = "\n\nRateLimitError: " + str(rle)
         except ValueError as ve:
@@ -288,6 +352,12 @@
     return output, hidden_text
 
 
+def reset_memory(history, memory):
+    memory.clear()
+    history = []
+    return history, history, memory
+
+
 class ChatWrapper:
 
     def __init__(self):
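The new `reset_memory` clears the agent-side transcript and empties the Gradio chat history in one step. For reference, langchain buffer memories of this vintage behave roughly as below (a sketch; the exact import path varies across langchain versions):

```python
from langchain.chains.conversation.memory import ConversationBufferMemory

memory = ConversationBufferMemory(memory_key="chat_history")
memory.save_context({"input": "hi"}, {"output": "hello"})  # one chat turn
memory.clear()        # wipes everything the agent remembers
print(memory.buffer)  # -> "" (empty transcript)
```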
@@ -295,10 +365,10 @@
 
     def __call__(
             self, api_key: str, inp: str, history: Optional[Tuple[str, str]], chain: Optional[ConversationChain],
-            trace_chain: bool, speak_text: bool, monologue: bool, express_chain: Optional[LLMChain],
+            trace_chain: bool, speak_text: bool, talking_head: bool, monologue: bool, express_chain: Optional[LLMChain],
             num_words, formality, anticipation_level, joy_level, trust_level,
             fear_level, surprise_level, sadness_level, disgust_level, anger_level,
-            lang_level, translate_to, literary_style
+            lang_level, translate_to, literary_style, qa_chain, docsearch, use_embeddings
     ):
         """Execute the chat functionality."""
         self.lock.acquire()
@@ -307,19 +377,29 @@
             print("inp: " + inp)
             print("trace_chain: ", trace_chain)
             print("speak_text: ", speak_text)
+            print("talking_head: ", talking_head)
             print("monologue: ", monologue)
             history = history or []
             # If chain is None, that is because no API key was provided.
-            output = "Please paste your OpenAI key to use this application. It is not necessary to hit a button or " \
-                     "key after pasting it."
+            output = "Please paste your OpenAI key from openai.com to use this app. " + str(datetime.datetime.now())
             hidden_text = output
 
-            if chain and chain != "":
+            if chain:
                 # Set OpenAI key
                 import openai
                 openai.api_key = api_key
                 if not monologue:
-                    output, hidden_text = run_chain(chain, inp, capture_hidden_text=trace_chain)
+                    if use_embeddings:
+                        if inp and inp.strip() != "":
+                            if docsearch:
+                                docs = docsearch.similarity_search(inp)
+                                output = str(qa_chain.run(input_documents=docs, question=inp))
+                            else:
+                                output, hidden_text = "Please supply some text in the Embeddings tab.", None
+                        else:
+                            output, hidden_text = "What's on your mind?", None
+                    else:
+                        output, hidden_text = run_chain(chain, inp, capture_hidden_text=trace_chain)
                 else:
                     output, hidden_text = inp, None
 
@@ -333,16 +413,32 @@
             text_to_display = hidden_text + "\n\n" + output
             history.append((inp, text_to_display))
 
-            # html_video, temp_file = do_html_video_speak(output)
-            html_audio, temp_file = None, None
+            html_video, temp_file, html_audio, temp_aud_file = None, None, None, None
             if speak_text:
-                html_audio, temp_file = do_html_audio_speak(output, translate_to)
+                if talking_head:
+                    if len(output) <= MAX_TALKING_HEAD_TEXT_LENGTH:
+                        html_video, temp_file = do_html_video_speak(output, translate_to)
+                    else:
+                        temp_file = LOOPING_TALKING_HEAD
+                        html_video = create_html_video(temp_file, TALKING_HEAD_WIDTH)
+                        html_audio, temp_aud_file = do_html_audio_speak(output, translate_to)
+                else:
+                    html_audio, temp_aud_file = do_html_audio_speak(output, translate_to)
+            else:
+                if talking_head:
+                    temp_file = LOOPING_TALKING_HEAD
+                    html_video = create_html_video(temp_file, TALKING_HEAD_WIDTH)
+                else:
+                    # html_audio, temp_aud_file = do_html_audio_speak(output, translate_to)
+                    # html_video = create_html_video(temp_file, "128")
+                    pass
+
         except Exception as e:
             raise e
         finally:
             self.lock.release()
-        # return history, history, html_video, temp_file, ""
-        return history, history, html_audio, temp_file, ""
+        return history, history, html_video, temp_file, html_audio, temp_aud_file, ""
+        # return history, history, html_audio, temp_aud_file, ""
 
 
 chat = ChatWrapper()
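The speak/talking-head branch added above caps lip-synced video at `MAX_TALKING_HEAD_TEXT_LENGTH` (155) characters; longer replies fall back to the looping idle video plus Polly audio. Restated as a decision table (a sketch; names mirror the diff):

```python
def media_for(output, speak_text, talking_head):
    # Mirrors the branching in ChatWrapper.__call__; illustrative only.
    if speak_text:
        if talking_head:
            if len(output) <= MAX_TALKING_HEAD_TEXT_LENGTH:
                return "lip-synced video of the reply"
            return "looping idle video + synthesized audio"
        return "synthesized audio only"
    return "looping idle video, muted" if talking_head else "text only"
```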
@@ -355,9 +451,11 @@ def do_html_audio_speak(words_to_speak, polly_language):
         region_name=os.environ["AWS_DEFAULT_REGION"]
     ).client('polly')
 
-    voice_id, language_code, engine = POLLY_VOICE_DATA.get_voice(polly_language, "Female")
+    # voice_id, language_code, engine = POLLY_VOICE_DATA.get_voice(polly_language, "Female")
+    voice_id, language_code, engine = POLLY_VOICE_DATA.get_voice(polly_language, "Male")
     if not voice_id:
-        voice_id = "Joanna"
+        # voice_id = "Joanna"
+        voice_id = "Matthew"
         language_code = "en-US"
         engine = NEURAL_ENGINE
     response = polly_client.synthesize_speech(
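The hunk above only swaps the default Polly voice from Joanna to Matthew. For context, the synthesize-and-save flow the surrounding function implements looks roughly like this (a self-contained sketch; assumes AWS credentials and region in the environment):

```python
import boto3
from contextlib import closing

polly = boto3.Session().client("polly")
response = polly.synthesize_speech(Text="Hello there.", OutputFormat="mp3",
                                   VoiceId="Matthew", LanguageCode="en-US",
                                   Engine="neural")
with closing(response["AudioStream"]) as stream, open("audios/tempfile.mp3", "wb") as f:
    f.write(stream.read())
```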
@@ -393,24 +491,39 @@
     return html_audio, "audios/tempfile.mp3"
 
 
-def do_html_video_speak(words_to_speak):
+def create_html_video(file_name, width):
+    temp_file_url = "/file=" + tmp_file.value['name']
+    html_video = f'<video width={width} height={width} autoplay muted loop><source src={temp_file_url} type="video/mp4" poster="Masahiro.png"></video>'
+    return html_video
+
+
+def do_html_video_speak(words_to_speak, azure_language):
+    azure_voice = AZURE_VOICE_DATA.get_voice(azure_language, "Male")
+    if not azure_voice:
+        azure_voice = "en-US-ChristopherNeural"
+
     headers = {"Authorization": f"Bearer {os.environ['EXHUMAN_API_KEY']}"}
     body = {
         'bot_name': 'Masahiro',
         'bot_response': words_to_speak,
-        'voice_name': 'Masahiro-EN'
+        'azure_voice': azure_voice,
+        'azure_style': 'friendly',
+        'animation_pipeline': 'high_speed',
     }
     api_endpoint = "https://api.exh.ai/animations/v1/generate_lipsync"
     res = requests.post(api_endpoint, json=body, headers=headers)
+    print("res.status_code: ", res.status_code)
 
     html_video = '<pre>no video</pre>'
     if isinstance(res.content, bytes):
         response_stream = io.BytesIO(res.content)
+        print("len(res.content)): ", len(res.content))
+
         with open('videos/tempfile.mp4', 'wb') as f:
             f.write(response_stream.read())
         temp_file = gr.File("videos/tempfile.mp4")
         temp_file_url = "/file=" + temp_file.value['name']
-        html_video = f'<video width="256" height="256" autoplay><source src={temp_file_url} type="video/mp4" poster="Masahiro.png"></video>'
+        html_video = f'<video width={TALKING_HEAD_WIDTH} height={TALKING_HEAD_WIDTH} autoplay><source src={temp_file_url} type="video/mp4" poster="Masahiro.png"></video>'
     else:
         print('video url unknown')
     return html_video, "videos/tempfile.mp4"
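One caveat in the new `create_html_video`: it ignores its `file_name` parameter and builds the URL from the module-level `tmp_file` (a `gr.File` created later in the Blocks section). It works here only because every call site passes `LOOPING_TALKING_HEAD`, the same file `tmp_file` wraps. A sketch of a variant that honors the argument (not part of the commit):

```python
def create_html_video(file_name, width):
    # Wrap the requested file so Gradio serves it, instead of reading the tmp_file global.
    temp_file_url = "/file=" + gr.File(file_name, visible=False).value['name']
    return (f'<video width={width} height={width} autoplay muted loop>'
            f'<source src={temp_file_url} type="video/mp4" poster="Masahiro.png"></video>')
```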
@@ -419,16 +532,45 @@
 def update_selected_tools(widget, state, llm):
     if widget:
         state = widget
-        chain, express_chain = load_chain(state, llm)
+        chain, express_chain, memory = load_chain(state, llm)
     return state, llm, chain, express_chain
 
 
+def update_talking_head(widget, state):
+    if widget:
+        state = widget
+
+        video_html_talking_head = create_html_video(LOOPING_TALKING_HEAD, TALKING_HEAD_WIDTH)
+        return state, video_html_talking_head
+    else:
+        # return state, create_html_video(LOOPING_TALKING_HEAD, "32")
+        return None, "<pre></pre>"
+
+
 def update_foo(widget, state):
     if widget:
         state = widget
     return state
 
 
+# Pertains to question answering functionality
+def update_embeddings(embeddings_text, embeddings, qa_chain):
+    if embeddings_text:
+        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+        texts = text_splitter.split_text(embeddings_text)
+
+        docsearch = FAISS.from_texts(texts, embeddings)
+        print("Embeddings updated")
+        return docsearch
+
+
+# Pertains to question answering functionality
+def update_use_embeddings(widget, state):
+    if widget:
+        state = widget
+    return state
+
+
 with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
     llm_state = gr.State()
     history_state = gr.State()
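A pre-existing quirk worth noting alongside the new `update_talking_head`: helpers like `update_foo` only assign when the widget value is truthy, so unticking a checkbox returns the old state instead of `False` (`update_talking_head` at least special-cases its `else` branch). A sketch of a variant that propagates `False` too (hypothetical, not part of the commit):

```python
import gradio as gr

def update_flag(widget, state):
    # Propagate False as well, so unticking actually clears the state.
    return bool(widget)

with gr.Blocks() as demo:
    flag_state = gr.State(False)
    flag_cb = gr.Checkbox(label="Flag", value=False)
    flag_cb.change(update_flag, inputs=[flag_cb, flag_state], outputs=[flag_state])
```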
@@ -437,7 +579,9 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
     tools_list_state = gr.State(TOOLS_DEFAULT_LIST)
     trace_chain_state = gr.State(False)
     speak_text_state = gr.State(False)
+    talking_head_state = gr.State(True)
     monologue_state = gr.State(False)  # Takes the input and repeats it back to the user, optionally transforming it.
+    memory_state = gr.State()
 
     # Pertains to Express-inator functionality
     num_words_state = gr.State(NUM_WORDS_DEFAULT)
@@ -457,22 +601,34 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
     # Pertains to WHISPER functionality
     whisper_lang_state = gr.State(WHISPER_DETECT_LANG)
 
+    # Pertains to question answering functionality
+    embeddings_state = gr.State()
+    qa_chain_state = gr.State()
+    docsearch_state = gr.State()
+    use_embeddings_state = gr.State(False)
+
     with gr.Tab("Chat"):
         with gr.Row():
             with gr.Column():
                 gr.HTML(
                     """<b><center>GPT + WolframAlpha + Whisper</center></b>
-                    <p><center>New feature in Settings: Babel fish mode</center></p>""")
+                    <p><center>New feature: <b>Embeddings</b></center></p>""")
 
                 openai_api_key_textbox = gr.Textbox(placeholder="Paste your OpenAI API key (sk-...)",
-                                                    show_label=False, lines=1, type='password')
+                                                    show_label=False, lines=1, type='password',
+                                                    value=openai_api_key,
+                                                    visible=False,)
 
         with gr.Row():
-            with gr.Column(scale=1, min_width=100, visible=False):
+            with gr.Column(scale=1, min_width=TALKING_HEAD_WIDTH, visible=True):
+                speak_text_cb = gr.Checkbox(label="Enable speech", value=False)
+                speak_text_cb.change(update_foo, inputs=[speak_text_cb, speak_text_state],
+                                     outputs=[speak_text_state])
+
                 my_file = gr.File(label="Upload a file", type="file", visible=False)
-                tmp_file = gr.File("videos/Masahiro.mp4", visible=False)
-                tmp_file_url = "/file=" + tmp_file.value['name']
-                htm_video = f'<video width="256" height="256" autoplay muted loop><source src={tmp_file_url} type="video/mp4" poster="Masahiro.png"></video>'
+                tmp_file = gr.File(LOOPING_TALKING_HEAD, visible=False)
+                # tmp_file_url = "/file=" + tmp_file.value['name']
+                htm_video = create_html_video(LOOPING_TALKING_HEAD, TALKING_HEAD_WIDTH)
                 video_html = gr.HTML(htm_video)
 
                 # my_aud_file = gr.File(label="Audio file", type="file", visible=True)
@@ -481,7 +637,7 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
                 htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3"></audio>'
                 audio_html = gr.HTML(htm_audio)
 
-            with gr.Column(scale=3):
+            with gr.Column(scale=7):
                 chatbot = gr.Chatbot()
 
         with gr.Row():
@@ -496,6 +652,11 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
                                  interactive=True, streaming=False)
             audio_comp.change(transcribe, inputs=[audio_comp, whisper_lang_state], outputs=[message])
 
+            # TEMPORARY FOR TESTING
+            # with gr.Row():
+            #     audio_comp_tb = gr.Textbox(label="Just say it!", lines=1)
+            #     audio_comp_tb.submit(transcribe_dummy, inputs=[audio_comp_tb, whisper_lang_state], outputs=[message])
+
         gr.Examples(
             examples=["How many people live in Canada?",
                       "What is 2 to the 30th power?",
@@ -519,15 +680,22 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
             trace_chain_cb.change(update_foo, inputs=[trace_chain_cb, trace_chain_state],
                                   outputs=[trace_chain_state])
 
-            speak_text_cb = gr.Checkbox(label="Speak text from agent", value=False)
-            speak_text_cb.change(update_foo, inputs=[speak_text_cb, speak_text_state],
-                                 outputs=[speak_text_state])
+            # speak_text_cb = gr.Checkbox(label="Speak text from agent", value=False)
+            # speak_text_cb.change(update_foo, inputs=[speak_text_cb, speak_text_state],
+            #                      outputs=[speak_text_state])
+
+            talking_head_cb = gr.Checkbox(label="Show talking head", value=True)
+            talking_head_cb.change(update_talking_head, inputs=[talking_head_cb, talking_head_state],
+                                   outputs=[talking_head_state, video_html])
 
             monologue_cb = gr.Checkbox(label="Babel fish mode (translate/restate what you enter, no conversational agent)",
                                        value=False)
             monologue_cb.change(update_foo, inputs=[monologue_cb, monologue_state],
                                 outputs=[monologue_state])
 
+            reset_btn = gr.Button(value="Reset chat", variant="secondary").style(full_width=False)
+            reset_btn.click(reset_memory, inputs=[history_state, memory_state], outputs=[chatbot, history_state, memory_state])
+
         with gr.Tab("Whisper STT"):
             whisper_lang_radio = gr.Radio(label="Whisper speech-to-text language:", choices=[
                 WHISPER_DETECT_LANG, "Arabic", "Arabic (Gulf)", "Catalan", "Chinese (Cantonese)", "Chinese (Mandarin)",
@@ -559,7 +727,7 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
                 "Korean", "Norwegian", "Polish",
                 "Portuguese (Brazilian)", "Portuguese (European)", "Romanian", "Russian", "Spanish (European)",
                 "Spanish (Mexican)", "Spanish (US)", "Swedish", "Turkish", "Ukrainian", "Welsh",
-                "emojis", "Gen Z slang", "how the stereotypical Karen would say it", "Klingon",
+                "emojis", "Gen Z slang", "how the stereotypical Karen would say it", "Klingon", "Neanderthal",
                 "Pirate", "Strange Planet expospeak technical talk", "Yoda"],
                 value=TRANSLATE_TO_DEFAULT)
 
@@ -577,8 +745,8 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
 
         with gr.Tab("Lit style"):
             literary_style_radio = gr.Radio(label="Literary style:", choices=[
-                LITERARY_STYLE_DEFAULT, "Prose", "Summary", "Outline", "Bullets", "Poetry", "Haiku", "Limerick", "Joke",
-                "Knock-knock"],
+                LITERARY_STYLE_DEFAULT, "Prose", "Story", "Summary", "Outline", "Bullets", "Poetry", "Haiku", "Limerick", "Rap",
+                "Joke", "Knock-knock", "FAQ"],
                 value=LITERARY_STYLE_DEFAULT)
 
             literary_style_radio.change(update_foo,
@@ -649,34 +817,68 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
                                 inputs=[num_words_slider, num_words_state],
                                 outputs=[num_words_state])
 
+    with gr.Tab("Embeddings"):
+        embeddings_text_box = gr.Textbox(label="Enter text for embeddings and hit Create:",
+                                         lines=20)
+
+        with gr.Row():
+            use_embeddings_cb = gr.Checkbox(label="Use embeddings", value=False)
+            use_embeddings_cb.change(update_use_embeddings, inputs=[use_embeddings_cb, use_embeddings_state],
+                                     outputs=[use_embeddings_state])
+
+            embeddings_text_submit = gr.Button(value="Create", variant="secondary").style(full_width=False)
+            embeddings_text_submit.click(update_embeddings,
+                                         inputs=[embeddings_text_box, embeddings_state, qa_chain_state],
+                                         outputs=[docsearch_state])
+
     gr.HTML("""
-        <p>This application, developed by AI Researcher <a href='https://divyanshgarg.com/'>Div Garg</a>,
-        demonstrates a conversational AI agent implemented with OpenAI GPT-3.5 and LangChain.
+        <p>This application, developed by <a href='https://www.linkedin.com/in/javafxpert/'>James L. Weaver</a>,
+        demonstrates a conversational agent implemented with OpenAI GPT-3.5 and LangChain.
         When necessary, it leverages tools for complex math, searching the internet, and accessing news and weather.
+        Uses talking heads from <a href='https://exh.ai/'>Ex-Human</a>.
         For faster inference without waiting in queue, you may duplicate the space.
         </p>""")
 
+    gr.HTML("""
+        <form action="https://www.paypal.com/donate" method="post" target="_blank">
+            <input type="hidden" name="business" value="AK8BVNALBXSPQ" />
+            <input type="hidden" name="no_recurring" value="0" />
+            <input type="hidden" name="item_name" value="Please consider helping to defray the cost of APIs such as SerpAPI and WolframAlpha that this app uses." />
+            <input type="hidden" name="currency_code" value="USD" />
+            <input type="image" src="https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif" border="0" name="submit" title="PayPal - The safer, easier way to pay online!" alt="Donate with PayPal button" />
+            <img alt="" border="0" src="https://www.paypal.com/en_US/i/scr/pixel.gif" width="1" height="1" />
+        </form>
+    """)
+
+    gr.HTML("""<center>
+        <a href="https://huggingface.co/spaces/JavaFXpert/Chat-GPT-LangChain?duplicate=true">
+            <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
+        Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a>
+    </center>""")
 
     message.submit(chat, inputs=[openai_api_key_textbox, message, history_state, chain_state, trace_chain_state,
-                                 speak_text_state, monologue_state,
+                                 speak_text_state, talking_head_state, monologue_state,
                                  express_chain_state, num_words_state, formality_state,
                                  anticipation_level_state, joy_level_state, trust_level_state, fear_level_state,
                                  surprise_level_state, sadness_level_state, disgust_level_state, anger_level_state,
-                                 lang_level_state, translate_to_state, literary_style_state],
-                   # outputs=[chatbot, history_state, video_html, my_file, message])
-                   outputs=[chatbot, history_state, audio_html, tmp_aud_file, message])
+                                 lang_level_state, translate_to_state, literary_style_state,
+                                 qa_chain_state, docsearch_state, use_embeddings_state],
+                   outputs=[chatbot, history_state, video_html, my_file, audio_html, tmp_aud_file, message])
+                   # outputs=[chatbot, history_state, audio_html, tmp_aud_file, message])
 
     submit.click(chat, inputs=[openai_api_key_textbox, message, history_state, chain_state, trace_chain_state,
-                               speak_text_state, monologue_state,
+                               speak_text_state, talking_head_state, monologue_state,
                                express_chain_state, num_words_state, formality_state,
                                anticipation_level_state, joy_level_state, trust_level_state, fear_level_state,
                                surprise_level_state, sadness_level_state, disgust_level_state, anger_level_state,
-                               lang_level_state, translate_to_state, literary_style_state],
-                 # outputs=[chatbot, history_state, video_html, my_file, message])
-                 outputs=[chatbot, history_state, audio_html, tmp_aud_file, message])
+                               lang_level_state, translate_to_state, literary_style_state,
+                               qa_chain_state, docsearch_state, use_embeddings_state],
+                 outputs=[chatbot, history_state, video_html, my_file, audio_html, tmp_aud_file, message])
+                 # outputs=[chatbot, history_state, audio_html, tmp_aud_file, message])
 
-    openai_api_key_textbox.change(set_openai_api_key,
+    block.load(set_openai_api_key,
                inputs=[openai_api_key_textbox],
-               outputs=[chain_state, express_chain_state, llm_state])
+               outputs=[chain_state, express_chain_state, llm_state, embeddings_state,
+                        qa_chain_state, memory_state])
 
-block.launch(debug=True)
+block.launch(debug=True)
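The `inputs` lists in the hunk above must mirror `ChatWrapper.__call__`'s positional signature, and the `outputs` lists must match its seven-item return; both are bound strictly by position. Laid out for reference (names from the diff):

```python
# inputs  -> __call__(api_key, inp, history, chain,
#                     trace_chain, speak_text, talking_head, monologue, express_chain,
#                     num_words, formality,
#                     anticipation_level, joy_level, trust_level, fear_level,
#                     surprise_level, sadness_level, disgust_level, anger_level,
#                     lang_level, translate_to, literary_style,
#                     qa_chain, docsearch, use_embeddings)
# outputs <- (history, history, html_video, temp_file, html_audio, temp_aud_file, "")
#             chatbot, history_state, video_html, my_file, audio_html, tmp_aud_file, message
```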
 