marcolorenzi98 committed on
Commit
b1c4eca
·
1 Parent(s): 751a5e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -11
app.py CHANGED
@@ -331,11 +331,31 @@ def text_to_speech(sentence):
331
 
332
  return sr, audio_reshaped
333
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
  """# Uploading PDF File"""
335
 
336
  #from google.colab import files
337
  #uploaded = files.upload()
338
 
 
339
  """#Gradio interface"""
340
 
341
  interface = gr.Blocks()
@@ -357,21 +377,24 @@ with interface:
357
  with gr.Row():
358
  with gr.Column():
359
  uploaded_article = gr.File()
360
- gr.Markdown("## PDF Examples")
361
- gr.Examples(
362
- examples=[[os.path.join(os.path.abspath(""), 'Article 7 Efficient Estimation of Word Representations in Vector Space.pdf')],
363
- [os.path.join(os.path.abspath(""), "Article 9 Transformers in Speech Processing_ Survey.pdf")],
364
- [os.path.join(os.path.abspath(""), "Article 11 Hidden Technical Debt in Machine Learning Systems.pdf")]],
365
- inputs=uploaded_article
366
- )
367
-
368
  with gr.Column():
369
  summarized_abstract = gr.Textbox("One-sentence Abstract")
370
  talked_abstract = gr.Audio(type="numpy")
371
  with gr.Row():
372
  summary_button = gr.Button(value="Summarize Abstract", size="lg")
373
  tts_button = gr.Button(value="Speak Abstract", size="lg")
374
-
 
 
 
 
 
 
 
 
 
 
375
 
376
  #the functionality goes down here
377
 
@@ -383,5 +406,4 @@ with interface:
383
  tts_button.click(text_to_speech, inputs=summarized_abstract, outputs=talked_abstract)
384
 
385
  if __name__ == "__main__":
386
- interface.launch()
387
-
 
331
 
332
  return sr, audio_reshaped
333
 
334
def sum_audio(path):
    """Summarize a PDF's abstract and synthesize that summary as speech.

    Produces both values in a single call so it can serve as the ``fn`` of
    ``gr.Examples`` with ``outputs=[summarized_abstract, talked_abstract]``.

    Args:
        path: The PDF input, forwarded unchanged to ``summarize_abstract``.

    Returns:
        A tuple ``(sentence, (sr, audio))`` where ``sentence`` is whatever
        ``summarize_abstract`` returned, ``sr`` is the sampling rate reported
        by the text-to-speech pipeline, and ``audio`` is a 1-D ``np.int16``
        array.
    """
    sentence = summarize_abstract(path)

    # NOTE(review): the pipeline is rebuilt on every call; consider creating
    # it once at module level if this function is invoked repeatedly.
    synthesiser = pipeline("text-to-speech", "suno/bark-small")
    speech = synthesiser(sentence, forward_params={"do_sample": True})

    audio_float32 = speech["audio"]
    sr = speech["sampling_rate"]

    # gr.Audio only accepts a tuple (int, np.ndarray of int16).
    # Clip before scaling so samples slightly outside [-1.0, 1.0] saturate
    # instead of wrapping around when cast to int16.
    audio_int16 = (np.clip(audio_float32, -1.0, 1.0) * 32767).astype(np.int16)

    # Flatten to 1-D whether the pipeline hands back (1, n) or (n,); the
    # original ``reshape(shape[1])`` raised IndexError on 1-D input.
    # Assumes single-channel audio -- TODO confirm for other TTS models.
    audio_reshaped = audio_int16.reshape(-1)

    return sentence, (sr, audio_reshaped)
352
+
353
  """# Uploading PDF File"""
354
 
355
  #from google.colab import files
356
  #uploaded = files.upload()
357
 
358
+
359
  """#Gradio interface"""
360
 
361
  interface = gr.Blocks()
 
377
  with gr.Row():
378
  with gr.Column():
379
  uploaded_article = gr.File()
380
+
 
 
 
 
 
 
 
381
  with gr.Column():
382
  summarized_abstract = gr.Textbox("One-sentence Abstract")
383
  talked_abstract = gr.Audio(type="numpy")
384
  with gr.Row():
385
  summary_button = gr.Button(value="Summarize Abstract", size="lg")
386
  tts_button = gr.Button(value="Speak Abstract", size="lg")
387
+
388
+ gr.Markdown("## PDF Examples")
389
+ gr.Examples(
390
+ examples=[[os.path.join(os.path.abspath(""), 'Article 7 Efficient Estimation of Word Representations in Vector Space.pdf')],
391
+ [os.path.join(os.path.abspath(""), "Article 9 Transformers in Speech Processing_ Survey.pdf")],
392
+ [os.path.join(os.path.abspath(""), "Article 11 Hidden Technical Debt in Machine Learning Systems.pdf")]],
393
+ inputs=uploaded_article,
394
+ outputs=[summarized_abstract, talked_abstract],
395
+ fn=sum_audio,
396
+ cache_examples = True,
397
+ )
398
 
399
  #the functionality goes down here
400
 
 
406
  tts_button.click(text_to_speech, inputs=summarized_abstract, outputs=talked_abstract)
407
 
408
  if __name__ == "__main__":
409
+ interface.launch(debug=False)