Spaces:

marcolorenzi98
/

AAI-projects

Runtime error

App Files Files Community

marcolorenzi98 committed on Dec 3, 2023

Commit

b1c4eca

1 Parent(s): 751a5e2

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -11

app.py CHANGED Viewed

@@ -331,11 +331,31 @@ def text_to_speech(sentence):
   return sr, audio_reshaped
 """# Uploading PDF File"""
 #from google.colab import files
 #uploaded = files.upload()
 """#Gradio interface"""
 interface = gr.Blocks()
@@ -357,21 +377,24 @@ with interface:
   with gr.Row():
     with gr.Column():
       uploaded_article = gr.File()
-      gr.Markdown("## PDF Examples")
-      gr.Examples(
-        examples=[[os.path.join(os.path.abspath(""), 'Article 7 Efficient Estimation of Word Representations in Vector Space.pdf')],
-                  [os.path.join(os.path.abspath(""), "Article 9 Transformers in Speech Processing_ Survey.pdf")],
-                  [os.path.join(os.path.abspath(""), "Article 11 Hidden Technical Debt in Machine Learning Systems.pdf")]],
-        inputs=uploaded_article
-    )
     with gr.Column():
       summarized_abstract = gr.Textbox("One-sentence Abstract")
       talked_abstract = gr.Audio(type="numpy")
       with gr.Row():
         summary_button = gr.Button(value="Summarize Abstract", size="lg")
         tts_button = gr.Button(value="Speak Abstract", size="lg")
   #the functionality goes down here
@@ -383,5 +406,4 @@ with interface:
   tts_button.click(text_to_speech, inputs=summarized_abstract, outputs=talked_abstract)
 if __name__ == "__main__":
-    interface.launch()

   return sr, audio_reshaped
+def sum_audio(path):  # passes `path` to summarize_abstract, then synthesizes speech from the returned text
+  sentence = summarize_abstract (path)  # helper defined outside this excerpt; its result is the text that gets spoken
+  synthesiser = pipeline("text-to-speech", "suno/bark-small")  # NOTE(review): the pipeline is rebuilt on every call
+  speech = synthesiser(sentence, forward_params={"do_sample": True})
+  audio_float32 = speech["audio"]
+  sr = speech["sampling_rate"]
+  # gr.Audio only accepts a tuple (int sample rate, np.array of int16)
+  audio_int16 = (audio_float32 * 32767).astype(np.int16)  # presumably samples lie in [-1, 1] -- TODO confirm
+  audio_reshaped = audio_int16.reshape(audio_int16.shape[1])  # flatten to 1-D; assumes shape (1, n_samples) -- TODO confirm
+  audio_tuple = (sr, audio_reshaped)
+  return sentence, audio_tuple  # order matches outputs=[summarized_abstract, talked_abstract] passed to gr.Examples
 """# Uploading PDF File"""
 #from google.colab import files
 #uploaded = files.upload()
 """#Gradio interface"""
 interface = gr.Blocks()
   with gr.Row():
     with gr.Column():
       uploaded_article = gr.File()
     with gr.Column():
       summarized_abstract = gr.Textbox("One-sentence Abstract")
       talked_abstract = gr.Audio(type="numpy")
       with gr.Row():
         summary_button = gr.Button(value="Summarize Abstract", size="lg")
         tts_button = gr.Button(value="Speak Abstract", size="lg")
+  gr.Markdown("## PDF Examples")
+  gr.Examples(
+      examples=[[os.path.join(os.path.abspath(""), 'Article 7 Efficient Estimation of Word Representations in Vector Space.pdf')],
+                [os.path.join(os.path.abspath(""), "Article 9 Transformers in Speech Processing_ Survey.pdf")],
+                [os.path.join(os.path.abspath(""), "Article 11 Hidden Technical Debt in Machine Learning Systems.pdf")]],
+      inputs=uploaded_article,
+      outputs=[summarized_abstract, talked_abstract],
+      fn=sum_audio,
+      cache_examples = True,
+    )
   #the functionality goes down here
   tts_button.click(text_to_speech, inputs=summarized_abstract, outputs=talked_abstract)
 if __name__ == "__main__":
+    interface.launch(debug=False)