Mishmosh committed on
Commit
19e94a1
·
1 Parent(s): 7d0890e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -59
app.py CHANGED
@@ -247,62 +247,3 @@ for key in text_per_page.keys(): # go through keys in dictionary
247
  break
248
  print(abstract_from_pdf)
249
 
250
- from transformers import pipeline
251
- summarizer = pipeline("summarization", model="ainize/bart-base-cnn")
252
- #summarizer = pipeline("summarization", model="linydub/bart-large-samsum") # various models were tried and the best one was selected
253
- #summarizer = pipeline("summarization", model="slauw87/bart_summarisation")
254
- #summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
255
- #summarizer = pipeline("summarization", model="google/pegasus-cnn_dailymail")
256
- #print(summarizer(abstract_from_pdf, max_length=50, min_length=5, do_sample=False))
257
- summarized_text=(summarizer(abstract_from_pdf))
258
- print(summarized_text)
259
- #summary_of_abstract=str(summarizer)
260
- #type(summary_of_abstract)
261
- #print(summary_of_abstract)
262
-
263
- # The aim of this section is to reduce the summary to a single sentence by re-summarizing it repeatedly for as long as it is longer than one sentence.
264
- # Unfortunately, I tried many models, and none of them actually shortens the text to a single sentence.
265
- # I searched for ways to fine-tune the summarization model so that it produces a one-sentence summary, but did not find a way to implement it.
266
- from transformers import pipeline
267
- summarized_text_list_list=summarized_text_list['summary_text']
268
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
269
- #print(summarizer)
270
- number_of_sentences=summarized_text_list_list.count('.')
271
- print(number_of_sentences)
272
- while(number_of_sentences)>1:
273
- print(number_of_sentences)
274
- summarized_text_list_list=summarizer(summarized_text_list_list)[0]['summary_text']
275
- number_of_sentences-=1
276
- print(summarized_text_list_list)
277
- print(number_of_sentences)
278
-
279
-
280
- #text to speech
281
- #!pip install git+https://github.com/huggingface/transformers.git
282
- #!pip install datasets sentencepiece
283
- import torch
284
- import soundfile as sf
285
- from IPython.display import Audio
286
- from datasets import load_dataset
287
- from transformers import pipeline
288
- from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
289
- processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
290
- model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
291
- #text = "The future belongs to those who believe in the beauty of their dreams."
292
- #text = (summarized_text_list_list)
293
-
294
- inputs = processor(text=summarized_text_list_list, return_tensors="pt")
295
- from datasets import load_dataset
296
- embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
297
-
298
- import torch
299
- speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
300
- spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)
301
- from transformers import SpeechT5HifiGan
302
- vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
303
- with torch.no_grad():
304
- speech = vocoder(spectrogram)
305
- speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
306
- Audio(speech, rate=16000)
307
-
308
-
 
247
  break
248
  print(abstract_from_pdf)
249