vividsd committed on
Commit
40ed6b9
·
1 Parent(s): 533bc81

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -96
app.py DELETED
@@ -1,96 +0,0 @@
1
- import gradio as gr
2
- from transformers import pipeline
3
- from tempfile import NamedTemporaryFile
4
- from PyPDF2 import PdfReader
5
- from IPython.display import Audio
6
- import numpy as np
7
- from bark import SAMPLE_RATE, generate_audio, preload_models
8
- from scipy.io.wavfile import write as write_wav
9
- import torch
10
-
11
# Generation settings shared by both summarization passes; only max_length
# differs between them.
_SUMMARY_GENERATION_KWARGS = {
    "min_length": 16,
    "no_repeat_ngram_size": 3,
    "encoder_no_repeat_ngram_size": 3,
    "repetition_penalty": 3.5,
    "num_beams": 4,
    "early_stopping": True,
}

# Summarization pipeline, created on first use and reused afterwards so the
# model is not reloaded for every request.
_summarizer = None


def _get_summarizer():
    """Return the shared summarization pipeline, loading it on first use."""
    global _summarizer
    if _summarizer is None:
        _summarizer = pipeline(
            "summarization",
            "pszemraj/led-base-book-summary",
            device=0 if torch.cuda.is_available() else -1,
        )
    return _summarizer


def _extract_abstract_text(page_text):
    """Return the text between the 'abstract' heading and the '1 ... introduction' heading."""
    abstract_lines = []
    in_abstract = False
    for line in page_text.splitlines():
        normalized = line.strip().lower()
        if normalized == "abstract":
            # Start collecting after the heading; the heading itself is not
            # part of the abstract, so it is skipped rather than stripped
            # out of the collected text afterwards.
            in_abstract = True
            continue
        if "1" in normalized and "introduction" in normalized:
            in_abstract = False
        if in_abstract:
            abstract_lines.append(line.strip())
    # Join with spaces so the last word of one line does not fuse with the
    # first word of the next.
    return " ".join(part for part in abstract_lines if part)


def summarize_abstract_from_pdf(pdf_file_path):
    """Summarize the abstract found on the first page of a PDF.

    The abstract is taken to be the lines following a line that reads
    exactly "abstract" (case-insensitive), up to a line that contains both
    "1" and "introduction". The text is summarized twice: first with
    max_length=150, then the result again with max_length=25.

    Args:
        pdf_file_path: Path of the PDF file to read.

    Returns:
        The summary text produced by the second summarization pass.

    Raises:
        ValueError: If the PDF has no pages or no abstract could be located
            on its first page.
    """
    # Read the PDF and extract text from the first page only.
    with open(pdf_file_path, "rb") as pdf_file:
        reader = PdfReader(pdf_file)
        if len(reader.pages) == 0:
            raise ValueError("The PDF file contains no pages.")
        first_page_text = reader.pages[0].extract_text() or ""

    abstract_text = _extract_abstract_text(first_page_text)
    if not abstract_text:
        # Fail early with a clear message instead of summarizing an empty
        # string.
        raise ValueError(
            "No abstract was found on the first page of the PDF file."
        )

    summarizer = _get_summarizer()

    # First pass: summarize the extracted abstract.
    first_pass = summarizer(
        abstract_text,
        max_length=150,
        **_SUMMARY_GENERATION_KWARGS,
    )
    # Second pass: summarize the summary again to get a shorter text.
    second_pass = summarizer(
        first_pass[0]["summary_text"],
        max_length=25,
        **_SUMMARY_GENERATION_KWARGS,
    )

    # Return the summarized abstract as a string
    return second_pass[0]["summary_text"]
63
-
64
def generate_audio_func(pdf_file):
    """Turn the summarized abstract of an uploaded PDF into a WAV file.

    Args:
        pdf_file: Uploaded file object; its ``name`` attribute is used as
            the path of the PDF on disk.

    Returns:
        Path of a temporary ``.wav`` file containing the generated speech.
        The file is created with ``delete=False`` and is not removed here.

    Raises:
        ValueError: If no file was provided.
    """
    if pdf_file is None:
        raise ValueError("Please upload a PDF file.")

    # Summarize the abstract, then synthesize speech from the summary.
    text_prompt = summarize_abstract_from_pdf(pdf_file.name)
    audio_array = generate_audio(text_prompt)

    # Assumes generate_audio returns float samples nominally in [-1, 1]
    # (the 32767 scaling implies this). Clip first so any out-of-range
    # sample saturates instead of wrapping around when cast to int16.
    pcm_samples = (np.clip(audio_array, -1.0, 1.0) * 32767).astype(np.int16)

    # Only reserve a path here and close the handle before writing, so the
    # WAV writer is not reopening a file that is still open.
    with NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file:
        wav_file_path = temp_wav_file.name

    # Use the sample rate exported by bark rather than a hard-coded value,
    # so the file plays back at the rate the audio was generated for.
    write_wav(wav_file_path, SAMPLE_RATE, pcm_samples)
    return wav_file_path
77
-
78
-
79
-
80
# Define app name and app description
app_name = "PDF to Audio Converter"
app_description = "Convert text from a PDF file to audio. Upload a PDF file. We accept only PDF files with abstracts."

# Create the Gradio app.
# File extensions passed to file_types need the leading dot (".pdf");
# entries without a dot are interpreted as broad type names such as
# "image" or "audio".
input_component = gr.File(file_types=[".pdf"])
output_component = gr.Audio()

demo = gr.Interface(
    fn=generate_audio_func,
    inputs=input_component,
    outputs=output_component,
    title=app_name,
    description=app_description,
)

demo.launch()