vividsd committed on
Commit
5ca28a3
·
1 Parent(s): 66d84cd

Delete app_assessment3.py

Browse files
Files changed (1) hide show
  1. app_assessment3.py +0 -97
app_assessment3.py DELETED
@@ -1,97 +0,0 @@
1
- import gradio as gr
2
- from transformers import pipeline
3
- from tempfile import NamedTemporaryFile
4
- from PyPDF2 import PdfReader
5
- from IPython.display import Audio
6
- import numpy as np
7
- from bark import SAMPLE_RATE, generate_audio, preload_models
8
- from scipy.io.wavfile import write as write_wav
9
- import torch
10
-
11
def _extract_abstract_text(page_text):
    """Return the abstract body found in *page_text*, or "" when there is none.

    Collection starts after a line that consists solely of the word
    "abstract" (any case) and stops at a line that contains both "1" and
    "introduction" (any case), e.g. "1 Introduction".
    """
    collected_lines = []
    inside_abstract = False
    for raw_line in page_text.splitlines():
        normalized = raw_line.strip().lower()
        if normalized == "abstract":
            # The heading itself is not part of the abstract body, so skip it
            # here instead of deleting the word "Abstract" from the text later.
            inside_abstract = True
            continue
        if "1" in normalized and "introduction" in normalized:
            inside_abstract = False
            continue
        if inside_abstract and normalized:
            collected_lines.append(raw_line.strip())
    # Join with a space so the last word of one line does not fuse with the
    # first word of the next.
    return " ".join(collected_lines)


def summarize_abstract_from_pdf(pdf_file_path):
    """Summarize the abstract on the first page of a PDF into a short text.

    Args:
        pdf_file_path: Path of the PDF file to read.

    Returns:
        The 'summary_text' string produced by the second summarization pass.

    Raises:
        ValueError: If no abstract section is found on the first page.
    """
    # Only the first page is inspected.
    with open(pdf_file_path, 'rb') as pdf_file:
        reader = PdfReader(pdf_file)
        # NOTE(review): the `or ""` guards against extract_text() yielding
        # nothing for pages without a text layer - confirm for this PyPDF2 version.
        page_text = reader.pages[0].extract_text() or ""

    abstract_text = _extract_abstract_text(page_text)
    if not abstract_text:
        raise ValueError(
            "No abstract section was found on the first page of the PDF."
        )

    summarizer = pipeline(
        "summarization",
        "pszemraj/led-base-book-summary",
        device=0 if torch.cuda.is_available() else -1,
    )

    # Generation settings shared by both passes; only max_length differs.
    generation_options = dict(
        min_length=16,
        no_repeat_ngram_size=3,
        encoder_no_repeat_ngram_size=3,
        repetition_penalty=3.5,
        num_beams=4,
        early_stopping=True,
    )

    first_pass = summarizer(abstract_text, max_length=150, **generation_options)
    # The summarizer is run a second time on its own output, with a much
    # smaller max_length, to obtain a shorter summary.
    second_pass = summarizer(
        first_pass[0]['summary_text'],
        max_length=25,
        **generation_options,
    )

    return second_pass[0]['summary_text']
64
-
65
def generate_audio_func(pdf_file):
    """Turn an uploaded PDF into a WAV file that speaks its summarized abstract.

    Args:
        pdf_file: Uploaded file object; only its ``name`` attribute (the path
            of the file on disk) is read.

    Returns:
        Path of a temporary ``.wav`` file containing the generated speech.
        The file is created with ``delete=False``, so it is not removed
        automatically.
    """
    text_prompt = summarize_abstract_from_pdf(pdf_file.name)
    audio_array = generate_audio(text_prompt)

    # Convert to 16-bit PCM. Clip first so samples outside [-1, 1] saturate
    # instead of wrapping around when cast to int16.
    # NOTE(review): assumes generate_audio returns float samples nominally in
    # [-1, 1] - confirm against the Bark documentation.
    pcm_samples = (np.clip(audio_array, -1.0, 1.0) * 32767).astype(np.int16)

    # Reserve a path, then close the handle before writing: writing by path
    # while the temporary file is still open fails on Windows.
    with NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file:
        wav_file_path = temp_wav_file.name

    # Use Bark's own SAMPLE_RATE rather than a hard-coded 22050 so the WAV
    # header matches the rate the audio was generated at.
    write_wav(wav_file_path, SAMPLE_RATE, pcm_samples)
    return wav_file_path
78
-
79
-
80
-
81
# Title and description passed to the Gradio interface.
app_name = "PDF to Audio Converter"
app_description = "Convert text from a PDF file to audio. Upload a PDF file. We accept only PDF files with abstracts."

# Input: a file upload restricted to PDFs. Gradio expects file extensions to
# include the leading dot (".pdf"); a bare "pdf" is neither a known category
# nor an extension.
input_component = gr.File(file_types=[".pdf"])
# Output: an audio component fed with the WAV path returned by generate_audio_func.
output_component = gr.Audio()

demo = gr.Interface(
    fn=generate_audio_func,
    inputs=input_component,
    outputs=output_component,
    title=app_name,
    description=app_description,
)

demo.launch()