Mishmosh committed on
Commit
4fd0f5d
·
1 Parent(s): e8e2778

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -43
app.py CHANGED
@@ -1,52 +1,23 @@
1
  import gradio as gr
2
- from PyPDF2 import PdfFileReader
3
- from transformers import pipeline
4
 
5
- # Function to extract text from PDF
6
- def extract_text_from_pdf(pdf_path):
7
- with open(pdf_path, 'rb') as file:
8
- pdf_reader = PdfFileReader(file)
9
- text = ""
10
- for page_num in range(pdf_reader.numPages):
11
- page = pdf_reader.getPage(page_num)
12
- text += page.extractText()
13
- return text
14
 
15
- # Function to extract the abstract from the text
16
- def extract_abstract(text):
17
- abstract = ""
18
- found_abstract = False
19
- paragraphs = text.split('\n')
20
- for index, paragraph in enumerate(paragraphs):
21
- if 'Abstract' in paragraph:
22
- found_abstract = True
23
- abstract = paragraphs[index + 1] # Get the next paragraph as the abstract
24
- return abstract if found_abstract else "Abstract not found"
25
-
26
- # Function to summarize text
27
- def summarize_text(text):
28
- summarizer = pipeline("summarization", model="ainize/bart-base-cnn")
29
- summarized_text = summarizer(text, max_length=50, min_length=5, do_sample=False)[0]['summary_text']
30
- return summarized_text
31
-
32
- # Function to convert text to speech
33
- def text_to_speech(text):
34
- from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
35
- processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
36
- model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
37
- inputs = processor(text, return_tensors="pt")
38
- speech = model.generate_speech(inputs["input_ids"])
39
- return speech.numpy().tobytes(), 16000 # Return audio data and sample rate
40
 
41
  # Gradio interface
42
  iface = gr.Interface(
43
- fn=lambda pdf_file: text_to_speech(summarize_text(extract_abstract(extract_text_from_pdf(pdf_file.name)))),
44
- inputs=gr.File(label="Upload PDF", type="file"),
45
- outputs="audio",
46
- live=True,
47
- title="PDF Abstract Summarizer with Text-to-Speech",
48
- description="Upload a PDF, and I will extract the abstract, summarize it, and convert it to speech."
49
  )
50
 
51
- # Launch the interface
52
  iface.launch()
 
1
  import gradio as gr
 
 
2
 
3
# Function to process the input message and PDF file
def process_input(message, pdf_file):
    """Store the uploaded PDF as ``uploaded_pdf.pdf`` and acknowledge the message.

    Args:
        message: Text entered by the user.
        pdf_file: The upload handed over by Gradio. Depending on the Gradio
            version/configuration this is either a temp-file wrapper that
            exposes its on-disk location as ``.name`` or a plain path string.
            ``None`` when nothing was uploaded.

    Returns:
        A status string echoing the message and reporting the upload outcome.

    Raises:
        OSError: If the uploaded file cannot be copied.
    """
    # Imported locally so this function stays self-contained.
    import shutil

    if pdf_file is None:
        return f"Message: {message}\nNo PDF file uploaded."

    # Gradio's upload value has no ``save`` method (that is a Flask/Werkzeug
    # API); the file already exists on disk, so copy it from its path.
    source_path = getattr(pdf_file, "name", pdf_file)
    shutil.copyfile(source_path, "uploaded_pdf.pdf")

    # Process the message and return a result
    return f"Message: {message}\nPDF file uploaded successfully!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
# Gradio interface: a text box plus a PDF upload, wired to process_input,
# with the returned status string shown in a text box.
iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Textbox(label="Enter your message"),
        # The legacy gr.inputs.File shim takes no `accept` keyword (it raises
        # TypeError); the top-level component restricts uploads via file_types.
        gr.File(label="Upload a PDF file", file_types=[".pdf"]),
    ],
    outputs=gr.Textbox(label="Result"),
)

# Launch the Gradio interface
iface.launch()