vardhan-rawat committed on
Commit
1a8f572
·
verified ·
1 Parent(s): a026148

Upload summarizer.py

Browse files
Files changed (1) hide show
  1. summarizer.py +58 -0
summarizer.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import BartForConditionalGeneration, BartTokenizer
3
+ from PyPDF2 import PdfFileReader
4
+ import torch
5
+
6
# Load the pretrained BART summarization checkpoint once at import time and
# place the model on the GPU when CUDA is available, otherwise on the CPU.
model_name = "facebook/bart-large-cnn"
tokenizer = BartTokenizer.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BartForConditionalGeneration.from_pretrained(model_name)
model = model.to(device)
11
+
12
+ # Function to Calculate summary lengths
13
def calc_summary_lengths(text_length):
    """Map each summary-length label to a ``(min, max)`` length pair.

    The bounds are fixed fractions of ``text_length``, truncated to integers:
    "Short" spans 10%-15%, "Medium" 15%-20% and "Long" 20%-30%.
    """
    ten_pct, fifteen_pct, twenty_pct, thirty_pct = (
        int(fraction * text_length) for fraction in (0.10, 0.15, 0.20, 0.30)
    )
    return {
        "Short": (ten_pct, fifteen_pct),
        "Medium": (fifteen_pct, twenty_pct),
        "Long": (twenty_pct, thirty_pct),
    }
23
+
24
+ # Function to summarize text
25
def _extract_pdf_text(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path``, newline-joined."""
    # PyPDF2 3.x raises on the legacy PdfFileReader/getPage/extractText names,
    # so prefer the current API and fall back only on old releases.
    try:
        from PyPDF2 import PdfReader as reader_cls
    except ImportError:
        reader_cls = PdfFileReader
    with open(pdf_path, "rb") as f:
        reader = reader_cls(f)
        page_texts = []
        for page in reader.pages:
            extract = getattr(page, "extract_text", None) or page.extractText
            # A page without a text layer may yield nothing; treat it as empty.
            page_texts.append(extract() or "")
    # Join with a separator so the last word of one page does not fuse with
    # the first word of the next.
    return "\n".join(page_texts)


def summarize_text(pdf_file, summary_length):
    """Summarize the text of an uploaded PDF with the BART model.

    Args:
        pdf_file: The uploaded file, either an object whose ``name`` attribute
            is the path on disk or the path itself.
        summary_length: A key of the mapping returned by
            ``calc_summary_lengths`` ("Short", "Medium" or "Long").

    Returns:
        The generated summary, or a message starting with ``"Error: "`` when
        anything fails. Exceptions are never propagated, so the UI always
        receives a string.
    """
    try:
        if pdf_file is None:
            raise ValueError("No PDF file was uploaded.")
        if not summary_length:
            raise ValueError("No summary length was selected.")

        pdf_path = getattr(pdf_file, "name", pdf_file)
        # Collapse all runs of whitespace to single spaces.
        text = " ".join(_extract_pdf_text(pdf_path).split())
        if not text:
            raise ValueError("No extractable text was found in the PDF.")

        inputs = tokenizer.encode(text, max_length=1024, return_tensors='pt', truncation=True).to(device)

        # generate() measures min_length/max_length in tokens, and the model
        # only sees the (possibly truncated) encoded input, so size the summary
        # from that token count rather than from the document's word count.
        summary_range = calc_summary_lengths(inputs.shape[-1])
        if summary_length not in summary_range:
            raise ValueError(f"Unknown summary length: {summary_length!r}.")
        min_length, max_length = summary_range[summary_length]
        # NOTE(review): max_length is understood to include the decoder start
        # token, so a value below 2 leaves no room for output - confirm
        # against the installed transformers version.
        if max_length < 2:
            raise ValueError("The document is too short to summarize.")

        # Summary generation
        summary_ids = model.generate(
            inputs,
            num_beams=4,
            min_length=min_length,
            max_length=max_length,
            early_stopping=True,
        )
        return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    except Exception as e:
        # Top-level UI boundary: report the failure as text instead of raising.
        return f"Error: {str(e)} \nPlease check file size and type!"
47
+
48
+
49
+
50
# Gradio widgets: the PDF upload, the result box and the length selector.
input_component = gr.File(label="Upload PDF file")
output_component = gr.Textbox(label="Summarized Text")
summary_length_component = gr.Dropdown(
    label="Summary Length",
    choices=["Short", "Medium", "Long"],
)

title = "PDF Text Summarizer (BART)"
description = "<h2>Upload a PDF file and select the desired summary length.</h2>"

# Wire the widgets to summarize_text and start serving the app.
InterFace = gr.Interface(
    fn=summarize_text,
    inputs=[input_component, summary_length_component],
    outputs=output_component,
    title=title,
    description=description,
)
InterFace.launch()