Sibinraj committed on
Commit
948b6c3
·
verified ·
1 Parent(s): 4765f63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -3
app.py CHANGED
@@ -1,13 +1,23 @@
1
  import torch
2
  import gradio as gr
3
  from transformers import T5ForConditionalGeneration, T5Tokenizer
4
- import fitz # PyMuPDF
5
 
 
6
  model_path = 'Sibinraj/T5-finetuned-dialogue_sumxx'
7
  model = T5ForConditionalGeneration.from_pretrained(model_path)
8
  tokenizer = T5Tokenizer.from_pretrained(model_path)
9
 
10
  def extract_text_from_pdf(pdf_path):
 
 
 
 
 
 
 
 
 
11
  text = ""
12
  with fitz.open(pdf_path) as doc:
13
  for page in doc:
@@ -15,6 +25,17 @@ def extract_text_from_pdf(pdf_path):
15
  return text
16
 
17
  def summarize_text(text, max_length, show_length):
 
 
 
 
 
 
 
 
 
 
 
18
  inputs = tokenizer.encode(
19
  "summarize: " + text,
20
  return_tensors='pt',
@@ -25,8 +46,8 @@ def summarize_text(text, max_length, show_length):
25
 
26
  summary_ids = model.generate(
27
  inputs,
28
- max_length=max_length + 20,
29
- min_length=10,
30
  num_beams=5,
31
  no_repeat_ngram_size=2,
32
  early_stopping=True
@@ -56,9 +77,21 @@ def summarize_text(text, max_length, show_length):
56
  return summary
57
 
58
  def handle_pdf(pdf, max_length, show_length):
 
 
 
 
 
 
 
 
 
 
 
59
  text = extract_text_from_pdf(pdf.name)
60
  return summarize_text(text, max_length, show_length)
61
 
 
62
  interface = gr.Interface(
63
  fn=handle_pdf,
64
  inputs=[
@@ -70,4 +103,5 @@ interface = gr.Interface(
70
  title='PDF Text Summarizer using T5-finetuned-dialogue_sumxx'
71
  )
72
 
 
73
  interface.launch()
 
1
  import torch
2
  import gradio as gr
3
  from transformers import T5ForConditionalGeneration, T5Tokenizer
4
+ import fitz
5
 
6
+ # Load the model and tokenizer
7
  model_path = 'Sibinraj/T5-finetuned-dialogue_sumxx'
8
  model = T5ForConditionalGeneration.from_pretrained(model_path)
9
  tokenizer = T5Tokenizer.from_pretrained(model_path)
10
 
11
  def extract_text_from_pdf(pdf_path):
12
+ """
13
+ Extracts text from a given PDF file.
14
+
15
+ Args:
16
+ pdf_path (str): Path to the PDF file.
17
+
18
+ Returns:
19
+ str: Extracted text from the PDF.
20
+ """
21
  text = ""
22
  with fitz.open(pdf_path) as doc:
23
  for page in doc:
 
25
  return text
26
 
27
  def summarize_text(text, max_length, show_length):
28
+ """
29
+ Summarizes the given text using a T5 model.
30
+
31
+ Args:
32
+ text (str): The text to summarize.
33
+ max_length (int): The maximum length of the summary.
34
+ show_length (bool): Whether to show the length of the summary.
35
+
36
+ Returns:
37
+ str: The summarized text.
38
+ """
39
  inputs = tokenizer.encode(
40
  "summarize: " + text,
41
  return_tensors='pt',
 
46
 
47
  summary_ids = model.generate(
48
  inputs,
49
+ max_length=max_length + 20, # Allow some buffer
50
+ min_length=10,
51
  num_beams=5,
52
  no_repeat_ngram_size=2,
53
  early_stopping=True
 
77
  return summary
78
 
79
  def handle_pdf(pdf, max_length, show_length):
80
+ """
81
+ Handles the PDF upload, extracts text, and summarizes it.
82
+
83
+ Args:
84
+ pdf (UploadedFile): The uploaded PDF file.
85
+ max_length (int): The maximum length of the summary.
86
+ show_length (bool): Whether to show the length of the summary.
87
+
88
+ Returns:
89
+ str: The summarized text.
90
+ """
91
  text = extract_text_from_pdf(pdf.name)
92
  return summarize_text(text, max_length, show_length)
93
 
94
+ # Define the Gradio interface
95
  interface = gr.Interface(
96
  fn=handle_pdf,
97
  inputs=[
 
103
  title='PDF Text Summarizer using T5-finetuned-dialogue_sumxx'
104
  )
105
 
106
+ # Launch the Gradio interface
107
  interface.launch()