Spaces:

Sibinraj
/

dialogue_Text_Summarizer

Sleeping

App Files Files Community

Sibinraj committed on Jun 17, 2024

Commit

948b6c3

verified ·

1 Parent(s): 4765f63

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -3

app.py CHANGED Viewed

@@ -1,13 +1,23 @@
 import torch
 import gradio as gr
 from transformers import T5ForConditionalGeneration, T5Tokenizer
-import fitz  # PyMuPDF
 model_path = 'Sibinraj/T5-finetuned-dialogue_sumxx'
 model = T5ForConditionalGeneration.from_pretrained(model_path)
 tokenizer = T5Tokenizer.from_pretrained(model_path)
 def extract_text_from_pdf(pdf_path):
     text = ""
     with fitz.open(pdf_path) as doc:
         for page in doc:
@@ -15,6 +25,17 @@ def extract_text_from_pdf(pdf_path):
     return text
 def summarize_text(text, max_length, show_length):
     inputs = tokenizer.encode(
         "summarize: " + text,
         return_tensors='pt',
@@ -25,8 +46,8 @@ def summarize_text(text, max_length, show_length):
     summary_ids = model.generate(
         inputs,
-        max_length=max_length + 20,
-        min_length=10,
         num_beams=5,
         no_repeat_ngram_size=2,
         early_stopping=True
@@ -56,9 +77,21 @@ def summarize_text(text, max_length, show_length):
     return summary
 def handle_pdf(pdf, max_length, show_length):
     text = extract_text_from_pdf(pdf.name)
     return summarize_text(text, max_length, show_length)
 interface = gr.Interface(
     fn=handle_pdf,
     inputs=[
@@ -70,4 +103,5 @@ interface = gr.Interface(
     title='PDF Text Summarizer using T5-finetuned-dialogue_sumxx'
 )
 interface.launch()

 import torch
 import gradio as gr
 from transformers import T5ForConditionalGeneration, T5Tokenizer
+import fitz
+# Load the model and tokenizer
 model_path = 'Sibinraj/T5-finetuned-dialogue_sumxx'
 model = T5ForConditionalGeneration.from_pretrained(model_path)
 tokenizer = T5Tokenizer.from_pretrained(model_path)
 def extract_text_from_pdf(pdf_path):
+    """
+    Extracts text from a given PDF file.
+    Args:
+        pdf_path (str): Path to the PDF file.
+    Returns:
+        str: Extracted text from the PDF.
+    """
     text = ""
     with fitz.open(pdf_path) as doc:
         for page in doc:
     return text
 def summarize_text(text, max_length, show_length):
+    """
+    Summarizes the given text using a T5 model.
+    Args:
+        text (str): The text to summarize.
+        max_length (int): The maximum length of the summary.
+        show_length (bool): Whether to show the length of the summary.
+    Returns:
+        str: The summarized text.
+    """
     inputs = tokenizer.encode(
         "summarize: " + text,
         return_tensors='pt',
     summary_ids = model.generate(
         inputs,
+        max_length=max_length + 20,  # Allow some buffer
+        min_length=10,
         num_beams=5,
         no_repeat_ngram_size=2,
         early_stopping=True
     return summary
 def handle_pdf(pdf, max_length, show_length):
+    """
+    Handles the PDF upload, extracts text, and summarizes it.
+    Args:
+        pdf (UploadedFile): The uploaded PDF file.
+        max_length (int): The maximum length of the summary.
+        show_length (bool): Whether to show the length of the summary.
+    Returns:
+        str: The summarized text.
+    """
     text = extract_text_from_pdf(pdf.name)
     return summarize_text(text, max_length, show_length)
+# Define the Gradio interface
 interface = gr.Interface(
     fn=handle_pdf,
     inputs=[
     title='PDF Text Summarizer using T5-finetuned-dialogue_sumxx'
 )
+# Launch the Gradio interface
 interface.launch()