ritchi1 committed on
Commit
f9464b7
·
verified ·
1 Parent(s): dac5276

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -7
app.py CHANGED
@@ -10,25 +10,34 @@ def summarize_pdf(pdf_file):
10
  # Extract text from the uploaded PDF
11
  pdf_reader = PyPDF2.PdfReader(pdf_file)
12
  text = ""
13
- for page in pdf_reader.pages:
14
- text += page.extract_text()
 
 
 
 
 
 
15
 
16
  # Check if text was extracted
17
  if not text.strip():
18
- return "❌ Could not extract text from the PDF. Please upload a valid document."
19
 
20
  # Summarize the extracted text
21
- summary = summarizer(text, max_length=300, min_length=50, do_sample=False)
 
 
 
22
  return summary[0]['summary_text']
23
 
24
  except Exception as e:
25
  return f"❌ An error occurred: {str(e)}"
26
 
27
- # Update Gradio interface to use the new syntax
28
  interface = gr.Interface(
29
  fn=summarize_pdf,
30
- inputs=gr.File(label="Upload PDF"), # Updated to gr.File
31
- outputs=gr.Textbox(label="Summary"), # Updated to gr.Textbox
32
  title="PDF Summarizer",
33
  description="Upload a PDF file to extract and summarize its content using state-of-the-art AI."
34
  )
 
10
  # Extract text from the uploaded PDF
11
  pdf_reader = PyPDF2.PdfReader(pdf_file)
12
  text = ""
13
+
14
+ for page_number, page in enumerate(pdf_reader.pages):
15
+ try:
16
+ page_text = page.extract_text()
17
+ if page_text:
18
+ text += page_text + "\n"
19
+ except Exception as e:
20
+ return f"❌ Could not read page {page_number + 1}: {str(e)}"
21
 
22
  # Check if text was extracted
23
  if not text.strip():
24
+ return "❌ Could not extract any text from the PDF. Please upload a readable document."
25
 
26
  # Summarize the extracted text
27
+ # Limit the text input length for summarization to avoid overflow errors
28
+ max_length = 1024 # Hugging Face models have input length limits
29
+ text = text[:max_length]
30
+ summary = summarizer(text, max_length=200, min_length=50, do_sample=False)
31
  return summary[0]['summary_text']
32
 
33
  except Exception as e:
34
  return f"❌ An error occurred: {str(e)}"
35
 
36
+ # Gradio Interface
37
  interface = gr.Interface(
38
  fn=summarize_pdf,
39
+ inputs=gr.File(label="Upload PDF"),
40
+ outputs=gr.Textbox(label="Summary"),
41
  title="PDF Summarizer",
42
  description="Upload a PDF file to extract and summarize its content using state-of-the-art AI."
43
  )