mishrasahil934 committed on
Commit
e6a9ac6
·
verified ·
1 Parent(s): 668f0b8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -9
app.py CHANGED
@@ -2,23 +2,22 @@ import os
2
  import base64
3
  import tempfile
4
  import streamlit as st
5
- from transformers import pipeline
6
- from PyPDF2 import PdfReader
7
 
 
8
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
9
 
10
  # Load the summarization model
11
  tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
12
  base_model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
13
 
14
- # Function to extract text from a PDF using PyPDF2
15
  def extract_text_from_pdf(pdf_path):
16
- reader = PdfReader(pdf_path)
17
  text = ""
18
- for page in reader.pages:
19
- page_text = page.extract_text()
20
- if page_text: # Only add page text if it exists
21
- text += page_text
22
  if text.strip():
23
  return text
24
  return None
@@ -45,7 +44,7 @@ def displayPDF(file_path):
45
 
46
  # Streamlit App
47
  def main():
48
- st.title('PDF Content Summarizer')
49
 
50
  # PDF Upload Section
51
  uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])
 
2
  import base64
3
  import tempfile
4
  import streamlit as st
5
+ import fitz # PyMuPDF
 
6
 
7
+ from transformers import pipeline
8
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
9
 
10
  # Load the summarization model
11
  tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
12
  base_model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
13
 
14
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF using PyMuPDF.

    Args:
        pdf_path: Path of the PDF file to open.

    Returns:
        The concatenated text of all pages, or None when the document
        yields no non-whitespace text.
    """
    # Use the document as a context manager so its handle is released even
    # if loading a page or extracting its text raises.
    with fitz.open(pdf_path) as doc:
        text = "".join(
            doc.load_page(page_num).get_text()
            for page_num in range(doc.page_count)
        )
    # Whitespace-only output is reported as "no text" rather than returned.
    return text if text.strip() else None
 
44
 
45
  # Streamlit App
46
  def main():
47
+ st.title('Content Summarizer')
48
 
49
  # PDF Upload Section
50
  uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])