mishrasahil934 committed on
Commit
668f0b8
·
verified ·
1 Parent(s): 421cd7c
Files changed (1) hide show
  1. app.py +14 -15
app.py CHANGED
@@ -13,14 +13,15 @@ base_model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
13
 
14
  # Function to extract text from a PDF using PyPDF2
15
  def extract_text_from_pdf(pdf_path):
16
- reader = PdfReader(pdf_path)
17
- text = ""
18
- for page in reader.pages:
19
- text += page.extract_text() # Extract text from each page
20
- if not text.strip():
21
- raise ValueError("The PDF file contains no extractable text.")
22
- return text
23
-
 
24
 
25
  # LLM pipeline for summarization
26
  def llm_pipeline(input_text):
@@ -37,16 +38,14 @@ def llm_pipeline(input_text):
37
  @st.cache_data
38
  # Function to display the PDF
39
  def displayPDF(file_path):
40
-
41
- with open(file_path, "rb") as f:
42
- base64_pdf = base64.b64encode(f.read()).decode('utf-8')
43
- pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
44
- st.markdown(pdf_display, unsafe_allow_html=True)
45
-
46
 
47
  # Streamlit App
48
  def main():
49
- st.title('Content Summarizer')
50
 
51
  # PDF Upload Section
52
  uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])
 
13
 
# Function to extract text from a PDF using PyPDF2
def extract_text_from_pdf(pdf_path):
    """Collect the text of every page of a PDF into a single string.

    Args:
        pdf_path: Value handed to ``PdfReader`` to open the document.

    Returns:
        The page texts concatenated in page order with no separator, or
        ``None`` when the combined text is empty or whitespace only.
    """
    pages = PdfReader(pdf_path).pages
    # A page whose extract_text() result is falsy contributes nothing.
    combined = "".join(page.extract_text() or "" for page in pages)
    return combined if combined.strip() else None
25
 
26
  # LLM pipeline for summarization
27
  def llm_pipeline(input_text):
 
@st.cache_data
# Function to display the PDF
def displayPDF(file_path):
    """Render the PDF at ``file_path`` inline in the Streamlit page.

    The file is read in binary mode, base64-encoded, and embedded as a
    ``data:`` URI inside an ``<iframe>`` emitted through ``st.markdown``.

    Args:
        file_path: Path of the PDF file to open and display.
    """
    # NOTE(review): the cache decorator sees only file_path as input -
    # confirm a different file saved under the same path cannot be served
    # from a stale cache entry.
    with open(file_path, "rb") as pdf_file:
        raw_bytes = pdf_file.read()
    encoded_pdf = base64.b64encode(raw_bytes).decode('utf-8')
    iframe_html = f'<iframe src="data:application/pdf;base64,{encoded_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
    # The markup is passed with unsafe_allow_html=True so the iframe tag is
    # emitted as HTML.
    st.markdown(iframe_html, unsafe_allow_html=True)
 
 
45
 
46
  # Streamlit App
47
  def main():
48
+ st.title('PDF Content Summarizer')
49
 
50
  # PDF Upload Section
51
  uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])