IAMTFRMZA committed on
Commit
272755a
·
verified ·
1 Parent(s): 20a5442
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import streamlit as st
2
  import time
3
- import pymupdf # Proper import for PyMuPDF
4
  from openai import OpenAI
5
  from docx import Document
6
  from docx.shared import Pt
@@ -44,14 +44,14 @@ for message in st.session_state.messages:
44
  uploaded_file = st.file_uploader("Upload PDF file", type=["pdf"])
45
 
46
  if uploaded_file:
47
- # Read the PDF file using PyMuPDF
48
- pdf_document = PyMuPDF.open(uploaded_file)
49
  full_text = ""
50
 
51
  # Extract text from each page
52
- for page_num in range(pdf_document.page_count):
53
- page = pdf_document.load_page(page_num)
54
- full_text += page.get_text()
55
 
56
  st.write("Text extracted from PDF:")
57
  st.text_area("Extracted Text", full_text, height=300)
 
1
  import streamlit as st
2
  import time
3
+ import PyPDF2 # Import PyPDF2 for PDF text extraction
4
  from openai import OpenAI
5
  from docx import Document
6
  from docx.shared import Pt
 
44
  uploaded_file = st.file_uploader("Upload PDF file", type=["pdf"])
45
 
46
  if uploaded_file:
47
+ # Read the PDF file using PyPDF2
48
+ pdf_reader = PyPDF2.PdfReader(uploaded_file)
49
  full_text = ""
50
 
51
  # Extract text from each page
52
+ for page_num in range(len(pdf_reader.pages)):
53
+ page = pdf_reader.pages[page_num]
54
+ full_text += page.extract_text()
55
 
56
  st.write("Text extracted from PDF:")
57
  st.text_area("Extracted Text", full_text, height=300)