hardik90 committed on
Commit
e0b9f9a
·
verified ·
1 Parent(s): 064713e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -1
app.py CHANGED
@@ -1,24 +1,41 @@
1
  import streamlit as st
2
  from transformers import pipeline
3
  import fitz # PyMuPDF
 
 
4
 
5
  # Load the QA model
  # Built at module level from the "timpal0l/mdeberta-v3-base-squad2" checkpoint.
  # NOTE(review): Streamlit presumably re-runs this script on each interaction,
  # which would repeat the load; consider caching it - TODO confirm.
6
  qa_model = pipeline("question-answering", "timpal0l/mdeberta-v3-base-squad2")
7
 
8
  # Function to extract text from a PDF file
9
  def extract_text_from_pdf(uploaded_file):
 
10
  try:
11
- doc = fitz.open(stream=uploaded_file, filetype="pdf")
 
 
 
 
 
12
  text = ""
13
  for page_num in range(doc.page_count):
14
  page = doc[page_num]
15
  text += page.get_text()
16
  doc.close()
 
17
  return text
 
18
  except Exception as e:
19
  st.error(f"Error extracting text from PDF: {str(e)}")
20
  return None
21
 
 
 
 
 
 
 
 
22
  # Streamlit app
23
  def main():
24
  st.title("PDF Question Answering App")
 
1
  import streamlit as st
2
  from transformers import pipeline
3
  import fitz # PyMuPDF
4
+ import tempfile
5
+ import os
6
 
7
  # Load the QA model
  # Built at module level from the "timpal0l/mdeberta-v3-base-squad2" checkpoint.
  # NOTE(review): Streamlit presumably re-runs this script on each interaction,
  # which would repeat the load; consider caching it - TODO confirm.
8
  qa_model = pipeline("question-answering", "timpal0l/mdeberta-v3-base-squad2")
9
 
10
  # Function to extract text from a PDF file
11
  def extract_text_from_pdf(uploaded_file):
12
+ temp_file = None
13
  try:
14
+ # Save the uploaded PDF as a temporary file
15
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
16
+ temp_file.write(uploaded_file.read())
17
+
18
+ # Open the temporary PDF file and extract text
19
+ doc = fitz.open(temp_file.name)
20
  text = ""
21
  for page_num in range(doc.page_count):
22
  page = doc[page_num]
23
  text += page.get_text()
24
  doc.close()
25
+
26
  return text
27
+
28
  except Exception as e:
29
  st.error(f"Error extracting text from PDF: {str(e)}")
30
  return None
31
 
32
+ finally:
33
+ # Remove the temporary file
34
+ if temp_file:
35
+ temp_file.close()
36
+ # Uncomment the line below if you want to delete the temporary file after use
37
+ # os.remove(temp_file.name)
38
+
39
  # Streamlit app
40
  def main():
41
  st.title("PDF Question Answering App")