"""Streamlit app that answers questions about the text of an uploaded PDF."""

import os
import tempfile

import fitz  # PyMuPDF
import streamlit as st
from transformers import pipeline


@st.cache_resource
def load_qa_model():
    """Build the question-answering pipeline once per server process.

    Streamlit re-executes this script on every widget interaction; caching
    the pipeline as a resource avoids re-loading the model on each rerun.
    """
    return pipeline("question-answering", "timpal0l/mdeberta-v3-base-squad2")


# Module-level handle kept so existing references to ``qa_model`` still work.
qa_model = load_qa_model()


def extract_text_from_pdf(uploaded_file):
    """Return the concatenated text of every page in an uploaded PDF.

    Args:
        uploaded_file: A binary file-like object exposing ``read()`` that
            yields the raw PDF bytes (e.g. the value returned by
            ``st.file_uploader``).

    Returns:
        The extracted text as a single string (possibly empty when the PDF
        has no text layer), or ``None`` if extraction failed. On failure the
        error is reported to the user through ``st.error``.
    """
    temp_path = None
    try:
        # Write the upload to disk and close the handle *before* PyMuPDF
        # opens it: closing flushes buffered bytes, and an already-open
        # NamedTemporaryFile cannot be reopened by name on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_path = temp_file.name
            temp_file.write(uploaded_file.read())

        # The context manager closes the document even if a page fails.
        with fitz.open(temp_path) as doc:
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        # UI boundary: surface any failure to the user instead of crashing.
        st.error(f"Error extracting text from PDF: {str(e)}")
        return None
    finally:
        # Always remove the temporary copy so uploads do not pile up on disk.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not mask
                # the extraction result or the original error.
                pass


def main():
    """Render the app: upload a PDF, show its text, answer a question."""
    st.title("PDF Question Answering App")

    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if uploaded_file is None:
        return

    pdf_text = extract_text_from_pdf(uploaded_file)
    if pdf_text is None:
        # extract_text_from_pdf has already shown the error message.
        return

    st.subheader("Extracted Text from PDF")
    st.text(pdf_text)

    if not pdf_text.strip():
        # Scanned/image-only PDFs yield no text; the QA pipeline rejects an
        # empty context, so stop here with an explanation.
        st.warning("No extractable text was found in this PDF.")
        return

    question = st.text_input("Ask a question about the PDF:")

    if st.button("Get Answer"):
        if question and question.strip():
            answer = qa_model(question=question, context=pdf_text)
            st.subheader("Answer:")
            st.write(answer["answer"])
        else:
            st.warning("Please enter a question.")


if __name__ == "__main__":
    main()