# Spaces: Sleeping  (page-header text captured with the script; not part of the program)
import streamlit as st | |
from transformers import pipeline | |
import fitz # PyMuPDF | |
import tempfile | |
import os | |
# Build the extractive question-answering pipeline once at import time; main()
# calls it as ``qa_model(question=..., context=...)``.
qa_model = pipeline(
    task="question-answering",
    model="timpal0l/mdeberta-v3-base-squad2",
)
# Function to extract text from a PDF file | |
def extract_text_from_pdf(uploaded_file):
    """Return the concatenated text of every page in an uploaded PDF.

    The upload is written to a temporary ``.pdf`` file, opened with PyMuPDF
    by path, and the temporary file is removed again before returning.

    Args:
        uploaded_file: Binary file-like object whose ``read()`` yields the
            raw PDF bytes (``main`` passes the ``st.file_uploader`` result).

    Returns:
        The text of all pages joined in page order, or ``None`` if anything
        failed; in that case the error is reported through ``st.error``.
    """
    temp_path = None
    try:
        # delete=False lets us close the handle and still reopen the file by
        # name; we remove it ourselves in the ``finally`` clause below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_path = temp_file.name
            temp_file.write(uploaded_file.read())
        # Leaving the ``with`` block closes the handle, so every byte is
        # flushed to disk before PyMuPDF reads the file.

        doc = fitz.open(temp_path)
        try:
            return "".join(
                doc[page_num].get_text() for page_num in range(doc.page_count)
            )
        finally:
            # Release the document even if a page fails to render as text.
            doc.close()
    except Exception as e:
        st.error(f"Error extracting text from PDF: {str(e)}")
        return None
    finally:
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask
                # the extraction result or the original error.
                pass
# Streamlit app | |
def main():
    """Render the Streamlit page: upload a PDF, show its text, answer a question.

    Steps, in order:
      1. Ask for a PDF upload; stop until one is provided.
      2. Extract its text with ``extract_text_from_pdf``; stop if that fails
         or yields no text.
      3. Show the text and a question box.
      4. When "Get Answer" is pressed, run ``qa_model`` with the question and
         the PDF text as context, and display the ``"answer"`` field.
    """
    st.title("PDF Question Answering App")

    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if uploaded_file is None:
        return

    pdf_text = extract_text_from_pdf(uploaded_file)
    if pdf_text is None:
        # extract_text_from_pdf already reported the failure via st.error.
        return

    if not pdf_text.strip():
        # Nothing to search (e.g. an image-only PDF). The QA pipeline rejects
        # an empty context, so stop here instead of failing on button click.
        st.warning("No extractable text was found in this PDF.")
        return

    st.subheader("Extracted Text from PDF")
    st.text(pdf_text)

    question = st.text_input("Ask a question about the PDF:")
    if not st.button("Get Answer"):
        return

    # Treat a whitespace-only question the same as no question at all.
    question = question.strip()
    if not question:
        st.warning("Please enter a question.")
        return

    answer = qa_model(question=question, context=pdf_text)
    st.subheader("Answer:")
    st.write(answer["answer"])
# Script entry point: build the page when this file is executed as the main
# module rather than imported.
if __name__ == "__main__":
    main()