Spaces:
Sleeping
Sleeping
File size: 1,670 Bytes
3328efb 064713e 3328efb 064713e 3328efb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import streamlit as st
from transformers import pipeline
import fitz # PyMuPDF
# Load the QA model.
# Streamlit re-executes this script on every widget interaction, so the model
# is built through a cached factory rather than reconstructed on each rerun.
# Streamlit releases that lack `st.cache_resource` fall back to an uncached
# load (identity decorator), which matches the previous behavior.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_qa_model():
    """Build the question-answering pipeline used by the app."""
    return pipeline("question-answering", "timpal0l/mdeberta-v3-base-squad2")


qa_model = _load_qa_model()
# Function to extract text from a PDF file
def extract_text_from_pdf(uploaded_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        uploaded_file: The PDF content, handed to ``fitz.open`` as its
            ``stream`` argument (here, the object returned by
            ``st.file_uploader``).

    Returns:
        The text of all pages joined in page order, or ``None`` if opening
        or reading the document failed. On failure the error is shown in
        the Streamlit UI via ``st.error`` instead of being raised.
    """
    try:
        doc = fitz.open(stream=uploaded_file, filetype="pdf")
        try:
            # Iterating the document yields its pages in order.
            return "".join(page.get_text() for page in doc)
        finally:
            # Close even when a page fails to extract, so the document
            # handle is not leaked on the error path.
            doc.close()
    except Exception as e:
        # UI boundary: report any failure to the user rather than crashing
        # the app.
        st.error(f"Error extracting text from PDF: {e}")
        return None
# Streamlit app
def main():
    """Render the PDF question-answering page.

    Flow: the user uploads a PDF, its extracted text is displayed, a question
    is typed in, and pressing "Get Answer" runs the module-level ``qa_model``
    with that question against the extracted text and shows the answer.
    """
    st.title("PDF Question Answering App")

    # Upload PDF file through Streamlit
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if uploaded_file is None:
        return

    # Read the PDF file and extract text
    pdf_text = extract_text_from_pdf(uploaded_file)
    if pdf_text is None:
        # Extraction failed; extract_text_from_pdf has already shown the error.
        return

    # Display the extracted text
    st.subheader("Extracted Text from PDF")
    st.text(pdf_text)

    # Input for user question; surrounding whitespace is not a question.
    question = st.text_input("Ask a question about the PDF:").strip()

    # Button to trigger question answering
    if not st.button("Get Answer"):
        return

    if not question:
        st.warning("Please enter a question.")
        return

    if not pdf_text.strip():
        # A PDF without a text layer (e.g. a scan) extracts to nothing, and
        # the question-answering pipeline raises on an empty context, so
        # warn instead of calling the model.
        st.warning("No text could be extracted from this PDF, so there is nothing to answer from.")
        return

    # Use the QA model to get the answer
    answer = qa_model(question=question, context=pdf_text)
    st.subheader("Answer:")
    st.write(answer["answer"])
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|