Spaces:
Sleeping
Sleeping
File size: 2,009 Bytes
cd7c688 42feee4 4751c0e 7b6f550 42feee4 cd7c688 42feee4 9ed2bab 42feee4 cd7c688 42feee4 22b9487 42feee4 7b6f550 22b9487 7b6f550 42feee4 7b6f550 42feee4 4751c0e 376ef7d 42feee4 7b6f550 4751c0e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import streamlit as st
import fitz # PyMuPDF
from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
import torch
# Load the RAG model components
RAG_CHECKPOINT = "facebook/rag-sequence-nq"


@st.cache_resource
def _load_rag_components():
    """Load the RAG tokenizer, retriever and generator exactly once.

    Streamlit re-executes this script on every widget interaction; caching
    the loader keeps the heavyweight ``from_pretrained`` calls from running
    again on each rerun.

    Returns:
        A ``(tokenizer, retriever, model)`` tuple loaded from
        ``RAG_CHECKPOINT``.
    """
    rag_tokenizer = RagTokenizer.from_pretrained(RAG_CHECKPOINT)
    # NOTE(review): the retriever is loaded but never attached to the model
    # (no ``retriever=`` argument below). With default arguments it also
    # appears to load the full Wikipedia index -- confirm whether
    # ``index_name="exact", use_dummy_dataset=True`` was intended, or whether
    # the retriever is needed at all.
    rag_retriever = RagRetriever.from_pretrained(RAG_CHECKPOINT)
    rag_model = RagSequenceForGeneration.from_pretrained(RAG_CHECKPOINT)
    return rag_tokenizer, rag_retriever, rag_model


tokenizer, retriever, model = _load_rag_components()
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: Either a filesystem path (``str`` / ``pathlib.Path``) or a
            binary file-like object exposing ``read()``, such as the object
            returned by ``st.file_uploader``.

    Returns:
        The text of all pages joined in page order. This is an empty string
        when no page yields any text.

    Raises:
        Whatever PyMuPDF raises when the data cannot be opened as a PDF.
    """
    if hasattr(pdf_file, "read"):
        # An in-memory upload has no path on disk, so hand PyMuPDF the raw
        # bytes and state the type explicitly instead of passing the object
        # where a filename is expected.
        if hasattr(pdf_file, "seek"):
            # Rewind in case an earlier read left the cursor at the end.
            pdf_file.seek(0)
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    else:
        doc = fitz.open(pdf_file)

    try:
        return "".join(page.get_text() for page in doc)
    finally:
        # Release the document even if text extraction fails part-way.
        doc.close()
# Helper: choose which parts of the PDF are shown to the generator
def _select_passages(question, pdf_text, n_docs, passage_words=100):
    """Return exactly ``n_docs`` passages of ``pdf_text`` for ``question``.

    The text is cut into consecutive chunks of ``passage_words`` words and
    the chunks sharing the most distinct words with the question are kept
    (ties keep document order). This is a plain lexical heuristic, not a
    learned retriever. When the document has fewer chunks than ``n_docs``
    the selected chunks are repeated to fill the list.
    """

    def terms(text):
        # Case-insensitive word set with surrounding punctuation removed.
        stripped = (word.strip(".,;:!?\"'()[]{}<>").casefold() for word in text.split())
        return {word for word in stripped if word}

    words = pdf_text.split()
    passages = [
        " ".join(words[start:start + passage_words])
        for start in range(0, len(words), passage_words)
    ]
    query_terms = terms(question)
    ranked = sorted(
        passages,
        key=lambda passage: len(query_terms & terms(passage)),
        reverse=True,
    )[:n_docs]
    return [ranked[i % len(ranked)] for i in range(n_docs)]


# Function to handle question answering
def answer_question(question, pdf_text):
    """Generate an answer to ``question`` grounded in ``pdf_text``.

    The PDF text is split into passages, and the passages most related to the
    question are formatted the way the RAG generator expects
    (``title / text // question``). They are passed to ``model.generate`` as
    explicit context documents, so no external retriever is consulted.

    Args:
        question: The user's question.
        pdf_text: Text extracted from the uploaded PDF.

    Returns:
        The decoded answer string.

    Raises:
        ValueError: If ``question`` or ``pdf_text`` is empty or whitespace.
    """
    question = (question or "").strip()
    pdf_text = (pdf_text or "").strip()
    if not question:
        raise ValueError("The question is empty.")
    if not pdf_text:
        raise ValueError("No text could be extracted from the PDF.")

    config = model.config
    # generate() scores candidates with the model's configured n_docs, so the
    # number of context documents supplied must match it exactly.
    n_docs = config.n_docs
    passages = _select_passages(question, pdf_text, n_docs)

    # Same layout the RAG retriever produces, with an empty document title.
    rag_inputs = [
        (config.title_sep + passage + config.doc_sep + question).replace("  ", " ")
        for passage in passages
    ]
    # Context must be encoded with the generator's tokenizer (not the
    # question-encoder one) and truncated to the combined-length limit.
    context = tokenizer.generator(
        rag_inputs,
        max_length=config.max_combined_length,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )
    # Equal scores give a uniform prior over the selected passages.
    doc_scores = torch.zeros((1, n_docs))

    # Generate the answer
    with torch.no_grad():
        outputs = model.generate(
            context_input_ids=context["input_ids"],
            context_attention_mask=context["attention_mask"],
            doc_scores=doc_scores,
            n_docs=n_docs,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Streamlit app
st.title("PDF Question-Answer Chatbot")
st.write("Upload a PDF file and ask questions based on its content.")

# File uploader
pdf_file = st.file_uploader("Upload PDF", type=["pdf"])
if pdf_file is not None:
    # Extract text from the PDF; a corrupt or unreadable upload is reported
    # in the page instead of aborting the script with a traceback.
    try:
        pdf_text = extract_text_from_pdf(pdf_file)
    except Exception as e:  # top-level UI boundary: show the failure to the user
        pdf_text = None
        st.error(f"Could not read the PDF: {e}")

    if pdf_text is not None:
        if pdf_text.strip():
            st.success("PDF loaded successfully!")
        else:
            # e.g. a PDF with no extractable text on any page
            st.warning("The PDF was opened, but no text could be extracted from it.")

        # Question input
        question = st.text_input("Ask a question:")
        if question:
            with st.spinner("Finding answer..."):
                try:
                    answer = answer_question(question, pdf_text)
                except Exception as e:  # top-level UI boundary
                    st.error(f"Error occurred: {e}")
                else:
                    st.write("### Answer:")
                    st.write(answer)
|