File size: 2,009 Bytes
cd7c688
42feee4
4751c0e
7b6f550
42feee4
 
 
 
 
cd7c688
42feee4
 
 
9ed2bab
42feee4
 
cd7c688
 
42feee4
 
 
22b9487
42feee4
7b6f550
22b9487
7b6f550
42feee4
7b6f550
 
 
 
 
 
42feee4
4751c0e
376ef7d
42feee4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b6f550
 
 
 
 
 
4751c0e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import streamlit as st
import fitz  # PyMuPDF
from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
import torch

# RAG components: tokenizer, retriever and sequence generator, all loaded
# from the same pretrained checkpoint when this module is executed.
# NOTE(review): `retriever` is loaded here but is not referenced by any other
# code visible in this file -- confirm it is still needed.
# NOTE(review): these loads run at module top level; presumably that means
# they repeat whenever the script is re-executed -- consider caching.
_RAG_CHECKPOINT = "facebook/rag-sequence-nq"

tokenizer = RagTokenizer.from_pretrained(_RAG_CHECKPOINT)
retriever = RagRetriever.from_pretrained(_RAG_CHECKPOINT)
model = RagSequenceForGeneration.from_pretrained(_RAG_CHECKPOINT)

# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: One of
            * a filesystem path accepted by ``fitz.open``,
            * raw PDF content as ``bytes`` / ``bytearray``,
            * a binary file-like object exposing ``getvalue()`` or ``read()``
              (the caller in this file passes the object returned by
              ``st.file_uploader``).

    Returns:
        str: The text of all pages joined in page order; an empty string when
        no page yields any text.

    Raises:
        Whatever ``fitz.open`` raises when the input is not a readable PDF.
    """
    if isinstance(pdf_file, (bytes, bytearray)):
        data = bytes(pdf_file)
    elif hasattr(pdf_file, "getvalue"):
        # getvalue() returns the whole buffer regardless of the current read
        # position, so a previously consumed upload still yields its content.
        data = pdf_file.getvalue()
    elif hasattr(pdf_file, "read"):
        data = pdf_file.read()
    else:
        data = None

    if data is None:
        # Path-like input: let PyMuPDF open the file itself.
        doc = fitz.open(pdf_file)
    else:
        # In-memory content must go through ``stream=``; the first positional
        # argument of fitz.open is a filename.
        doc = fitz.open(stream=data, filetype="pdf")

    try:
        return "".join(page.get_text() for page in doc)
    finally:
        # Release the document even if text extraction fails part-way.
        doc.close()

# Function to handle question answering
def answer_question(question, pdf_text):
    """Generate an answer to ``question`` grounded in ``pdf_text``.

    The PDF text is split into short passages, the ``model.config.n_docs``
    passages sharing the most words with the question are selected, and each
    one is joined with the question into a generator context. The contexts
    and their scores are handed to ``model.generate`` through
    ``context_input_ids`` / ``context_attention_mask`` / ``doc_scores``, so no
    external retriever index is consulted.

    Args:
        question: The user's question; must contain non-whitespace text.
        pdf_text: Text extracted from the PDF; must contain non-whitespace
            text.

    Returns:
        str: The decoded answer with special tokens removed.

    Raises:
        ValueError: If ``question`` or ``pdf_text`` is empty or blank.
    """
    if not question or not question.strip():
        raise ValueError("question must be a non-empty string")
    if not pdf_text or not pdf_text.strip():
        raise ValueError("pdf_text is empty; no text was extracted from the PDF")

    config = model.config
    # Use exactly config.n_docs contexts so the shapes match what the model
    # assumes when it scores the generated candidates.
    n_docs = config.n_docs
    passages, scores = _rank_passages(question, pdf_text, n_docs)

    # Each context is "<passage><doc_sep><question>", encoded with the
    # generator tokenizer because these ids are fed to the generator.
    contexts = [f"{passage}{config.doc_sep}{question}" for passage in passages]
    encoded = tokenizer.generator(
        contexts,
        max_length=config.max_combined_length,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )

    # doc_scores has shape (batch=1, n_docs).
    doc_scores = torch.tensor([scores], dtype=torch.float)

    with torch.no_grad():
        outputs = model.generate(
            context_input_ids=encoded["input_ids"],
            context_attention_mask=encoded["attention_mask"],
            doc_scores=doc_scores,
            n_docs=n_docs,
        )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def _rank_passages(question, pdf_text, n_docs, words_per_passage=100):
    """Pick the ``n_docs`` passages of ``pdf_text`` that best match ``question``.

    The score is the number of distinct question words that also occur in the
    passage. This is a simple lexical heuristic standing in for retrieval
    scores, not a learned relevance measure.

    Returns:
        tuple[list[str], list[float]]: Exactly ``n_docs`` passages and their
        scores, best first. When the text has fewer passages, the lists are
        padded with empty passages carrying a very low score.
    """
    import string

    def terms_of(text):
        # Lower-cased words with surrounding punctuation removed.
        cleaned = (word.strip(string.punctuation).lower() for word in text.split())
        return {word for word in cleaned if word}

    words = pdf_text.split()
    passages = [
        " ".join(words[start:start + words_per_passage])
        for start in range(0, len(words), words_per_passage)
    ]

    query_terms = terms_of(question)
    scored = [(float(len(query_terms & terms_of(p))), p) for p in passages]
    # sorted() is stable, so equally scored passages keep document order.
    best = sorted(scored, key=lambda pair: pair[0], reverse=True)[:n_docs]

    top_passages = [passage for _, passage in best]
    top_scores = [score for score, _ in best]

    # Pad up to n_docs; the low score gives padding entries negligible weight.
    missing = n_docs - len(top_passages)
    top_passages.extend([""] * missing)
    top_scores.extend([-1e4] * missing)
    return top_passages, top_scores

# Streamlit app: upload a PDF, then answer questions about its text.
st.title("PDF Question-Answer Chatbot")
st.write("Upload a PDF file and ask questions based on its content.")

# File uploader
pdf_file = st.file_uploader("Upload PDF", type=["pdf"])
if pdf_file is not None:
    # Extract text from the PDF. A failure is reported in the page instead of
    # escaping as an unhandled exception, matching the handling used for
    # question answering below.
    try:
        pdf_text = extract_text_from_pdf(pdf_file)
    except Exception as e:
        pdf_text = None
        st.error(f"Could not read PDF: {e}")

    if pdf_text is not None and not pdf_text.strip():
        # Nothing to answer from, so do not claim success or offer the
        # question box.
        st.warning("No text could be extracted from this PDF.")
    elif pdf_text is not None:
        st.success("PDF loaded successfully!")

        # Question input
        question = st.text_input("Ask a question:")

        if question:
            with st.spinner("Finding answer..."):
                # Top-level UI boundary: show any failure to the user rather
                # than crashing the app.
                try:
                    answer = answer_question(question, pdf_text)
                    st.write("### Answer:")
                    st.write(answer)
                except Exception as e:
                    st.error(f"Error occurred: {e}")