tarrasyed19472007 committed on
Commit
376ef7d
·
verified ·
1 Parent(s): 8a78614

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
4
+ import torch
5
+
6
+ # Load RAG model and tokenizer
7
# NOTE(review): the bare id "facebook/rag-sequence" does not appear to be a
# published checkpoint; "facebook/rag-sequence-nq" is the RAG-Sequence
# checkpoint used in the Transformers documentation — confirm on the Hub.
RAG_CHECKPOINT = "facebook/rag-sequence-nq"


@st.cache_resource
def _load_rag_components():
    """Load the RAG tokenizer, retriever and generator once per process.

    Streamlit re-executes the whole script on every widget interaction;
    caching the loaded objects avoids re-instantiating them on each rerun.

    Returns:
        A ``(tokenizer, retriever, model)`` tuple built from
        ``RAG_CHECKPOINT``.
    """
    rag_tokenizer = RagTokenizer.from_pretrained(RAG_CHECKPOINT)
    rag_retriever = RagRetriever.from_pretrained(
        RAG_CHECKPOINT,
        use_dummy_dataset=True,
    )
    rag_model = RagSequenceForGeneration.from_pretrained(RAG_CHECKPOINT)
    return rag_tokenizer, rag_retriever, rag_model


# Module-level names are kept so the rest of the file can keep using them.
tokenizer, retriever, model = _load_rag_components()
10
+
11
+ # Function to process PDF
12
def read_pdf(file):
    """Return the concatenated text of every page of a PDF.

    Args:
        file: The PDF source, passed straight to ``PdfReader`` (the app
            passes the object returned by the Streamlit file uploader).

    Returns:
        The text of all pages joined in page order with no separator; an
        empty string when no page yields any text.
    """
    pdf_reader = PdfReader(file)
    # `or ""` keeps a page whose extraction result is falsy (e.g. None)
    # from breaking the join; such pages simply contribute nothing.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
18
+
19
+ # Function to answer the question
20
def answer_question(question, context):
    """Generate an answer to *question* with the module-level RAG model.

    Args:
        question: The user's question as plain text.
        context: Text of the uploaded PDF. NOTE(review): this argument is
            not used — passages come from the retriever's own index, so
            the answer is not grounded in the PDF. The parameter is kept
            so existing callers keep working.

    Returns:
        The first decoded generation as a string.
    """
    encoded = tokenizer(
        [question],
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    input_ids = encoded["input_ids"]

    with torch.no_grad():
        # The retriever is called with the question embedding as well as
        # the token ids, both as NumPy arrays.
        question_hidden_states = model.question_encoder(input_ids)[0]
        docs = retriever(
            input_ids.numpy(),
            question_hidden_states.detach().numpy(),
            return_tensors="pt",
        )

        # Score each retrieved passage against the question embedding.
        doc_scores = torch.bmm(
            question_hidden_states.unsqueeze(1),
            docs["retrieved_doc_embeds"].float().transpose(1, 2),
        ).squeeze(1)

        # The module-level model is loaded without an attached retriever,
        # so the retrieved context, its mask and the scores are supplied
        # explicitly instead of the raw question ids.
        outputs = model.generate(
            context_input_ids=docs["context_input_ids"],
            context_attention_mask=docs["context_attention_mask"],
            doc_scores=doc_scores,
        )

    answer = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return answer[0]
35
+
36
+ # Streamlit frontend
37
st.title("PDF Question-Answering Chatbot")

uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

if uploaded_file is not None:
    # Extract the text of the uploaded PDF.
    pdf_text = read_pdf(uploaded_file)
    if pdf_text.strip():
        st.success("PDF file processed successfully.")
    else:
        # Do not claim success when nothing could be extracted.
        st.warning("No extractable text was found in this PDF.")

    # Single-line input for the user's question.
    question = st.text_input("Ask a question about the PDF content:")

    if question:
        # Generation can take a while; show progress while it runs.
        with st.spinner("Generating answer..."):
            answer = answer_question(question, pdf_text)
        st.subheader("Answer:")
        st.write(answer)
54
+
55
+ # Run the application
56
# No explicit entry point is needed: the script body is the app, and it is
# launched with `streamlit run app.py`. The `streamlit` module exposes no
# `run()` function, so the former `st.run()` call raised AttributeError.