# RAG_Chat_Bot / app.py
# Dependencies (install before running):
#   pip install transformers torch accelerate PyMuPDF streamlit
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import fitz # PyMuPDF
# Load the tokenizer and model once and cache them across Streamlit reruns
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained("ricepaper/vi-gemma-2b-RAG")
    model = AutoModelForCausalLM.from_pretrained(
        "ricepaper/vi-gemma-2b-RAG",
        device_map="auto",           # let accelerate place the weights (GPU if available, else CPU)
        torch_dtype=torch.bfloat16,
    )
    # Note: with device_map="auto" the model is already dispatched to the
    # appropriate device, so an explicit model.to(device) is unnecessary
    # (and can raise an error with recent versions of accelerate).
    return tokenizer, model
tokenizer, model = load_model()
# Function to read text from a PDF file
def read_pdf(file):
    text = ""
    # Open the uploaded file from its in-memory bytes rather than a file path
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        for page in doc:
            text += page.get_text()
    return text
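# A possible safeguard (illustrative sketch, assuming a simple character budget
# rather than an exact token count): very long PDFs can exceed the model's
# context window, so the extracted text could be truncated before it is placed
# into the prompt, e.g.
#     MAX_CONTEXT_CHARS = 8000  # hypothetical limit, tune for the model
#     pdf_text = pdf_text[:MAX_CONTEXT_CHARS]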
# Streamlit app
st.title("PDF Question Answering with vi-gemma-2b-RAG")
st.write("Upload a PDF file, and ask a question based on its content.")
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
question = st.text_input("Enter your question:")
if uploaded_file is not None and question:
    # Read PDF content
    pdf_text = read_pdf(uploaded_file)

    # Prepare the input for the model
    prompt_template = """
### Instruction and Input:
Based on the following context/documentation:
{}
Please answer the question: {}
### Response:
{}
"""
    input_text = prompt_template.format(pdf_text, question, "")
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Generate a response (autocast only has an effect when CUDA is available)
    with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            no_repeat_ngram_size=5,
        )

    # Decode only the newly generated tokens so the displayed answer is not
    # prefixed with the full prompt (PDF text + question)
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:],
        skip_special_tokens=True,
    )

    st.subheader("Answer:")
    st.write(response)
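# Usage note (assuming the file is saved as app.py at the repository root):
# run the app locally with
#     streamlit run app.py
# On Hugging Face Spaces with the Streamlit SDK, the app is started automatically.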