beyondrag / rag.py
giulio98's picture
Update app.py
b5ac9e4
raw
history blame
2.17 kB
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema.document import Document
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_chroma import Chroma
import spaces
from langchain_text_splitters import MarkdownHeaderTextSplitter
import os
from transformers import AutoTokenizer
# Module-level setup: everything below runs once, at import time.

# Hugging Face access token read from the environment; os.getenv returns
# None when HF_TOKEN is not set.
api_token = os.getenv("HF_TOKEN")
# Checkpoint whose tokenizer is loaded below. Only the tokenizer is loaded in
# this module; create_rag_index passes it to the text splitter.
model_name = "meta-llama/Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
# Embedding model used to build the vector index. It is placed on the "cuda"
# device and configured to return normalized embeddings.
# NOTE(review): query_instruction="" presumably disables the instruction
# prefix that BGE models prepend to queries by default — confirm this is
# intended.
embedding_model = HuggingFaceBgeEmbeddings(
model_name="BAAI/bge-large-en-v1.5",
model_kwargs={"device": "cuda"},
encode_kwargs={"normalize_embeddings": True},
query_instruction=""
)
def create_rag_index(text_no_prefix):
    """Split raw document text into chunks and index them in a new vector store.

    The text is split with a recursive character splitter whose length
    function is the module-level Hugging Face ``tokenizer``, so ``chunk_size``
    is measured in tokens (256 per chunk, no overlap). Each chunk becomes a
    ``Document`` and is embedded with the module-level ``embedding_model``.

    Args:
        text_no_prefix: The document text to index, as a single string.

    Returns:
        A ``Chroma`` vector store containing only the chunks of
        ``text_no_prefix``.
    """
    # Local import keeps this block self-contained; uuid is standard library.
    import uuid

    text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
        tokenizer,
        chunk_size=256,
        chunk_overlap=0,
        # NOTE(review): add_start_index appears to only take effect through
        # create_documents(); with split_text() below it is likely a no-op.
        add_start_index=True,
        strip_whitespace=True,
        separators=["\n\n", "\n", ".", " ", ""],
    )

    # Wrap every text chunk in a Document so it can be handed to Chroma.
    docs = [
        Document(page_content=chunk)
        for chunk in text_splitter.split_text(text_no_prefix)
    ]

    # Give every index its own collection. Without an explicit name each call
    # would reuse Chroma's default collection, and in-process Chroma clients
    # share their storage, so chunks from a previously indexed document would
    # leak into the retrieval results of this one.
    collection_name = f"rag-{uuid.uuid4().hex}"

    return Chroma.from_documents(
        documents=docs,
        embedding=embedding_model,
        collection_name=collection_name,
    )
def run_naive_rag_query(vectorstore, query, rag_token_size, prefix, task, few_shot_examples, chunk_size=256):
    """Build the naive-RAG prompt context for ``query``.

    Retrieves the top-k most similar chunks from ``vectorstore`` and assembles
    them into a prompt string. No answer is generated here; the caller gets
    the assembled context back.

    Args:
        vectorstore: Object exposing ``as_retriever(search_type=...,
            search_kwargs=...)`` whose retriever has ``invoke(query)`` and
            returns items with a ``page_content`` attribute.
        query: The user question used for similarity search.
        rag_token_size: Target token budget for the retrieved context.
        prefix: Text placed before the retrieved context.
        task: Text appended directly after the retrieved context.
        few_shot_examples: Text appended after ``task``.
        chunk_size: Token size of one indexed chunk, used to convert the token
            budget into a chunk count. Defaults to 256; it should match the
            chunk size the index was built with.

    Returns:
        ``prefix + "Retrieved context: \\n" + <chunks joined by blank lines>
        + task + few_shot_examples``.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of tokens")

    # Always retrieve at least one chunk. int() guards against a float budget
    # (floor division of a float yields a float, which is not a valid k).
    k = max(1, int(rag_token_size // chunk_size))

    retriever = vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs={"k": k},
    )
    retrieved_docs = retriever.invoke(query)

    # Echo the retrieved chunks to stdout for inspection in the logs.
    for doc in retrieved_docs:
        print("=================")
        print(doc.page_content)
        print("=================")

    formatted_context = "\n\n".join(doc.page_content for doc in retrieved_docs)
    return prefix + "Retrieved context: \n" + formatted_context + task + few_shot_examples