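# NOTE: assumed dependencies (not pinned in the source): gradio, llama-index,
# llama-index-llms-ollama, llama-index-embeddings-huggingface, plus a local
# Ollama installation (set up below via the official install script).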
import os
import gradio as gr
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
# Set up Ollama
os.system('curl -fsSL https://ollama.com/install.sh | sh')
os.system('ollama serve &')  # start the Ollama server in the background
os.system('sleep 5')         # give the server a moment to start
os.system('ollama pull llama3.2')
# Initialize embeddings and LLM
embeddings = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
llama = Ollama(
    model="llama3.2",
    request_timeout=1000,
)
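
# NOTE: the query engine below is built once at startup from PDFs in the local
# "data" directory; SimpleDirectoryReader raises an error if that directory is
# missing or contains no matching files.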
def initialize_index():
    """Initialize the vector store index from PDF files in the data directory."""
    # Load documents from the data directory
    loader = SimpleDirectoryReader(
        input_dir="data",
        required_exts=[".pdf"],
    )
    documents = loader.load_data()

    # Create the index using the HuggingFace embedding model
    index = VectorStoreIndex.from_documents(
        documents,
        embed_model=embeddings,
    )

    # Return a query engine backed by the Llama model
    return index.as_query_engine(llm=llama)

# Initialize the query engine at startup
query_engine = initialize_index()

def process_query(
    message: str,
    history: list[tuple[str, str]],
) -> str:
    """Process a query using the RAG query engine."""
    try:
        # Get the response from the query engine
        response = query_engine.query(message)
        return str(response)
    except Exception as e:
        return f"Error processing query: {str(e)}"

# Create the Gradio chat interface
demo = gr.ChatInterface(
    process_query,
    title="PDF Question Answering with RAG + Llama",
    description="Ask questions about the content of the loaded PDF documents using the Llama model.",
    examples=[
        ["What is a computer?"],
    ],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)

if __name__ == "__main__":
    demo.launch()