import json

import gradio as gr
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_index.core import Document, VectorStoreIndex
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)

model_url = "https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf"

# Initialize the local Llama 3.2 model (GGUF, run through llama.cpp)
llm = LlamaCPP(
    model_url=model_url,
    temperature=0.1,
    max_new_tokens=256,
    context_window=2048,
    model_kwargs={"n_gpu_layers": 1},
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

# Initialize the embedding model (LlamaIndex accepts LangChain embeddings via its
# LangChain wrapper, which requires the llama-index-embeddings-langchain package)
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")


def initialize_index():
    """Initialize the vector store index from JSON data."""
    # Load JSON data
    with open("dummy_users_with_tasks.json", "r") as file:
        json_data = json.load(file)

    # Convert each user record into a plain-text Document for embedding
    documents = []
    for user in json_data:
        tasks_summary = "\n".join(
            f"Task: {task['title']} | Start: {task['start_date']} | Due: {task['due_date']}"
            for task in user["tasks"]
        )
        doc_text = f"User: {user['name']} | Email: {user['email']}\nTasks:\n{tasks_summary}"
        documents.append(Document(text=doc_text))

    # Create the index over the documents
    index = VectorStoreIndex.from_documents(
        documents,
        embed_model=embeddings,
    )

    # Return a query engine backed by the Llama model
    return index.as_query_engine(llm=llm)


# Initialize the query engine at startup
query_engine = initialize_index()


def process_query(
    message: str,
    history: list[tuple[str, str]],
) -> str:
    """Process a query using the RAG system (history is unused; each query is stateless)."""
    try:
        # Get the response from the query engine and return its text
        response = query_engine.query(message)
        return str(response)
    except Exception as e:
        return f"Error processing query: {str(e)}"


# Gradio chat interface; ChatInterface matches the (message, history) signature
interface = gr.ChatInterface(fn=process_query)

if __name__ == "__main__":
    interface.launch()
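

# --- Assumed structure of dummy_users_with_tasks.json ---
# The data file itself is not shown here. Judging only from the fields read in
# initialize_index() (name, email, and tasks with title/start_date/due_date),
# it is assumed to be a JSON array of user records. The hypothetical one-off
# snippet below (run separately, before starting this app) writes a minimal
# file in that shape:
#
#     import json
#
#     sample_users = [
#         {
#             "name": "Jane Doe",
#             "email": "jane.doe@example.com",
#             "tasks": [
#                 {
#                     "title": "Prepare quarterly report",
#                     "start_date": "2025-01-02",
#                     "due_date": "2025-01-15",
#                 }
#             ],
#         }
#     ]
#     with open("dummy_users_with_tasks.json", "w") as f:
#         json.dump(sample_users, f, indent=2)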