import json

import gradio as gr
from llama_index.core import Document, VectorStoreIndex
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)

# Initialize the LLM: download the GGUF model and run it locally via llama.cpp
model_url = 'https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf'
llm = LlamaCPP(
    model_url=model_url,
    temperature=0.1,
    max_new_tokens=256,
    context_window=2048,
    model_kwargs={"n_gpu_layers": 1},  # offload one layer to the GPU if available
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

# Initialize the embedding model used to vectorize the user/task documents.
# Note: passing a LangChain embedding into llama_index typically requires its
# LangChain adapter (the llama-index-embeddings-langchain package).
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
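
# The structure of dummy_users_with_tasks.json is not shown in this file; the
# sketch below is inferred from the fields accessed in initialize_index(), and
# the values are purely illustrative:
#
# [
#   {
#     "name": "Jane Doe",
#     "email": "jane.doe@example.com",
#     "tasks": [
#       {"title": "Prepare report", "start_date": "2024-01-05", "due_date": "2024-01-12"}
#     ]
#   }
# ]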

def initialize_index():
    """Initialize the vector store index from JSON data."""
    # Load JSON data
    with open("dummy_users_with_tasks.json", "r") as file:
        json_data = json.load(file)
    
    # Convert each user record to plain text and wrap it in a llama_index
    # Document, since VectorStoreIndex.from_documents expects Document objects
    documents = []
    for user in json_data:
        tasks_summary = "\n".join(
            f"Task: {task['title']} | Start: {task['start_date']} | Due: {task['due_date']}"
            for task in user["tasks"]
        )
        doc_text = f"User: {user['name']} | Email: {user['email']}\nTasks:\n{tasks_summary}"
        documents.append(Document(text=doc_text))
    
    # Create index
    index = VectorStoreIndex.from_documents(
        documents,
        embed_model=embeddings,
    )
    
    # Return query engine with Llama
    return index.as_query_engine(llm=llm)

# Initialize the query engine at startup
query_engine = initialize_index()
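
# A quick way to sanity-check the pipeline without the UI (illustrative query
# only; it assumes a user actually present in dummy_users_with_tasks.json):
# print(query_engine.query("Which tasks are due this week for Jane Doe?"))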

def process_query(
    message: str,
    history: list[tuple[str, str]],
) -> str:
    """Process a chat message using the RAG query engine (history is unused)."""
    try:
        # Query the index; convert the Response object to a plain string for Gradio
        response = query_engine.query(message)
        return str(response)
    except Exception as e:
        return f"Error processing query: {str(e)}"

# Gradio chat interface: gr.ChatInterface calls process_query with (message, history),
# matching the function signature above (gr.Interface with an unpaired "state" input
# would not wire the chat history through correctly)
interface = gr.ChatInterface(fn=process_query)

if __name__ == "__main__":
    interface.launch()
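
# To expose a temporary public URL (e.g. when running on a remote machine), Gradio
# supports interface.launch(share=True); it is left off here to keep the app local.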