from datasets import load_dataset
import numpy as np
import gradio as gr
import chromadb

from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# Initialize ChromaDB client
chroma_client = chromadb.PersistentClient(path="./chroma_db")  # Stores data persistently
collection = chroma_client.get_or_create_collection(name="wikipedia_docs")
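# Note: ChromaDB collections default to L2 distance; BGE embeddings are usually compared
# with cosine similarity, so passing metadata={"hnsw:space": "cosine"} to
# get_or_create_collection is a common adjustment (left at the default here).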

# Load the BAAI embedding model
model_name = "BAAI/bge-base-en"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

def get_embedding(text):
    """Generate embeddings using BAAI/bge-base-en."""
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state[:, 0, :].squeeze(0).numpy().tolist()  # CLS token embedding as a flat list
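# Note: the BGE authors recommend prefixing retrieval *queries* with
# "Represent this sentence for searching relevant passages: " for better recall;
# that prefix is omitted here for simplicity.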

# Load LLaMA Model (Meta LLaMA 2)
llama_pipe = pipeline("text-generation", model="meta-llama/Llama-2-7b-chat-hf")
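# Note: meta-llama/Llama-2-7b-chat-hf is a gated model on the Hugging Face Hub; access
# requires accepting Meta's license and authenticating (e.g. `huggingface-cli login`).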

# Optionally load a small subset (1,000 rows) of Wikipedia instead of the toy docs below
#dataset = load_dataset("wiki40b", "en", split="train[:1000]")

# Extract only text
#docs = [d["text"] for d in dataset]
docs = ["Machine learning is a field of AI...", "Neural networks are inspired by the brain..."]

#print("Loaded dataset with", len(docs), "documents.")

# Embed documents and store them in ChromaDB
for i, doc in enumerate(docs):
    embedding = get_embedding(doc)
    collection.add(ids=[str(i)], embeddings=[embedding], documents=[doc])
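# Note: because the client is persistent, re-running this script tries to add the same
# IDs again; collection.upsert() (available in recent chromadb versions) makes this
# step idempotent if that becomes an issue.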

print("Stored embeddings in ChromaDB!")

# Store embeddings in ChromaDB
#for i, (doc, embedding) in enumerate(zip(docs, embeddings)):
#    collection.add(
#        ids=[str(i)],  # Unique ID for each doc
#        embeddings=[embedding.tolist()],  # Convert numpy array to list
#        documents=[doc]
#    )


# Standalone search function using ChromaDB (kept for reference)
#def search_wikipedia(query, top_k=3):
#    query_embedding = get_embedding(query)
#    results = collection.query(
#        query_embeddings=[query_embedding],
#        n_results=top_k
#    )
#    return results["documents"][0]  # Return top results

# Function to search ChromaDB & generate response
def query_llama(user_input):
    query_embedding = get_embedding(user_input)
    results = collection.query(query_embeddings=[query_embedding], n_results=3)

    if not results["documents"] or not results["documents"][0]:
        return "No relevant documents found."

    context = " ".join(results["documents"][0])
    prompt = f"Using this context, answer the question: {user_input}\nContext: {context}"
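    # Note: Llama-2-chat models are tuned on an [INST] ... [/INST] chat format;
    # tokenizer.apply_chat_template usually yields better answers than this plain prompt.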

    response = llama_pipe(prompt, max_new_tokens=200)
    return f"**LLaMA Response:** {response[0]['generated_text']}\n\n**Retrieved Docs:** {context}"


# Gradio Interface
iface = gr.Interface(
    fn=query_llama, 
    inputs="text", 
    outputs="text", 
    title="Wikipedia Search RAG",
    description="Enter a query and retrieve relevant Wikipedia passages."
)

iface.launch()