from datasets import load_dataset
import numpy as np
import gradio as gr
import chromadb
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
# Initialize ChromaDB client
chroma_client = chromadb.PersistentClient(path="./chroma_db") # Stores data persistently
collection = chroma_client.get_or_create_collection(name="wikipedia_docs")
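# Note: on Hugging Face Spaces the local disk is ephemeral unless persistent
# storage is enabled, so ./chroma_db is rebuilt from scratch on each restart.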
# Load the BAAI embedding model
model_name = "BAAI/bge-base-en"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
def get_embedding(text):
    """Generate an embedding using BAAI/bge-base-en (CLS-token pooling)."""
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Take the CLS token embedding and flatten it to a plain list of floats
    return outputs.last_hidden_state[0, 0, :].numpy().tolist()
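# Sketch note (assumption): for retrieval, BAAI recommends prefixing queries with
# "Represent this sentence for searching relevant passages: " and normalizing
# embeddings; both are skipped here for brevity. get_embedding("hello") returns
# a 768-dimensional list (bge-base-en uses a BERT-base backbone).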
# Load the LLaMA 2 chat model (passing the model id lets pipeline load the matching tokenizer)
llama_pipe = pipeline("text-generation", model="meta-llama/Llama-2-7b-chat-hf")
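# Sketch note: meta-llama/Llama-2-7b-chat-hf is gated on the Hub, so loading it
# requires accepting Meta's license and authenticating (e.g. an HF_TOKEN secret).
# The full float32 weights need roughly 28 GB of memory; on GPU hardware you
# would typically pass torch_dtype=torch.float16 and device_map="auto" to pipeline().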
# Optionally load a small subset of Wikipedia (first 1,000 rows)
#dataset = load_dataset("wiki40b", "en", split="train[:1000]")
# Extract only text
#docs = [d["text"] for d in dataset]
docs = ["Machine learning is a field of AI...", "Neural networks are inspired by the brain..."]
#print("Loaded dataset with", len(docs), "documents.")
# ✅ Step 2: Embed and Store in ChromaDB
for i, doc in enumerate(docs):
    embedding = get_embedding(doc)
    collection.add(ids=[str(i)], embeddings=[embedding], documents=[doc])
print("Stored embeddings in ChromaDB!")
# Store embeddings in ChromaDB
#for i, (doc, embedding) in enumerate(zip(docs, embeddings)):
# collection.add(
# ids=[str(i)], # Unique ID for each doc
# embeddings=[embedding.tolist()], # Convert numpy array to list
# documents=[doc]
# )
# Alternative search function using ChromaDB (kept for reference)
#def search_wikipedia(query, top_k=3):
#    query_embedding = get_embedding(query)
#    results = collection.query(
#        query_embeddings=[query_embedding],
#        n_results=top_k,
#    )
#    return "\n\n".join(results["documents"][0])  # Return top results
# Search ChromaDB, then generate a response with LLaMA
def query_llama(user_input):
    query_embedding = get_embedding(user_input)
    results = collection.query(query_embeddings=[query_embedding], n_results=3)
    if not results["documents"] or not results["documents"][0]:
        return "No relevant documents found."
    context = " ".join(results["documents"][0])
    prompt = f"Using this context, answer the question: {user_input}\nContext: {context}"
    response = llama_pipe(prompt, max_new_tokens=200)
    return f"**LLaMA Response:** {response[0]['generated_text']}\n\n**Retrieved Docs:** {context}"
# Gradio Interface
iface = gr.Interface(
fn=query_llama,
inputs="text",
outputs="text",
title="Wikipedia Search RAG",
description="Enter a query and retrieve relevant Wikipedia passages."
)
iface.launch()