from datasets import load_dataset
import numpy as np
import gradio as gr
import chromadb
from transformers import AutoModel, AutoTokenizer, pipeline
import torch
import os
import requests
# Hugging Face Inference API endpoint for LLaMA 2; the access token is read
# from the HF_Token environment variable / Space secret.
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-hf"
headers = {"Authorization": f"Bearer {os.getenv('HF_Token')}"}
def query_llama(prompt):
    """Send a text-generation request to the hosted LLaMA 2 model."""
    payload = {"inputs": prompt}
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()
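# Note: on success the Inference API typically returns a list like
# [{"generated_text": "..."}]; while the model is still loading it may return
# an error payload instead, so callers may want to check for a dict with "error".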
prompt = "Explain machine learning in simple terms."
response = query_llama(prompt)
print(response)
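# ---------------------------------------------------------------------------
# Everything below is currently disabled: it is wrapped in a triple-quoted
# string, so only the Inference API call above actually runs. To enable the
# RAG pipeline, remove the surrounding ''' markers and define llama_pipe
# (its pipeline() call is still commented out further down).
# ---------------------------------------------------------------------------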
'''
# Initialize ChromaDB client
chroma_client = chromadb.PersistentClient(path="./chroma_db") # Stores data persistently
collection = chroma_client.get_or_create_collection(name="wikipedia_docs")
# Load the BAAI embedding model
model_name = "BAAI/bge-base-en"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
def get_embedding(text):
    """Generate a single embedding vector for `text` using BAAI/bge-base-en."""
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Take the [CLS] token embedding and flatten it to a plain list of floats,
    # which is the shape ChromaDB expects for one embedding.
    return outputs.last_hidden_state[0, 0, :].numpy().tolist()
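# Hypothetical usage once this block is enabled:
#   vec = get_embedding("hello world")   # list of 768 floats for bge-base-en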
# Load LLaMA Model (Meta LLaMA 2) - uncomment to generate locally instead of via the API
#llama_pipe = pipeline("text-generation", model="meta-llama/Llama-2-7b-chat-hf")
# Load a small subset (10,000 rows)
#dataset = load_dataset("wiki40b", "en", split="train[:1000]")
# Extract only text
#docs = [d["text"] for d in dataset]
docs = ["Machine learning is a field of AI...", "Neural networks are inspired by the brain..."]
#print("Loaded dataset with", len(docs), "documents.")
# ✅ Step 2: Embed and Store in ChromaDB
for i, doc in enumerate(docs):
    embedding = get_embedding(doc)
    collection.add(ids=[str(i)], embeddings=[embedding], documents=[doc])
print("Stored embeddings in ChromaDB!")
# Store embeddings in ChromaDB
#for i, (doc, embedding) in enumerate(zip(docs, embeddings)):
# collection.add(
# ids=[str(i)], # Unique ID for each doc
# embeddings=[embedding.tolist()], # Convert numpy array to list
# documents=[doc]
# )
# Search function using ChromaDB (earlier draft, kept for reference; note that
# embed_model is not defined in this file - get_embedding() is used instead)
#def search_wikipedia(query, top_k=3):
#    query_embedding = embed_model.encode([query]).tolist()
#    results = collection.query(
#        query_embeddings=query_embedding,
#        n_results=top_k,
#    )
#    return "\n\n".join(results["documents"][0])  # Return top results
# Function to search ChromaDB & generate a response.
# Note: this redefines query_llama from above and requires llama_pipe
# (the commented-out pipeline() call) to be defined before it can run.
def query_llama(user_input):
    query_embedding = get_embedding(user_input)
    results = collection.query(query_embeddings=[query_embedding], n_results=3)
    if not results["documents"] or not results["documents"][0]:
        return "No relevant documents found."
    context = " ".join(results["documents"][0])
    prompt = f"Using this context, answer the question: {user_input}\nContext: {context}"
    response = llama_pipe(prompt, max_length=200)
    return f"**LLaMA Response:** {response[0]['generated_text']}\n\n**Retrieved Docs:** {context}"
# Gradio Interface
iface = gr.Interface(
    fn=query_llama,
    inputs="text",
    outputs="text",
    title="Wikipedia Search RAG",
    description="Enter a query and retrieve relevant Wikipedia passages.",
)
iface.launch()
'''