from datasets import load_dataset
import numpy as np
import gradio as gr
import chromadb
from transformers import AutoModel, AutoTokenizer, pipeline
import torch
import os
import requests
# Hugging Face Inference API endpoint for Llama 2 (requires a valid HF token in the Space secrets)
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-hf"
headers = {"Authorization": f"Bearer {os.getenv('HF_Token')}"}

def query_llama(prompt):
    """Send a prompt to the hosted Llama 2 model and return the raw JSON response."""
    payload = {"inputs": prompt}
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

prompt = "Explain machine learning in simple terms."
response = query_llama(prompt)
print(response)
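# NOTE: everything below (ChromaDB setup, RAG retrieval, and the Gradio app) is wrapped
# in a triple-quoted string, so it is currently disabled and never executes.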
'''
# Initialize ChromaDB client
chroma_client = chromadb.PersistentClient(path="./chroma_db")  # Stores data persistently
collection = chroma_client.get_or_create_collection(name="wikipedia_docs")

# Load the BAAI embedding model
model_name = "BAAI/bge-base-en"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
def get_embedding(text):
    """Generate a single embedding vector using BAAI/bge-base-en."""
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Take the CLS token embedding and flatten it to a plain list of floats,
    # since the ChromaDB calls below wrap it in a list themselves
    return outputs.last_hidden_state[:, 0, :].squeeze(0).numpy().tolist()
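# Quick sanity-check sketch (assumption: bge-base-en uses a 768-dim hidden size,
# so get_embedding should return a flat list of 768 floats):
# vec = get_embedding("What is machine learning?")
# print(len(vec))  # expected: 768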
# Load LLaMA Model (Meta LLaMA 2)
#llama_pipe = pipeline("text-generation", model=AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf"))

# Load a small subset (1,000 rows)
#dataset = load_dataset("wiki40b", "en", split="train[:1000]")
# Extract only text
#docs = [d["text"] for d in dataset]

docs = ["Machine learning is a field of AI...", "Neural networks are inspired by the brain..."]
#print("Loaded dataset with", len(docs), "documents.")
# ✅ Step 2: Embed and Store in ChromaDB
for i, doc in enumerate(docs):
    embedding = get_embedding(doc)
    collection.add(ids=[str(i)], embeddings=[embedding], documents=[doc])

print("Stored embeddings in ChromaDB!")
# Store embeddings in ChromaDB (earlier version, kept for reference)
#for i, (doc, embedding) in enumerate(zip(docs, embeddings)):
#    collection.add(
#        ids=[str(i)],  # Unique ID for each doc
#        embeddings=[embedding.tolist()],  # Convert numpy array to list
#        documents=[doc]
#    )
# Search function using ChromaDB (earlier version, kept for reference)
#def search_wikipedia(query, top_k=3):
#    query_embedding = embed_model.encode([query]).tolist()
#    results = collection.query(
#        query_embeddings=query_embedding,
#        n_results=top_k,
#    )
#    #return "\n\n".join(results["documents"][0])  # Return top results
#    return results["documents"][0]  # Return top results
# Function to search ChromaDB & generate a response
# (redefines query_llama from above and requires llama_pipe, which is currently commented out)
def query_llama(user_input):
    query_embedding = get_embedding(user_input)
    results = collection.query(query_embeddings=[query_embedding], n_results=3)
    if not results["documents"]:
        return "No relevant documents found."
    context = " ".join(results["documents"][0])
    prompt = f"Using this context, answer the question: {user_input}\nContext: {context}"
    response = llama_pipe(prompt, max_length=200)
    return f"**LLaMA Response:** {response[0]['generated_text']}\n\n**Retrieved Docs:** {context}"
# Gradio Interface
iface = gr.Interface(
    fn=query_llama,
    inputs="text",
    outputs="text",
    title="Wikipedia Search RAG",
    description="Enter a query and retrieve relevant Wikipedia passages.",
)

iface.launch()
'''