from datasets import load_dataset
import numpy as np
import gradio as gr
import chromadb
from transformers import AutoModel, AutoTokenizer, pipeline
import torch
import os
import requests
# Hugging Face Inference API endpoint for LLaMA 2; the access token is read
# from the HF_Token environment variable / Space secret.
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-hf"
headers = {"Authorization": f"Bearer {os.getenv('HF_Token')}"}
def query_llama(prompt):
    """Send a text-generation request to the hosted LLaMA 2 model."""
    payload = {"inputs": prompt}
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()
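# Note: on success the Inference API typically returns a list like
# [{"generated_text": "..."}]; while the model is still loading it may return
# an error payload instead, so callers may want to check for a dict with "error".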
prompt = "Explain machine learning in simple terms."
response = query_llama(prompt)
print(response)
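# ---------------------------------------------------------------------------
# Everything below is currently disabled: it is wrapped in a triple-quoted
# string, so only the Inference API call above actually runs. To enable the
# RAG pipeline, remove the surrounding ''' markers and define llama_pipe
# (its pipeline() call is still commented out further down).
# ---------------------------------------------------------------------------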
'''
# Initialize ChromaDB client
chroma_client = chromadb.PersistentClient(path="./chroma_db") # Stores data persistently
collection = chroma_client.get_or_create_collection(name="wikipedia_docs")
# Load the BAAI embedding model
model_name = "BAAI/bge-base-en"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
def get_embedding(text):
    """Generate a single embedding vector for `text` using BAAI/bge-base-en."""
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Take the [CLS] token embedding and flatten it to a plain list of floats,
    # which is the shape ChromaDB expects for one embedding.
    return outputs.last_hidden_state[0, 0, :].numpy().tolist()
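# Hypothetical usage once this block is enabled:
#   vec = get_embedding("hello world")   # list of 768 floats for bge-base-en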
# Load LLaMA Model (Meta LLaMA 2) - uncomment to generate locally instead of via the API
#llama_pipe = pipeline("text-generation", model="meta-llama/Llama-2-7b-chat-hf")
# Load a small subset (10,000 rows)
#dataset = load_dataset("wiki40b", "en", split="train[:1000]")
# Extract only text
#docs = [d["text"] for d in dataset]
docs = ["Machine learning is a field of AI...", "Neural networks are inspired by the brain..."]
#print("Loaded dataset with", len(docs), "documents.")
# ✅ Step 2: Embed and Store in ChromaDB
for i, doc in enumerate(docs):
    embedding = get_embedding(doc)
    collection.add(ids=[str(i)], embeddings=[embedding], documents=[doc])
print("Stored embeddings in ChromaDB!")
# Store embeddings in ChromaDB
#for i, (doc, embedding) in enumerate(zip(docs, embeddings)):
# collection.add(
# ids=[str(i)], # Unique ID for each doc
# embeddings=[embedding.tolist()], # Convert numpy array to list
# documents=[doc]
# )
# Search function using ChromaDB (earlier draft, kept for reference; note that
# embed_model is not defined in this file - get_embedding() is used instead)
#def search_wikipedia(query, top_k=3):
#    query_embedding = embed_model.encode([query]).tolist()
#    results = collection.query(
#        query_embeddings=query_embedding,
#        n_results=top_k,
#    )
#    return "\n\n".join(results["documents"][0])  # Return top results
# Function to search ChromaDB & generate a response.
# Note: this redefines query_llama from above and requires llama_pipe
# (the commented-out pipeline() call) to be defined before it can run.
def query_llama(user_input):
    query_embedding = get_embedding(user_input)
    results = collection.query(query_embeddings=[query_embedding], n_results=3)
    if not results["documents"] or not results["documents"][0]:
        return "No relevant documents found."
    context = " ".join(results["documents"][0])
    prompt = f"Using this context, answer the question: {user_input}\nContext: {context}"
    response = llama_pipe(prompt, max_length=200)
    return f"**LLaMA Response:** {response[0]['generated_text']}\n\n**Retrieved Docs:** {context}"
# Gradio Interface
iface = gr.Interface(
    fn=query_llama,
    inputs="text",
    outputs="text",
    title="Wikipedia Search RAG",
    description="Enter a query and retrieve relevant Wikipedia passages.",
)
iface.launch()
'''