from datasets import load_dataset
import numpy as np
import gradio as gr
import chromadb
from transformers import AutoModel, AutoTokenizer, pipeline
import torch
import os
# Ensure the Hugging Face access token is set
hf_token = os.getenv("HF_Token")
if not hf_token:
    raise ValueError("HF_Token is not set. Please check your Hugging Face Secrets.")
# Load LLaMA-2 model with authentication
llama_pipe = pipeline(
    "text-generation",
    model="TheBloke/Llama-2-7B-Chat-GGUF",
    token=hf_token,  # Pass the token explicitly
)
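# Note (likely cause of this Space's runtime error): TheBloke/Llama-2-7B-Chat-GGUF ships
# quantized GGUF weights intended for llama.cpp-style runtimes rather than a standard
# transformers checkpoint, so the pipeline call above is not expected to load it directly.
# A minimal alternative sketch, assuming access to Meta's gated transformers-format
# checkpoint (requires accepting the Llama 2 license on the Hub):
#
#     llama_pipe = pipeline(
#         "text-generation",
#         model="meta-llama/Llama-2-7b-chat-hf",
#         token=hf_token,
#     )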
# Test LLaMA-2 inference
output = llama_pipe("What is machine learning?", max_length=100)
print(output)
# NOTE: the full RAG app below is currently disabled by this triple-quoted string;
# remove the opening and closing ''' to activate it.
'''
# Initialize ChromaDB client
chroma_client = chromadb.PersistentClient(path="./chroma_db") # Stores data persistently
collection = chroma_client.get_or_create_collection(name="wikipedia_docs")
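# Note: Chroma collections default to L2 distance, while bge embeddings are usually
# compared with cosine similarity. Optionally, the collection can be created with a
# cosine index instead; a hedged alternative (only takes effect for a new collection):
#
#     collection = chroma_client.get_or_create_collection(
#         name="wikipedia_docs",
#         metadata={"hnsw:space": "cosine"},
#     )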
# Load the BAAI embedding model
model_name = "BAAI/bge-base-en"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
def get_embedding(text):
    """Generate an embedding using BAAI/bge-base-en (CLS-token pooling)."""
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state[0, 0, :].numpy().tolist()  # CLS token embedding as a flat list
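# Note: the bge-en model cards recommend L2-normalising embeddings and prepending an
# instruction prefix to retrieval queries. A minimal optional sketch; the helper name
# and its use are suggestions, not part of the original app:
def get_query_embedding(text):
    """Embed a search query with the instruction prefix suggested for bge-en models."""
    prefix = "Represent this sentence for searching relevant passages: "
    inputs = tokenizer(prefix + text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    emb = outputs.last_hidden_state[:, 0, :]
    emb = torch.nn.functional.normalize(emb, p=2, dim=1)  # unit length for cosine-style search
    return emb[0].tolist()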
# Load LLaMA model for generation (Meta LLaMA 2, transformers-format chat checkpoint)
llama_pipe = pipeline("text-generation", model="meta-llama/Llama-2-7b-chat-hf", token=hf_token)
# Load a small subset of Wikipedia (first 1,000 rows); disabled for now
#dataset = load_dataset("wiki40b", "en", split="train[:1000]")
# Extract only text
#docs = [d["text"] for d in dataset]
docs = ["Machine learning is a field of AI...", "Neural networks are inspired by the brain..."]
#print("Loaded dataset with", len(docs), "documents.")
# ✅ Step 2: Embed and store in ChromaDB
for i, doc in enumerate(docs):
    embedding = get_embedding(doc)
    collection.add(ids=[str(i)], embeddings=[embedding], documents=[doc])
print("Stored embeddings in ChromaDB!")
# Store embeddings in ChromaDB
#for i, (doc, embedding) in enumerate(zip(docs, embeddings)):
# collection.add(
# ids=[str(i)], # Unique ID for each doc
# embeddings=[embedding.tolist()], # Convert numpy array to list
# documents=[doc]
# )
# Search function using ChromaDB (earlier draft, kept for reference)
#def search_wikipedia(query, top_k=3):
#    query_embedding = embed_model.encode([query]).tolist()
#    results = collection.query(
#        query_embeddings=query_embedding,
#        n_results=top_k,
#    )
#    return "\n\n".join(results["documents"][0])  # Return top results as one string
#    # return results["documents"][0]  # ...or return them as a list
# Function to search ChromaDB & generate a response
def query_llama(user_input):
    query_embedding = get_embedding(user_input)
    results = collection.query(query_embeddings=[query_embedding], n_results=3)
    if not results["documents"] or not results["documents"][0]:
        return "No relevant documents found."
    context = " ".join(results["documents"][0])
    prompt = f"Using this context, answer the question: {user_input}\nContext: {context}"
    response = llama_pipe(prompt, max_length=200)
    return f"**LLaMA Response:** {response[0]['generated_text']}\n\n**Retrieved Docs:** {context}"
# Gradio interface
iface = gr.Interface(
    fn=query_llama,
    inputs="text",
    outputs="text",
    title="Wikipedia Search RAG",
    description="Enter a query and retrieve relevant Wikipedia passages.",
)
iface.launch()
'''