Spaces:

willco-afk
/

RAG_AI_BOT

Sleeping

File size: 1,926 Bytes

import gradio as gr
import chromadb
from transformers import AutoTokenizer, AutoModel
import numpy as np
import torch

# Load the pre-trained sentence-embedding model and its matching tokenizer
model_name = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Initialize Chroma client
client = chromadb.Client()

# Fetch the collection if it already exists, otherwise create it.
# create_collection() raises on a duplicate name, which breaks re-running
# this script inside the same process (notebook cell, hot reload).
collection = client.get_or_create_collection(name="tree_images")

# Custom dataset of tree descriptions (both decorated and undecorated).
# Each entry is one text document that gets embedded and indexed below.
content = [
    # Your tree descriptions here...
]

# Function to generate embeddings using the pre-trained model
def generate_embeddings(texts):
    """Embed each text with the module-level tokenizer and model.

    Every text is tokenized and run through the model on its own. Its
    embedding is the mean of the model's last hidden state over dim 1
    (presumably the token axis -- confirm against the model's output
    layout), squeezed and converted to a NumPy array.

    Args:
        texts: Iterable of strings to embed.

    Returns:
        A list with one NumPy array per input text, in input order.
        An empty iterable yields an empty list.
    """
    vectors = []
    # Inference only: no autograd graph is needed for any of the texts.
    with torch.no_grad():
        for passage in texts:
            encoded = tokenizer(
                passage, return_tensors="pt", padding=True, truncation=True
            )
            hidden_states = model(**encoded).last_hidden_state
            pooled = hidden_states.mean(dim=1)
            vectors.append(pooled.squeeze().numpy())
    return vectors

# Generate embeddings for the content
embeddings = generate_embeddings(content)

# Index every document in Chroma with a single upsert call.
# Chroma requires a unique string id per record; the list position is used
# so that re-running the script overwrites records instead of duplicating
# them. The call is skipped when there is nothing to index, because Chroma
# rejects an upsert with empty lists.
if content:
    collection.upsert(
        ids=[str(idx) for idx in range(len(content))],
        documents=list(content),  # the document (text) itself
        metadatas=[{"id": idx} for idx in range(len(content))],
        # Plain lists of floats are accepted by every Chroma version.
        embeddings=[vector.tolist() for vector in embeddings],
    )

# Define the search function for Gradio interface
def search(query):
    """Return the stored documents closest to ``query`` as one string.

    The query is embedded with ``generate_embeddings`` and used for a
    nearest-neighbour lookup in the Chroma collection, asking for at most
    three results.

    Args:
        query: Free-text query entered in the Gradio text box.

    Returns:
        The string ``"Chroma Results: "`` followed by the matching
        documents joined with ``", "``. Nothing follows the prefix when no
        documents are returned.
    """
    # Generate embedding for the query as a flat list of floats
    query_vector = generate_embeddings([query])[0].tolist()

    # Chroma-based search
    response = collection.query(query_embeddings=[query_vector], n_results=3)

    # query() returns one list of documents per query embedding. Only one
    # embedding was sent, so the matches are the first (and only) inner list.
    documents_per_query = response["documents"] or [[]]
    matches = documents_per_query[0]

    # Return results
    return "Chroma Results: " + ", ".join(matches)

# Build a text-in / text-out Gradio UI around the search function
interface = gr.Interface(
    fn=search,
    inputs="text",
    outputs="text",
)

# Start serving the UI
interface.launch()