import gradio as gr
import chromadb
from transformers import AutoTokenizer, AutoModel
import numpy as np
import torch
# Load the pre-trained model and tokenizer
model_name = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
# Initialize Chroma client
client = chromadb.Client()
# Create (or reuse) a Chroma collection; get_or_create avoids a
# "collection already exists" error if the app is reloaded in-process
collection = client.get_or_create_collection(name="tree_images")
# Custom dataset of tree descriptions (both decorated and undecorated)
content = [
# Your tree descriptions here...
]
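# Hypothetical examples of the expected format (illustrative assumptions,
# not the original data) -- each entry is one plain-text description string:
# content = [
#     "A tall fir strung with white lights and red baubles.",
#     "A bare spruce in a snowy field, undecorated.",
# ]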
# Function to generate embeddings using the pre-trained model
def generate_embeddings(texts):
    embeddings = []
    for text in texts:
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        with torch.no_grad():
            output = model(**inputs)
        # Mean-pool the final hidden states into one vector per text
        embeddings.append(output.last_hidden_state.mean(dim=1).squeeze().numpy())
    return embeddings
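# Note (a sketch, not part of the original logic): the mean above averages over
# every token position, including any padding. A mask-aware variant, the
# standard pooling for sentence-transformers models, would look like:
#
#     mask = inputs["attention_mask"].unsqueeze(-1).float()
#     pooled = (output.last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1)
#
# With one text per call, as here, no padding is added, so the two agree.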
# Generate embeddings for the content
embeddings = generate_embeddings(content)
# Add the documents and embeddings to Chroma using upsert
for idx, text in enumerate(content):
    collection.upsert(
        ids=[str(idx)],                         # Chroma requires a unique string id per document
        documents=[text],                       # the document (text) itself
        metadatas=[{"id": idx}],                # metadata associated with the document
        embeddings=[embeddings[idx].tolist()],  # the corresponding embedding as a plain list
    )
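# Equivalent batched form (an alternative sketch; Chroma accepts parallel
# lists, so the loop above could be collapsed into a single call):
#
# collection.upsert(
#     ids=[str(i) for i in range(len(content))],
#     documents=content,
#     metadatas=[{"id": i} for i in range(len(content))],
#     embeddings=[e.tolist() for e in embeddings],
# )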
# Define the search function for the Gradio interface
def search(query):
    # Generate an embedding for the query
    query_embedding = generate_embeddings([query])[0]
    # Chroma-based search: query() returns one result list per query
    # embedding, so take the first (and only) list of documents
    chroma_results = collection.query(
        query_embeddings=[query_embedding.tolist()], n_results=3
    )["documents"][0]
    # Return the matching documents as a single string
    return "Chroma Results: " + ", ".join(chroma_results)
# Create the Gradio interface
interface = gr.Interface(fn=search, inputs="text", outputs="text")
# Launch the Gradio interface
interface.launch()