import gradio as gr
import chromadb
from transformers import AutoTokenizer, AutoModel
import numpy as np
import torch

# Load the pre-trained model and tokenizer
model_name = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Initialize Chroma client
client = chromadb.Client()

# Create the Chroma collection (or reuse it if this module is imported twice,
# where a plain create_collection would raise because the name already exists).
collection = client.get_or_create_collection(name="tree_images")

# Custom dataset of tree descriptions (both decorated and undecorated)
content = [
    # Your tree descriptions here...
]


def generate_embeddings(texts):
    """Embed each text with the pre-trained transformer model.

    Args:
        texts: Iterable of strings to embed.

    Returns:
        A list with one 1-D NumPy array per input text, obtained by averaging
        the model's last hidden state over the token dimension.
    """
    embeddings = []
    for text in texts:
        # Each text is tokenized on its own, so no padding tokens are added
        # and a plain mean over the token axis is not diluted by padding.
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        with torch.no_grad():
            output = model(**inputs)
        pooled = output.last_hidden_state.mean(dim=1).squeeze(0)
        embeddings.append(pooled.numpy())
    return embeddings


# Generate embeddings for the content
embeddings = generate_embeddings(content)

# Add the embeddings to Chroma using a single batched upsert. Chroma requires
# an explicit string id per record; the list index is used as a stable id.
# The guard skips the call when the dataset is still the empty placeholder.
if content:
    collection.upsert(
        ids=[str(idx) for idx in range(len(content))],
        documents=list(content),  # the document (text) itself
        metadatas=[{"id": idx} for idx in range(len(content))],  # per-document metadata
        embeddings=[vector.tolist() for vector in embeddings],  # plain float lists
    )


def search(query):
    """Return the stored descriptions closest to ``query`` as one string.

    Args:
        query: Free-text search string entered in the Gradio UI.

    Returns:
        ``"Chroma Results: "`` followed by up to three matching documents
        joined with ``", "`` (nothing after the prefix when the collection is
        empty).
    """
    prefix = "Chroma Results: "

    # Nothing indexed yet: there is nothing to query.
    available = collection.count()
    if available == 0:
        return prefix

    # Generate embedding for the query
    query_embedding = generate_embeddings([query])[0].tolist()

    # Chroma-based search; never request more results than are stored.
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=min(3, available),
    )

    # "documents" holds one list of hits per query embedding; a single query
    # was sent, so the hits are in the first (only) entry.
    per_query_documents = results.get("documents") or [[]]
    matches = per_query_documents[0]

    # Return results
    return prefix + ", ".join(matches)


# Create the Gradio interface
interface = gr.Interface(fn=search, inputs="text", outputs="text")

# Launch the Gradio interface
interface.launch()