File size: 1,926 Bytes
1764725
 
 
 
 
4e0c319
1764725
 
 
 
4e0c319
1764725
 
4e0c319
1764725
 
4e0c319
9eaaba5
 
092e189
9eaaba5
2ffecbc
1764725
 
 
 
 
 
 
 
 
2ffecbc
1764725
 
 
092e189
1764725
092e189
c6cea3b
 
 
 
1764725
 
 
 
 
19da2df
1764725
 
19da2df
1764725
092e189
1764725
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import gradio as gr
import chromadb
from transformers import AutoTokenizer, AutoModel
import numpy as np
import torch

# Load the pre-trained model and the tokenizer from the same checkpoint name
model_name = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Initialize Chroma client
client = chromadb.Client()

# Fetch the collection if it already exists, otherwise create it.
# create_collection raises when the name is already taken, which makes the
# script fail if this module is executed again against the same client state
# (e.g. a notebook re-run or a hot reload).
collection = client.get_or_create_collection(name="tree_images")

# Custom dataset of tree descriptions (both decorated and undecorated)
content = [
    # Your tree descriptions here...
]

# Function to generate embeddings using the pre-trained model
def generate_embeddings(texts):
    """Return one embedding per input text, in input order.

    Each text is tokenized and run through the module-level ``model`` on its
    own. The model's ``last_hidden_state`` is averaged over dimension 1
    (presumably the token axis -- confirm against the model's output layout),
    size-1 dimensions are squeezed away, and the result is converted to a
    NumPy array.

    Args:
        texts: Iterable of strings to embed.

    Returns:
        A list of NumPy arrays, one per element of ``texts``; an empty list
        when ``texts`` is empty.
    """

    def _embed_single(text):
        # Encode a single text as PyTorch tensors.
        encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        # Inference only: disable gradient tracking so .numpy() is allowed.
        with torch.no_grad():
            hidden_states = model(**encoded).last_hidden_state
            pooled = hidden_states.mean(dim=1)
            return pooled.squeeze().numpy()

    return [_embed_single(text) for text in texts]

# Generate embeddings for the content
embeddings = generate_embeddings(content)

# Add the documents to Chroma using a single batched upsert.
# Chroma requires an explicit, unique string id for every record; the
# position of each description in `content` is used so that re-running the
# script overwrites the same records instead of failing or duplicating them.
# The call is skipped when `content` is empty because there is nothing to store.
if content:
    collection.upsert(
        ids=[str(idx) for idx in range(len(content))],  # unique record ids
        documents=list(content),  # the documents (texts) themselves
        metadatas=[{"id": idx} for idx in range(len(content))],  # per-document metadata
        # Plain float lists are accepted by every Chroma version.
        embeddings=[vector.tolist() for vector in embeddings],
    )

# Define the search function for Gradio interface
def search(query):
    """Return up to three stored documents closest to ``query``.

    Args:
        query: Free-text query string entered in the Gradio interface.

    Returns:
        A string starting with ``"Chroma Results: "`` followed by the matched
        documents joined with ``", "``.
    """
    # Generate embedding for the query, shaped as a batch containing one vector
    query_embedding = generate_embeddings([query])[0].reshape(1, -1)

    # Chroma-based search; the embedding is passed as a plain nested list
    results = collection.query(
        query_embeddings=query_embedding.tolist(),
        n_results=3,
    )

    # "documents" holds one list of matches per query embedding. Only one
    # query was sent, so take the first inner list; joining the outer list
    # directly would raise TypeError because its items are lists, not strings.
    matches = results["documents"][0]

    # Return results
    return "Chroma Results: " + ", ".join(matches)

# Build the Gradio UI: one text input wired to `search`, one text output
interface = gr.Interface(
    fn=search,
    inputs="text",
    outputs="text",
)

# Start serving the interface
interface.launch()