Spaces:

willco-afk
/

RAG_AI_BOT

Sleeping

App Files Files Community

RAG_AI_BOT / app.py

willco-afk

Update app.py

092e189 verified 7 months ago

raw

history blame contribute delete

1.93 kB

	import gradio as gr
	import chromadb
	from transformers import AutoTokenizer, AutoModel
	import numpy as np
	import torch

	# Load the pre-trained sentence-embedding model and its matching tokenizer.
	# Both are module-level objects reused by generate_embeddings().
	model_name = "sentence-transformers/all-MiniLM-L6-v2"
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModel.from_pretrained(model_name)

	# Initialize Chroma client
	client = chromadb.Client()

	# Fetch the collection, creating it on first use. create_collection()
	# raises when a collection with this name already exists (for example when
	# this module is executed again in the same process), whereas
	# get_or_create_collection() is safe to call repeatedly.
	collection = client.get_or_create_collection(name="tree_images")

	# Custom dataset of tree descriptions (both decorated and undecorated).
	# Each entry is expected to be one description string.
	content = [
	    # Your tree descriptions here...
	]

	# Function to generate embeddings using the pre-trained model
	def generate_embeddings(texts):
	    """Embed each item of *texts* with the module-level tokenizer and model.

	    Every text is tokenized and run through the model on its own; the
	    token-level hidden states are averaged over the sequence axis to give
	    one vector per text.

	    Args:
	        texts: Iterable of texts to embed (assumed to be plain strings).

	    Returns:
	        A list of NumPy arrays, one per input text, in input order. An
	        empty input yields an empty list.
	    """
	    vectors = []
	    # Inference only: skip autograd bookkeeping for the whole loop.
	    with torch.no_grad():
	        for sentence in texts:
	            encoded = tokenizer(
	                sentence, return_tensors="pt", padding=True, truncation=True
	            )
	            hidden_states = model(**encoded).last_hidden_state
	            # Average across tokens, then drop the size-1 batch axis.
	            pooled = hidden_states.mean(dim=1).squeeze()
	            vectors.append(pooled.numpy())
	    return vectors

	# Generate embeddings for the content
	embeddings = generate_embeddings(content)

	# Add the documents to Chroma using upsert. Chroma requires a unique string
	# id for every record; the position in `content` is used so that re-running
	# the script overwrites the same records instead of duplicating them.
	for idx, (text, vector) in enumerate(zip(content, embeddings)):
	    collection.upsert(
	        ids=[str(idx)],  # required record identifier
	        documents=[text],  # the document (text) itself
	        metadatas=[{"id": idx}],  # metadata associated with the document
	        # Plain Python floats are accepted by every Chroma version,
	        # unlike NumPy arrays which older releases reject.
	        embeddings=[vector.tolist()],
	    )

	# Define the search function for Gradio interface
	def search(query):
	    """Return the stored documents closest to *query* as one display string.

	    The query is embedded with generate_embeddings() and the three nearest
	    documents are looked up in the Chroma collection.

	    Args:
	        query: The search text entered in the Gradio text box.

	    Returns:
	        A string of the form "Chroma Results: doc1, doc2, doc3". When
	        nothing matches, only the "Chroma Results: " prefix is returned.
	    """
	    # Generate embedding for the query as a plain list of floats.
	    query_embedding = generate_embeddings([query])[0].tolist()

	    # Chroma-based search; one query embedding is submitted.
	    results = collection.query(query_embeddings=[query_embedding], n_results=3)

	    # "documents" holds one list of documents per submitted query, so the
	    # hits for our single query are the first (and only) inner list.
	    per_query_documents = results["documents"] or [[]]
	    matched_documents = per_query_documents[0]

	    # Return results
	    return "Chroma Results: " + ", ".join(matched_documents)

	# Build the Gradio UI: one text box in, one text box out, backed by search().
	interface = gr.Interface(
	    fn=search,
	    inputs="text",
	    outputs="text",
	)

	# Start the Gradio app.
	interface.launch()