File size: 1,866 Bytes
16d282e 3ad3642 16d282e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# services.py
from typing import List
import numpy as np
from chromadb import Client
import openai
from config.constants import DEEPINFRA_MODEL_TAG, DEEPINFRA_ENDPOINT_URL
import os
# DeepInfra credential read once at import time; None when the variable is unset.
DEEPINFRA_API_KEY = os.environ.get('DEEPINFRA_API_KEY')
class SearchService:
    """Similarity search over listing embeddings held in a Chroma collection.

    Embeddings are supplied by the caller (``embedding_function=None``), so
    the collection never computes vectors itself: ``ingest_data`` stores
    precomputed vectors and ``search`` embeds the query through the
    OpenAI-compatible DeepInfra endpoint before querying.
    """

    def __init__(self):
        """Create the Chroma client and bind the ``listing_collection`` collection."""
        self.client = Client()
        self.collection_name = "listing_collection"
        # get_or_create instead of create: building a second SearchService
        # against a client that already holds this collection must reuse it
        # rather than fail on the name clash.
        self.collection = self.client.get_or_create_collection(
            name=self.collection_name,
            metadata={
                'description': 'real_estate_listing',
                "hnsw:construction_ef": 64,
                "hnsw:M": 32,
                "hnsw:search_ef": 32,
            },
            embedding_function=None,
        )

    def ingest_data(self, embd_id) -> None:
        """Store a batch of precomputed embeddings in the collection.

        Args:
            embd_id: 2-D array-like. Column 0 is cast to ``int64`` and becomes
                the listing id (stored as ``"PTFS<number>"`` under the
                ``original_id`` metadata key); the remaining columns are cast
                to ``float`` and stored as the embedding vector.

        An empty batch is a no-op.

        NOTE(review): record ids are the row positions ("0", "1", ...) of this
        batch, so a second call reuses the ids of the first one; confirm
        whether repeated ingestion is expected by callers.
        """
        rows = np.asarray(embd_id)
        if rows.size == 0:
            # Nothing to store; avoid handing Chroma an empty batch.
            return

        # Plain Python lists are passed rather than ndarrays, since lists are
        # the documented input form for Collection.add.
        vectors = rows[:, 1:].astype(float).tolist()
        listing_ids = [f"PTFS{num}" for num in rows[:, 0].astype('int64')]
        record_ids = [str(position) for position in range(len(listing_ids))]

        self.collection.add(
            ids=record_ids,
            embeddings=vectors,
            metadatas=[{"original_id": listing_id} for listing_id in listing_ids],
        )

    def search(self, query: str, n_results: int = 10) -> List[str]:
        """Return the listing ids whose embeddings are closest to ``query``.

        Args:
            query: Free-text search query; it is embedded with the model named
                by ``DEEPINFRA_MODEL_TAG``.
            n_results: Maximum number of neighbours requested from the
                collection (defaults to 10).

        Returns:
            The ``original_id`` metadata values of the matches, in the order
            the collection returns them.
        """
        # Point the module-level openai configuration at DeepInfra.
        openai.api_key = DEEPINFRA_API_KEY
        openai.api_base = DEEPINFRA_ENDPOINT_URL

        # Convert the search query to an embedding vector.
        response = openai.Embedding.create(
            input=query,
            model=DEEPINFRA_MODEL_TAG,
            encoding_format="float",
        )
        query_embedding = list(response.data[0].embedding)

        # One query vector -> one result row; passed by keyword as a list.
        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=n_results,
        )

        # results["metadatas"] holds one list per query vector; we sent one.
        return [metadata["original_id"] for metadata in results["metadatas"][0]]
|