Spaces:
Runtime error
Runtime error
File size: 3,950 Bytes
0ccfd2a d0e302c 0ccfd2a d0e302c 0ccfd2a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
import uuid
from typing import List, Dict
from qdrant_client import QdrantClient, models as qmodels
from llama_index.llms.openai import OpenAI
from fastembed import TextEmbedding
from .models import FoodItem
from .utils import synthesize_food_item
# Starter preferences used to seed the collection in the ``__main__`` demo.
likes = [
    "dosa",
    "fanta",
    "croissant",
    "waffles",
]
dislikes = [
    "virgin mojito",
]
# Candidate items that the ``__main__`` demo asks the engine to score.
menu = [
    "croissant",
    "mango",
    "jalebi",
]
class RecommendationEngine:
    """Score candidate items against liked/disliked examples stored in Qdrant.

    Each preference is stored as one point in the ``"<category>_preferences"``
    collection: the vector is an embedding of the item's dumped fields and the
    payload is those fields plus a boolean ``liked`` flag.

    Attributes:
        collection: Name of the Qdrant collection used by this engine.
        qdrant: Client used for every collection operation.
        embedding_model: Object whose ``passage_embed`` turns text into vectors.
        counter: Integer id assigned to the next inserted point.
    """

    # Page size used when scrolling through all liked/disliked points.
    _SCROLL_PAGE_SIZE = 256

    def __init__(
        self,
        category: str,
        qdrant: "QdrantClient",
        fastembed_model: "TextEmbedding",
    ) -> None:
        """Attach to the category's collection, creating it when missing.

        Args:
            category: Prefix of the collection name (``f"{category}_preferences"``).
            qdrant: Qdrant client to operate on.
            fastembed_model: Embedding model used to vectorise items.
        """
        self.collection = f"{category}_preferences"
        self.qdrant = qdrant
        self.embedding_model = fastembed_model
        if self.qdrant.collection_exists(self.collection):
            # Continue numbering after the points that are already stored.
            self.counter = self.qdrant.count(self.collection, exact=True).count
        else:
            self.reset()

    def reset(self) -> None:
        """Drop the collection (if present), create it empty, restart ids at 0."""
        if self.qdrant.collection_exists(self.collection):
            self.qdrant.delete_collection(self.collection)
        self.qdrant.create_collection(
            self.collection,
            # NOTE(review): 384 presumably matches the output dimension of the
            # embedding model passed in; confirm if a different model is used.
            vectors_config=qmodels.VectorParams(
                size=384, distance=qmodels.Distance.COSINE
            ),
        )
        # An empty collection has no ids in use, so numbering starts over.
        self.counter = 0

    def _generate_vector(self, model_json: dict) -> List[float]:
        """Embed the ``"key: value"`` rendering of ``model_json``.

        Returns:
            The embedding as a plain list of numbers.
        """
        # NOTE: fields are concatenated without a separator; kept as-is so new
        # vectors stay comparable with vectors that are already stored.
        embedding_txt = "".join(f"{k}: {v}" for k, v in model_json.items())
        vector = next(iter(self.embedding_model.passage_embed([embedding_txt])))
        # Array-like results (anything exposing ``tolist``) are converted so
        # the point is built from a plain list.
        return vector.tolist() if hasattr(vector, "tolist") else list(vector)

    def _insert_preference(self, item: "FoodItem", *args, **kwargs) -> None:
        """Upsert ``item`` as a new point and advance the id counter.

        Args:
            item: Object whose ``model_dump()`` provides the payload fields.
            *args: Unused; accepted for backward compatibility.
            **kwargs: Extra payload entries (e.g. ``liked`` or ``query_id``).
                They are added after embedding, so they do not affect the vector.
        """
        model_json: dict = item.model_dump()
        embedding = self._generate_vector(model_json)
        model_json.update(kwargs)
        self.qdrant.upsert(
            self.collection,
            points=[
                qmodels.PointStruct(
                    id=self.counter, payload=model_json, vector=embedding
                )
            ],
        )
        self.counter += 1

    def like(self, item: "FoodItem") -> None:
        """Store ``item`` as a liked example."""
        self._insert_preference(item, liked=True)

    def dislike(self, item: "FoodItem") -> None:
        """Store ``item`` as a disliked example."""
        self._insert_preference(item, liked=False)

    def _scroll_ids(self, liked: bool) -> list:
        """Return the ids of every stored point whose ``liked`` flag equals ``liked``."""
        ids = []
        offset = None
        while True:
            # A single scroll call returns one page only; follow the returned
            # offset until it is None so no example is silently dropped.
            points, offset = self.qdrant.scroll(
                self.collection,
                scroll_filter={"must": [{"key": "liked", "match": {"value": liked}}]},
                limit=self._SCROLL_PAGE_SIZE,
                offset=offset,
                with_payload=False,
            )
            ids.extend(point.id for point in points)
            if offset is None:
                return ids

    def recommend_from_given(
        self, items: List["FoodItem"], limit: int = 3
    ) -> Dict[str, float]:
        """Score ``items`` against the stored likes and dislikes.

        The candidates are inserted temporarily (tagged with a unique
        ``query_id``) because the recommendation query can only return points
        that exist in the collection; they are removed again before returning.

        Args:
            items: Candidate items to score.
            limit: Maximum number of recommendations to return.

        Returns:
            Mapping of each returned candidate's ``name`` payload field to its
            score. Empty when ``items`` is empty.
        """
        if not items:
            return {}

        liked_ids = self._scroll_ids(liked=True)
        disliked_ids = self._scroll_ids(liked=False)

        query_id = str(uuid.uuid1())
        first_temp_id = self.counter
        try:
            for item in items:
                self._insert_preference(item, query_id=query_id)
            scored_points = self.qdrant.recommend(
                self.collection,
                positive=liked_ids,
                negative=disliked_ids,
                query_filter={
                    "must": [{"key": "query_id", "match": {"value": query_id}}]
                },
                with_payload=True,
                strategy="best_score",
                limit=limit,
            )
        finally:
            # Remove every temporary point, not just the ones that were
            # returned, and do so even when the query failed.
            temp_ids = list(range(first_temp_id, self.counter))
            if temp_ids:
                self.qdrant.delete(self.collection, points_selector=temp_ids)
                # The ids are free again; reusing them keeps ``counter`` in
                # step with the stored point count that seeds it on start-up.
                self.counter = first_temp_id
        return {point.payload["name"]: point.score for point in scored_points}
if __name__ == "__main__":
    # Demo: make sure the "food" collection holds exactly the starter
    # preferences, then score the menu against them.
    llm = OpenAI(model="gpt-3.5-turbo")
    rec_engine = RecommendationEngine("food", QdrantClient(), TextEmbedding())

    expected_seed_count = len(likes) + len(dislikes)
    if rec_engine.counter != expected_seed_count:
        # The stored data does not match the starter set: rebuild from scratch.
        rec_engine.reset()
        print("Filling with starter data")
        seed_plan = ((likes, rec_engine.like), (dislikes, rec_engine.dislike))
        for names, record in seed_plan:
            for food_name in names:
                record(synthesize_food_item(food_name, llm))

    candidates = []
    for food_name in menu:
        candidates.append(synthesize_food_item(food_name, llm))
    print(rec_engine.recommend_from_given(items=candidates))
|