customer-support / vectordb_utils.py
Prajith04's picture
Update vectordb_utils.py
3e71806 verified
# vectordb_utils.py
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
import uuid
import os
# Setup cache dir: the sentence-transformers model files are cached here.
cache_dir = os.environ.get("MODEL_CACHE_DIR", "/app/cache")  # Fallback
os.makedirs(cache_dir, exist_ok=True)

# Qdrant connection settings come from the environment. QDRANT_URL is an
# optional override; when unset or empty, the original cluster URL is used so
# existing deployments keep working unchanged.
api_key = os.environ.get("QDRANT_API_KEY")
qdrant_url = os.environ.get("QDRANT_URL") or (
    "https://b4e91bde-3e30-43ef-968e-c10a43f2e161.eu-west-2-0.aws.cloud.qdrant.io:6333"
)

# Encoder and Qdrant config
encoder = SentenceTransformer("all-MiniLM-L6-v2", cache_folder=cache_dir)
qdrant = QdrantClient(
    url=qdrant_url,
    api_key=api_key,
)
collection_name = "customer_support_docsv1"
# Initialize collection
def init_qdrant_collection():
    """Recreate the Qdrant collection used for customer-support lookups.

    The collection is configured for cosine distance. Its vector size is
    taken from the loaded encoder so the two cannot drift apart; 384 is used
    as a fallback if the encoder does not report a dimension.

    NOTE: ``recreate_collection`` drops any existing collection with the same
    name (per the Qdrant client documentation), so calling this discards
    previously stored points.
    """
    vector_size = encoder.get_sentence_embedding_dimension() or 384
    qdrant.recreate_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
    )
# Add a query/response to DB
def add_to_vectordb(query, response):
    """Embed ``query`` and store it together with ``response`` in Qdrant.

    Args:
        query: Text that is encoded into the point's vector.
        response: Text stored alongside the query in the point payload.
    """
    embedding = encoder.encode(query).tolist()
    # Each entry gets a fresh random UUID as its point id.
    point = PointStruct(
        id=str(uuid.uuid4()),
        vector=embedding,
        payload={"query": query, "response": response},
    )
    qdrant.upload_points(collection_name=collection_name, points=[point])
# Search DB
def search_vectordb(query, limit=3):
    """Return the stored points most similar to ``query``.

    Args:
        query: Text to embed and search with.
        limit: Maximum number of hits to return (default 3).

    Returns:
        The scored points reported by Qdrant for the query vector.
    """
    vector = encoder.encode(query).tolist()
    # ``QdrantClient.search`` is deprecated in favour of ``query_points``.
    # Prefer the newer API when the installed client has it and fall back to
    # ``search`` on older clients.
    if hasattr(qdrant, "query_points"):
        result = qdrant.query_points(
            collection_name=collection_name,
            query=vector,
            limit=limit,
        )
        # ``query_points`` wraps the hits in a response object; unwrap it so
        # callers still receive the list of scored points.
        return result.points
    return qdrant.search(
        collection_name=collection_name,
        query_vector=vector,
        limit=limit,
    )
# 🆕 Load and populate from Hugging Face dataset
def _clean_text(value):
    """Return ``value`` stripped of surrounding whitespace, or "" if it is not a string."""
    return value.strip() if isinstance(value, str) else ""


def populate_vectordb_from_hf():
    """Load the ``Talhat/Customer_IT_Support`` train split into the vector DB.

    Each row's ``body`` is used as the query and its ``answer`` as the
    response. Rows where either field is missing, ``None``, not a string, or
    blank after stripping are skipped.
    """
    print("Loading dataset from Hugging Face...")
    dataset = load_dataset("Talhat/Customer_IT_Support", split="train")
    print("Populating vector DB...")
    for item in dataset:
        # ``item.get(key, "")`` only covers a missing key; a column that is
        # present but None would crash on ``.strip()``, so normalise here.
        query = _clean_text(item.get("body"))
        response = _clean_text(item.get("answer"))
        if query and response:
            add_to_vectordb(query, response)
    print("Vector DB population complete.")