# Bot_Development/script/build_vector.py
# (Header below is residue from the web file viewer this file was copied from.)
# dsmultimedika's picture
# Build Application
# 9002555
# raw
# history blame
# 3.3 kB
from llama_index.core import VectorStoreIndex
from llama_index.core import StorageContext
from pinecone import Pinecone, ServerlessSpec
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.pinecone import PineconeVectorStore
from fastapi import HTTPException, status
from config import PINECONE_CONFIG
import os
import json
class IndexManager:
    """Build and reload a Pinecone-backed LlamaIndex vector index."""

    def __init__(self):
        # VectorStoreIndex produced by the last successful build_indexes() call.
        self.vector_index = None
        # Name of the Pinecone index used for both building and loading.
        self.index_name = "summarizer-semantic-index"

    def _get_pinecone_client(self):
        """Initialize and return the Pinecone client.

        Raises:
            ValueError: If PINECONE_CONFIG.PINECONE_API_KEY is empty or unset.
        """
        api_key = PINECONE_CONFIG.PINECONE_API_KEY
        if not api_key:
            raise ValueError(
                "Pinecone API key is missing. Please set it in environment variables."
            )
        return Pinecone(api_key=api_key)

    def _create_pinecone_index(self, client):
        """Create the Pinecone index if it doesn't already exist and return a handle to it."""
        if self.index_name not in client.list_indexes().names():
            client.create_index(
                name=self.index_name,
                # NOTE(review): presumably matches the embedding model's
                # vector size -- confirm before changing the embed model.
                dimension=1536,
                metric="cosine",
                spec=ServerlessSpec(cloud="aws", region="us-east-1"),
            )
        return client.Index(self.index_name)

    def _initialize_vector_store(self, pinecone_index):
        """Wrap the Pinecone index in a vector store and return its StorageContext."""
        vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
        return StorageContext.from_defaults(vector_store=vector_store)

    def build_indexes(self, nodes):
        """Build the vector index from nodes and store it on the instance.

        Args:
            nodes: Nodes passed straight to VectorStoreIndex.

        Returns:
            A JSON-encoded string with "status" and "message" keys.

        Raises:
            HTTPException: Re-raised unchanged if one occurs; any other
                failure is wrapped in a 500 response carrying the original
                error text.
        """
        try:
            client = self._get_pinecone_client()
            pinecone_index = self._create_pinecone_index(client)
            storage_context = self._initialize_vector_store(pinecone_index)

            self.vector_index = VectorStoreIndex(
                nodes, storage_context=storage_context
            )
            self.vector_index.set_index_id("vector")

            print(f"Vector Index ID: {self.vector_index.index_id}")
            print("Vector Index created successfully.")

            # The message describes what actually happened here (a build),
            # not the load path it was originally copied from.
            response = {
                "status": "success",
                "message": "Vector Index created successfully.",
            }
            return json.dumps(response)
        except HTTPException:
            # Re-raise HTTPExceptions untouched so FastAPI handles them.
            raise
        except Exception as e:
            # Chain the cause so the original traceback is kept in logs.
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Error building indexes: {str(e)}",
            ) from e

    def load_existing_indexes(self):
        """Load the existing vector index from Pinecone.

        Returns:
            The VectorStoreIndex built on top of the existing Pinecone index.

        Raises:
            Exception: Any failure is printed and then re-raised unchanged.
        """
        try:
            client = self._get_pinecone_client()
            pinecone_index = client.Index(self.index_name)
            print(pinecone_index.describe_index_stats())

            vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
            loaded_index = VectorStoreIndex.from_vector_store(vector_store)

            print("Existing Vector Index loaded successfully.")
            return loaded_index
        except Exception as e:
            print(f"Error loading existing indexes: {e}")
            raise