"""Interface for vector stores.""" from __future__ import annotations from abc import ABC, abstractmethod from typing import Any, Iterable, List, Optional from pydantic import BaseModel, Field from langchain.docstore.document import Document from langchain.embeddings.base import Embeddings from langchain.schema import BaseRetriever class VectorStore(ABC): """Interface for vector stores.""" @abstractmethod def add_texts( self, texts: Iterable[str], metadatas: Optional[List[dict]] = None, **kwargs: Any, ) -> List[str]: """Run more texts through the embeddings and add to the vectorstore. Args: texts: Iterable of strings to add to the vectorstore. metadatas: Optional list of metadatas associated with the texts. kwargs: vectorstore specific parameters Returns: List of ids from adding the texts into the vectorstore. """ def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]: """Run more documents through the embeddings and add to the vectorstore. Args: documents (List[Document]: Documents to add to the vectorstore. Returns: List[str]: List of IDs of the added texts. """ # TODO: Handle the case where the user doesn't provide ids on the Collection texts = [doc.page_content for doc in documents] metadatas = [doc.metadata for doc in documents] return self.add_texts(texts, metadatas, **kwargs) @abstractmethod def similarity_search( self, query: str, k: int = 4, **kwargs: Any ) -> List[Document]: """Return docs most similar to query.""" def similarity_search_by_vector( self, embedding: List[float], k: int = 4, **kwargs: Any ) -> List[Document]: """Return docs most similar to embedding vector. Args: embedding: Embedding to look up documents similar to. k: Number of Documents to return. Defaults to 4. Returns: List of Documents most similar to the query vector. """ raise NotImplementedError def max_marginal_relevance_search( self, query: str, k: int = 4, fetch_k: int = 20 ) -> List[Document]: """Return docs selected using the maximal marginal relevance. Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents. Args: query: Text to look up documents similar to. k: Number of Documents to return. Defaults to 4. fetch_k: Number of Documents to fetch to pass to MMR algorithm. Returns: List of Documents selected by maximal marginal relevance. """ raise NotImplementedError def max_marginal_relevance_search_by_vector( self, embedding: List[float], k: int = 4, fetch_k: int = 20 ) -> List[Document]: """Return docs selected using the maximal marginal relevance. Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents. Args: embedding: Embedding to look up documents similar to. k: Number of Documents to return. Defaults to 4. fetch_k: Number of Documents to fetch to pass to MMR algorithm. Returns: List of Documents selected by maximal marginal relevance. """ raise NotImplementedError @classmethod def from_documents( cls, documents: List[Document], embedding: Embeddings, **kwargs: Any, ) -> VectorStore: """Return VectorStore initialized from documents and embeddings.""" texts = [d.page_content for d in documents] metadatas = [d.metadata for d in documents] return cls.from_texts(texts, embedding, metadatas=metadatas, **kwargs) @classmethod @abstractmethod def from_texts( cls, texts: List[str], embedding: Embeddings, metadatas: Optional[List[dict]] = None, **kwargs: Any, ) -> VectorStore: """Return VectorStore initialized from texts and embeddings.""" def as_retriever(self) -> VectorStoreRetriever: return VectorStoreRetriever(vectorstore=self) class VectorStoreRetriever(BaseRetriever, BaseModel): vectorstore: VectorStore search_kwargs: dict = Field(default_factory=dict) class Config: """Configuration for this pydantic object.""" arbitrary_types_allowed = True def get_relevant_texts(self, query: str) -> List[Document]: return self.vectorstore.similarity_search(query, **self.search_kwargs)