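# NOTE(review): the text "Spaces: Runtime error / Runtime error" preceded this
# module when it was captured; presumably a page-status banner, not source code.
"""Interface for vector stores."""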
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any, Iterable, List, Optional

from pydantic import BaseModel, Field

from langchain.docstore.document import Document
from langchain.embeddings.base import Embeddings
from langchain.schema import BaseRetriever
class VectorStore(ABC):
    """Interface for vector stores.

    Concrete stores must implement ``add_texts``, ``similarity_search`` and
    the ``from_texts`` constructor. The remaining search methods raise
    ``NotImplementedError`` until a subclass overrides them, while
    ``add_documents``, ``from_documents`` and ``as_retriever`` are built on
    top of the abstract methods.
    """

    @abstractmethod
    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Run more texts through the embeddings and add to the vectorstore.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            kwargs: vectorstore specific parameters

        Returns:
            List of ids from adding the texts into the vectorstore.
        """

    def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
        """Run more documents through the embeddings and add to the vectorstore.

        Args:
            documents (List[Document]): Documents to add to the vectorstore.
            kwargs: Forwarded unchanged to ``add_texts``.

        Returns:
            List[str]: List of IDs of the added texts.
        """
        # TODO: Handle the case where the user doesn't provide ids on the Collection
        texts = [doc.page_content for doc in documents]
        metadatas = [doc.metadata for doc in documents]
        return self.add_texts(texts, metadatas, **kwargs)

    @abstractmethod
    def similarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[Document]:
        """Return docs most similar to query."""

    def similarity_search_by_vector(
        self, embedding: List[float], k: int = 4, **kwargs: Any
    ) -> List[Document]:
        """Return docs most similar to embedding vector.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.

        Returns:
            List of Documents most similar to the query vector.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    def max_marginal_relevance_search(
        self, query: str, k: int = 4, fetch_k: int = 20
    ) -> List[Document]:
        """Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND
        diversity among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.

        Returns:
            List of Documents selected by maximal marginal relevance.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    def max_marginal_relevance_search_by_vector(
        self, embedding: List[float], k: int = 4, fetch_k: int = 20
    ) -> List[Document]:
        """Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND
        diversity among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.

        Returns:
            List of Documents selected by maximal marginal relevance.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    @classmethod
    def from_documents(
        cls,
        documents: List[Document],
        embedding: Embeddings,
        **kwargs: Any,
    ) -> VectorStore:
        """Return VectorStore initialized from documents and embeddings.

        Splits each document into its ``page_content`` and ``metadata`` and
        delegates to ``cls.from_texts``; ``kwargs`` are forwarded unchanged.
        """
        texts = [d.page_content for d in documents]
        metadatas = [d.metadata for d in documents]
        return cls.from_texts(texts, embedding, metadatas=metadatas, **kwargs)

    # ``classmethod`` must be the outermost decorator so the abstract marker
    # set by ``abstractmethod`` is visible to ``ABC``.
    @classmethod
    @abstractmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> VectorStore:
        """Return VectorStore initialized from texts and embeddings."""

    def as_retriever(self) -> VectorStoreRetriever:
        """Return a ``VectorStoreRetriever`` that wraps this store."""
        return VectorStoreRetriever(vectorstore=self)
class VectorStoreRetriever(BaseRetriever, BaseModel):
    """Retriever that answers queries through a vector store's similarity search."""

    # Store whose ``similarity_search`` is called for each query.
    vectorstore: VectorStore
    # Extra keyword arguments forwarded to ``similarity_search`` on every call;
    # defaults to a fresh empty dict per instance.
    search_kwargs: dict = Field(default_factory=dict)

    class Config:
        """Configuration for this pydantic object."""

        arbitrary_types_allowed = True

    def get_relevant_texts(self, query: str) -> List[Document]:
        """Return the documents the wrapped store finds similar to ``query``."""
        search = self.vectorstore.similarity_search
        return search(query, **self.search_kwargs)