Spaces:
Running
Running
# Copyright (c) Facebook, Inc. and its affiliates. | |
# | |
# This source code is licensed under the MIT license found in the | |
# LICENSE file in the root directory of this source tree. | |
import inspect | |
import faiss | |
import numpy as np | |
from faiss.loader import ( | |
DirectMap, | |
IDSelector, | |
IDSelectorArray, | |
IDSelectorBatch, | |
OperatingPoints, | |
RangeSearchResult, | |
rev_swig_ptr, | |
swig_ptr, | |
try_extract_index_ivf, | |
) | |
################################################################## | |
# The functions below add or replace some methods for classes | |
# this is to be able to pass in numpy arrays directly | |
# The C++ version of the classnames will be suffixed with _c | |
# | |
# The docstrings in the wrappers are intended to be similar to numpy | |
# comments, they will appear with help(Class.method) or ?Class.method | |
# For methods that are not replaced, the C++ documentation will be used if | |
# swig 4.x is run with -doxygen. | |
################################################################## | |
# For most arrays we force the convesion to the target type with | |
# np.ascontiguousarray, but for uint8 codes, we raise a type error | |
# because it is unclear how the conversion should occur: with a view | |
# (= cast) or conversion? | |
def _check_dtype_uint8(codes): | |
if codes.dtype != 'uint8': | |
raise TypeError("Input argument %s must be ndarray of dtype " | |
" uint8, but found %s" % ("codes", codes.dtype)) | |
return np.ascontiguousarray(codes) | |
def replace_method(the_class, name, replacement, ignore_missing=False): | |
""" Replaces a method in a class with another version. The old method | |
is renamed to method_name_c (because presumably it was implemented in C) """ | |
try: | |
orig_method = getattr(the_class, name) | |
except AttributeError: | |
if ignore_missing: | |
return | |
raise | |
if orig_method.__name__ == 'replacement_' + name: | |
# replacement was done in parent class | |
return | |
setattr(the_class, name + '_c', orig_method) | |
setattr(the_class, name, replacement) | |
def handle_Clustering(the_class): | |
def replacement_train(self, x, index, weights=None): | |
"""Perform clustering on a set of vectors. The index is used for assignment. | |
Parameters | |
---------- | |
x : array_like | |
Training vectors, shape (n, self.d). `dtype` must be float32. | |
index : faiss.Index | |
Index used for assignment. The dimension of the index should be `self.d`. | |
weights : array_like, optional | |
Per training sample weight (size n) used when computing the weighted | |
average to obtain the centroid (default is 1 for all training vectors). | |
""" | |
n, d = x.shape | |
x = np.ascontiguousarray(x, dtype='float32') | |
assert d == self.d | |
if weights is not None: | |
weights = np.ascontiguousarray(weights, dtype='float32') | |
assert weights.shape == (n, ) | |
self.train_c(n, swig_ptr(x), index, swig_ptr(weights)) | |
else: | |
self.train_c(n, swig_ptr(x), index) | |
def replacement_train_encoded(self, x, codec, index, weights=None): | |
""" Perform clustering on a set of compressed vectors. The index is used for assignment. | |
The decompression is performed on-the-fly. | |
Parameters | |
---------- | |
x : array_like | |
Training vectors, shape (n, codec.code_size()). `dtype` must be `uint8`. | |
codec : faiss.Index | |
Index used to decode the vectors. Should have dimension `self.d`. | |
index : faiss.Index | |
Index used for assignment. The dimension of the index should be `self.d`. | |
weigths : array_like, optional | |
Per training sample weight (size n) used when computing the weighted | |
average to obtain the centroid (default is 1 for all training vectors). | |
""" | |
n, d = x.shape | |
x = _check_dtype_uint8(x) | |
assert d == codec.sa_code_size() | |
assert codec.d == index.d | |
if weights is not None: | |
weights = np.ascontiguousarray(weights, dtype='float32') | |
assert weights.shape == (n, ) | |
self.train_encoded_c(n, swig_ptr(x), codec, | |
index, swig_ptr(weights)) | |
else: | |
self.train_encoded_c(n, swig_ptr(x), codec, index) | |
replace_method(the_class, 'train', replacement_train) | |
replace_method(the_class, 'train_encoded', replacement_train_encoded) | |
def handle_Clustering1D(the_class): | |
def replacement_train_exact(self, x): | |
"""Perform clustering on a set of 1D vectors. | |
Parameters | |
---------- | |
x : array_like | |
Training vectors, shape (n, 1). `dtype` must be float32. | |
""" | |
n, d = x.shape | |
x = np.ascontiguousarray(x, dtype='float32') | |
assert d == self.d | |
self.train_exact_c(n, swig_ptr(x)) | |
replace_method(the_class, 'train_exact', replacement_train_exact) | |
def handle_Quantizer(the_class): | |
def replacement_train(self, x): | |
""" Train the quantizer on a set of training vectors. | |
Parameters | |
---------- | |
x : array_like | |
Training vectors, shape (n, self.d). `dtype` must be float32. | |
""" | |
n, d = x.shape | |
x = np.ascontiguousarray(x, dtype='float32') | |
assert d == self.d | |
self.train_c(n, swig_ptr(x)) | |
def replacement_compute_codes(self, x): | |
""" Compute the codes corresponding to a set of vectors. | |
Parameters | |
---------- | |
x : array_like | |
Vectors to encode, shape (n, self.d). `dtype` must be float32. | |
Returns | |
------- | |
codes : array_like | |
Corresponding code for each vector, shape (n, self.code_size) | |
and `dtype` uint8. | |
""" | |
n, d = x.shape | |
x = np.ascontiguousarray(x, dtype='float32') | |
assert d == self.d | |
codes = np.empty((n, self.code_size), dtype='uint8') | |
self.compute_codes_c(swig_ptr(x), swig_ptr(codes), n) | |
return codes | |
def replacement_decode(self, codes): | |
"""Reconstruct an approximation of vectors given their codes. | |
Parameters | |
---------- | |
codes : array_like | |
Codes to decode, shape (n, self.code_size). `dtype` must be uint8. | |
Returns | |
------- | |
Reconstructed vectors for each code, shape `(n, d)` and `dtype` float32. | |
""" | |
n, cs = codes.shape | |
codes = _check_dtype_uint8(codes) | |
assert cs == self.code_size | |
x = np.empty((n, self.d), dtype='float32') | |
self.decode_c(swig_ptr(codes), swig_ptr(x), n) | |
return x | |
replace_method(the_class, 'train', replacement_train) | |
replace_method(the_class, 'compute_codes', replacement_compute_codes) | |
replace_method(the_class, 'decode', replacement_decode) | |
def handle_NSG(the_class): | |
def replacement_build(self, x, graph): | |
n, d = x.shape | |
assert d == self.d | |
assert graph.ndim == 2 | |
assert graph.shape[0] == n | |
K = graph.shape[1] | |
x = np.ascontiguousarray(x, dtype='float32') | |
graph = np.ascontiguousarray(graph, dtype='int64') | |
self.build_c(n, swig_ptr(x), swig_ptr(graph), K) | |
replace_method(the_class, 'build', replacement_build) | |
def handle_Index(the_class): | |
def replacement_add(self, x): | |
"""Adds vectors to the index. | |
The index must be trained before vectors can be added to it. | |
The vectors are implicitly numbered in sequence. When `n` vectors are | |
added to the index, they are given ids `ntotal`, `ntotal + 1`, ..., `ntotal + n - 1`. | |
Parameters | |
---------- | |
x : array_like | |
Query vectors, shape (n, d) where d is appropriate for the index. | |
`dtype` must be float32. | |
""" | |
n, d = x.shape | |
assert d == self.d | |
x = np.ascontiguousarray(x, dtype='float32') | |
self.add_c(n, swig_ptr(x)) | |
def replacement_add_with_ids(self, x, ids): | |
"""Adds vectors with arbitrary ids to the index (not all indexes support this). | |
The index must be trained before vectors can be added to it. | |
Vector `i` is stored in `x[i]` and has id `ids[i]`. | |
Parameters | |
---------- | |
x : array_like | |
Query vectors, shape (n, d) where d is appropriate for the index. | |
`dtype` must be float32. | |
ids : array_like | |
Array if ids of size n. The ids must be of type `int64`. Note that `-1` is reserved | |
in result lists to mean "not found" so it's better to not use it as an id. | |
""" | |
n, d = x.shape | |
assert d == self.d | |
x = np.ascontiguousarray(x, dtype='float32') | |
ids = np.ascontiguousarray(ids, dtype='int64') | |
assert ids.shape == (n, ), 'not same nb of vectors as ids' | |
self.add_with_ids_c(n, swig_ptr(x), swig_ptr(ids)) | |
def replacement_assign(self, x, k, labels=None): | |
"""Find the k nearest neighbors of the set of vectors x in the index. | |
This is the same as the `search` method, but discards the distances. | |
Parameters | |
---------- | |
x : array_like | |
Query vectors, shape (n, d) where d is appropriate for the index. | |
`dtype` must be float32. | |
k : int | |
Number of nearest neighbors. | |
labels : array_like, optional | |
Labels array to store the results. | |
Returns | |
------- | |
labels: array_like | |
Labels of the nearest neighbors, shape (n, k). | |
When not enough results are found, the label is set to -1 | |
""" | |
n, d = x.shape | |
assert d == self.d | |
x = np.ascontiguousarray(x, dtype='float32') | |
if labels is None: | |
labels = np.empty((n, k), dtype=np.int64) | |
else: | |
assert labels.shape == (n, k) | |
self.assign_c(n, swig_ptr(x), swig_ptr(labels), k) | |
return labels | |
def replacement_train(self, x): | |
"""Trains the index on a representative set of vectors. | |
The index must be trained before vectors can be added to it. | |
Parameters | |
---------- | |
x : array_like | |
Query vectors, shape (n, d) where d is appropriate for the index. | |
`dtype` must be float32. | |
""" | |
n, d = x.shape | |
assert d == self.d | |
x = np.ascontiguousarray(x, dtype='float32') | |
self.train_c(n, swig_ptr(x)) | |
def replacement_search(self, x, k, *, params=None, D=None, I=None): | |
"""Find the k nearest neighbors of the set of vectors x in the index. | |
Parameters | |
---------- | |
x : array_like | |
Query vectors, shape (n, d) where d is appropriate for the index. | |
`dtype` must be float32. | |
k : int | |
Number of nearest neighbors. | |
params : SearchParameters | |
Search parameters of the current search (overrides the class-level params) | |
D : array_like, optional | |
Distance array to store the result. | |
I : array_like, optional | |
Labels array to store the results. | |
Returns | |
------- | |
D : array_like | |
Distances of the nearest neighbors, shape (n, k). When not enough results are found | |
the label is set to +Inf or -Inf. | |
I : array_like | |
Labels of the nearest neighbors, shape (n, k). | |
When not enough results are found, the label is set to -1 | |
""" | |
n, d = x.shape | |
x = np.ascontiguousarray(x, dtype='float32') | |
assert d == self.d | |
assert k > 0 | |
if D is None: | |
D = np.empty((n, k), dtype=np.float32) | |
else: | |
assert D.shape == (n, k) | |
if I is None: | |
I = np.empty((n, k), dtype=np.int64) | |
else: | |
assert I.shape == (n, k) | |
self.search_c(n, swig_ptr(x), k, swig_ptr(D), swig_ptr(I), params) | |
return D, I | |
def replacement_search_and_reconstruct(self, x, k, *, params=None, D=None, I=None, R=None): | |
"""Find the k nearest neighbors of the set of vectors x in the index, | |
and return an approximation of these vectors. | |
Parameters | |
---------- | |
x : array_like | |
Query vectors, shape (n, d) where d is appropriate for the index. | |
`dtype` must be float32. | |
k : int | |
Number of nearest neighbors. | |
params : SearchParameters | |
Search parameters of the current search (overrides the class-level params) | |
D : array_like, optional | |
Distance array to store the result. | |
I : array_like, optional | |
Labels array to store the result. | |
R : array_like, optional | |
reconstruction array to store | |
Returns | |
------- | |
D : array_like | |
Distances of the nearest neighbors, shape (n, k). When not enough results are found | |
the label is set to +Inf or -Inf. | |
I : array_like | |
Labels of the nearest neighbors, shape (n, k). When not enough results are found, | |
the label is set to -1 | |
R : array_like | |
Approximate (reconstructed) nearest neighbor vectors, shape (n, k, d). | |
""" | |
n, d = x.shape | |
assert d == self.d | |
x = np.ascontiguousarray(x, dtype='float32') | |
assert k > 0 | |
if D is None: | |
D = np.empty((n, k), dtype=np.float32) | |
else: | |
assert D.shape == (n, k) | |
if I is None: | |
I = np.empty((n, k), dtype=np.int64) | |
else: | |
assert I.shape == (n, k) | |
if R is None: | |
R = np.empty((n, k, d), dtype=np.float32) | |
else: | |
assert R.shape == (n, k, d) | |
self.search_and_reconstruct_c( | |
n, swig_ptr(x), | |
k, swig_ptr(D), | |
swig_ptr(I), swig_ptr(R), params | |
) | |
return D, I, R | |
def replacement_search_and_return_codes( | |
self, x, k, *, | |
include_listnos=False, params=None, D=None, I=None, codes=None): | |
"""Find the k nearest neighbors of the set of vectors x in the index, | |
and return the codes stored for these vectors | |
Parameters | |
---------- | |
x : array_like | |
Query vectors, shape (n, d) where d is appropriate for the index. | |
`dtype` must be float32. | |
k : int | |
Number of nearest neighbors. | |
params : SearchParameters | |
Search parameters of the current search (overrides the class-level params) | |
include_listnos : bool, optional | |
whether to include the list ids in the first bytes of each code | |
D : array_like, optional | |
Distance array to store the result. | |
I : array_like, optional | |
Labels array to store the result. | |
codes : array_like, optional | |
codes array to store | |
Returns | |
------- | |
D : array_like | |
Distances of the nearest neighbors, shape (n, k). When not enough results are found | |
the label is set to +Inf or -Inf. | |
I : array_like | |
Labels of the nearest neighbors, shape (n, k). When not enough results are found, | |
the label is set to -1 | |
R : array_like | |
Approximate (reconstructed) nearest neighbor vectors, shape (n, k, d). | |
""" | |
n, d = x.shape | |
assert d == self.d | |
x = np.ascontiguousarray(x, dtype='float32') | |
assert k > 0 | |
if D is None: | |
D = np.empty((n, k), dtype=np.float32) | |
else: | |
assert D.shape == (n, k) | |
if I is None: | |
I = np.empty((n, k), dtype=np.int64) | |
else: | |
assert I.shape == (n, k) | |
code_size_1 = self.code_size | |
if include_listnos: | |
code_size_1 += self.coarse_code_size() | |
if codes is None: | |
codes = np.empty((n, k, code_size_1), dtype=np.uint8) | |
else: | |
assert codes.shape == (n, k, code_size_1) | |
self.search_and_return_codes_c( | |
n, swig_ptr(x), | |
k, swig_ptr(D), | |
swig_ptr(I), swig_ptr(codes), include_listnos, | |
params | |
) | |
return D, I, codes | |
def replacement_remove_ids(self, x): | |
"""Remove some ids from the index. | |
This is a O(ntotal) operation by default, so could be expensive. | |
Parameters | |
---------- | |
x : array_like or faiss.IDSelector | |
Either an IDSelector that returns True for vectors to remove, or a | |
list of ids to reomove (1D array of int64). When `x` is a list, | |
it is wrapped into an IDSelector. | |
Returns | |
------- | |
n_remove: int | |
number of vectors that were removed | |
""" | |
if isinstance(x, IDSelector): | |
sel = x | |
else: | |
assert x.ndim == 1 | |
index_ivf = try_extract_index_ivf(self) | |
x = np.ascontiguousarray(x, dtype='int64') | |
if index_ivf and index_ivf.direct_map.type == DirectMap.Hashtable: | |
sel = IDSelectorArray(x.size, swig_ptr(x)) | |
else: | |
sel = IDSelectorBatch(x.size, swig_ptr(x)) | |
return self.remove_ids_c(sel) | |
def replacement_reconstruct(self, key, x=None): | |
"""Approximate reconstruction of one vector from the index. | |
Parameters | |
---------- | |
key : int | |
Id of the vector to reconstruct | |
x : array_like, optional | |
pre-allocated array to store the results | |
Returns | |
------- | |
x : array_like reconstructed vector, size `self.d`, `dtype`=float32 | |
""" | |
if x is None: | |
x = np.empty(self.d, dtype=np.float32) | |
else: | |
assert x.shape == (self.d, ) | |
self.reconstruct_c(key, swig_ptr(x)) | |
return x | |
def replacement_reconstruct_batch(self, key, x=None): | |
"""Approximate reconstruction of several vectors from the index. | |
Parameters | |
---------- | |
key : array of ints | |
Ids of the vectors to reconstruct | |
x : array_like, optional | |
pre-allocated array to store the results | |
Returns | |
------- | |
x : array_like | |
reconstrcuted vectors, size `len(key), self.d` | |
""" | |
key = np.ascontiguousarray(key, dtype='int64') | |
n, = key.shape | |
if x is None: | |
x = np.empty((n, self.d), dtype=np.float32) | |
else: | |
assert x.shape == (n, self.d) | |
self.reconstruct_batch_c(n, swig_ptr(key), swig_ptr(x)) | |
return x | |
def replacement_reconstruct_n(self, n0=0, ni=-1, x=None): | |
"""Approximate reconstruction of vectors `n0` ... `n0 + ni - 1` from the index. | |
Missing vectors trigger an exception. | |
Parameters | |
---------- | |
n0 : int | |
Id of the first vector to reconstruct (default 0) | |
ni : int | |
Number of vectors to reconstruct (-1 = default = ntotal) | |
x : array_like, optional | |
pre-allocated array to store the results | |
Returns | |
------- | |
x : array_like | |
Reconstructed vectors, size (`ni`, `self.d`), `dtype`=float32 | |
""" | |
if ni == -1: | |
ni = self.ntotal - n0 | |
if x is None: | |
x = np.empty((ni, self.d), dtype=np.float32) | |
else: | |
assert x.shape == (ni, self.d) | |
self.reconstruct_n_c(n0, ni, swig_ptr(x)) | |
return x | |
def replacement_update_vectors(self, keys, x): | |
n = keys.size | |
assert keys.shape == (n, ) | |
assert x.shape == (n, self.d) | |
x = np.ascontiguousarray(x, dtype='float32') | |
keys = np.ascontiguousarray(keys, dtype='int64') | |
self.update_vectors_c(n, swig_ptr(keys), swig_ptr(x)) | |
# No support passed-in for output buffers | |
def replacement_range_search(self, x, thresh, *, params=None): | |
"""Search vectors that are within a distance of the query vectors. | |
Parameters | |
---------- | |
x : array_like | |
Query vectors, shape (n, d) where d is appropriate for the index. | |
`dtype` must be float32. | |
thresh : float | |
Threshold to select neighbors. All elements within this radius are returned, | |
except for maximum inner product indexes, where the elements above the | |
threshold are returned | |
params : SearchParameters | |
Search parameters of the current search (overrides the class-level params) | |
Returns | |
------- | |
lims: array_like | |
Starting index of the results for each query vector, size n+1. | |
D : array_like | |
Distances of the nearest neighbors, shape `lims[n]`. The distances for | |
query i are in `D[lims[i]:lims[i+1]]`. | |
I : array_like | |
Labels of nearest neighbors, shape `lims[n]`. The labels for query i | |
are in `I[lims[i]:lims[i+1]]`. | |
""" | |
n, d = x.shape | |
assert d == self.d | |
x = np.ascontiguousarray(x, dtype='float32') | |
thresh = float(thresh) | |
res = RangeSearchResult(n) | |
self.range_search_c(n, swig_ptr(x), thresh, res, params) | |
# get pointers and copy them | |
lims = rev_swig_ptr(res.lims, n + 1).copy() | |
nd = int(lims[-1]) | |
D = rev_swig_ptr(res.distances, nd).copy() | |
I = rev_swig_ptr(res.labels, nd).copy() | |
return lims, D, I | |
def replacement_search_preassigned(self, x, k, Iq, Dq, *, params=None, D=None, I=None): | |
"""Find the k nearest neighbors of the set of vectors x in an IVF index, | |
with precalculated coarse quantization assignment. | |
Parameters | |
---------- | |
x : array_like | |
Query vectors, shape (n, d) where d is appropriate for the index. | |
`dtype` must be float32. | |
k : int | |
Number of nearest neighbors. | |
Dq : array_like, optional | |
Distance array to the centroids, size (n, nprobe) | |
Iq : array_like, optional | |
Nearest centroids, size (n, nprobe) | |
params : SearchParameters | |
Search parameters of the current search (overrides the class-level params) | |
D : array_like, optional | |
Distance array to store the result. | |
I : array_like, optional | |
Labels array to store the results. | |
Returns | |
------- | |
D : array_like | |
Distances of the nearest neighbors, shape (n, k). When not enough results are found | |
the label is set to +Inf or -Inf. | |
I : array_like | |
Labels of the nearest neighbors, shape (n, k). | |
When not enough results are found, the label is set to -1 | |
""" | |
n, d = x.shape | |
x = np.ascontiguousarray(x, dtype='float32') | |
assert d == self.d | |
assert k > 0 | |
if D is None: | |
D = np.empty((n, k), dtype=np.float32) | |
else: | |
assert D.shape == (n, k) | |
if I is None: | |
I = np.empty((n, k), dtype=np.int64) | |
else: | |
assert I.shape == (n, k) | |
Iq = np.ascontiguousarray(Iq, dtype='int64') | |
assert params is None, "params not supported" | |
assert Iq.shape == (n, self.nprobe) | |
if Dq is not None: | |
Dq = np.ascontiguousarray(Dq, dtype='float32') | |
assert Dq.shape == Iq.shape | |
self.search_preassigned_c( | |
n, swig_ptr(x), | |
k, | |
swig_ptr(Iq), swig_ptr(Dq), | |
swig_ptr(D), swig_ptr(I), | |
False | |
) | |
return D, I | |
def replacement_range_search_preassigned(self, x, thresh, Iq, Dq, *, params=None): | |
"""Search vectors that are within a distance of the query vectors. | |
Parameters | |
---------- | |
x : array_like | |
Query vectors, shape (n, d) where d is appropriate for the index. | |
`dtype` must be float32. | |
thresh : float | |
Threshold to select neighbors. All elements within this radius are returned, | |
except for maximum inner product indexes, where the elements above the | |
threshold are returned | |
Iq : array_like, optional | |
Nearest centroids, size (n, nprobe) | |
Dq : array_like, optional | |
Distance array to the centroids, size (n, nprobe) | |
params : SearchParameters | |
Search parameters of the current search (overrides the class-level params) | |
Returns | |
------- | |
lims: array_like | |
Starting index of the results for each query vector, size n+1. | |
D : array_like | |
Distances of the nearest neighbors, shape `lims[n]`. The distances for | |
query i are in `D[lims[i]:lims[i+1]]`. | |
I : array_like | |
Labels of nearest neighbors, shape `lims[n]`. The labels for query i | |
are in `I[lims[i]:lims[i+1]]`. | |
""" | |
n, d = x.shape | |
assert d == self.d | |
x = np.ascontiguousarray(x, dtype='float32') | |
Iq = np.ascontiguousarray(Iq, dtype='int64') | |
assert params is None, "params not supported" | |
assert Iq.shape == (n, self.nprobe) | |
if Dq is not None: | |
Dq = np.ascontiguousarray(Dq, dtype='float32') | |
assert Dq.shape == Iq.shape | |
thresh = float(thresh) | |
res = RangeSearchResult(n) | |
self.range_search_preassigned_c( | |
n, swig_ptr(x), thresh, | |
swig_ptr(Iq), swig_ptr(Dq), | |
res | |
) | |
# get pointers and copy them | |
lims = rev_swig_ptr(res.lims, n + 1).copy() | |
nd = int(lims[-1]) | |
D = rev_swig_ptr(res.distances, nd).copy() | |
I = rev_swig_ptr(res.labels, nd).copy() | |
return lims, D, I | |
def replacement_sa_encode(self, x, codes=None): | |
n, d = x.shape | |
assert d == self.d | |
x = np.ascontiguousarray(x, dtype='float32') | |
if codes is None: | |
codes = np.empty((n, self.sa_code_size()), dtype=np.uint8) | |
else: | |
assert codes.shape == (n, self.sa_code_size()) | |
self.sa_encode_c(n, swig_ptr(x), swig_ptr(codes)) | |
return codes | |
def replacement_sa_decode(self, codes, x=None): | |
n, cs = codes.shape | |
assert cs == self.sa_code_size() | |
codes = _check_dtype_uint8(codes) | |
if x is None: | |
x = np.empty((n, self.d), dtype=np.float32) | |
else: | |
assert x.shape == (n, self.d) | |
self.sa_decode_c(n, swig_ptr(codes), swig_ptr(x)) | |
return x | |
def replacement_add_sa_codes(self, codes, ids=None): | |
n, cs = codes.shape | |
assert cs == self.sa_code_size() | |
codes = _check_dtype_uint8(codes) | |
if ids is not None: | |
assert ids.shape == (n,) | |
ids = swig_ptr(ids) | |
self.add_sa_codes_c(n, swig_ptr(codes), ids) | |
def replacement_permute_entries(self, perm): | |
n, = perm.shape | |
assert n == self.ntotal | |
perm = np.ascontiguousarray(perm, dtype='int64') | |
self.permute_entries_c(faiss.swig_ptr(perm)) | |
replace_method(the_class, 'add', replacement_add) | |
replace_method(the_class, 'add_with_ids', replacement_add_with_ids) | |
replace_method(the_class, 'assign', replacement_assign) | |
replace_method(the_class, 'train', replacement_train) | |
replace_method(the_class, 'search', replacement_search) | |
replace_method(the_class, 'remove_ids', replacement_remove_ids) | |
replace_method(the_class, 'reconstruct', replacement_reconstruct) | |
replace_method(the_class, 'reconstruct_batch', | |
replacement_reconstruct_batch) | |
replace_method(the_class, 'reconstruct_n', replacement_reconstruct_n) | |
replace_method(the_class, 'range_search', replacement_range_search) | |
replace_method(the_class, 'update_vectors', replacement_update_vectors, | |
ignore_missing=True) | |
replace_method(the_class, 'search_and_reconstruct', | |
replacement_search_and_reconstruct, ignore_missing=True) | |
replace_method(the_class, 'search_and_return_codes', | |
replacement_search_and_return_codes, ignore_missing=True) | |
# these ones are IVF-specific | |
replace_method(the_class, 'search_preassigned', | |
replacement_search_preassigned, ignore_missing=True) | |
replace_method(the_class, 'range_search_preassigned', | |
replacement_range_search_preassigned, ignore_missing=True) | |
replace_method(the_class, 'sa_encode', replacement_sa_encode) | |
replace_method(the_class, 'sa_decode', replacement_sa_decode) | |
replace_method(the_class, 'add_sa_codes', replacement_add_sa_codes, | |
ignore_missing=True) | |
replace_method(the_class, 'permute_entries', replacement_permute_entries, | |
ignore_missing=True) | |
# get/set state for pickle | |
# the data is serialized to std::vector -> numpy array -> python bytes | |
# so not very efficient for now. | |
def index_getstate(self): | |
return {"this": faiss.serialize_index(self).tobytes()} | |
def index_setstate(self, st): | |
index2 = faiss.deserialize_index(np.frombuffer(st["this"], dtype="uint8")) | |
self.this = index2.this | |
the_class.__getstate__ = index_getstate | |
the_class.__setstate__ = index_setstate | |
def handle_IndexBinary(the_class): | |
def replacement_add(self, x): | |
n, d = x.shape | |
x = _check_dtype_uint8(x) | |
assert d == self.code_size | |
self.add_c(n, swig_ptr(x)) | |
def replacement_add_with_ids(self, x, ids): | |
n, d = x.shape | |
x = _check_dtype_uint8(x) | |
ids = np.ascontiguousarray(ids, dtype='int64') | |
assert d == self.code_size | |
assert ids.shape == (n, ), 'not same nb of vectors as ids' | |
self.add_with_ids_c(n, swig_ptr(x), swig_ptr(ids)) | |
def replacement_train(self, x): | |
n, d = x.shape | |
x = _check_dtype_uint8(x) | |
assert d == self.code_size | |
self.train_c(n, swig_ptr(x)) | |
def replacement_reconstruct(self, key): | |
x = np.empty(self.code_size, dtype=np.uint8) | |
self.reconstruct_c(key, swig_ptr(x)) | |
return x | |
def replacement_reconstruct_n(self, n0=0, ni=-1, x=None): | |
if ni == -1: | |
ni = self.ntotal - n0 | |
if x is None: | |
x = np.empty((ni, self.code_size), dtype=np.uint8) | |
else: | |
assert x.shape == (ni, self.code_size) | |
self.reconstruct_n_c(n0, ni, swig_ptr(x)) | |
return x | |
def replacement_search(self, x, k): | |
x = _check_dtype_uint8(x) | |
n, d = x.shape | |
assert d == self.code_size | |
assert k > 0 | |
distances = np.empty((n, k), dtype=np.int32) | |
labels = np.empty((n, k), dtype=np.int64) | |
self.search_c(n, swig_ptr(x), | |
k, swig_ptr(distances), | |
swig_ptr(labels)) | |
return distances, labels | |
def replacement_search_preassigned(self, x, k, Iq, Dq): | |
n, d = x.shape | |
x = _check_dtype_uint8(x) | |
assert d == self.code_size | |
assert k > 0 | |
D = np.empty((n, k), dtype=np.int32) | |
I = np.empty((n, k), dtype=np.int64) | |
Iq = np.ascontiguousarray(Iq, dtype='int64') | |
assert Iq.shape == (n, self.nprobe) | |
if Dq is not None: | |
Dq = np.ascontiguousarray(Dq, dtype='int32') | |
assert Dq.shape == Iq.shape | |
self.search_preassigned_c( | |
n, swig_ptr(x), | |
k, | |
swig_ptr(Iq), swig_ptr(Dq), | |
swig_ptr(D), swig_ptr(I), | |
False | |
) | |
return D, I | |
def replacement_range_search(self, x, thresh): | |
n, d = x.shape | |
x = _check_dtype_uint8(x) | |
assert d == self.code_size | |
res = RangeSearchResult(n) | |
self.range_search_c(n, swig_ptr(x), thresh, res) | |
# get pointers and copy them | |
lims = rev_swig_ptr(res.lims, n + 1).copy() | |
nd = int(lims[-1]) | |
D = rev_swig_ptr(res.distances, nd).copy() | |
I = rev_swig_ptr(res.labels, nd).copy() | |
return lims, D, I | |
def replacement_range_search_preassigned(self, x, thresh, Iq, Dq, *, params=None): | |
n, d = x.shape | |
x = _check_dtype_uint8(x) | |
assert d == self.code_size | |
Iq = np.ascontiguousarray(Iq, dtype='int64') | |
assert params is None, "params not supported" | |
assert Iq.shape == (n, self.nprobe) | |
if Dq is not None: | |
Dq = np.ascontiguousarray(Dq, dtype='int32') | |
assert Dq.shape == Iq.shape | |
thresh = int(thresh) | |
res = RangeSearchResult(n) | |
self.range_search_preassigned_c( | |
n, swig_ptr(x), thresh, | |
swig_ptr(Iq), swig_ptr(Dq), | |
res | |
) | |
# get pointers and copy them | |
lims = rev_swig_ptr(res.lims, n + 1).copy() | |
nd = int(lims[-1]) | |
D = rev_swig_ptr(res.distances, nd).copy() | |
I = rev_swig_ptr(res.labels, nd).copy() | |
return lims, D, I | |
def replacement_remove_ids(self, x): | |
if isinstance(x, IDSelector): | |
sel = x | |
else: | |
assert x.ndim == 1 | |
x = np.ascontiguousarray(x, dtype='int64') | |
sel = IDSelectorBatch(x.size, swig_ptr(x)) | |
return self.remove_ids_c(sel) | |
replace_method(the_class, 'add', replacement_add) | |
replace_method(the_class, 'add_with_ids', replacement_add_with_ids) | |
replace_method(the_class, 'train', replacement_train) | |
replace_method(the_class, 'search', replacement_search) | |
replace_method(the_class, 'range_search', replacement_range_search) | |
replace_method(the_class, 'reconstruct', replacement_reconstruct) | |
replace_method(the_class, 'reconstruct_n', replacement_reconstruct_n) | |
replace_method(the_class, 'remove_ids', replacement_remove_ids) | |
replace_method(the_class, 'search_preassigned', | |
replacement_search_preassigned, ignore_missing=True) | |
replace_method(the_class, 'range_search_preassigned', | |
replacement_range_search_preassigned, ignore_missing=True) | |
def handle_VectorTransform(the_class): | |
def apply_method(self, x): | |
n, d = x.shape | |
x = np.ascontiguousarray(x, dtype='float32') | |
assert d == self.d_in | |
y = np.empty((n, self.d_out), dtype=np.float32) | |
self.apply_noalloc(n, swig_ptr(x), swig_ptr(y)) | |
return y | |
def replacement_reverse_transform(self, x): | |
n, d = x.shape | |
x = np.ascontiguousarray(x, dtype='float32') | |
assert d == self.d_out | |
y = np.empty((n, self.d_in), dtype=np.float32) | |
self.reverse_transform_c(n, swig_ptr(x), swig_ptr(y)) | |
return y | |
def replacement_vt_train(self, x): | |
n, d = x.shape | |
x = np.ascontiguousarray(x, dtype='float32') | |
assert d == self.d_in | |
self.train_c(n, swig_ptr(x)) | |
replace_method(the_class, 'train', replacement_vt_train) | |
# apply is reserved in Pyton... | |
the_class.apply_py = apply_method | |
the_class.apply = apply_method | |
replace_method(the_class, 'reverse_transform', | |
replacement_reverse_transform) | |
def handle_AutoTuneCriterion(the_class): | |
def replacement_set_groundtruth(self, D, I): | |
if D: | |
assert I.shape == D.shape | |
self.nq, self.gt_nnn = I.shape | |
self.set_groundtruth_c( | |
self.gt_nnn, swig_ptr(D) if D else None, swig_ptr(I)) | |
def replacement_evaluate(self, D, I): | |
assert I.shape == D.shape | |
assert I.shape == (self.nq, self.nnn) | |
return self.evaluate_c(swig_ptr(D), swig_ptr(I)) | |
replace_method(the_class, 'set_groundtruth', replacement_set_groundtruth) | |
replace_method(the_class, 'evaluate', replacement_evaluate) | |
def handle_ParameterSpace(the_class): | |
def replacement_explore(self, index, xq, crit): | |
assert xq.shape == (crit.nq, index.d) | |
xq = np.ascontiguousarray(xq, dtype='float32') | |
ops = OperatingPoints() | |
self.explore_c(index, crit.nq, swig_ptr(xq), | |
crit, ops) | |
return ops | |
replace_method(the_class, 'explore', replacement_explore) | |
def handle_MatrixStats(the_class): | |
original_init = the_class.__init__ | |
def replacement_init(self, m): | |
assert len(m.shape) == 2 | |
m = np.ascontiguousarray(m, dtype='float32') | |
original_init(self, m.shape[0], m.shape[1], swig_ptr(m)) | |
the_class.__init__ = replacement_init | |
def handle_IOWriter(the_class): | |
""" add a write_bytes method """ | |
def write_bytes(self, b): | |
return self(swig_ptr(b), 1, len(b)) | |
the_class.write_bytes = write_bytes | |
def handle_IOReader(the_class): | |
""" add a read_bytes method """ | |
def read_bytes(self, totsz): | |
buf = bytearray(totsz) | |
was_read = self(swig_ptr(buf), 1, len(buf)) | |
return bytes(buf[:was_read]) | |
the_class.read_bytes = read_bytes | |
def handle_IndexRowwiseMinMax(the_class): | |
def replacement_train_inplace(self, x): | |
"""Trains the index on a representative set of vectors inplace. | |
The index must be trained before vectors can be added to it. | |
This call WILL change the values in the input array, because | |
of two scaling proceduces being performed inplace. | |
Parameters | |
---------- | |
x : array_like | |
Query vectors, shape (n, d) where d is appropriate for the index. | |
`dtype` must be float32. | |
""" | |
n, d = x.shape | |
assert d == self.d | |
x = np.ascontiguousarray(x, dtype='float32') | |
self.train_inplace_c(n, swig_ptr(x)) | |
replace_method(the_class, 'train_inplace', replacement_train_inplace) | |
def handle_CodePacker(the_class): | |
def replacement_pack_1(self, x, offset, block): | |
assert x.shape == (self.code_size,) | |
nblock, block_size = block.shape | |
assert block_size == self.block_size | |
assert 0 <= offset < block_size * self.nvec | |
self.pack_1_c(swig_ptr(x), offset, faiss.swig_ptr(block)) | |
def replacement_unpack_1(self, block, offset): | |
nblock, block_size = block.shape | |
assert block_size == self.block_size | |
assert 0 <= offset < block_size * self.nvec | |
x = np.zeros(self.code_size, dtype='uint8') | |
self.unpack_1_c(faiss.swig_ptr(block), offset, swig_ptr(x)) | |
return x | |
replace_method(the_class, 'pack_1', replacement_pack_1) | |
replace_method(the_class, 'unpack_1', replacement_unpack_1) | |
###################################################### | |
# MapLong2Long interface | |
###################################################### | |
def handle_MapLong2Long(the_class): | |
def replacement_map_add(self, keys, vals): | |
n, = keys.shape | |
assert (n,) == vals.shape | |
self.add_c(n, swig_ptr(keys), swig_ptr(vals)) | |
def replacement_map_search_multiple(self, keys): | |
n, = keys.shape | |
vals = np.empty(n, dtype='int64') | |
self.search_multiple_c(n, swig_ptr(keys), swig_ptr(vals)) | |
return vals | |
replace_method(the_class, 'add', replacement_map_add) | |
replace_method(the_class, 'search_multiple', | |
replacement_map_search_multiple) | |
###################################################### | |
# SearchParameters and related interface | |
###################################################### | |
def add_to_referenced_objects(self, ref): | |
if not hasattr(self, 'referenced_objects'): | |
self.referenced_objects = [ref] | |
else: | |
self.referenced_objects.append(ref) | |
class RememberSwigOwnership: | |
""" | |
SWIG's seattr transfers ownership of SWIG wrapped objects to the class | |
(btw this seems to contradict https://www.swig.org/Doc1.3/Python.html#Python_nn22 | |
31.4.2) | |
This interferes with how we manage ownership: with the referenced_objects | |
table. Therefore, we reset the thisown field in this context manager. | |
""" | |
def __init__(self, obj): | |
self.obj = obj | |
def __enter__(self): | |
if hasattr(self.obj, "thisown"): | |
self.old_thisown = self.obj.thisown | |
else: | |
self.old_thisown = None | |
def __exit__(self, *ignored): | |
if self.old_thisown is not None: | |
self.obj.thisown = self.old_thisown | |
def handle_SearchParameters(the_class): | |
""" this wrapper is to enable initializations of the form | |
SearchParametersXX(a=3, b=SearchParamsYY) | |
This also requires the enclosing class to keep a reference on the | |
sub-object, since the C++ code assumes the object ownwership is | |
handled externally. | |
""" | |
the_class.original_init = the_class.__init__ | |
def replacement_init(self, **args): | |
self.original_init() | |
for k, v in args.items(): | |
assert hasattr(self, k) | |
with RememberSwigOwnership(v): | |
setattr(self, k, v) | |
if type(v) not in (int, float, bool, str): | |
add_to_referenced_objects(self, v) | |
the_class.__init__ = replacement_init | |
def handle_IDSelectorSubset(the_class, class_owns, force_int64=True): | |
the_class.original_init = the_class.__init__ | |
def replacement_init(self, *args): | |
if len(args) == 1: | |
# assume it's an array | |
subset, = args | |
if force_int64: | |
subset = np.ascontiguousarray(subset, dtype='int64') | |
args = (len(subset), faiss.swig_ptr(subset)) | |
if not class_owns: | |
add_to_referenced_objects(self, subset) | |
self.original_init(*args) | |
the_class.__init__ = replacement_init | |
def handle_CodeSet(the_class): | |
def replacement_insert(self, codes, inserted=None): | |
n, d = codes.shape | |
assert d == self.d | |
codes = np.ascontiguousarray(codes, dtype=np.uint8) | |
if inserted is None: | |
inserted = np.empty(n, dtype=bool) | |
else: | |
assert inserted.shape == (n, ) | |
self.insert_c(n, swig_ptr(codes), swig_ptr(inserted)) | |
return inserted | |
replace_method(the_class, 'insert', replacement_insert) | |