Spaces:
Running
Running
# Copyright (c) Facebook, Inc. and its affiliates. | |
# | |
# This source code is licensed under the MIT license found in the | |
# LICENSE file in the root directory of this source tree. | |
# start delvewheel patch | |
def _delvewheel_patch_1_5_2(): | |
import os | |
libs_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, 'faiss_cpu.libs')) | |
if os.path.isdir(libs_dir): | |
os.add_dll_directory(libs_dir) | |
_delvewheel_patch_1_5_2() | |
del _delvewheel_patch_1_5_2 | |
# end delvewheel patch | |
# @nolint | |
# not linting this file because it imports * from swigfaiss, which | |
# causes a ton of useless warnings. | |
import numpy as np | |
import sys | |
import inspect | |
# We import * so that the symbol foo can be accessed as faiss.foo. | |
from .loader import * | |
# additional wrappers | |
from faiss import class_wrappers | |
from faiss.gpu_wrappers import * | |
from faiss.array_conversions import * | |
from faiss.extra_wrappers import kmin, kmax, pairwise_distances, rand, randint, \ | |
lrand, randn, rand_smooth_vectors, eval_intersection, normalize_L2, \ | |
ResultHeap, knn, Kmeans, checksum, matrix_bucket_sort_inplace, bucket_sort, \ | |
merge_knn_results, MapInt64ToInt64, knn_hamming, \ | |
pack_bitstrings, unpack_bitstrings | |
__version__ = "%d.%d.%d" % (FAISS_VERSION_MAJOR, | |
FAISS_VERSION_MINOR, | |
FAISS_VERSION_PATCH) | |
class_wrappers.handle_Clustering(Clustering) | |
class_wrappers.handle_Clustering1D(Clustering1D) | |
class_wrappers.handle_MatrixStats(MatrixStats) | |
class_wrappers.handle_IOWriter(IOWriter) | |
class_wrappers.handle_IOReader(IOReader) | |
class_wrappers.handle_AutoTuneCriterion(AutoTuneCriterion) | |
class_wrappers.handle_ParameterSpace(ParameterSpace) | |
class_wrappers.handle_NSG(IndexNSG) | |
class_wrappers.handle_MapLong2Long(MapLong2Long) | |
class_wrappers.handle_IDSelectorSubset(IDSelectorBatch, class_owns=True) | |
class_wrappers.handle_IDSelectorSubset(IDSelectorArray, class_owns=False) | |
class_wrappers.handle_IDSelectorSubset(IDSelectorBitmap, class_owns=False, force_int64=False) | |
class_wrappers.handle_CodeSet(CodeSet) | |
this_module = sys.modules[__name__] | |
# handle sub-classes | |
for symbol in dir(this_module): | |
obj = getattr(this_module, symbol) | |
# print symbol, isinstance(obj, (type, types.ClassType)) | |
if inspect.isclass(obj): | |
the_class = obj | |
if issubclass(the_class, Index): | |
class_wrappers.handle_Index(the_class) | |
if issubclass(the_class, IndexBinary): | |
class_wrappers.handle_IndexBinary(the_class) | |
if issubclass(the_class, VectorTransform): | |
class_wrappers.handle_VectorTransform(the_class) | |
if issubclass(the_class, Quantizer): | |
class_wrappers.handle_Quantizer(the_class) | |
if issubclass(the_class, IndexRowwiseMinMax) or \ | |
issubclass(the_class, IndexRowwiseMinMaxFP16): | |
class_wrappers.handle_IndexRowwiseMinMax(the_class) | |
if issubclass(the_class, SearchParameters): | |
class_wrappers.handle_SearchParameters(the_class) | |
if issubclass(the_class, CodePacker): | |
class_wrappers.handle_CodePacker(the_class) | |
############################################################################## | |
# For some classes (IndexIVF, IDSelector), the object holds a reference to | |
# a C++ object (eg. the quantizer object of IndexIVF). We don't transfer the | |
# ownership to the C++ object (ie. set own_quantizer=true), but instead we add | |
# a reference in the Python class wrapper instead. This is done via an | |
# additional referenced_objects field. | |
# | |
# Since the semantics of ownership in the C++ classes are sometimes irregular, | |
# these references are added manually using the functions below. | |
############################################################################## | |
def add_ref_in_constructor(the_class, parameter_no): | |
# adds a reference to parameter parameter_no in self | |
# so that that parameter does not get deallocated before self | |
original_init = the_class.__init__ | |
def replacement_init(self, *args): | |
original_init(self, *args) | |
self.referenced_objects = [args[parameter_no]] | |
def replacement_init_multiple(self, *args): | |
original_init(self, *args) | |
pset = parameter_no[len(args)] | |
self.referenced_objects = [args[no] for no in pset] | |
if type(parameter_no) == dict: | |
# a list of parameters to keep, depending on the number of arguments | |
the_class.__init__ = replacement_init_multiple | |
else: | |
the_class.__init__ = replacement_init | |
def add_to_referenced_objects(self, ref): | |
if not hasattr(self, 'referenced_objects'): | |
self.referenced_objects = [ref] | |
else: | |
self.referenced_objects.append(ref) | |
def add_ref_in_method(the_class, method_name, parameter_no): | |
original_method = getattr(the_class, method_name) | |
def replacement_method(self, *args): | |
ref = args[parameter_no] | |
add_to_referenced_objects(self, ref) | |
return original_method(self, *args) | |
setattr(the_class, method_name, replacement_method) | |
def add_ref_in_method_explicit_own(the_class, method_name): | |
# for methods of format set_XXX(object, own) | |
original_method = getattr(the_class, method_name) | |
def replacement_method(self, ref, own=False): | |
if not own: | |
if not hasattr(self, 'referenced_objects'): | |
self.referenced_objects = [ref] | |
else: | |
self.referenced_objects.append(ref) | |
else: | |
# transfer ownership to C++ class | |
ref.this.disown() | |
return original_method(self, ref, own) | |
setattr(the_class, method_name, replacement_method) | |
def add_ref_in_function(function_name, parameter_no): | |
# assumes the function returns an object | |
original_function = getattr(this_module, function_name) | |
def replacement_function(*args): | |
result = original_function(*args) | |
ref = args[parameter_no] | |
result.referenced_objects = [ref] | |
return result | |
setattr(this_module, function_name, replacement_function) | |
add_ref_in_constructor(IndexIVFFlat, 0) | |
add_ref_in_constructor(IndexIVFFlatDedup, 0) | |
add_ref_in_constructor(IndexPreTransform, {2: [0, 1], 1: [0]}) | |
add_ref_in_method(IndexPreTransform, 'prepend_transform', 0) | |
add_ref_in_constructor(IndexIVFPQ, 0) | |
add_ref_in_constructor(IndexIVFPQR, 0) | |
add_ref_in_constructor(IndexIVFPQFastScan, 0) | |
add_ref_in_constructor(IndexIVFResidualQuantizer, 0) | |
add_ref_in_constructor(IndexIVFLocalSearchQuantizer, 0) | |
add_ref_in_constructor(IndexIVFResidualQuantizerFastScan, 0) | |
add_ref_in_constructor(IndexIVFLocalSearchQuantizerFastScan, 0) | |
add_ref_in_constructor(IndexIVFSpectralHash, 0) | |
add_ref_in_method_explicit_own(IndexIVFSpectralHash, "replace_vt") | |
add_ref_in_constructor(Index2Layer, 0) | |
add_ref_in_constructor(Level1Quantizer, 0) | |
add_ref_in_constructor(IndexIVFScalarQuantizer, 0) | |
add_ref_in_constructor(IndexRowwiseMinMax, 0) | |
add_ref_in_constructor(IndexRowwiseMinMaxFP16, 0) | |
add_ref_in_constructor(IndexIDMap, 0) | |
add_ref_in_constructor(IndexIDMap2, 0) | |
add_ref_in_constructor(IndexHNSW, 0) | |
add_ref_in_method(IndexShards, 'add_shard', 0) | |
add_ref_in_method(IndexBinaryShards, 'add_shard', 0) | |
add_ref_in_constructor(IndexRefineFlat, {2: [0], 1: [0]}) | |
add_ref_in_constructor(IndexRefine, {2: [0, 1]}) | |
add_ref_in_constructor(IndexBinaryIVF, 0) | |
add_ref_in_constructor(IndexBinaryFromFloat, 0) | |
add_ref_in_constructor(IndexBinaryIDMap, 0) | |
add_ref_in_constructor(IndexBinaryIDMap2, 0) | |
add_ref_in_method(IndexReplicas, 'addIndex', 0) | |
add_ref_in_method(IndexBinaryReplicas, 'addIndex', 0) | |
add_ref_in_constructor(BufferedIOWriter, 0) | |
add_ref_in_constructor(BufferedIOReader, 0) | |
add_ref_in_constructor(IDSelectorNot, 0) | |
add_ref_in_constructor(IDSelectorAnd, slice(2)) | |
add_ref_in_constructor(IDSelectorOr, slice(2)) | |
add_ref_in_constructor(IDSelectorXOr, slice(2)) | |
add_ref_in_constructor(IDSelectorTranslated, slice(2)) | |
add_ref_in_constructor(IDSelectorXOr, slice(2)) | |
add_ref_in_constructor(IndexIVFIndependentQuantizer, slice(3)) | |
# seems really marginal... | |
# remove_ref_from_method(IndexReplicas, 'removeIndex', 0) | |
###################################################### | |
# search_with_parameters interface | |
###################################################### | |
search_with_parameters_c = search_with_parameters | |
def search_with_parameters(index, x, k, params=None, output_stats=False): | |
x = np.ascontiguousarray(x, dtype='float32') | |
n, d = x.shape | |
assert d == index.d | |
if not params: | |
# if not provided use the ones set in the IVF object | |
params = IVFSearchParameters() | |
index_ivf = extract_index_ivf(index) | |
params.nprobe = index_ivf.nprobe | |
params.max_codes = index_ivf.max_codes | |
nb_dis = np.empty(1, 'uint64') | |
ms_per_stage = np.empty(3, 'float64') | |
distances = np.empty((n, k), dtype=np.float32) | |
labels = np.empty((n, k), dtype=np.int64) | |
search_with_parameters_c( | |
index, n, swig_ptr(x), | |
k, swig_ptr(distances), | |
swig_ptr(labels), | |
params, swig_ptr(nb_dis), swig_ptr(ms_per_stage) | |
) | |
if not output_stats: | |
return distances, labels | |
else: | |
stats = { | |
'ndis': nb_dis[0], | |
'pre_transform_ms': ms_per_stage[0], | |
'coarse_quantizer_ms': ms_per_stage[1], | |
'invlist_scan_ms': ms_per_stage[2], | |
} | |
return distances, labels, stats | |
range_search_with_parameters_c = range_search_with_parameters | |
def range_search_with_parameters(index, x, radius, params=None, output_stats=False): | |
x = np.ascontiguousarray(x, dtype='float32') | |
n, d = x.shape | |
assert d == index.d | |
if not params: | |
# if not provided use the ones set in the IVF object | |
params = IVFSearchParameters() | |
index_ivf = extract_index_ivf(index) | |
params.nprobe = index_ivf.nprobe | |
params.max_codes = index_ivf.max_codes | |
nb_dis = np.empty(1, 'uint64') | |
ms_per_stage = np.empty(3, 'float64') | |
res = RangeSearchResult(n) | |
range_search_with_parameters_c( | |
index, n, swig_ptr(x), | |
radius, res, | |
params, swig_ptr(nb_dis), swig_ptr(ms_per_stage) | |
) | |
lims = rev_swig_ptr(res.lims, n + 1).copy() | |
nd = int(lims[-1]) | |
Dout = rev_swig_ptr(res.distances, nd).copy() | |
Iout = rev_swig_ptr(res.labels, nd).copy() | |
if not output_stats: | |
return lims, Dout, Iout | |
else: | |
stats = { | |
'ndis': nb_dis[0], | |
'pre_transform_ms': ms_per_stage[0], | |
'coarse_quantizer_ms': ms_per_stage[1], | |
'invlist_scan_ms': ms_per_stage[2], | |
} | |
return lims, Dout, Iout, stats | |
# IndexProxy was renamed to IndexReplicas, remap the old name for any old code | |
# people may have | |
IndexProxy = IndexReplicas | |
ConcatenatedInvertedLists = HStackInvertedLists | |
IndexResidual = IndexResidualQuantizer | |
IVFSearchParameters = SearchParametersIVF | |
########################################### | |
# serialization of indexes to byte arrays | |
########################################### | |
def serialize_index(index): | |
""" convert an index to a numpy uint8 array """ | |
writer = VectorIOWriter() | |
write_index(index, writer) | |
return vector_to_array(writer.data) | |
def deserialize_index(data, io_flags=0): | |
reader = VectorIOReader() | |
copy_array_to_vector(data, reader.data) | |
return read_index(reader, io_flags) | |
def serialize_index_binary(index): | |
""" convert an index to a numpy uint8 array """ | |
writer = VectorIOWriter() | |
write_index_binary(index, writer) | |
return vector_to_array(writer.data) | |
def deserialize_index_binary(data): | |
reader = VectorIOReader() | |
copy_array_to_vector(data, reader.data) | |
return read_index_binary(reader) | |