Spaces:
Running
Running
File size: 5,020 Bytes
2a0bc63 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import numpy as np
import faiss
from faiss.contrib.inspect_tools import get_invlist_sizes
def add_preassigned(index_ivf, x, a, ids=None):
"""
Add elements to an IVF index, where the assignment is already computed
"""
n, d = x.shape
assert a.shape == (n, )
if isinstance(index_ivf, faiss.IndexBinaryIVF):
d *= 8
assert d == index_ivf.d
if ids is not None:
assert ids.shape == (n, )
ids = faiss.swig_ptr(ids)
index_ivf.add_core(
n, faiss.swig_ptr(x), ids, faiss.swig_ptr(a)
)
def search_preassigned(index_ivf, xq, k, list_nos, coarse_dis=None):
"""
Perform a search in the IVF index, with predefined lists to search into.
Supports indexes with pretransforms (as opposed to the
IndexIVF.search_preassigned, that cannot be applied with pretransform).
"""
if isinstance(index_ivf, faiss.IndexPreTransform):
assert index_ivf.chain.size() == 1, "chain must have only one component"
transform = faiss.downcast_VectorTransform(index_ivf.chain.at(0))
xq = transform.apply(xq)
index_ivf = faiss.downcast_index(index_ivf.index)
n, d = xq.shape
if isinstance(index_ivf, faiss.IndexBinaryIVF):
d *= 8
dis_type = "int32"
else:
dis_type = "float32"
assert d == index_ivf.d
assert list_nos.shape == (n, index_ivf.nprobe)
# the coarse distances are used in IVFPQ with L2 distance and
# by_residual=True otherwise we provide dummy coarse_dis
if coarse_dis is None:
coarse_dis = np.zeros((n, index_ivf.nprobe), dtype=dis_type)
else:
assert coarse_dis.shape == (n, index_ivf.nprobe)
return index_ivf.search_preassigned(xq, k, list_nos, coarse_dis)
def range_search_preassigned(index_ivf, x, radius, list_nos, coarse_dis=None):
"""
Perform a range search in the IVF index, with predefined lists to
search into
"""
n, d = x.shape
if isinstance(index_ivf, faiss.IndexBinaryIVF):
d *= 8
dis_type = "int32"
else:
dis_type = "float32"
# the coarse distances are used in IVFPQ with L2 distance and
# by_residual=True otherwise we provide dummy coarse_dis
if coarse_dis is None:
coarse_dis = np.empty((n, index_ivf.nprobe), dtype=dis_type)
else:
assert coarse_dis.shape == (n, index_ivf.nprobe)
assert d == index_ivf.d
assert list_nos.shape == (n, index_ivf.nprobe)
res = faiss.RangeSearchResult(n)
sp = faiss.swig_ptr
index_ivf.range_search_preassigned_c(
n, sp(x), radius,
sp(list_nos), sp(coarse_dis),
res
)
# get pointers and copy them
lims = faiss.rev_swig_ptr(res.lims, n + 1).copy()
num_results = int(lims[-1])
dist = faiss.rev_swig_ptr(res.distances, num_results).copy()
indices = faiss.rev_swig_ptr(res.labels, num_results).copy()
return lims, dist, indices
def replace_ivf_quantizer(index_ivf, new_quantizer):
""" replace the IVF quantizer with a flat quantizer and return the
old quantizer"""
if new_quantizer.ntotal == 0:
centroids = index_ivf.quantizer.reconstruct_n()
new_quantizer.train(centroids)
new_quantizer.add(centroids)
else:
assert new_quantizer.ntotal == index_ivf.nlist
# cleanly dealloc old quantizer
old_own = index_ivf.own_fields
index_ivf.own_fields = False
old_quantizer = faiss.downcast_index(index_ivf.quantizer)
old_quantizer.this.own(old_own)
index_ivf.quantizer = new_quantizer
if hasattr(index_ivf, "referenced_objects"):
index_ivf.referenced_objects.append(new_quantizer)
else:
index_ivf.referenced_objects = [new_quantizer]
return old_quantizer
def permute_invlists(index_ivf, perm):
""" Apply some permutation to the inverted lists, and modify the quantizer
entries accordingly.
Perm is an array of size nlist, where old_index = perm[new_index]
"""
nlist, = perm.shape
assert index_ivf.nlist == nlist
quantizer = faiss.downcast_index(index_ivf.quantizer)
assert quantizer.ntotal == index_ivf.nlist
perm = np.ascontiguousarray(perm, dtype='int64')
# just make sure it's a permutation...
bc = np.bincount(perm, minlength=nlist)
assert np.all(bc == np.ones(nlist, dtype=int))
# handle quantizer
quantizer.permute_entries(perm)
# handle inverted lists
invlists = faiss.downcast_InvertedLists(index_ivf.invlists)
invlists.permute_invlists(faiss.swig_ptr(perm))
def sort_invlists_by_size(index_ivf):
invlist_sizes = get_invlist_sizes(index_ivf.invlists)
perm = np.argsort(invlist_sizes)
permute_invlists(index_ivf, perm)
|