# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# @nolint

# not linting this file because it imports * from swigfaiss, which
# causes a ton of useless warnings.

import numpy as np

from faiss.loader import *

import faiss
import collections.abc

###########################################
# Wrapper for a few functions
###########################################


def kmin(array, k):
    """return k smallest values (and their indices) of the lines of a
    float32 array"""
    array = np.ascontiguousarray(array, dtype='float32')
    m, n = array.shape
    I = np.zeros((m, k), dtype='int64')
    D = np.zeros((m, k), dtype='float32')
    ha = faiss.float_maxheap_array_t()
    ha.ids = swig_ptr(I)
    ha.val = swig_ptr(D)
    ha.nh = m
    ha.k = k
    ha.heapify()
    ha.addn(n, swig_ptr(array))
    ha.reorder()
    return D, I


def kmax(array, k):
    """return k largest values (and their indices) of the lines of a
    float32 array"""
    array = np.ascontiguousarray(array, dtype='float32')
    m, n = array.shape
    I = np.zeros((m, k), dtype='int64')
    D = np.zeros((m, k), dtype='float32')
    ha = faiss.float_minheap_array_t()
    ha.ids = swig_ptr(I)
    ha.val = swig_ptr(D)
    ha.nh = m
    ha.k = k
    ha.heapify()
    ha.addn(n, swig_ptr(array))
    ha.reorder()
    return D, I


def pairwise_distances(xq, xb, metric=METRIC_L2, metric_arg=0):
    """compute the whole pairwise distance matrix between two sets of
    vectors"""
    xq = np.ascontiguousarray(xq, dtype='float32')
    xb = np.ascontiguousarray(xb, dtype='float32')
    nq, d = xq.shape
    nb, d2 = xb.shape
    assert d == d2
    dis = np.empty((nq, nb), dtype='float32')
    if metric == METRIC_L2:
        pairwise_L2sqr(
            d, nq, swig_ptr(xq),
            nb, swig_ptr(xb),
            swig_ptr(dis))
    elif metric == METRIC_INNER_PRODUCT:
        dis[:] = xq @ xb.T
    else:
        pairwise_extra_distances(
            d, nq, swig_ptr(xq),
            nb, swig_ptr(xb),
            metric, metric_arg,
            swig_ptr(dis))
    return dis


def rand(n, seed=12345):
    res = np.empty(n, dtype='float32')
    float_rand(swig_ptr(res), res.size, seed)
    return res


def randint(n, seed=12345, vmax=None):
    res = np.empty(n, dtype='int64')
    if vmax is None:
        int64_rand(swig_ptr(res), res.size, seed)
    else:
        int64_rand_max(swig_ptr(res), res.size, vmax, seed)
    return res


lrand = randint


def randn(n, seed=12345):
    res = np.empty(n, dtype='float32')
    float_randn(swig_ptr(res), res.size, seed)
    return res


def checksum(a):
    """ compute a checksum for quick-and-dirty comparisons of arrays """
    a = a.view('uint8')
    if a.ndim == 1:
        return bvec_checksum(a.size, swig_ptr(a))
    n, d = a.shape
    cs = np.zeros(n, dtype='uint64')
    bvecs_checksum(n, d, swig_ptr(a), swig_ptr(cs))
    return cs


rand_smooth_vectors_c = rand_smooth_vectors

def rand_smooth_vectors(n, d, seed=1234):
    res = np.empty((n, d), dtype='float32')
    rand_smooth_vectors_c(n, d, swig_ptr(res), seed)
    return res


def eval_intersection(I1, I2):
    """ total size of the intersection between corresponding lines of
    two result tables """
    I1 = np.ascontiguousarray(I1, dtype='int64')
    I2 = np.ascontiguousarray(I2, dtype='int64')
    n = I1.shape[0]
    assert I2.shape[0] == n
    k1, k2 = I1.shape[1], I2.shape[1]
    ninter = 0
    for i in range(n):
        ninter += ranklist_intersection_size(
            k1, swig_ptr(I1[i]), k2, swig_ptr(I2[i]))
    return ninter


def normalize_L2(x):
    """L2-normalize the rows of x in place (x must be a contiguous
    float32 array)."""
    fvec_renorm_L2(x.shape[1], x.shape[0], swig_ptr(x))
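
# Minimal usage sketch (illustrative only; the _demo_* helpers in this file
# are hypothetical additions, not part of the faiss API, and are never
# called at import time). Shows how kmin/kmax relate to pairwise_distances:
# kmin over a distance matrix is a brute-force k-nearest-neighbor search.

def _demo_kmin_kmax():
    x = rand(4 * 10, seed=42).reshape(4, 10)  # 4 rows of 10 random floats
    Dmin, Imin = kmin(x, 3)     # 3 smallest values per row + column indices
    Dmax, Imax = kmax(x, 3)     # 3 largest values per row + column indices
    dis = pairwise_distances(x, x)   # squared L2 distances by default
    D, I = kmin(dis, 3)              # I[i, 0] == i: each row matches itself
    return D, I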
bucket_sort_c = bucket_sort

def bucket_sort(tab, nbucket=None, nt=0):
    """Perform a bucket sort on a table of integers.

    Parameters
    ----------
    tab : array_like
        elements to sort, max value nbucket - 1
    nbucket : integer
        number of buckets, None if unknown
    nt : integer
        number of threads to use (0 = use unthreaded codepath)

    Returns
    -------
    lims : array_like
        cumulative sum of bucket sizes (size nbucket + 1)
    perm : array_like
        perm[lims[i] : lims[i + 1]] contains the indices of bucket #i
        (size tab.size)
    """
    tab = np.ascontiguousarray(tab, dtype="int64")
    if nbucket is None:
        nbucket = int(tab.max() + 1)
    lims = np.empty(nbucket + 1, dtype='int64')
    perm = np.empty(tab.size, dtype='int64')
    bucket_sort_c(
        tab.size, faiss.swig_ptr(tab.view('uint64')),
        nbucket, faiss.swig_ptr(lims), faiss.swig_ptr(perm), nt
    )
    return lims, perm


matrix_bucket_sort_inplace_c = matrix_bucket_sort_inplace

def matrix_bucket_sort_inplace(tab, nbucket=None, nt=0):
    """Perform a bucket sort on a matrix, recording the original row of
    each element.

    Parameters
    ----------
    tab : array_like
        array of size (N, ncol) that contains the bucket ids, maximum
        value nbucket - 1. On output, the elements are shuffled such
        that the flat array tab.ravel()[lims[i] : lims[i + 1]] contains
        the row numbers of each bucket entry.
    nbucket : integer
        number of buckets (the maximum value in tab should be nbucket - 1)
    nt : integer
        number of threads to use (0 = use unthreaded codepath)

    Returns
    -------
    lims : array_like
        cumulative sum of bucket sizes (size nbucket + 1)
    """
    assert tab.dtype == 'int32' or tab.dtype == 'int64'
    nrow, ncol = tab.shape
    if nbucket is None:
        nbucket = int(tab.max() + 1)
    lims = np.empty(nbucket + 1, dtype='int64')
    matrix_bucket_sort_inplace_c(
        nrow, ncol, faiss.swig_ptr(tab),
        nbucket, faiss.swig_ptr(lims), nt
    )
    return lims
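
# Minimal usage sketch (illustrative only; _demo_bucket_sort is a
# hypothetical helper, not part of the faiss API): bucket_sort groups
# the positions of equal values together.

def _demo_bucket_sort():
    tab = np.array([2, 0, 2, 1], dtype='int64')
    lims, perm = bucket_sort(tab, nbucket=3)
    # lims == [0, 1, 2, 4]: buckets 0, 1, 2 have sizes 1, 1, 2
    # perm == [1, 3, 0, 2]: perm[lims[i]:lims[i + 1]] lists the positions
    # in tab that hold value i
    return lims, perm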
###########################################
# ResultHeap
###########################################

class ResultHeap:
    """Accumulate query results from a sliced dataset. The final result
    will be in self.D, self.I."""

    def __init__(self, nq, k, keep_max=False):
        """
        nq: number of query vectors,
        k: number of results per query
        keep_max: keep the top-k maximum values instead of the minima
        """
        self.I = np.zeros((nq, k), dtype='int64')
        self.D = np.zeros((nq, k), dtype='float32')
        self.nq, self.k = nq, k
        if keep_max:
            heaps = float_minheap_array_t()
        else:
            heaps = float_maxheap_array_t()
        heaps.k = k
        heaps.nh = nq
        heaps.val = swig_ptr(self.D)
        heaps.ids = swig_ptr(self.I)
        heaps.heapify()
        self.heaps = heaps

    def add_result(self, D, I):
        """
        Add results for all heaps.
        D, I should be of size (nh, nres).
        D, I do not need to be in a particular order (heap or sorted).
        """
        nq, kd = D.shape
        D = np.ascontiguousarray(D, dtype='float32')
        I = np.ascontiguousarray(I, dtype='int64')
        assert I.shape == (nq, kd)
        assert nq == self.nq
        self.heaps.addn_with_ids(
            kd, swig_ptr(D),
            swig_ptr(I), kd)

    def add_result_subset(self, subset, D, I):
        """
        Add results for a subset of heaps.
        D, I should hold results for the whole subset.
        As a special case, if I is 1D, all ids are assumed to be the same.
        """
        nsubset, kd = D.shape
        assert nsubset == len(subset)
        assert (
            I.ndim == 2 and D.shape == I.shape or
            I.ndim == 1 and I.shape == (kd, )
        )
        D = np.ascontiguousarray(D, dtype='float32')
        I = np.ascontiguousarray(I, dtype='int64')
        subset = np.ascontiguousarray(subset, dtype='int64')
        id_stride = 0 if I.ndim == 1 else kd
        self.heaps.addn_query_subset_with_ids(
            nsubset, swig_ptr(subset),
            kd, swig_ptr(D), swig_ptr(I), id_stride
        )

    def finalize(self):
        self.heaps.reorder()


def merge_knn_results(Dall, Iall, keep_max=False):
    """
    Merge a set of sorted knn-results obtained from different shards of
    a dataset.
    Dall and Iall are of size (nshard, nq, k); each D[i, j] should be sorted.
    Returns D, I of size (nq, k) as the merged result set.
    """
    assert Iall.shape == Dall.shape
    nshard, n, k = Dall.shape
    Dnew = np.empty((n, k), dtype=Dall.dtype)
    Inew = np.empty((n, k), dtype=Iall.dtype)
    func = merge_knn_results_CMax if keep_max else merge_knn_results_CMin
    func(
        n, k, nshard,
        swig_ptr(Dall), swig_ptr(Iall),
        swig_ptr(Dnew), swig_ptr(Inew)
    )
    return Dnew, Inew


######################################################
# Efficient ID to ID map
######################################################

class MapInt64ToInt64:
    """Hash table mapping int64 keys to int64 values, with a fixed
    power-of-2 capacity."""

    def __init__(self, capacity):
        self.log2_capacity = int(np.log2(capacity))
        assert capacity == 2 ** self.log2_capacity, "need power of 2 capacity"
        self.capacity = capacity
        self.tab = np.empty((capacity, 2), dtype='int64')
        faiss.hashtable_int64_to_int64_init(
            self.log2_capacity, swig_ptr(self.tab))

    def add(self, keys, vals):
        n, = keys.shape
        assert vals.shape == (n,)
        faiss.hashtable_int64_to_int64_add(
            self.log2_capacity, swig_ptr(self.tab),
            n, swig_ptr(keys), swig_ptr(vals))

    def lookup(self, keys):
        n, = keys.shape
        vals = np.empty((n,), dtype='int64')
        faiss.hashtable_int64_to_int64_lookup(
            self.log2_capacity, swig_ptr(self.tab),
            n, swig_ptr(keys), swig_ptr(vals))
        return vals


######################################################
# KNN function
######################################################

def knn(xq, xb, k, metric=METRIC_L2):
    """
    Compute the k nearest neighbors of a set of query vectors without
    constructing an index.

    Parameters
    ----------
    xq : array_like
        Query vectors, shape (nq, d) where the dimension d is the same
        as xb. `dtype` must be float32.
    xb : array_like
        Database vectors, shape (nb, d) where the dimension d is the
        same as xq. `dtype` must be float32.
    k : int
        Number of nearest neighbors.
    metric : MetricType, optional
        distance measure to use (either METRIC_L2 or METRIC_INNER_PRODUCT)

    Returns
    -------
    D : array_like
        Distances of the nearest neighbors, shape (nq, k)
    I : array_like
        Labels of the nearest neighbors, shape (nq, k)
    """
    xq = np.ascontiguousarray(xq, dtype='float32')
    xb = np.ascontiguousarray(xb, dtype='float32')
    nq, d = xq.shape
    nb, d2 = xb.shape
    assert d == d2

    I = np.empty((nq, k), dtype='int64')
    D = np.empty((nq, k), dtype='float32')

    if metric == METRIC_L2:
        knn_L2sqr(
            swig_ptr(xq), swig_ptr(xb),
            d, nq, nb, k, swig_ptr(D), swig_ptr(I)
        )
    elif metric == METRIC_INNER_PRODUCT:
        knn_inner_product(
            swig_ptr(xq), swig_ptr(xb),
            d, nq, nb, k, swig_ptr(D), swig_ptr(I)
        )
    else:
        raise NotImplementedError("only L2 and INNER_PRODUCT are supported")
    return D, I
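
# Minimal usage sketch (illustrative only; _demo_knn_sliced is a
# hypothetical helper, not part of the faiss API): combining knn() on
# database slices with a ResultHeap, the sliced-dataset pattern the
# ResultHeap docstring refers to. Assumes xq, xb are float32 arrays and
# that every slice holds at least k vectors.

def _demo_knn_sliced(xq, xb, k, slice_size=1000):
    rh = ResultHeap(xq.shape[0], k)
    for i0 in range(0, xb.shape[0], slice_size):
        D, I = knn(xq, xb[i0:i0 + slice_size], k)
        rh.add_result(D, I + i0)   # shift slice-local ids to global ids
    rh.finalize()                  # sort each heap into ascending order
    return rh.D, rh.I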
def knn_hamming(xq, xb, k, variant="hc"):
    """
    Compute the k nearest neighbors of a set of vectors without
    constructing an index.

    Parameters
    ----------
    xq : array_like
        Query vectors, shape (nq, d) where d is the number of bits / 8.
        `dtype` must be uint8.
    xb : array_like
        Database vectors, shape (nb, d) where d is the number of bits / 8.
        `dtype` must be uint8.
    k : int
        Number of nearest neighbors.
    variant : string
        Function variant to use, either "mc" (counter) or "hc" (heap)

    Returns
    -------
    D : array_like
        Distances of the nearest neighbors, shape (nq, k)
    I : array_like
        Labels of the nearest neighbors, shape (nq, k)
    """
    nq, d = xq.shape
    nb, d2 = xb.shape
    assert d == d2

    D = np.empty((nq, k), dtype='int32')
    I = np.empty((nq, k), dtype='int64')

    if variant == "hc":
        heap = faiss.int_maxheap_array_t()
        heap.k = k
        heap.nh = nq
        heap.ids = faiss.swig_ptr(I)
        heap.val = faiss.swig_ptr(D)
        faiss.hammings_knn_hc(
            heap, faiss.swig_ptr(xq), faiss.swig_ptr(xb), nb, d, 1
        )
    elif variant == "mc":
        faiss.hammings_knn_mc(
            faiss.swig_ptr(xq), faiss.swig_ptr(xb), nq, nb, k, d,
            faiss.swig_ptr(D), faiss.swig_ptr(I)
        )
    else:
        raise NotImplementedError(f"unknown variant {variant!r}")
    return D, I
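
# Minimal usage sketch (illustrative only; _demo_knn_hamming is a
# hypothetical helper, not part of the faiss API): searching 64-bit
# binary codes, stored as 8 uint8 bytes per vector.

def _demo_knn_hamming():
    rs = np.random.RandomState(123)
    xb = rs.randint(256, size=(1000, 8)).astype('uint8')
    xq = rs.randint(256, size=(10, 8)).astype('uint8')
    D, I = knn_hamming(xq, xb, 5)   # D holds int32 Hamming distances
    return D, I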
###########################################
# Kmeans object
###########################################

class Kmeans:
    """Object that performs k-means clustering and manages the centroids.
    The `Kmeans` class is essentially a wrapper around the C++
    `Clustering` object.

    Parameters
    ----------
    d : int
       dimension of the vectors to cluster
    k : int
       number of clusters
    gpu: bool or int, optional
       False: don't use GPU
       True: use all GPUs
       number: use this many GPUs
    progressive_dim_steps:
       use a progressive dimension clustering (with that number of steps)

    Subsequent parameters are fields of the Clustering object. The most
    important are:

    niter: int, optional
       clustering iterations
    nredo: int, optional
       redo clustering this many times and keep the best result
    verbose: bool, optional
    spherical: bool, optional
       do we want normalized centroids?
    int_centroids: bool, optional
       round centroids coordinates to integer
    seed: int, optional
       seed for the random number generator
    """

    def __init__(self, d, k, **kwargs):
        """d: input dimension, k: nb of centroids. Additional parameters are
        passed on to the ClusteringParameters object, including
        niter=25, verbose=False, spherical=False
        """
        self.d = d
        self.reset(k)
        self.gpu = False
        if "progressive_dim_steps" in kwargs:
            self.cp = ProgressiveDimClusteringParameters()
        else:
            self.cp = ClusteringParameters()
        for k, v in kwargs.items():
            if k == 'gpu':
                # gpu=True or gpu=-1 means use all GPUs; an int means
                # use that many ("is True" avoids catching gpu=1 here)
                if v is True or v == -1:
                    v = get_num_gpus()
                self.gpu = v
            else:
                # if this raises an exception, the field does not exist
                # on the ClusteringParameters object
                getattr(self.cp, k)
                setattr(self.cp, k, v)
        self.set_index()

    def set_index(self):
        d = self.d
        if self.cp.__class__ == ClusteringParameters:
            if self.cp.spherical:
                self.index = IndexFlatIP(d)
            else:
                self.index = IndexFlatL2(d)
            if self.gpu:
                self.index = faiss.index_cpu_to_all_gpus(
                    self.index, ngpu=self.gpu)
        else:
            if self.gpu:
                fac = GpuProgressiveDimIndexFactory(ngpu=self.gpu)
            else:
                fac = ProgressiveDimIndexFactory()
            self.fac = fac

    def reset(self, k=None):
        """ prepare k-means object to perform a new clustering, possibly
        with another number of centroids """
        if k is not None:
            self.k = int(k)
        self.centroids = None
        self.obj = None
        self.iteration_stats = None

    def train(self, x, weights=None, init_centroids=None):
        """ Perform k-means clustering.
        On output of the function call:

        - the centroids are in the centroids field of size (`k`, `d`).

        - the objective value at each iteration is in the array obj (size
          `niter`)

        - detailed optimization statistics are in the array iteration_stats.

        Parameters
        ----------
        x : array_like
            Training vectors, shape (n, d), `dtype` must be float32 and
            n should be larger than the number of clusters `k`.
        weights : array_like
            weight associated to each vector, shape `n`
        init_centroids : array_like
            initial set of centroids, shape (k, d)

        Returns
        -------
        final_obj: float
            final optimization objective
        """
        x = np.ascontiguousarray(x, dtype='float32')
        n, d = x.shape
        assert d == self.d

        if self.cp.__class__ == ClusteringParameters:
            # regular clustering
            clus = Clustering(d, self.k, self.cp)
            if init_centroids is not None:
                nc, d2 = init_centroids.shape
                assert d2 == d
                faiss.copy_array_to_vector(
                    init_centroids.ravel(), clus.centroids)
            clus.train(x, self.index, weights)
        else:
            # progressive dim clustering does not support weights,
            # initial centroids or spherical mode
            assert weights is None
            assert init_centroids is None
            assert not self.cp.spherical
            clus = ProgressiveDimClustering(d, self.k, self.cp)
            clus.train(n, swig_ptr(x), self.fac)

        centroids = faiss.vector_float_to_array(clus.centroids)

        self.centroids = centroids.reshape(self.k, d)
        stats = clus.iteration_stats
        stats = [stats.at(i) for i in range(stats.size())]
        self.obj = np.array([st.obj for st in stats])
        # copy all the iteration_stats objects to a python array
        stat_fields = 'obj time time_search imbalance_factor nsplit'.split()
        self.iteration_stats = [
            {field: getattr(st, field) for field in stat_fields}
            for st in stats
        ]
        return self.obj[-1] if self.obj.size > 0 else 0.0

    def assign(self, x):
        x = np.ascontiguousarray(x, dtype='float32')
        assert self.centroids is not None, "should train before assigning"
        self.index.reset()
        self.index.add(self.centroids)
        D, I = self.index.search(x, 1)
        return D.ravel(), I.ravel()
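
# Minimal usage sketch (illustrative only; _demo_kmeans is a hypothetical
# helper, not part of the faiss API): niter and verbose are forwarded to
# the ClusteringParameters object.

def _demo_kmeans():
    x = randn(1000 * 32, seed=1234).reshape(1000, 32)
    km = Kmeans(32, 8, niter=10, verbose=False)
    obj = km.train(x)        # final k-means objective value
    D, I = km.assign(x)      # nearest centroid id for every input point
    return km.centroids, obj, D, I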
###########################################
# Packing and unpacking bitstrings
###########################################

def is_sequence(x):
    # numpy arrays are not collections.abc.Sequence, so test for them
    # explicitly as well
    return isinstance(x, (collections.abc.Sequence, np.ndarray))


pack_bitstrings_c = pack_bitstrings

def pack_bitstrings(a, nbit):
    """
    Pack a set of integers (i, j) where i=0:n and j=0:M into
    n bitstrings.
    Output is an uint8 array of size (n, code_size), where code_size is
    such that at most 7 bits per code are wasted.

    If nbit is an integer: all entries take nbit bits.
    If nbit is an array: entry (i, j) takes nbit[j] bits.
    """
    n, M = a.shape
    a = np.ascontiguousarray(a, dtype='int32')
    if is_sequence(nbit):
        nbit = np.ascontiguousarray(nbit, dtype='int32')
        assert nbit.shape == (M,)
        code_size = int((nbit.sum() + 7) // 8)
        b = np.empty((n, code_size), dtype='uint8')
        pack_bitstrings_c(
            n, M, swig_ptr(nbit), swig_ptr(a), swig_ptr(b), code_size)
    else:
        code_size = (M * nbit + 7) // 8
        b = np.empty((n, code_size), dtype='uint8')
        pack_bitstrings_c(n, M, nbit, swig_ptr(a), swig_ptr(b), code_size)
    return b


unpack_bitstrings_c = unpack_bitstrings

def unpack_bitstrings(b, M_or_nbits, nbit=None):
    """
    Unpack a set of integers (i, j) where i=0:n and j=0:M from
    n bitstrings (encoded as uint8s).
    Input is an uint8 array of size (n, code_size), where code_size is
    such that at most 7 bits per code are wasted.

    Two forms:
    - when called with (array, M, nbit): there are M entries of size
      nbit per row
    - when called with (array, nbits): element (i, j) is encoded in
      nbits[j] bits
    """
    n, code_size = b.shape
    if nbit is None:
        nbit = np.ascontiguousarray(M_or_nbits, dtype='int32')
        M = len(nbit)
        min_code_size = int((nbit.sum() + 7) // 8)
        assert code_size >= min_code_size
        a = np.empty((n, M), dtype='int32')
        unpack_bitstrings_c(
            n, M, swig_ptr(nbit), swig_ptr(b), code_size, swig_ptr(a))
    else:
        M = M_or_nbits
        min_code_size = (M * nbit + 7) // 8
        assert code_size >= min_code_size
        a = np.empty((n, M), dtype='int32')
        unpack_bitstrings_c(
            n, M, nbit, swig_ptr(b), code_size, swig_ptr(a))
    return a
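
# Minimal usage sketch (illustrative only; _demo_pack_unpack is a
# hypothetical helper, not part of the faiss API): a round-trip with
# M = 3 codes of 4 bits each, packed into 2 bytes per row.

def _demo_pack_unpack():
    a = np.array([[3, 5, 1], [7, 0, 15]], dtype='int32')
    b = pack_bitstrings(a, 4)        # (2, 2) uint8: 12 bits -> 2 bytes
    a2 = unpack_bitstrings(b, 3, 4)  # (array, M, nbit) calling form
    assert np.all(a == a2)
    return b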