File size: 4,652 Bytes
7885a28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
"""
Uses C++ map containers for fast dict-like behavior with keys being
integers, and values float.
"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
# C++
from cython.operator cimport dereference as deref, preincrement as inc
from libcpp.utility cimport pair
from libcpp.map cimport map as cpp_map
import numpy as np
from ._typedefs cimport float64_t, intp_t
###############################################################################
# An object to be used in Python
# Lookup is faster than dict (up to 10 times), and so is full traversal
# (up to 50 times), and assignment (up to 6 times), but creation is
# slower (up to 3 times). Also, a large benefit is that memory
# consumption is reduced a lot compared to a Python dict
cdef class IntFloatDict:
def __init__(
self,
intp_t[:] keys,
float64_t[:] values,
):
cdef int i
cdef int size = values.size
# Should check that sizes for keys and values are equal, and
# after should boundcheck(False)
for i in range(size):
self.my_map[keys[i]] = values[i]
def __len__(self):
return self.my_map.size()
def __getitem__(self, int key):
cdef cpp_map[intp_t, float64_t].iterator it = self.my_map.find(key)
if it == self.my_map.end():
# The key is not in the dict
raise KeyError('%i' % key)
return deref(it).second
def __setitem__(self, int key, float value):
self.my_map[key] = value
# Cython 0.20 generates buggy code below. Commenting this out for now
# and relying on the to_arrays method
# def __iter__(self):
# cdef cpp_map[intp_t, float64_t].iterator it = self.my_map.begin()
# cdef cpp_map[intp_t, float64_t].iterator end = self.my_map.end()
# while it != end:
# yield deref(it).first, deref(it).second
# inc(it)
def __iter__(self):
cdef int size = self.my_map.size()
cdef intp_t [:] keys = np.empty(size, dtype=np.intp)
cdef float64_t [:] values = np.empty(size, dtype=np.float64)
self._to_arrays(keys, values)
cdef int idx
cdef intp_t key
cdef float64_t value
for idx in range(size):
key = keys[idx]
value = values[idx]
yield key, value
def to_arrays(self):
"""Return the key, value representation of the IntFloatDict
object.
Returns
=======
keys : ndarray, shape (n_items, ), dtype=int
The indices of the data points
values : ndarray, shape (n_items, ), dtype=float
The values of the data points
"""
cdef int size = self.my_map.size()
keys = np.empty(size, dtype=np.intp)
values = np.empty(size, dtype=np.float64)
self._to_arrays(keys, values)
return keys, values
cdef _to_arrays(self, intp_t [:] keys, float64_t [:] values):
# Internal version of to_arrays that takes already-initialized arrays
cdef cpp_map[intp_t, float64_t].iterator it = self.my_map.begin()
cdef cpp_map[intp_t, float64_t].iterator end = self.my_map.end()
cdef int index = 0
while it != end:
keys[index] = deref(it).first
values[index] = deref(it).second
inc(it)
index += 1
def update(self, IntFloatDict other):
cdef cpp_map[intp_t, float64_t].iterator it = other.my_map.begin()
cdef cpp_map[intp_t, float64_t].iterator end = other.my_map.end()
while it != end:
self.my_map[deref(it).first] = deref(it).second
inc(it)
def copy(self):
cdef IntFloatDict out_obj = IntFloatDict.__new__(IntFloatDict)
# The '=' operator is a copy operator for C++ maps
out_obj.my_map = self.my_map
return out_obj
def append(self, intp_t key, float64_t value):
# Construct our arguments
cdef pair[intp_t, float64_t] args
args.first = key
args.second = value
self.my_map.insert(args)
###############################################################################
# operation on dict
def argmin(IntFloatDict d):
cdef cpp_map[intp_t, float64_t].iterator it = d.my_map.begin()
cdef cpp_map[intp_t, float64_t].iterator end = d.my_map.end()
cdef intp_t min_key = -1
cdef float64_t min_value = np.inf
while it != end:
if deref(it).second < min_value:
min_value = deref(it).second
min_key = deref(it).first
inc(it)
return min_key, min_value
|