|
"""
An OrderedSet is a custom MutableSet that remembers its order, so that every
entry has an index that can be looked up.

Based on a recipe originally posted to ActiveState Recipes by Raymond Hettinger,
and released under the MIT license.
"""
|
import itertools as it |
|
from collections import deque |
|
|
|
try: |
|
|
|
from collections.abc import MutableSet, Sequence |
|
except ImportError: |
|
|
|
from collections import MutableSet, Sequence |
|
|
|
# Slice equivalent to `[:]`. OrderedSet.__getitem__ compares incoming slices
# against it to recognise a request for a full copy of the set.
SLICE_ALL = slice(None)

# Version string of this module.
__version__ = "3.1"
|
|
|
|
|
def is_iterable(obj):
    """
    Are we being asked to look up a list of things, instead of a single thing?

    We check for the `__iter__` attribute so that this can cover types that
    don't have to be known by this module, such as NumPy arrays.

    Strings (both text and byte strings), however, should be considered as
    atomic values to look up, not iterables. The same goes for tuples, since
    they are immutable and therefore valid entries.

    `bytes` is listed explicitly because on Python 3 it defines `__iter__`
    (yielding integers) while still being a hashable, perfectly valid entry;
    without the check a bytes key would be looked up byte by byte.

    We don't need to check for the Python 2 `unicode` type, because it doesn't
    have an `__iter__` attribute anyway.

    Returns:
        True if `obj` should be treated as a collection of keys/indices,
        False if it should be treated as one single key/index.

    Example:
        >>> is_iterable([1, 2])
        True
        >>> is_iterable("ab"), is_iterable(b"ab"), is_iterable((1, 2))
        (False, False, False)
    """
    return hasattr(obj, "__iter__") and not isinstance(obj, (str, bytes, tuple))
|
|
|
|
|
class OrderedSet(MutableSet, Sequence):
    """
    An OrderedSet is a custom MutableSet that remembers its order, so that
    every entry has an index that can be looked up.

    Internally, `items` is the list of entries in insertion order and `map`
    is a dict from each entry to its position in `items`. Every mutating
    method keeps the two in sync.

    Example:
        >>> OrderedSet([1, 1, 2, 3, 2])
        OrderedSet([1, 2, 3])
    """

    def __init__(self, iterable=None):
        """
        Create an OrderedSet, optionally filled from `iterable`.

        Duplicates in `iterable` are dropped; the first occurrence of each
        entry determines its position.
        """
        self.items = []
        self.map = {}
        if iterable is not None:
            # In-place union add()s every element in turn.
            self |= iterable

    def __len__(self):
        """
        Returns the number of unique elements in the ordered set

        Example:
            >>> len(OrderedSet([]))
            0
            >>> len(OrderedSet([1, 2]))
            2
        """
        return len(self.items)

    def __getitem__(self, index):
        """
        Get the item at a given index.

        If `index` is a slice, you will get back that slice of items, as a
        new OrderedSet.

        If `index` is a list or a similar iterable, you'll get a list of
        items corresponding to those indices. This is similar to NumPy's
        "fancy indexing". The result is not an OrderedSet because you may ask
        for duplicate indices, and the number of elements returned should be
        the number of elements asked for.

        Raises:
            TypeError: if `index` is not a slice, an iterable of indices, or
                an object supporting `__index__`.
            IndexError: if an integer index is out of range.

        Example:
            >>> oset = OrderedSet([1, 2, 3])
            >>> oset[1]
            2
        """
        if isinstance(index, slice) and index == SLICE_ALL:
            return self.copy()
        elif is_iterable(index):
            return [self.items[i] for i in index]
        elif hasattr(index, "__index__") or isinstance(index, slice):
            result = self.items[index]
            if isinstance(result, list):
                # A slice of the item list: wrap it back into a set.
                return self.__class__(result)
            else:
                return result
        else:
            raise TypeError("Don't know how to index an OrderedSet by %r" % index)

    def copy(self):
        """
        Return a shallow copy of this object.

        Example:
            >>> this = OrderedSet([1, 2, 3])
            >>> other = this.copy()
            >>> this == other
            True
            >>> this is other
            False
        """
        return self.__class__(self)

    def __getstate__(self):
        """
        Return the pickle state: the list of items, or `(None,)` when empty.
        """
        if len(self) == 0:
            # The state can't be an empty list: pickle skips __setstate__
            # when the state is falsy, so hand back a truthy placeholder that
            # __setstate__ recognises.
            return (None,)
        else:
            return list(self)

    def __setstate__(self, state):
        """
        Restore this set from a state produced by `__getstate__`.
        """
        if state == (None,):
            self.__init__([])
        else:
            self.__init__(state)

    def __contains__(self, key):
        """
        Test if the item is in this ordered set

        Example:
            >>> 1 in OrderedSet([1, 3, 2])
            True
            >>> 5 in OrderedSet([1, 3, 2])
            False
        """
        return key in self.map

    def add(self, key):
        """
        Add `key` as an item to this OrderedSet, then return its index.

        If `key` is already in the OrderedSet, return the index it already
        had.

        Example:
            >>> oset = OrderedSet()
            >>> oset.append(3)
            0
            >>> print(oset)
            OrderedSet([3])
        """
        if key not in self.map:
            self.map[key] = len(self.items)
            self.items.append(key)
        return self.map[key]

    append = add

    def update(self, sequence):
        """
        Update the set with the given iterable sequence, then return the index
        of the last element inserted.

        The returned index is that of the last element of `sequence`, whether
        it was newly added or already present. It is None when `sequence` is
        empty.

        Raises:
            ValueError: if a TypeError occurs while iterating `sequence` or
                adding one of its elements. This is usually because
                `sequence` is not iterable, but an unhashable element is
                reported the same way. Elements added before the failure
                stay in the set.

        Example:
            >>> oset = OrderedSet([1, 2, 3])
            >>> oset.update([3, 1, 5, 1, 4])
            4
            >>> print(oset)
            OrderedSet([1, 2, 3, 5, 4])
        """
        item_index = None
        try:
            for item in sequence:
                item_index = self.add(item)
        except TypeError:
            raise ValueError(
                "Argument needs to be an iterable, got %s" % type(sequence)
            )
        return item_index

    def index(self, key):
        """
        Get the index of a given entry, raising a KeyError if it's not
        present.

        `key` can be an iterable of entries that is not a string, in which case
        this returns a list of indices.

        Example:
            >>> oset = OrderedSet([1, 2, 3])
            >>> oset.index(2)
            1
        """
        if is_iterable(key):
            return [self.index(subkey) for subkey in key]
        return self.map[key]

    # Alternate names for index().
    get_loc = index
    get_indexer = index

    def pop(self):
        """
        Remove and return the last element from the set.

        Raises KeyError if the set is empty.

        Example:
            >>> oset = OrderedSet([1, 2, 3])
            >>> oset.pop()
            3
        """
        if not self.items:
            raise KeyError("Set is empty")

        elem = self.items.pop()
        del self.map[elem]
        return elem

    def discard(self, key):
        """
        Remove an element.  Do not raise an exception if absent.

        The MutableSet mixin uses this to implement the .remove() method, which
        *does* raise an error when asked to remove a non-existent item.

        Example:
            >>> oset = OrderedSet([1, 2, 3])
            >>> oset.discard(2)
            >>> print(oset)
            OrderedSet([1, 3])
            >>> oset.discard(2)
            >>> print(oset)
            OrderedSet([1, 3])
        """
        if key in self:
            removed_at = self.map.pop(key)
            del self.items[removed_at]
            # Only the entries that followed the removed one move, each by
            # exactly one position, so re-index just that tail.
            for position, item in enumerate(self.items[removed_at:], removed_at):
                self.map[item] = position

    def clear(self):
        """
        Remove all items from this OrderedSet.
        """
        del self.items[:]
        self.map.clear()

    def __iter__(self):
        """
        Example:
            >>> list(iter(OrderedSet([1, 2, 3])))
            [1, 2, 3]
        """
        return iter(self.items)

    def __reversed__(self):
        """
        Example:
            >>> list(reversed(OrderedSet([1, 2, 3])))
            [3, 2, 1]
        """
        return reversed(self.items)

    def __repr__(self):
        """
        Return `ClassName()` when empty, otherwise `ClassName([items...])`.
        """
        if not self:
            return "%s()" % (self.__class__.__name__,)
        return "%s(%r)" % (self.__class__.__name__, list(self))

    def __eq__(self, other):
        """
        Returns true if the containers have the same items. If `other` is a
        Sequence, then order is checked, otherwise it is ignored.

        Example:
            >>> oset = OrderedSet([1, 3, 2])
            >>> oset == [1, 3, 2]
            True
            >>> oset == [1, 2, 3]
            False
            >>> oset == [2, 3]
            False
            >>> oset == OrderedSet([3, 2, 1])
            False
        """
        # NOTE(review): deque is listed explicitly, presumably for
        # interpreters where it is not registered as a Sequence.
        if isinstance(other, (Sequence, deque)):
            # Ordered comparison: same elements in the same order.
            return list(self) == list(other)
        try:
            other_as_set = set(other)
        except TypeError:
            # If `other` can't be converted into a set, it's not equal.
            return False
        else:
            return set(self) == other_as_set

    def union(self, *sets):
        """
        Combines all unique items.
        Each items order is defined by its first appearance.

        Example:
            >>> oset = OrderedSet.union(OrderedSet([3, 1, 4, 1, 5]), [1, 3], [2, 0])
            >>> print(oset)
            OrderedSet([3, 1, 4, 5, 2, 0])
            >>> oset.union([8, 9])
            OrderedSet([3, 1, 4, 5, 2, 0, 8, 9])
            >>> oset | {10}
            OrderedSet([3, 1, 4, 5, 2, 0, 10])
        """
        # Supports the unbound form OrderedSet.union(some_list, ...), where
        # `self` may not be an OrderedSet at all.
        cls = self.__class__ if isinstance(self, OrderedSet) else OrderedSet
        containers = map(list, it.chain([self], sets))
        items = it.chain.from_iterable(containers)
        return cls(items)

    def __and__(self, other):
        # Delegate to intersection() so the result keeps this set's order.
        return self.intersection(other)

    def intersection(self, *sets):
        """
        Returns elements in common between all sets. Order is defined only
        by the first set.

        Example:
            >>> oset = OrderedSet.intersection(OrderedSet([0, 1, 2, 3]), [1, 2, 3])
            >>> print(oset)
            OrderedSet([1, 2, 3])
            >>> oset.intersection([2, 4, 5], [1, 2, 3, 4])
            OrderedSet([2])
            >>> oset.intersection()
            OrderedSet([1, 2, 3])
        """
        cls = self.__class__ if isinstance(self, OrderedSet) else OrderedSet
        if sets:
            common = set.intersection(*map(set, sets))
            items = (item for item in self if item in common)
        else:
            items = self
        return cls(items)

    def difference(self, *sets):
        """
        Returns all elements that are in this set but not the others.

        Example:
            >>> OrderedSet([1, 2, 3]).difference(OrderedSet([2]))
            OrderedSet([1, 3])
            >>> OrderedSet([1, 2, 3]).difference(OrderedSet([2]), OrderedSet([3]))
            OrderedSet([1])
            >>> OrderedSet([1, 2, 3]) - OrderedSet([2])
            OrderedSet([1, 3])
            >>> OrderedSet([1, 2, 3]).difference()
            OrderedSet([1, 2, 3])
        """
        cls = self.__class__
        if sets:
            other = set.union(*map(set, sets))
            items = (item for item in self if item not in other)
        else:
            items = self
        return cls(items)

    def issubset(self, other):
        """
        Report whether another set contains this set.

        `other` may be any iterable. One without a length (such as a
        generator) is read once into a list before being tested.

        Example:
            >>> OrderedSet([1, 2, 3]).issubset({1, 2})
            False
            >>> OrderedSet([1, 2, 3]).issubset({1, 2, 3, 4})
            True
            >>> OrderedSet([1, 2, 3]).issubset({1, 4, 3, 5})
            False
        """
        if not hasattr(other, "__len__"):
            # A one-shot iterable can neither be measured nor probed with
            # `in` repeatedly, so take a snapshot of it first.
            other = list(other)
        # Valid even if `other` has duplicates: its length is then an upper
        # bound on the number of distinct elements it holds.
        if len(self) > len(other):
            return False
        return all(item in other for item in self)

    def issuperset(self, other):
        """
        Report whether this set contains another set.

        `other` may be any iterable, including one with repeated elements.

        Example:
            >>> OrderedSet([1, 2]).issuperset([1, 2, 3])
            False
            >>> OrderedSet([1, 2, 3, 4]).issuperset({1, 2, 3})
            True
            >>> OrderedSet([1, 4, 3, 5]).issuperset({1, 2, 3})
            False
            >>> OrderedSet([1, 2]).issuperset([1, 1, 1])
            True
        """
        # The length shortcut is only sound when `other` cannot hold
        # duplicates; a list like [1, 1, 1] is longer than {1, 2} yet still
        # contained in it.
        if isinstance(other, (set, frozenset, MutableSet)) and len(self) < len(other):
            return False
        return all(item in self for item in other)

    def symmetric_difference(self, other):
        """
        Return the symmetric difference of two OrderedSets as a new set.
        That is, the new set will contain all elements that are in exactly
        one of the sets.

        Their order will be preserved, with elements from `self` preceding
        elements from `other`.

        Example:
            >>> this = OrderedSet([1, 4, 3, 5, 7])
            >>> other = OrderedSet([9, 7, 1, 3, 2])
            >>> this.symmetric_difference(other)
            OrderedSet([4, 5, 9, 2])
        """
        cls = self.__class__ if isinstance(self, OrderedSet) else OrderedSet
        diff1 = cls(self).difference(other)
        diff2 = cls(other).difference(self)
        return diff1.union(diff2)

    def _update_items(self, items):
        """
        Replace the 'items' list of this OrderedSet with a new one, updating
        self.map accordingly.

        `items` must not contain duplicates; callers are responsible for that.
        """
        self.items = items
        self.map = {item: idx for (idx, item) in enumerate(items)}

    def difference_update(self, *sets):
        """
        Update this OrderedSet to remove items from one or more other sets.

        Example:
            >>> this = OrderedSet([1, 2, 3])
            >>> this.difference_update(OrderedSet([2, 4]))
            >>> print(this)
            OrderedSet([1, 3])

            >>> this = OrderedSet([1, 2, 3, 4, 5])
            >>> this.difference_update(OrderedSet([2, 4]), OrderedSet([1, 4, 6]))
            >>> print(this)
            OrderedSet([3, 5])
        """
        items_to_remove = set()
        for other in sets:
            items_to_remove |= set(other)
        self._update_items([item for item in self.items if item not in items_to_remove])

    def intersection_update(self, other):
        """
        Update this OrderedSet to keep only items in another set, preserving
        their order in this set.

        Example:
            >>> this = OrderedSet([1, 4, 3, 5, 7])
            >>> other = OrderedSet([9, 7, 1, 3, 2])
            >>> this.intersection_update(other)
            >>> print(this)
            OrderedSet([1, 3, 7])
        """
        other = set(other)
        self._update_items([item for item in self.items if item in other])

    def symmetric_difference_update(self, other):
        """
        Update this OrderedSet to remove items from another set, then
        add items from the other set that were not present in this set.

        `other` may be any iterable. It is read exactly once, and repeated
        elements in it are added at most once.

        Example:
            >>> this = OrderedSet([1, 4, 3, 5, 7])
            >>> other = OrderedSet([9, 7, 1, 3, 2])
            >>> this.symmetric_difference_update(other)
            >>> print(this)
            OrderedSet([4, 5, 9, 2])
        """
        # Snapshot first: `other` is needed twice and may be a one-shot
        # iterator.
        other_items = list(other)
        items_to_remove = set(other_items)

        # Collect the new entries in order of first appearance, skipping
        # repeats so the rebuilt item list stays duplicate-free.
        items_to_add = []
        already_queued = set()
        for item in other_items:
            if item not in self and item not in already_queued:
                already_queued.add(item)
                items_to_add.append(item)

        kept = [item for item in self.items if item not in items_to_remove]
        self._update_items(kept + items_to_add)
|
|