|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef LOSSYCOUNTER_H |
|
#define LOSSYCOUNTER_H |
|
|
|
#include <cstddef> |
|
#include <cmath> |
|
#ifdef USE_UNORDERED_MAP |
|
#include <tr1/unordered_map> |
|
#else |
|
#include <map> |
|
#endif |
|
#include <iterator> |
|
|
|
|
|
|
|
template<class T> |
|
class LossyCounterIterator; |
|
|
|
template<class T> |
|
class LossyCounterErasingIterator; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template<class T> |
|
class LossyCounter { |
|
|
|
public: |
|
|
|
|
|
typedef double error_t; |
|
|
|
|
|
typedef double support_t; |
|
|
|
|
|
typedef size_t counter_t; |
|
|
|
|
|
typedef counter_t frequency_t; |
|
|
|
|
|
typedef counter_t maximum_error_t; |
|
|
|
|
|
typedef std::pair<frequency_t, maximum_error_t> item_counts_t; |
|
#ifdef USE_UNORDERED_MAP |
|
typedef std::tr1::unordered_map<T, item_counts_t, typename T::Hash> storage_t; |
|
#else |
|
|
|
typedef std::map<T, item_counts_t> storage_t; |
|
#endif |
|
|
|
typedef LossyCounterIterator<T> const_iterator; |
|
|
|
|
|
typedef LossyCounterErasingIterator<T> erasing_iterator; |
|
|
|
|
|
const error_t error; |
|
|
|
|
|
const support_t support; |
|
|
|
|
|
const counter_t bucketWidth; |
|
|
|
private: |
|
|
|
|
|
counter_t _bucketId; |
|
|
|
|
|
counter_t _count; |
|
|
|
|
|
storage_t _storage; |
|
|
|
public: |
|
|
|
|
|
|
|
|
|
|
|
|
|
LossyCounter<T>(error_t _error, support_t _support): |
|
error(_error), support(_support), bucketWidth(_error > 0.0 ? ceil(1/_error) : 0), _bucketId(1), _count(0), _storage() {} |
|
|
|
|
|
|
|
|
|
void add(const T& item); |
|
|
|
|
|
|
|
|
|
const_iterator begin(void) const { return LossyCounterIterator<T>(threshold(), _storage.begin(), _storage.end()); } |
|
|
|
|
|
|
|
|
|
const_iterator end(void) const { return LossyCounterIterator<T>(_storage.end()); } |
|
|
|
|
|
|
|
|
|
erasing_iterator beginErase(void) { return LossyCounterErasingIterator<T>(threshold(), _storage); } |
|
|
|
|
|
|
|
|
|
erasing_iterator endErase(void) { return LossyCounterErasingIterator<T>(_storage); } |
|
|
|
|
|
|
|
|
|
counter_t bucketId(void) const { return _bucketId; } |
|
|
|
|
|
|
|
|
|
counter_t count(void) const { return _count; } |
|
|
|
|
|
|
|
|
|
size_t size(void) const { return _storage.size(); } |
|
|
|
|
|
|
|
|
|
|
|
double threshold(bool positive = false) const { return positive ? support * _count : (support - error) * _count; } |
|
|
|
|
|
|
|
|
|
double maxError(void) const { return error * _count; } |
|
|
|
|
|
|
|
|
|
bool aboutToPrune(void) const { return (bucketWidth != 0) && ((_count % bucketWidth) == 0); } |
|
|
|
private: |
|
|
|
|
|
|
|
|
|
void prune(void); |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template<class T> |
|
class LossyCounterIterator: public std::iterator<std::forward_iterator_tag, typename LossyCounter<T>::storage_t::value_type> { |
|
public: |
|
|
|
typedef LossyCounterIterator<T> self_type; |
|
|
|
typedef typename LossyCounter<T>::storage_t::const_iterator const_iterator; |
|
|
|
protected: |
|
|
|
|
|
const double _threshold; |
|
|
|
|
|
const_iterator _current; |
|
|
|
|
|
const const_iterator _end; |
|
|
|
public: |
|
|
|
|
|
|
|
LossyCounterIterator<T>(const_iterator end): |
|
_threshold(0), _current(_end), _end(end) {} |
|
|
|
LossyCounterIterator<T>(double threshold, const_iterator begin, const_iterator end): |
|
_threshold(threshold), _current(begin), _end(end) { |
|
|
|
this->forward(true); |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
LossyCounterIterator<T> operator++(void); |
|
|
|
|
|
|
|
|
|
LossyCounterIterator<T> operator++(int junk); |
|
|
|
bool operator==(const self_type& rhs) const { return _current == rhs._current; } |
|
|
|
bool operator!=(const self_type& rhs) const { return _current != rhs._current; } |
|
|
|
|
|
|
|
|
|
|
|
|
|
const T& item(void) const { return _current->first; } |
|
|
|
|
|
|
|
|
|
typename LossyCounter<T>::frequency_t frequency(void) const { return _current->second.first; } |
|
|
|
|
|
|
|
|
|
typename LossyCounter<T>::error_t max_error(void) const { return _current->second.second; } |
|
|
|
protected: |
|
|
|
|
|
|
|
|
|
void forward(bool init); |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
template<class T> |
|
class LossyCounterErasingIterator: public LossyCounterIterator<T> { |
|
public: |
|
|
|
typedef typename LossyCounter<T>::storage_t storage_t; |
|
|
|
private: |
|
|
|
storage_t& _storage; |
|
|
|
public: |
|
|
|
|
|
|
|
LossyCounterErasingIterator<T>(storage_t& storage): LossyCounterIterator<T>(storage.end()), _storage(storage) {} |
|
|
|
LossyCounterErasingIterator<T>(double threshold, storage_t& storage): LossyCounterIterator<T>(threshold, storage.begin(), storage.end()), _storage(storage) {} |
|
|
|
protected: |
|
|
|
|
|
|
|
|
|
void forward(bool init); |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
template<class T> |
|
void LossyCounter<T>::add(const T& item) { |
|
|
|
typename storage_t::iterator iter = _storage.find(item); |
|
|
|
if ( iter == _storage.end() ) { |
|
|
|
_storage.insert(std::make_pair(item, item_counts_t(1, _bucketId - 1))); |
|
} |
|
else { |
|
|
|
iter->second.first += 1; |
|
} |
|
|
|
|
|
++_count; |
|
if ( this->aboutToPrune() ) { |
|
this->prune(); |
|
++_bucketId; |
|
} |
|
} |
|
|
|
|
|
template<class T> |
|
void LossyCounter<T>::prune(void) { |
|
|
|
for ( typename storage_t::iterator iter = _storage.begin(); iter != _storage.end(); ) { |
|
|
|
if ( (iter->second.first + iter->second.second) <= _bucketId ) { |
|
_storage.erase(iter++); |
|
} |
|
else { |
|
++iter; |
|
} |
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
template<class T> |
|
LossyCounterIterator<T> LossyCounterIterator<T>::operator++(void) { |
|
this->forward(); |
|
return *this; |
|
} |
|
|
|
|
|
template<class T> |
|
LossyCounterIterator<T> LossyCounterIterator<T>::operator++(int junk) { |
|
self_type iter = *this; |
|
this->forward(); |
|
return iter; |
|
} |
|
|
|
template<class T> |
|
void LossyCounterIterator<T>::forward(bool init = false) { |
|
if ( _current == _end ) { |
|
return; |
|
} |
|
if ( init && (this->frequency() >= _threshold) ) { |
|
|
|
return; |
|
} |
|
|
|
while ( (++_current != _end) && (this->frequency() < _threshold) ); |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
template<class T> |
|
void LossyCounterErasingIterator<T>::forward(bool init = false) { |
|
if (this->_current == this->_end ) { |
|
return; |
|
} |
|
if ( init && (this->frequency() >= this->_threshold) ) { |
|
|
|
return; |
|
} |
|
|
|
typename LossyCounterIterator<T>::const_iterator previous = this->_current; |
|
|
|
while ( ++this->_current != this->_end ) { |
|
if ( this->frequency() < this->_threshold ) { |
|
|
|
_storage.erase(previous); |
|
previous = this->_current; |
|
} else { |
|
break; |
|
} |
|
} |
|
_storage.erase(previous); |
|
} |
|
|
|
#endif |
|
|