// Copyright (C) 2016 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_
#ifdef DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_

// NOTE: The #undef immediately followed by #ifdef on the same macro means nothing
// below is ever seen by the compiler.  This file is a specification only.

namespace dlib
{
    class running_gradient
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                A tool for estimating whether a noisy sequence of numbers is trending
                up or down, and by how much.  It finds the least squares fit of a line
                to the data and then lets you perform a statistical test on the slope
                of that line.
        !*/

    public:

        running_gradient();
        /*!
            ensures
                - #current_n() == 0
        !*/

        void clear();
        /*!
            ensures
                - #current_n() == 0
                - this object is put back to its initial value
                - all memory of any previously added data points is discarded
        !*/

        double current_n() const;
        /*!
            ensures
                - returns how many values have been given to this object by add().
        !*/

        void add(double y);
        /*!
            ensures
                - Folds the new value y into this object's gradient() and
                  standard_error() estimates.
                - #current_n() == current_n() + 1
        !*/

        double gradient() const;
        /*!
            requires
                - current_n() > 1
            ensures
                - Treating the values given to add() as time series data, returns the
                  current estimate of their rate of change.  That is, how much those
                  values typically change from one sample to the next.
                - The estimate is the slope of the least squares line fit to the data
                  given to add().
        !*/

        double intercept() const;
        /*!
            requires
                - current_n() > 0
            ensures
                - Returns the intercept of the line this class fits to the time series
                  data given to add().  gradient() returns the slope of that same line.
                - For example, the next point add() will see, as predicted by the best
                  fit line, is intercept() + current_n()*gradient().
        !*/

        double standard_error() const;
        /*!
            requires
                - current_n() > 2
            ensures
                - returns the standard deviation of the estimate of gradient().
        !*/

        double probability_gradient_less_than(double thresh) const;
        /*!
            requires
                - current_n() > 2
            ensures
                - Returns the probability that the true gradient of the data is less
                  than thresh.
                - The computation assumes the values given to add() are linearly
                  related to each other and corrupted by additive Gaussian noise.
                  Under that assumption our estimate of gradient() is a random variable
                  with a mean of gradient() and a standard deviation of
                  standard_error().
        !*/

        double probability_gradient_greater_than(double thresh) const;
        /*!
            requires
                - current_n() > 2
            ensures
                - returns 1-probability_gradient_less_than(thresh)
        !*/
    };

    void serialize(const running_gradient& item, std::ostream& out);
    /*!
        provides serialization support
    !*/

    void deserialize(running_gradient& item, std::istream& in);
    /*!
        provides serialization support
    !*/

// ----------------------------------------------------------------------------------------

    template <typename T>
    double probability_gradient_less_than(
        const T& container,
        double thresh
    );
    /*!
        requires
            - container must be a container of double values that can be enumerated
              with a range based for loop.
            - The container must contain more than 2 elements.
        ensures
            - Adds every element of container to a running_gradient object, R, and
              then returns R.probability_gradient_less_than(thresh).
    !*/

    template <typename T>
    double probability_gradient_greater_than(
        const T& container,
        double thresh
    );
    /*!
        requires
            - container must be a container of double values that can be enumerated
              with a range based for loop.
            - The container must contain more than 2 elements.
        ensures
            - Adds every element of container to a running_gradient object, R, and
              then returns R.probability_gradient_greater_than(thresh).
    !*/

// ----------------------------------------------------------------------------------------

    template <typename T>
    double probability_values_are_increasing(const T& container);
    /*!
        requires
            - container must be a container of double values that can be enumerated
              with a range based for loop.
        ensures
            - Returns the probability that the values in container are increasing.
              This is probability_gradient_greater_than(container,0) if
              container.size() > 2 and 0.5 otherwise.
    !*/

    template <typename T>
    double probability_values_are_increasing_robust(
        const T& container,
        double quantile_discard = 0.10
    );
    /*!
        requires
            - container must be a container of double values that can be enumerated
              with a range based for loop.
        ensures
            - Behaves just like probability_values_are_increasing(container) except
              that values in container falling in the upper quantile_discard quantile
              are ignored.  So for example, if quantile_discard is 0.1 then the 10%
              largest values in container are ignored.
            - This makes the estimate robust to large spurious values that might
              otherwise confuse the result.  For instance, the sequence
              {1,2,1e10,3,4,5,6,7,8,9} looks decreasing to
              probability_values_are_increasing() but looks increasing to
              probability_values_are_increasing_robust().

        NOTE(review): count_steps_without_decrease_robust() states the requirement
        0 <= quantile_discard <= 1.  Presumably the same range is expected here, but
        this spec does not say so -- confirm against the implementation.
    !*/

// ----------------------------------------------------------------------------------------

    template <typename T>
    size_t count_steps_without_decrease(
        const T& container,
        double probability_of_decrease = 0.51
    );
    /*!
        requires
            - container must be a container of double values that can be enumerated
              with .rbegin() and .rend().
            - 0.5 < probability_of_decrease < 1
        ensures
            - Viewing the contents of container as a potentially noisy time series,
              returns a count of how long the series has gone without noticeably
              decreasing in value.
            - It does this by adding the elements into a running_gradient object and
              counting how many elements, starting with container.back(), you need to
              examine before you are confident that the series has been decreasing in
              value.  Here, "confident of decrease" means that the probability of
              decrease is >= probability_of_decrease.
            - Setting probability_of_decrease to 0.51 means we count until we see even
              a small hint of decrease, whereas a larger value of 0.99 would return a
              larger count since it keeps going until it is nearly certain the time
              series is decreasing.
            - The max possible output from this function is container.size().
    !*/

    template <typename T>
    size_t count_steps_without_decrease_robust(
        const T& container,
        double probability_of_decrease = 0.51,
        double quantile_discard = 0.10
    );
    /*!
        requires
            - container must be a container of double values that can be enumerated
              with .begin() and .end() as well as .rbegin() and .rend().
            - 0.5 < probability_of_decrease < 1
            - 0 <= quantile_discard <= 1
        ensures
            - Behaves just like
              count_steps_without_decrease(container,probability_of_decrease) except
              that values in container falling in the upper quantile_discard quantile
              are ignored.  So for example, if quantile_discard is 0.1 then the 10%
              largest values in container are ignored.
    !*/

// ----------------------------------------------------------------------------------------

    template <typename T>
    size_t count_steps_without_increase(
        const T& container,
        double probability_of_increase = 0.51
    );
    /*!
        requires
            - container must be a container of double values that can be enumerated
              with .rbegin() and .rend().
            - 0.5 < probability_of_increase < 1
        ensures
            - Viewing the contents of container as a potentially noisy time series,
              returns a count of how long the series has gone without noticeably
              increasing in value.
            - It does this by adding the elements into a running_gradient object and
              counting how many elements, starting with container.back(), you need to
              examine before you are confident that the series has been increasing in
              value.  Here, "confident of increase" means that the probability of
              increase is >= probability_of_increase.
            - Setting probability_of_increase to 0.51 means we count until we see even
              a small hint of increase, whereas a larger value of 0.99 would return a
              larger count since it keeps going until it is nearly certain the time
              series is increasing.
    !*/

// ----------------------------------------------------------------------------------------

    template <typename T>
    double find_upper_quantile(
        const T& container,
        double quantile
    );
    /*!
        requires
            - container must be a container of double values that can be enumerated
              with .begin() and .end().
            - 0 <= quantile <= 1
            - container.size() > 0
        ensures
            - Finds and returns the value such that a quantile fraction of the values
              in container are greater than it.  (quantile is a fraction between 0 and
              1, not a number out of 100.)
            - For example, 0.5 would find the median value in container while 0.1
              would find the value that lower bounds the 10% largest values in
              container.
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_