// dlib C++ Library - running_gradient

// Copyright (C) 2016  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_
#ifdef DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_


namespace dlib
{
    class running_gradient 
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This object is a tool for estimating if a noisy sequence of numbers is
                trending up or down and by how much.  It does this by finding the least
                squares fit of a line to the data and then allows you to perform a
                statistical test on the slope of that line.

                Values are supplied one at a time via add() and are treated as a time
                series.  As implied by the contract of intercept(), the i-th value
                given to add() (counting from 0) sits at position i along the fitted
                line.
        !*/

    public:

        running_gradient (
        );
        /*!
            ensures
                - #current_n() == 0
                  (i.e. the object starts out having seen no data)
        !*/

        void clear(
        );
        /*!
            ensures
                - #current_n() == 0
                - this object has its initial value
                - clears all memory of any previous data points
        !*/

        double current_n (
        ) const;
        /*!
            ensures
                - returns the number of values given to this object by add() since it
                  was constructed or since the last call to clear().
                - Note that the count is reported as a double rather than as an
                  integer type.
        !*/

        void add(
            double y
        );
        /*!
            ensures
                - Updates the gradient() and standard_error() estimates in this object
                  based on the new y value.
                - #current_n() == current_n() + 1
        !*/

        double gradient (
        ) const;
        /*!
            requires
                - current_n() > 1
            ensures
                - If we consider the values given to add() as time series data, we can
                  estimate the rate-of-change of those values.  That is, how much,
                  typically, do those values change from sample to sample?  The gradient()
                  function returns the current estimate.  It does this by finding the least
                  squares fit of a line to the data given to add() and returning the slope
                  of this line.
        !*/

        double intercept (
        ) const;
        /*!
            requires
                - current_n() > 0
            ensures
                - This class fits a line to the time series data given to add().  This
                  function returns the intercept of that line while gradient() returns the
                  slope of that line.  This means that, for example, the next point that
                  add() will see, as predicted by this best fit line, is the value
                  intercept() + current_n()*gradient().
        !*/

        double standard_error ( 
        ) const;
        /*!
            requires
                - current_n() > 2
            ensures
                - returns the standard deviation of the estimate of gradient(), i.e. a
                  measure of how uncertain the fitted slope is.
        !*/

        double probability_gradient_less_than (
            double thresh
        ) const;
        /*!
            requires
                - current_n() > 2
            ensures
                - If we can assume the values given to add() are linearly related to each
                  other and corrupted by Gaussian additive noise then our estimate of
                  gradient() is a random variable with a mean value of gradient() and a
                  standard deviation of standard_error().  This lets us compute the
                  probability that the true gradient of the data is less than thresh, which
                  is what this function returns.
        !*/

        double probability_gradient_greater_than (
            double thresh
        ) const;
        /*!
            requires
                - current_n() > 2
            ensures
                - returns 1-probability_gradient_less_than(thresh), i.e. the probability,
                  under the same assumptions as probability_gradient_less_than(), that
                  the true gradient of the data is greater than thresh.
        !*/

    };

    void serialize (
        const running_gradient& item, 
        std::ostream& out 
    );
    /*!
        provides serialization support for running_gradient objects.  item is the
        (unmodified) object being serialized and out is the stream it is given to.
    !*/

    void deserialize (
        running_gradient& item, 
        std::istream& in
    );
    /*!
        provides serialization support for running_gradient objects.  This is the
        counterpart of serialize(): item is the object to be populated and in is
        the stream it is read from.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename T
        >
    double probability_gradient_less_than (
        const T& container,
        double thresh
    );
    /*!
        requires
            - container must be a container of double values that can be enumerated with a
              range based for loop.
            - The container must contain more than 2 elements.  (This mirrors the
              current_n() > 2 requirement of
              running_gradient::probability_gradient_less_than().)
        ensures
            - Puts all the elements of container into a running_gradient object, R, and
              then returns R.probability_gradient_less_than(thresh).
    !*/

    template <
        typename T
        >
    double probability_gradient_greater_than (
        const T& container,
        double thresh
    );
    /*!
        requires
            - container must be a container of double values that can be enumerated with a
              range based for loop.
            - The container must contain more than 2 elements.  (This mirrors the
              current_n() > 2 requirement of
              running_gradient::probability_gradient_greater_than().)
        ensures
            - Puts all the elements of container into a running_gradient object, R, and
              then returns R.probability_gradient_greater_than(thresh).
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename T
        > 
    double probability_values_are_increasing (
        const T& container
    );
    /*!
        requires
            - container must be a container of double values that can be enumerated with a
              range based for loop.
        ensures
            - Returns the probability that the values in container are increasing.  This is
              probability_gradient_greater_than(container,0) if container.size() > 2 and 0.5
              otherwise.  That is, when container has too few elements to satisfy the
              requirements of probability_gradient_greater_than(), the fixed value 0.5
              is returned instead.
    !*/

    template <
        typename T
        > 
    double probability_values_are_increasing_robust (
        const T& container,
        double quantile_discard = 0.10
    );
    /*!
        requires
            - container must be a container of double values that can be enumerated with a
              range based for loop.
            - NOTE(review): no valid range is stated here for quantile_discard.
              count_steps_without_decrease_robust() and find_upper_quantile() both
              require their quantile argument to be in [0,1]; presumably the same
              applies here -- confirm against the implementation.
        ensures
            - This function behaves just like probability_values_are_increasing(container) except
              that it ignores values in container that are in the upper quantile_discard quantile.
              So for example, if the quantile discard is 0.1 (the default) then the 10% largest
              values in container are ignored.  This makes the estimate robust to large spurious
              values that otherwise might confuse the results.  For instance, the sequence of values
              {1,2,1e10,3,4,5,6,7,8,9} looks decreasing to probability_values_are_increasing()
              but looks increasing to probability_values_are_increasing_robust().
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename T
        > 
    size_t count_steps_without_decrease (
        const T& container,
        double probability_of_decrease = 0.51
    );
    /*!
        requires
            - container must be a container of double values that can be enumerated with
              .rbegin() and .rend().
            - 0.5 < probability_of_decrease < 1
        ensures
            - If you think of the contents of container as a potentially noisy time series,
              then this function returns a count of how long the time series has gone
              without noticeably decreasing in value.  It does this by adding the
              elements into a running_gradient object, starting with container.back()
              and working toward the front, and counting how many elements you need to
              examine before you are confident that the series has been decreasing in
              value.  Here, "confident of decrease" means that the probability of
              decrease is >= probability_of_decrease.
            - Setting probability_of_decrease to 0.51 means we count until we see even a
              small hint of decrease, whereas a larger value of 0.99 would return a larger
              count since it keeps going until it is nearly certain the time series is
              decreasing.
            - The max possible output from this function is container.size().
    !*/

    template <
        typename T
        > 
    size_t count_steps_without_decrease_robust (
        const T& container,
        double probability_of_decrease = 0.51,
        double quantile_discard = 0.10
    );
    /*!
        requires
            - container must be a container of double values that can be enumerated with
              .begin() and .end() as well as .rbegin() and .rend().
            - 0.5 < probability_of_decrease < 1
            - 0 <= quantile_discard <= 1
        ensures
            - This function behaves just like
              count_steps_without_decrease(container,probability_of_decrease) except that
              it ignores values in container that are in the upper quantile_discard
              quantile.  So for example, if the quantile discard is 0.1 (the default)
              then the 10% largest values in container are ignored.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename T
        > 
    size_t count_steps_without_increase (
        const T& container,
        double probability_of_increase = 0.51
    );
    /*!
        requires
            - container must be a container of double values that can be enumerated with
              .rbegin() and .rend().
            - 0.5 < probability_of_increase < 1
        ensures
            - If you think of the contents of container as a potentially noisy time series,
              then this function returns a count of how long the time series has gone
              without noticeably increasing in value.  It does this by adding the
              elements into a running_gradient object, starting with container.back()
              and working toward the front, and counting how many elements you need to
              examine before you are confident that the series has been increasing in
              value.  Here, "confident of increase" means that the probability of
              increase is >= probability_of_increase.
            - Setting probability_of_increase to 0.51 means we count until we see even a
              small hint of increase, whereas a larger value of 0.99 would return a larger
              count since it keeps going until it is nearly certain the time series is
              increasing.
            - NOTE(review): count_steps_without_decrease() documents that its max
              possible output is container.size(); presumably the same bound holds
              here -- confirm against the implementation.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename T
        > 
    double find_upper_quantile (
        const T& container,
        double quantile
    );
    /*!
        requires
            - container must be a container of double values that can be enumerated with
              .begin() and .end().
            - 0 <= quantile <= 1
            - container.size() > 0
        ensures
            - Finds and returns the value such that a fraction quantile of the values in
              container are greater than it.  Note that quantile is a fraction in the
              range [0,1], not a percentage.  For example, 0.5 would find the median
              value in container while 0.1 would find the value that lower bounded the
              10% largest values in container.
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_