dlib C++ Library - sammon

// Copyright (C) 2012  Emanuele Cesena (emanuele.cesena@gmail.com), Davis E. King
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_SAMMoN_ABSTRACT_Hh_
#ifdef DLIB_SAMMoN_ABSTRACT_Hh_

#include "../matrix/matrix_abstract.h"
#include <vector>

namespace dlib
{

    class sammon_projection
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This is a function object that computes the Sammon projection of a set
                of N points in a L-dimensional vector space onto a d-dimensional space
                (d < L), according to the paper:
                    A Nonlinear Mapping for Data Structure Analysis (1969) by J.W. Sammon

                The current implementation is a vectorized version of the original algorithm.
        !*/

    public:

        sammon_projection(
        );
        /*!
            ensures
                - this object is properly initialized 
        !*/

        template <typename matrix_type>
        std::vector<matrix<double,0,1> > operator() (
            const std::vector<matrix_type>& data,       
            const long num_dims                      
        );
        /*!
            requires
                - num_dims > 0
                - matrix_type should be a kind of dlib::matrix of doubles capable
                  of representing column vectors.
                - for all valid i:
                    - is_col_vector(data[i]) == true
                    - data[0].size() == data[i].size()
                      (i.e. all the vectors in data must have the same dimensionality)
                - if (data.size() != 0) then
                    - 0 < num_dims <= data[0].size()
                      (i.e. you can't project into a higher dimension than the input data,
                      only to a lower dimension.)
            ensures
                - This routine computes Sammon's dimensionality reduction method based on the
                  given input data.  It will attempt to project the contents of data into a
                  num_dims dimensional space that preserves relative distances between the
                  input data points.
                - This function returns a std::vector, OUT, such that:
                    - OUT == a set of column vectors that represent the Sammon projection of 
                      the input data vectors. 
                    - OUT.size() == data.size()
                    - for all valid i:
                        - OUT[i].size() == num_dims
                        - OUT[i] == the Sammon projection of the input vector data[i]
        !*/

        template <typename matrix_type>
        void operator() (
            const std::vector<matrix_type>& data,       
            const long num_dims,                     
            std::vector<matrix<double,0,1> >& result,   
            double &err,                                
            const unsigned long num_iters = 1000,             
            const double err_delta = 1.0e-9            
        );
        /*!
            requires
                - num_iters > 0
                - err_delta > 0
                - num_dims > 0
                - matrix_type should be a kind of dlib::matrix of doubles capable
                  of representing column vectors.
                - for all valid i:
                    - is_col_vector(data[i]) == true
                    - data[0].size() == data[i].size()
                      (i.e. all the vectors in data must have the same dimensionality)
                - if (data.size() != 0) then
                    - 0 < num_dims <= data[0].size()
                      (i.e. you can't project into a higher dimension than the input data,
                      only to a lower dimension.)
            ensures
                - This routine computes Sammon's dimensionality reduction method based on the
                  given input data.  It will attempt to project the contents of data into a
                  num_dims dimensional space that preserves relative distances between the
                  input data points.
                - #err == the final error value at the end of the algorithm.  The goal of Sammon's
                  algorithm is to find a lower dimensional projection of the input data that
                  preserves the relative distances between points.  The value in #err is a measure
                  of the total error at the end of the algorithm.  So smaller values indicate
                  a better projection was found than if a large value is returned via #err.
                - Sammon's algorithm will run until either num_iters iterations has executed
                  or the change in error from one iteration to the next is less than err_delta.
                - Upon completion, the output of Sammon's projection is stored into #result, in
                  particular, we will have:
                    - #result == a set of column vectors that represent the Sammon projection of 
                      the input data vectors. 
                    - #result.size() == data.size()
                    - for all valid i:
                        - #result[i].size() == num_dims
                        - #result[i] == the Sammon projection of the input vector data[i]
        !*/

    };

} 

#endif // DLIB_SAMMoN_ABSTRACT_Hh_