// Copyright (C) 2003 Davis E. King (davis@dlib.net) // License: Boost Software License See LICENSE.txt for the full license. #ifndef DLIB_POINT_TrANSFORMS_H_ #define DLIB_POINT_TrANSFORMS_H_ #include "point_transforms_abstract.h" #include "../algs.h" #include "vector.h" #include "../matrix.h" #include "../matrix/matrix_la.h" #include "../optimization/optimization.h" #include "rectangle.h" #include "drectangle.h" #include #include namespace dlib { // ---------------------------------------------------------------------------------------- class point_rotator { public: point_rotator ( ) { sin_angle = 0; cos_angle = 1; } point_rotator ( const double& angle ) { sin_angle = std::sin(angle); cos_angle = std::cos(angle); } template const dlib::vector operator() ( const dlib::vector& p ) const { double x = cos_angle*p.x() - sin_angle*p.y(); double y = sin_angle*p.x() + cos_angle*p.y(); return dlib::vector(x,y); } const matrix get_m( ) const { matrix temp; temp = cos_angle, -sin_angle, sin_angle, cos_angle; return temp; } inline friend void serialize (const point_rotator& item, std::ostream& out) { serialize(item.sin_angle, out); serialize(item.cos_angle, out); } inline friend void deserialize (point_rotator& item, std::istream& in) { deserialize(item.sin_angle, in); deserialize(item.cos_angle, in); } private: double sin_angle; double cos_angle; }; // ---------------------------------------------------------------------------------------- class point_transform { public: point_transform ( ) { sin_angle = 0; cos_angle = 1; translate.x() = 0; translate.y() = 0; } point_transform ( const double& angle, const dlib::vector& translate_ ) { sin_angle = std::sin(angle); cos_angle = std::cos(angle); translate = translate_; } template const dlib::vector operator() ( const dlib::vector& p ) const { double x = cos_angle*p.x() - sin_angle*p.y(); double y = sin_angle*p.x() + cos_angle*p.y(); return dlib::vector(x,y) + translate; } const matrix get_m( ) const { matrix temp; temp = cos_angle, -sin_angle, sin_angle, cos_angle; return temp; } const dlib::vector get_b( ) const { return translate; } inline friend void serialize (const point_transform& item, std::ostream& out) { serialize(item.sin_angle, out); serialize(item.cos_angle, out); serialize(item.translate, out); } inline friend void deserialize (point_transform& item, std::istream& in) { deserialize(item.sin_angle, in); deserialize(item.cos_angle, in); deserialize(item.translate, in); } private: double sin_angle; double cos_angle; dlib::vector translate; }; // ---------------------------------------------------------------------------------------- class point_transform_affine { public: point_transform_affine ( ) { m = identity_matrix(2); b.x() = 0; b.y() = 0; } point_transform_affine ( const matrix& m_, const dlib::vector& b_ ) :m(m_), b(b_) { } const dlib::vector operator() ( const dlib::vector& p ) const { return m*p + b; } const matrix& get_m( ) const { return m; } const dlib::vector& get_b( ) const { return b; } inline friend void serialize (const point_transform_affine& item, std::ostream& out) { serialize(item.m, out); serialize(item.b, out); } inline friend void deserialize (point_transform_affine& item, std::istream& in) { deserialize(item.m, in); deserialize(item.b, in); } private: matrix m; dlib::vector b; }; // ---------------------------------------------------------------------------------------- class rectangle_transform { public: rectangle_transform ( ) { } rectangle_transform ( const point_transform_affine& tform_ ) :tform(tform_) { } drectangle operator() ( const drectangle& r ) const { dpoint tl = r.tl_corner(); dpoint tr = r.tr_corner(); dpoint bl = r.bl_corner(); dpoint br = r.br_corner(); // The new rectangle would ideally have this area if we could actually rotate // the box. Note the 1+ is because that's how the rectangles calculate their // width and height. double new_area = (1+length(tform(tl)-tform(tr)))*(1+length(tform(tl)-tform(bl))); // But if we rotate the corners of the rectangle and then find the rectangle // that contains them we get this, which might have a much larger area than we // want. drectangle temp; temp += tform(tl); temp += tform(tr); temp += tform(bl); temp += tform(br); // so we adjust the area to match the target area and have the same center as // the above box. double scale = std::sqrt(new_area/temp.area()); return centered_rect(center(temp), std::round(temp.width()*scale), std::round(temp.height()*scale)); } rectangle operator() ( const rectangle& r ) const { return (*this)(drectangle(r)); } const point_transform_affine& get_tform( ) const { return tform; } inline friend void serialize (const rectangle_transform& item, std::ostream& out) { serialize(item.tform, out); } inline friend void deserialize (rectangle_transform& item, std::istream& in) { deserialize(item.tform, in); } private: point_transform_affine tform; }; // ---------------------------------------------------------------------------------------- inline point_transform_affine operator* ( const point_transform_affine& lhs, const point_transform_affine& rhs ) { return point_transform_affine(lhs.get_m()*rhs.get_m(), lhs.get_m()*rhs.get_b()+lhs.get_b()); } // ---------------------------------------------------------------------------------------- inline point_transform_affine inv ( const point_transform_affine& trans ) { matrix im = inv(trans.get_m()); return point_transform_affine(im, -im*trans.get_b()); } // ---------------------------------------------------------------------------------------- template point_transform_affine find_affine_transform ( const std::vector >& from_points, const std::vector >& to_points ) { // make sure requires clause is not broken DLIB_ASSERT(from_points.size() == to_points.size() && from_points.size() >= 3, "\t point_transform_affine find_affine_transform(from_points, to_points)" << "\n\t Invalid inputs were given to this function." << "\n\t from_points.size(): " << from_points.size() << "\n\t to_points.size(): " << to_points.size() ); matrix P(3, from_points.size()); matrix Q(2, from_points.size()); for (unsigned long i = 0; i < from_points.size(); ++i) { P(0,i) = from_points[i].x(); P(1,i) = from_points[i].y(); P(2,i) = 1; Q(0,i) = to_points[i].x(); Q(1,i) = to_points[i].y(); } const matrix m = Q*pinv(P); return point_transform_affine(subm(m,0,0,2,2), colm(m,2)); } // ---------------------------------------------------------------------------------------- template point_transform_affine find_similarity_transform ( const std::vector >& from_points, const std::vector >& to_points ) { // make sure requires clause is not broken DLIB_ASSERT(from_points.size() == to_points.size() && from_points.size() >= 2, "\t point_transform_affine find_similarity_transform(from_points, to_points)" << "\n\t Invalid inputs were given to this function." << "\n\t from_points.size(): " << from_points.size() << "\n\t to_points.size(): " << to_points.size() ); // We use the formulas from the paper: Least-squares estimation of transformation // parameters between two point patterns by Umeyama. They are equations 34 through // 43. dlib::vector mean_from, mean_to; double sigma_from = 0, sigma_to = 0; matrix cov; cov = 0; for (unsigned long i = 0; i < from_points.size(); ++i) { mean_from += from_points[i]; mean_to += to_points[i]; } mean_from /= from_points.size(); mean_to /= from_points.size(); for (unsigned long i = 0; i < from_points.size(); ++i) { sigma_from += length_squared(from_points[i] - mean_from); sigma_to += length_squared(to_points[i] - mean_to); cov += (to_points[i] - mean_to)*trans(from_points[i] - mean_from); } sigma_from /= from_points.size(); sigma_to /= from_points.size(); cov /= from_points.size(); matrix u, v, s, d; svd(cov, u,d,v); s = identity_matrix(cov); if (det(cov) < 0 || (det(cov) == 0 && det(u)*det(v)<0)) { if (d(1,1) < d(0,0)) s(1,1) = -1; else s(0,0) = -1; } matrix r = u*s*trans(v); double c = 1; if (sigma_from != 0) c = 1.0/sigma_from * trace(d*s); vector t = mean_to - c*r*mean_from; return point_transform_affine(c*r, t); } // ---------------------------------------------------------------------------------------- class point_transform_projective { public: point_transform_projective ( ) { m = identity_matrix(3); } point_transform_projective ( const matrix& m_ ) :m(m_) { } point_transform_projective ( const point_transform_affine& tran ) { set_subm(m, 0,0, 2,2) = tran.get_m(); set_subm(m, 0,2, 2,1) = tran.get_b(); m(2,0) = 0; m(2,1) = 0; m(2,2) = 1; } const dlib::vector operator() ( const dlib::vector& p ) const { dlib::vector temp(p); temp.z() = 1; temp = m*temp; if (temp.z() != 0) temp = temp/temp.z(); return temp; } const matrix& get_m( ) const { return m; } inline friend void serialize (const point_transform_projective& item, std::ostream& out) { serialize(item.m, out); } inline friend void deserialize (point_transform_projective& item, std::istream& in) { deserialize(item.m, in); } private: matrix m; }; // ---------------------------------------------------------------------------------------- inline point_transform_projective operator* ( const point_transform_projective& lhs, const point_transform_projective& rhs ) { return point_transform_projective(lhs.get_m()*rhs.get_m()); } // ---------------------------------------------------------------------------------------- inline point_transform_projective inv ( const point_transform_projective& trans ) { return point_transform_projective(inv(trans.get_m())); } // ---------------------------------------------------------------------------------------- namespace impl_proj { inline point_transform_projective find_projective_transform_basic ( const std::vector >& from_points, const std::vector >& to_points ) /*! ensures - Uses the system of equations approach to finding a projective transform. This is "Method 3" from Estimating Projective Transformation Matrix by Zhengyou Zhang. - It should be emphasized that the find_projective_transform_basic() routine, which uses the most popular method for finding projective transformations, doesn't really work well when the minimum error solution doesn't have zero error. In this case, it can deviate by a large amount from the proper minimum mean squared error transformation. Therefore, our overall strategy will be to use the solution from find_projective_transform_basic() as a starting point for a BFGS based non-linear optimizer which will optimize the correct mean squared error criterion. A great essay on this subject is Homography Estimation by Elan Dubrofsky. !*/ { // make sure requires clause is not broken DLIB_ASSERT(from_points.size() == to_points.size() && from_points.size() >= 4, "\t point_transform_projective find_projective_transform_basic(from_points, to_points)" << "\n\t Invalid inputs were given to this function." << "\n\t from_points.size(): " << from_points.size() << "\n\t to_points.size(): " << to_points.size() ); matrix accum, u, v; matrix w; matrix B; accum = 0; B = 0; for (unsigned long i = 0; i < from_points.size(); ++i) { dlib::vector f = from_points[i]; f.z() = 1; dlib::vector t = to_points[i]; t.z() = 1; set_subm(B,0,0,1,3) = t.y()*trans(f); set_subm(B,1,0,1,3) = trans(f); set_subm(B,0,3,1,3) = -t.x()*trans(f); set_subm(B,1,6,1,3) = -t.x()*trans(f); accum += trans(B)*B; } svd2(true, false, accum, u, w, v); long j = index_of_min(w); return point_transform_projective(reshape(colm(u,j),3,3)); } // ---------------------------------------------------------------------------------------- struct obj { /*! WHAT THIS OBJECT REPRESENTS This is the objective function we really want to minimize when looking for a transformation matrix. That is, we would like the transformed points to be as close as possible to their "to" points. Here, closeness is measured using Euclidean distance. !*/ obj( const std::vector >& from_points_, const std::vector >& to_points_ ) : from_points(from_points_) , to_points(to_points_) {} const std::vector >& from_points; const std::vector >& to_points; double operator() ( const matrix& p ) const { point_transform_projective tran(reshape(p,3,3)); double sum = 0; for (unsigned long i = 0; i < from_points.size(); ++i) { sum += length_squared(tran(from_points[i]) - to_points[i]); } return sum; } }; struct obj_der { /*! WHAT THIS OBJECT REPRESENTS This is the derivative of obj. !*/ obj_der( const std::vector >& from_points_, const std::vector >& to_points_ ) : from_points(from_points_) , to_points(to_points_) {} const std::vector >& from_points; const std::vector >& to_points; matrix operator() ( const matrix& p ) const { const matrix H = reshape(p,3,3); matrix grad; grad = 0; for (unsigned long i = 0; i < from_points.size(); ++i) { dlib::vector from, to; from = from_points[i]; from.z() = 1; to = to_points[i]; to.z() = 1; matrix w = H*from; const double scale = (w(2) != 0) ? (1.0/w(2)) : (1); w *= scale; matrix residual = (w-to)*2*scale; grad(0,0) += from.x()*residual(0); grad(0,1) += from.y()*residual(0); grad(0,2) += residual(0); grad(1,0) += from.x()*residual(1); grad(1,1) += from.y()*residual(1); grad(1,2) += residual(1); grad(2,0) += -(from.x()*w(0)*residual(0) + from.x()*w(1)*residual(1)); grad(2,1) += -(from.y()*w(0)*residual(0) + from.y()*w(1)*residual(1)); grad(2,2) += -( w(0)*residual(0) + w(1)*residual(1)); } return reshape_to_column_vector(grad); } }; } // ---------------------------------------------------------------------------------------- inline point_transform_projective find_projective_transform ( const std::vector >& from_points, const std::vector >& to_points ) { using namespace impl_proj; // make sure requires clause is not broken DLIB_ASSERT(from_points.size() == to_points.size() && from_points.size() >= 4, "\t point_transform_projective find_projective_transform(from_points, to_points)" << "\n\t Invalid inputs were given to this function." << "\n\t from_points.size(): " << from_points.size() << "\n\t to_points.size(): " << to_points.size() ); // Find a candidate projective transformation. Also, find the best affine // transform and then compare it with the projective transform estimated using the // direct SVD method. Use whichever one works better as the starting point for a // BFGS based optimizer. If the best solution has large mean squared error and is // also close to affine then find_projective_transform_basic() might give a very // bad initial guess. So also checking for a good affine transformation can // produce a much better final result in many cases. point_transform_projective tran1 = find_projective_transform_basic(from_points, to_points); point_transform_affine tran2 = find_affine_transform(from_points, to_points); // check which is best double error1 = 0; double error2 = 0; for (unsigned long i = 0; i < from_points.size(); ++i) { error1 += length_squared(tran1(from_points[i])-to_points[i]); error2 += length_squared(tran2(from_points[i])-to_points[i]); } matrix params; // Pick the minimum error solution among the two so far. if (error1 < error2) params = reshape_to_column_vector(tran1.get_m()); else params = reshape_to_column_vector(point_transform_projective(tran2).get_m()); // Now refine the transformation matrix so that we can be sure we have // at least a local minimizer. obj o(from_points, to_points); obj_der der(from_points, to_points); find_min(bfgs_search_strategy(), objective_delta_stop_strategy(1e-6,100), o, der, params, 0); return point_transform_projective(reshape(params,3,3)); } // ---------------------------------------------------------------------------------------- template const dlib::vector rotate_point ( const dlib::vector& center, const dlib::vector& p, double angle ) { point_rotator rot(angle); return rot(p-center)+center; } // ---------------------------------------------------------------------------------------- inline matrix rotation_matrix ( double angle ) { const double ca = std::cos(angle); const double sa = std::sin(angle); matrix m; m = ca, -sa, sa, ca; return m; } // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- class point_transform_affine3d { public: point_transform_affine3d ( ) { m = identity_matrix(3); b.x() = 0; b.y() = 0; } point_transform_affine3d ( const matrix& m_, const dlib::vector& b_ ) :m(m_), b(b_) { } const dlib::vector operator() ( const dlib::vector& p ) const { return m*p + b; } const matrix& get_m( ) const { return m; } const dlib::vector& get_b( ) const { return b; } inline friend void serialize (const point_transform_affine3d& item, std::ostream& out) { serialize(item.m, out); serialize(item.b, out); } inline friend void deserialize (point_transform_affine3d& item, std::istream& in) { deserialize(item.m, in); deserialize(item.b, in); } private: matrix m; dlib::vector b; }; // ---------------------------------------------------------------------------------------- inline point_transform_affine3d operator* ( const point_transform_affine3d& lhs, const point_transform_affine& rhs ) { matrix m; m = 0; set_subm(m, get_rect(rhs.get_m())) = rhs.get_m(); vector b = rhs.get_b(); return point_transform_affine3d(lhs.get_m()*m, lhs.get_m()*b+lhs.get_b()); } // ---------------------------------------------------------------------------------------- inline point_transform_affine3d operator* ( const point_transform_affine3d& lhs, const point_transform_affine3d& rhs ) { return point_transform_affine3d(lhs.get_m()*rhs.get_m(), lhs.get_m()*rhs.get_b()+lhs.get_b()); } // ---------------------------------------------------------------------------------------- inline point_transform_affine3d inv ( const point_transform_affine3d& trans ) { matrix im = inv(trans.get_m()); return point_transform_affine3d(im, -im*trans.get_b()); } // ---------------------------------------------------------------------------------------- inline point_transform_affine3d rotate_around_x ( double angle ) { const double ca = std::cos(angle); const double sa = std::sin(angle); matrix m; m = 1, 0, 0, 0, ca, -sa, 0, sa, ca; vector b; return point_transform_affine3d(m,b); } // ---------------------------------------------------------------------------------------- inline point_transform_affine3d rotate_around_y ( double angle ) { const double ca = std::cos(angle); const double sa = std::sin(angle); matrix m; m = ca, 0, sa, 0, 1, 0, -sa, 0, ca; vector b; return point_transform_affine3d(m,b); } // ---------------------------------------------------------------------------------------- inline point_transform_affine3d rotate_around_z ( double angle ) { const double ca = std::cos(angle); const double sa = std::sin(angle); matrix m; m = ca, -sa, 0, sa, ca, 0, 0, 0, 1; vector b; return point_transform_affine3d(m,b); } // ---------------------------------------------------------------------------------------- inline point_transform_affine3d translate_point ( const vector& delta ) { return point_transform_affine3d(identity_matrix(3),delta); } inline point_transform_affine3d translate_point ( double x, double y, double z ) { return translate_point(vector(x,y,z)); } // ---------------------------------------------------------------------------------------- class camera_transform { public: camera_transform ( ) { *this = camera_transform(vector(1,1,1), vector(0,0,0), vector(0,0,1), 90, 1); } camera_transform ( const vector& camera_pos_, const vector& camera_looking_at_, const vector& camera_up_direction_, const double camera_field_of_view_, const unsigned long num_pixels_ ) { // make sure requires clause is not broken DLIB_CASSERT(0 < camera_field_of_view_ && camera_field_of_view_ < 180, "\t camera_transform::camera_transform()" << "\n\t Invalid inputs were given to this function." << "\n\t camera_field_of_view_: " << camera_field_of_view_ ); camera_pos = camera_pos_; camera_looking_at = camera_looking_at_; camera_up_direction = camera_up_direction_; camera_field_of_view = camera_field_of_view_; num_pixels = num_pixels_; dlib::vector X,Y,Z; Z = (camera_looking_at - camera_pos).normalize(); Y = camera_up_direction - dot(camera_up_direction,Z)*Z; Y = Y.normalize(); X = Z.cross(Y); set_rowm(proj,0) = trans(X); // Minus because images have y axis going down but we want the 3d projection to appear using a normal coordinate system with y going up. set_rowm(proj,1) = -trans(Y); set_rowm(proj,2) = trans(Z); width = num_pixels/2.0; dist_scale = width/std::tan(pi/180*camera_field_of_view/2); } vector get_camera_pos() const { return camera_pos; } vector get_camera_looking_at() const { return camera_looking_at; } vector get_camera_up_direction()const { return camera_up_direction; } double get_camera_field_of_view() const { return camera_field_of_view; } unsigned long get_num_pixels() const { return num_pixels; } inline dpoint operator() ( const vector& p, double& scale, double& distance ) const { vector temp = p-camera_pos; temp = proj*temp; distance = temp.z(); scale = dist_scale/(temp.z()>0 ? temp.z() : 1e-9); temp.x() = temp.x()*scale + width; temp.y() = temp.y()*scale + width; return temp; } dpoint operator() ( const vector& p ) const { double scale, distance; return (*this)(p,scale,distance); } inline friend void serialize (const camera_transform& item, std::ostream& out) { serialize(item.camera_pos, out); serialize(item.camera_looking_at, out); serialize(item.camera_up_direction, out); serialize(item.camera_field_of_view, out); serialize(item.num_pixels, out); serialize(item.proj, out); serialize(item.dist_scale, out); serialize(item.width, out); } inline friend void deserialize (camera_transform& item, std::istream& in) { deserialize(item.camera_pos, in); deserialize(item.camera_looking_at, in); deserialize(item.camera_up_direction, in); deserialize(item.camera_field_of_view, in); deserialize(item.num_pixels, in); deserialize(item.proj, in); deserialize(item.dist_scale, in); deserialize(item.width, in); } private: vector camera_pos; vector camera_looking_at; vector camera_up_direction; double camera_field_of_view; unsigned long num_pixels; matrix proj; double dist_scale; double width; }; // ---------------------------------------------------------------------------------------- } #endif // DLIB_POINT_TrANSFORMS_H_