siftpp.cpp

00001
00002 /***************************************************************************
00003  *  siftpp.cpp - siftpp based classifier 
00004  *
00005  *  Created: Sat Apr 12 10:15:23 2008
00006  *  Copyright 2008 Stefan Schiffer [stefanschiffer.de]
00007  *
00008  ****************************************************************************/
00009
00010 /*  This program is free software; you can redistribute it and/or modify
00011  *  it under the terms of the GNU General Public License as published by
00012  *  the Free Software Foundation; either version 2 of the License, or
00013  *  (at your option) any later version. A runtime exception applies to
00014  *  this software (see LICENSE.GPL_WRE file mentioned below for details).
00015  *
00016  *  This program is distributed in the hope that it will be useful,
00017  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00018  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00019  *  GNU Library General Public License for more details.
00020  *
00021  *  Read the full text in the LICENSE.GPL_WRE file in the doc directory.
00022  */
00023
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <vector>
00026
00027 #include <classifiers/siftpp.h>
00028
00029 //#ifdef SIFTPP_TIMETRACKER
00030 #include <utils/time/clock.h>
00031 #include <utils/time/tracker.h>
00032 //#endif
00033
00034 #include <core/exception.h>
00035 #include <core/exceptions/software.h>
00036 #include <fvutils/color/colorspaces.h>
00037 #include <fvutils/color/conversions.h>
00038 #include <fvutils/readers/png.h>
00039 //#include <fvutils/writers/pnm.h>
00040 //#include <fvutils/writers/png.h>
00041
00042 //using namespace fawkes; 
00043 using namespace fawkes;
00044 
00045 /** @class SiftppClassifier <classifiers/siftpp.h>
00046  * SIFTPP classifier.
00047  *
00048  * This class provides a classifier that uses SIFTPP to detect objects in a given
00049  * image by matching features. Each matched feature is reported as an 11x11 ROI around
00050  * its keypoint; a final ROI bounding all matched keypoints marks the detected object.
00051  *
00052  * This code uses siftpp from http://vision.ucla.edu/~vedaldi/code/siftpp/siftpp.html
00053  * and is partly based on code from their package.
00054  *
00055  * @author Stefan Schiffer
00056  */
00057 
00058 /** Constructor.
00059  * @param object_file file that contains an image of the object to detect
00060  * @param samplingStep Initial sampling step
00061  * @param octaves Number of analysed octaves
00062  * @param levels Number of levels per octave
00063  * @param magnif Keypoint magnification (default = 3)
00064  * @param noorient rotation invariance (0) or upright (1)
00065  * @param unnormalized Normalization of features (default 0)
00066  */
00067 SiftppClassifier::SiftppClassifier( const char * object_file,
00068                                     int samplingStep, int octaves, int levels,
00069                                     float magnif, int noorient, int unnormalized)
00070   : Classifier("SiftppClassifier")
00071 {
00072   // params for FastHessian
00073   __samplingStep = samplingStep;
00074   __octaves = octaves;
00075   __levels = levels;
00076   // params for Descriptors
00077   __first          = -1 ;
00078   __threshold      = 0.04f / __levels / 2.0f ;
00079   __edgeThreshold  = 10.0f;
00080   __magnif         = magnif;
00081   __noorient       = noorient;
00082   __unnormalized   = unnormalized;
00083
00084   // descriptor vector length
00085   __vlen = 128;
00086
00087
00088   //#ifdef SIFTPP_TIMETRACKER
00089   __tt = new TimeTracker();
00090   __loop_count = 0;
00091   __ttc_objconv = __tt->add_class("ObjectConvert");
00092   __ttc_objfeat = __tt->add_class("ObjectFeatures");
00093   __ttc_imgconv = __tt->add_class("ImageConvert");
00094   __ttc_imgfeat = __tt->add_class("ImageFeatures");
00095   __ttc_matchin = __tt->add_class("Matching");
00096   __ttc_roimerg = __tt->add_class("MergeROIs");
00097   //#endif
00098
00099   //#ifdef SIFTPP_TIMETRACKER
00100   __tt->ping_start(__ttc_objconv);
00101   //#endif
00102
00103   PNGReader pngr( object_file );
00104   unsigned char* buf = malloc_buffer( pngr.colorspace(), pngr.pixel_width(), pngr.pixel_height() );
00105   pngr.set_buffer( buf );
00106   pngr.read();
00107
00108   unsigned int lwidth = pngr.pixel_width();
00109   unsigned int lheight = pngr.pixel_height();
00110   VL::pixel_t * im_pt = new VL::pixel_t [lwidth * lheight ];
00111   VL::pixel_t * start = im_pt;
00112   //VL::pixel_t* end   = start + lwidth*lheight ; 
00113   for (unsigned int h = 0; h < lheight; ++h) {
00114     for (unsigned int w = 0; w < lwidth ; ++w) {
00115       int i = (buf[h * lwidth + w] );
00116       VL::pixel_t norm = VL::pixel_t( 255 );
00117       *start++ = VL::pixel_t( i ) / norm;
00118     }
00119   }
00120   // make image
00121   __obj_img = new VL::PgmBuffer();
00122   __obj_img->width  = lwidth;
00123   __obj_img->height = lheight;
00124   __obj_img->data   = im_pt;
00125
00126   if ( ! __obj_img ) {
00127     throw Exception("Could not load object file");
00128   }
00129
00130   //#ifdef SIFTPP_TIMETRACKER
00131   __tt->ping_end(__ttc_objconv);
00132   //#endif
00133
00134   // save object image for debugging
00135   //
00136
00137   //#ifdef SIFTPP_TIMETRACKER
00138   __tt->ping_start(__ttc_objfeat);
00139   //#endif
00140
00141   // COMPUTE OBJECT FEATURES
00142   __obj_features.clear();
00143   //__obj_features.reserve(1000);
00144   __obj_num_features = 0;
00145
00146   __sigman = .5 ;
00147   __sigma0 = 1.6 * powf(2.0f, 1.0f / __levels) ;
00148
00149   std::cout << "SiftppClassifier(ctor): init scalespace" << std::endl;
00150   // initialize scalespace
00151   VL::Sift sift(__obj_img->data, __obj_img->width, __obj_img->height,
00152                 __sigman, __sigma0, __octaves, __levels, __first, -1, __levels+1) ;
00153
00154   std::cout << "SiftppClassifier(ctor): detect object keypoints" << std::endl;
00155   // Run SIFTPP detector
00156   sift.detectKeypoints(__threshold, __edgeThreshold) ;
00157   // Number of keypoints
00158   __obj_num_features = sift.keypointsEnd() - sift.keypointsBegin();
00159   std::cout << "SiftppClassifier(ctor): computed '" << __obj_num_features << "' object-keypoints" << std::endl;
00160
00161   // set descriptor options
00162   sift.setNormalizeDescriptor( ! __unnormalized ) ;
00163   sift.setMagnification( __magnif ) ;
00164
00165   std::cout << "SiftppClassifier(ctor): run detector, compute ori and des ..." << std::endl;
00166   // Run detector, compute orientations and descriptors
00167   for( VL::Sift::KeypointsConstIter iter = sift.keypointsBegin() ;
00168        iter != sift.keypointsEnd() ; ++iter ) {
00169
00170     //Feature * feat = new Feature();
00171     Feature feat;
00172
00173     //std::cout << "SiftppClassifier(ctor): saving keypoint" << std::endl;
00174     feat.key = (*iter);
00175
00176     // detect orientations
00177     VL::float_t angles [4] ;
00178     int nangles ;
00179     if( ! __noorient ) {
00180       nangles = sift.computeKeypointOrientations(angles, *iter) ;
00181     } else {
00182       nangles = 1;
00183       angles[0] = VL::float_t(0) ;
00184     }
00185     feat.number_of_desc = nangles;
00186     feat.descs = new VL::float_t*[nangles];
00187
00188     //std::cout << "SiftppClassifier(ctor): computing '" << nangles << "' descriptors" << std::endl;
00189     // compute descriptors
00190     for(int a = 0 ; a < nangles ; ++a) {
00191       //       out << setprecision(2) << iter->x << ' ' << setprecision(2) << iter->y << ' '
00192       //          << setprecision(2) << iter->sigma << ' ' << setprecision(3) << angles[a] ;
00193       // compute descriptor
00194       feat.descs[a] = new VL::float_t[__vlen];
00195       sift.computeKeypointDescriptor(feat.descs[a], *iter, angles[a]) ;
00196     } // next angle
00197     //std::cout << "SiftppClassifier(ctor): computed '" << feat.number_of_desc << "' descriptors." << std::endl;
00198
00199     // save feature
00200     __obj_features.push_back( feat );
00201
00202   } // next keypoint
00203
00204   __obj_num_features = __obj_features.size();
00205   if ( ! __obj_num_features > 0 ) {
00206     throw Exception("Could not compute object features");
00207   }
00208   std::cout << "SiftppClassifier(ctor): computed '" << __obj_num_features << "' features from object" << std::endl;
00209
00210   //#ifdef SIFTPP_TIMETRACKER
00211   __tt->ping_end(__ttc_objfeat);
00212   //#endif
00213
00214 }
00215
00216 
00217 /** Destructor. */
00218 SiftppClassifier::~SiftppClassifier()
00219 {
00220   //
00221   delete __obj_img;
00222   __obj_features.clear();
00223   //
00224   //delete __image;
00225   __img_features.clear();
00226 }
00227
00228
00229 std::list< ROI > *
00230 SiftppClassifier::classify()
00231 {
00232   //#ifdef SIFTPP_TIMETRACKER
00233   __tt->ping_start(0);
00234   //#endif
00235
00236   // list of ROIs to return
00237   std::list< ROI > *rv = new std::list< ROI >();
00238
00239   // for ROI calculation
00240   int x_min = _width;
00241   int y_min = _height;
00242   int x_max = 0;
00243   int y_max = 0;
00244
00245   //#ifdef SIFTPP_TIMETRACKER
00246   __tt->ping_start(__ttc_imgconv);
00247   //#endif
00248   std::cout << "SiftppClassifier(classify): copy imgdat to SIFTPP Image" << std::endl;
00249
00250   VL::pixel_t * im_pt = new VL::pixel_t [_width * _height ];
00251   VL::pixel_t * start = im_pt;
00252   for (unsigned int h = 0; h < _height; ++h) {
00253     for (unsigned int w = 0; w < _width ; ++w) {
00254       int i = (_src[h * _width + w] );
00255       VL::pixel_t norm = VL::pixel_t( 255 );
00256       *start++ = VL::pixel_t( i ) / norm;
00257     }
00258   }
00259   // make image
00260   __image = new VL::PgmBuffer();
00261   __image->width  = _width;
00262   __image->height = _height;
00263   __image->data   = im_pt;
00264
00265   //#ifdef SIFTPP_TIMETRACKER
00266   __tt->ping_end(__ttc_imgconv);
00267   //#endif
00268 
00269   /// Write image to verify correct operation
00270     // nothing yet
00271
00272   //#ifdef SIFTPP_TIMETRACKER
00273   __tt->ping_start(__ttc_imgfeat);
00274   //#endif
00275
00276   // COMPUTE IMAGE FEATURES
00277   __img_features.clear();
00278   __img_num_features = 0;
00279   //__img_features.reserve(1000);
00280
00281   std::cout << "SiftppClassifier(classify): init scalespace" << std::endl;
00282   // initialize scalespace
00283   VL::Sift sift(__image->data, __image->width, __image->height,
00284                 __sigman, __sigma0, __octaves, __levels, __first, -1, __levels+1) ;
00285
00286   std::cout << "SiftppClassifier(classify): detect image keypoints" << std::endl;
00287   // Run SIFTPP detector
00288   sift.detectKeypoints(__threshold, __edgeThreshold) ;
00289
00290   // Number of keypoints
00291   __img_num_features = sift.keypointsEnd() - sift.keypointsBegin();
00292   std::cout << "SiftppClassifier(classify): Extracted '" << __img_num_features << "' image keypoints" << std::endl;
00293
00294   // set descriptor options
00295   sift.setNormalizeDescriptor( ! __unnormalized ) ;
00296   sift.setMagnification( __magnif ) ;
00297
00298   std::cout << "SiftppClassifier(classify): run detector, compute ori and des ..." << std::endl;
00299   // Run detector, compute orientations and descriptors
00300   for( VL::Sift::KeypointsConstIter iter = sift.keypointsBegin() ;
00301        iter != sift.keypointsEnd() ; ++iter ) {
00302
00303     Feature feat; // = new Feature();
00304
00305     //std::cout << "SiftppClassifier(classify): saving keypoint" << std::endl;
00306     feat.key = (*iter);
00307
00308     //std::cout << "SiftppClassifier(classify): detect orientations" << std::endl;
00309     // detect orientations
00310     VL::float_t angles [4] ;
00311     int nangles ;
00312     if( ! __noorient ) {
00313       nangles = sift.computeKeypointOrientations(angles, *iter) ;
00314     } else {
00315       nangles = 1;
00316       angles[0] = VL::float_t(0) ;
00317     }
00318     feat.number_of_desc = nangles;
00319     feat.descs = new VL::float_t*[nangles];
00320
00321     //std::cout << "SiftppClassifier(classify): computing '" << nangles << "' descriptors" << std::endl;
00322     // compute descriptors
00323     for(int a = 0 ; a < nangles ; ++a) {
00324       // compute descriptor
00325       feat.descs[a] = new VL::float_t[__vlen] ;
00326       sift.computeKeypointDescriptor(feat.descs[a], *iter, angles[a]) ;
00327     } // next angle
00328     //std::cout << "SiftppClassifier(classify): computed '" << feat.number_of_desc << "' descriptors." << std::endl;
00329
00330     // save feature
00331     __img_features.push_back( feat );
00332
00333   } // next keypoint
00334
00335   // Number of feature
00336   __img_num_features = __img_features.size();
00337
00338   //#ifdef SIFTPP_TIMETRACKER
00339   __tt->ping_end(__ttc_imgfeat);
00340   //#endif
00341
00342   std::cout << "SiftppClassifier(classify): Extracted '" << __img_num_features << "' image features" << std::endl;
00343
00344   //#ifdef SIFTPP_TIMETRACKER
00345   __tt->ping_start(__ttc_matchin);
00346   //#endif
00347   std::cout << "SiftppClassifier(classify): matching ..." << std::endl;
00348
00349   std::vector< int > matches(__obj_features.size());
00350   int m = 0;
00351   for (unsigned i = 0; i < __obj_features.size(); i++) {
00352     int match = findMatch(__obj_features[i], __img_features);
00353     matches[i] = match;
00354     if (match != -1) {
00355       std::cout << "SiftppClassifier(classify): Matched feature " << i << " in object image with feature " << match << " in image." << std::endl;
00356       /// adding feature-ROI
00357       ROI r( (int)(__img_features[matches[i]].key.x)-5, (int)(__img_features[matches[i]].key.y )-5, 11, 11, _width, _height);
00358       rv->push_back(r);
00359       // increment feature-match-count
00360       ++m;
00361     }
00362   }
00363
00364   //#ifdef SIFTPP_TIMETRACKER
00365   __tt->ping_end(__ttc_matchin);
00366   //#endif
00367   std::cout << "SiftppClassifier(classify) matched '" << m << "' of '" << __obj_features.size() << "' features in scene." << std::endl;
00368
00369   std::cout << "SiftppClassifier(classify): computing ROI" << std::endl;
00370   //#ifdef SIFTPP_TIMETRACKER
00371   __tt->ping_start(__ttc_roimerg);
00372   //#endif
00373
00374   for (unsigned i = 0; i < matches.size(); i++) {
00375     if (matches[i] != -1) {
00376       if( (int)__img_features[matches[i]].key.x < x_min )
00377         x_min = (int)__img_features[matches[i]].key.x;
00378       if( (int)__img_features[matches[i]].key.y < y_min )
00379         y_min = (int)__img_features[matches[i]].key.y;
00380       if( (int)__img_features[matches[i]].key.x > x_max )
00381         x_max = (int)__img_features[matches[i]].key.x;
00382       if( (int)__img_features[matches[i]].key.y > y_max )
00383         y_max = (int)__img_features[matches[i]].key.y;
00384     }
00385   }
00386   if( m != 0 ) {
00387     ROI r(x_min, y_min, x_max-x_min, y_max-y_min, _width, _height);
00388     rv->push_back(r);
00389   }
00390
00391   //#ifdef SIFTPP_TIMETRACKER
00392   __tt->ping_end(__ttc_roimerg);
00393   //#endif
00394
00395   //#ifdef SIFTPP_TIMETRACKER
00396   __tt->ping_end(0);
00397   //#endif
00398
00399   //#ifdef SIFTPP_TIMETRACKER
00400   // print timetracker statistics
00401   __tt->print_to_stdout();
00402   //#endif
00403
00404   delete __image;
00405
00406   std::cout << "SiftppClassifier(classify): done ... returning '" << rv->size() << "' ROIs." << std::endl;
00407   return rv;
00408 }
00409
00410 int
00411 SiftppClassifier::findMatch(const Feature & ip1, const std::vector< Feature > & ipts) {
00412   double mind = 1e100, second = 1e100;
00413   int match = -1;
00414
00415   for (unsigned i = 0; i < ipts.size(); i++) {
00416
00417     if (ipts[i].number_of_desc != ip1.number_of_desc)
00418       continue;
00419     //std::cout << "SiftppClassifier(findMatch): number_of_desc matched!" << std::endl;
00420     for ( int j = 0; j < ip1.number_of_desc; ++j ) {
00421       double d = distSquare(ipts[i].descs[j], ip1.descs[j], __vlen);
00422
00423       if (d < mind) {
00424         second = mind;
00425         mind = d;
00426         match = i;
00427       } else if (d < second) {
00428         second = d;
00429       }
00430     }
00431   }
00432
00433   if (mind < 0.5 * second)
00434     return match;
00435
00436   return -1;
00437 }
00438
00439
00440 double
00441 SiftppClassifier::distSquare(VL::float_t *v1, VL::float_t *v2, int n) {
00442   double dsq = 0.;
00443   while (n--) {
00444     dsq += (v1[n-1] - v2[n-1]) * (v1[n-1] - v2[n-1]);
00445   }
00446   //std::cout << "  dsq: '" << dsq << "'" << std::endl;
00447   return dsq;
00448 }