00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
#include <cstdlib>
#include <iostream>
#include <vector>

#include <classifiers/siftpp.h>

#include <utils/time/clock.h>
#include <utils/time/tracker.h>

#include <core/exception.h>
#include <core/exceptions/software.h>
#include <fvutils/color/colorspaces.h>
#include <fvutils/color/conversions.h>
#include <fvutils/readers/png.h>
00039
00040
00041
00042
00043 using namespace fawkes;
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067 SiftppClassifier::SiftppClassifier( const char * object_file,
00068 int samplingStep, int octaves, int levels,
00069 float magnif, int noorient, int unnormalized)
00070 : Classifier("SiftppClassifier")
00071 {
00072
00073 __samplingStep = samplingStep;
00074 __octaves = octaves;
00075 __levels = levels;
00076
00077 __first = -1 ;
00078 __threshold = 0.04f / __levels / 2.0f ;
00079 __edgeThreshold = 10.0f;
00080 __magnif = magnif;
00081 __noorient = noorient;
00082 __unnormalized = unnormalized;
00083
00084
00085 __vlen = 128;
00086
00087
00088
00089 __tt = new TimeTracker();
00090 __loop_count = 0;
00091 __ttc_objconv = __tt->add_class("ObjectConvert");
00092 __ttc_objfeat = __tt->add_class("ObjectFeatures");
00093 __ttc_imgconv = __tt->add_class("ImageConvert");
00094 __ttc_imgfeat = __tt->add_class("ImageFeatures");
00095 __ttc_matchin = __tt->add_class("Matching");
00096 __ttc_roimerg = __tt->add_class("MergeROIs");
00097
00098
00099
00100 __tt->ping_start(__ttc_objconv);
00101
00102
00103 PNGReader pngr( object_file );
00104 unsigned char* buf = malloc_buffer( pngr.colorspace(), pngr.pixel_width(), pngr.pixel_height() );
00105 pngr.set_buffer( buf );
00106 pngr.read();
00107
00108 unsigned int lwidth = pngr.pixel_width();
00109 unsigned int lheight = pngr.pixel_height();
00110 VL::pixel_t * im_pt = new VL::pixel_t [lwidth * lheight ];
00111 VL::pixel_t * start = im_pt;
00112
00113 for (unsigned int h = 0; h < lheight; ++h) {
00114 for (unsigned int w = 0; w < lwidth ; ++w) {
00115 int i = (buf[h * lwidth + w] );
00116 VL::pixel_t norm = VL::pixel_t( 255 );
00117 *start++ = VL::pixel_t( i ) / norm;
00118 }
00119 }
00120
00121 __obj_img = new VL::PgmBuffer();
00122 __obj_img->width = lwidth;
00123 __obj_img->height = lheight;
00124 __obj_img->data = im_pt;
00125
00126 if ( ! __obj_img ) {
00127 throw Exception("Could not load object file");
00128 }
00129
00130
00131 __tt->ping_end(__ttc_objconv);
00132
00133
00134
00135
00136
00137
00138 __tt->ping_start(__ttc_objfeat);
00139
00140
00141
00142 __obj_features.clear();
00143
00144 __obj_num_features = 0;
00145
00146 __sigman = .5 ;
00147 __sigma0 = 1.6 * powf(2.0f, 1.0f / __levels) ;
00148
00149 std::cout << "SiftppClassifier(ctor): init scalespace" << std::endl;
00150
00151 VL::Sift sift(__obj_img->data, __obj_img->width, __obj_img->height,
00152 __sigman, __sigma0, __octaves, __levels, __first, -1, __levels+1) ;
00153
00154 std::cout << "SiftppClassifier(ctor): detect object keypoints" << std::endl;
00155
00156 sift.detectKeypoints(__threshold, __edgeThreshold) ;
00157
00158 __obj_num_features = sift.keypointsEnd() - sift.keypointsBegin();
00159 std::cout << "SiftppClassifier(ctor): computed '" << __obj_num_features << "' object-keypoints" << std::endl;
00160
00161
00162 sift.setNormalizeDescriptor( ! __unnormalized ) ;
00163 sift.setMagnification( __magnif ) ;
00164
00165 std::cout << "SiftppClassifier(ctor): run detector, compute ori and des ..." << std::endl;
00166
00167 for( VL::Sift::KeypointsConstIter iter = sift.keypointsBegin() ;
00168 iter != sift.keypointsEnd() ; ++iter ) {
00169
00170
00171 Feature feat;
00172
00173
00174 feat.key = (*iter);
00175
00176
00177 VL::float_t angles [4] ;
00178 int nangles ;
00179 if( ! __noorient ) {
00180 nangles = sift.computeKeypointOrientations(angles, *iter) ;
00181 } else {
00182 nangles = 1;
00183 angles[0] = VL::float_t(0) ;
00184 }
00185 feat.number_of_desc = nangles;
00186 feat.descs = new VL::float_t*[nangles];
00187
00188
00189
00190 for(int a = 0 ; a < nangles ; ++a) {
00191
00192
00193
00194 feat.descs[a] = new VL::float_t[__vlen];
00195 sift.computeKeypointDescriptor(feat.descs[a], *iter, angles[a]) ;
00196 }
00197
00198
00199
00200 __obj_features.push_back( feat );
00201
00202 }
00203
00204 __obj_num_features = __obj_features.size();
00205 if ( ! __obj_num_features > 0 ) {
00206 throw Exception("Could not compute object features");
00207 }
00208 std::cout << "SiftppClassifier(ctor): computed '" << __obj_num_features << "' features from object" << std::endl;
00209
00210
00211 __tt->ping_end(__ttc_objfeat);
00212
00213
00214 }
00215
00216
00217
00218 SiftppClassifier::~SiftppClassifier()
00219 {
00220
00221 delete __obj_img;
00222 __obj_features.clear();
00223
00224
00225 __img_features.clear();
00226 }
00227
00228
00229 std::list< ROI > *
00230 SiftppClassifier::classify()
00231 {
00232
00233 __tt->ping_start(0);
00234
00235
00236
00237 std::list< ROI > *rv = new std::list< ROI >();
00238
00239
00240 int x_min = _width;
00241 int y_min = _height;
00242 int x_max = 0;
00243 int y_max = 0;
00244
00245
00246 __tt->ping_start(__ttc_imgconv);
00247
00248 std::cout << "SiftppClassifier(classify): copy imgdat to SIFTPP Image" << std::endl;
00249
00250 VL::pixel_t * im_pt = new VL::pixel_t [_width * _height ];
00251 VL::pixel_t * start = im_pt;
00252 for (unsigned int h = 0; h < _height; ++h) {
00253 for (unsigned int w = 0; w < _width ; ++w) {
00254 int i = (_src[h * _width + w] );
00255 VL::pixel_t norm = VL::pixel_t( 255 );
00256 *start++ = VL::pixel_t( i ) / norm;
00257 }
00258 }
00259
00260 __image = new VL::PgmBuffer();
00261 __image->width = _width;
00262 __image->height = _height;
00263 __image->data = im_pt;
00264
00265
00266 __tt->ping_end(__ttc_imgconv);
00267
00268
00269
00270
00271
00272
00273 __tt->ping_start(__ttc_imgfeat);
00274
00275
00276
00277 __img_features.clear();
00278 __img_num_features = 0;
00279
00280
00281 std::cout << "SiftppClassifier(classify): init scalespace" << std::endl;
00282
00283 VL::Sift sift(__image->data, __image->width, __image->height,
00284 __sigman, __sigma0, __octaves, __levels, __first, -1, __levels+1) ;
00285
00286 std::cout << "SiftppClassifier(classify): detect image keypoints" << std::endl;
00287
00288 sift.detectKeypoints(__threshold, __edgeThreshold) ;
00289
00290
00291 __img_num_features = sift.keypointsEnd() - sift.keypointsBegin();
00292 std::cout << "SiftppClassifier(classify): Extracted '" << __img_num_features << "' image keypoints" << std::endl;
00293
00294
00295 sift.setNormalizeDescriptor( ! __unnormalized ) ;
00296 sift.setMagnification( __magnif ) ;
00297
00298 std::cout << "SiftppClassifier(classify): run detector, compute ori and des ..." << std::endl;
00299
00300 for( VL::Sift::KeypointsConstIter iter = sift.keypointsBegin() ;
00301 iter != sift.keypointsEnd() ; ++iter ) {
00302
00303 Feature feat;
00304
00305
00306 feat.key = (*iter);
00307
00308
00309
00310 VL::float_t angles [4] ;
00311 int nangles ;
00312 if( ! __noorient ) {
00313 nangles = sift.computeKeypointOrientations(angles, *iter) ;
00314 } else {
00315 nangles = 1;
00316 angles[0] = VL::float_t(0) ;
00317 }
00318 feat.number_of_desc = nangles;
00319 feat.descs = new VL::float_t*[nangles];
00320
00321
00322
00323 for(int a = 0 ; a < nangles ; ++a) {
00324
00325 feat.descs[a] = new VL::float_t[__vlen] ;
00326 sift.computeKeypointDescriptor(feat.descs[a], *iter, angles[a]) ;
00327 }
00328
00329
00330
00331 __img_features.push_back( feat );
00332
00333 }
00334
00335
00336 __img_num_features = __img_features.size();
00337
00338
00339 __tt->ping_end(__ttc_imgfeat);
00340
00341
00342 std::cout << "SiftppClassifier(classify): Extracted '" << __img_num_features << "' image features" << std::endl;
00343
00344
00345 __tt->ping_start(__ttc_matchin);
00346
00347 std::cout << "SiftppClassifier(classify): matching ..." << std::endl;
00348
00349 std::vector< int > matches(__obj_features.size());
00350 int m = 0;
00351 for (unsigned i = 0; i < __obj_features.size(); i++) {
00352 int match = findMatch(__obj_features[i], __img_features);
00353 matches[i] = match;
00354 if (match != -1) {
00355 std::cout << "SiftppClassifier(classify): Matched feature " << i << " in object image with feature " << match << " in image." << std::endl;
00356
00357 ROI r( (int)(__img_features[matches[i]].key.x)-5, (int)(__img_features[matches[i]].key.y )-5, 11, 11, _width, _height);
00358 rv->push_back(r);
00359
00360 ++m;
00361 }
00362 }
00363
00364
00365 __tt->ping_end(__ttc_matchin);
00366
00367 std::cout << "SiftppClassifier(classify) matched '" << m << "' of '" << __obj_features.size() << "' features in scene." << std::endl;
00368
00369 std::cout << "SiftppClassifier(classify): computing ROI" << std::endl;
00370
00371 __tt->ping_start(__ttc_roimerg);
00372
00373
00374 for (unsigned i = 0; i < matches.size(); i++) {
00375 if (matches[i] != -1) {
00376 if( (int)__img_features[matches[i]].key.x < x_min )
00377 x_min = (int)__img_features[matches[i]].key.x;
00378 if( (int)__img_features[matches[i]].key.y < y_min )
00379 y_min = (int)__img_features[matches[i]].key.y;
00380 if( (int)__img_features[matches[i]].key.x > x_max )
00381 x_max = (int)__img_features[matches[i]].key.x;
00382 if( (int)__img_features[matches[i]].key.y > y_max )
00383 y_max = (int)__img_features[matches[i]].key.y;
00384 }
00385 }
00386 if( m != 0 ) {
00387 ROI r(x_min, y_min, x_max-x_min, y_max-y_min, _width, _height);
00388 rv->push_back(r);
00389 }
00390
00391
00392 __tt->ping_end(__ttc_roimerg);
00393
00394
00395
00396 __tt->ping_end(0);
00397
00398
00399
00400
00401 __tt->print_to_stdout();
00402
00403
00404 delete __image;
00405
00406 std::cout << "SiftppClassifier(classify): done ... returning '" << rv->size() << "' ROIs." << std::endl;
00407 return rv;
00408 }
00409
00410 int
00411 SiftppClassifier::findMatch(const Feature & ip1, const std::vector< Feature > & ipts) {
00412 double mind = 1e100, second = 1e100;
00413 int match = -1;
00414
00415 for (unsigned i = 0; i < ipts.size(); i++) {
00416
00417 if (ipts[i].number_of_desc != ip1.number_of_desc)
00418 continue;
00419
00420 for ( int j = 0; j < ip1.number_of_desc; ++j ) {
00421 double d = distSquare(ipts[i].descs[j], ip1.descs[j], __vlen);
00422
00423 if (d < mind) {
00424 second = mind;
00425 mind = d;
00426 match = i;
00427 } else if (d < second) {
00428 second = d;
00429 }
00430 }
00431 }
00432
00433 if (mind < 0.5 * second)
00434 return match;
00435
00436 return -1;
00437 }
00438
00439
00440 double
00441 SiftppClassifier::distSquare(VL::float_t *v1, VL::float_t *v2, int n) {
00442 double dsq = 0.;
00443 while (n--) {
00444 dsq += (v1[n-1] - v2[n-1]) * (v1[n-1] - v2[n-1]);
00445 }
00446
00447 return dsq;
00448 }