yuvrgb.cpp

00001
00002 /****************************************************************************
00003  *  yuvrgb.cpp - YUV to RGB conversion - specific methods, macros and constants
00004  *
00005  *  Created: Sat Aug 12 15:02:41 2006
00006  *  based on colorspaces.h from Tue Feb 23 13:49:38 2005
00007  *  Copyright  2005-2006  Tim Niemueller [www.niemueller.de]
00008  *
00009  ****************************************************************************/
00010
00011 /*  This program is free software; you can redistribute it and/or modify
00012  *  it under the terms of the GNU General Public License as published by
00013  *  the Free Software Foundation; either version 2 of the License, or
00014  *  (at your option) any later version. A runtime exception applies to
00015  *  this software (see LICENSE.GPL_WRE file mentioned below for details).
00016  *
00017  *  This program is distributed in the hope that it will be useful,
00018  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00019  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00020  *  GNU Library General Public License for more details.
00021  *
00022  *  Read the full text in the LICENSE.GPL_WRE file in the doc directory.
00023  */
00024
00025 #include <fvutils/color/yuvrgb.h>
00026 #include <core/macros.h>
00027
00028 #include <fvutils/cpu/mmx.h>
00029 
00030 /** YUV to RGB Conversion
00031  * B = 1.164(Y - 16)                  + 2.018(U - 128)
00032  * G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
00033  * R = 1.164(Y - 16) + 1.596(V - 128)
00034  *
00035  * Values have to be clamped to keep them in the [0-255] range.
00036  * Rumour has it that the valid range is actually a subset of [0-255] (fourcc.org mentions an RGB range
00037  * of [16-235] mentioned) but clamping the values into [0-255] seems to produce acceptable results.
00038  * @param YUV unsigned char array that contains the pixels, 4 pixels in 6 byte macro pixel, line after
00039  *            line
00040  * @param RGB where the RGB output will be written to, will have pixel after pixel, 3 bytes per pixel
00041  *            (thus this is a 24bit RGB with one byte per color) line by line.
00042  * @param width Width of the image contained in the YUV buffer
00043  * @param height Height of the image contained in the YUV buffer
00044  */
00045 void
00046 yuv411packed_to_rgb_plainc(const unsigned char *YUV, unsigned char *RGB,
00047                            unsigned int width, unsigned int height)
00048 {
00049   register int y0, y1, y2, y3, u, v;
00050   register unsigned int i = 0;
00051   while (i < (width * height)*3/2) {
00052     u  = YUV[i++] - 128;
00053     y0 = YUV[i++] -  16;
00054     y1 = YUV[i++] -  16;
00055     v  = YUV[i++] - 128;
00056     y2 = YUV[i++] -  16;
00057     y3 = YUV[i++] -  16;
00058
00059     // Set red, green and blue bytes for pixel 0
00060     *RGB++ = clip( (76284 * y0 + 104595 * v             ) >> 16 );
00061     *RGB++ = clip( (76284 * y0 -  25625 * u - 53281 * v ) >> 16 );
00062     *RGB++ = clip( (76284 * y0 + 132252 * u             ) >> 16 );
00063
00064     // Set red, green and blue bytes for pixel 1
00065     *RGB++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
00066     *RGB++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
00067     *RGB++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );
00068
00069     // Set red, green and blue bytes for pixel 2
00070     *RGB++ = clip( (76284 * y2 + 104595 * v             ) >> 16 );
00071     *RGB++ = clip( (76284 * y2 -  25625 * u - 53281 * v ) >> 16 );
00072     *RGB++ = clip( (76284 * y2 + 132252 * u             ) >> 16 );
00073
00074     // Set red, green and blue bytes for pixel 3
00075     *RGB++ = clip( (76284 * y3 + 104595 * v             ) >> 16 );
00076     *RGB++ = clip( (76284 * y3 -  25625 * u - 53281 * v ) >> 16 );
00077     *RGB++ = clip( (76284 * y3 + 132252 * u             ) >> 16 );
00078
00079   }
00080 }
00081
00082 
00083 /** YUV to RGB Conversion
00084  * B = 1.164(Y - 16)                  + 2.018(U - 128)
00085  * G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
00086  * R = 1.164(Y - 16) + 1.596(V - 128)
00087  *
00088  * Values have to be clamped to keep them in the [0-255] range.
00089  * Rumour has it that the valid range is actually a subset of [0-255] (fourcc.org mentions an RGB range
00090  * of [16-235] mentioned) but clamping the values into [0-255] seems to produce acceptable results.
00091  * @param YUV unsigned char array that contains the pixels, 4 pixels in 6 byte macro pixel, line after
00092  *            line
00093  * @param RGB where the RGB output will be written to, will have pixel after pixel, 3 bytes per pixel
00094  *            (thus this is a 24bit RGB with one byte per color) line by line.
00095  * @param width Width of the image contained in the YUV buffer
00096  * @param height Height of the image contained in the YUV buffer
00097  */
00098 void
00099 yuv422planar_to_rgb_plainc(const unsigned char *planar, unsigned char *RGB, unsigned int width, unsigned int height)
00100 {
00101
00102   register short y1, y2, u, v;
00103   register const unsigned char *yp, *up, *vp;
00104   register unsigned int i;
00105
00106   yp = planar;
00107   up = planar + (width * height);
00108   vp = up + (width * height / 2);
00109
00110   for (i = 0; i < (width * height / 2); ++i) {
00111
00112     y1 = *yp++;
00113     y2 = *yp++;
00114     u  = *up++;
00115     v  = *vp++;
00116
00117     y1 -=  16;
00118     y2 -=  16;
00119     u  -= 128;
00120     v  -= 128;
00121
00122     // Set red, green and blue bytes for pixel 0
00123     *RGB++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
00124     *RGB++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
00125     *RGB++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );
00126
00127     // Set red, green and blue bytes for pixel 1
00128     *RGB++ = clip( (76284 * y2 + 104595 * v             ) >> 16 );
00129     *RGB++ = clip( (76284 * y2 -  25625 * u - 53281 * v ) >> 16 );
00130     *RGB++ = clip( (76284 * y2 + 132252 * u             ) >> 16 );
00131
00132   }
00133 }
00134
00135
00136 
00137 /** YUV to RGB Conversion
00138  * B = 1.164(Y - 16)                  + 2.018(U - 128)
00139  * G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
00140  * R = 1.164(Y - 16) + 1.596(V - 128)
00141  *
00142  * Values have to be clamped to keep them in the [0-255] range.
00143  * Rumour has it that the valid range is actually a subset of [0-255] (fourcc.org mentions an RGB range
00144  * of [16-235] mentioned) but clamping the values into [0-255] seems to produce acceptable results.
00145  * @param YUV unsigned char array that contains the pixels, 4 pixels in 8 byte macro pixel, line after
00146  *            line
00147  * @param RGB where the RGB output will be written to, will have pixel after pixel, 3 bytes per pixel
00148  *            (thus this is a 24bit RGB with one byte per color) line by line.
00149  * @param width Width of the image contained in the YUV buffer
00150  * @param height Height of the image contained in the YUV buffer
00151  */
00152 void
00153 yuv422packed_to_rgb_plainc(const unsigned char *YUV, unsigned char *RGB,
00154                            unsigned int width, unsigned int height)
00155 {
00156   register int y0, y1, u, v;
00157   register unsigned int i = 0;
00158   while (i < (width * height)/2) {
00159     u  = YUV[i++] - 128;
00160     y0 = YUV[i++] -  16;
00161     v  = YUV[i++] - 128;
00162     y1 = YUV[i++] -  16;
00163
00164     // Set red, green and blue bytes for pixel 0
00165     *RGB++ = clip( (76284 * y0 + 104595 * v             ) >> 16 );
00166     *RGB++ = clip( (76284 * y0 -  25625 * u - 53281 * v ) >> 16 );
00167     *RGB++ = clip( (76284 * y0 + 132252 * u             ) >> 16 );
00168
00169     // Set red, green and blue bytes for pixel 1
00170     *RGB++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
00171     *RGB++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
00172     *RGB++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );
00173   }
00174 }
00175 
00176 /** Convert YUV422 planar to BGR.
00177  * Use formula in aforementioned function.
00178  * @param YUV YUV422 planar buffer
00179  * @param BGR BGR buffer
00180  * @param width Width of the image contained in the YUV buffer
00181  * @param height Height of the image contained in the YUV buffer
00182  */
00183 void
00184 yuv422planar_to_bgr_plainc(const unsigned char *planar, unsigned char *BGR,
00185                            unsigned int width, unsigned int height)
00186 {
00187
00188   register short y1, y2, u, v;
00189   register const unsigned char *yp, *up, *vp;
00190   register unsigned int i;
00191
00192   yp = planar;
00193   up = planar + (width * height);
00194   vp = up + (width * height / 2);
00195
00196   for (i = 0; i < (width * height / 2); ++i) {
00197
00198     y1 = *yp++;
00199     y2 = *yp++;
00200     u  = *up++;
00201     v  = *vp++;
00202
00203     y1 -=  16;
00204     y2 -=  16;
00205     u  -= 128;
00206     v  -= 128;
00207
00208     // Set red, green and blue bytes for pixel 0
00209     *BGR++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );
00210     *BGR++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
00211     *BGR++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
00212
00213     // Set red, green and blue bytes for pixel 1
00214     *BGR++ = clip( (76284 * y2 + 132252 * u             ) >> 16 );
00215     *BGR++ = clip( (76284 * y2 -  25625 * u - 53281 * v ) >> 16 );
00216     *BGR++ = clip( (76284 * y2 + 104595 * v             ) >> 16 );
00217   }
00218 }
00219
00220
00221 void
00222 yuv422planar_to_rgb_with_alpha_plainc(const unsigned char *planar, unsigned char *RGB, unsigned int width, unsigned int height)
00223 {
00224
00225   register short y1, y2, u, v;
00226   register const unsigned char *yp, *up, *vp;
00227   register unsigned int i;
00228
00229   yp = planar;
00230   up = planar + (width * height);
00231   vp = up + (width * height / 2);
00232
00233   for (i = 0; i < (width * height / 2); ++i) {
00234
00235     y1 = *yp++;
00236     y2 = *yp++;
00237     u  = *up++;
00238     v  = *vp++;
00239
00240     y1 -=  16;
00241     y2 -=  16;
00242     u  -= 128;
00243     v  -= 128;
00244
00245     // Set red, green and blue bytes for pixel 0
00246     *RGB++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
00247     *RGB++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
00248     *RGB++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );
00249     *RGB++ = 255;
00250
00251     // Set red, green and blue bytes for pixel 1
00252     *RGB++ = clip( (76284 * y2 + 104595 * v             ) >> 16 );
00253     *RGB++ = clip( (76284 * y2 -  25625 * u - 53281 * v ) >> 16 );
00254     *RGB++ = clip( (76284 * y2 + 132252 * u             ) >> 16 );
00255     *RGB++ = 255;
00256
00257   }
00258
00259 }
00260
00261
00262 void
00263 yuv422planar_to_bgr_with_alpha_plainc(const unsigned char *planar, unsigned char *BGR, unsigned int width, unsigned int height)
00264 {
00265
00266   register short y1, y2, u, v;
00267   register const unsigned char *yp, *up, *vp;
00268   register unsigned int i;
00269
00270   yp = planar;
00271   up = planar + (width * height);
00272   vp = up + (width * height / 2);
00273
00274   for (i = 0; i < (width * height / 2); ++i) {
00275
00276     y1 = *yp++;
00277     y2 = *yp++;
00278     u  = *up++;
00279     v  = *vp++;
00280
00281     y1 -=  16;
00282     y2 -=  16;
00283     u  -= 128;
00284     v  -= 128;
00285
00286     // Set red, green and blue bytes for pixel 0
00287     *BGR++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );
00288     *BGR++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
00289     *BGR++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
00290     *BGR++ = 255;
00291
00292     // Set red, green and blue bytes for pixel 1
00293     *BGR++ = clip( (76284 * y2 + 132252 * u             ) >> 16 );
00294     *BGR++ = clip( (76284 * y2 -  25625 * u - 53281 * v ) >> 16 );
00295     *BGR++ = clip( (76284 * y2 + 104595 * v             ) >> 16 );
00296     *BGR++ = 255;
00297
00298   }
00299
00300 }
00301
00302
00303 void
00304 yuv422packed_to_bgr_with_alpha_plainc(const unsigned char *YUV, unsigned char *BGR,
00305                                       unsigned int width, unsigned int height)
00306 {
00307
00308   register int y0, y1, u, v;
00309   register unsigned int i = 0;
00310   while (i < (width * height * 2)) {
00311     u  = YUV[i++] - 128;
00312     y0 = YUV[i++] -  16;
00313     v  = YUV[i++] - 128;
00314     y1 = YUV[i++] -  16;
00315
00316     // Set red, green and blue bytes for pixel 0
00317     *BGR++ = clip( (76284 * y0 + 132252 * u             ) >> 16 );
00318     *BGR++ = clip( (76284 * y0 -  25625 * u - 53281 * v ) >> 16 );
00319     *BGR++ = clip( (76284 * y0 + 104595 * v             ) >> 16 );
00320     *BGR++ = 255;
00321
00322     // Set red, green and blue bytes for pixel 1
00323     *BGR++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );
00324     *BGR++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
00325     *BGR++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
00326     *BGR++ = 255;
00327
00328   }
00329 }
00330
00331
00332 #if ( \
00333          defined __i386__ || \
00334          defined __386__ || \
00335          defined __X86__ || \
00336          defined _M_IX86 || \
00337          defined i386)
00338 
00339 #define CRV    104595
00340 #define CBU    132251
00341 #define CGU    25624
00342 #define CGV    53280
00343 #define YMUL   76283
00344 #define OFF    32768
00345 #define BITRES 16
00346 
00347 /* calculation float resolution in bits */
00348 /* ie RES = 6 is 10.6 fixed point */
00349 /*    RES = 8 is 8.8 fixed point */
00350 /*    RES = 4 is 12.4 fixed point */
00351 /* NB: going above 6 will lead to overflow... :( */
00352 #define RES    6
00353 
00354 #define RZ(i)  (i >> (BITRES - RES))
00355 #define FOUR(i) {i, i, i, i}
00356 
00357 __aligned(8) const volatile unsigned short _const_crvcrv[4] = FOUR(RZ(CRV));
00358 __aligned(8) const volatile unsigned short _const_cbucbu[4] = FOUR(RZ(CBU));
00359 __aligned(8) const volatile unsigned short _const_cgucgu[4] = FOUR(RZ(CGU));
00360 __aligned(8) const volatile unsigned short _const_cgvcgv[4] = FOUR(RZ(CGV));
00361 __aligned(8) const volatile unsigned short _const_ymul  [4] = FOUR(RZ(YMUL));
00362 __aligned(8) const volatile unsigned short _const_128   [4] = FOUR(128);
00363 __aligned(8) const volatile unsigned short _const_32    [4] = FOUR(RZ(OFF));
00364 __aligned(8) const volatile unsigned short _const_16    [4] = FOUR(16);
00365
00366 #define CONST_CRVCRV *_const_crvcrv
00367 #define CONST_CBUCBU *_const_cbucbu
00368 #define CONST_CGUCGU *_const_cgucgu
00369 #define CONST_CGVCGV *_const_cgvcgv
00370 #define CONST_YMUL   *_const_ymul
00371 #define CONST_128    *_const_128
00372 #define CONST_32     *_const_32
00373 #define CONST_16     *_const_16
00374 
00375 void
00376 yuv411planar_to_rgb_mmx (const unsigned char *yuv, unsigned char *rgb,
00377                          unsigned int w, unsigned int h)
00378 {
00379   unsigned int xx, yy;
00380   register const unsigned char *yp1, *up, *vp;
00381   unsigned char *dp1;
00382
00383   /* plane pointers */
00384   yp1 = yuv;
00385   up = yuv + (w * h);
00386   vp = up + (w * (h / 4));
00387   /* destination pointers */
00388   dp1 = rgb;
00389
00390
00391
00392   yp1 = yuv;
00393   up = yuv + (w * h);
00394   vp = up + ((w / 2) * (h / 2));
00395   dp1 = rgb;
00396   for (yy = 0; yy < h; yy++)
00397     {
00398       for (xx = 0; xx < w; xx += 8)
00399         {
00400           movq_m2r(*yp1, mm0);
00401           movq_r2r(mm0, mm1);
00402           psrlw_i2r(8, mm0);
00403           psllw_i2r(8, mm1);
00404           psrlw_i2r(8, mm1);
00405
00406           pxor_r2r(mm7, mm7);
00407           movd_m2r(*up, mm3);
00408           movd_m2r(*vp, mm2);
00409
00410           punpcklbw_r2r(mm7, mm2);
00411           punpcklbw_r2r(mm7, mm3);
00412
00413           movq_m2r(CONST_16, mm4);
00414           psubsw_r2r(mm4, mm0);
00415           psubsw_r2r(mm4, mm1);
00416
00417           movq_m2r(CONST_128, mm5);
00418           psubsw_r2r(mm5, mm2);
00419           psubsw_r2r(mm5, mm3);
00420
00421           movq_m2r(CONST_YMUL, mm4);
00422           pmullw_r2r(mm4, mm0);
00423           pmullw_r2r(mm4, mm1);
00424
00425           movq_m2r(CONST_CRVCRV, mm7);
00426           pmullw_r2r(mm3, mm7);
00427
00428           movq_m2r(CONST_CBUCBU, mm6);
00429           pmullw_r2r(mm2, mm6);
00430
00431           movq_m2r(CONST_CGUCGU, mm5);
00432           pmullw_r2r(mm2, mm5);
00433
00434           movq_m2r(CONST_CGVCGV, mm4);
00435           pmullw_r2r(mm3, mm4);
00436
00437           movq_r2r(mm0, mm2);
00438           paddsw_r2r(mm7, mm2);
00439           paddsw_r2r(mm1, mm7);
00440
00441           psraw_i2r(RES, mm2);
00442           psraw_i2r(RES, mm7);
00443           packuswb_r2r(mm7, mm2);
00444
00445           pxor_r2r(mm7, mm7);
00446           movq_r2r(mm2, mm3);
00447           punpckhbw_r2r(mm7, mm2);
00448           punpcklbw_r2r(mm3, mm7);
00449           por_r2r(mm7, mm2);
00450
00451           movq_r2r(mm0, mm3);
00452           psubsw_r2r(mm5, mm3);
00453           psubsw_r2r(mm4, mm3);
00454           paddsw_m2r(CONST_32, mm3);
00455
00456           movq_r2r(mm1, mm7);
00457           psubsw_r2r(mm5, mm7);
00458           psubsw_r2r(mm4, mm7);
00459           paddsw_m2r(CONST_32, mm7);
00460
00461           psraw_i2r(RES, mm3);
00462           psraw_i2r(RES, mm7);
00463           packuswb_r2r(mm7, mm3);
00464
00465           pxor_r2r(mm7, mm7);
00466           movq_r2r(mm3, mm4);
00467           punpckhbw_r2r(mm7, mm3);
00468           punpcklbw_r2r(mm4, mm7);
00469           por_r2r(mm7, mm3);
00470
00471           movq_m2r(CONST_32, mm4);
00472           paddsw_r2r(mm6, mm0);
00473           paddsw_r2r(mm6, mm1);
00474           paddsw_r2r(mm4, mm0);
00475           paddsw_r2r(mm4, mm1);
00476           psraw_i2r(RES, mm0);
00477           psraw_i2r(RES, mm1);
00478           packuswb_r2r(mm1, mm0);
00479
00480           pxor_r2r(mm7, mm7);
00481           movq_r2r(mm0, mm5);
00482           punpckhbw_r2r(mm7, mm0);
00483           punpcklbw_r2r(mm5, mm7);
00484           por_r2r(mm7, mm0);
00485
00486           pxor_r2r(mm1, mm1);
00487           movq_r2r(mm0, mm5);
00488           movq_r2r(mm3, mm6);
00489           movq_r2r(mm2, mm7);
00490           punpckhbw_r2r(mm3, mm2);
00491           punpcklbw_r2r(mm6, mm7);
00492           punpckhbw_r2r(mm1, mm0);
00493           punpcklbw_r2r(mm1, mm5);
00494
00495           movq_r2r(mm7, mm1);
00496           punpckhwd_r2r(mm5, mm7);
00497           punpcklwd_r2r(mm5, mm1);
00498
00499           movq_r2r(mm2, mm4);
00500           punpckhwd_r2r(mm0, mm2);
00501           punpcklwd_r2r(mm0, mm4);
00502
00503           movntq_r2m(mm1, *(dp1));
00504           movntq_r2m(mm7, *(dp1 + 8));
00505           movntq_r2m(mm4, *(dp1 + 16));
00506           movntq_r2m(mm2, *(dp1 + 24));
00507
00508           yp1 += 8;
00509           up += 4;
00510           vp += 4;
00511           dp1 += 8 * 4;
00512         }
00513       if (yy & 0x1)
00514         {
00515           up -= w / 2;
00516           vp -= w / 2;
00517         }
00518     }
00519   emms();
00520 }
00521 #endif