00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include <fvutils/color/yuvrgb.h>
00026 #include <core/macros.h>
00027
00028 #include <fvutils/cpu/mmx.h>
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
/** Convert a packed YUV 4:1:1 image to packed RGB (plain C implementation).
 * The input is consumed in groups of six bytes ordered U, Y0, Y1, V, Y2, Y3;
 * every group yields four output pixels that share the same U and V sample.
 * For each pixel three bytes are written, in the order R, G, B.
 *
 * The arithmetic is 16.16 fixed point: 76284/65536 ~ 1.164 (luma gain),
 * 104595/65536 ~ 1.596 (V to R), 25625/65536 ~ 0.391 (U to G),
 * 53281/65536 ~ 0.813 (V to G) and 132252/65536 ~ 2.018 (U to B).
 * clip() is declared outside this function; presumably it clamps the
 * result to the 0..255 range.
 *
 * @param YUV    source buffer, (width * height) * 3 / 2 bytes are read
 *               (rounded up to a whole six byte group)
 * @param RGB    destination buffer, receives three bytes per pixel
 * @param width  image width in pixels
 * @param height image height in pixels
 */
void
yuv411packed_to_rgb_plainc(const unsigned char *YUV, unsigned char *RGB,
                           unsigned int width, unsigned int height)
{
  unsigned int pos;
  int k;

  /* One iteration handles one six byte group, i.e. four pixels. */
  for (pos = 0; pos < (width * height) * 3 / 2; pos += 6) {
    const int cb = YUV[pos] - 128;
    const int cr = YUV[pos + 3] - 128;
    const int luma[4] = {
      YUV[pos + 1] - 16,
      YUV[pos + 2] - 16,
      YUV[pos + 4] - 16,
      YUV[pos + 5] - 16
    };

    for (k = 0; k < 4; ++k) {
      const int ys = 76284 * luma[k];

      *RGB++ = clip( (ys + 104595 * cr) >> 16 );
      *RGB++ = clip( (ys - 25625 * cb - 53281 * cr) >> 16 );
      *RGB++ = clip( (ys + 132252 * cb) >> 16 );
    }
  }
}
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
/** Convert a planar YUV 4:2:2 image to packed RGB (plain C implementation).
 * The source holds a Y plane of width * height bytes, directly followed by
 * a U plane and a V plane of width * height / 2 bytes each. Two horizontally
 * adjacent luma samples share one U and one V sample. Three bytes are
 * written per pixel, in the order R, G, B.
 *
 * Coefficients are 16.16 fixed point (76284/65536 ~ 1.164 etc.); clip() is
 * declared elsewhere and presumably clamps to 0..255.
 *
 * @param planar source buffer (Y plane, then U plane, then V plane)
 * @param RGB    destination buffer, receives three bytes per pixel
 * @param width  image width in pixels
 * @param height image height in pixels
 */
void
yuv422planar_to_rgb_plainc(const unsigned char *planar, unsigned char *RGB, unsigned int width, unsigned int height)
{
  const unsigned char *luma     = planar;
  const unsigned char *cb_plane = planar + (width * height);
  const unsigned char *cr_plane = cb_plane + (width * height / 2);
  const unsigned int   num_pairs = width * height / 2;
  unsigned int pair;

  for (pair = 0; pair < num_pairs; ++pair) {
    const int cb = cb_plane[pair] - 128;
    const int cr = cr_plane[pair] - 128;
    /* Scaled luma of the two pixels that share this chroma sample. */
    const int ya = 76284 * (luma[2 * pair]     - 16);
    const int yb = 76284 * (luma[2 * pair + 1] - 16);

    /* first pixel */
    *RGB++ = clip( (ya + 104595 * cr) >> 16 );
    *RGB++ = clip( (ya - 25625 * cb - 53281 * cr) >> 16 );
    *RGB++ = clip( (ya + 132252 * cb) >> 16 );

    /* second pixel */
    *RGB++ = clip( (yb + 104595 * cr) >> 16 );
    *RGB++ = clip( (yb - 25625 * cb - 53281 * cr) >> 16 );
    *RGB++ = clip( (yb + 132252 * cb) >> 16 );
  }
}
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
/** Convert a packed YUV 4:2:2 image to packed RGB (plain C implementation).
 * The input is consumed in groups of four bytes ordered U, Y0, V, Y1; each
 * group yields two output pixels that share the same U and V sample. Three
 * bytes are written per pixel, in the order R, G, B.
 *
 * Coefficients are 16.16 fixed point (76284/65536 ~ 1.164 etc.); clip() is
 * declared elsewhere and presumably clamps to 0..255.
 *
 * @param YUV    source buffer, width * height * 2 bytes are read
 *               (width * height is assumed to be even)
 * @param RGB    destination buffer, receives width * height * 3 bytes
 * @param width  image width in pixels
 * @param height image height in pixels
 */
void
yuv422packed_to_rgb_plainc(const unsigned char *YUV, unsigned char *RGB,
                           unsigned int width, unsigned int height)
{
  register int y0, y1, u, v;
  register unsigned int i = 0;
  /* i is a byte index into the source, which holds two bytes per pixel.
   * The bound therefore has to be width * height * 2 (as in
   * yuv422packed_to_bgr_with_alpha_plainc); the former bound of
   * width * height / 2 stopped after a quarter of the image. */
  while (i < (width * height * 2)) {
    u  = YUV[i++] - 128;
    y0 = YUV[i++] -  16;
    v  = YUV[i++] - 128;
    y1 = YUV[i++] -  16;

    /* first pixel */
    *RGB++ = clip( (76284 * y0 + 104595 * v             ) >> 16 );
    *RGB++ = clip( (76284 * y0 -  25625 * u - 53281 * v ) >> 16 );
    *RGB++ = clip( (76284 * y0 + 132252 * u             ) >> 16 );

    /* second pixel */
    *RGB++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
    *RGB++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
    *RGB++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );
  }
}
00175
00176
00177
00178
00179
00180
00181
00182
/** Convert a planar YUV 4:2:2 image to packed BGR (plain C implementation).
 * The source holds a Y plane of width * height bytes, directly followed by
 * a U plane and a V plane of width * height / 2 bytes each. Two horizontally
 * adjacent luma samples share one U and one V sample. Three bytes are
 * written per pixel, in the order B, G, R.
 *
 * Coefficients are 16.16 fixed point (76284/65536 ~ 1.164 etc.); clip() is
 * declared elsewhere and presumably clamps to 0..255.
 *
 * @param planar source buffer (Y plane, then U plane, then V plane)
 * @param BGR    destination buffer, receives three bytes per pixel
 * @param width  image width in pixels
 * @param height image height in pixels
 */
void
yuv422planar_to_bgr_plainc(const unsigned char *planar, unsigned char *BGR,
                           unsigned int width, unsigned int height)
{
  const unsigned int   num_pairs = width * height / 2;
  const unsigned char *luma      = planar;
  const unsigned char *cb_plane  = planar + (width * height);
  const unsigned char *cr_plane  = cb_plane + (width * height / 2);
  unsigned int pair;
  int k;

  for (pair = 0; pair < num_pairs; ++pair) {
    const int cb = *cb_plane++ - 128;
    const int cr = *cr_plane++ - 128;

    /* Two pixels per chroma sample. */
    for (k = 0; k < 2; ++k) {
      const int ys = 76284 * (*luma++ - 16);

      *BGR++ = clip( (ys + 132252 * cb) >> 16 );
      *BGR++ = clip( (ys - 25625 * cb - 53281 * cr) >> 16 );
      *BGR++ = clip( (ys + 104595 * cr) >> 16 );
    }
  }
}
00219
00220
/** Convert a planar YUV 4:2:2 image to packed RGB with an alpha byte
 * (plain C implementation).
 * The source holds a Y plane of width * height bytes, directly followed by
 * a U plane and a V plane of width * height / 2 bytes each. Four bytes are
 * written per pixel, in the order R, G, B, A, where A is always 255.
 *
 * Coefficients are 16.16 fixed point (76284/65536 ~ 1.164 etc.); clip() is
 * declared elsewhere and presumably clamps to 0..255.
 *
 * @param planar source buffer (Y plane, then U plane, then V plane)
 * @param RGB    destination buffer, receives four bytes per pixel
 * @param width  image width in pixels
 * @param height image height in pixels
 */
void
yuv422planar_to_rgb_with_alpha_plainc(const unsigned char *planar, unsigned char *RGB, unsigned int width, unsigned int height)
{
  const unsigned char *luma     = planar;
  const unsigned char *cb_plane = planar + (width * height);
  const unsigned char *cr_plane = cb_plane + (width * height / 2);
  unsigned int pairs_left = width * height / 2;

  for (; pairs_left > 0; --pairs_left) {
    const int ya = 76284 * (luma[0] - 16);
    const int yb = 76284 * (luma[1] - 16);
    const int cb = *cb_plane++ - 128;
    const int cr = *cr_plane++ - 128;

    luma += 2;

    /* first pixel: R, G, B, opaque alpha */
    RGB[0] = clip( (ya + 104595 * cr) >> 16 );
    RGB[1] = clip( (ya - 25625 * cb - 53281 * cr) >> 16 );
    RGB[2] = clip( (ya + 132252 * cb) >> 16 );
    RGB[3] = 255;

    /* second pixel: R, G, B, opaque alpha */
    RGB[4] = clip( (yb + 104595 * cr) >> 16 );
    RGB[5] = clip( (yb - 25625 * cb - 53281 * cr) >> 16 );
    RGB[6] = clip( (yb + 132252 * cb) >> 16 );
    RGB[7] = 255;

    RGB += 8;
  }
}
00260
00261
/** Convert a planar YUV 4:2:2 image to packed BGR with an alpha byte
 * (plain C implementation).
 * The source holds a Y plane of width * height bytes, directly followed by
 * a U plane and a V plane of width * height / 2 bytes each. Four bytes are
 * written per pixel, in the order B, G, R, A, where A is always 255.
 *
 * Coefficients are 16.16 fixed point (76284/65536 ~ 1.164 etc.); clip() is
 * declared elsewhere and presumably clamps to 0..255.
 *
 * @param planar source buffer (Y plane, then U plane, then V plane)
 * @param BGR    destination buffer, receives four bytes per pixel
 * @param width  image width in pixels
 * @param height image height in pixels
 */
void
yuv422planar_to_bgr_with_alpha_plainc(const unsigned char *planar, unsigned char *BGR, unsigned int width, unsigned int height)
{
  const unsigned int   num_pairs = width * height / 2;
  const unsigned char *luma      = planar;
  const unsigned char *cb_plane  = planar + (width * height);
  const unsigned char *cr_plane  = cb_plane + (width * height / 2);
  unsigned int pair;
  int k;

  for (pair = 0; pair < num_pairs; ++pair) {
    const int cb = cb_plane[pair] - 128;
    const int cr = cr_plane[pair] - 128;

    /* Two pixels share the chroma sample read above. */
    for (k = 0; k < 2; ++k) {
      const int ys = 76284 * (luma[2 * pair + k] - 16);

      *BGR++ = clip( (ys + 132252 * cb) >> 16 );
      *BGR++ = clip( (ys - 25625 * cb - 53281 * cr) >> 16 );
      *BGR++ = clip( (ys + 104595 * cr) >> 16 );
      *BGR++ = 255;
    }
  }
}
00301
00302
/** Convert a packed YUV 4:2:2 image to packed BGR with an alpha byte
 * (plain C implementation).
 * The input is consumed in groups of four bytes ordered U, Y0, V, Y1; each
 * group yields two output pixels that share the same U and V sample. Four
 * bytes are written per pixel, in the order B, G, R, A, where A is always
 * 255.
 *
 * Coefficients are 16.16 fixed point (76284/65536 ~ 1.164 etc.); clip() is
 * declared elsewhere and presumably clamps to 0..255.
 *
 * @param YUV    source buffer, width * height * 2 bytes are read
 * @param BGR    destination buffer, receives four bytes per pixel
 * @param width  image width in pixels
 * @param height image height in pixels
 */
void
yuv422packed_to_bgr_with_alpha_plainc(const unsigned char *YUV, unsigned char *BGR,
                                      unsigned int width, unsigned int height)
{
  const unsigned int num_bytes = width * height * 2;
  unsigned int pos;

  /* One iteration handles one four byte group, i.e. two pixels. */
  for (pos = 0; pos < num_bytes; pos += 4) {
    const int cb = YUV[pos]     - 128;
    const int ya = 76284 * (YUV[pos + 1] - 16);
    const int cr = YUV[pos + 2] - 128;
    const int yb = 76284 * (YUV[pos + 3] - 16);

    /* first pixel */
    *BGR++ = clip( (ya + 132252 * cb) >> 16 );
    *BGR++ = clip( (ya - 25625 * cb - 53281 * cr) >> 16 );
    *BGR++ = clip( (ya + 104595 * cr) >> 16 );
    *BGR++ = 255;

    /* second pixel */
    *BGR++ = clip( (yb + 132252 * cb) >> 16 );
    *BGR++ = clip( (yb - 25625 * cb - 53281 * cr) >> 16 );
    *BGR++ = clip( (yb + 104595 * cr) >> 16 );
    *BGR++ = 255;
  }
}
00330
00331
00332 #if ( \
00333 defined __i386__ || \
00334 defined __386__ || \
00335 defined __X86__ || \
00336 defined _M_IX86 || \
00337 defined i386)
00338
00339 #define CRV 104595
00340 #define CBU 132251
00341 #define CGU 25624
00342 #define CGV 53280
00343 #define YMUL 76283
00344 #define OFF 32768
00345 #define BITRES 16
00346
00347
00348
00349
00350
00351
00352 #define RES 6
00353
00354 #define RZ(i) (i >> (BITRES - RES))
00355 #define FOUR(i) {i, i, i, i}
00356
00357 __aligned(8) const volatile unsigned short _const_crvcrv[4] = FOUR(RZ(CRV));
00358 __aligned(8) const volatile unsigned short _const_cbucbu[4] = FOUR(RZ(CBU));
00359 __aligned(8) const volatile unsigned short _const_cgucgu[4] = FOUR(RZ(CGU));
00360 __aligned(8) const volatile unsigned short _const_cgvcgv[4] = FOUR(RZ(CGV));
00361 __aligned(8) const volatile unsigned short _const_ymul [4] = FOUR(RZ(YMUL));
00362 __aligned(8) const volatile unsigned short _const_128 [4] = FOUR(128);
00363 __aligned(8) const volatile unsigned short _const_32 [4] = FOUR(RZ(OFF));
00364 __aligned(8) const volatile unsigned short _const_16 [4] = FOUR(16);
00365
00366 #define CONST_CRVCRV *_const_crvcrv
00367 #define CONST_CBUCBU *_const_cbucbu
00368 #define CONST_CGUCGU *_const_cgucgu
00369 #define CONST_CGVCGV *_const_cgvcgv
00370 #define CONST_YMUL *_const_ymul
00371 #define CONST_128 *_const_128
00372 #define CONST_32 *_const_32
00373 #define CONST_16 *_const_16
00374
00375 void
00376 yuv411planar_to_rgb_mmx (const unsigned char *yuv, unsigned char *rgb,
00377 unsigned int w, unsigned int h)
00378 {
00379 unsigned int xx, yy;
00380 register const unsigned char *yp1, *up, *vp;
00381 unsigned char *dp1;
00382
00383
00384 yp1 = yuv;
00385 up = yuv + (w * h);
00386 vp = up + (w * (h / 4));
00387
00388 dp1 = rgb;
00389
00390
00391
00392 yp1 = yuv;
00393 up = yuv + (w * h);
00394 vp = up + ((w / 2) * (h / 2));
00395 dp1 = rgb;
00396 for (yy = 0; yy < h; yy++)
00397 {
00398 for (xx = 0; xx < w; xx += 8)
00399 {
00400 movq_m2r(*yp1, mm0);
00401 movq_r2r(mm0, mm1);
00402 psrlw_i2r(8, mm0);
00403 psllw_i2r(8, mm1);
00404 psrlw_i2r(8, mm1);
00405
00406 pxor_r2r(mm7, mm7);
00407 movd_m2r(*up, mm3);
00408 movd_m2r(*vp, mm2);
00409
00410 punpcklbw_r2r(mm7, mm2);
00411 punpcklbw_r2r(mm7, mm3);
00412
00413 movq_m2r(CONST_16, mm4);
00414 psubsw_r2r(mm4, mm0);
00415 psubsw_r2r(mm4, mm1);
00416
00417 movq_m2r(CONST_128, mm5);
00418 psubsw_r2r(mm5, mm2);
00419 psubsw_r2r(mm5, mm3);
00420
00421 movq_m2r(CONST_YMUL, mm4);
00422 pmullw_r2r(mm4, mm0);
00423 pmullw_r2r(mm4, mm1);
00424
00425 movq_m2r(CONST_CRVCRV, mm7);
00426 pmullw_r2r(mm3, mm7);
00427
00428 movq_m2r(CONST_CBUCBU, mm6);
00429 pmullw_r2r(mm2, mm6);
00430
00431 movq_m2r(CONST_CGUCGU, mm5);
00432 pmullw_r2r(mm2, mm5);
00433
00434 movq_m2r(CONST_CGVCGV, mm4);
00435 pmullw_r2r(mm3, mm4);
00436
00437 movq_r2r(mm0, mm2);
00438 paddsw_r2r(mm7, mm2);
00439 paddsw_r2r(mm1, mm7);
00440
00441 psraw_i2r(RES, mm2);
00442 psraw_i2r(RES, mm7);
00443 packuswb_r2r(mm7, mm2);
00444
00445 pxor_r2r(mm7, mm7);
00446 movq_r2r(mm2, mm3);
00447 punpckhbw_r2r(mm7, mm2);
00448 punpcklbw_r2r(mm3, mm7);
00449 por_r2r(mm7, mm2);
00450
00451 movq_r2r(mm0, mm3);
00452 psubsw_r2r(mm5, mm3);
00453 psubsw_r2r(mm4, mm3);
00454 paddsw_m2r(CONST_32, mm3);
00455
00456 movq_r2r(mm1, mm7);
00457 psubsw_r2r(mm5, mm7);
00458 psubsw_r2r(mm4, mm7);
00459 paddsw_m2r(CONST_32, mm7);
00460
00461 psraw_i2r(RES, mm3);
00462 psraw_i2r(RES, mm7);
00463 packuswb_r2r(mm7, mm3);
00464
00465 pxor_r2r(mm7, mm7);
00466 movq_r2r(mm3, mm4);
00467 punpckhbw_r2r(mm7, mm3);
00468 punpcklbw_r2r(mm4, mm7);
00469 por_r2r(mm7, mm3);
00470
00471 movq_m2r(CONST_32, mm4);
00472 paddsw_r2r(mm6, mm0);
00473 paddsw_r2r(mm6, mm1);
00474 paddsw_r2r(mm4, mm0);
00475 paddsw_r2r(mm4, mm1);
00476 psraw_i2r(RES, mm0);
00477 psraw_i2r(RES, mm1);
00478 packuswb_r2r(mm1, mm0);
00479
00480 pxor_r2r(mm7, mm7);
00481 movq_r2r(mm0, mm5);
00482 punpckhbw_r2r(mm7, mm0);
00483 punpcklbw_r2r(mm5, mm7);
00484 por_r2r(mm7, mm0);
00485
00486 pxor_r2r(mm1, mm1);
00487 movq_r2r(mm0, mm5);
00488 movq_r2r(mm3, mm6);
00489 movq_r2r(mm2, mm7);
00490 punpckhbw_r2r(mm3, mm2);
00491 punpcklbw_r2r(mm6, mm7);
00492 punpckhbw_r2r(mm1, mm0);
00493 punpcklbw_r2r(mm1, mm5);
00494
00495 movq_r2r(mm7, mm1);
00496 punpckhwd_r2r(mm5, mm7);
00497 punpcklwd_r2r(mm5, mm1);
00498
00499 movq_r2r(mm2, mm4);
00500 punpckhwd_r2r(mm0, mm2);
00501 punpcklwd_r2r(mm0, mm4);
00502
00503 movntq_r2m(mm1, *(dp1));
00504 movntq_r2m(mm7, *(dp1 + 8));
00505 movntq_r2m(mm4, *(dp1 + 16));
00506 movntq_r2m(mm2, *(dp1 + 24));
00507
00508 yp1 += 8;
00509 up += 4;
00510 vp += 4;
00511 dp1 += 8 * 4;
00512 }
00513 if (yy & 0x1)
00514 {
00515 up -= w / 2;
00516 vp -= w / 2;
00517 }
00518 }
00519 emms();
00520 }
00521 #endif