00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "config.h"
00025
00026 #include <string.h>
00027 #include <stdlib.h>
00028 #if HAVE_STDINT_H
00029 #include <stdint.h>
00030 #endif
00031 #include <math.h>
00032
00033 #include "libavutil/mem.h"
00034 #include "libavcodec/dsputil.h"
00035
00036 #include "color.h"
00037
00038 #if HAVE_MMX
00039 #include "ffmpeg-mmx.h"
00040 #endif
00041
/*
 * Run-time selected planar YV12 -> packed YUY2 converter.
 *
 * The pointer is assigned by init_yuv_conversion(); nothing else in this
 * file sets it, so that function has to run before the first call.
 *
 * Pitches are in bytes.  `progressive` selects between frame based
 * (non-zero) and field based (zero) chroma interpolation.
 */
void (*yv12_to_yuy2)(const unsigned char *y_src, int y_src_pitch,
                     const unsigned char *u_src, int u_src_pitch,
                     const unsigned char *v_src, int v_src_pitch,
                     unsigned char *yuy2_map, int yuy2_pitch,
                     int width, int height, int progressive);
00048
/*
 * Run-time selected packed YUY2 -> planar YV12 converter.
 *
 * The pointer is assigned by init_yuv_conversion(); nothing else in this
 * file sets it, so that function has to run before the first call.
 * Pitches are in bytes.
 */
void (*yuy2_to_yv12)(const unsigned char *yuy2_map, int yuy2_pitch,
                     unsigned char *y_dst, int y_dst_pitch,
                     unsigned char *u_dst, int u_dst_pitch,
                     unsigned char *v_dst, int v_dst_pitch,
                     int width, int height);
00055
/*
 * Run-time selected vertical chroma filter for one packed 4:2:2 scanline.
 *
 * `m` is the current line, `t` the line above and `b` the line below;
 * `width` is given in pixels.  Assigned by init_yuv_conversion().
 */
void (*vfilter_chroma_332_packed422_scanline)(uint8_t *output, int width,
                                              uint8_t *m, uint8_t *t,
                                              uint8_t *b);
00057
00058
/*
 * Convert two horizontally adjacent pixels of two lines from planar 4:2:0
 * to packed YUYV.
 *
 * The macro works on the caller's local pointers and advances them:
 *   p_y1, p_y2       luma of the first / second line          (+2 each)
 *   p_u,  p_v        chroma row belonging to the first line   (+1 each)
 *   p_u2, p_v2       chroma row blended into the second line  (+1 each)
 *   p_line1, p_line2 packed output of the two lines           (+4 each)
 *
 * The first line takes the chroma sample unchanged, the second line gets
 * the truncating average of the two chroma rows.
 *
 * Wrapped in do { } while (0) so that the macro is a single statement and
 * can be used safely in an unbraced if/else; callers terminate it with ';'.
 */
#define C_YUV420_YUYV( )                                    \
    do {                                                    \
        *p_line1++ = *p_y1++;                               \
        *p_line2++ = *p_y2++;                               \
        *p_line1++ = *p_u;                                  \
        *p_line2++ = (*p_u++ + *p_u2++) >> 1;               \
        *p_line1++ = *p_y1++;                               \
        *p_line2++ = *p_y2++;                               \
        *p_line1++ = *p_v;                                  \
        *p_line2++ = (*p_v++ + *p_v2++) >> 1;               \
    } while (0)
00064
00065
00066
00067
00068
00069
/*
 * Convert `row_pairs` pairs of lines from planar 4:2:0 to packed YUYV.
 *
 * All pitches are the distance in bytes between two consecutive lines of
 * the picture being walked: for a progressive frame that is the plane
 * pitch, for a single field it is twice the plane pitch.
 *
 * For every pair, the first output line uses the current chroma row as is
 * and the second output line uses the truncating average of the current
 * and the following chroma row.  The very last pair has no following
 * chroma row, so the current one is reused there; this also keeps the
 * single-pair case from reading outside the chroma planes.
 *
 * Row start addresses are derived from the pitches only, so a width that
 * is odd (the last column is then left untouched) cannot shift later rows.
 */
static void yv12_to_yuy2_field_c
    (const uint8_t *y_row, int y_pitch,
     const uint8_t *u_row, int u_pitch,
     const uint8_t *v_row, int v_pitch,
     uint8_t *dst_row, int dst_pitch,
     int width, int row_pairs)
{
    const int pixel_pairs = width / 2;
    int x;

    while ( row_pairs-- > 0 )
    {
        /* row_pairs now holds the number of pairs still to come */
        const uint8_t *u_next = row_pairs ? u_row + u_pitch : u_row;
        const uint8_t *v_next = row_pairs ? v_row + v_pitch : v_row;
        const uint8_t *y1 = y_row;
        const uint8_t *y2 = y_row + y_pitch;
        uint8_t *line1 = dst_row;
        uint8_t *line2 = dst_row + dst_pitch;

        for ( x = 0; x < pixel_pairs; x++ )
        {
            line1[0] = y1[0];
            line1[1] = u_row[x];
            line1[2] = y1[1];
            line1[3] = v_row[x];

            line2[0] = y2[0];
            line2[1] = (u_row[x] + u_next[x]) >> 1;
            line2[2] = y2[1];
            line2[3] = (v_row[x] + v_next[x]) >> 1;

            y1 += 2;
            y2 += 2;
            line1 += 4;
            line2 += 4;
        }

        y_row   += 2 * y_pitch;
        u_row   += u_pitch;
        v_row   += v_pitch;
        dst_row += 2 * dst_pitch;
    }
}

/*
 * Plain C conversion of a planar YV12 picture to packed YUY2.
 *
 * y_src/u_src/v_src  source planes, with their pitches in bytes
 * yuy2_map           destination buffer, yuy2_pitch bytes per line
 * width, height      picture size in pixels
 * progressive        non-zero: consecutive lines share chroma rows;
 *                    zero: the two fields are converted independently,
 *                    each one using only its own luma and chroma rows
 *
 * height / 2 line pairs are written in progressive mode and height / 4
 * line pairs per field otherwise; left-over lines are not touched.
 */
static void yv12_to_yuy2_c
    (const unsigned char *y_src, int y_src_pitch,
     const unsigned char *u_src, int u_src_pitch,
     const unsigned char *v_src, int v_src_pitch,
     unsigned char *yuy2_map, int yuy2_pitch,
     int width, int height, int progressive)
{
    if ( progressive )
    {
        yv12_to_yuy2_field_c( y_src, y_src_pitch,
                              u_src, u_src_pitch,
                              v_src, v_src_pitch,
                              yuy2_map, yuy2_pitch,
                              width, height / 2 );
    }
    else
    {
        /* first field: even lines of every plane */
        yv12_to_yuy2_field_c( y_src, 2 * y_src_pitch,
                              u_src, 2 * u_src_pitch,
                              v_src, 2 * v_src_pitch,
                              yuy2_map, 2 * yuy2_pitch,
                              width, height / 4 );

        /* second field: odd lines of every plane */
        yv12_to_yuy2_field_c( y_src + y_src_pitch, 2 * y_src_pitch,
                              u_src + u_src_pitch, 2 * u_src_pitch,
                              v_src + v_src_pitch, 2 * v_src_pitch,
                              yuy2_map + yuy2_pitch, 2 * yuy2_pitch,
                              width, height / 4 );
    }
}
00194
00195
00196 #if HAVE_MMX
00197
/*
 * MMXEXT_YUV420_YUYV: convert 8 horizontally adjacent pixels of two lines
 * from planar 4:2:0 to packed YUYV.
 *
 * Uses the caller's local pointers p_y1, p_y2, p_u, p_v, p_u2, p_v2,
 * p_line1 and p_line2 and advances them afterwards (16 output bytes and
 * 8 luma bytes per line, 4 bytes per chroma pointer).
 *
 * First asm statement:  loads 8 luma bytes of line 1 (mm0) and 4 bytes each
 *                       from p_u / p_v, interleaves the chroma bytes into
 *                       mm1 and builds the first 8 output bytes of line 1
 *                       in mm2.
 * Second asm statement: loads 4 bytes each from p_u2 / p_v2, interleaves
 *                       them into mm3 and averages them with mm1 using
 *                       pavgb.
 * Third asm statement:  stores 16 bytes to p_line1 (chroma from mm1), then
 *                       loads 8 luma bytes of line 2 and stores 16 bytes to
 *                       p_line2 (chroma from mm3).  All stores use movntq.
 *
 * NOTE(review): values are passed between the three asm statements in
 * mm0-mm4 and none of the statements declares clobbers or a memory
 * clobber, so this relies on the compiler not touching MMX registers or
 * reordering memory accesses in between -- confirm for new compilers.
 * NOTE(review): pavgb is presumably a rounding average, unlike the
 * truncating ">> 1" used by C_YUV420_YUYV -- verify against the
 * instruction reference.
 */
00198 #define MMXEXT_YUV420_YUYV( ) \
00199 do { \
00200 __asm__ __volatile__(".align 8 \n\t" \
00201 "movq (%0), %%mm0 \n\t" \
00202 "movd (%1), %%mm1 \n\t" \
00203 "movd (%2), %%mm2 \n\t" \
00204 "punpcklbw %%mm2, %%mm1 \n\t" \
00205 "movq %%mm0, %%mm2 \n\t" \
00206 "punpcklbw %%mm1, %%mm2 \n\t" \
00207 : \
00208 : "r" (p_y1), "r" (p_u), "r" (p_v) ); \
00209 __asm__ __volatile__( \
00210 "movd (%0), %%mm3 \n\t" \
00211 "movd (%1), %%mm4 \n\t" \
00212 "punpcklbw %%mm4, %%mm3 \n\t" \
00213 "pavgb %%mm1, %%mm3 \n\t" \
00214 : \
00215 : "r" (p_u2), "r" (p_v2) ); \
00216 __asm__ __volatile__( \
00217 "movntq %%mm2, (%0) \n\t" \
00218 "punpckhbw %%mm1, %%mm0 \n\t" \
00219 "movntq %%mm0, 8(%0) \n\t" \
00220 "movq (%2), %%mm0 \n\t" \
00221 "movq %%mm0, %%mm2 \n\t" \
00222 "punpcklbw %%mm3, %%mm2 \n\t" \
00223 "movntq %%mm2, (%1) \n\t" \
00224 "punpckhbw %%mm3, %%mm0 \n\t" \
00225 "movntq %%mm0, 8(%1) \n\t" \
00226 : \
00227 : "r" (p_line1), "r" (p_line2), "r" (p_y2) ); \
00228 p_line1 += 16; p_line2 += 16; p_y1 += 8; p_y2 += 8; p_u += 4; p_v += 4; \
00229 p_u2 += 4; p_v2 += 4; \
00230 } while(0)
00231
00232 #endif
00233
#if HAVE_MMX
/*
 * Convert `row_pairs` pairs of lines from planar 4:2:0 to packed YUYV,
 * eight pixels at a time with MMXEXT_YUV420_YUYV and the remaining
 * (width % 8) / 2 pixel pairs with C_YUV420_YUYV.
 *
 * All pitches are the distance in bytes between two consecutive lines of
 * the picture being walked (plane pitch for a progressive frame, twice
 * the plane pitch for a single field).
 *
 * The last pair has no following chroma row, so the current row is used
 * for both output lines; this also keeps the single-pair case from
 * reading outside the chroma planes.  Row start addresses are derived
 * from the pitches only, so an odd width cannot shift later rows.
 *
 * The local pointer names are dictated by the two conversion macros.
 */
static void yv12_to_yuy2_field_mmxext
    (const uint8_t *y_row, int y_pitch,
     const uint8_t *u_row, int u_pitch,
     const uint8_t *v_row, int v_pitch,
     uint8_t *dst_row, int dst_pitch,
     int width, int row_pairs)
{
    int i_x;

    while ( row_pairs-- > 0 )
    {
        /* row_pairs now holds the number of pairs still to come */
        uint8_t *p_line1 = dst_row;
        uint8_t *p_line2 = dst_row + dst_pitch;
        const uint8_t *p_y1 = y_row;
        const uint8_t *p_y2 = y_row + y_pitch;
        const uint8_t *p_u  = u_row;
        const uint8_t *p_v  = v_row;
        const uint8_t *p_u2 = row_pairs ? u_row + u_pitch : u_row;
        const uint8_t *p_v2 = row_pairs ? v_row + v_pitch : v_row;

        for ( i_x = width / 8 ; i_x-- ; )
        {
            MMXEXT_YUV420_YUYV( );
        }
        for ( i_x = (width % 8) / 2 ; i_x-- ; )
        {
            C_YUV420_YUYV( );
        }

        y_row   += 2 * y_pitch;
        u_row   += u_pitch;
        v_row   += v_pitch;
        dst_row += 2 * dst_pitch;
    }
}

/*
 * MMXEXT conversion of a planar YV12 picture to packed YUY2.
 *
 * Same contract as the plain C converter: height / 2 line pairs are
 * written in progressive mode; otherwise the two fields are converted
 * independently with height / 4 line pairs each.
 *
 * sfence() and emms() are issued once at the end, after all the movntq
 * stores and MMX register use of the conversion macro (both helpers are
 * expected to come from ffmpeg-mmx.h).
 */
static void yv12_to_yuy2_mmxext
    (const unsigned char *y_src, int y_src_pitch,
     const unsigned char *u_src, int u_src_pitch,
     const unsigned char *v_src, int v_src_pitch,
     unsigned char *yuy2_map, int yuy2_pitch,
     int width, int height, int progressive )
{
    if ( progressive )
    {
        yv12_to_yuy2_field_mmxext( y_src, y_src_pitch,
                                   u_src, u_src_pitch,
                                   v_src, v_src_pitch,
                                   yuy2_map, yuy2_pitch,
                                   width, height / 2 );
    }
    else
    {
        /* first field: even lines of every plane */
        yv12_to_yuy2_field_mmxext( y_src, 2 * y_src_pitch,
                                   u_src, 2 * u_src_pitch,
                                   v_src, 2 * v_src_pitch,
                                   yuy2_map, 2 * yuy2_pitch,
                                   width, height / 4 );

        /* second field: odd lines of every plane */
        yv12_to_yuy2_field_mmxext( y_src + y_src_pitch, 2 * y_src_pitch,
                                   u_src + u_src_pitch, 2 * u_src_pitch,
                                   v_src + v_src_pitch, 2 * v_src_pitch,
                                   yuy2_map + yuy2_pitch, 2 * yuy2_pitch,
                                   width, height / 4 );
    }

    sfence();
    emms();
}
#endif
00373
/*
 * Convert two horizontally adjacent pixels of two packed YUYV lines to
 * planar 4:2:0.
 *
 * The macro works on the caller's local pointers and advances them:
 *   p_line1, p_line2  packed input of the two lines   (+4 each)
 *   p_y1, p_y2        luma output of the two lines    (+2 each)
 *   p_u, p_v          chroma output                   (+1 each)
 *
 * Each chroma output sample is the truncating average of the samples of
 * the two input lines.
 *
 * Wrapped in do { } while (0) so that the macro is a single statement and
 * can be used safely in an unbraced if/else; callers terminate it with ';'.
 */
#define C_YUYV_YUV420( )                                    \
    do {                                                    \
        *p_y1++ = *p_line1++;                               \
        *p_y2++ = *p_line2++;                               \
        *p_u++  = (*p_line1++ + *p_line2++) >> 1;           \
        *p_y1++ = *p_line1++;                               \
        *p_y2++ = *p_line2++;                               \
        *p_v++  = (*p_line1++ + *p_line2++) >> 1;           \
    } while (0)
00379
/*
 * Plain C conversion of a packed YUY2 picture to planar YV12.
 *
 * yuy2_map             source buffer, yuy2_pitch bytes per line
 * y_dst/u_dst/v_dst    destination planes, with their pitches in bytes
 * width, height        picture size in pixels
 *
 * Lines are consumed in pairs (height / 2 pairs).  Luma is copied; each
 * chroma output sample is the truncating average of the samples of the two
 * lines of the pair.
 *
 * Every line is converted across width / 2 pixel pairs, i.e. the width does
 * not have to be a multiple of 8.  Row start addresses are derived from
 * the pitches only, so whatever is left unconverted at the end of a line
 * (the last column of an odd width) cannot shift later rows.
 */
static void yuy2_to_yv12_c
    (const unsigned char *yuy2_map, int yuy2_pitch,
     unsigned char *y_dst, int y_dst_pitch,
     unsigned char *u_dst, int u_dst_pitch,
     unsigned char *v_dst, int v_dst_pitch,
     int width, int height)
{
    const int pixel_pairs = width / 2;
    int rows_left;
    int x;

    for ( rows_left = height / 2; rows_left > 0; rows_left-- )
    {
        const uint8_t *line1 = yuy2_map;
        const uint8_t *line2 = yuy2_map + yuy2_pitch;
        uint8_t *y1 = y_dst;
        uint8_t *y2 = y_dst + y_dst_pitch;

        for ( x = 0; x < pixel_pairs; x++ )
        {
            y1[0]    = line1[0];
            y2[0]    = line2[0];
            u_dst[x] = (line1[1] + line2[1]) >> 1;
            y1[1]    = line1[2];
            y2[1]    = line2[2];
            v_dst[x] = (line1[3] + line2[3]) >> 1;

            line1 += 4;
            line2 += 4;
            y1 += 2;
            y2 += 2;
        }

        yuy2_map += 2 * yuy2_pitch;
        y_dst    += 2 * y_dst_pitch;
        u_dst    += u_dst_pitch;
        v_dst    += v_dst_pitch;
    }
}
00423
00424
00425 #if HAVE_MMX
00426
00427
/*
 * MMXEXT_YUYV_YUV420: convert 8 horizontally adjacent pixels of two packed
 * YUYV lines to planar 4:2:0.
 *
 * Uses the caller's local pointers p_line1, p_line2, p_y1, p_y2, p_u and
 * p_v and advances them afterwards (16 input bytes and 8 luma bytes per
 * line, 4 bytes per chroma pointer).
 *
 * mm7 is used as a mask for the low byte of every 16-bit word and is not
 * loaded here; yuy2_to_yv12_mmxext() sets it up once before its loops.
 *
 * First asm statement:  loads 16 bytes of line 1, separates the even
 *                       (luma) bytes with the mm7 mask and the odd
 *                       (chroma) bytes with a word shift, stores the 8
 *                       luma bytes to p_y1 with movntq and keeps the
 *                       packed chroma bytes in mm0.
 * Second asm statement: the same for line 2: luma goes to p_y2, packed
 *                       chroma stays in mm1.
 * Third asm statement:  averages the chroma of both lines with pavgb,
 *                       then splits the result again into its odd bytes
 *                       (4 bytes written to p_v) and even bytes (4 bytes
 *                       written to p_u).
 *
 * NOTE(review): values are passed between the three asm statements in MMX
 * registers and none of the statements declares clobbers or a memory
 * clobber, so this relies on the compiler not touching MMX registers or
 * reordering memory accesses in between -- confirm for new compilers.
 * NOTE(review): pavgb is presumably a rounding average, unlike the
 * truncating ">> 1" used by C_YUYV_YUV420 -- verify against the
 * instruction reference.
 */
00428 #define MMXEXT_YUYV_YUV420( ) \
00429 do { \
00430 __asm__ __volatile__(".align 8 \n\t" \
00431 "movq (%0), %%mm0 \n\t" \
00432 "movq 8(%0), %%mm1 \n\t" \
00433 "movq %%mm0, %%mm2 \n\t" \
00434 "movq %%mm1, %%mm3 \n\t" \
00435 "psrlw $8, %%mm0 \n\t" \
00436 "psrlw $8, %%mm1 \n\t" \
00437 "pand %%mm7, %%mm2 \n\t" \
00438 "pand %%mm7, %%mm3 \n\t" \
00439 "packuswb %%mm1, %%mm0 \n\t" \
00440 "packuswb %%mm3, %%mm2 \n\t" \
00441 "movntq %%mm2, (%1) \n\t" \
00442 : \
00443 : "r" (p_line1), "r" (p_y1) ); \
00444 __asm__ __volatile__(".align 8 \n\t" \
00445 "movq (%0), %%mm1 \n\t" \
00446 "movq 8(%0), %%mm2 \n\t" \
00447 "movq %%mm1, %%mm3 \n\t" \
00448 "movq %%mm2, %%mm4 \n\t" \
00449 "psrlw $8, %%mm1 \n\t" \
00450 "psrlw $8, %%mm2 \n\t" \
00451 "pand %%mm7, %%mm3 \n\t" \
00452 "pand %%mm7, %%mm4 \n\t" \
00453 "packuswb %%mm2, %%mm1 \n\t" \
00454 "packuswb %%mm4, %%mm3 \n\t" \
00455 "movntq %%mm3, (%1) \n\t" \
00456 : \
00457 : "r" (p_line2), "r" (p_y2) ); \
00458 __asm__ __volatile__( \
00459 "pavgb %%mm1, %%mm0 \n\t" \
00460 "movq %%mm0, %%mm1 \n\t" \
00461 "psrlw $8, %%mm0 \n\t" \
00462 "packuswb %%mm0, %%mm0 \n\t" \
00463 "movd %%mm0, (%0) \n\t" \
00464 "pand %%mm7, %%mm1 \n\t" \
00465 "packuswb %%mm1, %%mm1 \n\t" \
00466 "movd %%mm1, (%1) \n\t" \
00467 : \
00468 : "r" (p_v), "r" (p_u) ); \
00469 p_line1 += 16; p_line2 += 16; p_y1 += 8; p_y2 += 8; p_u += 4; p_v += 4; \
00470 } while(0)
00471
00472 #endif
00473
#if HAVE_MMX
/*
 * MMXEXT conversion of a packed YUY2 picture to planar YV12.
 *
 * yuy2_map             source buffer, yuy2_pitch bytes per line
 * y_dst/u_dst/v_dst    destination planes, with their pitches in bytes
 * width, height        picture size in pixels
 *
 * Lines are consumed in pairs (height / 2 pairs).  Every line is converted
 * eight pixels at a time with MMXEXT_YUYV_YUV420; the remaining
 * (width % 8) / 2 pixel pairs are handled by C_YUYV_YUV420, the same way
 * yv12_to_yuy2_mmxext() finishes its lines.  Without that tail the
 * remainder would stay unconverted and the margin arithmetic below, which
 * assumes a full line has been consumed, would misplace all later rows.
 *
 * sfence() and emms() are issued once at the end, after all the movntq
 * stores and MMX register use of the conversion macro.
 */
static void yuy2_to_yv12_mmxext
    (const unsigned char *yuy2_map, int yuy2_pitch,
     unsigned char *y_dst, int y_dst_pitch,
     unsigned char *u_dst, int u_dst_pitch,
     unsigned char *v_dst, int v_dst_pitch,
     int width, int height)
{
    /* pointer names are dictated by the two conversion macros */
    const uint8_t *p_line1, *p_line2 = yuy2_map;
    uint8_t *p_y1, *p_y2 = y_dst;
    uint8_t *p_u = u_dst;
    uint8_t *p_v = v_dst;

    int i_x, i_y;

    /* bytes between the end of one converted line and the start of the next */
    const int i_dest_margin = y_dst_pitch - width;
    const int i_dest_u_margin = u_dst_pitch - width/2;
    const int i_dest_v_margin = v_dst_pitch - width/2;
    const int i_source_margin = yuy2_pitch - width*2;

    /* mm7 = 0x00ff in every word: the low-byte mask MMXEXT_YUYV_YUV420 expects */
    __asm__ __volatile__(
        "pcmpeqw %mm7, %mm7 \n\t"
        "psrlw $8, %mm7 \n\t"
        );

    for ( i_y = height / 2 ; i_y-- > 0 ; )
    {
        p_line1 = p_line2;
        p_line2 += yuy2_pitch;

        p_y1 = p_y2;
        p_y2 += y_dst_pitch;

        for ( i_x = width / 8 ; i_x-- ; )
        {
            MMXEXT_YUYV_YUV420( );
        }
        /* scalar tail for widths that are not a multiple of 8 */
        for ( i_x = (width % 8) / 2 ; i_x-- ; )
        {
            C_YUYV_YUV420( );
        }

        p_y2 += i_dest_margin;
        p_u += i_dest_u_margin;
        p_v += i_dest_v_margin;
        p_line2 += i_source_margin;
    }

    sfence();
    emms();
}
#endif
00522
#if HAVE_MMX
/*
 * MMX version of the vertical chroma filter for one packed 4:2:2 line.
 *
 * output   destination line
 * width    line width in pixels (2 bytes per pixel)
 * m, t, b  current line, line above, line below
 *
 * Every chroma byte (odd byte offsets) of the output becomes
 * (3 * t + 3 * m + 2 * b) >> 3; the luma bytes (even offsets) are copied
 * from `m` in the MMX loop and left untouched by the scalar tail.
 *
 * The MMX loop handles 4 pixels (8 bytes) per iteration and the scalar
 * tail handles the remaining width % 4 pixels.  The tail count is in
 * pixels: each tail iteration consumes one 2-byte pixel, so counting the
 * remaining bytes instead would run past the end of the line.
 */
static void vfilter_chroma_332_packed422_scanline_mmx( uint8_t *output, int width,
                                                       uint8_t *m, uint8_t *t, uint8_t *b )
{
    const mmx_t ymask = { 0x00ff00ff00ff00ffULL };
    const mmx_t cmask = { 0xff00ff00ff00ff00ULL };
    int blocks = width / 4;           /* 8-byte groups of 4 pixels      */
    int tail   = width - blocks * 4;  /* pixels left for the scalar loop */

    movq_m2r( ymask, mm7 );
    movq_m2r( cmask, mm6 );

    while ( blocks-- > 0 )
    {
        movq_m2r( *t, mm0 );
        movq_m2r( *b, mm1 );
        movq_m2r( *m, mm2 );

        /* mm3 = luma bytes of the current line */
        movq_r2r ( mm2, mm3 );
        pand_r2r ( mm7, mm3 );

        /* keep only the chroma bytes of all three lines */
        pand_r2r ( mm6, mm0 );
        pand_r2r ( mm6, mm1 );
        pand_r2r ( mm6, mm2 );

        /* move chroma into the low byte of each word: t, 2*b, m */
        psrlq_i2r( 8, mm0 );
        psrlq_i2r( 7, mm1 );
        psrlq_i2r( 8, mm2 );

        /* mm0 = 3*t */
        movq_r2r ( mm0, mm4 );
        psllw_i2r( 1, mm4 );
        paddw_r2r( mm4, mm0 );

        /* mm2 = 3*m */
        movq_r2r ( mm2, mm4 );
        psllw_i2r( 1, mm4 );
        paddw_r2r( mm4, mm2 );

        /* mm2 = 3*t + 3*m + 2*b */
        paddw_r2r( mm0, mm2 );
        paddw_r2r( mm1, mm2 );

        /* << 5 then masking the high byte is (sum >> 3) placed in the chroma byte */
        psllw_i2r( 5, mm2 );
        pand_r2r( mm6, mm2 );

        /* recombine with the luma of the current line */
        por_r2r ( mm3, mm2 );

        movq_r2m( mm2, *output );
        output += 8;
        t += 8;
        b += 8;
        m += 8;
    }

    /* step onto the first chroma byte of the remaining pixels */
    output++; t++; b++; m++;
    while ( tail-- > 0 )
    {
        *output = (3 * *t + 3 * *m + 2 * *b) >> 3;
        output +=2; t+=2; b+=2; m+=2;
    }

    emms();
}
#endif
00588
/*
 * Plain C vertical chroma filter for one packed 4:2:2 line.
 *
 * output   destination line
 * width    number of chroma bytes to filter (one per pixel)
 * m, t, b  current line, line above, line below
 *
 * Only the odd byte offsets are written: each one becomes
 * (3 * t + 3 * m + 2 * b) >> 3 of the bytes at the same offset.  Even
 * byte offsets of the output are not touched.
 */
static void vfilter_chroma_332_packed422_scanline_c( uint8_t *output, int width,
                                                     uint8_t *m, uint8_t *t, uint8_t *b )
{
    int remaining = width;
    int pos;

    /* chroma lives at byte offsets 1, 3, 5, ... */
    for ( pos = 1; remaining--; pos += 2 )
    {
        const int above   = t[pos];
        const int current = m[pos];
        const int below   = b[pos];

        output[pos] = (3 * above + 3 * current + 2 * below) >> 3;
    }
}
00599
00600
00601
00602
00603
00604
00605
00606 void init_yuv_conversion(void)
00607 {
00608
00609
00610
00611
00612 #ifdef MMX
00613 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
00614 {
00615 yv12_to_yuy2 = yv12_to_yuy2_mmxext;
00616 yuy2_to_yv12 = yuy2_to_yv12_mmxext;
00617 vfilter_chroma_332_packed422_scanline = vfilter_chroma_332_packed422_scanline_mmx;
00618 }
00619 else
00620 #endif
00621 {
00622 yv12_to_yuy2 = yv12_to_yuy2_c;
00623 yuy2_to_yv12 = yuy2_to_yv12_c;
00624 vfilter_chroma_332_packed422_scanline = vfilter_chroma_332_packed422_scanline_c;
00625 }
00626 }
00627
/*
 * Run the selected vertical chroma filter over a whole packed 4:2:2
 * picture, in place.
 *
 * data     first line of the picture
 * stride   distance in bytes between two lines
 * width    line width passed through to the scanline filter
 * height   number of lines
 *
 * Each line is filtered against the line above and the line below it; the
 * first line stands in for its missing upper neighbour and the last line
 * for its missing lower neighbour.  Because the picture is filtered in
 * place from top to bottom, the line above has already been filtered when
 * it is read.
 *
 * Calls through vfilter_chroma_332_packed422_scanline, so
 * init_yuv_conversion() must have been called first.
 */
void apply_chroma_filter( uint8_t *data, int stride, int width, int height )
{
    const int last_row = height - 1;
    uint8_t *row = data;
    int y;

    for ( y = 0; y < height; y++ )
    {
        uint8_t *above = ( y == 0 ) ? row : row - stride;
        uint8_t *below = ( y < last_row ) ? row + stride : row;

        vfilter_chroma_332_packed422_scanline( row, width, row, above, below );

        row += stride;
    }
}
00644
00645