00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include <cstdio>
00026 #include <cstdlib>
00027 #include <cstring>
00028 #include "RTjpegN.h"
00029
/*
 * File-scope operands for the MMX code paths (compiled only when MMX is
 * defined).  RTjpeg_ones and RTjpeg_half are loaded by the MMX Quant();
 * RTjpeg_C4, RTjpeg_C6, RTjpeg_C2mC6, RTjpeg_C2pC6 and RTjpeg_zero are loaded
 * by the MMX DctY().  No initializers are given here; presumably the values
 * are assigned elsewhere before first use -- not visible in this part of the
 * file.
 */
00030 #ifdef MMX
00031 static mmx_t RTjpeg_ones;
00032 static mmx_t RTjpeg_half;
00033 static mmx_t RTjpeg_C4;
00034 static mmx_t RTjpeg_C6;
00035 static mmx_t RTjpeg_C2mC6;
00036 static mmx_t RTjpeg_C2pC6;
00037 static mmx_t RTjpeg_zero;
00038 #endif
00039
00040
/* Selects the first b2s()/s2b() pair below (the #ifdef BETTERCOMPRESSION branch). */
00041 #define BETTERCOMPRESSION 1
00042
/*
 * Coefficient scan order: RTjpeg_ZZ[k] is the position inside the 64-entry
 * block of the k-th coefficient in stream order.  Entry 0 is position 0 and
 * the following entries walk the anti-diagonals of an 8-wide block
 * (8, 1, 2, 9, 16, ...).  b2s() and s2b() index every block through it.
 */
00043 static const unsigned char RTjpeg_ZZ[64]={
00044 0,
00045 8, 1,
00046 2, 9, 16,
00047 24, 17, 10, 3,
00048 4, 11, 18, 25, 32,
00049 40, 33, 26, 19, 12, 5,
00050 6, 13, 20, 27, 34, 41, 48,
00051 56, 49, 42, 35, 28, 21, 14, 7,
00052 15, 22, 29, 36, 43, 50, 57,
00053 58, 51, 44, 37, 30, 23,
00054 31, 38, 45, 52, 59,
00055 60, 53, 46, 39,
00056 47, 54, 61,
00057 62, 55,
00058 63 };
00059
/*
 * Per-coefficient scale factors in 32.32 fixed point (4294967296 == 2^32,
 * i.e. 1.0).  DctInit() divides the forward quantizer tables by these and
 * IdctInit() multiplies the inverse tables by them.
 * NOTE(review): presumably the AAN DCT scale factors, as the name suggests --
 * confirm against the algorithm reference.
 */
00060 static const uint64_t RTjpeg_aan_tab[64]={
00061 4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL,
00062 5957222912ULL, 8263040512ULL, 7783580160ULL, 7005009920ULL, 5957222912ULL, 4680582144ULL, 3224107520ULL, 1643641088ULL,
00063 5611718144ULL, 7783580160ULL, 7331904512ULL, 6598688768ULL, 5611718144ULL, 4408998912ULL, 3036936960ULL, 1548224000ULL,
00064 5050464768ULL, 7005009920ULL, 6598688768ULL, 5938608128ULL, 5050464768ULL, 3968072960ULL, 2733115392ULL, 1393296000ULL,
00065 4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL,
00066 3374581504ULL, 4680582144ULL, 4408998912ULL, 3968072960ULL, 3374581504ULL, 2651326208ULL, 1826357504ULL, 931136000ULL,
00067 2324432128ULL, 3224107520ULL, 3036936960ULL, 2733115392ULL, 2324432128ULL, 1826357504ULL, 1258030336ULL, 641204288ULL,
00068 1184891264ULL, 1643641088ULL, 1548224000ULL, 1393296000ULL, 1184891264ULL, 931136000ULL, 641204288ULL, 326894240ULL,
00069 };
00070
/*
 * Base quantizer steps for the luminance plane, 64 entries laid out as 8 rows
 * of 8.  Not referenced in the visible part of this file.
 * NOTE(review): the values appear to match the example luminance table of the
 * JPEG specification (Annex K) -- confirm.
 */
00071 static const unsigned char RTjpeg_lum_quant_tbl[64] = {
00072 16, 11, 10, 16, 24, 40, 51, 61,
00073 12, 12, 14, 19, 26, 58, 60, 55,
00074 14, 13, 16, 24, 40, 57, 69, 56,
00075 14, 17, 22, 29, 51, 87, 80, 62,
00076 18, 22, 37, 56, 68, 109, 103, 77,
00077 24, 35, 55, 64, 81, 104, 113, 92,
00078 49, 64, 78, 87, 103, 121, 120, 101,
00079 72, 92, 95, 98, 112, 100, 103, 99
00080 };
00081
/*
 * Base quantizer steps for the chrominance planes, 64 entries laid out as
 * 8 rows of 8.  Not referenced in the visible part of this file.
 * NOTE(review): the values appear to match the example chrominance table of
 * the JPEG specification (Annex K) -- confirm.
 */
00082 static const unsigned char RTjpeg_chrom_quant_tbl[64] = {
00083 17, 18, 24, 47, 99, 99, 99, 99,
00084 18, 21, 26, 66, 99, 99, 99, 99,
00085 24, 26, 56, 99, 99, 99, 99, 99,
00086 47, 66, 99, 99, 99, 99, 99, 99,
00087 99, 99, 99, 99, 99, 99, 99, 99,
00088 99, 99, 99, 99, 99, 99, 99, 99,
00089 99, 99, 99, 99, 99, 99, 99, 99,
00090 99, 99, 99, 99, 99, 99, 99, 99
00091 };
00092
00093 #ifdef BETTERCOMPRESSION
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110 int RTjpeg::b2s(int16_t *data, int8_t *strm, uint8_t bt8)
00111 {
00112 register int ci, co=1;
00113 register int16_t ZZvalue;
00114 register unsigned char bitten;
00115 register unsigned char bitoff;
00116
00117 uint8_t *ustrm = (uint8_t *)strm;
00118 bt8 = bt8;
00119 #ifdef SHOWBLOCK
00120
00121 int ii;
00122 for (ii=0; ii < 64; ii++) {
00123 fprintf(stdout, "%d ", data[RTjpeg_ZZ[ii]]);
00124 }
00125 fprintf(stdout, "\n\n");
00126
00127 #endif
00128
00129
00130
00131
00132
00133
00134
00135 ustrm[0]=
00136 (uint8_t)(data[RTjpeg_ZZ[0]]>254) ? 254:((data[RTjpeg_ZZ[0]]<0)?0:data[RTjpeg_ZZ[0]]);
00137
00138
00139 ci=63;
00140 while (data[RTjpeg_ZZ[ci]]==0 && ci>0) ci--;
00141
00142 bitten = ((unsigned char)ci) << 2;
00143
00144 if (ci==0) {
00145 ustrm[1]= bitten;
00146 co = 2;
00147 return (int)co;
00148 }
00149
00150
00151 bitoff = 0;
00152 co = 1;
00153
00154 for(; ci>0; ci--) {
00155
00156 ZZvalue = data[RTjpeg_ZZ[ci]];
00157
00158 switch(ZZvalue) {
00159 case 0:
00160 break;
00161 case 1:
00162 bitten |= (0x01<<bitoff);
00163 break;
00164 case -1:
00165 bitten |= (0x03<<bitoff);
00166 break;
00167 default:
00168 bitten |= (0x02<<bitoff);
00169 goto HERZWEH;
00170 break;
00171 }
00172
00173 if ( bitoff == 0 ) {
00174 ustrm[co]= bitten;
00175 bitten = 0;
00176 bitoff = 8;
00177 co++;
00178 }
00179 bitoff-=2;
00180
00181 }
00182
00183
00184 if (bitoff != 6) {
00185
00186 ustrm[co]= bitten;
00187 co++;
00188
00189 }
00190 goto BAUCHWEH;
00191
00192 HERZWEH:
00193
00194
00195
00196 switch(bitoff){
00197 case 4:
00198 case 6:
00199 bitoff = 0;
00200 break;
00201 case 2:
00202 case 0:
00203 ustrm[co]= bitten;
00204 bitoff = 4;
00205 co++;
00206 bitten = 0;
00207 break;
00208 default:
00209 break;
00210 }
00211
00212 for(; ci>0; ci--) {
00213
00214 ZZvalue = data[RTjpeg_ZZ[ci]];
00215
00216 if ( (ZZvalue > 7) || (ZZvalue < -7) ) {
00217 bitten |= (0x08<<bitoff);
00218 goto HIRNWEH;
00219 }
00220
00221 bitten |= (ZZvalue&0xf)<<bitoff;
00222
00223 if ( bitoff == 0 ) {
00224 ustrm[co]= bitten;
00225 bitten = 0;
00226 bitoff = 8;
00227 co++;
00228 }
00229 bitoff-=4;
00230 }
00231
00232
00233 if ( bitoff == 0 ) {
00234 ustrm[co]= bitten;
00235 co++;
00236 }
00237 goto BAUCHWEH;
00238
00239 HIRNWEH:
00240
00241 ustrm[co]= bitten;
00242 co++;
00243
00244
00245
00246 for(; ci>0; ci--) {
00247
00248 ZZvalue = data[RTjpeg_ZZ[ci]];
00249
00250 if (ZZvalue>0)
00251 {
00252 strm[co++]=(int8_t)(ZZvalue>127)?127:ZZvalue;
00253 }
00254 else
00255 {
00256 strm[co++]=(int8_t)(ZZvalue<-128)?-128:ZZvalue;
00257 }
00258
00259 }
00260
00261
00262 BAUCHWEH:
00263
00264 #ifdef SHOWBLOCK
00265 {
00266 int i;
00267 fprintf(stdout, "\nco = '%d'\n", co);
00268 for (i=0; i < co+2; i++) {
00269 fprintf(stdout, "%d ", strm[i]);
00270 }
00271 fprintf(stdout, "\n\n");
00272 }
00273 #endif
00274
00275 return (int)co;
00276 }
00277
00278
00279
00280
00281
00282 int RTjpeg::s2b(int16_t *data, int8_t *strm, uint8_t bt8, int32_t *qtbla)
00283 {
00284 uint32_t *qtbl = (uint32_t *)qtbla;
00285 int ci;
00286 register int co;
00287 register int i;
00288 register unsigned char bitten;
00289 register unsigned char bitoff;
00290
00291 bt8 = bt8;
00292
00293 i=RTjpeg_ZZ[0];
00294 data[i]=((uint8_t)strm[0])*qtbl[i];
00295
00296
00297
00298 bitten = ((unsigned char)strm[1]) >> 2;
00299 co = 63;
00300 for(; co > bitten; co--) {
00301
00302 data[RTjpeg_ZZ[co]] = 0;
00303
00304 }
00305
00306 if (co==0) {
00307 ci = 2;
00308 goto AUTOBAHN;
00309 }
00310
00311
00312 ci=1;
00313 bitoff = 0;
00314
00315 for(; co>0; co--) {
00316
00317 bitten = ((unsigned char)strm[ci]) >> bitoff;
00318 bitten &= 0x03;
00319
00320 i=RTjpeg_ZZ[co];
00321
00322 switch( bitten ) {
00323 case 0x03:
00324 data[i]= -qtbl[i];
00325 break;
00326 case 0x02:
00327 goto FUSSWEG;
00328 break;
00329 case 0x01:
00330 data[i]= qtbl[i];
00331 break;
00332 case 0x00:
00333 data[i]= 0;
00334 break;
00335 default:
00336 break;
00337 }
00338
00339 if ( bitoff == 0 ) {
00340 bitoff = 8;
00341 ci++;
00342 }
00343 bitoff -= 2;
00344 }
00345
00346
00347
00348
00349 if (bitoff!=6) ci++;
00350
00351 goto AUTOBAHN;
00352
00353
00354 FUSSWEG:
00355
00356 switch(bitoff){
00357 case 4:
00358 case 6:
00359 bitoff = 0;
00360 break;
00361 case 2:
00362 case 0:
00363
00364 ci++;
00365 bitoff = 4;
00366 break;
00367 default:
00368 break;
00369 }
00370
00371 for(; co>0; co--) {
00372
00373 bitten = ((unsigned char)strm[ci]) >> bitoff;
00374 bitten &= 0x0f;
00375
00376 i=RTjpeg_ZZ[co];
00377
00378 if ( bitten == 0x08 ) {
00379 goto STRASSE;
00380 }
00381
00382
00383 if ( bitten & 0x08 ) {
00384 bitten |= 0xf0;
00385 }
00386
00387
00388 data[i]=((signed char)bitten)*qtbl[i];
00389
00390 if ( bitoff == 0 ) {
00391 bitoff = 8;
00392 ci++;
00393 }
00394 bitoff -= 4;
00395 }
00396
00397
00398
00399 if (bitoff!=4) ci++;
00400
00401 goto AUTOBAHN;
00402
00403 STRASSE:
00404 ci++;
00405
00406 for(; co>0; co--) {
00407 i=RTjpeg_ZZ[co];
00408 data[i]=strm[ci++]*qtbl[i];
00409 }
00410
00411
00412
00413 AUTOBAHN:
00414
00415 #ifdef SHOWBLOCK
00416 fprintf(stdout, "\nci = '%d'\n", ci);
00417 for (i=0; i < 64; i++) {
00418 fprintf(stdout, "%d ", data[RTjpeg_ZZ[i]]);
00419 }
00420 fprintf(stdout, "\n\n");
00421 #endif
00422
00423 return ci;
00424 }
00425
00426 #else
00427
00428 int RTjpeg::b2s(int16_t *data, int8_t *strm, uint8_t bt8)
00429 {
00430 register int ci, co=1, tmp;
00431 register int16_t ZZvalue;
00432
00433 #ifdef SHOWBLOCK
00434
00435 int ii;
00436 for (ii=0; ii < 64; ii++) {
00437 fprintf(stdout, "%d ", data[RTjpeg_ZZ[ii]]);
00438 }
00439 fprintf(stdout, "\n\n");
00440
00441 #endif
00442
00443 (uint8_t)strm[0]=(uint8_t)(data[RTjpeg_ZZ[0]]>254) ? 254:((data[RTjpeg_ZZ[0]]<0)?0:data[RTjpeg_ZZ[0]]);
00444
00445 for(ci=1; ci<=bt8; ci++)
00446 {
00447 ZZvalue = data[RTjpeg_ZZ[ci]];
00448
00449 if (ZZvalue>0)
00450 {
00451 strm[co++]=(int8_t)(ZZvalue>127)?127:ZZvalue;
00452 }
00453 else
00454 {
00455 strm[co++]=(int8_t)(ZZvalue<-128)?-128:ZZvalue;
00456 }
00457 }
00458
00459 for(; ci<64; ci++)
00460 {
00461 ZZvalue = data[RTjpeg_ZZ[ci]];
00462
00463 if (ZZvalue>0)
00464 {
00465 strm[co++]=(int8_t)(ZZvalue>63)?63:ZZvalue;
00466 }
00467 else if (ZZvalue<0)
00468 {
00469 strm[co++]=(int8_t)(ZZvalue<-64)?-64:ZZvalue;
00470 }
00471 else
00472 {
00473 tmp=ci;
00474 do
00475 {
00476 ci++;
00477 } while((ci<64)&&(data[RTjpeg_ZZ[ci]]==0));
00478
00479 strm[co++]=(int8_t)(63+(ci-tmp));
00480 ci--;
00481 }
00482 }
00483 return (int)co;
00484 }
00485
00486 int RTjpeg::s2b(int16_t *data, int8_t *strm, uint8_t bt8, uint32_t *qtbla)
00487 {
00488 uint32_t *qtbl = (uint32_t *)qtbla;
00489 int ci=1, co=1, tmp;
00490 register int i;
00491
00492 i=RTjpeg_ZZ[0];
00493 data[i]=((uint8_t)strm[0])*qtbl[i];
00494
00495 for(co=1; co<=bt8; co++)
00496 {
00497 i=RTjpeg_ZZ[co];
00498 data[i]=strm[ci++]*qtbl[i];
00499 }
00500
00501 for(; co<64; co++)
00502 {
00503 if (strm[ci]>63)
00504 {
00505 tmp=co+strm[ci]-63;
00506 for(; co<tmp; co++)data[RTjpeg_ZZ[co]]=0;
00507 co--;
00508 } else
00509 {
00510 i=RTjpeg_ZZ[co];
00511 data[i]=strm[ci]*qtbl[i];
00512 }
00513 ci++;
00514 }
00515 return (int)ci;
00516 }
00517 #endif
00518
00519 #ifdef MMX
00520 void RTjpeg::QuantInit(void)
00521 {
00522 int i;
00523 int16_t *qtbl;
00524
00525 qtbl = (int16_t *)lqt;
00526 for (i = 0; i < 64; i++)
00527 qtbl[i] = (int16_t)lqt[i];
00528
00529 qtbl = (int16_t *)cqt;
00530 for (i = 0; i < 64; i++)
00531 qtbl[i] = (int16_t)cqt[i];
00532 }
00533
/**
 * MMX quantizer: scales the 64 coefficients of `block` in place.
 *
 * @param block  64 int16_t coefficients, overwritten with the result.
 * @param qtbl   quantizer table; read here one quadword at a time as packed
 *               16-bit words, i.e. the layout the MMX QuantInit() writes.
 *
 * Each of the 16 iterations handles four coefficients: the table words are
 * interleaved with the words of RTjpeg_ones, the coefficients with the words
 * of RTjpeg_half, pmaddwd forms coef*q + half*ones per 32-bit lane, and the
 * sums are shifted right by 16 and packed back to signed 16-bit words with
 * saturation.  The values held in RTjpeg_ones / RTjpeg_half are set outside
 * this part of the file.
 */
00534 void RTjpeg::Quant(int16_t *block, int32_t *qtbl)
00535 {
00536 int i;
00537 mmx_t *bl, *ql;
00538
00539
00540 ql=(mmx_t *)qtbl;
00541 bl=(mmx_t *)block;
00542
/* mm6 / mm7 stay loaded with the two constants for the whole loop. */
00543 movq_m2r(RTjpeg_ones, mm6);
00544 movq_m2r(RTjpeg_half, mm7);
00545
00546 for(i=16; i; i--)
00547 {
/* Four table words and four coefficients, each duplicated for the low/high halves. */
00548 movq_m2r(*(ql++), mm0);
00549 movq_m2r(*bl, mm2);
00550 movq_r2r(mm0, mm1);
00551 movq_r2r(mm2, mm3);
00552
/* Pair each table word with a word of mm6 ... */
00553 punpcklwd_r2r(mm6, mm0);
00554 punpckhwd_r2r(mm6, mm1);
00555
/* ... and each coefficient with a word of mm7. */
00556 punpcklwd_r2r(mm7, mm2);
00557 punpckhwd_r2r(mm7, mm3);
00558
/* Multiply the pairs and add within each 32-bit lane. */
00559 pmaddwd_r2r(mm2, mm0);
00560 pmaddwd_r2r(mm3, mm1);
00561
/* Drop the 16 fractional bits (arithmetic shift). */
00562 psrad_i2r(16, mm0);
00563 psrad_i2r(16, mm1);
00564
/* Back to four signed 16-bit words, saturating. */
00565 packssdw_r2r(mm1, mm0);
00566
00567 movq_r2m(mm0, *(bl++));
00568 }
00569 }
00570 #else
/**
 * Portable build: intentionally empty.  The portable Quant() reads the
 * 32-bit table it is given directly, so there is nothing to repack.
 */
00571 void RTjpeg::QuantInit()
00572 {
00573 }
00574
00575 void RTjpeg::Quant(int16_t *block, int32_t *qtbl)
00576 {
00577 int i;
00578
00579 for(i=0; i<64; i++)
00580 block[i]=(int16_t)((block[i]*qtbl[i]+32767)>>16);
00581 }
00582 #endif
00583
00584
00585
00586
/*
 * Fixed-point helpers for the portable (non-MMX) DctY().
 *   FIX_x_xxxxxxxxx  the constant spelled in the name, scaled by 2^8 and
 *                    rounded (e.g. 0.707106781 * 256 ~= 181).
 *   DESCALE10        add 128, shift right by 8, narrow to int16_t.
 *   DESCALE20        add 32768, shift right by 16, narrow to int16_t.
 *   D_MULTIPLY       plain 32-bit product, no descaling.
 */
00587 #ifndef MMX
00588 #define FIX_0_382683433 ((int32_t) 98)
00589 #define FIX_0_541196100 ((int32_t) 139)
00590 #define FIX_0_707106781 ((int32_t) 181)
00591 #define FIX_1_306562965 ((int32_t) 334)
00592
00593 #define DESCALE10(x) (int16_t)( ((x)+128) >> 8)
00594 #define DESCALE20(x) (int16_t)(((x)+32768) >> 16)
00595 #define D_MULTIPLY(var,const) ((int32_t) ((var) * (const)))
00596 #endif
00597
00598 void RTjpeg::DctInit()
00599 {
00600 int i;
00601
00602 for(i = 0; i < 64; i++)
00603 {
00604 lqt[i] = (((uint64_t)lqt[i] << 32) / RTjpeg_aan_tab[i]);
00605 cqt[i] = (((uint64_t)cqt[i] << 32) / RTjpeg_aan_tab[i]);
00606 }
00607 }
00608
/**
 * Forward 8x8 DCT of one block of 8-bit samples.  The result is written as
 * 16-bit coefficients to `block`; `block` and `ws` are not declared in this
 * function (presumably class members).
 *
 * @param idata  first sample of the 8x8 input block.
 * @param rskip  row pitch: the portable path advances the input by rskip<<3
 *               bytes per row, the MMX path by rskip mmx_t elements per row.
 *
 * NOTE(review): the output presumably still carries the per-coefficient scale
 * that DctInit() folds into the quantizer tables -- confirm.
 */
00609 void RTjpeg::DctY(uint8_t *idata, int rskip)
00610 {
00611 #ifndef MMX
/* ---- Portable integer implementation ---- */
00612 int32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
00613 int32_t tmp10, tmp11, tmp12, tmp13;
00614 int32_t z1, z2, z3, z4, z5, z11, z13;
00615 uint8_t *idataptr;
00616 int16_t *odataptr;
00617 int32_t *wsptr;
00618 int ctr;
00619
00620
/* Pass 1: one input row per iteration; the eight results go to ws scaled by 2^8. */
00621 idataptr = idata;
00622 wsptr = ws;
00623 for (ctr = 7; ctr >= 0; ctr--) {
/* Sums and differences of the mirrored sample pairs. */
00624 tmp0 = idataptr[0] + idataptr[7];
00625 tmp7 = idataptr[0] - idataptr[7];
00626 tmp1 = idataptr[1] + idataptr[6];
00627 tmp6 = idataptr[1] - idataptr[6];
00628 tmp2 = idataptr[2] + idataptr[5];
00629 tmp5 = idataptr[2] - idataptr[5];
00630 tmp3 = idataptr[3] + idataptr[4];
00631 tmp4 = idataptr[3] - idataptr[4];
00632
/* Even-indexed outputs (0, 4, 2, 6) come from the sums. */
00633 tmp10 = (tmp0 + tmp3);
00634 tmp13 = tmp0 - tmp3;
00635 tmp11 = (tmp1 + tmp2);
00636 tmp12 = tmp1 - tmp2;
00637
00638 wsptr[0] = (tmp10 + tmp11)<<8;
00639 wsptr[4] = (tmp10 - tmp11)<<8;
00640
00641 z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
00642 wsptr[2] = (tmp13<<8) + z1;
00643 wsptr[6] = (tmp13<<8) - z1;
00644
/* Odd-indexed outputs (5, 3, 1, 7) come from the differences. */
00645 tmp10 = tmp4 + tmp5;
00646 tmp11 = tmp5 + tmp6;
00647 tmp12 = tmp6 + tmp7;
00648
00649 z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433);
00650 z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5;
00651 z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5;
00652 z3 = D_MULTIPLY(tmp11, FIX_0_707106781);
00653
00654 z11 = (tmp7<<8) + z3;
00655 z13 = (tmp7<<8) - z3;
00656
00657 wsptr[5] = z13 + z2;
00658 wsptr[3] = z13 - z2;
00659 wsptr[1] = z11 + z4;
00660 wsptr[7] = z11 - z4;
00661
/* Next input row is rskip*8 bytes further on; next ws row is 8 entries on. */
00662 idataptr += rskip<<3;
00663 wsptr += 8;
00664 }
00665
/* Pass 2: one ws column per iteration (stride 8), producing one column of block. */
00666 wsptr = ws;
00667 odataptr = block;
00668 for (ctr = 7; ctr >= 0; ctr--) {
00669 tmp0 = wsptr[0] + wsptr[56];
00670 tmp7 = wsptr[0] - wsptr[56];
00671 tmp1 = wsptr[8] + wsptr[48];
00672 tmp6 = wsptr[8] - wsptr[48];
00673 tmp2 = wsptr[16] + wsptr[40];
00674 tmp5 = wsptr[16] - wsptr[40];
00675 tmp3 = wsptr[24] + wsptr[32];
00676 tmp4 = wsptr[24] - wsptr[32];
00677
00678 tmp10 = tmp0 + tmp3;
00679 tmp13 = tmp0 - tmp3;
00680 tmp11 = tmp1 + tmp2;
00681 tmp12 = tmp1 - tmp2;
00682
/* Outputs 0 and 32 only shed the 8 fractional bits from pass 1; all other
   outputs gain 8 more bits in this pass and shed 16. */
00683 odataptr[0] = DESCALE10(tmp10 + tmp11);
00684 odataptr[32] = DESCALE10(tmp10 - tmp11);
00685
00686 z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
00687 odataptr[16] = DESCALE20((tmp13<<8) + z1);
00688 odataptr[48] = DESCALE20((tmp13<<8) - z1);
00689
00690 tmp10 = tmp4 + tmp5;
00691 tmp11 = tmp5 + tmp6;
00692 tmp12 = tmp6 + tmp7;
00693
00694 z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433);
00695 z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5;
00696 z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5;
00697 z3 = D_MULTIPLY(tmp11, FIX_0_707106781);
00698
00699 z11 = (tmp7<<8) + z3;
00700 z13 = (tmp7<<8) - z3;
00701
00702 odataptr[40] = DESCALE20(z13 + z2);
00703 odataptr[24] = DESCALE20(z13 - z2);
00704 odataptr[8] = DESCALE20(z11 + z4);
00705 odataptr[56] = DESCALE20(z11 - z4);
00706
00707 odataptr++;
00708 wsptr++;
00709
00710 }
00711 #else
/* ---- MMX implementation ----
   The block is handled as 16 quadwords (dataptr+0 .. dataptr+15), two per
   row of eight 16-bit words.  tmp6 / tmp7 are memory spill slots for
   intermediate registers. */
00712 volatile mmx_t tmp6, tmp7;
00713 register mmx_t *dataptr = (mmx_t *)block;
00714 mmx_t *idata2 = (mmx_t *)idata;
00715
00716
00717
00718
/* Step 1: widen the eight input rows from bytes to 16-bit words by
   unpacking against mm2 (loaded from RTjpeg_zero) and store them in block.
   idata2 advances by rskip mmx_t elements per row. */
00719 movq_m2r(RTjpeg_zero, mm2);
00720
00721 movq_m2r(*idata2, mm0);
00722 movq_r2r(mm0, mm1);
00723
00724 punpcklbw_r2r(mm2, mm0);
00725 movq_r2m(mm0, *(dataptr));
00726
00727 punpckhbw_r2r(mm2, mm1);
00728 movq_r2m(mm1, *(dataptr+1));
00729
00730 idata2 += rskip;
00731
00732 movq_m2r(*idata2, mm0);
00733 movq_r2r(mm0, mm1);
00734
00735 punpcklbw_r2r(mm2, mm0);
00736 movq_r2m(mm0, *(dataptr+2));
00737
00738 punpckhbw_r2r(mm2, mm1);
00739 movq_r2m(mm1, *(dataptr+3));
00740
00741 idata2 += rskip;
00742
00743 movq_m2r(*idata2, mm0);
00744 movq_r2r(mm0, mm1);
00745
00746 punpcklbw_r2r(mm2, mm0);
00747 movq_r2m(mm0, *(dataptr+4));
00748
00749 punpckhbw_r2r(mm2, mm1);
00750 movq_r2m(mm1, *(dataptr+5));
00751
00752 idata2 += rskip;
00753
00754 movq_m2r(*idata2, mm0);
00755 movq_r2r(mm0, mm1);
00756
00757 punpcklbw_r2r(mm2, mm0);
00758 movq_r2m(mm0, *(dataptr+6));
00759
00760 punpckhbw_r2r(mm2, mm1);
00761 movq_r2m(mm1, *(dataptr+7));
00762
00763 idata2 += rskip;
00764
00765 movq_m2r(*idata2, mm0);
00766 movq_r2r(mm0, mm1);
00767
00768 punpcklbw_r2r(mm2, mm0);
00769 movq_r2m(mm0, *(dataptr+8));
00770
00771 punpckhbw_r2r(mm2, mm1);
00772 movq_r2m(mm1, *(dataptr+9));
00773
00774 idata2 += rskip;
00775
00776 movq_m2r(*idata2, mm0);
00777 movq_r2r(mm0, mm1);
00778
00779 punpcklbw_r2r(mm2, mm0);
00780 movq_r2m(mm0, *(dataptr+10));
00781
00782 punpckhbw_r2r(mm2, mm1);
00783 movq_r2m(mm1, *(dataptr+11));
00784
00785 idata2 += rskip;
00786
00787 movq_m2r(*idata2, mm0);
00788 movq_r2r(mm0, mm1);
00789
00790 punpcklbw_r2r(mm2, mm0);
00791 movq_r2m(mm0, *(dataptr+12));
00792
00793 punpckhbw_r2r(mm2, mm1);
00794 movq_r2m(mm1, *(dataptr+13));
00795
00796 idata2 += rskip;
00797
00798 movq_m2r(*idata2, mm0);
00799 movq_r2r(mm0, mm1);
00800
00801 punpcklbw_r2r(mm2, mm0);
00802 movq_r2m(mm0, *(dataptr+14));
00803
00804 punpckhbw_r2r(mm2, mm1);
00805 movq_r2m(mm1, *(dataptr+15));
00806
00807
00808
/* Step 2: word/dword interleaves of quadwords 9, 11, 13, 15 -- a 4x4
   transpose of 16-bit words, stored back into the same four slots. */
00809 movq_m2r(*(dataptr+9), mm7);
00810
00811 movq_m2r(*(dataptr+13), mm6);
00812 movq_r2r(mm7, mm5);
00813
00814 punpcklwd_m2r(*(dataptr+11), mm7);
00815 movq_r2r(mm6, mm2);
00816
00817 punpcklwd_m2r(*(dataptr+15), mm6);
00818 movq_r2r(mm7, mm1);
00819
00820 movq_m2r(*(dataptr+11), mm3);
00821 punpckldq_r2r(mm6, mm7);
00822
00823 movq_m2r(*(dataptr+15), mm0);
00824 punpckhdq_r2r(mm6, mm1);
00825
00826 movq_r2m(mm7,*(dataptr+9));
00827 punpckhwd_r2r(mm3, mm5);
00828
00829 movq_r2m(mm1,*(dataptr+11));
00830 punpckhwd_r2r(mm0, mm2);
00831
00832 movq_r2r(mm5, mm1);
00833 punpckldq_r2r(mm2, mm5);
00834
00835 movq_m2r(*(dataptr+1), mm0);
00836 punpckhdq_r2r(mm2, mm1);
00837
00838 movq_r2m(mm5,*(dataptr+13));
00839
00840
00841
00842 movq_r2m(mm1, *(dataptr+15));
00843
/* Same shuffle for quadwords 1,3,5,7 and 8,10,12,14.  The two groups swap
   places: the transposed 1,3,5,7 group is stored to 8,10,12,14 and the
   transposed 8,10,12,14 group to 1,3,5,7. */
00844 movq_m2r(*(dataptr+5), mm2);
00845 movq_r2r(mm0, mm6);
00846
00847 punpcklwd_m2r(*(dataptr+3), mm0);
00848 movq_r2r(mm2, mm7);
00849
00850 punpcklwd_m2r(*(dataptr+7), mm2);
00851 movq_r2r(mm0, mm4);
00852
00853
00854 movq_m2r(*(dataptr+8), mm1);
00855 punpckldq_r2r(mm2, mm0);
00856
00857 movq_m2r(*(dataptr+12), mm3);
00858 punpckhdq_r2r(mm2, mm4);
00859
00860 punpckhwd_m2r(*(dataptr+3), mm6);
00861 movq_r2r(mm1, mm2);
00862
00863 punpckhwd_m2r(*(dataptr+7), mm7);
00864 movq_r2r(mm6, mm5);
00865
00866 movq_r2m(mm0, *(dataptr+8));
00867 punpckhdq_r2r(mm7, mm5);
00868
00869 punpcklwd_m2r(*(dataptr+10), mm1);
00870 movq_r2r(mm3, mm0);
00871
00872 punpckhwd_m2r(*(dataptr+10), mm2);
00873
00874 movq_r2m(mm4, *(dataptr+10));
00875 punpckldq_r2r(mm7, mm6);
00876
00877 punpcklwd_m2r(*(dataptr+14), mm3);
00878 movq_r2r(mm1, mm4);
00879
00880 movq_r2m(mm6, *(dataptr+12));
00881 punpckldq_r2r(mm3, mm1);
00882
00883 punpckhwd_m2r(*(dataptr+14), mm0);
00884 movq_r2r(mm2, mm6);
00885
00886 movq_r2m(mm5, *(dataptr+14));
00887 punpckhdq_r2r(mm3, mm4);
00888
00889 movq_r2m(mm1, *(dataptr+1));
00890 punpckldq_r2r(mm0, mm2);
00891
00892 movq_r2m(mm4, *(dataptr+3));
00893 punpckhdq_r2r(mm0, mm6);
00894
00895 movq_r2m(mm2, *(dataptr+5));
00896
00897 movq_m2r(*dataptr, mm0);
00898
00899 movq_r2m(mm6, *(dataptr+7));
00900
00901
00902
00903
/* Quadwords 0,2,4,6 are transposed in registers and go straight into the
   butterfly additions/subtractions with quadwords 14, 12, 10, 8. */
00904 movq_m2r(*(dataptr+4), mm7);
00905 movq_r2r(mm0, mm2);
00906
00907 punpcklwd_m2r(*(dataptr+2), mm0);
00908 movq_r2r(mm7, mm4);
00909
00910 punpcklwd_m2r(*(dataptr+6), mm7);
00911 movq_r2r(mm0, mm1);
00912
00913 movq_m2r(*(dataptr+2), mm6);
00914 punpckldq_r2r(mm7, mm0);
00915
00916 movq_m2r(*(dataptr+6), mm5);
00917 punpckhdq_r2r(mm7, mm1);
00918
00919 movq_r2r(mm0, mm7);
00920 punpckhwd_r2r(mm6, mm2);
00921
00922 psubw_m2r(*(dataptr+14), mm7);
00923 movq_r2r(mm1, mm6);
00924
00925 paddw_m2r(*(dataptr+14), mm0);
00926 punpckhwd_r2r(mm5, mm4);
00927
00928 paddw_m2r(*(dataptr+12), mm1);
00929 movq_r2r(mm2, mm3);
00930
00931 psubw_m2r(*(dataptr+12), mm6);
00932 punpckldq_r2r(mm4, mm2);
00933
00934 movq_r2m(mm7, tmp7);
00935 movq_r2r(mm2, mm5);
00936
00937 movq_r2m(mm6, tmp6);
00938 punpckhdq_r2r(mm4, mm3);
00939
00940 paddw_m2r(*(dataptr+10), mm2);
00941 movq_r2r(mm3, mm4);
00942
00943
00944
00945
00946
00947
00948 paddw_m2r(*(dataptr+8), mm3);
00949 movq_r2r(mm0, mm7);
00950
00951 psubw_m2r(*(dataptr+8), mm4);
00952 movq_r2r(mm1, mm6);
00953
00954 paddw_r2r(mm3, mm0);
00955 psubw_r2r(mm3, mm7);
00956
00957 psubw_r2r(mm2, mm6);
00958 paddw_r2r(mm2, mm1);
00959
00960 psubw_m2r(*(dataptr+10), mm5);
00961 paddw_r2r(mm7, mm6);
00962
00963
00964
/* Constant multiplies: operands are shifted left by 2 before pmulhw, which
   keeps the high 16 bits of each product. */
00965 movq_m2r(tmp6, mm2);
00966 movq_r2r(mm0, mm3);
00967
00968 psllw_i2r(2, mm6);
00969 paddw_r2r(mm1, mm0);
00970
00971 pmulhw_m2r(RTjpeg_C4, mm6);
00972 psubw_r2r(mm1, mm3);
00973
00974 movq_r2m(mm0, *dataptr);
00975 movq_r2r(mm7, mm0);
00976
00977
00978 movq_r2m(mm3, *(dataptr+8));
00979 paddw_r2r(mm5, mm4);
00980
00981 movq_m2r(tmp7, mm3);
00982 paddw_r2r(mm6, mm0);
00983
00984 paddw_r2r(mm2, mm5);
00985 psubw_r2r(mm6, mm7);
00986
00987 movq_r2m(mm0, *(dataptr+4));
00988 paddw_r2r(mm3, mm2);
00989
00990
00991
00992 movq_r2m(mm7, *(dataptr+12));
00993 movq_r2r(mm4, mm1);
00994
00995 psubw_r2r(mm2, mm1);
00996 psllw_i2r(2, mm4);
00997
00998 movq_m2r(RTjpeg_C2mC6, mm0);
00999 psllw_i2r(2, mm1);
01000
01001 pmulhw_m2r(RTjpeg_C6, mm1);
01002 psllw_i2r(2, mm2);
01003
01004 pmulhw_r2r(mm0, mm4);
01005
01006
01007
01008 pmulhw_m2r(RTjpeg_C2pC6, mm2);
01009 psllw_i2r(2, mm5);
01010
01011 pmulhw_m2r(RTjpeg_C4, mm5);
01012 movq_r2r(mm3, mm0);
01013
01014 movq_m2r(*(dataptr+1), mm7);
01015 paddw_r2r(mm1, mm4);
01016
01017 paddw_r2r(mm1, mm2);
01018
01019 paddw_r2r(mm5, mm0);
01020 psubw_r2r(mm5, mm3);
01021
01022
01023
01024 movq_r2r(mm3, mm5);
01025 psubw_r2r(mm4, mm3);
01026
01027 paddw_r2r(mm4, mm5);
01028 movq_r2r(mm0, mm6);
01029
01030 movq_r2m(mm3, *(dataptr+6));
01031 psubw_r2r(mm2, mm0);
01032
01033 movq_r2m(mm5, *(dataptr+10));
01034 paddw_r2r(mm2, mm6);
01035
01036 movq_r2m(mm0, *(dataptr+14));
01037
01038
01039
01040
01041
/* Same butterfly sequence for the odd-numbered quadwords (1, 3, ..., 15). */
01042 movq_m2r(*(dataptr+3), mm1);
01043 movq_r2r(mm7, mm0);
01044
01045 movq_r2m(mm6, *(dataptr+2));
01046
01047 movq_m2r(*(dataptr+5), mm2);
01048 movq_r2r(mm1, mm6);
01049
01050 paddw_m2r(*(dataptr+15), mm0);
01051
01052 movq_m2r(*(dataptr+7), mm3);
01053 movq_r2r(mm2, mm5);
01054
01055 psubw_m2r(*(dataptr+15), mm7);
01056 movq_r2r(mm3, mm4);
01057
01058 paddw_m2r(*(dataptr+13), mm1);
01059
01060 movq_r2m(mm7, tmp7);
01061 movq_r2r(mm0, mm7);
01062
01063 psubw_m2r(*(dataptr+13), mm6);
01064
01065
01066
01067 paddw_m2r(*(dataptr+9), mm3);
01068
01069 movq_r2m(mm6, tmp6);
01070 movq_r2r(mm1, mm6);
01071
01072 paddw_m2r(*(dataptr+11), mm2);
01073 paddw_r2r(mm3, mm0);
01074
01075 psubw_r2r(mm3, mm7);
01076
01077 psubw_m2r(*(dataptr+9), mm4);
01078 psubw_r2r(mm2, mm6);
01079
01080 paddw_r2r(mm2, mm1);
01081
01082 psubw_m2r(*(dataptr+11), mm5);
01083 paddw_r2r(mm7, mm6);
01084
01085
01086
01087 movq_m2r(tmp6, mm2);
01088 movq_r2r(mm0, mm3);
01089
01090 psllw_i2r(2, mm6);
01091 paddw_r2r(mm1, mm0);
01092
01093 pmulhw_m2r(RTjpeg_C4, mm6);
01094 psubw_r2r(mm1, mm3);
01095
01096 movq_r2m(mm0, *(dataptr+1));
01097 movq_r2r(mm7, mm0);
01098
01099
01100
01101 movq_r2m(mm3, *(dataptr+9));
01102 paddw_r2r(mm5, mm4);
01103
01104 movq_m2r(tmp7, mm3);
01105 paddw_r2r(mm6, mm0);
01106
01107 paddw_r2r(mm2, mm5);
01108 psubw_r2r(mm6, mm7);
01109
01110 movq_r2m(mm0, *(dataptr+5));
01111 paddw_r2r(mm3, mm2);
01112
01113
01114
01115 movq_r2m(mm7, *(dataptr+13));
01116 movq_r2r(mm4, mm1);
01117
01118 psubw_r2r(mm2, mm1);
01119 psllw_i2r(2, mm4);
01120
01121 movq_m2r(RTjpeg_C2mC6, mm0);
01122 psllw_i2r(2, mm1);
01123
01124 pmulhw_m2r(RTjpeg_C6, mm1);
01125 psllw_i2r(2, mm5);
01126
01127 pmulhw_r2r(mm0, mm4);
01128
01129
01130
01131 pmulhw_m2r(RTjpeg_C4, mm5);
01132 psllw_i2r(2, mm2);
01133
01134 pmulhw_m2r(RTjpeg_C2pC6, mm2);
01135 movq_r2r(mm3, mm0);
01136
01137 movq_m2r(*(dataptr+9), mm7);
01138 paddw_r2r(mm1, mm4);
01139
01140 paddw_r2r(mm5, mm0);
01141 psubw_r2r(mm5, mm3);
01142
01143
01144
01145 movq_r2r(mm3, mm5);
01146 paddw_r2r(mm1, mm2);
01147
01148 movq_r2r(mm0, mm6);
01149 psubw_r2r(mm4, mm5);
01150
01151 paddw_r2r(mm2, mm6);
01152 paddw_r2r(mm4, mm3);
01153
01154 movq_r2m(mm5, *(dataptr+7));
01155
01156 movq_r2m(mm6, *(dataptr+3));
01157 psubw_r2r(mm2, mm0);
01158
01159
01160
01161
01162
/* Second round.  The results still held in mm3 and mm0 are used directly as
   transpose inputs here; the matching step at the end of the function stores
   them to quadwords 11 and 15 instead.  The shuffles and butterflies below
   repeat the sequence above. */
01163 movq_m2r(*(dataptr+13), mm6);
01164 movq_r2r(mm7, mm5);
01165
01166 punpcklwd_r2r(mm3, mm7);
01167 movq_r2r(mm6, mm2);
01168
01169 punpcklwd_r2r(mm0, mm6);
01170 movq_r2r(mm7, mm1);
01171
01172 punpckldq_r2r(mm6, mm7);
01173
01174 punpckhdq_r2r(mm6, mm1);
01175
01176 movq_r2m(mm7, *(dataptr+9));
01177 punpckhwd_r2r(mm3, mm5);
01178
01179 movq_r2m(mm1, *(dataptr+11));
01180 punpckhwd_r2r(mm0, mm2);
01181
01182 movq_r2r(mm5, mm1);
01183 punpckldq_r2r(mm2, mm5);
01184
01185 movq_m2r(*(dataptr+1), mm0);
01186 punpckhdq_r2r(mm2, mm1);
01187
01188 movq_r2m(mm5, *(dataptr+13));
01189
01190
01191
01192 movq_r2m(mm1, *(dataptr+15));
01193
01194 movq_m2r(*(dataptr+5), mm2);
01195 movq_r2r(mm0, mm6);
01196
01197 punpcklwd_m2r(*(dataptr+3), mm0);
01198 movq_r2r(mm2, mm7);
01199
01200 punpcklwd_m2r(*(dataptr+7), mm2);
01201 movq_r2r(mm0, mm4);
01202
01203
01204
01205 movq_m2r(*(dataptr+8), mm1);
01206 punpckldq_r2r(mm2, mm0);
01207
01208 movq_m2r(*(dataptr+12), mm3);
01209 punpckhdq_r2r(mm2, mm4);
01210
01211 punpckhwd_m2r(*(dataptr+3), mm6);
01212 movq_r2r(mm1, mm2);
01213
01214 punpckhwd_m2r(*(dataptr+7), mm7);
01215 movq_r2r(mm6, mm5);
01216
01217 movq_r2m(mm0, *(dataptr+8));
01218 punpckhdq_r2r(mm7, mm5);
01219
01220 punpcklwd_m2r(*(dataptr+10), mm1);
01221 movq_r2r(mm3, mm0);
01222
01223 punpckhwd_m2r(*(dataptr+10), mm2);
01224
01225 movq_r2m(mm4, *(dataptr+10));
01226 punpckldq_r2r(mm7, mm6);
01227
01228 punpcklwd_m2r(*(dataptr+14), mm3);
01229 movq_r2r(mm1, mm4);
01230
01231 movq_r2m(mm6, *(dataptr+12));
01232 punpckldq_r2r(mm3, mm1);
01233
01234 punpckhwd_m2r(*(dataptr+14), mm0);
01235 movq_r2r(mm2, mm6);
01236
01237 movq_r2m(mm5, *(dataptr+14));
01238 punpckhdq_r2r(mm3, mm4);
01239
01240 movq_r2m(mm1, *(dataptr+1));
01241 punpckldq_r2r(mm0, mm2);
01242
01243 movq_r2m(mm4, *(dataptr+3));
01244 punpckhdq_r2r(mm0, mm6);
01245
01246 movq_r2m(mm2, *(dataptr+5));
01247
01248 movq_m2r(*dataptr, mm0);
01249
01250 movq_r2m(mm6, *(dataptr+7));
01251
01252
01253
01254 movq_m2r(*(dataptr+4), mm7);
01255 movq_r2r(mm0, mm2);
01256
01257 punpcklwd_m2r(*(dataptr+2), mm0);
01258 movq_r2r(mm7, mm4);
01259
01260 punpcklwd_m2r(*(dataptr+6), mm7);
01261 movq_r2r(mm0, mm1);
01262
01263 movq_m2r(*(dataptr+2), mm6);
01264 punpckldq_r2r(mm7, mm0);
01265
01266 movq_m2r(*(dataptr+6), mm5);
01267 punpckhdq_r2r(mm7, mm1);
01268
01269 movq_r2r(mm0, mm7);
01270 punpckhwd_r2r(mm6, mm2);
01271
01272 psubw_m2r(*(dataptr+14), mm7);
01273 movq_r2r(mm1, mm6);
01274
01275 paddw_m2r(*(dataptr+14), mm0);
01276 punpckhwd_r2r(mm5, mm4);
01277
01278 paddw_m2r(*(dataptr+12), mm1);
01279 movq_r2r(mm2, mm3);
01280
01281 psubw_m2r(*(dataptr+12), mm6);
01282 punpckldq_r2r(mm4, mm2);
01283
01284 movq_r2m(mm7, tmp7);
01285 movq_r2r(mm2, mm5);
01286
01287 movq_r2m(mm6, tmp6);
01288
01289 punpckhdq_r2r(mm4, mm3);
01290
01291 paddw_m2r(*(dataptr+10), mm2);
01292 movq_r2r(mm3, mm4);
01293
01294
01295
01296
01297
01298 paddw_m2r(*(dataptr+8), mm3);
01299 movq_r2r(mm0, mm7);
01300
01301 psubw_m2r(*(dataptr+8), mm4);
01302 movq_r2r(mm1, mm6);
01303
01304 paddw_r2r(mm3, mm0);
01305 psubw_r2r(mm3, mm7);
01306
01307 psubw_r2r(mm2, mm6);
01308 paddw_r2r(mm2, mm1);
01309
01310 psubw_m2r(*(dataptr+10), mm5);
01311 paddw_r2r(mm7, mm6);
01312
01313
01314
01315 movq_m2r(tmp6, mm2);
01316 movq_r2r(mm0, mm3);
01317
01318 psllw_i2r(2, mm6);
01319 paddw_r2r(mm1, mm0);
01320
01321 pmulhw_m2r(RTjpeg_C4, mm6);
01322 psubw_r2r(mm1, mm3);
01323
01324 movq_r2m(mm0, *dataptr);
01325 movq_r2r(mm7, mm0);
01326
01327
01328 movq_r2m(mm3, *(dataptr+8));
01329 paddw_r2r(mm5, mm4);
01330
01331 movq_m2r(tmp7, mm3);
01332 paddw_r2r(mm6, mm0);
01333
01334 paddw_r2r(mm2, mm5);
01335 psubw_r2r(mm6, mm7);
01336
01337 movq_r2m(mm0, *(dataptr+4));
01338 paddw_r2r(mm3, mm2);
01339
01340
01341 movq_r2m(mm7, *(dataptr+12));
01342 movq_r2r(mm4, mm1);
01343
01344 psubw_r2r(mm2, mm1);
01345 psllw_i2r(2, mm4);
01346
01347 movq_m2r(RTjpeg_C2mC6, mm0);
01348 psllw_i2r(2, mm1);
01349
01350 pmulhw_m2r(RTjpeg_C6, mm1);
01351 psllw_i2r(2, mm2);
01352
01353 pmulhw_r2r(mm0, mm4);
01354
01355
01356
01357 pmulhw_m2r(RTjpeg_C2pC6, mm2);
01358 psllw_i2r(2, mm5);
01359
01360 pmulhw_m2r(RTjpeg_C4, mm5);
01361 movq_r2r(mm3, mm0);
01362
01363 movq_m2r(*(dataptr+1), mm7);
01364 paddw_r2r(mm1, mm4);
01365
01366 paddw_r2r(mm1, mm2);
01367
01368 paddw_r2r(mm5, mm0);
01369 psubw_r2r(mm5, mm3);
01370
01371
01372
01373 movq_r2r(mm3, mm5);
01374 psubw_r2r(mm4, mm3);
01375
01376 paddw_r2r(mm4, mm5);
01377 movq_r2r(mm0, mm6);
01378
01379 movq_r2m(mm3, *(dataptr+6));
01380 psubw_r2r(mm2, mm0);
01381
01382 movq_r2m(mm5, *(dataptr+10));
01383 paddw_r2r(mm2, mm6);
01384
01385 movq_r2m(mm0, *(dataptr+14));
01386
01387
01388
01389
01390
01391 movq_m2r(*(dataptr+3), mm1);
01392 movq_r2r(mm7, mm0);
01393
01394 movq_r2m(mm6, *(dataptr+2));
01395
01396 movq_m2r(*(dataptr+5), mm2);
01397 movq_r2r(mm1, mm6);
01398
01399 paddw_m2r(*(dataptr+15), mm0);
01400
01401 movq_m2r(*(dataptr+7), mm3);
01402 movq_r2r(mm2, mm5);
01403
01404 psubw_m2r(*(dataptr+15), mm7);
01405 movq_r2r(mm3, mm4);
01406
01407 paddw_m2r(*(dataptr+13), mm1);
01408
01409 movq_r2m(mm7, tmp7);
01410 movq_r2r(mm0, mm7);
01411
01412 psubw_m2r(*(dataptr+13), mm6);
01413
01414
01415
01416 paddw_m2r(*(dataptr+9), mm3);
01417
01418 movq_r2m(mm6, tmp6);
01419 movq_r2r(mm1, mm6);
01420
01421 paddw_m2r(*(dataptr+11), mm2);
01422 paddw_r2r(mm3, mm0);
01423
01424 psubw_r2r(mm3, mm7);
01425
01426 psubw_m2r(*(dataptr+9), mm4);
01427 psubw_r2r(mm2, mm6);
01428
01429 paddw_r2r(mm2, mm1);
01430
01431 psubw_m2r(*(dataptr+11), mm5);
01432 paddw_r2r(mm7, mm6);
01433
01434
01435
01436 movq_m2r(tmp6, mm2);
01437 movq_r2r(mm0, mm3);
01438
01439 psllw_i2r(2, mm6);
01440 paddw_r2r(mm1, mm0);
01441
01442 pmulhw_m2r(RTjpeg_C4, mm6);
01443 psubw_r2r(mm1, mm3);
01444
01445 movq_r2m(mm0, *(dataptr+1));
01446 movq_r2r(mm7, mm0);
01447
01448
01449
01450 movq_r2m(mm3, *(dataptr+9));
01451 paddw_r2r(mm5, mm4);
01452
01453 movq_m2r(tmp7, mm3);
01454 paddw_r2r(mm6, mm0);
01455
01456 paddw_r2r(mm2, mm5);
01457 psubw_r2r(mm6, mm7);
01458
01459 movq_r2m(mm0, *(dataptr+5));
01460 paddw_r2r(mm3, mm2);
01461
01462
01463
01464 movq_r2m(mm7, *(dataptr+13));
01465 movq_r2r(mm4, mm1);
01466
01467 psubw_r2r(mm2, mm1);
01468 psllw_i2r(2, mm4);
01469
01470 movq_m2r(RTjpeg_C2mC6, mm0);
01471 psllw_i2r(2, mm1);
01472
01473 pmulhw_m2r(RTjpeg_C6, mm1);
01474 psllw_i2r(2, mm5);
01475
01476 pmulhw_r2r(mm0, mm4);
01477
01478
01479
01480 pmulhw_m2r(RTjpeg_C4, mm5);
01481 psllw_i2r(2, mm2);
01482
01483 pmulhw_m2r(RTjpeg_C2pC6, mm2);
01484 movq_r2r(mm3, mm0);
01485
01486 movq_m2r(*(dataptr+9), mm7);
01487 paddw_r2r(mm1, mm4);
01488
01489 paddw_r2r(mm5, mm0);
01490 psubw_r2r(mm5, mm3);
01491
01492
01493
01494 movq_r2r(mm3, mm5);
01495 paddw_r2r(mm1, mm2);
01496
01497 movq_r2r(mm0, mm6);
01498 psubw_r2r(mm4, mm5);
01499
01500 paddw_r2r(mm2, mm6);
01501 paddw_r2r(mm4, mm3);
01502
/* Final stores of the last four result quadwords (7, 11, 3, 15). */
01503 movq_r2m(mm5, *(dataptr+7));
01504 psubw_r2r(mm2, mm0);
01505
01506 movq_r2m(mm3, *(dataptr+11));
01507
01508 movq_r2m(mm6, *(dataptr+3));
01509
01510 movq_r2m(mm0, *(dataptr+15));
01511
01512
01513 #endif
01514 }
01515
/*
 * Fixed-point helpers for the inverse DCT.
 *   FIX_x_xxxxxxxxx  the constant spelled in the name, scaled by 2^8 and
 *                    rounded (e.g. 1.414213562 * 256 ~= 362).
 *   DESCALE(x)       add 4, shift right by 3, narrow to int16_t.
 *   RL(x)            clamp x to the range 16..235.
 *   MULTIPLY(v, k)   32-bit product, rounded and shifted right by 8.
 *
 * RL and DESCALE expand to a single parenthesized expression, so they can be
 * used inside larger expressions without changing how operators associate.
 * All three function-like macros may evaluate their argument more than once
 * (RL) or exactly once (DESCALE, MULTIPLY); avoid arguments with side effects
 * in RL.
 */
#define FIX_1_082392200 ((int32_t) 277)
#define FIX_1_414213562 ((int32_t) 362)
#define FIX_1_847759065 ((int32_t) 473)
#define FIX_2_613125930 ((int32_t) 669)

#define DESCALE(x) ((int16_t)(((x) + 4) >> 3))

#define RL(x) (((x) > 235) ? 235 : (((x) < 16) ? 16 : (x)))
#define MULTIPLY(var, k) (((int32_t) ((var) * (k)) + 128) >> 8)
01527
01528 void RTjpeg::IdctInit(void)
01529 {
01530 int i;
01531
01532 for( i = 0; i < 64; i++)
01533 {
01534 liqt[i] = ((uint64_t)liqt[i] * RTjpeg_aan_tab[i]) >> 32;
01535 ciqt[i] = ((uint64_t)ciqt[i] * RTjpeg_aan_tab[i]) >> 32;
01536 }
01537 }
01538
/*
 * Inverse 8x8 DCT: turn one block of 64 coefficients into 8-bit samples.
 *
 *   odata - address of the top-left output sample of the 8x8 block
 *   data  - the 64 input coefficients (read only)
 *   rskip - distance between two consecutive output rows, in bytes
 *
 * The member buffer ws is used as scratch space between the two passes.
 * Two implementations are selected at compile time:
 *   - MMX:  hand-scheduled MMX code working on 16-bit lanes. Results are
 *           shifted right by 3 and packed with packuswb (unsigned saturation).
 *   - else: portable integer code. Results go through DESCALE() and are
 *           clamped with RL().
 * NOTE(review): the two paths therefore clamp to different ranges - confirm
 * that this is intended.
 */
01539 void RTjpeg::Idct(uint8_t *odata, int16_t *data, int rskip)
01540 {
01541 #ifdef MMX
01542 /* pmulhw multipliers: one signed 16-bit constant replicated in all four words */
01543 static mmx_t fix_141 = { q: (long long)0x5a825a825a825a82LL };
01544 static mmx_t fix_184n261 = { q: (long long)0xcf04cf04cf04cf04LL };
01545 static mmx_t fix_184 = { q: (long long)0x7641764176417641LL };
01546 static mmx_t fix_n184 = { q: (long long)0x896f896f896f896fLL };
01547 static mmx_t fix_108n184 = { q: (long long)0xcf04cf04cf04cf04LL };
01548
01549 mmx_t *wsptr = (mmx_t *)ws;
01550 register mmx_t *dataptr = (mmx_t *)odata;
01551 mmx_t *idata = (mmx_t *)data;
01552 /* dataptr is stepped in mmx_t units, so the byte stride is divided by 8 (assumes sizeof(mmx_t) == 8) */
01553 rskip = rskip>>3;
01554
01555 /* Stage 1, first half: loads the even quadwords idata+0,2,...,14 and */
01556 /* leaves its eight results in wsptr+0,2,...,14. */
01557 /* NOTE(review): presumably the column pass for columns 0-3, four columns per register - confirm. */
01558
01559
01560 movq_m2r(*(idata+10), mm1);
01561
01562 movq_m2r(*(idata+6), mm0);
01563
01564 movq_m2r(*(idata+2), mm3);
01565
01566 movq_r2r(mm1, mm2);
01567
01568 movq_m2r(*(idata+14), mm4);
01569
01570 paddw_r2r(mm0, mm1);
01571
01572 psubw_r2r(mm0, mm2);
01573
01574 psllw_i2r(2, mm2);
01575 movq_r2r(mm2, mm0);
01576
01577 pmulhw_m2r(fix_184n261, mm2);
01578 movq_r2r(mm3, mm5);
01579
01580 pmulhw_m2r(fix_n184, mm0);
01581 paddw_r2r(mm4, mm3);
01582
01583 movq_r2r(mm3, mm6);
01584 psubw_r2r(mm4, mm5);
01585
01586 psubw_r2r(mm1, mm6);
01587 psllw_i2r(2, mm5);
01588
01589 movq_m2r(*(idata+12), mm4);
01590 movq_r2r(mm5, mm7);
01591
01592 pmulhw_m2r(fix_108n184, mm5);
01593 paddw_r2r(mm1, mm3);
01594
01595
01596
01597
01598 pmulhw_m2r(fix_184, mm7);
01599 psllw_i2r(2, mm6);
01600
01601 movq_m2r(*(idata+4), mm1);
01602
01603 paddw_r2r(mm5, mm0);
01604
01605 paddw_r2r(mm7, mm2);
01606
01607 pmulhw_m2r(fix_141, mm6);
01608 psubw_r2r(mm3, mm2);
01609
01610 movq_r2r(mm1, mm5);
01611 paddw_r2r(mm4, mm1);
01612
01613 psubw_r2r(mm4, mm5);
01614 psubw_r2r(mm2, mm6);
01615
01616 movq_r2m(mm1, *(wsptr));
01617 psllw_i2r(2, mm5);
01618
01619 movq_m2r(*(idata), mm7);
01620
01621 pmulhw_m2r(fix_141, mm5);
01622 paddw_r2r(mm6, mm0);
01623
01624 movq_m2r(*(idata+8), mm4);
01625
01626 psubw_r2r(mm1, mm5);
01627
01628 movq_r2m(mm0, *(wsptr+4));
01629 movq_r2r(mm7, mm1);
01630
01631 movq_r2m(mm5, *(wsptr+2));
01632 psubw_r2r(mm4, mm1);
01633
01634 paddw_r2r(mm4, mm7);
01635 movq_r2r(mm1, mm5);
01636
01637 paddw_m2r(*(wsptr+2), mm1);
01638 movq_r2r(mm7, mm4);
01639
01640 paddw_m2r(*(wsptr), mm7);
01641
01642 psubw_m2r(*(wsptr), mm4);
01643 movq_r2r(mm7, mm0);
01644
01645 psubw_m2r(*(wsptr+2), mm5);
01646 paddw_r2r(mm3, mm7);
01647
01648 psubw_r2r(mm3, mm0);
01649
01650 movq_r2m(mm7, *(wsptr));
01651 movq_r2r(mm1, mm3);
01652
01653 movq_r2m(mm0, *(wsptr+14));
01654 paddw_r2r(mm2, mm1);
01655
01656 psubw_r2r(mm2, mm3);
01657
01658 movq_r2m(mm1, *(wsptr+2));
01659 movq_r2r(mm4, mm1);
01660
01661 movq_r2m(mm3, *(wsptr+12));
01662
01663 paddw_m2r(*(wsptr+4), mm4);
01664
01665 psubw_m2r(*(wsptr+4), mm1);
01666
01667 movq_r2m(mm4, *(wsptr+8));
01668 movq_r2r(mm5, mm7);
01669
01670 paddw_r2r(mm6, mm5);
01671
01672 movq_r2m(mm1, *(wsptr+6));
01673 psubw_r2r(mm6, mm7);
01674
01675 movq_r2m(mm5, *(wsptr+4));
01676
01677 movq_r2m(mm7, *(wsptr+10));
01678
01679
01680
01681 /* Stage 1, second half: step both pointers by one quadword and run the */
01682 /* same computation on the odd quadwords (idata+1,3,...,15 -> wsptr+1,3,...,15). */
01683
01684 idata++;
01685 wsptr++;
01686
01687
01688
01689 movq_m2r(*(idata+10), mm1);
01690
01691 movq_m2r(*(idata+6), mm0);
01692
01693 movq_m2r(*(idata+2), mm3);
01694 movq_r2r(mm1, mm2);
01695
01696 movq_m2r(*(idata+14), mm4);
01697 paddw_r2r(mm0, mm1);
01698
01699 psubw_r2r(mm0, mm2);
01700
01701 psllw_i2r(2, mm2);
01702 movq_r2r(mm2, mm0);
01703
01704 pmulhw_m2r(fix_184n261, mm2);
01705 movq_r2r(mm3, mm5);
01706
01707 pmulhw_m2r(fix_n184, mm0);
01708 paddw_r2r(mm4, mm3);
01709
01710 movq_r2r(mm3, mm6);
01711 psubw_r2r(mm4, mm5);
01712
01713 psubw_r2r(mm1, mm6);
01714 psllw_i2r(2, mm5);
01715
01716 movq_m2r(*(idata+12), mm4);
01717 movq_r2r(mm5, mm7);
01718
01719 pmulhw_m2r(fix_108n184, mm5);
01720 paddw_r2r(mm1, mm3);
01721
01722
01723
01724
01725 pmulhw_m2r(fix_184, mm7);
01726 psllw_i2r(2, mm6);
01727
01728 movq_m2r(*(idata+4), mm1);
01729
01730 paddw_r2r(mm5, mm0);
01731
01732 paddw_r2r(mm7, mm2);
01733
01734 pmulhw_m2r(fix_141, mm6);
01735 psubw_r2r(mm3, mm2);
01736
01737 movq_r2r(mm1, mm5);
01738 paddw_r2r(mm4, mm1);
01739
01740 psubw_r2r(mm4, mm5);
01741 psubw_r2r(mm2, mm6);
01742
01743 movq_r2m(mm1, *(wsptr));
01744 psllw_i2r(2, mm5);
01745
01746 movq_m2r(*(idata), mm7);
01747 paddw_r2r(mm6, mm0);
01748
01749 pmulhw_m2r(fix_141, mm5);
01750
01751 movq_m2r(*(idata+8), mm4);
01752
01753 psubw_r2r(mm1, mm5);
01754
01755 movq_r2m(mm0, *(wsptr+4));
01756 movq_r2r(mm7, mm1);
01757
01758 movq_r2m(mm5, *(wsptr+2));
01759 psubw_r2r(mm4, mm1);
01760
01761 paddw_r2r(mm4, mm7);
01762 movq_r2r(mm1, mm5);
01763
01764 paddw_m2r(*(wsptr+2), mm1);
01765 movq_r2r(mm7, mm4);
01766
01767 paddw_m2r(*(wsptr), mm7);
01768
01769 psubw_m2r(*(wsptr), mm4);
01770 movq_r2r(mm7, mm0);
01771
01772 psubw_m2r(*(wsptr+2), mm5);
01773 paddw_r2r(mm3, mm7);
01774
01775 psubw_r2r(mm3, mm0);
01776
01777 movq_r2m(mm7, *(wsptr));
01778 movq_r2r(mm1, mm3);
01779
01780 movq_r2m(mm0, *(wsptr+14));
01781 paddw_r2r(mm2, mm1);
01782
01783 psubw_r2r(mm2, mm3);
01784
01785 movq_r2m(mm1, *(wsptr+2));
01786 movq_r2r(mm4, mm1);
01787
01788 movq_r2m(mm3, *(wsptr+12));
01789
01790 paddw_m2r(*(wsptr+4), mm4);
01791
01792 psubw_m2r(*(wsptr+4), mm1);
01793
01794 movq_r2m(mm4, *(wsptr+8));
01795 movq_r2r(mm5, mm7);
01796
01797 paddw_r2r(mm6, mm5);
01798
01799 movq_r2m(mm1, *(wsptr+6));
01800 psubw_r2r(mm6, mm7);
01801
01802 movq_r2m(mm5, *(wsptr+4));
01803
01804 movq_r2m(mm7, *(wsptr+10));
01805
01806
01807
01808
01809
01810
01811
01812
01813 /* Undo the wsptr++ above so wsptr points at the start of ws again. */
01814
01815 wsptr--;
01816
01817 /* Stage 2, first half: reads wsptr+0..7, regroups words/dwords with the */
01818 /* punpck* instructions, shifts the results right by 3 (psraw), packs them to */
01819 /* bytes (packuswb) and stores four 8-byte rows, stepping dataptr by rskip. */
01820
01821 movq_m2r(*(wsptr), mm0);
01822
01823 movq_m2r(*(wsptr+1), mm1);
01824 movq_r2r(mm0, mm2);
01825
01826 movq_m2r(*(wsptr+2), mm3);
01827 paddw_r2r(mm1, mm0);
01828
01829 movq_m2r(*(wsptr+3), mm4);
01830 psubw_r2r(mm1, mm2);
01831
01832 movq_r2r(mm0, mm6);
01833 movq_r2r(mm3, mm5);
01834
01835 paddw_r2r(mm4, mm3);
01836 movq_r2r(mm2, mm1);
01837
01838 psubw_r2r(mm4, mm5);
01839 punpcklwd_r2r(mm3, mm0);
01840
01841 movq_m2r(*(wsptr+7), mm7);
01842 punpckhwd_r2r(mm3, mm6);
01843
01844 movq_m2r(*(wsptr+4), mm3);
01845 punpckldq_r2r(mm6, mm0);
01846
01847 punpcklwd_r2r(mm5, mm1);
01848 movq_r2r(mm3, mm4);
01849
01850 movq_m2r(*(wsptr+6), mm6);
01851 punpckhwd_r2r(mm5, mm2);
01852
01853 movq_m2r(*(wsptr+5), mm5);
01854 punpckldq_r2r(mm2, mm1);
01855
01856
01857 paddw_r2r(mm5, mm3);
01858 movq_r2r(mm6, mm2);
01859
01860 psubw_r2r(mm5, mm4);
01861 paddw_r2r(mm7, mm6);
01862
01863 movq_r2r(mm3, mm5);
01864 punpcklwd_r2r(mm6, mm3);
01865
01866 psubw_r2r(mm7, mm2);
01867 punpckhwd_r2r(mm6, mm5);
01868
01869 movq_r2r(mm4, mm7);
01870 punpckldq_r2r(mm5, mm3);
01871
01872 punpcklwd_r2r(mm2, mm4);
01873
01874 punpckhwd_r2r(mm2, mm7);
01875
01876 punpckldq_r2r(mm7, mm4);
01877 movq_r2r(mm1, mm6);
01878
01879
01880
01881
01882
01883
01884
01885 movq_r2r(mm0, mm2);
01886 punpckhdq_r2r(mm4, mm6);
01887
01888 punpckldq_r2r(mm4, mm1);
01889 psllw_i2r(2, mm6);
01890
01891 pmulhw_m2r(fix_141, mm6);
01892 punpckldq_r2r(mm3, mm0);
01893
01894 punpckhdq_r2r(mm3, mm2);
01895 movq_r2r(mm0, mm7);
01896
01897
01898
01899 paddw_r2r(mm2, mm0);
01900 psubw_r2r(mm2, mm7);
01901
01902
01903 psubw_r2r(mm2, mm6);
01904
01905
01906 movq_r2r(mm1, mm5);
01907
01908
01909
01910
01911
01912
01913
01914
01915
01916 movq_m2r(*(wsptr), mm3);
01917 paddw_r2r(mm6, mm1);
01918
01919 movq_m2r(*(wsptr+1), mm4);
01920 psubw_r2r(mm6, mm5);
01921
01922 movq_r2r(mm3, mm6);
01923 punpckldq_r2r(mm4, mm3);
01924
01925 punpckhdq_r2r(mm6, mm4);
01926 movq_r2r(mm3, mm2);
01927
01928
01929 movq_r2m(mm0, *(wsptr));
01930 paddw_r2r(mm4, mm2);
01931
01932
01933
01934 movq_m2r(*(wsptr+2), mm6);
01935 psubw_r2r(mm4, mm3);
01936
01937 movq_m2r(*(wsptr+3), mm0);
01938 movq_r2r(mm6, mm4);
01939
01940 movq_r2m(mm1, *(wsptr+1));
01941 punpckldq_r2r(mm0, mm6);
01942
01943 punpckhdq_r2r(mm4, mm0);
01944 movq_r2r(mm6, mm1);
01945
01946
01947 paddw_r2r(mm0, mm6);
01948 movq_r2r(mm2, mm4);
01949
01950
01951 movq_r2m(mm5, *(wsptr+2));
01952 punpcklwd_r2r(mm6, mm2);
01953
01954 psubw_r2r(mm0, mm1);
01955 punpckhwd_r2r(mm6, mm4);
01956
01957 movq_r2r(mm3, mm0);
01958 punpcklwd_r2r(mm1, mm3);
01959
01960 movq_r2m(mm7, *(wsptr+3));
01961 punpckhwd_r2r(mm1, mm0);
01962
01963 movq_m2r(*(wsptr+4), mm6);
01964 punpckhdq_r2r(mm2, mm0);
01965
01966 movq_m2r(*(wsptr+5), mm7);
01967 punpckhdq_r2r(mm4, mm3);
01968
01969 movq_m2r(*(wsptr+6), mm1);
01970 movq_r2r(mm6, mm4);
01971
01972 punpckldq_r2r(mm7, mm6);
01973 movq_r2r(mm1, mm5);
01974
01975 punpckhdq_r2r(mm4, mm7);
01976 movq_r2r(mm6, mm2);
01977
01978 movq_m2r(*(wsptr+7), mm4);
01979 paddw_r2r(mm7, mm6);
01980
01981 psubw_r2r(mm7, mm2);
01982 punpckldq_r2r(mm4, mm1);
01983
01984 punpckhdq_r2r(mm5, mm4);
01985 movq_r2r(mm1, mm7);
01986
01987 paddw_r2r(mm4, mm1);
01988 psubw_r2r(mm4, mm7);
01989
01990 movq_r2r(mm6, mm5);
01991 punpcklwd_r2r(mm1, mm6);
01992
01993 punpckhwd_r2r(mm1, mm5);
01994 movq_r2r(mm2, mm4);
01995
01996 punpcklwd_r2r(mm7, mm2);
01997
01998 punpckhwd_r2r(mm7, mm4);
01999
02000 punpckhdq_r2r(mm6, mm4);
02001
02002 punpckhdq_r2r(mm5, mm2);
02003 movq_r2r(mm0, mm5);
02004
02005 punpckldq_r2r(mm4, mm0);
02006
02007 punpckhdq_r2r(mm4, mm5);
02008 movq_r2r(mm3, mm4);
02009
02010 punpckhdq_r2r(mm2, mm4);
02011 movq_r2r(mm5, mm1);
02012
02013 punpckldq_r2r(mm2, mm3);
02014
02015
02016 psubw_r2r(mm4, mm1);
02017
02018 paddw_r2r(mm4, mm5);
02019
02020 psllw_i2r(2, mm1);
02021
02022 psllw_i2r(2, mm0);
02023
02024 pmulhw_m2r(fix_141, mm1);
02025
02026
02027 psllw_i2r(2, mm3);
02028 movq_r2r(mm0, mm7);
02029
02030 pmulhw_m2r(fix_n184, mm7);
02031 movq_r2r(mm3, mm6);
02032
02033 movq_m2r(*(wsptr), mm2);
02034
02035 pmulhw_m2r(fix_108n184, mm6);
02036
02037
02038 movq_r2r(mm2, mm4);
02039
02040 pmulhw_m2r(fix_184n261, mm0);
02041 paddw_r2r(mm5, mm2);
02042
02043 pmulhw_m2r(fix_184, mm3);
02044 psubw_r2r(mm5, mm4);
02045
02046
02047 psraw_i2r(3, mm2);
02048
02049 paddw_r2r(mm6, mm7);
02050 psraw_i2r(3, mm4);
02051
02052 paddw_r2r(mm0, mm3);
02053
02054
02055 psubw_r2r(mm5, mm3);
02056
02057
02058 movq_m2r(*(wsptr+1), mm0);
02059 psubw_r2r(mm3, mm1);
02060
02061 movq_r2r(mm0, mm6);
02062 paddw_r2r(mm3, mm0);
02063
02064
02065
02066
02067
02068
02069
02070
02071
02072
02073
02074
02075
02076
02077 psubw_r2r(mm3, mm6);
02078 psraw_i2r(3, mm0);
02079
02080 psraw_i2r(3, mm6);
02081
02082 packuswb_r2r(mm4, mm0);
02083
02084 movq_m2r(*(wsptr+2), mm5);
02085 packuswb_r2r(mm6, mm2);
02086
02087
02088
02089
02090
02091 paddw_r2r(mm1, mm7);
02092 movq_r2r(mm5, mm3);
02093
02094 paddw_r2r(mm1, mm5);
02095 psubw_r2r(mm1, mm3);
02096
02097 psraw_i2r(3, mm5);
02098
02099 movq_m2r(*(wsptr+3), mm4);
02100 psraw_i2r(3, mm3);
02101
02102
02103
02104
02105
02106
02107
02108 movq_r2r(mm4, mm6);
02109 paddw_r2r(mm7, mm4);
02110
02111 psubw_r2r(mm7, mm6);
02112 psraw_i2r(3, mm4);
02113
02114
02115
02116 psraw_i2r(3, mm6);
02117
02118 packuswb_r2r(mm4, mm5);
02119
02120 packuswb_r2r(mm3, mm6);
02121 movq_r2r(mm2, mm4);
02122
02123 movq_r2r(mm5, mm7);
02124 punpcklbw_r2r(mm0, mm2);
02125
02126 punpckhbw_r2r(mm0, mm4);
02127 movq_r2r(mm2, mm1);
02128
02129 punpcklbw_r2r(mm6, mm5);
02130
02131
02132
02133 punpckhbw_r2r(mm6, mm7);
02134
02135 punpcklwd_r2r(mm5, mm2);
02136
02137
02138
02139 movq_r2r(mm7, mm6);
02140 punpckhwd_r2r(mm5, mm1);
02141
02142 movq_r2r(mm2, mm0);
02143 punpcklwd_r2r(mm4, mm6);
02144
02145
02146
02147 punpckldq_r2r(mm6, mm2);
02148
02149
02150
02151 movq_r2r(mm1, mm3);
02152
02153
02154
02155 punpckhwd_r2r(mm4, mm7);
02156 /* first output row goes to dataptr itself; the next three follow at rskip steps */
02157 movq_r2m(mm2, *(dataptr));
02158
02159 punpckhdq_r2r(mm6, mm0);
02160
02161 dataptr += rskip;
02162 movq_r2m(mm0, *(dataptr));
02163
02164 punpckldq_r2r(mm7, mm1);
02165 punpckhdq_r2r(mm7, mm3);
02166
02167 dataptr += rskip;
02168 movq_r2m(mm1, *(dataptr));
02169
02170 dataptr += rskip;
02171 movq_r2m(mm3, *(dataptr));
02172
02173 /* Stage 2, second half: the same sequence on wsptr+8..15, producing the */
02174 /* remaining four output rows. */
02175 wsptr += 8;
02176
02177
02178
02179
02180
02181
02182
02183 movq_m2r(*(wsptr), mm0);
02184
02185 movq_m2r(*(wsptr+1), mm1);
02186 movq_r2r(mm0, mm2);
02187
02188 movq_m2r(*(wsptr+2), mm3);
02189 paddw_r2r(mm1, mm0);
02190
02191 movq_m2r(*(wsptr+3), mm4);
02192 psubw_r2r(mm1, mm2);
02193
02194 movq_r2r(mm0, mm6);
02195 movq_r2r(mm3, mm5);
02196
02197 paddw_r2r(mm4, mm3);
02198 movq_r2r(mm2, mm1);
02199
02200 psubw_r2r(mm4, mm5);
02201 punpcklwd_r2r(mm3, mm0);
02202
02203 movq_m2r(*(wsptr+7), mm7);
02204 punpckhwd_r2r(mm3, mm6);
02205
02206 movq_m2r(*(wsptr+4), mm3);
02207 punpckldq_r2r(mm6, mm0);
02208
02209 punpcklwd_r2r(mm5, mm1);
02210 movq_r2r(mm3, mm4);
02211
02212 movq_m2r(*(wsptr+6), mm6);
02213 punpckhwd_r2r(mm5, mm2);
02214
02215 movq_m2r(*(wsptr+5), mm5);
02216 punpckldq_r2r(mm2, mm1);
02217
02218 paddw_r2r(mm5, mm3);
02219 movq_r2r(mm6, mm2);
02220
02221 psubw_r2r(mm5, mm4);
02222 paddw_r2r(mm7, mm6);
02223
02224 movq_r2r(mm3, mm5);
02225 punpcklwd_r2r(mm6, mm3);
02226
02227 psubw_r2r(mm7, mm2);
02228 punpckhwd_r2r(mm6, mm5);
02229
02230 movq_r2r(mm4, mm7);
02231 punpckldq_r2r(mm5, mm3);
02232
02233 punpcklwd_r2r(mm2, mm4);
02234
02235 punpckhwd_r2r(mm2, mm7);
02236
02237 punpckldq_r2r(mm7, mm4);
02238 movq_r2r(mm1, mm6);
02239
02240
02241
02242
02243
02244
02245 movq_r2r(mm0, mm2);
02246 punpckhdq_r2r(mm4, mm6);
02247
02248 punpckldq_r2r(mm4, mm1);
02249 psllw_i2r(2, mm6);
02250
02251 pmulhw_m2r(fix_141, mm6);
02252 punpckldq_r2r(mm3, mm0);
02253
02254 punpckhdq_r2r(mm3, mm2);
02255 movq_r2r(mm0, mm7);
02256
02257
02258
02259 paddw_r2r(mm2, mm0);
02260 psubw_r2r(mm2, mm7);
02261
02262
02263 psubw_r2r(mm2, mm6);
02264
02265
02266 movq_r2r(mm1, mm5);
02267
02268
02269
02270
02271
02272
02273
02274
02275
02276
02277 movq_m2r(*(wsptr), mm3);
02278 paddw_r2r(mm6, mm1);
02279
02280 movq_m2r(*(wsptr+1), mm4);
02281 psubw_r2r(mm6, mm5);
02282
02283 movq_r2r(mm3, mm6);
02284 punpckldq_r2r(mm4, mm3);
02285
02286 punpckhdq_r2r(mm6, mm4);
02287 movq_r2r(mm3, mm2);
02288
02289
02290 movq_r2m(mm0, *(wsptr));
02291 paddw_r2r(mm4, mm2);
02292
02293
02294
02295 movq_m2r(*(wsptr+2), mm6);
02296 psubw_r2r(mm4, mm3);
02297
02298 movq_m2r(*(wsptr+3), mm0);
02299 movq_r2r(mm6, mm4);
02300
02301 movq_r2m(mm1, *(wsptr+1));
02302 punpckldq_r2r(mm0, mm6);
02303
02304 punpckhdq_r2r(mm4, mm0);
02305 movq_r2r(mm6, mm1);
02306
02307
02308 paddw_r2r(mm0, mm6);
02309 movq_r2r(mm2, mm4);
02310
02311
02312 movq_r2m(mm5, *(wsptr+2));
02313 punpcklwd_r2r(mm6, mm2);
02314
02315 psubw_r2r(mm0, mm1);
02316 punpckhwd_r2r(mm6, mm4);
02317
02318 movq_r2r(mm3, mm0);
02319 punpcklwd_r2r(mm1, mm3);
02320
02321 movq_r2m(mm7, *(wsptr+3));
02322 punpckhwd_r2r(mm1, mm0);
02323
02324 movq_m2r(*(wsptr+4), mm6);
02325 punpckhdq_r2r(mm2, mm0);
02326
02327 movq_m2r(*(wsptr+5), mm7);
02328 punpckhdq_r2r(mm4, mm3);
02329
02330 movq_m2r(*(wsptr+6), mm1);
02331 movq_r2r(mm6, mm4);
02332
02333 punpckldq_r2r(mm7, mm6);
02334 movq_r2r(mm1, mm5);
02335
02336 punpckhdq_r2r(mm4, mm7);
02337 movq_r2r(mm6, mm2);
02338
02339 movq_m2r(*(wsptr+7), mm4);
02340 paddw_r2r(mm7, mm6);
02341
02342 psubw_r2r(mm7, mm2);
02343 punpckldq_r2r(mm4, mm1);
02344
02345 punpckhdq_r2r(mm5, mm4);
02346 movq_r2r(mm1, mm7);
02347
02348 paddw_r2r(mm4, mm1);
02349 psubw_r2r(mm4, mm7);
02350
02351 movq_r2r(mm6, mm5);
02352 punpcklwd_r2r(mm1, mm6);
02353
02354 punpckhwd_r2r(mm1, mm5);
02355 movq_r2r(mm2, mm4);
02356
02357 punpcklwd_r2r(mm7, mm2);
02358
02359 punpckhwd_r2r(mm7, mm4);
02360
02361 punpckhdq_r2r(mm6, mm4);
02362
02363 punpckhdq_r2r(mm5, mm2);
02364 movq_r2r(mm0, mm5);
02365
02366 punpckldq_r2r(mm4, mm0);
02367
02368 punpckhdq_r2r(mm4, mm5);
02369 movq_r2r(mm3, mm4);
02370
02371 punpckhdq_r2r(mm2, mm4);
02372 movq_r2r(mm5, mm1);
02373
02374 punpckldq_r2r(mm2, mm3);
02375
02376
02377 psubw_r2r(mm4, mm1);
02378
02379 paddw_r2r(mm4, mm5);
02380
02381 psllw_i2r(2, mm1);
02382
02383 psllw_i2r(2, mm0);
02384
02385 pmulhw_m2r(fix_141, mm1);
02386
02387
02388 psllw_i2r(2, mm3);
02389 movq_r2r(mm0, mm7);
02390
02391 pmulhw_m2r(fix_n184, mm7);
02392 movq_r2r(mm3, mm6);
02393
02394 movq_m2r(*(wsptr), mm2);
02395
02396 pmulhw_m2r(fix_108n184, mm6);
02397
02398
02399 movq_r2r(mm2, mm4);
02400
02401 pmulhw_m2r(fix_184n261, mm0);
02402 paddw_r2r(mm5, mm2);
02403
02404 pmulhw_m2r(fix_184, mm3);
02405 psubw_r2r(mm5, mm4);
02406
02407
02408 psraw_i2r(3, mm2);
02409
02410 paddw_r2r(mm6, mm7);
02411 psraw_i2r(3, mm4);
02412
02413 paddw_r2r(mm0, mm3);
02414
02415
02416 psubw_r2r(mm5, mm3);
02417
02418
02419 movq_m2r(*(wsptr+1), mm0);
02420 psubw_r2r(mm3, mm1);
02421
02422 movq_r2r(mm0, mm6);
02423 paddw_r2r(mm3, mm0);
02424
02425
02426
02427
02428
02429
02430
02431
02432
02433
02434
02435
02436
02437 psubw_r2r(mm3, mm6);
02438 psraw_i2r(3, mm0);
02439
02440 psraw_i2r(3, mm6);
02441
02442 packuswb_r2r(mm4, mm0);
02443
02444 movq_m2r(*(wsptr+2), mm5);
02445 packuswb_r2r(mm6, mm2);
02446
02447
02448
02449
02450
02451 paddw_r2r(mm1, mm7);
02452 movq_r2r(mm5, mm3);
02453
02454 paddw_r2r(mm1, mm5);
02455 psubw_r2r(mm1, mm3);
02456
02457 psraw_i2r(3, mm5);
02458
02459 movq_m2r(*(wsptr+3), mm4);
02460 psraw_i2r(3, mm3);
02461
02462
02463
02464
02465
02466
02467
02468 movq_r2r(mm4, mm6);
02469 paddw_r2r(mm7, mm4);
02470
02471 psubw_r2r(mm7, mm6);
02472 psraw_i2r(3, mm4);
02473
02474 psraw_i2r(3, mm6);
02475
02476
02477
02478
02479
02480
02481
02482
02483
02484 packuswb_r2r(mm4, mm5);
02485
02486 packuswb_r2r(mm3, mm6);
02487 movq_r2r(mm2, mm4);
02488
02489 movq_r2r(mm5, mm7);
02490 punpcklbw_r2r(mm0, mm2);
02491
02492 punpckhbw_r2r(mm0, mm4);
02493 movq_r2r(mm2, mm1);
02494
02495 punpcklbw_r2r(mm6, mm5);
02496
02497 punpckhbw_r2r(mm6, mm7);
02498
02499 punpcklwd_r2r(mm5, mm2);
02500
02501 movq_r2r(mm7, mm6);
02502 punpckhwd_r2r(mm5, mm1);
02503
02504 movq_r2r(mm2, mm0);
02505 punpcklwd_r2r(mm4, mm6);
02506
02507 punpckldq_r2r(mm6, mm2);
02508
02509 movq_r2r(mm1, mm3);
02510
02511 punpckhwd_r2r(mm4, mm7);
02512 /* dataptr still points at the fourth row, so advance before every store here */
02513 dataptr += rskip;
02514 movq_r2m(mm2, *(dataptr));
02515
02516 punpckhdq_r2r(mm6, mm0);
02517
02518 dataptr += rskip;
02519 movq_r2m(mm0, *(dataptr));
02520
02521 punpckldq_r2r(mm7, mm1);
02522
02523 punpckhdq_r2r(mm7, mm3);
02524
02525 dataptr += rskip;
02526 movq_r2m(mm1, *(dataptr));
02527
02528 dataptr += rskip;
02529 movq_r2m(mm3, *(dataptr));
02530
02531 #else
02532 int32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
02533 int32_t tmp10, tmp11, tmp12, tmp13;
02534 int32_t z5, z10, z11, z12, z13;
02535 int16_t *inptr;
02536 int32_t *wsptr;
02537 uint8_t *outptr;
02538 int ctr;
02539 int32_t dcval;
02540 /* Pass 1: one iteration per input column; intermediates go to ws as 32-bit values. */
02541 inptr = data;
02542 wsptr = ws;
02543 for (ctr = 8; ctr > 0; ctr--) {
02544 /* Shortcut: when rows 1..7 of this column are all zero, every output equals the row-0 term. */
02545 if ((inptr[8] | inptr[16] | inptr[24] |
02546 inptr[32] | inptr[40] | inptr[48] | inptr[56]) == 0) {
02547 dcval = inptr[0];
02548 wsptr[0] = dcval;
02549 wsptr[8] = dcval;
02550 wsptr[16] = dcval;
02551 wsptr[24] = dcval;
02552 wsptr[32] = dcval;
02553 wsptr[40] = dcval;
02554 wsptr[48] = dcval;
02555 wsptr[56] = dcval;
02556
02557 inptr++;
02558 wsptr++;
02559 continue;
02560 }
02561 /* Even part: rows 0, 2, 4 and 6 of the column. */
02562 tmp0 = inptr[0];
02563 tmp1 = inptr[16];
02564 tmp2 = inptr[32];
02565 tmp3 = inptr[48];
02566
02567 tmp10 = tmp0 + tmp2;
02568 tmp11 = tmp0 - tmp2;
02569
02570 tmp13 = tmp1 + tmp3;
02571 tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13;
02572
02573 tmp0 = tmp10 + tmp13;
02574 tmp3 = tmp10 - tmp13;
02575 tmp1 = tmp11 + tmp12;
02576 tmp2 = tmp11 - tmp12;
02577 /* Odd part: rows 1, 3, 5 and 7 of the column. */
02578 tmp4 = inptr[8];
02579 tmp5 = inptr[24];
02580 tmp6 = inptr[40];
02581 tmp7 = inptr[56];
02582
02583 z13 = tmp6 + tmp5;
02584 z10 = tmp6 - tmp5;
02585 z11 = tmp4 + tmp7;
02586 z12 = tmp4 - tmp7;
02587
02588 tmp7 = z11 + z13;
02589 tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
02590
02591 z5 = MULTIPLY(z10 + z12, FIX_1_847759065);
02592 tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
02593 tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
02594
02595 tmp6 = tmp12 - tmp7;
02596 tmp5 = tmp11 - tmp6;
02597 tmp4 = tmp10 + tmp5;
02598 /* Combine even and odd parts into the eight column outputs. */
02599 wsptr[0] = (int32_t) (tmp0 + tmp7);
02600 wsptr[56] = (int32_t) (tmp0 - tmp7);
02601 wsptr[8] = (int32_t) (tmp1 + tmp6);
02602 wsptr[48] = (int32_t) (tmp1 - tmp6);
02603 wsptr[16] = (int32_t) (tmp2 + tmp5);
02604 wsptr[40] = (int32_t) (tmp2 - tmp5);
02605 wsptr[32] = (int32_t) (tmp3 + tmp4);
02606 wsptr[24] = (int32_t) (tmp3 - tmp4);
02607
02608 inptr++;
02609 wsptr++;
02610 }
02611 /* Pass 2: one iteration per row of ws; output row ctr starts at odata[ctr*rskip]. */
02612 wsptr = ws;
02613 for (ctr = 0; ctr < 8; ctr++) {
02614 outptr = &(odata[ctr*rskip]);
02615 /* Even part: entries 0, 2, 4 and 6 of the row. */
02616 tmp10 = wsptr[0] + wsptr[4];
02617 tmp11 = wsptr[0] - wsptr[4];
02618
02619 tmp13 = wsptr[2] + wsptr[6];
02620 tmp12 = MULTIPLY(wsptr[2] - wsptr[6], FIX_1_414213562) - tmp13;
02621
02622 tmp0 = tmp10 + tmp13;
02623 tmp3 = tmp10 - tmp13;
02624 tmp1 = tmp11 + tmp12;
02625 tmp2 = tmp11 - tmp12;
02626 /* Odd part: entries 1, 3, 5 and 7 of the row. */
02627 z13 = wsptr[5] + wsptr[3];
02628 z10 = wsptr[5] - wsptr[3];
02629 z11 = wsptr[1] + wsptr[7];
02630 z12 = wsptr[1] - wsptr[7];
02631
02632 tmp7 = z11 + z13;
02633 tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
02634
02635 z5 = MULTIPLY(z10 + z12, FIX_1_847759065);
02636 tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
02637 tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
02638
02639 tmp6 = tmp12 - tmp7;
02640 tmp5 = tmp11 - tmp6;
02641 tmp4 = tmp10 + tmp5;
02642 /* Final output: DESCALE() each sum, then clamp it with RL() before storing the byte. */
02643 outptr[0] = RL(DESCALE(tmp0 + tmp7));
02644 outptr[7] = RL(DESCALE(tmp0 - tmp7));
02645 outptr[1] = RL(DESCALE(tmp1 + tmp6));
02646 outptr[6] = RL(DESCALE(tmp1 - tmp6));
02647 outptr[2] = RL(DESCALE(tmp2 + tmp5));
02648 outptr[5] = RL(DESCALE(tmp2 - tmp5));
02649 outptr[4] = RL(DESCALE(tmp3 + tmp4));
02650 outptr[3] = RL(DESCALE(tmp3 - tmp4));
02651
02652 wsptr += 8;
02653 }
02654 #endif
02655 }
02656
02657 inline void RTjpeg::CalcTbls(void)
02658 {
02659 int i;
02660 uint64_t qual;
02661
02662 qual = (uint64_t)Q << (32 - 7);
02663
02664 for(i = 0; i < 64; i++)
02665 {
02666 lqt[i] = (int32_t)((qual/((uint64_t)RTjpeg_lum_quant_tbl[i]<<16))>>3);
02667 if (lqt[i] == 0)
02668 lqt[i]=1;
02669
02670 cqt[i] = (int32_t)((qual/((uint64_t)RTjpeg_chrom_quant_tbl[i]<<16))>>3);
02671 if (cqt[i] == 0)
02672 cqt[i]=1;
02673
02674 liqt[i] = (1<<16) / (lqt[i]<<3);
02675 ciqt[i] = (1<<16) / (cqt[i]<<3);
02676 lqt[i] = ((1<<16) / liqt[i])>>3;
02677 cqt[i] = ((1<<16) / ciqt[i])>>3;
02678 }
02679
02680 lb8 = 0;
02681 while (liqt[RTjpeg_ZZ[++lb8]] <= 8)
02682 ;
02683 lb8--;
02684 cb8 = 0;
02685
02686 while (ciqt[RTjpeg_ZZ[++cb8]] <= 8)
02687 ;
02688 cb8--;
02689 }
02690
02691 int RTjpeg::SetQuality(int *quality)
02692 {
02693 if (*quality < 1)
02694 *quality = 1;
02695 if (*quality > 255)
02696 *quality = 255;
02697
02698 Q = *quality;
02699
02700 CalcTbls();
02701 DctInit();
02702 IdctInit();
02703 QuantInit();
02704
02705 return 0;
02706 }
02707
02708 int RTjpeg::SetFormat(int *fmt)
02709 {
02710 f = *fmt;
02711 return 0;
02712 }
02713
02714 int RTjpeg::SetSize(int *w, int *h)
02715 {
02716 if ((*w < 0) || (*w > 65535))
02717 return -1;
02718 if ((*h < 0) || (*h > 65535))
02719 return -1;
02720
02721 width = *w;
02722 height = *h;
02723 Ywidth = width>>3;
02724 Ysize = width * height;
02725 Cwidth = width>>4;
02726 Csize = (width>>1) * height;
02727
02728 if (key_rate > 0)
02729 {
02730 unsigned long tmp;
02731 if (old)
02732 delete [] old_start;
02733 old_start = new int16_t[((4*width*height)+32)];
02734
02735 tmp = (unsigned long)old_start;
02736 tmp += 32;
02737 tmp = tmp>>5;
02738
02739 old = (int16_t *)(tmp<<5);
02740 if (!old)
02741 {
02742 fprintf(stderr, "RTjpeg: Could not allocate memory\n");
02743 return -1;
02744 }
02745 memset(old, 0, ((4*width*height)));
02746 }
02747 return 0;
02748 }
02749
02750 int RTjpeg::SetIntra(int *key, int *lm, int *cm)
02751 {
02752 unsigned long tmp;
02753
02754 if (*key < 0)
02755 *key = 0;
02756 if (*key > 255)
02757 *key = 255;
02758 key_rate = *key;
02759
02760 if (*lm < 0)
02761 *lm = 0;
02762 if (*lm > 16)
02763 *lm = 16;
02764 if (*cm < 0)
02765 *cm = 0;
02766 if (*cm > 16)
02767 *cm = 16;
02768
02769 #ifdef MMX
02770 lmask.uq = (((uint64_t)(*lm)<<48)|((uint64_t)(*lm)<<32)|((uint64_t)(*lm)<<16)|(uint64_t)(*lm));
02771 cmask.uq = (((uint64_t)(*cm)<<48)|((uint64_t)(*cm)<<32)|((uint64_t)(*cm)<<16)|(uint64_t)(*cm));
02772 #else
02773 lmask = *lm;
02774 cmask = *cm;
02775 #endif
02776
02777 if (old)
02778 delete [] old_start;
02779 old_start = new int16_t[((4*width*height)+32)];
02780 tmp = (unsigned long)old_start;
02781 tmp += 32;
02782 tmp = tmp >> 5;
02783 old = (int16_t *)(tmp << 5);
02784 if (!old)
02785 {
02786 fprintf(stderr, "RTjpeg: Could not allocate memory\n");
02787 return -1;
02788 }
02789 memset(old, 0, ((4*width*height)));
02790
02791 return 0;
02792 }
02793
02794 RTjpeg::RTjpeg(void)
02795 {
02796 for (int i = 0; i < 64; i++)
02797 {
02798 block[i] = 0;
02799 lqt[i] = cqt[i] = liqt[i] = ciqt[i] = 0;
02800 for (int j = 0; j < 4; j++)
02801 ws[i*j] = 0;
02802 }
02803 lb8 = cb8 = Ywidth = Cwidth = Ysize = Csize = key_count = 0;
02804 width = height = Q = f = key_rate = 0;
02805
02806 old = old_start = NULL;
02807
02808 #ifdef MMX
02809 lmask.q = cmask.q = 0;
02810 RTjpeg_ones.q =(long long)0x0001000100010001LL;
02811 RTjpeg_half.q =(long long)0x7fff7fff7fff7fffLL;
02812 RTjpeg_C4.q =(long long)0x2D412D412D412D41LL;
02813 RTjpeg_C6.q =(long long)0x187E187E187E187ELL;
02814 RTjpeg_C2mC6.q=(long long)0x22A322A322A322A3LL;
02815 RTjpeg_C2pC6.q=(long long)0x539F539F539F539FLL;
02816 RTjpeg_zero.q =(long long)0x0000000000000000LL;
02817 #else
02818 lmask = cmask = 0;
02819 #endif
02820 }
02821
02822 RTjpeg::~RTjpeg(void)
02823 {
02824 if (old_start)
02825 delete [] old_start;
02826 }
02827
02828 inline int RTjpeg::compressYUV420(int8_t *sp, uint8_t **planes)
02829 {
02830 int8_t * sb;
02831 register uint8_t * bp = planes[0];
02832 register uint8_t * bp1 = bp + (width<<3);
02833 register uint8_t * bp2 = planes[1];
02834 register uint8_t * bp3 = planes[2];
02835 register int i, j, k;
02836
02837 #ifdef MMX
02838 emms();
02839 #endif
02840 sb = sp;
02841
02842 for(i = height >> 1; i; i -= 8)
02843 {
02844 for(j = 0, k = 0; j < width; j += 16, k += 8)
02845 {
02846 DctY(bp+j, Ywidth);
02847 Quant(block, lqt);
02848 sp += b2s(block, sp, lb8);
02849
02850 DctY(bp+j+8, Ywidth);
02851 Quant(block, lqt);
02852 sp += b2s(block, sp, lb8);
02853
02854 DctY(bp1+j, Ywidth);
02855 Quant(block, lqt);
02856 sp += b2s(block, sp, lb8);
02857
02858 DctY(bp1+j+8, Ywidth);
02859 Quant(block, lqt);
02860 sp += b2s(block, sp, lb8);
02861
02862 DctY(bp2+k, Cwidth);
02863 Quant(block, cqt);
02864 sp += b2s(block, sp, cb8);
02865
02866 DctY(bp3+k, Cwidth);
02867 Quant(block, cqt);
02868 sp += b2s(block, sp, cb8);
02869 }
02870 bp += width<<4;
02871 bp1 += width<<4;
02872 bp2 += width<<2;
02873 bp3 += width<<2;
02874 }
02875 #ifdef MMX
02876 emms();
02877 #endif
02878 return (sp - sb);
02879 }
02880
02881 inline int RTjpeg::compressYUV422(int8_t *sp, uint8_t **planes)
02882 {
02883 int8_t * sb;
02884 register uint8_t * bp = planes[0];
02885 register uint8_t * bp2 = planes[1];
02886 register uint8_t * bp3 = planes[2];
02887 register int i, j, k;
02888
02889 #ifdef MMX
02890 emms();
02891 #endif
02892 sb=sp;
02893
02894 for(i=height; i; i-=8)
02895 {
02896 for(j=0, k=0; j<width; j+=16, k+=8)
02897 {
02898 DctY(bp+j, Ywidth);
02899 Quant(block, lqt);
02900 sp += b2s(block, sp, lb8);
02901
02902 DctY(bp+j+8, Ywidth);
02903 Quant(block, lqt);
02904 sp += b2s(block, sp, lb8);
02905
02906 DctY(bp2+k, Cwidth);
02907 Quant(block, cqt);
02908 sp+=b2s(block, sp, cb8);
02909
02910 DctY(bp3+k, Cwidth);
02911 Quant(block, cqt);
02912 sp+=b2s(block, sp, cb8);
02913
02914 }
02915 bp += width << 3;
02916 bp2 += width << 2;
02917 bp3 += width << 2;
02918
02919 }
02920 #ifdef MMX
02921 emms();
02922 #endif
02923 return (sp-sb);
02924 }
02925
02926 inline int RTjpeg::compress8(int8_t *sp, uint8_t **planes)
02927 {
02928 int8_t * sb;
02929 register uint8_t * bp = planes[0];
02930 int i, j;
02931
02932 #ifdef MMX
02933 emms();
02934 #endif
02935
02936 sb=sp;
02937
02938 for(i=0; i<height; i+=8)
02939 {
02940 for(j=0; j<width; j+=8)
02941 {
02942 DctY(bp+j, width);
02943 Quant(block, lqt);
02944 sp += b2s(block, sp, lb8);
02945 }
02946 bp += width;
02947 }
02948
02949 #ifdef MMX
02950 emms();
02951 #endif
02952 return (sp-sb);
02953 }
02954
02955 inline void RTjpeg::decompressYUV422(int8_t *sp, uint8_t **planes)
02956 {
02957 register uint8_t * bp = planes[0];
02958 register uint8_t * bp2 = planes[1];
02959 register uint8_t * bp3 = planes[2];
02960 int i, j,k;
02961
02962 #ifdef MMX
02963 emms();
02964 #endif
02965
02966
02967 for(i=height; i; i-=8)
02968 {
02969 for(k=0, j=0; j<width; j+=16, k+=8) {
02970 if (*sp==-1)sp++;
02971 else
02972 {
02973 sp += s2b(block, sp, lb8, liqt);
02974 Idct(bp+j, block, width);
02975 }
02976 if (*sp==-1)sp++;
02977 else
02978 {
02979 sp += s2b(block, sp, lb8, liqt);
02980 Idct(bp+j+8, block, width);
02981 }
02982 if (*sp==-1)sp++;
02983 else
02984 {
02985 sp += s2b(block, sp, cb8, ciqt);
02986 Idct(bp2+k, block, width>>1);
02987 }
02988 if (*sp==-1)sp++;
02989 else
02990 {
02991 sp += s2b(block, sp, cb8, ciqt);
02992 Idct(bp3+k, block, width>>1);
02993 }
02994 }
02995 bp += width<<3;
02996 bp2 += width<<2;
02997 bp3 += width<<2;
02998 }
02999 #ifdef MMX
03000 emms();
03001 #endif
03002 }
03003
03004 inline void RTjpeg::decompressYUV420(int8_t *sp, uint8_t **planes)
03005 {
03006 register uint8_t * bp = planes[0];
03007 register uint8_t * bp1 = bp + (width<<3);
03008 register uint8_t * bp2 = planes[1];
03009 register uint8_t * bp3 = planes[2];
03010 int i, j,k;
03011
03012 #ifdef MMX
03013 emms();
03014 #endif
03015
03016
03017 for(i=height>>1; i; i-=8)
03018 {
03019 for(k=0, j=0; j<width; j+=16, k+=8) {
03020 if (*sp==-1)sp++;
03021 else
03022 {
03023 sp += s2b(block, sp, lb8, liqt);
03024 Idct(bp+j, block, width);
03025 }
03026 if (*sp==-1)sp++;
03027 else
03028 {
03029 sp += s2b(block, sp, lb8, liqt);
03030 Idct(bp+j+8, block, width);
03031 }
03032 if (*sp==-1)sp++;
03033 else
03034 {
03035 sp += s2b(block, sp, lb8, liqt);
03036 Idct(bp1+j, block, width);
03037 }
03038 if (*sp==-1)sp++;
03039 else
03040 {
03041 sp += s2b(block, sp, lb8, liqt);
03042 Idct(bp1+j+8, block, width);
03043 }
03044 if (*sp==-1)sp++;
03045 else
03046 {
03047 sp += s2b(block, sp, cb8, ciqt);
03048 Idct(bp2+k, block, width>>1);
03049 }
03050 if (*sp==-1)sp++;
03051 else
03052 {
03053 sp += s2b(block, sp, cb8, ciqt);
03054 Idct(bp3+k, block, width>>1);
03055 }
03056 }
03057 bp += width<<4;
03058 bp1 += width<<4;
03059 bp2 += width<<2;
03060 bp3 += width<<2;
03061 }
03062 #ifdef MMX
03063 emms();
03064 #endif
03065 }
03066
03067 inline void RTjpeg::decompress8(int8_t *sp, uint8_t **planes)
03068 {
03069 register uint8_t * bp = planes[0];
03070 int i, j;
03071
03072 #ifdef MMX
03073 emms();
03074 #endif
03075
03076
03077 for(i=0; i<height; i+=8)
03078 {
03079 for(j=0; j<width; j+=8)
03080 if (*sp==-1)sp++;
03081 else
03082 {
03083 sp += s2b(block, sp, lb8, liqt);
03084 Idct(bp+j, block, width);
03085 }
03086 bp += width<<3;
03087 }
03088 }
03089
03090 #ifdef MMX
03091
/*
 * MMX block comparison used for conditional block replenishment.
 *
 *   rblock - the 64 coefficients of the current block
 *   old    - the 64 coefficients of the stored reference block
 *   mask   - threshold, one 16-bit value per word of the quadword
 *
 * For every coefficient the saturated difference d = rblock - old is formed;
 * a coefficient is flagged when d > mask or (d XOR all-ones) > mask, using
 * signed 16-bit compares. If anything is flagged, rblock is copied over old
 * and 0 is returned; otherwise old is left alone and 1 is returned.
 */
03092 int RTjpeg::bcomp(int16_t *rblock, int16_t *old, mmx_t *mask)
03093 {
03094 int i;
03095 mmx_t *mold=(mmx_t *)old;
03096 mmx_t *mblock=(mmx_t *)rblock;
03097 volatile mmx_t result;
03098 static mmx_t neg= { uq: (unsigned long long)0xffffffffffffffffULL };
03099 /* mm7 = threshold, mm6 = all ones, mm5 = accumulated flags (starts at zero) */
03100 movq_m2r(*mask, mm7);
03101 movq_m2r(neg, mm6);
03102 pxor_r2r(mm5, mm5);
03103 /* 8 iterations x 2 quadwords x 4 words = all 64 coefficients */
03104 for(i=0; i<8; i++)
03105 {
03106 movq_m2r(*(mblock++), mm0);
03107 movq_m2r(*(mblock++), mm2);
03108 movq_m2r(*(mold++), mm1);
03109 movq_m2r(*(mold++), mm3);
03110 psubsw_r2r(mm1, mm0);
03111 psubsw_r2r(mm3, mm2);
03112 movq_r2r(mm0, mm1);
03113 movq_r2r(mm2, mm3);
03114 pcmpgtw_r2r(mm7, mm0);
03115 pcmpgtw_r2r(mm7, mm2);
03116 pxor_r2r(mm6, mm1);
03117 pxor_r2r(mm6, mm3);
03118 pcmpgtw_r2r(mm7, mm1);
03119 pcmpgtw_r2r(mm7, mm3);
03120 por_r2r(mm0, mm5);
03121 por_r2r(mm2, mm5);
03122 por_r2r(mm1, mm5);
03123 por_r2r(mm3, mm5);
03124 }
03125 movq_r2m(mm5, result);
03126 /* any set bit means at least one coefficient exceeded the threshold */
03127 if (result.q)
03128 {
03129 for(i=0; i<16; i++)((uint64_t *)old)[i]=((uint64_t *)rblock)[i];
03130 return 0;
03131 }
03132 return 1;
03133 }
03134
03135 #else
03136 int RTjpeg::bcomp(int16_t *rblock, int16_t *old, uint16_t *mask)
03137 {
03138 int i;
03139
03140 for(i=0; i<64; i++)
03141 if (abs(old[i]-rblock[i])>*mask)
03142 {
03143 for(i=0; i<16; i++)((uint64_t *)old)[i]=((uint64_t *)rblock)[i];
03144 return 0;
03145 }
03146 return 1;
03147 }
03148 #endif
03149
03150 inline int RTjpeg::mcompressYUV420(int8_t *sp, uint8_t **planes)
03151 {
03152 int8_t * sb;
03153 int16_t * lblock;
03154 register uint8_t * bp = planes[0];
03155 register uint8_t * bp1 = bp + (width<<3);
03156 register uint8_t * bp2 = planes[1];
03157 register uint8_t * bp3 = planes[2];
03158 register int i, j, k;
03159
03160 sb = sp;
03161 lblock = old;
03162
03163 for(i = height>>1; i; i-=8)
03164 {
03165 for(j=0, k=0; j < width; j+=16, k+=8)
03166 {
03167 DctY(bp+j, Ywidth);
03168 Quant(block, lqt);
03169 if (bcomp(block, lblock, &lmask))
03170 {
03171 *((uint8_t *)sp++)=255;
03172 }
03173 else sp+=b2s(block, sp, lb8);
03174 lblock += 64;
03175
03176 DctY(bp+j+8, Ywidth);
03177 Quant(block, lqt);
03178 if (bcomp(block, lblock, &lmask))
03179 {
03180 *((uint8_t *)sp++)=255;
03181 }
03182 else sp += b2s(block, sp, lb8);
03183 lblock += 64;
03184
03185 DctY(bp1+j, Ywidth);
03186 Quant(block, lqt);
03187 if (bcomp(block, lblock, &lmask))
03188 {
03189 *((uint8_t *)sp++)=255;
03190 }
03191 else sp += b2s(block, sp, lb8);
03192 lblock += 64;
03193
03194 DctY(bp1+j+8, Ywidth);
03195 Quant(block, lqt);
03196 if (bcomp(block, lblock, &lmask))
03197 {
03198 *((uint8_t *)sp++)=255;
03199 }
03200 else sp += b2s(block, sp, lb8);
03201 lblock += 64;
03202
03203 DctY(bp2+k, Cwidth);
03204 Quant(block, cqt);
03205 if (bcomp(block, lblock, &cmask))
03206 {
03207 *((uint8_t *)sp++)=255;
03208 }
03209 else sp+=b2s(block, sp, cb8);
03210 lblock+=64;
03211
03212 DctY(bp3+k, Cwidth);
03213 Quant(block, cqt);
03214 if (bcomp(block, lblock, &cmask))
03215 {
03216 *((uint8_t *)sp++)=255;
03217 }
03218 else sp+=b2s(block, sp, cb8);
03219 lblock+=64;
03220 }
03221 bp += width<<4;
03222 bp1 += width<<4;
03223 bp2 += width<<2;
03224 bp3 += width<<2;
03225 }
03226 #ifdef MMX
03227 emms();
03228 #endif
03229 return (sp-sb);
03230 }
03231
03232
03233 inline int RTjpeg::mcompressYUV422(int8_t *sp, uint8_t **planes)
03234 {
03235 int8_t * sb;
03236 int16_t *lblock;
03237 register uint8_t * bp = planes[0];
03238 register uint8_t * bp2 = planes[1];
03239 register uint8_t * bp3 = planes[2];
03240 register int i, j, k;
03241
03242 sb=sp;
03243 lblock = old;
03244 for(i = height; i; i-=8)
03245 {
03246 for(j=0, k=0; j<width; j+=16, k+=8)
03247 {
03248 DctY(bp+j, Ywidth);
03249 Quant(block, lqt);
03250 if (bcomp(block, lblock, &lmask))
03251 {
03252 *((uint8_t *)sp++)=255;
03253 }
03254 else sp+=b2s(block, sp, lb8);
03255 lblock+=64;
03256
03257 DctY(bp+j+8, Ywidth);
03258 Quant(block, lqt);
03259 if (bcomp(block, lblock, &lmask))
03260 {
03261 *((uint8_t *)sp++)=255;
03262 }
03263 else sp+=b2s(block, sp, lb8);
03264 lblock+=64;
03265
03266 DctY(bp2+k, Cwidth);
03267 Quant(block, cqt);
03268 if (bcomp(block, lblock, &cmask))
03269 {
03270 *((uint8_t *)sp++)=255;
03271 }
03272 else sp+=b2s(block, sp, cb8);
03273 lblock+=64;
03274
03275 DctY(bp3+k, Cwidth);
03276 Quant(block, cqt);
03277 if (bcomp(block, lblock, &cmask))
03278 {
03279 *((uint8_t *)sp++)=255;
03280 }
03281 else sp+=b2s(block, sp, cb8);
03282 lblock+=64;
03283
03284 }
03285 bp += width<<3;
03286 bp2 += width<<2;
03287 bp3 += width<<2;
03288 }
03289 #ifdef MMX
03290 emms();
03291 #endif
03292 return (sp-sb);
03293 }
03294
03295 inline int RTjpeg::mcompress8(int8_t *sp, uint8_t **planes)
03296 {
03297 register uint8_t * bp = planes[0];
03298 int8_t * sb;
03299 int16_t *lblock;
03300 int i, j;
03301
03302 sb=sp;
03303 lblock = old;
03304 for(i=0; i<height; i+=8)
03305 {
03306 for(j=0; j<width; j+=8)
03307 {
03308 DctY(bp+j, width);
03309 Quant(block, lqt);
03310 if (bcomp(block, lblock, &lmask))
03311 {
03312 *((uint8_t *)sp++)=255;
03313 } else sp+=b2s(block, sp, lb8);
03314 lblock+=64;
03315 }
03316 bp+=width<<3;
03317 }
03318 #ifdef MMX
03319 emms();
03320 #endif
03321 return (sp-sb);
03322 }
03323
03324 void RTjpeg::SetNextKey(void)
03325 {
03326 key_count = 0;
03327 }
03328
03329 int RTjpeg::Compress(int8_t *sp, uint8_t **planes)
03330 {
03331 RTjpeg_frameheader * fh = (RTjpeg_frameheader *)sp;
03332 int ds = 0;
03333
03334 if (key_rate == 0)
03335 {
03336 switch(f)
03337 {
03338 case RTJ_YUV420: ds = compressYUV420((int8_t*)&(fh->data), planes); break;
03339 case RTJ_YUV422: ds = compressYUV422((int8_t*)&(fh->data), planes); break;
03340 case RTJ_RGB8: ds = compress8((int8_t*)&(fh->data), planes); break;
03341 }
03342 fh->key = 0;
03343 } else {
03344 if (key_count == 0)
03345 memset(old, 0, ((4 * width * height)));
03346 switch(f)
03347 {
03348 case RTJ_YUV420: ds = mcompressYUV420((int8_t*)&(fh->data), planes); break;
03349 case RTJ_YUV422: ds = mcompressYUV422((int8_t*)&(fh->data), planes); break;
03350 case RTJ_RGB8: ds = mcompress8((int8_t*)&(fh->data), planes); break;
03351 }
03352 fh->key = key_count;
03353 if (++key_count > key_rate)
03354 key_count = 0;
03355 }
03356 ds += RTJPEG_HEADER_SIZE;
03357 fh->framesize = RTJPEG_SWAP_WORD(ds);
03358 fh->headersize = RTJPEG_HEADER_SIZE;
03359 fh->version = RTJPEG_FILE_VERSION;
03360 fh->width = RTJPEG_SWAP_HALFWORD(width);
03361 fh->height = RTJPEG_SWAP_HALFWORD(height);
03362 fh->quality = Q;
03363 return ds;
03364 }
03365
03366 void RTjpeg::Decompress(int8_t *sp, uint8_t **planes)
03367 {
03368 RTjpeg_frameheader * fh = (RTjpeg_frameheader *)sp;
03369
03370 if ((RTJPEG_SWAP_HALFWORD(fh->width) != width)||
03371 (RTJPEG_SWAP_HALFWORD(fh->height) != height))
03372 {
03373 int w = RTJPEG_SWAP_HALFWORD(fh->width);
03374 int h = RTJPEG_SWAP_HALFWORD(fh->height);
03375 SetSize(&w, &h);
03376 }
03377 if (fh->quality != Q)
03378 {
03379 int q = fh->quality;
03380 SetQuality(&q);
03381 }
03382 switch(f)
03383 {
03384 case RTJ_YUV420: decompressYUV420((int8_t*)&(fh->data), planes); break;
03385 case RTJ_YUV422: decompressYUV422((int8_t*)&(fh->data), planes); break;
03386 case RTJ_RGB8: decompress8((int8_t*)&(fh->data), planes); break;
03387 }
03388 }