00001 #include "mythconfig.h"
00002
00003
/*
 * Zoom filter entry point.  In this file it is implemented with MMX inline
 * assembly when MMX is defined and ARCH_X86_64 is not; in every other build
 * it is a stub that ignores its arguments and does nothing.
 */
void zoom_filter_xmmx (int prevX, int prevY, unsigned int *expix1, unsigned int *expix2, int *brutS, int *brutD, int buffratio, int precalCoef[16][16]);
/*
 * Returns non-zero when zoom_filter_xmmx() may be used on the running CPU.
 * The stub build always returns 0.
 */
int zoom_filter_xmmx_supported (void);
00006
00007
00008 #if defined(MMX) && !defined(ARCH_X86_64)
00009
00010
00011
/*
 * When defined, zoom_filter_xmmx() performs an extra masking step so that a
 * position with either coordinate out of range has BOTH coordinates zeroed.
 */
#define STRICT_COMPAT


/*
 * NOTE(review): the three BUFF* constants are not referenced by the code
 * visible in this file; presumably they describe the 16-bit precision of
 * buffratio -- confirm against the callers.
 */
#define BUFFPOINTNB 16
#define BUFFPOINTMASK 0xffff
#define BUFFINCR 0xff

/* Equals 1 << PERTEDEC.  Not referenced by the code visible in this file. */
#define sqrtperte 16

/*
 * Equals (1 << PERTEDEC) - 1.  Not referenced by the code visible in this
 * file: the inline assembly masks with the literal 15 instead.
 */
#define PERTEMASK 0xf

/*
 * Number of fractional bits in the fixed-point pixel coordinates: the
 * filter shifts by this amount to convert between pixel indices and
 * fixed-point positions.
 */
#define PERTEDEC 4
00024
00025
00026
00027 #include "mmx.h"
00028 #include "libavutil/cpu.h"
00029
00030 int zoom_filter_xmmx_supported () {
00031 return (av_get_cpu_flags() & AV_CPU_FLAG_SSE) >> 3;
00032 }
00033
00034 void zoom_filter_xmmx (int prevX, int prevY,
00035 unsigned int *expix1, unsigned int *expix2,
00036 int *lbruS, int *lbruD, int buffratio,
00037 int precalCoef[16][16])
00038 {
00039 int bufsize = prevX * prevY;
00040 volatile int loop;
00041
00042 mmx_t *brutS = (mmx_t*)lbruS;
00043 mmx_t *brutD = (mmx_t*)lbruD;
00044
00045 volatile mmx_t prevXY;
00046 volatile mmx_t ratiox;
00047
00048
00049 expix1[0]=expix1[prevX-1]=expix1[prevX*prevY-1]=expix1[prevX*prevY-prevX]=0;
00050
00051 prevXY.ud[0] = (prevX-1)<<PERTEDEC;
00052 prevXY.ud[1] = (prevY-1)<<PERTEDEC;
00053
00054 ratiox.d[0] = buffratio;
00055 ratiox.d[1] = buffratio;
00056 movq_m2r (ratiox, mm6);
00057 pslld_i2r (16,mm6);
00058
00059 pxor_r2r (mm7,mm7);
00060
00061 loop=0;
00062
00063
00064
00065
00066 while (loop < bufsize)
00067 {
00068
00069
00070
00071
00072
00073
00074 __asm__ __volatile__ (
00075 "movq %0,%%mm0\n"
00076 "movq %1,%%mm1\n"
00077 : :"m"(brutS[loop]),"m"(brutD[loop])
00078 );
00079
00080 psubd_r2r (mm0,mm1);
00081 movq_r2r (mm1, mm2);
00082
00083 pslld_i2r (16,mm1);
00084 mmx_r2r (pmulhuw, mm6, mm1);
00085 pmullw_r2r (mm6, mm2);
00086
00087 paddd_r2r (mm2, mm1);
00088 pslld_i2r (16,mm0);
00089
00090 paddd_r2r (mm1, mm0);
00091 psrld_i2r (16, mm0);
00092
00093
00094
00095
00096
00097
00098
00099 movq_m2r (prevXY,mm1);
00100 pcmpgtd_r2r (mm0, mm1);
00101
00102
00103
00104 #ifdef STRICT_COMPAT
00105 movq_r2r (mm1,mm2);
00106 punpckhdq_r2r (mm2,mm2);
00107 punpckldq_r2r (mm1,mm1);
00108 pand_r2r (mm2, mm0);
00109 #endif
00110 pand_r2r (mm1, mm0);
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120 __asm__ __volatile__ (
00121 "movd %%mm0,%%ecx\n"
00122 "movq %%mm0,%%mm1\n"
00123
00124 "andl $15,%%ecx\n"
00125 "psrlq $32,%%mm1\n"
00126
00127 "shll $6,%%ecx\n"
00128 "movd %%mm1,%%eax\n"
00129
00130 "addl %0,%%ecx\n"
00131 "andl $15,%%eax\n"
00132
00133 "movd (%%ecx,%%eax,4),%%mm3\n"
00134
00135 ::"m"(precalCoef):"eax","ecx");
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166 psrld_i2r (PERTEDEC,mm0);
00167 psrld_i2r (PERTEDEC,mm1);
00168 __asm__ __volatile__ (
00169 "movd %%mm1,%%eax\n"
00170 "movq %%mm3,%%mm5\n"
00171
00172 "mull %1\n"
00173 "movd %%mm0,%%ecx\n"
00174 "punpcklbw %%mm5, %%mm3\n"
00175
00176 "addl %%ecx,%%eax\n"
00177 "movq %%mm3,%%mm4\n"
00178 "movq %%mm3,%%mm5\n"
00179
00180 "movl %0,%%ecx\n"
00181 "punpcklbw %%mm5,%%mm3\n"
00182
00183 "movq (%%ecx,%%eax,4),%%mm0\n"
00184 "punpckhbw %%mm5,%%mm4\n"
00185
00186 "addl %1,%%eax\n"
00187 "movq (%%ecx,%%eax,4),%%mm2\n"
00188
00189 : : "X"(expix1), "X"(prevX):"eax","ecx"
00190 );
00191
00192
00193
00194
00195
00196
00197
00198
00199 movq_r2r (mm0, mm1);
00200
00201
00202 punpcklbw_r2r (mm7, mm0);
00203
00204
00205
00206 movq_r2r (mm3, mm5);
00207
00208
00209 punpckhbw_r2r (mm7, mm1);
00210
00211 punpcklbw_r2r (mm7, mm5);
00212 punpckhbw_r2r (mm7, mm3);
00213
00214
00215 pmullw_r2r (mm5, mm0);
00216 pmullw_r2r (mm3, mm1);
00217 paddw_r2r (mm1, mm0);
00218
00219
00220 movq_r2r (mm4, mm5);
00221 punpcklbw_r2r (mm7, mm4);
00222 punpckhbw_r2r (mm7, mm5);
00223
00224
00225 movq_r2r (mm2, mm1);
00226
00227
00228 punpcklbw_r2r (mm7, mm1);
00229 punpckhbw_r2r (mm7, mm2);
00230
00231
00232 pmullw_r2r (mm4, mm1);
00233 pmullw_r2r (mm5, mm2);
00234
00235
00236 paddw_r2r (mm1, mm0);
00237 paddw_r2r (mm2, mm0);
00238
00239
00240 psrlw_i2r (8, mm0);
00241 packuswb_r2r (mm7, mm0);
00242
00243 movd_r2m (mm0,expix2[loop]);
00244
00245 ++loop;
00246 }
00247 #ifdef HAVE_ATHLON
00248 __asm__ __volatile__ ("femms\n");
00249 #else
00250 emms();
00251 #endif
00252 }
00253 #else
/**
 * Fallback used when the MMX implementation is not compiled in.
 *
 * @return always 0: the MMX zoom filter is unavailable in this build.
 */
int zoom_filter_xmmx_supported (void) {
    return 0;
}
/**
 * Placeholder for builds without the MMX implementation.
 *
 * Every argument is ignored and no buffer is read or written.
 */
void zoom_filter_xmmx (int prevX, int prevY,
                       unsigned int *expix1, unsigned int *expix2,
                       int *lbruS, int *lbruD, int buffratio,
                       int precalCoef[16][16])
{
    /* Reference each parameter once to silence unused-parameter warnings. */
    (void) prevX;
    (void) prevY;
    (void) expix1;
    (void) expix2;
    (void) lbruS;
    (void) lbruD;
    (void) buffratio;
    (void) precalCoef;
}
00268 #endif