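gimp-2.2.7: wrap the MMX/SSE register names in the inline-assembly clobber lists of
app/composite/gimp-composite-{sse2,sse,mmx}.c in #ifdef __MMX__ / #ifdef __SSE__ guards,
so the files still build when the compiler does not enable those instruction sets
(GCC 4 otherwise rejects such clobbers as unknown register names).

The sketch below only illustrates the guard pattern on a stand-alone statement; it is
not part of the patch itself, and the symbol names are invented for the example:

    #include <stdint.h>

    static const uint64_t alpha_mask = 0xFF000000FF000000ULL;  /* example value */

    static void
    load_alpha_mask (void)
    {
      /* Declare the clobber only when the compiler knows the MMX register set,
         mirroring what the patch does throughout the GIMP composite sources. */
      asm volatile ("movq %0,%%mm0"
                    : /* no outputs */
                    : "m" (alpha_mask)
    #ifdef __MMX__
                    : "%mm0"
    #endif
                    );
    }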
--- gimp-2.2.7/app/composite/gimp-composite-sse2.c.gcc4 2005-05-13 13:15:20.000000000 +0200
+++ gimp-2.2.7/app/composite/gimp-composite-sse2.c 2005-05-13 14:31:22.000000000 +0200
@@ -100,7 +100,13 @@
"\tmovq %1,%%mm0"
: /* empty */
: "m" (*rgba8_alpha_mask_128), "m" (*rgba8_alpha_mask_64)
- : "%xmm0", "%mm0");
+#ifdef __MMX__
+ : "%mm0"
+#ifdef __SSE__
+ , "%xmm0"
+#endif
+#endif
+ );

for (; n_pixels >= 4; n_pixels -= 4)
{
@@ -117,7 +123,10 @@
"\tmovdqu %%xmm1,%0\n"
: "=m" (*D)
: "m" (*A), "m" (*B)
- : "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7");
+#ifdef __SSE__
+ : "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
+#endif
+ );
A++;
B++;
D++;
@@ -141,7 +150,10 @@
"\tmovq %%mm1,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
+#ifdef __MMX__
+ : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+ );
a++;
b++;
d++;
@@ -161,7 +173,10 @@
"\tmovd %%mm1,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
+#ifdef __MMX__
+ : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+ );
}

asm("emms");
@@ -195,7 +210,10 @@
"\tmovdqu %%xmm2,%0\n"
: "=m" (*D)
: "m" (*A), "m" (*B)
- : "%xmm1", "%xmm2", "%xmm3", "%xmm4");
+#ifdef __SSE__
+ : "%xmm1", "%xmm2", "%xmm3", "%xmm4"
+#endif
+ );
A++;
B++;
D++;
@@ -212,7 +230,10 @@
"\tmovq %%mm2, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
a++;
b++;
d++;
@@ -226,7 +247,10 @@
"\tmovd %%mm2, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

asm("emms");
@@ -247,7 +271,13 @@
"\tmovdqu %1,%%xmm0"
: /* */
: "m" (*rgba8_alpha_mask_64), "m" (*rgba8_alpha_mask_128)
- : "%mm0", "%xmm0");
+#ifdef __MMX__
+ : "%mm0"
+#ifdef __SSE__
+ , "%xmm0"
+#endif
+#endif
+ );

for (; n_pixels >= 4; n_pixels -= 4)
{
@@ -266,7 +296,10 @@
"\tmovdqu %%xmm1,%0\n"
: "=m" (*D)
: "m" (*A), "m" (*B)
- : "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5");
+#ifdef __SSE__
+ : "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5"
+#endif
+ );
A++;
B++;
D++;
@@ -293,7 +326,10 @@
"\tmovq %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
a++;
b++;
d++;
@@ -316,7 +352,10 @@
"\tmovd %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
}

asm("emms");
@@ -366,7 +405,11 @@
"\tmovdqu %%xmm7,%0\n"
: "=m" (*op.D)
: "m" (*op.A), "m" (*op.B), "m" (*rgba8_w256_128), "m" (*rgba8_alpha_mask_128)
- : "%eax", "%ecx", "%edx", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7");
+ : "%eax", "%ecx", "%edx"
+#ifdef __SSE__
+ , "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
+#endif
+ );
op.A += 16;
op.B += 16;
op.D += 16;
@@ -409,7 +452,11 @@
"\tmovq %%mm7,%0\n"
: (*op.D)
: "m" (*op.A), "m" (*op.B), "m" (*rgba8_w256_64), "m" (*rgba8_alpha_mask_64)
- : "%eax", "%ecx", "%edx", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
+ : "%eax", "%ecx", "%edx"
+#ifdef __MMX__
+ , "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+ );
op.A += 8;
op.B += 8;
op.D += 8;
@@ -452,7 +499,11 @@
"\tmovd %%mm7,%0\n"
: "=m" (*op.D)
: "m" (*op.A), "m" (*op.B), "m" (*rgba8_w256_64), "m" (*rgba8_alpha_mask_64)
- : "%eax", "%ecx", "%edx", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+ : "%eax", "%ecx", "%edx"
+#ifdef __MMX__
+ , "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
}

asm("emms");
@@ -478,7 +529,13 @@
"\tmovdqu %3,%%xmm7\n"
: /* empty */
: "m" (*rgba8_alpha_mask_64), "m" (*rgba8_w128_64), "m" (*rgba8_alpha_mask_128), "m" (*rgba8_w128_128)
- : "%mm0", "%mm6", "%mm7", "%xmm0", "%xmm6", "%xmm7");
+#ifdef __MMX__
+ : "%mm0", "%mm6", "%mm7"
+#ifdef __SSE__
+ , "%xmm0", "%xmm6", "%xmm7"
+#endif
+#endif
+ );

for (; n_pixels >= 4; n_pixels -= 4)
{
@@ -509,7 +566,10 @@
"\tmovdqu %%xmm1,%0\n"
: "=m" (*D)
: "m" (*A), "m" (*B)
- : "%xmm1", "%xmm2", "%xmm3", "%xmm4");
+#ifdef __SSE__
+ : "%xmm1", "%xmm2", "%xmm3", "%xmm4"
+#endif
+ );
A++;
B++;
D++;
@@ -548,7 +608,10 @@
"\tmovq %%mm1,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
a++;
b++;
d++;
@@ -573,7 +636,10 @@
"\tmovd %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

asm("emms");
@@ -590,7 +656,13 @@
uint128 *B = (uint128 *) _op->B;
gulong n_pixels = _op->n_pixels;

- asm volatile ("movdqu %0,%%xmm0" : : "m" (*rgba8_alpha_mask_64) : "%xmm0");
+ asm volatile ("movdqu %0,%%xmm0"
+ :
+ : "m" (*rgba8_alpha_mask_64)
+#ifdef __SSE__
+ : "%xmm0"
+#endif
+ );

for (; n_pixels >= 4; n_pixels -= 4)
{
@@ -606,7 +678,10 @@
"\tmovdqu %%xmm1, %0\n"
: "=m" (*D)
: "m" (*A), "m" (*B)
- : "%xmm1", "%xmm2", "%xmm3", "%xmm4");
+#ifdef __SSE__
+ : "%xmm1", "%xmm2", "%xmm3", "%xmm4"
+#endif
+ );
A++;
B++;
D++;
@@ -630,7 +705,10 @@
"\tmovq %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
a++;
b++;
d++;
@@ -650,7 +728,10 @@
"\tmovd %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

asm("emms");
@@ -671,7 +752,13 @@
"\tmovdqu %1,%%xmm0\n"
: /* empty */
: "m" (*rgba8_alpha_mask_64), "m" (*rgba8_alpha_mask_128)
- : "%mm0", "%xmm0");
+#ifdef __MMX__
+ : "%mm0"
+#ifdef __SSE__
+ , "%xmm0"
+#endif
+#endif
+ );

for (; n_pixels >= 4; n_pixels -= 4)
{
@@ -688,7 +775,10 @@
"\tmovdqu %%xmm1,%0\n"
: "=m" (*D)
: "m" (*A), "m" (*B)
- : "%xmm1", "%xmm2", "%xmm3", "%xmm4");
+#ifdef __SSE__
+ : "%xmm1", "%xmm2", "%xmm3", "%xmm4"
+#endif
+ );
A++;
B++;
D++;
@@ -712,7 +802,10 @@
"\tmovq %%mm1,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
a++;
b++;
d++;
@@ -732,7 +825,10 @@
"\tmovd %%mm1,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

asm("emms");
@@ -772,7 +868,9 @@
"+m" (op.A[2]), "+m" (op.B[2]),
"+m" (op.A[3]), "+m" (op.B[3])
: /* empty */
+#ifdef __SSE__
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
+#endif
);
#else
asm volatile (" movdqu %0,%%xmm0\n"
@@ -782,7 +880,9 @@
: "+m" (op.A[0]), "+m" (op.B[0]),
"+m" (op.A[1]), "+m" (op.B[1])
: /* empty */
+#ifdef __SSE__
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
+#endif
);

asm volatile ("\tmovdqu %4,%%xmm4\n"
@@ -792,7 +892,9 @@
: "+m" (op.A[2]), "+m" (op.B[2]),
"+m" (op.A[3]), "+m" (op.B[3])
: /* empty */
+#ifdef __SSE__
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
+#endif
);

asm volatile ("\tmovdqu %%xmm0,%1\n"
@@ -802,7 +904,9 @@
: "+m" (op.A[0]), "+m" (op.B[0]),
"+m" (op.A[1]), "+m" (op.B[1])
: /* empty */
+#ifdef __SSE__
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
+#endif
);

asm volatile ("\tmovdqu %%xmm4,%5\n"
@@ -812,7 +916,9 @@
: "+m" (op.A[2]), "+m" (op.B[2]),
"+m" (op.A[3]), "+m" (op.B[3])
: /* empty */
+#ifdef __SSE__
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
+#endif
);
#endif
op.A += 64;
@@ -828,7 +934,10 @@
"\tmovdqu %%xmm2,%1\n"
: "+m" (*op.A), "+m" (*op.B)
: /* empty */
- : "%xmm2", "%xmm3");
+#ifdef __SSE__
+ : "%xmm2", "%xmm3"
+#endif
+ );
op.A += 16;
op.B += 16;
}
@@ -841,7 +950,10 @@
"\tmovq %%mm2,%1\n"
: "+m" (*op.A), "+m" (*op.B)
: /* empty */
- : "%mm2", "%mm3");
+#ifdef __MMX__
+ : "%mm2", "%mm3"
+#endif
+ );
op.A += 8;
op.B += 8;
}
@@ -854,7 +966,10 @@
"\tmovd %%mm2,%1\n"
: "+m" (*op.A), "+m" (*op.B)
: /* empty */
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

asm("emms");
--- gimp-2.2.7/app/composite/gimp-composite-sse.c.gcc4 2005-01-08 00:58:33.000000000 +0100
+++ gimp-2.2.7/app/composite/gimp-composite-sse.c 2005-05-13 15:36:17.000000000 +0200
@@ -72,7 +72,10 @@
asm volatile ("movq %0,%%mm0"
: /* empty */
: "m" (*rgba8_alpha_mask_64)
- : "%mm0");
+#ifdef __MMX__
+ : "%mm0"
+#endif
+ );

for (; n_pixels >= 2; n_pixels -= 2)
{
@@ -88,7 +91,10 @@
"\tmovq %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
a++;
b++;
d++;
@@ -108,7 +114,10 @@
"\tmovd %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

asm("emms");
@@ -171,7 +180,11 @@
"\tmovq %%mm7,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b), "m" (*rgba8_b255_64), "m" (*rgba8_w1_64), "m" (*rgba8_w255_64), "m" (*rgba8_alpha_mask_64)
- : pdivwqX_clobber, "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
+ : pdivwqX_clobber
+#ifdef __MMX__
+ , "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+ );
d++;
b++;
a++;
@@ -225,7 +238,11 @@
"\tmovd %%mm7,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b), "m" (*rgba8_b255_64), "m" (*rgba8_w1_64), "m" (*rgba8_w255_64), "m" (*rgba8_alpha_mask_64)
- : pdivwqX_clobber, "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
+ : pdivwqX_clobber
+#ifdef __MMX__
+ , "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+ );
}

asm("emms");
@@ -248,7 +265,10 @@
"\tmovq %%mm2, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
a++;
b++;
d++;
@@ -262,7 +282,10 @@
"\tmovd %%mm2, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

asm("emms");
@@ -276,7 +299,13 @@
uint64 *b = (uint64 *) _op->B;
gulong n_pixels = _op->n_pixels;

- asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask_64) : "%mm0");
+ asm volatile ("movq %0,%%mm0"
+ :
+ : "m" (*rgba8_alpha_mask_64)
+#ifdef __MMX__
+ : "%mm0"
+#endif
+ );

for (; n_pixels >= 2; n_pixels -= 2)
{
@@ -295,7 +324,10 @@
"\tmovq %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
a++;
b++;
d++;
@@ -318,7 +350,10 @@
"\tmovd %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

asm("emms");
@@ -334,7 +369,10 @@
"\tmovq %1, %%mm7\n"
:
: "m" (*rgba8_alpha_mask_64), "m" (*rgba8_w1_64)
- : "%mm0", "%mm7");
+#ifdef __MMX__
+ : "%mm0", "%mm7"
+#endif
+ );

for (; op.n_pixels >= 2; op.n_pixels -= 2)
{
@@ -374,7 +412,11 @@
"\tmovq %%mm3,%0\n"
: "=m" (*op.D)
: "m" (*op.A), "m" (*op.B), "m" (*rgba8_alpha_mask_64)
- : "%eax", "%ecx", "%edx", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+ : "%eax", "%ecx", "%edx"
+#ifdef __MMX__
+ , "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
op.A += 8;
op.B += 8;
op.D += 8;
@@ -419,7 +461,11 @@
"\tmovd %%mm3,%0\n"
: "=m" (*op.D)
: "m" (*op.A), "m" (*op.B), "m" (*rgba8_alpha_mask_64)
- : "%eax", "%ecx", "%edx", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+ : "%eax", "%ecx", "%edx"
+#ifdef __MMX__
+ , "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
}

asm("emms");
@@ -469,7 +515,11 @@
"\tmovq %%mm7,%0\n"
: "=m" (*op.D)
: "m" (*op.A), "m" (*op.B), "m" (*rgba8_w256_64), "m" (*rgba8_alpha_mask_64)
- : "%eax", "%ecx", "%edx", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
+ : "%eax", "%ecx", "%edx"
+#ifdef __MMX__
+ , "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+ );
op.A += 8;
op.B += 8;
op.D += 8;
@@ -512,7 +562,11 @@
"\tmovd %%mm7,%2\n"
: "=m" (*op.D)
: "m" (*op.A), "m" (*op.B), "m" (*rgba8_w256_64), "m" (*rgba8_alpha_mask_64)
- : "%eax", "%ecx", "%edx", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+ : "%eax", "%ecx", "%edx"
+#ifdef __MMX__
+ , "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
}

asm("emms");
@@ -533,7 +587,10 @@
"\tmovq %1,%%mm7\n"
: /* empty */
: "m" (*rgba8_alpha_mask_64), "m" (*rgba8_w128_64)
- : "%mm0", "%mm6", "%mm7");
+#ifdef __MMX__
+ : "%mm0", "%mm6", "%mm7"
+#endif
+ );


for (; n_pixels >= 2; n_pixels -= 2)
@@ -565,7 +622,10 @@
"\tmovq %%mm1,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
a++;
b++;
d++;
@@ -597,7 +657,10 @@
"\tmovd %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

asm("emms");
@@ -616,7 +679,10 @@
"movq %1, %%mm7\n"
: /* empty */
: "m" (*rgba8_alpha_mask_64), "m" (*rgba8_w128_64)
- : "%mm0", "%mm6", "%mm7");
+#ifdef __MMX__
+ : "%mm0", "%mm6", "%mm7"
+#endif
+ );

for (; n_pixels >= 2; n_pixels -= 2)
{
@@ -644,7 +710,10 @@
"\tmovq %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
a++;
b++;
d++;
@@ -675,7 +744,10 @@
"\tmovd %%mm1, %0\n"
: "+m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

asm("emms");
@@ -689,7 +761,13 @@
uint64 *b = (uint64 *) _op->B;
gulong n_pixels = _op->n_pixels;

- asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask_64) : "%mm0");
+ asm volatile ("movq %0,%%mm0"
+ :
+ : "m" (*rgba8_alpha_mask_64)
+#ifdef __MMX__
+ : "%mm0"
+#endif
+ );

for (; n_pixels >= 2; n_pixels -= 2)
{
@@ -705,7 +783,10 @@
"\tmovq %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
a++;
b++;
d++;
@@ -728,7 +809,10 @@
"\tmovd %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
}

asm("emms");
@@ -742,9 +826,27 @@
uint64 *b = (uint64 *) _op->B;
gulong n_pixels = _op->n_pixels;

- asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask_64) : "%mm0");
- asm volatile ("movq %0,%%mm7" : : "m" (*rgba8_w128_64) : "%mm7");
- asm volatile ("pxor %%mm6,%%mm6" : : : "%mm6");
+ asm volatile ("movq %0,%%mm0"
+ :
+ : "m" (*rgba8_alpha_mask_64)
+#ifdef __MMX__
+ : "%mm0"
+#endif
+ );
+ asm volatile ("movq %0,%%mm7"
+ :
+ : "m" (*rgba8_w128_64)
+#ifdef __MMX__
+ : "%mm7"
+#endif
+ );
+ asm volatile ("pxor %%mm6,%%mm6"
+ :
+#ifdef __MMX__
+ :
+ : "%mm6"
+#endif
+ );

for (; n_pixels >= 2; n_pixels -= 2)
{
@@ -771,7 +873,10 @@
"\tmovq %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
a++;
b++;
d++;
@@ -798,7 +903,10 @@
"\tmovd %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
}

asm("emms");
@@ -863,7 +971,11 @@
asm volatile ("pxor %%mm0,%%mm0\n"
"movq %0,%%mm7"
: /* empty */
- : "m" (*rgba8_w128_64) : "%mm0");
+ : "m" (*rgba8_w128_64)
+#ifdef __MMX__
+ : "%mm0"
+#endif
+ );

for (; op.n_pixels >= 2; op.n_pixels -= 2)
{
@@ -911,7 +1023,10 @@
"\tmovq %%mm1,%0\n"
: "=m" (*op.D)
: "m" (*op.A), "m" (*op.B), "m" (*rgba8_w2_64), "m" (*rgba8_alpha_mask_64)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
op.A += 8;
op.B += 8;
op.D += 8;
@@ -966,7 +1081,10 @@
"\tmovd %%mm1,%0\n"
: "=m" (*op.D)
: "m" (*op.A), "m" (*op.B), "m" (*rgba8_w2_64), "m" (*rgba8_alpha_mask_64)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

asm("emms");
@@ -993,7 +1111,11 @@
"\tmovq %1,%%mm7\n"
: /* empty */
: "m" (_op->scale.scale), "m" (*rgba8_w128_64)
- : "%eax", "%ebx", "%mm0", "%mm3", "%mm5", "%mm6", "%mm7");
+ : "%eax", "%ebx"
+#ifdef __MMX__
+ , "%mm0", "%mm3", "%mm5", "%mm6", "%mm7"
+#endif
+ );

for (; n_pixels >= 2; n_pixels -= 2)
{
@@ -1015,7 +1137,10 @@
"\tmovq %%mm1,%0\n"
: "=m" (*d)
: "m" (*a)
- : "%mm1", "%mm2", "%mm4", "%mm5", "%mm7");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm4", "%mm5", "%mm7"
+#endif
+ );
a++;
d++;
}
@@ -1033,7 +1158,10 @@
"\tmovd %%mm1,%0\n"
: "=m" (*d)
: "m" (*a)
- : "%mm1", "%mm2", "%mm4", "%mm5", "%mm6", "%mm7");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+ );
}

asm("emms");
@@ -1047,8 +1175,20 @@
uint64 *b = (uint64 *) _op->B;
gulong n_pixels = _op->n_pixels;

- asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask_64) : "%mm0");
- asm volatile ("movq %0,%%mm7" : : "m" (*rgba8_w128_64) : "%mm7");
+ asm volatile ("movq %0,%%mm0"
+ :
+ : "m" (*rgba8_alpha_mask_64)
+#ifdef __MMX__
+ : "%mm0"
+#endif
+ );
+ asm volatile ("movq %0,%%mm7"
+ :
+ : "m" (*rgba8_w128_64)
+#ifdef __MMX__
+ : "%mm7"
+#endif
+ );
asm volatile ("pxor %mm6, %mm6");

for (; n_pixels >= 2; n_pixels -= 2)
@@ -1100,7 +1240,10 @@
"\tmovq %%mm1,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
a++;
b++;
d++;
@@ -1155,7 +1298,10 @@
"\tmovd %%mm1,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
}

asm("emms");
@@ -1170,7 +1316,13 @@
uint64 *b = (uint64 *) _op->B;
gulong n_pixels = _op->n_pixels;

- asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask_64) : "%mm0");
+ asm volatile ("movq %0,%%mm0"
+ :
+ : "m" (*rgba8_alpha_mask_64)
+#ifdef __MMX__
+ : "%mm0"
+#endif
+ );

for (; n_pixels >= 2; n_pixels -= 2)
{
@@ -1186,7 +1338,10 @@
"\tmovq %%mm1,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
a++;
b++;
d++;
@@ -1206,7 +1361,10 @@
"\tmovd %%mm1,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

asm("emms");
@@ -1226,8 +1384,11 @@
"\tmovq %%mm3,%0\n"
"\tmovq %%mm2,%1\n"
: "+m" (*a), "+m" (*b)
+#ifdef __MMX__
:
- : "%mm1", "%mm2", "%mm3", "%mm4");
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
a++;
b++;
}
@@ -1239,8 +1400,11 @@
"\tmovd %%mm3,%0\n"
"\tmovd %%mm2,%1\n"
: "+m" (*a), "+m" (*b)
+#ifdef __MMX__
: /* empty */
- : "%mm1", "%mm2", "%mm3", "%mm4");
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

asm("emms");
@@ -1340,7 +1504,10 @@
asm("movq %0,%%mm1"
:
: "m" (*va8_alpha_mask)
- : "%mm1");
+#ifdef __MMX__
+ : "%mm1"
+#endif
+ );

for (; op.n_pixels >= 4; op.n_pixels -= 4)
{
@@ -1390,7 +1557,10 @@
"\tmovq %%mm7,(%2); addl $8,%2\n"
: "+r" (op.A), "+r" (op.B), "+r" (op.D)
: "m" (*va8_b255), "m" (*va8_w1), "m" (*va8_w255), "m" (*va8_alpha_mask)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

if (op.n_pixels)
@@ -1441,7 +1611,10 @@
"\tmovd %%mm7,(%2)\n"
: /* empty */
: "r" (op.A), "r" (op.B), "r" (op.D), "m" (*va8_b255), "m" (*va8_w1), "m" (*va8_w255), "m" (*va8_alpha_mask)
- : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
+#ifdef __MMX__
+ : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+ );
}

asm("emms");
--- gimp-2.2.7/app/composite/gimp-composite-mmx.c.gcc4 2005-01-11 12:27:25.000000000 +0100
+++ gimp-2.2.7/app/composite/gimp-composite-mmx.c 2005-05-13 14:19:39.000000000 +0200
@@ -94,7 +94,10 @@
asm volatile ("movq %0,%%mm0"
: /* empty */
: "m" (*rgba8_alpha_mask_64)
- : "%mm0");
+#ifdef __MMX__
+ : "%mm0"
+#endif
+ );

for (; n_pixels >= 2; n_pixels -= 2)
{
@@ -110,7 +113,10 @@
"\tmovq %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
a++;
b++;
d++;
@@ -130,7 +136,10 @@
"\tmovd %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

asm("emms");
@@ -192,7 +201,11 @@
"\tmovq %%mm7,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b), "m" (*rgba8_b255_64), "m" (*rgba8_w1_64), "m" (*rgba8_w255_64), "m" (*rgba8_alpha_mask_64)
- : pdivwqX_clobber, "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
+ : pdivwqX_clobber
+#ifdef __MMX__
+ , "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+ );
d++;
b++;
a++;
@@ -246,7 +259,11 @@
"\tmovd %%mm7,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b), "m" (*rgba8_b255_64), "m" (*rgba8_w1_64), "m" (*rgba8_w255_64), "m" (*rgba8_alpha_mask_64)
- : pdivwqX_clobber, "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
+ : pdivwqX_clobber
+#ifdef __MMX__
+ , "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+ );
}

asm("emms");
@@ -269,7 +286,10 @@
"\tmovq %%mm2, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
a++;
b++;
d++;
@@ -283,7 +303,10 @@
"\tmovd %%mm2, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

asm("emms");
@@ -297,7 +320,13 @@
uint64 *b = (uint64 *) _op->B;
gulong n_pixels = _op->n_pixels;

- asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask_64) : "%mm0");
+ asm volatile ("movq %0,%%mm0"
+ :
+ : "m" (*rgba8_alpha_mask_64)
+#ifdef __MMX__
+ : "%mm0"
+#endif
+ );

for (; n_pixels >= 2; n_pixels -= 2)
{
@@ -316,7 +345,10 @@
"\tmovq %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
a++;
b++;
d++;
@@ -339,7 +371,10 @@
"\tmovd %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

asm("emms");
@@ -358,7 +393,10 @@
"\tmovq %1, %%mm7\n"
:
: "m" (*rgba8_alpha_mask_64), "m" (*rgba8_w1_64)
- : "%mm0", "%mm7");
+#ifdef __MMX__
+ : "%mm0", "%mm7"
+#endif
+ );

for (; n_pixels >= 2; n_pixels -= 2)
{
@@ -398,7 +436,11 @@
"\tmovq %%mm3,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b), "m" (*rgba8_alpha_mask_64)
- : pdivwuqX_clobber, "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+ : pdivwuqX_clobber
+#ifdef __MMX__
+ , "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
a++;
b++;
d++;
@@ -442,7 +484,11 @@
"\tmovd %%mm3,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b), "m" (*rgba8_alpha_mask_64)
- : pdivwuqX_clobber, "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+ : pdivwuqX_clobber
+#ifdef __MMX__
+ , "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
}

asm("emms");
@@ -495,7 +541,11 @@
"\tmovq %%mm7,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b), "m" (*rgba8_w256_64), "m" (*rgba8_alpha_mask_64)
- : pdivwuqX_clobber, "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
+ : pdivwuqX_clobber
+#ifdef __MMX__
+ , "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+ );
a++;
b++;
d++;
@@ -538,7 +588,11 @@
"\tmovd %%mm7,%2\n"
: /* empty */
: "m" (*a), "m" (*b), "m" (*d), "m" (*rgba8_w256_64), "m" (*rgba8_alpha_mask_64)
- : pdivwuqX_clobber, "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+ : pdivwuqX_clobber
+#ifdef __MMX__
+ , "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
}

asm("emms");
@@ -558,7 +612,10 @@
"movq %1,%%mm7\n"
: /* no outputs */
: "m" (*rgba8_alpha_mask_64), "m" (*rgba8_w128_64)
- : "%mm0", "%mm7", "%mm6");
+#ifdef __MMX__
+ : "%mm0", "%mm7", "%mm6"
+#endif
+ );

for (; n_pixels >= 2; n_pixels -= 2)
{
@@ -589,7 +646,10 @@
"\tmovq %%mm1,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
a++;
b++;
d++;
@@ -620,7 +680,10 @@
"\tmovd %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

asm("emms");
@@ -639,7 +702,10 @@
"movq %1, %%mm7\n"
: /* empty */
: "m" (*rgba8_alpha_mask_64), "m" (*rgba8_w128_64)
- : "%mm0", "%mm6", "%mm7");
+#ifdef __MMX__
+ : "%mm0", "%mm6", "%mm7"
+#endif
+ );

for (; n_pixels >= 2; n_pixels -= 2)
{
@@ -667,7 +733,10 @@
"\tmovq %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
a++;
b++;
d++;
@@ -697,7 +766,10 @@
"\tmovd %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

asm("emms");
@@ -711,7 +783,13 @@
uint64 *b = (uint64 *) _op->B;
gulong n_pixels = _op->n_pixels;

- asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask_64) : "%mm0");
+ asm volatile ("movq %0,%%mm0"
+ :
+ : "m" (*rgba8_alpha_mask_64)
+#ifdef __MMX__
+ : "%mm0"
+#endif
+ );

for (; n_pixels >= 2; n_pixels -= 2)
{
@@ -727,7 +805,10 @@
"\tmovq %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
a++;
b++;
d++;
@@ -750,7 +831,10 @@
"\tmovd %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
}

asm("emms");
@@ -770,7 +854,10 @@
"pxor %%mm6,%%mm6\n"
: /* empty */
: "m" (*rgba8_alpha_mask_64), "m" (*rgba8_w128_64)
- : "%mm6", "%mm7", "%mm0");
+#ifdef __MMX__
+ : "%mm6", "%mm7", "%mm0"
+#endif
+ );

for (; n_pixels >= 2; n_pixels -= 2)
{
@@ -797,7 +884,10 @@
"\tmovq %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
a++;
b++;
d++;
@@ -824,7 +914,10 @@
"\tmovd %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
}

asm("emms");
@@ -892,7 +985,11 @@
asm volatile ("pxor %%mm0,%%mm0\n"
"movq %0,%%mm7"
: /* empty */
- : "m" (*rgba8_w128_64) : "%mm0");
+ : "m" (*rgba8_w128_64)
+#ifdef __MMX__
+ : "%mm0"
+#endif
+ );

for (; n_pixels >= 2; n_pixels -= 2)
{
@@ -943,7 +1040,10 @@
"\tmovq %%mm1,%2\n"
: "+m" (*a), "+m" (*b), "+m" (*d)
: "m" (*rgba8_w2_64), "m" (*rgba8_alpha_mask_64)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
a++;
b++;
d++;
@@ -998,7 +1098,10 @@
"\tmovd %%mm1,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b), "m" (*rgba8_w2_64), "m" (*rgba8_alpha_mask_64)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

asm("emms");
@@ -1024,7 +1127,11 @@
"\tmovq %1,%%mm7\n"
: /* empty */
: "m" (_op->scale.scale), "m" (*rgba8_w128_64)
- : "%eax", "%ebx", "%mm0", "%mm5", "%mm6", "%mm7");
+ : "%eax", "%ebx"
+#ifdef __MMX__
+ , "%mm0", "%mm5", "%mm6", "%mm7"
+#endif
+ );

for (; n_pixels >= 2; n_pixels -= 2)
{
@@ -1046,7 +1153,10 @@
"\tmovq %%mm1,%0\n"
: "=m" (*d)
: "m" (*a)
- : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+ );
a++;
d++;
}
@@ -1064,7 +1174,10 @@
"\tmovd %%mm1,%0\n"
: "=m" (*d)
: "m" (*a)
- : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+ );
}

asm("emms");
@@ -1083,7 +1196,10 @@
"movq %1,%%mm7\n"
: /* empty */
: "m" (*rgba8_alpha_mask_64), "m" (*rgba8_w128_64)
- : "%mm0", "%mm6", "%mm7");
+#ifdef __MMX__
+ : "%mm0", "%mm6", "%mm7"
+#endif
+ );

for (; n_pixels >= 2; n_pixels -= 2)
{
@@ -1134,7 +1250,10 @@
"\tmovq %%mm1,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
a++;
b++;
d++;
@@ -1189,7 +1308,10 @@
"\tmovd %%mm1,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
}

asm volatile ("emms");
@@ -1204,7 +1326,13 @@
uint64 *b = (uint64 *) _op->B;
gulong n_pixels = _op->n_pixels;

- asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask_64) : "%mm0");
+ asm volatile ("movq %0,%%mm0"
+ :
+ : "m" (*rgba8_alpha_mask_64)
+#ifdef __MMX__
+ : "%mm0"
+#endif
+ );

for (; n_pixels >= 2; n_pixels -= 2)
{
@@ -1224,7 +1352,10 @@
"\tmovq %%mm1,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
a++;
b++;
d++;
@@ -1248,7 +1379,10 @@
"\tmovd %%mm1,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"
+#endif
+ );
}

asm volatile ("emms");
@@ -1268,8 +1402,11 @@
"\tmovq %%mm3,%0\n"
"\tmovq %%mm2,%1\n"
: "+m" (*a), "+m" (*b)
+#ifdef __MMX__
:
- : "%mm1", "%mm2", "%mm3", "%mm4");
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
a++;
b++;
}
@@ -1281,8 +1418,11 @@
"\tmovd %%mm3,%0\n"
"\tmovd %%mm2,%1\n"
: "+m" (*a), "+m" (*b)
+#ifdef __MMX__
:
- : "%mm1", "%mm2", "%mm3", "%mm4");
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
}

asm("emms");
@@ -1301,7 +1441,10 @@
asm volatile ("movq %0,%%mm0"
:
: "m" (*va8_alpha_mask_64)
- : "%mm0");
+#ifdef __MMX__
+ : "%mm0"
+#endif
+ );

for (; n_pixels >= 4; n_pixels -= 4)
{
@@ -1321,7 +1464,10 @@
#endif
: "=m" (*d)
: "m" (*a), "m" (*b)
- : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
a++;
b++;
d++;
@@ -1345,7 +1491,10 @@
"\tmovd %%mm1, %0\n"
: "=m" (*d32)
: "m" (*a32), "m" (*b32)
- : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
a32++;
b32++;
d32++;
@@ -1370,7 +1519,11 @@
"\tmovw %%ax, %0\n"
: "=m" (*d16)
: "m" (*a16), "m" (*b16)
- : "%eax", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4");
+ : "%eax"
+#ifdef __MMX__
+ , "%mm0", "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );

a16++;
b16++;
@@ -1390,7 +1543,10 @@
asm("movq %0,%%mm1"
:
: "m" (*va8_alpha_mask)
- : "%mm1");
+#ifdef __MMX__
+ : "%mm1"
+#endif
+ );

for (; op.n_pixels >= 4; op.n_pixels -= 4)
{
@@ -1440,7 +1596,10 @@
"\tmovq %%mm7,%2\n"
: /* empty */
: "+m" (*op.A), "+m" (*op.B), "+m" (*op.D), "m" (*va8_b255), "m" (*va8_w1), "m" (*va8_w255_64), "m" (*va8_alpha_mask)
- : "%mm1", "%mm2", "%mm3", "%mm4");
+#ifdef __MMX__
+ : "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+ );
op.A += 8;
op.B += 8;
op.D += 8;
@@ -1493,7 +1652,10 @@
"\tmovd %%mm7,%2\n"
: /* empty */
: "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*va8_b255), "m" (*va8_w1), "m" (*va8_w255_64), "m" (*va8_alpha_mask)
- : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
+#ifdef __MMX__
+ : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+ );
}

asm("emms");