xf86-video-intel: 4 commits - src/sna/compiler.h src/sna/gen3_render.c src/sna/gen4_vertex.c

Tue Feb 26 03:38:43 PST 2013

 src/sna/compiler.h    |    9 +-
 src/sna/gen3_render.c |   16 ++--
 src/sna/gen4_vertex.c |  165 +++++++++++++++++++++++++++++++++-----------------
 3 files changed, 123 insertions(+), 67 deletions(-)

New commits:
commit aa358fad8c934e6962976ecfdc5e5ebc9d1ad83c
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Feb 26 11:37:24 2013 +0000

    sna: Bump required GCC for sse2
    
    gcc-4.4.5 (on Debian squeeze) triggers an internal compiler error (ICE)
    when using the target("sse2") function attribute.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/compiler.h b/src/sna/compiler.h
index 0e0be90..321e697 100644
--- a/src/sna/compiler.h
+++ b/src/sna/compiler.h
@@ -52,7 +52,7 @@
 #define flatten
 #endif
 
-#if defined(__GNUC__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 4)
+#if defined(__GNUC__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 5)
 #define sse2 __attribute__((target("sse2,fpmath=sse+387")))
 #define sse4_2 __attribute__((target("sse4.2,sse2,fpmath=sse+387")))
 #endif
commit 08747292b717fcea84ce3adc455b0161e5517e84
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Feb 26 11:28:05 2013 +0000

    sna: Conditionally compile sse2 routines
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/compiler.h b/src/sna/compiler.h
index fed9bc0..0e0be90 100644
--- a/src/sna/compiler.h
+++ b/src/sna/compiler.h
@@ -55,8 +55,6 @@
 #if defined(__GNUC__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 4)
 #define sse2 __attribute__((target("sse2,fpmath=sse+387")))
 #define sse4_2 __attribute__((target("sse4.2,sse2,fpmath=sse+387")))
-#else
-#define sse2
 #endif
 
 #if defined(__GNUC__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 7)
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 1c6d0ec..79233ca 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -3625,7 +3625,7 @@ gen3_emit_composite_spans_primitive_identity_gradient__boxes(const struct sna_co
 	} while (--nbox);
 }
 
-#ifndef __x86_64__
+#if defined(sse2) && !defined(__x86_64__)
 sse2 fastcall static void
 gen3_emit_composite_spans_primitive_constant__sse2(struct sna *sna,
 						   const struct sna_composite_spans_op *op,
@@ -4380,7 +4380,7 @@ gen3_render_composite_spans(struct sna *sna,
 	case SHADER_WHITE:
 	case SHADER_CONSTANT:
 		if (no_offset) {
-#ifndef __x86_64__
+#if defined(sse2) && !defined(__x86_64__)
 			if (sna->cpu_features & SSE2) {
 				tmp->box = gen3_render_composite_spans_constant_box__sse2;
 				tmp->thread_boxes = gen3_render_composite_spans_constant_thread__sse2__boxes;
@@ -4395,7 +4395,7 @@ gen3_render_composite_spans(struct sna *sna,
 				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant_no_offset__boxes;
 			}
 		} else {
-#ifndef __x86_64__
+#if defined(sse2) && !defined(__x86_64__)
 			if (sna->cpu_features & SSE2) {
 				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant__sse2;
 				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__sse2__boxes;
@@ -4410,11 +4410,11 @@ gen3_render_composite_spans(struct sna *sna,
 	case SHADER_LINEAR:
 	case SHADER_RADIAL:
 		if (tmp->base.src.transform == NULL) {
-#ifndef __x86_64__
+#if defined(sse2) && !defined(__x86_64__)
 			if (sna->cpu_features & SSE2) {
 				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient__sse2;
 				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__sse2__boxes;
-			} else 
+			} else
 #endif
 			{
 				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient;
@@ -4422,7 +4422,7 @@ gen3_render_composite_spans(struct sna *sna,
 			}
 		} else if (tmp->base.src.is_affine) {
 			tmp->base.src.scale[1] = tmp->base.src.scale[0] = 1. / tmp->base.src.transform->matrix[2][2];
-#ifndef __x86_64__
+#if defined(sse2) && !defined(__x86_64__)
 			if (sna->cpu_features & SSE2) {
 				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient__sse2;
 				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__sse2__boxes;
@@ -4436,7 +4436,7 @@ gen3_render_composite_spans(struct sna *sna,
 		break;
 	case SHADER_TEXTURE:
 		if (tmp->base.src.transform == NULL) {
-#ifndef __x86_64__
+#if defined(sse2) && !defined(__x86_64__)
 			if (sna->cpu_features & SSE2) {
 				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source__sse2;
 				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__sse2__boxes;
@@ -4449,7 +4449,7 @@ gen3_render_composite_spans(struct sna *sna,
 		} else if (tmp->base.src.is_affine) {
 			tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
 			tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
-#ifndef __x86_64__
+#if defined(sse2) && !defined(__x86_64__)
 			if (sna->cpu_features & SSE2) {
 				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source__sse2;
 				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__sse2__boxes;
diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c
index c307a6b..35f772e 100644
--- a/src/sna/gen4_vertex.c
+++ b/src/sna/gen4_vertex.c
@@ -34,6 +34,10 @@
 #include "sna_render_inline.h"
 #include "gen4_vertex.h"
 
+#ifndef sse2
+#define sse2
+#endif
+
 void gen4_vertex_flush(struct sna *sna)
 {
 	DBG(("%s[%x] = %d\n", __FUNCTION__,
commit 9c63ddd9f14588bcec8275e2a64e56cc5b661a5a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Feb 26 11:28:05 2013 +0000

    sna: Conditionally compile sse4_2 routines
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/compiler.h b/src/sna/compiler.h
index dfaaaac..fed9bc0 100644
--- a/src/sna/compiler.h
+++ b/src/sna/compiler.h
@@ -57,7 +57,6 @@
 #define sse4_2 __attribute__((target("sse4.2,sse2,fpmath=sse+387")))
 #else
 #define sse2
-#define sse4_2
 #endif
 
 #if defined(__GNUC__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 7)
diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c
index 4ebd856..c307a6b 100644
--- a/src/sna/gen4_vertex.c
+++ b/src/sna/gen4_vertex.c
@@ -981,6 +981,7 @@ emit_composite_texcoord_affine(struct sna *sna,
 }
 
 /* SSE4_2 */
+#if defined(sse4_2)
 
 sse4_2 fastcall static void
 emit_primitive_linear__sse4_2(struct sna *sna,
@@ -1180,6 +1181,8 @@ emit_boxes_simple_source__sse4_2(const struct sna_composite_op *op,
 	} while (--nbox);
 }
 
+#endif
+
 /* AVX2 */
 #if defined(avx2)
 
@@ -1468,10 +1471,13 @@ unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op
 				tmp->emit_boxes = emit_boxes_linear__avx2;
 			} else
 #endif
+#if defined(sse4_2)
 			if (sna->cpu_features & SSE4_2) {
 				tmp->prim_emit = emit_primitive_linear__sse4_2;
 				tmp->emit_boxes = emit_boxes_linear__sse4_2;
-			} else {
+			} else
+#endif
+			{
 				tmp->prim_emit = emit_primitive_linear;
 				tmp->emit_boxes = emit_boxes_linear;
 			}
@@ -1485,10 +1491,13 @@ unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op
 				tmp->emit_boxes = emit_boxes_identity_source__avx2;
 			} else
 #endif
+#if defined(sse4_2)
 			if (sna->cpu_features & SSE4_2) {
 				tmp->prim_emit = emit_primitive_identity_source__sse4_2;
 				tmp->emit_boxes = emit_boxes_identity_source__sse4_2;
-			} else {
+			} else
+#endif
+			{
 				tmp->prim_emit = emit_primitive_identity_source;
 				tmp->emit_boxes = emit_boxes_identity_source;
 			}
@@ -1505,10 +1514,13 @@ unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op
 					tmp->emit_boxes = emit_boxes_simple_source__avx2;
 				} else
 #endif
+#if defined(sse4_2)
 				if (sna->cpu_features & SSE4_2) {
 					tmp->prim_emit = emit_primitive_simple_source__sse4_2;
 					tmp->emit_boxes = emit_boxes_simple_source__sse4_2;
-				} else {
+				} else
+#endif
+				{
 					tmp->prim_emit = emit_primitive_simple_source;
 					tmp->emit_boxes = emit_boxes_simple_source;
 				}
@@ -1930,6 +1942,7 @@ emit_span_boxes_linear(const struct sna_composite_spans_op *op,
 }
 
 /* SSE4_2 */
+#if defined(sse4_2)
 
 sse4_2 fastcall static void
 emit_span_identity__sse4_2(struct sna *sna,
@@ -2243,6 +2256,8 @@ emit_span_boxes_linear__sse4_2(const struct sna_composite_spans_op *op,
 	} while (--nbox);
 }
 
+#endif
+
 /* AVX2 */
 #if defined(avx2)
 
@@ -2584,10 +2599,13 @@ unsigned gen4_choose_spans_emitter(struct sna *sna,
 			tmp->emit_boxes = emit_span_boxes_linear__avx2;
 		} else
 #endif
+#if defined(sse4_2)
 		if (sna->cpu_features & SSE4_2) {
 			tmp->prim_emit = emit_span_linear__sse4_2;
 			tmp->emit_boxes = emit_span_boxes_linear__sse4_2;
-		} else {
+		} else
+#endif
+		{
 			tmp->prim_emit = emit_span_linear;
 			tmp->emit_boxes = emit_span_boxes_linear;
 		}
@@ -2600,10 +2618,13 @@ unsigned gen4_choose_spans_emitter(struct sna *sna,
 			tmp->emit_boxes = emit_span_boxes_identity__avx2;
 		} else
 #endif
+#if defined(sse4_2)
 		if (sna->cpu_features & SSE4_2) {
 			tmp->prim_emit = emit_span_identity__sse4_2;
 			tmp->emit_boxes = emit_span_boxes_identity__sse4_2;
-		} else {
+		} else
+#endif
+		{
 			tmp->prim_emit = emit_span_identity;
 			tmp->emit_boxes = emit_span_boxes_identity;
 		}
@@ -2619,10 +2640,13 @@ unsigned gen4_choose_spans_emitter(struct sna *sna,
 				tmp->emit_boxes = emit_span_boxes_simple__avx2;
 			} else
 #endif
+#if defined(sse4_2)
 			if (sna->cpu_features & SSE4_2) {
 				tmp->prim_emit = emit_span_simple__sse4_2;
 				tmp->emit_boxes = emit_span_boxes_simple__sse4_2;
-			} else {
+			} else
+#endif
+			{
 				tmp->prim_emit = emit_span_simple;
 				tmp->emit_boxes = emit_span_boxes_simple;
 			}
@@ -2633,10 +2657,13 @@ unsigned gen4_choose_spans_emitter(struct sna *sna,
 				tmp->emit_boxes = emit_span_boxes_affine__avx2;
 			} else
 #endif
+#if defined(sse4_2)
 			if (sna->cpu_features & SSE4_2) {
 				tmp->prim_emit = emit_span_affine__sse4_2;
 				tmp->emit_boxes = emit_span_boxes_affine__sse4_2;
-			} else {
+			} else
+#endif
+			{
 				tmp->prim_emit = emit_span_affine;
 				tmp->emit_boxes = emit_span_boxes_affine;
 			}
commit dc885d350ec53d08e0d4d529bb938bf2568d61ae
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Feb 26 11:28:05 2013 +0000

    sna: Conditionally compile avx2 routines
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/compiler.h b/src/sna/compiler.h
index 3dd8a6d..dfaaaac 100644
--- a/src/sna/compiler.h
+++ b/src/sna/compiler.h
@@ -52,14 +52,16 @@
 #define flatten
 #endif
 
-#if defined(__GNUC__) && (__GNUC__ >= 4) /* 4.4 */
+#if defined(__GNUC__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 4)
 #define sse2 __attribute__((target("sse2,fpmath=sse+387")))
 #define sse4_2 __attribute__((target("sse4.2,sse2,fpmath=sse+387")))
-#define avx2 __attribute__((target("avx2,sse4.2,sse2,fpmath=sse+387")))
 #else
 #define sse2
 #define sse4_2
-#define avx2
+#endif
+
+#if defined(__GNUC__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 7)
+#define avx2 __attribute__((target("avx2,sse4.2,sse2,fpmath=sse+387")))
 #endif
 
 #ifdef HAVE_VALGRIND
diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c
index 885b87e..4ebd856 100644
--- a/src/sna/gen4_vertex.c
+++ b/src/sna/gen4_vertex.c
@@ -1181,6 +1181,7 @@ emit_boxes_simple_source__sse4_2(const struct sna_composite_op *op,
 }
 
 /* AVX2 */
+#if defined(avx2)
 
 avx2 fastcall static void
 emit_primitive_linear__avx2(struct sna *sna,
@@ -1380,6 +1381,8 @@ emit_boxes_simple_source__avx2(const struct sna_composite_op *op,
 	} while (--nbox);
 }
 
+#endif
+
 unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op *tmp)
 {
 	unsigned vb;
@@ -1459,10 +1462,13 @@ unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op
 			vb = 1;
 		} else if (tmp->src.is_linear) {
 			DBG(("%s: linear, no mask\n", __FUNCTION__));
+#if defined(avx2)
 			if (sna->cpu_features & AVX2) {
 				tmp->prim_emit = emit_primitive_linear__avx2;
 				tmp->emit_boxes = emit_boxes_linear__avx2;
-			} else  if (sna->cpu_features & SSE4_2) {
+			} else
+#endif
+			if (sna->cpu_features & SSE4_2) {
 				tmp->prim_emit = emit_primitive_linear__sse4_2;
 				tmp->emit_boxes = emit_boxes_linear__sse4_2;
 			} else {
@@ -1473,10 +1479,13 @@ unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op
 			vb = 1;
 		} else if (tmp->src.transform == NULL) {
 			DBG(("%s: identity src, no mask\n", __FUNCTION__));
+#if defined(avx2)
 			if (sna->cpu_features & AVX2) {
 				tmp->prim_emit = emit_primitive_identity_source__avx2;
 				tmp->emit_boxes = emit_boxes_identity_source__avx2;
-			} else if (sna->cpu_features & SSE4_2) {
+			} else
+#endif
+			if (sna->cpu_features & SSE4_2) {
 				tmp->prim_emit = emit_primitive_identity_source__sse4_2;
 				tmp->emit_boxes = emit_boxes_identity_source__sse4_2;
 			} else {
@@ -1490,10 +1499,13 @@ unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op
 			tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
 			if (!sna_affine_transform_is_rotation(tmp->src.transform)) {
 				DBG(("%s: simple src, no mask\n", __FUNCTION__));
+#if defined(avx2)
 				if (sna->cpu_features & AVX2) {
 					tmp->prim_emit = emit_primitive_simple_source__avx2;
 					tmp->emit_boxes = emit_boxes_simple_source__avx2;
-				} else if (sna->cpu_features & SSE4_2) {
+				} else
+#endif
+				if (sna->cpu_features & SSE4_2) {
 					tmp->prim_emit = emit_primitive_simple_source__sse4_2;
 					tmp->emit_boxes = emit_boxes_simple_source__sse4_2;
 				} else {
@@ -1917,46 +1929,6 @@ emit_span_boxes_linear(const struct sna_composite_spans_op *op,
 	} while (--nbox);
 }
 
-avx2 fastcall static void
-emit_span_identity__avx2(struct sna *sna,
-			 const struct sna_composite_spans_op *op,
-			 const BoxRec *box,
-			 float opacity)
-{
-	float *v;
-	union {
-		struct sna_coordinate p;
-		float f;
-	} dst;
-
-	float sx = op->base.src.scale[0];
-	float sy = op->base.src.scale[1];
-	int16_t tx = op->base.src.offset[0];
-	int16_t ty = op->base.src.offset[1];
-
-	assert(op->base.floats_per_rect == 12);
-	assert((sna->render.vertex_used % 4) == 0);
-	v = sna->render.vertices + sna->render.vertex_used;
-	sna->render.vertex_used += 3*4;
-	assert(sna->render.vertex_used <= sna->render.vertex_size);
-
-	dst.p.x = box->x2;
-	dst.p.y = box->y2;
-	v[0] = dst.f;
-	v[1] = (box->x2 + tx) * sx;
-	v[6] = v[2] = (box->y2 + ty) * sy;
-
-	dst.p.x = box->x1;
-	v[4] = dst.f;
-	v[9] = v[5] = (box->x1 + tx) * sx;
-
-	dst.p.y = box->y1;
-	v[8] = dst.f;
-	v[10] = (box->y1 + ty) * sy;
-
-	v[11] = v[7] = v[3] = opacity;
-}
-
 /* SSE4_2 */
 
 sse4_2 fastcall static void
@@ -2272,6 +2244,47 @@ emit_span_boxes_linear__sse4_2(const struct sna_composite_spans_op *op,
 }
 
 /* AVX2 */
+#if defined(avx2)
+
+avx2 fastcall static void
+emit_span_identity__avx2(struct sna *sna,
+			 const struct sna_composite_spans_op *op,
+			 const BoxRec *box,
+			 float opacity)
+{
+	float *v;
+	union {
+		struct sna_coordinate p;
+		float f;
+	} dst;
+
+	float sx = op->base.src.scale[0];
+	float sy = op->base.src.scale[1];
+	int16_t tx = op->base.src.offset[0];
+	int16_t ty = op->base.src.offset[1];
+
+	assert(op->base.floats_per_rect == 12);
+	assert((sna->render.vertex_used % 4) == 0);
+	v = sna->render.vertices + sna->render.vertex_used;
+	sna->render.vertex_used += 3*4;
+	assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+	dst.p.x = box->x2;
+	dst.p.y = box->y2;
+	v[0] = dst.f;
+	v[1] = (box->x2 + tx) * sx;
+	v[6] = v[2] = (box->y2 + ty) * sy;
+
+	dst.p.x = box->x1;
+	v[4] = dst.f;
+	v[9] = v[5] = (box->x1 + tx) * sx;
+
+	dst.p.y = box->y1;
+	v[8] = dst.f;
+	v[10] = (box->y1 + ty) * sy;
+
+	v[11] = v[7] = v[3] = opacity;
+}
 
 avx2 fastcall static void
 emit_span_boxes_identity__avx2(const struct sna_composite_spans_op *op,
@@ -2544,6 +2557,7 @@ emit_span_boxes_linear__avx2(const struct sna_composite_spans_op *op,
 		b++;
 	} while (--nbox);
 }
+#endif
 
 inline static uint32_t
 gen4_choose_spans_vertex_buffer(const struct sna_composite_op *op)
@@ -2564,10 +2578,13 @@ unsigned gen4_choose_spans_emitter(struct sna *sna,
 		tmp->base.floats_per_vertex = 3;
 		vb = 1 << 2 | 1;
 	} else if (tmp->base.src.is_linear) {
+#if defined(avx2)
 		if (sna->cpu_features & AVX2) {
 			tmp->prim_emit = emit_span_linear__avx2;
 			tmp->emit_boxes = emit_span_boxes_linear__avx2;
-		} else if (sna->cpu_features & SSE4_2) {
+		} else
+#endif
+		if (sna->cpu_features & SSE4_2) {
 			tmp->prim_emit = emit_span_linear__sse4_2;
 			tmp->emit_boxes = emit_span_boxes_linear__sse4_2;
 		} else {
@@ -2577,10 +2594,13 @@ unsigned gen4_choose_spans_emitter(struct sna *sna,
 		tmp->base.floats_per_vertex = 3;
 		vb = 1 << 2 | 1;
 	} else if (tmp->base.src.transform == NULL) {
+#if defined(avx2)
 		if (sna->cpu_features & AVX2) {
 			tmp->prim_emit = emit_span_identity__avx2;
 			tmp->emit_boxes = emit_span_boxes_identity__avx2;
-		} else if (sna->cpu_features & SSE4_2) {
+		} else
+#endif
+		if (sna->cpu_features & SSE4_2) {
 			tmp->prim_emit = emit_span_identity__sse4_2;
 			tmp->emit_boxes = emit_span_boxes_identity__sse4_2;
 		} else {
@@ -2593,10 +2613,13 @@ unsigned gen4_choose_spans_emitter(struct sna *sna,
 		tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
 		tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
 		if (!sna_affine_transform_is_rotation(tmp->base.src.transform)) {
+#if defined(avx2)
 			if (sna->cpu_features & AVX2) {
 				tmp->prim_emit = emit_span_simple__avx2;
 				tmp->emit_boxes = emit_span_boxes_simple__avx2;
-			} else if (sna->cpu_features & SSE4_2) {
+			} else
+#endif
+			if (sna->cpu_features & SSE4_2) {
 				tmp->prim_emit = emit_span_simple__sse4_2;
 				tmp->emit_boxes = emit_span_boxes_simple__sse4_2;
 			} else {
@@ -2604,10 +2627,13 @@ unsigned gen4_choose_spans_emitter(struct sna *sna,
 				tmp->emit_boxes = emit_span_boxes_simple;
 			}
 		} else {
+#if defined(avx2)
 			if (sna->cpu_features & AVX2) {
 				tmp->prim_emit = emit_span_affine__avx2;
 				tmp->emit_boxes = emit_span_boxes_affine__avx2;
-			} else if (sna->cpu_features & SSE4_2) {
+			} else
+#endif
+			if (sna->cpu_features & SSE4_2) {
 				tmp->prim_emit = emit_span_affine__sse4_2;
 				tmp->emit_boxes = emit_span_boxes_affine__sse4_2;
 			} else {