xf86-video-intel: 3 commits - configure.ac src/sna/atomic.h src/sna/compiler.h src/sna/gen3_render.c src/sna/gen4_render.c src/sna/gen4_vertex.c src/sna/gen5_render.c src/sna/gen6_render.c src/sna/gen7_render.c src/sna/kgem_debug_gen6.c src/sna/Makefile.am src/sna/sna_blt.c src/sna/sna.h src/sna/sna_render.c src/sna/sna_render.h src/sna/sna_threads.c src/sna/sna_trapezoids.c src/sna/sna_vertex.c
Chris Wilson
ickle at kemper.freedesktop.org
Sun Jan 27 09:00:28 PST 2013
configure.ac | 34 ++
src/sna/Makefile.am | 2
src/sna/atomic.h | 89 +++++++
src/sna/compiler.h | 2
src/sna/gen3_render.c | 562 ++++++++++++++++++++++++++++++++++++++++++++--
src/sna/gen4_render.c | 118 +++++++++
src/sna/gen4_vertex.c | 466 ++++++++++++++++++++++++++++++++++++--
src/sna/gen5_render.c | 118 +++++++++
src/sna/gen6_render.c | 124 +++++++++-
src/sna/gen7_render.c | 118 +++++++++
src/sna/kgem_debug_gen6.c | 4
src/sna/sna.h | 1
src/sna/sna_blt.c | 306 ++++++++++++++++++++++++-
src/sna/sna_render.c | 2
src/sna/sna_render.h | 55 ++++
src/sna/sna_threads.c | 6
src/sna/sna_trapezoids.c | 475 +++++++++++++++++++++++++++++++++++---
src/sna/sna_vertex.c | 37 +++
18 files changed, 2414 insertions(+), 105 deletions(-)
New commits:
commit 73f574945f2cac14f9bafa6395e2c4dbb16fcf5d
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Jan 27 16:02:52 2013 +0000
sna: Disable all signals in the render threads
X uses them (SIGIO especially) for input handling, and gets rightfully
confused if it finds itself in a different thread.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_threads.c b/src/sna/sna_threads.c
index 4a98753..f77ddbf 100644
--- a/src/sna/sna_threads.c
+++ b/src/sna/sna_threads.c
@@ -29,6 +29,7 @@
#include <unistd.h>
#include <pthread.h>
+#include <signal.h>
static int max_threads = -1;
@@ -44,6 +45,11 @@ static struct thread {
static void *__run__(void *arg)
{
struct thread *t = arg;
+ sigset_t signals;
+
+ /* Disable all signals in the slave threads as X uses them for IO */
+ sigfillset(&signals);
+ pthread_sigmask(SIG_BLOCK, &signals, NULL);
pthread_mutex_lock(&t->mutex);
while (1) {
commit 9a7bf70365980809d0f02190f2f620a957ff1ba8
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sat Jan 26 23:03:33 2013 +0000
sna: Enable threaded rasterisation for non-antialiased geometry
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 3224d71..6c0ea6a 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -431,6 +431,26 @@ gen3_emit_composite_primitive_constant(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_boxes_constant(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ v[0] = box->x2;
+ v[1] = box->y2;
+
+ v[2] = box->x1;
+ v[3] = box->y2;
+
+ v[4] = box->x1;
+ v[5] = box->y1;
+
+ box++;
+ v += 6;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_primitive_identity_gradient(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -457,6 +477,32 @@ gen3_emit_composite_primitive_identity_gradient(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ v[0] = box->x2;
+ v[1] = box->y2;
+ v[2] = box->x2 + op->src.offset[0];
+ v[3] = box->y2 + op->src.offset[1];
+
+ v[4] = box->x1;
+ v[5] = box->y2;
+ v[6] = box->x1 + op->src.offset[0];
+ v[7] = box->y2 + op->src.offset[1];
+
+ v[8] = box->x1;
+ v[9] = box->y1;
+ v[10] = box->x1 + op->src.offset[0];
+ v[11] = box->y1 + op->src.offset[1];
+
+ v += 12;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_primitive_affine_gradient(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -494,6 +540,40 @@ gen3_emit_composite_primitive_affine_gradient(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ const PictTransform *transform = op->src.transform;
+
+ do {
+ v[0] = box->x2;
+ v[1] = box->y2;
+ sna_get_transformed_coordinates(box->x2 + op->src.offset[0],
+ box->y2 + op->src.offset[1],
+ transform,
+ &v[2], &v[3]);
+
+ v[4] = box->x1;
+ v[5] = box->y2;
+ sna_get_transformed_coordinates(box->x1 + op->src.offset[0],
+ box->y2 + op->src.offset[1],
+ transform,
+ &v[6], &v[7]);
+
+ v[8] = box->x1;
+ v[9] = box->y1;
+ sna_get_transformed_coordinates(box->x1 + op->src.offset[0],
+ box->y1 + op->src.offset[1],
+ transform,
+ &v[10], &v[11]);
+
+ box++;
+ v += 12;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_primitive_identity_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -519,6 +599,28 @@ gen3_emit_composite_primitive_identity_source(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_boxes_identity_source(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ v[0] = box->x2 + op->dst.x;
+ v[8] = v[4] = box->x1 + op->dst.x;
+ v[5] = v[1] = box->y2 + op->dst.y;
+ v[9] = box->y1 + op->dst.y;
+
+ v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
+ v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
+
+ v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
+ v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
+
+ v += 12;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -544,6 +646,28 @@ gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_boxes_identity_source_no_offset(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ v[0] = box->x2;
+ v[8] = v[4] = box->x1;
+ v[5] = v[1] = box->y2;
+ v[9] = box->y1;
+
+ v[10] = v[6] = box->x1 * op->src.scale[0];
+ v[2] = box->x2 * op->src.scale[0];
+
+ v[11] = box->y1 * op->src.scale[1];
+ v[7] = v[3] = box->y2 * op->src.scale[1];
+
+ v += 12;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_primitive_affine_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -577,6 +701,39 @@ gen3_emit_composite_primitive_affine_source(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_boxes_affine_source(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ const PictTransform *transform = op->src.transform;
+
+ do {
+ v[0] = box->x2;
+ v[5] = v[1] = box->y2;
+ v[8] = v[4] = box->x1;
+ v[9] = box->y1;
+
+ _sna_get_transformed_scaled(box->x2 + op->src.offset[0],
+ box->y2 + op->src.offset[1],
+ transform, op->src.scale,
+ &v[2], &v[3]);
+
+ _sna_get_transformed_scaled(box->x1 + op->src.offset[0],
+ box->y2 + op->src.offset[1],
+ transform, op->src.scale,
+ &v[6], &v[7]);
+
+ _sna_get_transformed_scaled(box->x1 + op->src.offset[0],
+ box->y1 + op->src.offset[1],
+ transform, op->src.scale,
+ &v[10], &v[11]);
+
+ v += 12;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -1900,9 +2057,9 @@ gen3_render_composite_box(struct sna *sna,
}
static void
-gen3_render_composite_boxes(struct sna *sna,
- const struct sna_composite_op *op,
- const BoxRec *box, int nbox)
+gen3_render_composite_boxes__blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d, src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
__FUNCTION__, nbox,
@@ -1936,6 +2093,60 @@ gen3_render_composite_boxes(struct sna *sna,
}
static void
+gen3_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen3_get_rectangles(sna, op, nbox);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+ } while (nbox);
+}
+
+static void
+gen3_render_composite_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen3_get_rectangles(sna, op, nbox);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
+static void
gen3_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
@@ -2986,24 +3197,32 @@ gen3_render_composite(struct sna *sna,
case SHADER_WHITE:
case SHADER_CONSTANT:
tmp->prim_emit = gen3_emit_composite_primitive_constant;
+ tmp->emit_boxes = gen3_emit_composite_boxes_constant;
break;
case SHADER_LINEAR:
case SHADER_RADIAL:
- if (tmp->src.transform == NULL)
+ if (tmp->src.transform == NULL) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient;
- else if (tmp->src.is_affine)
+ tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient;
+ } else if (tmp->src.is_affine) {
tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient;
+ tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient;
+ }
break;
case SHADER_TEXTURE:
if (tmp->src.transform == NULL) {
- if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0)
+ if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset;
- else
+ tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset;
+ } else {
tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
+ tmp->emit_boxes = gen3_emit_composite_boxes_identity_source;
+ }
} else if (tmp->src.is_affine) {
tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
+ tmp->emit_boxes = gen3_emit_composite_boxes_affine_source;
}
break;
}
@@ -3035,7 +3254,11 @@ gen3_render_composite(struct sna *sna,
tmp->blt = gen3_render_composite_blt;
tmp->box = gen3_render_composite_box;
- tmp->boxes = gen3_render_composite_boxes;
+ tmp->boxes = gen3_render_composite_boxes__blt;
+ if (tmp->emit_boxes) {
+ tmp->boxes = gen3_render_composite_boxes;
+ tmp->thread_boxes = gen3_render_composite_boxes__thread;
+ }
tmp->done = gen3_render_composite_done;
if (!kgem_check_bo(&sna->kgem,
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 65016cd..e4f5f59 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -1109,9 +1109,9 @@ gen4_render_composite_box(struct sna *sna,
}
static void
-gen4_render_composite_boxes(struct sna *sna,
- const struct sna_composite_op *op,
- const BoxRec *box, int nbox)
+gen4_render_composite_boxes__blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
{
DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
__FUNCTION__, nbox, op->dst.x, op->dst.y,
@@ -1145,6 +1145,62 @@ gen4_render_composite_boxes(struct sna *sna,
} while (nbox);
}
+static void
+gen4_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen4_get_rectangles(sna, op, nbox,
+ gen4_bind_surfaces);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+ } while (nbox);
+}
+
+static void
+gen4_render_composite_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen4_get_rectangles(sna, op, nbox,
+ gen4_bind_surfaces);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
@@ -1899,7 +1955,11 @@ gen4_render_composite(struct sna *sna,
tmp->blt = gen4_render_composite_blt;
tmp->box = gen4_render_composite_box;
- tmp->boxes = gen4_render_composite_boxes;
+ tmp->boxes = gen4_render_composite_boxes__blt;
+ if (tmp->emit_boxes) {
+ tmp->boxes = gen4_render_composite_boxes;
+ tmp->thread_boxes = gen4_render_composite_boxes__thread;
+ }
tmp->done = gen4_render_composite_done;
if (!kgem_check_bo(&sna->kgem,
diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c
index cc679d3..e513166 100644
--- a/src/sna/gen4_vertex.c
+++ b/src/sna/gen4_vertex.c
@@ -360,6 +360,31 @@ emit_primitive_solid(struct sna *sna,
}
fastcall static void
+emit_boxes_solid(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ dst.p.x = box->x1;
+ v[2] = dst.f;
+ dst.p.y = box->y1;
+ v[4] = dst.f;
+
+ v[5] = v[3] = v[1] = .5;
+ box++;
+ v += 6;
+ } while (--nbox);
+}
+
+fastcall static void
emit_primitive_linear(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -390,6 +415,34 @@ emit_primitive_linear(struct sna *sna,
}
fastcall static void
+emit_boxes_linear(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ do {
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ dst.p.x = box->x1;
+ v[2] = dst.f;
+ dst.p.y = box->y1;
+ v[4] = dst.f;
+
+ v[1] = compute_linear(&op->src, box->x2, box->y2);
+ v[3] = compute_linear(&op->src, box->x1, box->y2);
+ v[5] = compute_linear(&op->src, box->x1, box->y1);
+
+ v += 6;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
emit_primitive_identity_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -421,6 +474,36 @@ emit_primitive_identity_source(struct sna *sna,
}
fastcall static void
+emit_boxes_identity_source(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ dst.p.x = box->x1;
+ v[3] = dst.f;
+ dst.p.y = box->y1;
+ v[6] = dst.f;
+
+ v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
+ v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
+
+ v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
+ v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
+
+ v += 9;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
emit_primitive_simple_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -461,6 +544,45 @@ emit_primitive_simple_source(struct sna *sna,
}
fastcall static void
+emit_boxes_simple_source(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ float xx = op->src.transform->matrix[0][0];
+ float x0 = op->src.transform->matrix[0][2];
+ float yy = op->src.transform->matrix[1][1];
+ float y0 = op->src.transform->matrix[1][2];
+ float sx = op->src.scale[0];
+ float sy = op->src.scale[1];
+ int16_t tx = op->src.offset[0];
+ int16_t ty = op->src.offset[1];
+
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ v[1] = ((box->x2 + tx) * xx + x0) * sx;
+ v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
+
+ dst.p.x = box->x1;
+ v[3] = dst.f;
+ v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;
+
+ dst.p.y = box->y1;
+ v[6] = dst.f;
+ v[8] = ((box->y1 + ty) * yy + y0) * sy;
+
+ v += 9;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
emit_primitive_affine_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -500,6 +622,43 @@ emit_primitive_affine_source(struct sna *sna,
}
fastcall static void
+emit_boxes_affine_source(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ _sna_get_transformed_scaled(op->src.offset[0] + box->x2,
+ op->src.offset[1] + box->y2,
+ op->src.transform, op->src.scale,
+ &v[1], &v[2]);
+
+ dst.p.x = box->x1;
+ v[3] = dst.f;
+ _sna_get_transformed_scaled(op->src.offset[0] + box->x1,
+ op->src.offset[1] + box->y2,
+ op->src.transform, op->src.scale,
+ &v[4], &v[5]);
+
+ dst.p.y = box->y1;
+ v[6] = dst.f;
+ _sna_get_transformed_scaled(op->src.offset[0] + box->x1,
+ op->src.offset[1] + box->y1,
+ op->src.transform, op->src.scale,
+ &v[7], &v[8]);
+ box++;
+ v += 9;
+ } while (--nbox);
+}
+
+fastcall static void
emit_primitive_identity_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -543,6 +702,40 @@ emit_primitive_identity_mask(struct sna *sna,
}
fastcall static void
+emit_boxes_identity_mask(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ float msk_x = op->mask.offset[0];
+ float msk_y = op->mask.offset[1];
+
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ v[2] = (msk_x + box->x2) * op->mask.scale[0];
+ v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
+
+ dst.p.x = box->x1;
+ v[4] = dst.f;
+ v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
+
+ dst.p.y = box->y1;
+ v[8] = dst.f;
+ v[11] = (msk_y + box->y1) * op->mask.scale[1];
+
+ v[9] = v[5] = v[1] = .5;
+ v += 12;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
emit_primitive_linear_identity_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -588,6 +781,43 @@ emit_primitive_linear_identity_mask(struct sna *sna,
}
fastcall static void
+emit_boxes_linear_identity_mask(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ float msk_x = op->mask.offset[0];
+ float msk_y = op->mask.offset[1];
+
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ v[2] = (msk_x + box->x2) * op->mask.scale[0];
+ v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
+
+ dst.p.x = box->x1;
+ v[4] = dst.f;
+ v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
+
+ dst.p.y = box->y1;
+ v[8] = dst.f;
+ v[11] = (msk_y + box->y1) * op->mask.scale[1];
+
+ v[1] = compute_linear(&op->src, box->x2, box->y2);
+ v[5] = compute_linear(&op->src, box->x1, box->y2);
+ v[9] = compute_linear(&op->src, box->x1, box->y1);
+
+ v += 12;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
emit_primitive_identity_source_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -760,11 +990,13 @@ unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp)
if (tmp->src.is_solid) {
DBG(("%s: solid, identity mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_identity_mask;
+ tmp->emit_boxes = emit_boxes_identity_mask;
tmp->floats_per_vertex = 4;
vb = 1 | 2 << 2;
} else if (tmp->src.is_linear) {
DBG(("%s: linear, identity mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_linear_identity_mask;
+ tmp->emit_boxes = emit_boxes_linear_identity_mask;
tmp->floats_per_vertex = 4;
vb = 1 | 2 << 2;
} else if (tmp->src.transform == NULL) {
@@ -821,6 +1053,7 @@ unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp)
if (tmp->src.is_solid) {
DBG(("%s: solid, no mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_solid;
+ tmp->emit_boxes = emit_boxes_solid;
if (tmp->src.is_opaque && tmp->op == PictOpOver)
tmp->op = PictOpSrc;
tmp->floats_per_vertex = 2;
@@ -828,11 +1061,13 @@ unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp)
} else if (tmp->src.is_linear) {
DBG(("%s: linear, no mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_linear;
+ tmp->emit_boxes = emit_boxes_linear;
tmp->floats_per_vertex = 2;
vb = 1;
} else if (tmp->src.transform == NULL) {
DBG(("%s: identity src, no mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_identity_source;
+ tmp->emit_boxes = emit_boxes_identity_source;
tmp->floats_per_vertex = 3;
vb = 2;
} else if (tmp->src.is_affine) {
@@ -841,9 +1076,11 @@ unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp)
if (!sna_affine_transform_is_rotation(tmp->src.transform)) {
DBG(("%s: simple src, no mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_simple_source;
+ tmp->emit_boxes = emit_boxes_simple_source;
} else {
DBG(("%s: affine src, no mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_affine_source;
+ tmp->emit_boxes = emit_boxes_affine_source;
}
tmp->floats_per_vertex = 3;
vb = 2;
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 81e6635..998d55e 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -1095,9 +1095,9 @@ gen5_render_composite_box(struct sna *sna,
}
static void
-gen5_render_composite_boxes(struct sna *sna,
- const struct sna_composite_op *op,
- const BoxRec *box, int nbox)
+gen5_render_composite_boxes__blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
{
DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
__FUNCTION__, nbox, op->dst.x, op->dst.y,
@@ -1131,6 +1131,62 @@ gen5_render_composite_boxes(struct sna *sna,
} while (nbox);
}
+static void
+gen5_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen5_get_rectangles(sna, op, nbox,
+ gen5_bind_surfaces);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+ } while (nbox);
+}
+
+static void
+gen5_render_composite_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen5_get_rectangles(sna, op, nbox,
+ gen5_bind_surfaces);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
@@ -1874,7 +1930,11 @@ gen5_render_composite(struct sna *sna,
tmp->blt = gen5_render_composite_blt;
tmp->box = gen5_render_composite_box;
- tmp->boxes = gen5_render_composite_boxes;
+ tmp->boxes = gen5_render_composite_boxes__blt;
+ if (tmp->emit_boxes) {
+ tmp->boxes = gen5_render_composite_boxes;
+ tmp->thread_boxes = gen5_render_composite_boxes__thread;
+ }
tmp->done = gen5_render_composite_done;
if (!kgem_check_bo(&sna->kgem,
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 4ff1606..4a9387a 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -1340,9 +1340,9 @@ gen6_render_composite_box(struct sna *sna,
}
static void
-gen6_render_composite_boxes(struct sna *sna,
- const struct sna_composite_op *op,
- const BoxRec *box, int nbox)
+gen6_render_composite_boxes__blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
{
DBG(("composite_boxes(%d)\n", nbox));
@@ -1372,6 +1372,62 @@ gen6_render_composite_boxes(struct sna *sna,
} while (nbox);
}
+static void
+gen6_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen6_get_rectangles(sna, op, nbox,
+ gen6_emit_composite_state);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+ } while (nbox);
+}
+
+static void
+gen6_render_composite_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen6_get_rectangles(sna, op, nbox,
+ gen6_emit_composite_state);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
@@ -2214,7 +2270,11 @@ gen6_render_composite(struct sna *sna,
tmp->blt = gen6_render_composite_blt;
tmp->box = gen6_render_composite_box;
- tmp->boxes = gen6_render_composite_boxes;
+ tmp->boxes = gen6_render_composite_boxes__blt;
+ if (tmp->emit_boxes) {
+ tmp->boxes = gen6_render_composite_boxes;
+ tmp->thread_boxes = gen6_render_composite_boxes__thread;
+ }
tmp->done = gen6_render_composite_done;
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 34ba252..6eec4b4 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -1463,9 +1463,9 @@ gen7_render_composite_box(struct sna *sna,
}
static void
-gen7_render_composite_boxes(struct sna *sna,
- const struct sna_composite_op *op,
- const BoxRec *box, int nbox)
+gen7_render_composite_boxes__blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
{
DBG(("composite_boxes(%d)\n", nbox));
@@ -1495,6 +1495,62 @@ gen7_render_composite_boxes(struct sna *sna,
} while (nbox);
}
+static void
+gen7_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen7_get_rectangles(sna, op, nbox,
+ gen7_emit_composite_state);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+ } while (nbox);
+}
+
+static void
+gen7_render_composite_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen7_get_rectangles(sna, op, nbox,
+ gen7_emit_composite_state);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
@@ -2334,7 +2390,11 @@ gen7_render_composite(struct sna *sna,
tmp->blt = gen7_render_composite_blt;
tmp->box = gen7_render_composite_box;
- tmp->boxes = gen7_render_composite_boxes;
+ tmp->boxes = gen7_render_composite_boxes__blt;
+ if (tmp->emit_boxes) {
+ tmp->boxes = gen7_render_composite_boxes;
+ tmp->thread_boxes = gen7_render_composite_boxes__thread;
+ }
tmp->done = gen7_render_composite_done;
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 5602579..edfcb9e 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -934,6 +934,76 @@ static void blt_composite_fill_boxes_no_offset(struct sna *sna,
_sna_blt_fill_boxes(sna, &op->u.blt, box, n);
}
+static void blt_composite_fill_boxes_no_offset__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ struct kgem *kgem = &sna->kgem;
+ const struct sna_blt_state *blt = &op->u.blt;
+ uint32_t cmd = blt->cmd;
+
+ DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));
+
+ sna_vertex_lock(&sna->render);
+ if (!kgem_check_batch(kgem, 3)) {
+ sna_vertex_wait__locked(&sna->render);
+ sna_blt_fill_begin(sna, blt);
+ }
+
+ do {
+ uint32_t *b = kgem->batch + kgem->nbatch;
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ kgem->nbatch += 3 * nbox_this_time;
+ assert(kgem->nbatch < kgem->surface);
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ while (nbox_this_time >= 8) {
+ b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
+ b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
+ b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
+ b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
+ b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++;
+ b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++;
+ b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++;
+ b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++;
+ b += 24;
+ nbox_this_time -= 8;
+ }
+ if (nbox_this_time & 4) {
+ b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
+ b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
+ b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
+ b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
+ b += 12;
+ }
+ if (nbox_this_time & 2) {
+ b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
+ b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
+ b += 6;
+ }
+ if (nbox_this_time & 1) {
+ b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
+ }
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ if (!nbox)
+ break;
+
+ sna_vertex_wait__locked(&sna->render);
+ sna_blt_fill_begin(sna, blt);
+ } while (1);
+ sna_vertex_unlock(&sna->render);
+}
+
fastcall static void blt_composite_fill_box(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box)
@@ -957,6 +1027,92 @@ static void blt_composite_fill_boxes(struct sna *sna,
} while (--n);
}
+static inline uint64_t add4(const BoxRec *b, int16_t x, int16_t y)
+{
+ union {
+ uint64_t v;
+ int16_t i[4];
+ } vi;
+ vi.v = *(uint64_t *)b;
+ vi.i[0] += x;
+ vi.i[1] += y;
+ vi.i[2] += x;
+ vi.i[3] += y;
+ return vi.v;
+}
+
+static void blt_composite_fill_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ struct kgem *kgem = &sna->kgem;
+ const struct sna_blt_state *blt = &op->u.blt;
+ uint32_t cmd = blt->cmd;
+ int16_t dx = op->dst.x;
+ int16_t dy = op->dst.y;
+
+ DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));
+
+ sna_vertex_lock(&sna->render);
+ if (!kgem_check_batch(kgem, 3)) {
+ sna_vertex_wait__locked(&sna->render);
+ sna_blt_fill_begin(sna, blt);
+ }
+
+ do {
+ uint32_t *b = kgem->batch + kgem->nbatch;
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ kgem->nbatch += 3 * nbox_this_time;
+ assert(kgem->nbatch < kgem->surface);
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ while (nbox_this_time >= 8) {
+ b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
+ b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
+ b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
+ b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
+ b[12] = cmd; *(uint64_t *)(b+13) = add4(box++, dx, dy);
+ b[15] = cmd; *(uint64_t *)(b+16) = add4(box++, dx, dy);
+ b[18] = cmd; *(uint64_t *)(b+19) = add4(box++, dx, dy);
+ b[21] = cmd; *(uint64_t *)(b+22) = add4(box++, dx, dy);
+ b += 24;
+ nbox_this_time -= 8;
+ }
+ if (nbox_this_time & 4) {
+ b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
+ b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
+ b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
+ b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
+ b += 12;
+ }
+ if (nbox_this_time & 2) {
+ b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
+ b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
+ b += 6;
+ }
+ if (nbox_this_time & 1) {
+ b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
+ }
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ if (!nbox)
+ break;
+
+ sna_vertex_wait__locked(&sna->render);
+ sna_blt_fill_begin(sna, blt);
+ } while (1);
+ sna_vertex_unlock(&sna->render);
+}
+
fastcall
static void blt_composite_nop(struct sna *sna,
const struct sna_composite_op *op,
@@ -1014,6 +1170,7 @@ prepare_blt_clear(struct sna *sna,
op->blt = blt_composite_fill__cpu;
op->box = blt_composite_fill_box__cpu;
op->boxes = blt_composite_fill_boxes__cpu;
+ op->thread_boxes = blt_composite_fill_boxes__cpu;
op->done = nop_done;
op->u.blt.pixel = 0;
return true;
@@ -1023,9 +1180,11 @@ prepare_blt_clear(struct sna *sna,
if (op->dst.x|op->dst.y) {
op->box = blt_composite_fill_box;
op->boxes = blt_composite_fill_boxes;
+ op->thread_boxes = blt_composite_fill_boxes__thread;
} else {
op->box = blt_composite_fill_box_no_offset;
op->boxes = blt_composite_fill_boxes_no_offset;
+ op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
}
op->done = nop_done;
@@ -1050,6 +1209,7 @@ prepare_blt_fill(struct sna *sna,
op->blt = blt_composite_fill__cpu;
op->box = blt_composite_fill_box__cpu;
op->boxes = blt_composite_fill_boxes__cpu;
+ op->thread_boxes = blt_composite_fill_boxes__cpu;
op->done = nop_done;
return true;
}
@@ -1058,9 +1218,11 @@ prepare_blt_fill(struct sna *sna,
if (op->dst.x|op->dst.y) {
op->box = blt_composite_fill_box;
op->boxes = blt_composite_fill_boxes;
+ op->thread_boxes = blt_composite_fill_boxes__thread;
} else {
op->box = blt_composite_fill_box_no_offset;
op->boxes = blt_composite_fill_boxes_no_offset;
+ op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
}
op->done = nop_done;
@@ -1151,6 +1313,141 @@ static void blt_composite_copy_boxes(struct sna *sna,
} while(--nbox);
}
+static inline uint32_t add2(uint32_t v, int16_t x, int16_t y)
+{
+ x += v & 0xffff;
+ y += v >> 16;
+ return (uint16_t)y << 16 | x;
+}
+
+static void blt_composite_copy_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ struct kgem *kgem = &sna->kgem;
+ int dst_dx = op->dst.x;
+ int dst_dy = op->dst.y;
+ int src_dx = op->src.offset[0];
+ int src_dy = op->src.offset[1];
+ uint32_t cmd = op->u.blt.cmd;
+ uint32_t br13 = op->u.blt.br13;
+ struct kgem_bo *src_bo = op->u.blt.bo[0];
+ struct kgem_bo *dst_bo = op->u.blt.bo[1];
+ int src_pitch = op->u.blt.pitch[0];
+
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ sna_vertex_lock(&sna->render);
+
+ if ((dst_dx | dst_dy) == 0) {
+ uint64_t hdr = (uint64_t)br13 << 32 | cmd;
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ do {
+ uint32_t *b = kgem->batch + kgem->nbatch;
+
+ DBG((" %s: box=(%d, %d)x(%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1, box->y2 - box->y1));
+
+ assert(box->x1 + src_dx >= 0);
+ assert(box->y1 + src_dy >= 0);
+ assert(box->x1 + src_dx <= INT16_MAX);
+ assert(box->y1 + src_dy <= INT16_MAX);
+
+ assert(box->x1 >= 0);
+ assert(box->y1 >= 0);
+
+ *(uint64_t *)&b[0] = hdr;
+ *(uint64_t *)&b[2] = *(const uint64_t *)box;
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = add2(b[2], src_dx, src_dy);
+ b[6] = src_pitch;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 8;
+ assert(kgem->nbatch < kgem->surface);
+ box++;
+ } while (--nbox_this_time);
+
+ if (!nbox)
+ break;
+
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ } while (1);
+ } else {
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ do {
+ uint32_t *b = kgem->batch + kgem->nbatch;
+
+ DBG((" %s: box=(%d, %d)x(%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1, box->y2 - box->y1));
+
+ assert(box->x1 + src_dx >= 0);
+ assert(box->y1 + src_dy >= 0);
+
+ assert(box->x1 + dst_dx >= 0);
+ assert(box->y1 + dst_dy >= 0);
+
+ b[0] = cmd;
+ b[1] = br13;
+ b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
+ b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
+ b[6] = src_pitch;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 8;
+ assert(kgem->nbatch < kgem->surface);
+ box++;
+ } while (--nbox_this_time);
+
+ if (!nbox)
+ break;
+
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ } while (1);
+ }
+ sna_vertex_unlock(&sna->render);
+}
+
fastcall static void
blt_composite_copy_with_alpha(struct sna *sna,
const struct sna_composite_op *op,
@@ -1277,6 +1574,7 @@ prepare_blt_copy(struct sna *sna,
op->blt = blt_composite_copy;
op->box = blt_composite_copy_box;
op->boxes = blt_composite_copy_boxes;
+ op->thread_boxes = blt_composite_copy_boxes__thread;
if (!sna_blt_copy_init(sna, &op->u.blt, bo, op->dst.bo,
src->drawable.bitsPerPixel,
@@ -2075,6 +2373,7 @@ sna_blt_composite__convert(struct sna *sna,
tmp->blt = blt_composite_copy;
tmp->box = blt_composite_copy_box;
tmp->boxes = blt_composite_copy_boxes;
+ tmp->thread_boxes = blt_composite_copy_boxes__thread;
if (!sna_blt_copy_init(sna, &tmp->u.blt,
tmp->src.bo, tmp->dst.bo,
@@ -2446,13 +2745,6 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
return true;
}
-static inline uint32_t add2(uint32_t v, int16_t x, int16_t y)
-{
- x += v & 0xffff;
- y += v >> 16;
- return (uint16_t)y << 16 | x;
-}
-
bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 6a0b1d8..c953e50 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -35,6 +35,8 @@ struct sna_composite_op {
const BoxRec *box);
void (*boxes)(struct sna *sna, const struct sna_composite_op *op,
const BoxRec *box, int nbox);
+ void (*thread_boxes)(struct sna *sna, const struct sna_composite_op *op,
+ const BoxRec *box, int nbox);
void (*done)(struct sna *sna, const struct sna_composite_op *op);
struct sna_damage **damage;
@@ -93,6 +95,9 @@ struct sna_composite_op {
fastcall void (*prim_emit)(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r);
+ fastcall void (*emit_boxes)(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v);
struct sna_composite_redirect {
struct kgem_bo *real_bo;
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index bf4816b..7f7492a 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -1876,7 +1876,7 @@ static void
mono_add_line(struct mono *mono,
int dst_x, int dst_y,
xFixed top, xFixed bottom,
- xPointFixed *p1, xPointFixed *p2,
+ const xPointFixed *p1, const xPointFixed *p2,
int dir)
{
struct mono_polygon *polygon = &mono->polygon;
@@ -1893,7 +1893,7 @@ mono_add_line(struct mono *mono,
dir));
if (top > bottom) {
- xPointFixed *t;
+ const xPointFixed *t;
y = top;
top = bottom;
@@ -2150,6 +2150,60 @@ mono_span__fast(struct mono *c, int x1, int x2, BoxPtr box)
c->op.box(c->sna, &c->op, box);
}
+struct mono_span_thread_boxes {
+ const struct sna_composite_op *op;
+#define MONO_SPAN_MAX_BOXES (8192/sizeof(BoxRec))
+ BoxRec boxes[MONO_SPAN_MAX_BOXES];
+ int num_boxes;
+};
+
+inline static void
+thread_mono_span_add_boxes(struct mono *c, const BoxRec *box, int count)
+{
+ struct mono_span_thread_boxes *b = c->op.priv;
+
+ assert(count > 0 && count <= MONO_SPAN_MAX_BOXES);
+ if (b->num_boxes + count > MONO_SPAN_MAX_BOXES) {
+ b->op->thread_boxes(c->sna, b->op, b->boxes, b->num_boxes);
+ b->num_boxes = 0;
+ }
+
+ memcpy(b->boxes + b->num_boxes, box, count*sizeof(BoxRec));
+ b->num_boxes += count;
+ assert(b->num_boxes <= MONO_SPAN_MAX_BOXES);
+}
+
+fastcall static void
+thread_mono_span_clipped(struct mono *c, int x1, int x2, BoxPtr box)
+{
+ pixman_region16_t region;
+
+ __DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2));
+
+ box->x1 = x1;
+ box->x2 = x2;
+
+ assert(c->clip.data);
+
+ pixman_region_init_rects(&region, box, 1);
+ RegionIntersect(&region, &region, &c->clip);
+ if (REGION_NUM_RECTS(&region))
+ thread_mono_span_add_boxes(c,
+ REGION_RECTS(&region),
+ REGION_NUM_RECTS(&region));
+ pixman_region_fini(&region);
+}
+
+fastcall static void
+thread_mono_span(struct mono *c, int x1, int x2, BoxPtr box)
+{
+ __DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2));
+
+ box->x1 = x1;
+ box->x2 = x2;
+ thread_mono_span_add_boxes(c, box, 1);
+}
+
inline static void
mono_row(struct mono *c, int16_t y, int16_t h)
{
@@ -2267,10 +2321,7 @@ mono_render(struct mono *mono)
struct mono_polygon *polygon = &mono->polygon;
int i, j, h = mono->clip.extents.y2 - mono->clip.extents.y1;
- if (mono->clip.data == NULL && mono->op.damage == NULL)
- mono->span = mono_span__fast;
- else
- mono->span = mono_span;
+ assert(mono->span);
for (i = 0; i < h; i = j) {
j = i + 1;
@@ -4053,6 +4104,74 @@ choose_span(struct sna_composite_spans_op *tmp,
return span;
}
+struct mono_span_thread {
+ struct sna *sna;
+ const xTrapezoid *traps;
+ const struct sna_composite_op *op;
+ RegionPtr clip;
+ int ntrap;
+ BoxRec extents;
+ int dx, dy;
+};
+
+static void
+mono_span_thread(void *arg)
+{
+ struct mono_span_thread *thread = arg;
+ struct mono mono;
+ struct mono_span_thread_boxes boxes;
+ const xTrapezoid *t;
+ int n;
+
+ mono.sna = thread->sna;
+
+ mono.clip.extents = thread->extents;
+ mono.clip.data = NULL;
+ if (thread->clip->data) {
+ RegionIntersect(&mono.clip, &mono.clip, thread->clip);
+ if (RegionNil(&mono.clip))
+ return;
+ }
+
+ boxes.op = thread->op;
+ boxes.num_boxes = 0;
+ mono.op.priv = &boxes;
+
+ if (!mono_init(&mono, 2*thread->ntrap)) {
+ RegionUninit(&mono.clip);
+ return;
+ }
+
+ for (n = thread->ntrap, t = thread->traps; n--; t++) {
+ if (!xTrapezoidValid(t))
+ continue;
+
+ if (pixman_fixed_to_int(t->top) + thread->dy >= thread->extents.y2 ||
+ pixman_fixed_to_int(t->bottom) + thread->dy <= thread->extents.y1)
+ continue;
+
+ mono_add_line(&mono, thread->dx, thread->dy,
+ t->top, t->bottom,
+ &t->left.p1, &t->left.p2, 1);
+ mono_add_line(&mono, thread->dx, thread->dy,
+ t->top, t->bottom,
+ &t->right.p1, &t->right.p2, -1);
+ }
+
+ if (mono.clip.data == NULL)
+ mono.span = thread_mono_span;
+ else
+ mono.span = thread_mono_span_clipped;
+
+ mono_render(&mono);
+ mono_fini(&mono);
+
+ if (boxes.num_boxes)
+ thread->op->thread_boxes(thread->sna, thread->op,
+ boxes.boxes, boxes.num_boxes);
+ RegionUninit(&mono.clip);
+}
+
static bool
mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
INT16 src_x, INT16 src_y,
@@ -4062,8 +4181,8 @@ mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
BoxRec extents;
int16_t dst_x, dst_y;
int16_t dx, dy;
- bool was_clear;
- int n;
+ bool unbounded;
+ int num_threads, n;
if (NO_SCAN_CONVERTER)
return false;
@@ -4102,11 +4221,69 @@ mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
src_x + mono.clip.extents.x1 - dst_x - dx,
src_y + mono.clip.extents.y1 - dst_y - dy));
+ unbounded = (!sna_drawable_is_clear(dst->pDrawable) &&
+ !operator_is_bounded(op));
+
mono.sna = to_sna_from_drawable(dst->pDrawable);
- if (!mono_init(&mono, 2*ntrap))
+ if (!mono.sna->render.composite(mono.sna, op, src, NULL, dst,
+ src_x + mono.clip.extents.x1 - dst_x - dx,
+ src_y + mono.clip.extents.y1 - dst_y - dy,
+ 0, 0,
+ mono.clip.extents.x1, mono.clip.extents.y1,
+ mono.clip.extents.x2 - mono.clip.extents.x1,
+ mono.clip.extents.y2 - mono.clip.extents.y1,
+ memset(&mono.op, 0, sizeof(mono.op))))
return false;
- was_clear = sna_drawable_is_clear(dst->pDrawable);
+ num_threads = 1;
+ if (!NO_GPU_THREADS &&
+ mono.op.thread_boxes &&
+ mono.op.damage == NULL &&
+ !unbounded)
+ num_threads = sna_use_threads(mono.clip.extents.x2 - mono.clip.extents.x1,
+ mono.clip.extents.y2 - mono.clip.extents.y1,
+ 16);
+ if (num_threads > 1) {
+ struct mono_span_thread threads[num_threads];
+ int y, h;
+
+ DBG(("%s: using %d threads for mono span compositing %dx%d\n",
+ __FUNCTION__, num_threads,
+ mono.clip.extents.x2 - mono.clip.extents.x1,
+ mono.clip.extents.y2 - mono.clip.extents.y1));
+
+ threads[0].sna = mono.sna;
+ threads[0].op = &mono.op;
+ threads[0].traps = traps;
+ threads[0].ntrap = ntrap;
+ threads[0].extents = mono.clip.extents;
+ threads[0].clip = &mono.clip;
+ threads[0].dx = dx;
+ threads[0].dy = dy;
+
+ y = extents.y1;
+ h = extents.y2 - extents.y1;
+ h = (h + num_threads - 1) / num_threads;
+
+ for (n = 1; n < num_threads; n++) {
+ threads[n] = threads[0];
+ threads[n].extents.y1 = y;
+ threads[n].extents.y2 = y += h;
+
+ sna_threads_run(mono_span_thread, &threads[n]);
+ }
+
+ threads[0].extents.y1 = y;
+ threads[0].extents.y2 = extents.y2;
+ mono_span_thread(&threads[0]);
+
+ sna_threads_wait();
+ mono.op.done(mono.sna, &mono.op);
+ return true;
+ }
+
+ if (!mono_init(&mono, 2*ntrap))
+ return false;
for (n = 0; n < ntrap; n++) {
if (!xTrapezoidValid(&traps[n]))
@@ -4124,23 +4301,16 @@ mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
&traps[n].right.p1, &traps[n].right.p2, -1);
}
- memset(&mono.op, 0, sizeof(mono.op));
- if (!mono.sna->render.composite(mono.sna, op, src, NULL, dst,
- src_x + mono.clip.extents.x1 - dst_x - dx,
- src_y + mono.clip.extents.y1 - dst_y - dy,
- 0, 0,
- mono.clip.extents.x1, mono.clip.extents.y1,
- mono.clip.extents.x2 - mono.clip.extents.x1,
- mono.clip.extents.y2 - mono.clip.extents.y1,
- &mono.op)) {
- mono_fini(&mono);
- return false;
- }
+ if (mono.clip.data == NULL && mono.op.damage == NULL)
+ mono.span = mono_span__fast;
+ else
+ mono.span = mono_span;
+
mono_render(&mono);
mono.op.done(mono.sna, &mono.op);
mono_fini(&mono);
- if (!was_clear && !operator_is_bounded(op)) {
+ if (unbounded) {
xPointFixed p1, p2;
if (!mono_init(&mono, 2+2*ntrap))
@@ -5245,6 +5415,11 @@ unbounded_pass:
mono.op.box = mono_inplace_composite_box;
mono.op.boxes = mono_inplace_composite_boxes;
}
+
+ if (mono.clip.data == NULL && mono.op.damage == NULL)
+ mono.span = mono_span__fast;
+ else
+ mono.span = mono_span;
mono_render(&mono);
mono_fini(&mono);
@@ -6850,6 +7025,10 @@ mono_triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
mono.clip.extents.x2 - mono.clip.extents.x1,
mono.clip.extents.y2 - mono.clip.extents.y1,
&mono.op)) {
+ if (mono.clip.data == NULL && mono.op.damage == NULL)
+ mono.span = mono_span__fast;
+ else
+ mono.span = mono_span;
mono_render(&mono);
mono.op.done(mono.sna, &mono.op);
}
@@ -6893,6 +7072,10 @@ mono_triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
mono.clip.extents.x2 - mono.clip.extents.x1,
mono.clip.extents.y2 - mono.clip.extents.y1,
&mono.op)) {
+ if (mono.clip.data == NULL && mono.op.damage == NULL)
+ mono.span = mono_span__fast;
+ else
+ mono.span = mono_span;
mono_render(&mono);
mono.op.done(mono.sna, &mono.op);
}
commit 8178cff5718e69e14d3953a7f754d7585a06838f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sat Jan 26 14:41:04 2013 +0000
sna: Begin sketching out a threaded rasteriser for spans
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/configure.ac b/configure.ac
index cb1496b..46affdc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -104,6 +104,40 @@ if test x$ASM != "xno"; then
fi
AM_CONDITIONAL(HAVE_GEN4ASM, test x$gen4asm = xyes)
+# Check for atomic intrinsics
+AC_CACHE_CHECK([for native atomic primitives], intel_cv_atomic_primitives,
+[
+ intel_cv_atomic_primitives="none"
+
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+int atomic_add(int i) { return __sync_fetch_and_add (&i, 1); }
+int atomic_cmpxchg(int i, int j, int k) { return __sync_val_compare_and_swap (&i, j, k); }
+ ]],[[]])],
+ [intel_cv_atomic_primitives="Intel"],[])
+
+ if test "x$intel_cv_atomic_primitives" = "xnone"; then
+ AC_CHECK_HEADER([atomic_ops.h], intel_cv_atomic_primitives="libatomic-ops")
+ fi
+
+ # atomic functions defined in <atomic.h> & libc on Solaris
+ if test "x$intel_cv_atomic_primitives" = "xnone"; then
+ AC_CHECK_FUNC([atomic_cas_uint],
+ intel_cv_atomic_primitives="Solaris")
+ fi
+
+])
+if test "x$intel_cv_atomic_primitives" = xIntel; then
+ AC_DEFINE(HAVE_ATOMIC_PRIMITIVES, 1,
+ [Enable if your compiler supports the Intel __sync_* atomic primitives])
+fi
+if test "x$intel_cv_atomic_primitives" = "xlibatomic-ops"; then
+ AC_DEFINE(HAVE_LIB_ATOMIC_OPS, 1, [Enable if you have libatomic-ops-dev installed])
+fi
+
+if test "x$intel_cv_atomic_primitives" = "xnone"; then
+ AC_MSG_ERROR([xf86-video-intel depends upon atomic operations, which were not found for your compiler/cpu. Try compiling with -march=native, or install the libatomics-op-dev package.])
+fi
+
AC_ARG_ENABLE(udev,
AS_HELP_STRING([--disable-udev],
[Disable udev-based monitor hotplug detection [default=auto]]),
diff --git a/src/sna/Makefile.am b/src/sna/Makefile.am
index bfa836f..c74c904 100644
--- a/src/sna/Makefile.am
+++ b/src/sna/Makefile.am
@@ -38,6 +38,7 @@ libsna_la_LDFLAGS = -pthread
libsna_la_LIBADD = @UDEV_LIBS@ -lm @DRM_LIBS@ brw/libbrw.la fb/libfb.la
libsna_la_SOURCES = \
+ atomic.h \
blt.c \
compiler.h \
kgem.c \
@@ -64,6 +65,7 @@ libsna_la_SOURCES = \
sna_tiling.c \
sna_transform.c \
sna_threads.c \
+ sna_vertex.c \
sna_video.c \
sna_video.h \
sna_video_overlay.c \
diff --git a/src/sna/atomic.h b/src/sna/atomic.h
new file mode 100644
index 0000000..306dc6d
--- /dev/null
+++ b/src/sna/atomic.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris at chris-wilson.co.uk>
+ *
+ */
+
+#ifndef ATOMIC_H
+#define ATOMIC_H
+
+#if HAVE_ATOMIC_PRIMITIVES
+
+#define HAS_ATOMIC_OPS 1
+
+typedef struct {
+ int atomic;
+} atomic_t;
+
+# define atomic_read(x) ((x)->atomic)
+# define atomic_set(x, val) ((x)->atomic = (val))
+# define atomic_inc(x) ((void) __sync_fetch_and_add (&(x)->atomic, 1))
+# define atomic_dec_and_test(x) (__sync_fetch_and_add (&(x)->atomic, -1) == 1)
+# define atomic_add(x, v) ((void) __sync_add_and_fetch(&(x)->atomic, (v)))
+# define atomic_dec(x, v) ((void) __sync_sub_and_fetch(&(x)->atomic, (v)))
+# define atomic_cmpxchg(x, oldv, newv) __sync_val_compare_and_swap (&(x)->atomic, oldv, newv)
+
+#endif
+
+#if HAVE_LIB_ATOMIC_OPS
+#include <atomic_ops.h>
+
+#define HAS_ATOMIC_OPS 1
+
+typedef struct {
+ AO_t atomic;
+} atomic_t;
+
+# define atomic_read(x) AO_load_full(&(x)->atomic)
+# define atomic_set(x, val) AO_store_full(&(x)->atomic, (val))
+# define atomic_inc(x) ((void) AO_fetch_and_add1_full(&(x)->atomic))
+# define atomic_add(x, v) ((void) AO_fetch_and_add_full(&(x)->atomic, (v)))
+# define atomic_dec(x, v) ((void) AO_fetch_and_add_full(&(x)->atomic, -(v)))
+# define atomic_dec_and_test(x) (AO_fetch_and_sub1_full(&(x)->atomic) == 1)
+# define atomic_cmpxchg(x, oldv, newv) AO_compare_and_swap_full(&(x)->atomic, oldv, newv)
+
+#endif
+
+#if defined(__sun) && !defined(HAS_ATOMIC_OPS) /* Solaris & OpenSolaris */
+
+#include <sys/atomic.h>
+#define HAS_ATOMIC_OPS 1
+
+typedef struct { uint_t atomic; } atomic_t;
+
+# define atomic_read(x) (int) ((x)->atomic)
+# define atomic_set(x, val) ((x)->atomic = (uint_t)(val))
+# define atomic_inc(x) (atomic_inc_uint (&(x)->atomic))
+# define atomic_dec_and_test(x) (atomic_dec_uint_nv(&(x)->atomic) == 0)
+# define atomic_add(x, v) (atomic_add_int(&(x)->atomic, (v)))
+# define atomic_dec(x, v) (atomic_add_int(&(x)->atomic, -(v)))
+# define atomic_cmpxchg(x, oldv, newv) atomic_cas_uint (&(x)->atomic, oldv, newv)
+
+#endif
+
+#if ! HAS_ATOMIC_OPS
+#error xf86-video-intel requires atomic operations, please define them for your CPU/compiler.
+#endif
+
+#endif
diff --git a/src/sna/compiler.h b/src/sna/compiler.h
index ff80365..b985f2b 100644
--- a/src/sna/compiler.h
+++ b/src/sna/compiler.h
@@ -36,6 +36,7 @@
#define fastcall __attribute__((regparm(3)))
#define must_check __attribute__((warn_unused_result))
#define constant __attribute__((const))
+#define __packed__ __attribute__((__packed__))
#else
#define likely(expr) (expr)
#define unlikely(expr) (expr)
@@ -44,6 +45,7 @@
#define fastcall
#define must_check
#define constant
+#define __packed__
#endif
#ifdef HAVE_VALGRIND
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 01c0aee..3224d71 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -1618,6 +1618,8 @@ static int gen3_vertex_finish(struct sna *sna)
assert(sna->render.vertex_used);
assert(sna->render.vertex_used <= sna->render.vertex_size);
+ sna_vertex_wait__locked(&sna->render);
+
bo = sna->render.vbo;
if (bo) {
DBG(("%s: reloc = %d\n", __FUNCTION__,
@@ -1796,6 +1798,17 @@ static int gen3_get_rectangles__flush(struct sna *sna,
}
}
+ /* Prevent discarding the new vbo after lock contention */
+ if (sna->render.active) {
+ int rem;
+
+ sna_vertex_wait__locked(&sna->render);
+
+ rem = vertex_space(sna);
+ if (rem > op->floats_per_rect)
+ return rem;
+ }
+
return gen3_vertex_finish(sna);
}
@@ -1838,6 +1851,7 @@ flush:
gen3_vertex_flush(sna);
gen3_magic_ca_pass(sna, op);
}
+ gen3_vertex_finish(sna);
_kgem_submit(&sna->kgem);
gen3_emit_composite_state(sna, op);
assert(sna->render.vertex_offset == 0);
@@ -3081,6 +3095,26 @@ gen3_emit_composite_spans_primitive_zero(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_zero__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox, float *v)
+{
+ do {
+ v[0] = op->base.dst.x + b->box.x2;
+ v[1] = op->base.dst.y + b->box.y2;
+
+ v[2] = op->base.dst.x + b->box.x1;
+ v[3] = v[1];
+
+ v[4] = v[2];
+ v[5] = op->base.dst.y + b->box.y1;
+
+ v += 6;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3096,6 +3130,22 @@ gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_zero_no_offset__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox, float *v)
+{
+ do {
+ v[0] = b->box.x2;
+ v[3] = v[1] = b->box.y2;
+ v[4] = v[2] = b->box.x1;
+ v[5] = b->box.y1;
+
+ b++;
+ v += 6;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive_constant(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3112,6 +3162,24 @@ gen3_emit_composite_spans_primitive_constant(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_constant__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox,
+ float *v)
+{
+ do {
+ v[0] = op->base.dst.x + b->box.x2;
+ v[6] = v[3] = op->base.dst.x + b->box.x1;
+ v[4] = v[1] = op->base.dst.y + b->box.y2;
+ v[7] = op->base.dst.y + b->box.y1;
+ v[8] = v[5] = v[2] = b->alpha;
+
+ v += 9;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3128,6 +3196,23 @@ gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_constant_no_offset__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox, float *v)
+{
+ do {
+ v[0] = b->box.x2;
+ v[6] = v[3] = b->box.x1;
+ v[4] = v[1] = b->box.y2;
+ v[7] = b->box.y1;
+ v[8] = v[5] = v[2] = b->alpha;
+
+ v += 9;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive_identity_source(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3156,6 +3241,36 @@ gen3_emit_composite_spans_primitive_identity_source(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_identity_source__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox,
+ float *v)
+{
+ do {
+ v[0] = op->base.dst.x + b->box.x2;
+ v[1] = op->base.dst.y + b->box.y2;
+ v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0];
+ v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1];
+ v[4] = b->alpha;
+
+ v[5] = op->base.dst.x + b->box.x1;
+ v[6] = v[1];
+ v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0];
+ v[8] = v[3];
+ v[9] = b->alpha;
+
+ v[10] = v[5];
+ v[11] = op->base.dst.y + b->box.y1;
+ v[12] = v[7];
+ v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1];
+ v[14] = b->alpha;
+
+ v += 15;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive_affine_source(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3190,6 +3305,40 @@ gen3_emit_composite_spans_primitive_affine_source(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_affine_source__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox,
+ float *v)
+{
+ PictTransform *transform = op->base.src.transform;
+
+ do {
+ v[0] = op->base.dst.x + b->box.x2;
+ v[6] = v[1] = op->base.dst.y + b->box.y2;
+ v[10] = v[5] = op->base.dst.x + b->box.x1;
+ v[11] = op->base.dst.y + b->box.y1;
+ v[14] = v[9] = v[4] = b->alpha;
+
+ _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2,
+ (int)op->base.src.offset[1] + b->box.y2,
+ transform, op->base.src.scale,
+ &v[2], &v[3]);
+
+ _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
+ (int)op->base.src.offset[1] + b->box.y2,
+ transform, op->base.src.scale,
+ &v[7], &v[8]);
+
+ _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
+ (int)op->base.src.offset[1] + b->box.y1,
+ transform, op->base.src.scale,
+ &v[12], &v[13]);
+ v += 15;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3218,6 +3367,36 @@ gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_identity_gradient__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox,
+ float *v)
+{
+ do {
+ v[0] = op->base.dst.x + b->box.x2;
+ v[1] = op->base.dst.y + b->box.y2;
+ v[2] = op->base.src.offset[0] + b->box.x2;
+ v[3] = op->base.src.offset[1] + b->box.y2;
+ v[4] = b->alpha;
+
+ v[5] = op->base.dst.x + b->box.x1;
+ v[6] = v[1];
+ v[7] = op->base.src.offset[0] + b->box.x1;
+ v[8] = v[3];
+ v[9] = b->alpha;
+
+ v[10] = v[5];
+ v[11] = op->base.dst.y + b->box.y1;
+ v[12] = v[7];
+ v[13] = op->base.src.offset[1] + b->box.y1;
+ v[14] = b->alpha;
+
+ v += 15;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3253,6 +3432,43 @@ gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_affine_gradient__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox,
+ float *v)
+{
+ PictTransform *transform = op->base.src.transform;
+
+ do {
+ v[0] = op->base.dst.x + b->box.x2;
+ v[1] = op->base.dst.y + b->box.y2;
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + b->box.x2,
+ (int)op->base.src.offset[1] + b->box.y2,
+ transform,
+ &v[2], &v[3]);
+ v[4] = b->alpha;
+
+ v[5] = op->base.dst.x + b->box.x1;
+ v[6] = v[1];
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + b->box.x1,
+ (int)op->base.src.offset[1] + b->box.y2,
+ transform,
+ &v[7], &v[8]);
+ v[9] = b->alpha;
+
+ v[10] = v[5];
+ v[11] = op->base.dst.y + b->box.y1;
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + b->box.x1,
+ (int)op->base.src.offset[1] + b->box.y1,
+ transform,
+ &v[12], &v[13]);
+ v[14] = b->alpha;
+ v += 15;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3297,6 +3513,48 @@ gen3_render_composite_spans_constant_box(struct sna *sna,
}
fastcall static void
+gen3_render_composite_spans_constant_thread_boxes(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->base.src.offset[0], op->base.src.offset[1],
+ op->base.dst.x, op->base.dst.y));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * 9;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ do {
+ v[0] = box->box.x2;
+ v[6] = v[3] = box->box.x1;
+ v[4] = v[1] = box->box.y2;
+ v[7] = box->box.y1;
+ v[8] = v[5] = v[2] = box->alpha;
+ v += 9;
+ box++;
+ } while (--nbox_this_time);
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
+fastcall static void
gen3_render_composite_spans_box(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box, float opacity)
@@ -3344,6 +3602,41 @@ gen3_render_composite_spans_boxes(struct sna *sna,
}
fastcall static void
+gen3_render_composite_spans_boxes__thread(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->base.src.offset[0], op->base.src.offset[1],
+ op->base.dst.x, op->base.dst.y));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
+fastcall static void
gen3_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
@@ -3447,40 +3740,58 @@ gen3_render_composite_spans(struct sna *sna,
no_offset = tmp->base.dst.x == 0 && tmp->base.dst.y == 0;
tmp->box = gen3_render_composite_spans_box;
tmp->boxes = gen3_render_composite_spans_boxes;
+ tmp->thread_boxes = gen3_render_composite_spans_boxes__thread;
tmp->done = gen3_render_composite_spans_done;
tmp->prim_emit = gen3_emit_composite_spans_primitive;
switch (tmp->base.src.u.gen3.type) {
case SHADER_NONE:
assert(0);
case SHADER_ZERO:
- tmp->prim_emit = no_offset ? gen3_emit_composite_spans_primitive_zero_no_offset : gen3_emit_composite_spans_primitive_zero;
+ if (no_offset) {
+ tmp->prim_emit = gen3_emit_composite_spans_primitive_zero_no_offset;
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero_no_offset__boxes;
+ } else {
+ tmp->prim_emit = gen3_emit_composite_spans_primitive_zero;
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero__boxes;
+ }
break;
case SHADER_BLACK:
case SHADER_WHITE:
case SHADER_CONSTANT:
if (no_offset) {
tmp->box = gen3_render_composite_spans_constant_box;
+ tmp->thread_boxes = gen3_render_composite_spans_constant_thread_boxes;
tmp->prim_emit = gen3_emit_composite_spans_primitive_constant_no_offset;
- } else
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant_no_offset__boxes;
+ } else {
tmp->prim_emit = gen3_emit_composite_spans_primitive_constant;
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__boxes;
+ }
break;
case SHADER_LINEAR:
case SHADER_RADIAL:
- if (tmp->base.src.transform == NULL)
+ if (tmp->base.src.transform == NULL) {
tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient;
- else if (tmp->base.src.is_affine)
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__boxes;
+ } else if (tmp->base.src.is_affine) {
tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient;
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__boxes;
+ }
break;
case SHADER_TEXTURE:
- if (tmp->base.src.transform == NULL)
+ if (tmp->base.src.transform == NULL) {
tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source;
- else if (tmp->base.src.is_affine) {
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__boxes;
+ } else if (tmp->base.src.is_affine) {
tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source;
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__boxes;
}
break;
}
+ if (tmp->emit_boxes == NULL)
+ tmp->thread_boxes = NULL;
tmp->base.mask.bo = NULL;
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index d2f3fff..65016cd 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -621,6 +621,17 @@ static int gen4_get_rectangles__flush(struct sna *sna,
op->u.gen4.wm_kernel);
}
+ /* Prevent discarding the new vbo after lock contention */
+ if (sna->render.active) {
+ int rem;
+
+ sna_vertex_wait__locked(&sna->render);
+
+ rem = vertex_space(sna);
+ if (rem > op->floats_per_rect)
+ return rem;
+ }
+
return gen4_vertex_finish(sna);
}
@@ -656,6 +667,7 @@ flush:
gen4_vertex_flush(sna);
gen4_magic_ca_pass(sna, op);
}
+ gen4_vertex_finish(sna);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
@@ -1966,6 +1978,42 @@ gen4_render_composite_spans_boxes(struct sna *sna,
}
fastcall static void
+gen4_render_composite_spans_boxes__thread(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->base.src.offset[0], op->base.src.offset[1],
+ op->base.dst.x, op->base.dst.y));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox,
+ gen4_bind_surfaces);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
+fastcall static void
gen4_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
@@ -2080,6 +2128,8 @@ gen4_render_composite_spans(struct sna *sna,
tmp->box = gen4_render_composite_spans_box;
tmp->boxes = gen4_render_composite_spans_boxes;
+ if (tmp->emit_boxes)
+ tmp->thread_boxes = gen4_render_composite_spans_boxes__thread;
tmp->done = gen4_render_composite_spans_done;
if (!kgem_check_bo(&sna->kgem,
diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c
index 4e40467..cc679d3 100644
--- a/src/sna/gen4_vertex.c
+++ b/src/sna/gen4_vertex.c
@@ -36,12 +36,13 @@
void gen4_vertex_flush(struct sna *sna)
{
- assert(sna->render.vertex_offset);
- assert(sna->render.vertex_index > sna->render.vertex_start);
-
DBG(("%s[%x] = %d\n", __FUNCTION__,
4*sna->render.vertex_offset,
sna->render.vertex_index - sna->render.vertex_start));
+
+ assert(sna->render.vertex_offset);
+ assert(sna->render.vertex_index > sna->render.vertex_start);
+
sna->kgem.batch[sna->render.vertex_offset] =
sna->render.vertex_index - sna->render.vertex_start;
sna->render.vertex_offset = 0;
@@ -58,6 +59,8 @@ int gen4_vertex_finish(struct sna *sna)
assert(sna->render.vertex_offset == 0);
assert(sna->render.vertex_used);
+ sna_vertex_wait__locked(&sna->render);
+
/* Note: we only need dword alignment (currently) */
bo = sna->render.vbo;
@@ -73,6 +76,7 @@ int gen4_vertex_finish(struct sna *sna)
0);
}
+ assert(!sna->render.active);
sna->render.nvertex_reloc = 0;
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
@@ -87,6 +91,7 @@ int gen4_vertex_finish(struct sna *sna)
hint |= CREATE_CACHED | CREATE_NO_THROTTLE;
size = 256*1024;
+ assert(!sna->render.active);
sna->render.vertices = NULL;
sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
while (sna->render.vbo == NULL && size > 16*1024) {
@@ -144,6 +149,8 @@ void gen4_vertex_close(struct sna *sna)
__FUNCTION__, sna->render.vertex_used, sna->render.vbo ? sna->render.vbo->handle : 0,
sna->render.vb_id, sna->render.nvertex_reloc));
+ assert(!sna->render.active);
+
bo = sna->render.vbo;
if (bo) {
if (sna->render.vertex_size - sna->render.vertex_used < 64) {
@@ -205,6 +212,7 @@ void gen4_vertex_close(struct sna *sna)
sna->render.vb_id = 0;
if (sna->render.vbo == NULL) {
+ assert(!sna->render.active);
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
assert(sna->render.vertices == sna->render.vertex_data);
@@ -853,7 +861,7 @@ unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp)
}
inline static void
-emit_spans_vertex(struct sna *sna,
+emit_span_vertex(struct sna *sna,
const struct sna_composite_spans_op *op,
int16_t x, int16_t y)
{
@@ -867,18 +875,18 @@ emit_composite_spans_primitive(struct sna *sna,
const BoxRec *box,
float opacity)
{
- emit_spans_vertex(sna, op, box->x2, box->y2);
+ emit_span_vertex(sna, op, box->x2, box->y2);
OUT_VERTEX_F(opacity);
- emit_spans_vertex(sna, op, box->x1, box->y2);
+ emit_span_vertex(sna, op, box->x1, box->y2);
OUT_VERTEX_F(opacity);
- emit_spans_vertex(sna, op, box->x1, box->y1);
+ emit_span_vertex(sna, op, box->x1, box->y1);
OUT_VERTEX_F(opacity);
}
fastcall static void
-emit_spans_solid(struct sna *sna,
+emit_span_solid(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
@@ -909,7 +917,36 @@ emit_spans_solid(struct sna *sna,
}
fastcall static void
-emit_spans_identity(struct sna *sna,
+emit_span_boxes_solid(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox, float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = b->box.x2;
+ dst.p.y = b->box.y2;
+ v[0] = dst.f;
+
+ dst.p.x = b->box.x1;
+ v[3] = dst.f;
+
+ dst.p.y = b->box.y1;
+ v[6] = dst.f;
+
+ v[7] = v[4] = v[1] = .5;
+ v[8] = v[5] = v[2] = b->alpha;
+
+ v += 9;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
+emit_span_identity(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
@@ -949,7 +986,43 @@ emit_spans_identity(struct sna *sna,
}
fastcall static void
-emit_spans_simple(struct sna *sna,
+emit_span_boxes_identity(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ float sx = op->base.src.scale[0];
+ float sy = op->base.src.scale[1];
+ int16_t tx = op->base.src.offset[0];
+ int16_t ty = op->base.src.offset[1];
+
+ dst.p.x = b->box.x2;
+ dst.p.y = b->box.y2;
+ v[0] = dst.f;
+ v[1] = (b->box.x2 + tx) * sx;
+ v[6] = v[2] = (b->box.y2 + ty) * sy;
+
+ dst.p.x = b->box.x1;
+ v[4] = dst.f;
+ v[9] = v[5] = (b->box.x1 + tx) * sx;
+
+ dst.p.y = b->box.y1;
+ v[8] = dst.f;
+ v[10] = (b->box.y1 + ty) * sy;
+
+ v[11] = v[7] = v[3] = b->alpha;
+ v += 12;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
+emit_span_simple(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
@@ -993,7 +1066,47 @@ emit_spans_simple(struct sna *sna,
}
fastcall static void
-emit_spans_affine(struct sna *sna,
+emit_span_boxes_simple(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ float xx = op->base.src.transform->matrix[0][0];
+ float x0 = op->base.src.transform->matrix[0][2];
+ float yy = op->base.src.transform->matrix[1][1];
+ float y0 = op->base.src.transform->matrix[1][2];
+ float sx = op->base.src.scale[0];
+ float sy = op->base.src.scale[1];
+ int16_t tx = op->base.src.offset[0];
+ int16_t ty = op->base.src.offset[1];
+
+ dst.p.x = b->box.x2;
+ dst.p.y = b->box.y2;
+ v[0] = dst.f;
+ v[1] = ((b->box.x2 + tx) * xx + x0) * sx;
+ v[6] = v[2] = ((b->box.y2 + ty) * yy + y0) * sy;
+
+ dst.p.x = b->box.x1;
+ v[4] = dst.f;
+ v[9] = v[5] = ((b->box.x1 + tx) * xx + x0) * sx;
+
+ dst.p.y = b->box.y1;
+ v[8] = dst.f;
+ v[10] = ((b->box.y1 + ty) * yy + y0) * sy;
+
+ v[11] = v[7] = v[3] = b->alpha;
+ v += 12;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
+emit_span_affine(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
@@ -1038,7 +1151,50 @@ emit_spans_affine(struct sna *sna,
}
fastcall static void
-emit_spans_linear(struct sna *sna,
+emit_span_boxes_affine(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = b->box.x2;
+ dst.p.y = b->box.y2;
+ v[0] = dst.f;
+ _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
+ op->base.src.offset[1] + b->box.y2,
+ op->base.src.transform,
+ op->base.src.scale,
+ &v[1], &v[2]);
+
+ dst.p.x = b->box.x1;
+ v[4] = dst.f;
+ _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
+ op->base.src.offset[1] + b->box.y2,
+ op->base.src.transform,
+ op->base.src.scale,
+ &v[5], &v[6]);
+
+ dst.p.y = b->box.y1;
+ v[8] = dst.f;
+ _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
+ op->base.src.offset[1] + b->box.y1,
+ op->base.src.transform,
+ op->base.src.scale,
+ &v[9], &v[10]);
+
+ v[11] = v[7] = v[3] = b->alpha;
+
+ v += 12;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
+emit_span_linear(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
@@ -1069,6 +1225,35 @@ emit_spans_linear(struct sna *sna,
v[8] = v[5] = v[2] = opacity;
}
+fastcall static void
+emit_span_boxes_linear(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = b->box.x2;
+ dst.p.y = b->box.y2;
+ v[0] = dst.f;
+ dst.p.x = b->box.x1;
+ v[3] = dst.f;
+ dst.p.y = b->box.y1;
+ v[6] = dst.f;
+
+ v[1] = compute_linear(&op->base.src, b->box.x2, b->box.y2);
+ v[4] = compute_linear(&op->base.src, b->box.x1, b->box.y2);
+ v[7] = compute_linear(&op->base.src, b->box.x1, b->box.y1);
+
+ v[8] = v[5] = v[2] = b->alpha;
+ v += 9;
+ b++;
+ } while (--nbox);
+}
+
inline inline static uint32_t
gen4_choose_spans_vertex_buffer(const struct sna_composite_op *op)
{
@@ -1083,24 +1268,30 @@ unsigned gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp)
unsigned vb;
if (tmp->base.src.is_solid) {
- tmp->prim_emit = emit_spans_solid;
+ tmp->prim_emit = emit_span_solid;
+ tmp->emit_boxes = emit_span_boxes_solid;
tmp->base.floats_per_vertex = 3;
vb = 1 << 2 | 1;
} else if (tmp->base.src.is_linear) {
- tmp->prim_emit = emit_spans_linear;
+ tmp->prim_emit = emit_span_linear;
+ tmp->emit_boxes = emit_span_boxes_linear;
tmp->base.floats_per_vertex = 3;
vb = 1 << 2 | 1;
} else if (tmp->base.src.transform == NULL) {
- tmp->prim_emit = emit_spans_identity;
+ tmp->prim_emit = emit_span_identity;
+ tmp->emit_boxes = emit_span_boxes_identity;
tmp->base.floats_per_vertex = 4;
vb = 1 << 2 | 2;
} else if (tmp->base.is_affine) {
tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
- if (!sna_affine_transform_is_rotation(tmp->base.src.transform))
- tmp->prim_emit = emit_spans_simple;
- else
- tmp->prim_emit = emit_spans_affine;
+ if (!sna_affine_transform_is_rotation(tmp->base.src.transform)) {
+ tmp->prim_emit = emit_span_simple;
+ tmp->emit_boxes = emit_span_boxes_simple;
+ } else {
+ tmp->prim_emit = emit_span_affine;
+ tmp->emit_boxes = emit_span_boxes_affine;
+ }
tmp->base.floats_per_vertex = 4;
vb = 1 << 2 | 2;
} else {
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 5995d1d..81e6635 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -607,6 +607,17 @@ static int gen5_get_rectangles__flush(struct sna *sna,
op->u.gen5.wm_kernel);
}
+ /* Prevent discarding the new vbo after lock contention */
+ if (sna->render.active) {
+ int rem;
+
+ sna_vertex_wait__locked(&sna->render);
+
+ rem = vertex_space(sna);
+ if (rem > op->floats_per_rect)
+ return rem;
+ }
+
return gen4_vertex_finish(sna);
}
@@ -643,6 +654,7 @@ flush:
gen4_vertex_flush(sna);
gen5_magic_ca_pass(sna, op);
}
+ gen4_vertex_finish(sna);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
@@ -1939,6 +1951,42 @@ gen5_render_composite_spans_boxes(struct sna *sna,
}
fastcall static void
+gen5_render_composite_spans_boxes__thread(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->base.src.offset[0], op->base.src.offset[1],
+ op->base.dst.x, op->base.dst.y));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
+ gen5_bind_surfaces);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
+fastcall static void
gen5_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
@@ -2049,6 +2097,8 @@ gen5_render_composite_spans(struct sna *sna,
tmp->box = gen5_render_composite_spans_box;
tmp->boxes = gen5_render_composite_spans_boxes;
+ if (tmp->emit_boxes)
+ tmp->thread_boxes = gen5_render_composite_spans_boxes__thread;
tmp->done = gen5_render_composite_spans_done;
if (!kgem_check_bo(&sna->kgem,
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 35ff862..4ff1606 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -1157,6 +1157,17 @@ static int gen6_get_rectangles__flush(struct sna *sna,
}
}
+ /* Prevent discarding the new vbo after lock contention */
+ if (sna->render.active) {
+ int rem;
+
+ sna_vertex_wait__locked(&sna->render);
+
+ rem = vertex_space(sna);
+ if (rem > op->floats_per_rect)
+ return rem;
+ }
+
return gen4_vertex_finish(sna);
}
@@ -1193,6 +1204,7 @@ flush:
gen4_vertex_flush(sna);
gen6_magic_ca_pass(sna, op);
}
+ gen4_vertex_finish(sna);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
@@ -1293,6 +1305,7 @@ gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op)
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex;
}
+ assert((sna->render.vertex_used % op->floats_per_vertex) == 0);
}
fastcall static void
@@ -1720,6 +1733,7 @@ static void gen6_render_composite_done(struct sna *sna,
{
DBG(("%s\n", __FUNCTION__));
+ assert(!sna->render.active);
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen6_magic_ca_pass(sna, op);
@@ -2281,10 +2295,47 @@ gen6_render_composite_spans_boxes(struct sna *sna,
}
fastcall static void
+gen6_render_composite_spans_boxes__thread(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->base.src.offset[0], op->base.src.offset[1],
+ op->base.dst.x, op->base.dst.y));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
+ gen6_emit_composite_state);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
+fastcall static void
gen6_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
DBG(("%s()\n", __FUNCTION__));
+ assert(!sna->render.active);
if (sna->render.vertex_offset)
gen4_vertex_flush(sna);
@@ -2397,6 +2448,8 @@ gen6_render_composite_spans(struct sna *sna,
tmp->box = gen6_render_composite_spans_box;
tmp->boxes = gen6_render_composite_spans_boxes;
+ if (tmp->emit_boxes)
+ tmp->thread_boxes = gen6_render_composite_spans_boxes__thread;
tmp->done = gen6_render_composite_spans_done;
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
@@ -2768,6 +2821,7 @@ gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
DBG(("%s()\n", __FUNCTION__));
+ assert(!sna->render.active);
if (sna->render.vertex_offset)
gen4_vertex_flush(sna);
}
@@ -3115,6 +3169,7 @@ gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op)
{
DBG(("%s()\n", __FUNCTION__));
+ assert(!sna->render.active);
if (sna->render.vertex_offset)
gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, op->base.src.bo);
@@ -3409,6 +3464,7 @@ gen6_render_expire(struct kgem *kgem)
if (sna->render.vbo && !sna->render.vertex_used) {
DBG(("%s: discarding vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
kgem_bo_destroy(kgem, sna->render.vbo);
+ assert(!sna->render.active);
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index fa36ce6..34ba252 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -1282,6 +1282,17 @@ static int gen7_get_rectangles__flush(struct sna *sna,
}
}
+ /* Prevent discarding the new vbo after lock contention */
+ if (sna->render.active) {
+ int rem;
+
+ sna_vertex_wait__locked(&sna->render);
+
+ rem = vertex_space(sna);
+ if (rem > op->floats_per_rect)
+ return rem;
+ }
+
return gen4_vertex_finish(sna);
}
@@ -1318,6 +1329,7 @@ flush:
gen4_vertex_flush(sna);
gen7_magic_ca_pass(sna, op);
}
+ gen4_vertex_finish(sna);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
@@ -2403,6 +2415,42 @@ gen7_render_composite_spans_boxes(struct sna *sna,
}
fastcall static void
+gen7_render_composite_spans_boxes__thread(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->base.src.offset[0], op->base.src.offset[1],
+ op->base.dst.x, op->base.dst.y));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox,
+ gen7_emit_composite_state);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
+fastcall static void
gen7_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
@@ -2499,6 +2547,8 @@ gen7_render_composite_spans(struct sna *sna,
tmp->box = gen7_render_composite_spans_box;
tmp->boxes = gen7_render_composite_spans_boxes;
+ if (tmp->emit_boxes)
+ tmp->thread_boxes = gen7_render_composite_spans_boxes__thread;
tmp->done = gen7_render_composite_spans_done;
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
diff --git a/src/sna/kgem_debug_gen6.c b/src/sna/kgem_debug_gen6.c
index fd3f789..7ef55d3 100644
--- a/src/sna/kgem_debug_gen6.c
+++ b/src/sna/kgem_debug_gen6.c
@@ -75,11 +75,11 @@ static void gen6_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
assert(i < kgem->nreloc);
reloc = kgem->reloc[i].target_handle;
- if (reloc == 0) {
+ if (reloc == -1) {
base = kgem->batch;
} else {
list_for_each_entry(bo, &kgem->next_request->buffers, request)
- if (bo->handle == reloc)
+ if (bo->target_handle == reloc)
break;
assert(&bo->request != &kgem->next_request->buffers);
base = kgem_bo_map__debug(kgem, bo);
diff --git a/src/sna/sna.h b/src/sna/sna.h
index 5832c99..84d9807 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -42,6 +42,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#endif
#include <stdint.h>
+
#include "compiler.h"
#include <xorg-server.h>
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index 84c6b35..4b32b82 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -304,6 +304,8 @@ void no_render_init(struct sna *sna)
sna->kgem.expire = no_render_expire;
if (sna->kgem.has_blt)
sna->kgem.ring = KGEM_BLT;
+
+ sna_vertex_init(sna);
}
static struct kgem_bo *
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 13a3e7d..6a0b1d8 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -7,6 +7,8 @@
#include <stdbool.h>
#include <stdint.h>
+#include <pthread.h>
+#include "atomic.h"
#define GRADIENT_CACHE_SIZE 16
@@ -142,6 +144,11 @@ struct sna_composite_op {
void *priv;
};
+struct sna_opacity_box {
+ BoxRec box;
+ float alpha;
+} __packed__;
+
struct sna_composite_spans_op {
struct sna_composite_op base;
@@ -153,6 +160,12 @@ struct sna_composite_spans_op {
const struct sna_composite_spans_op *op,
const BoxRec *box, int nbox,
float opacity);
+
+ fastcall void (*thread_boxes)(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox);
+
fastcall void (*done)(struct sna *sna,
const struct sna_composite_spans_op *op);
@@ -160,6 +173,9 @@ struct sna_composite_spans_op {
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity);
+ fastcall void (*emit_boxes)(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box, int nbox,
+ float *v);
};
struct sna_fill_op {
@@ -188,6 +204,10 @@ struct sna_copy_op {
};
struct sna_render {
+ pthread_mutex_t lock;
+ pthread_cond_t wait;
+ int active;
+
int max_3d_size;
int max_3d_pitch;
@@ -714,4 +734,34 @@ sna_render_copy_boxes__overlap(struct sna *sna, uint8_t alu,
bool
sna_composite_mask_is_opaque(PicturePtr mask);
+void sna_vertex_init(struct sna *sna);
+
+static inline void sna_vertex_lock(struct sna_render *r)
+{
+ pthread_mutex_lock(&r->lock);
+}
+
+static inline void sna_vertex_acquire__locked(struct sna_render *r)
+{
+ r->active++;
+}
+
+static inline void sna_vertex_unlock(struct sna_render *r)
+{
+ pthread_mutex_unlock(&r->lock);
+}
+
+static inline void sna_vertex_release__locked(struct sna_render *r)
+{
+ assert(r->active > 0);
+ if (--r->active == 0)
+ pthread_cond_signal(&r->wait);
+}
+
+static inline void sna_vertex_wait__locked(struct sna_render *r)
+{
+ while (r->active)
+ pthread_cond_wait(&r->wait, &r->lock);
+}
+
#endif /* SNA_RENDER_H */
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 79e845a..bf4816b 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -49,6 +49,7 @@
#define NO_ALIGNED_BOXES 0
#define NO_UNALIGNED_BOXES 0
#define NO_SCAN_CONVERTER 0
+#define NO_GPU_THREADS 0
/* TODO: Emit unantialiased and MSAA triangles. */
@@ -328,10 +329,10 @@ floored_divrem(int a, int b)
/* Compute the floored division (x*a)/b. Assumes / and % perform symmetric
* division. */
static struct quorem
-floored_muldivrem(int x, int a, int b)
+floored_muldivrem(int32_t x, int32_t a, int32_t b)
{
struct quorem qr;
- long long xa = (long long)x*a;
+ int64_t xa = (int64_t)x*a;
qr.quo = xa/b;
qr.rem = xa%b;
if (qr.rem && (xa>=0) != (b>=0)) {
@@ -674,6 +675,8 @@ polygon_add_edge(struct polygon *polygon,
ybot = bottom <= ymax ? bottom : ymax;
e->ytop = ytop;
e->height_left = ybot - ytop;
+ if (e->height_left <= 0)
+ return;
if (dx == 0) {
e->x.quo = x1;
@@ -736,6 +739,8 @@ polygon_add_line(struct polygon *polygon,
e->ytop = top;
e->height_left = bot - top;
+ if (e->height_left <= 0)
+ return;
if (dx == 0) {
e->x.quo = p1->x;
@@ -4021,14 +4026,13 @@ static span_func_t
choose_span(struct sna_composite_spans_op *tmp,
PicturePtr dst,
PictFormatPtr maskFormat,
- uint8_t op,
RegionPtr clip)
{
span_func_t span;
if (is_mono(dst, maskFormat)) {
/* XXX An imprecise approximation */
- if (maskFormat && !operator_is_bounded(op)) {
+ if (maskFormat && !operator_is_bounded(tmp->base.op)) {
span = tor_blt_span_mono_unbounded;
if (REGION_NUM_RECTS(clip) > 1)
span = tor_blt_span_mono_unbounded_clipped;
@@ -4188,6 +4192,151 @@ mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
return true;
}
+struct span_thread {
+ struct sna *sna;
+ const struct sna_composite_spans_op *op;
+ const xTrapezoid *traps;
+ RegionPtr clip;
+ span_func_t span;
+ BoxRec extents;
+ int dx, dy, draw_y;
+ int ntrap;
+ bool unbounded;
+};
+
+#define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box))
+struct span_thread_boxes {
+ const struct sna_composite_spans_op *op;
+ struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES];
+ int num_boxes;
+};
+
+static void span_thread_add_boxes(struct sna *sna, void *data,
+ const BoxRec *box, int count, float alpha)
+{
+ struct span_thread_boxes *b = data;
+
+ __DBG(("%s: adding %d boxes with alpha=%f\n",
+ __FUNCTION__, count, alpha));
+
+ assert(count > 0 && count <= SPAN_THREAD_MAX_BOXES);
+ if (b->num_boxes + count > SPAN_THREAD_MAX_BOXES) {
+ DBG(("%s: flushing %d boxes, adding %d\n", __FUNCTION__, b->num_boxes, count));
+ assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES);
+ b->op->thread_boxes(sna, b->op, b->boxes, b->num_boxes);
+ b->num_boxes = 0;
+ }
+
+ do {
+ b->boxes[b->num_boxes].box = *box++;
+ b->boxes[b->num_boxes].alpha = alpha;
+ b->num_boxes++;
+ } while (--count);
+ assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES);
+}
+
+static void
+span_thread_box(struct sna *sna,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ const BoxRec *box,
+ int coverage)
+{
+ __DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
+ span_thread_add_boxes(sna, op, box, 1, AREA_TO_ALPHA(coverage));
+}
+
+static void
+span_thread_clipped_box(struct sna *sna,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ const BoxRec *box,
+ int coverage)
+{
+ pixman_region16_t region;
+
+ __DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2,
+ AREA_TO_ALPHA(coverage)));
+
+ pixman_region_init_rects(®ion, box, 1);
+ RegionIntersect(®ion, ®ion, clip);
+ if (REGION_NUM_RECTS(®ion)) {
+ span_thread_add_boxes(sna, op,
+ REGION_RECTS(®ion),
+ REGION_NUM_RECTS(®ion),
+ AREA_TO_ALPHA(coverage));
+ }
+ pixman_region_fini(®ion);
+}
+
+static span_func_t
+thread_choose_span(struct sna_composite_spans_op *tmp,
+ PicturePtr dst,
+ PictFormatPtr maskFormat,
+ RegionPtr clip)
+{
+ span_func_t span;
+
+ if (tmp->base.damage)
+ return NULL;
+
+ if (is_mono(dst, maskFormat)) {
+ return NULL;
+ } else {
+ if (REGION_NUM_RECTS(clip) > 1)
+ span = span_thread_clipped_box;
+ else
+ span = span_thread_box;
+ }
+
+ return span;
+}
+
+static void
+span_thread(void *arg)
+{
+ struct span_thread *thread = arg;
+ struct span_thread_boxes boxes;
+ struct tor tor;
+ const xTrapezoid *t;
+ int n, y1, y2;
+
+ if (tor_init(&tor, &thread->extents, 2*thread->ntrap))
+ return;
+
+ boxes.op = thread->op;
+ boxes.num_boxes = 0;
+
+ y1 = thread->extents.y1 - thread->draw_y;
+ y2 = thread->extents.y2 - thread->draw_y;
+ for (n = thread->ntrap, t = thread->traps; n--; t++) {
+ xTrapezoid tt;
+
+ if (pixman_fixed_to_int(t->top) >= y2 ||
+ pixman_fixed_to_int(t->bottom) < y1)
+ continue;
+
+ if (!project_trapezoid_onto_grid(t, thread->dx, thread->dy, &tt))
+ continue;
+
+ tor_add_edge(&tor, &tt, &tt.left, 1);
+ tor_add_edge(&tor, &tt, &tt.right, -1);
+ }
+
+ tor_render(thread->sna, &tor,
+ (struct sna_composite_spans_op *)&boxes, thread->clip,
+ thread->span, thread->unbounded);
+
+ tor_fini(&tor);
+
+ if (boxes.num_boxes) {
+ DBG(("%s: flushing %d boxes\n", __FUNCTION__, boxes.num_boxes));
+ assert(boxes.num_boxes <= SPAN_THREAD_MAX_BOXES);
+ thread->op->thread_boxes(thread->sna, thread->op,
+ boxes.boxes, boxes.num_boxes);
+ }
+}
+
static bool
trapezoid_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
PictFormatPtr maskFormat, unsigned int flags,
@@ -4196,12 +4345,12 @@ trapezoid_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
{
struct sna *sna;
struct sna_composite_spans_op tmp;
- struct tor tor;
BoxRec extents;
pixman_region16_t clip;
int16_t dst_x, dst_y;
bool was_clear;
int dx, dy, n;
+ int num_threads;
if (NO_SCAN_CONVERTER)
return false;
@@ -4305,29 +4454,78 @@ trapezoid_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
dx *= FAST_SAMPLES_X;
dy *= FAST_SAMPLES_Y;
- if (tor_init(&tor, &extents, 2*ntrap))
- goto skip;
- for (n = 0; n < ntrap; n++) {
- xTrapezoid t;
+ num_threads = 1;
+ if (!NO_GPU_THREADS && tmp.thread_boxes &&
+ thread_choose_span(&tmp, dst, maskFormat, &clip))
+ num_threads = sna_use_threads(extents.x2-extents.x1,
+ extents.y2-extents.y1,
+ 16);
+ if (num_threads == 1) {
+ struct tor tor;
- if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t))
- continue;
+ if (tor_init(&tor, &extents, 2*ntrap))
+ goto skip;
- if (pixman_fixed_to_int(traps[n].top) + dst->pDrawable->y >= extents.y2 ||
- pixman_fixed_to_int(traps[n].bottom) + dst->pDrawable->y < extents.y1)
- continue;
+ for (n = 0; n < ntrap; n++) {
+ xTrapezoid t;
- tor_add_edge(&tor, &t, &t.left, 1);
- tor_add_edge(&tor, &t, &t.right, -1);
- }
+ if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t))
+ continue;
- tor_render(sna, &tor, &tmp, &clip,
- choose_span(&tmp, dst, maskFormat, op, &clip),
- !was_clear && maskFormat && !operator_is_bounded(op));
+ if (pixman_fixed_to_int(traps[n].top) + dst->pDrawable->y >= extents.y2 ||
+ pixman_fixed_to_int(traps[n].bottom) + dst->pDrawable->y < extents.y1)
+ continue;
+
+ tor_add_edge(&tor, &t, &t.left, 1);
+ tor_add_edge(&tor, &t, &t.right, -1);
+ }
+
+ tor_render(sna, &tor, &tmp, &clip,
+ choose_span(&tmp, dst, maskFormat, &clip),
+ !was_clear && maskFormat && !operator_is_bounded(op));
skip:
- tor_fini(&tor);
+ tor_fini(&tor);
+ } else {
+ struct span_thread threads[num_threads];
+ int y, h;
+
+ DBG(("%s: using %d threads for span compositing %dx%d\n",
+ __FUNCTION__, num_threads,
+ extents.x2 - extents.x1,
+ extents.y2 - extents.y1));
+
+ threads[0].sna = sna;
+ threads[0].op = &tmp;
+ threads[0].traps = traps;
+ threads[0].ntrap = ntrap;
+ threads[0].extents = extents;
+ threads[0].clip = &clip;
+ threads[0].dx = dx;
+ threads[0].dy = dy;
+ threads[0].draw_y = dst->pDrawable->y;
+ threads[0].unbounded = !was_clear && maskFormat && !operator_is_bounded(op);
+ threads[0].span = thread_choose_span(&tmp, dst, maskFormat, &clip);
+
+ y = extents.y1;
+ h = extents.y2 - extents.y1;
+ h = (h + num_threads - 1) / num_threads;
+
+ for (n = 1; n < num_threads; n++) {
+ threads[n] = threads[0];
+ threads[n].extents.y1 = y;
+ threads[n].extents.y2 = y += h;
+
+ sna_threads_run(span_thread, &threads[n]);
+ }
+
+ threads[0].extents.y1 = y;
+ threads[0].extents.y2 = extents.y2;
+ span_thread(&threads[0]);
+
+ sna_threads_wait();
+ }
tmp.done(sna, &tmp);
REGION_UNINIT(NULL, &clip);
@@ -6282,7 +6480,7 @@ trap_span_converter(PicturePtr dst,
}
tor_render(sna, &tor, &tmp, clip,
- choose_span(&tmp, dst, NULL, PictOpAdd, clip), false);
+ choose_span(&tmp, dst, NULL, clip), false);
skip:
tor_fini(&tor);
@@ -6827,7 +7025,7 @@ triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
}
tor_render(sna, &tor, &tmp, &clip,
- choose_span(&tmp, dst, maskFormat, op, &clip),
+ choose_span(&tmp, dst, maskFormat, &clip),
!was_clear && maskFormat && !operator_is_bounded(op));
skip:
@@ -7201,7 +7399,7 @@ tristrip_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
assert(tor.polygon->num_edges <= 2*count);
tor_render(sna, &tor, &tmp, &clip,
- choose_span(&tmp, dst, maskFormat, op, &clip),
+ choose_span(&tmp, dst, maskFormat, &clip),
!was_clear && maskFormat && !operator_is_bounded(op));
skip:
diff --git a/src/sna/sna_vertex.c b/src/sna/sna_vertex.c
new file mode 100644
index 0000000..6755d9a
--- /dev/null
+++ b/src/sna/sna_vertex.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris at chris-wilson.co.uk>
+ *
+ */
+
+#include "sna.h"
+
+#include <unistd.h>
+
+/* One-time initialization of the render-thread synchronization state:
+ * the mutex/condvar pair and the count of active vertex writers
+ * (presumably consulted by the threaded span code added in this
+ * series; callers elsewhere -- verify against sna_render.h). */
+void sna_vertex_init(struct sna *sna)
+{
+	pthread_mutex_init(&sna->render.lock, NULL);
+	pthread_cond_init(&sna->render.wait, NULL);
+	sna->render.active = 0;
+}
More information about the xorg-commit
mailing list