xf86-video-intel: 9 commits - src/intel_dri.c src/sna/gen3_render.c src/sna/gen5_render.c src/sna/gen6_render.c src/sna/gen7_render.c src/sna/kgem.c src/sna/kgem.h src/sna/sna_dri.c src/sna/sna_render.c src/sna/sna_render.h
Chris Wilson
ickle at kemper.freedesktop.org
Thu Jun 21 13:36:32 PDT 2012
src/intel_dri.c | 44 ++++++++++-----------
src/sna/gen3_render.c | 17 +++++++-
src/sna/gen5_render.c | 16 +++++++
src/sna/gen6_render.c | 16 +++++++
src/sna/gen7_render.c | 101 ++++++++++++++++++++++++++++++++++++++++----------
src/sna/kgem.c | 25 +++++++-----
src/sna/kgem.h | 1
src/sna/sna_dri.c | 72 ++++++++++++++++++-----------------
src/sna/sna_render.c | 7 +++
src/sna/sna_render.h | 1
10 files changed, 211 insertions(+), 89 deletions(-)
New commits:
commit 565297e6bd3457a150036af9c62fe0dc67b794ac
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Thu Jun 21 13:53:02 2012 +0100
sna/gen3+: Keep vbo cached
Once we switch to using a vbo, keep it cached (resetting every time it is
idle) until we expire our caches.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 54ec9b2..b66e0e0 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -1797,6 +1797,7 @@ static int gen3_get_rectangles__flush(struct sna *sna,
return 0;
if (!kgem_check_reloc_and_exec(&sna->kgem, 1))
return 0;
+
if (op->need_magic_ca_pass && sna->render.vbo)
return 0;
@@ -1988,7 +1989,20 @@ gen3_render_retire(struct kgem *kgem)
struct sna *sna;
sna = container_of(kgem, struct sna, kgem);
- if (!kgem->need_retire && kgem->nbatch == 0 && sna->render.vbo) {
+ if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
+ DBG(("%s: resetting idle vbo\n", __FUNCTION__));
+ sna->render.vertex_used = 0;
+ sna->render.vertex_index = 0;
+ }
+}
+
+static void
+gen3_render_expire(struct kgem *kgem)
+{
+ struct sna *sna;
+
+ sna = container_of(kgem, struct sna, kgem);
+ if (sna->render.vbo && !sna->render.vertex_used) {
DBG(("%s: discarding vbo\n", __FUNCTION__));
discard_vbo(sna);
}
@@ -4680,5 +4694,6 @@ Bool gen3_render_init(struct sna *sna)
render->max_3d_pitch = MAX_3D_PITCH;
sna->kgem.retire = gen3_render_retire;
+ sna->kgem.expire = gen3_render_expire;
return TRUE;
}
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 65c21c3..7d424aa 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -3671,7 +3671,20 @@ gen5_render_retire(struct kgem *kgem)
struct sna *sna;
sna = container_of(kgem, struct sna, kgem);
- if (!kgem->need_retire && kgem->nbatch == 0 && sna->render.vbo) {
+ if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
+ DBG(("%s: resetting idle vbo\n", __FUNCTION__));
+ sna->render.vertex_used = 0;
+ sna->render.vertex_index = 0;
+ }
+}
+
+static void
+gen5_render_expire(struct kgem *kgem)
+{
+ struct sna *sna;
+
+ sna = container_of(kgem, struct sna, kgem);
+ if (sna->render.vbo && !sna->render.vertex_used) {
DBG(("%s: discarding vbo\n", __FUNCTION__));
discard_vbo(sna);
}
@@ -3944,6 +3957,7 @@ Bool gen5_render_init(struct sna *sna)
sna->kgem.context_switch = gen5_render_context_switch;
sna->kgem.retire = gen5_render_retire;
+ sna->kgem.expire = gen5_render_expire;
sna->render.composite = gen5_render_composite;
#if !NO_COMPOSITE_SPANS
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 563e04c..896693c 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -4187,7 +4187,20 @@ gen6_render_retire(struct kgem *kgem)
kgem->ring = kgem->mode;
sna = container_of(kgem, struct sna, kgem);
- if (!kgem->need_retire && kgem->nbatch == 0 && sna->render.vbo) {
+ if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
+ DBG(("%s: resetting idle vbo\n", __FUNCTION__));
+ sna->render.vertex_used = 0;
+ sna->render.vertex_index = 0;
+ }
+}
+
+static void
+gen6_render_expire(struct kgem *kgem)
+{
+ struct sna *sna;
+
+ sna = container_of(kgem, struct sna, kgem);
+ if (sna->render.vbo && !sna->render.vertex_used) {
DBG(("%s: discarding vbo\n", __FUNCTION__));
kgem_bo_destroy(kgem, sna->render.vbo);
sna->render.vbo = NULL;
@@ -4273,6 +4286,7 @@ Bool gen6_render_init(struct sna *sna)
sna->kgem.context_switch = gen6_render_context_switch;
sna->kgem.retire = gen6_render_retire;
+ sna->kgem.expire = gen6_render_expire;
sna->render.composite = gen6_render_composite;
#if !NO_COMPOSITE_SPANS
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 726a67e..ea9b01a 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -4298,7 +4298,20 @@ gen7_render_retire(struct kgem *kgem)
kgem->ring = kgem->mode;
sna = container_of(kgem, struct sna, kgem);
- if (!kgem->need_retire && kgem->nbatch == 0 && sna->render.vbo) {
+ if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
+ DBG(("%s: resetting idle vbo\n", __FUNCTION__));
+ sna->render.vertex_used = 0;
+ sna->render.vertex_index = 0;
+ }
+}
+
+static void
+gen7_render_expire(struct kgem *kgem)
+{
+ struct sna *sna;
+
+ sna = container_of(kgem, struct sna, kgem);
+ if (sna->render.vbo && !sna->render.vertex_used) {
DBG(("%s: discarding vbo\n", __FUNCTION__));
kgem_bo_destroy(kgem, sna->render.vbo);
sna->render.vbo = NULL;
@@ -4386,6 +4399,7 @@ Bool gen7_render_init(struct sna *sna)
sna->kgem.context_switch = gen7_render_context_switch;
sna->kgem.retire = gen7_render_retire;
+ sna->kgem.expire = gen7_render_expire;
sna->render.composite = gen7_render_composite;
#if !NO_COMPOSITE_SPANS
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 0b5ca61..9fe3661 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -2049,6 +2049,8 @@ bool kgem_expire_cache(struct kgem *kgem)
if (kgem->wedged)
kgem_cleanup(kgem);
+ kgem->expire(kgem);
+
if (kgem->need_purge)
kgem_purge_cache(kgem);
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 408ad03..c154be5 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -169,6 +169,7 @@ struct kgem {
void (*context_switch)(struct kgem *kgem, int new_mode);
void (*retire)(struct kgem *kgem);
+ void (*expire)(struct kgem *kgem);
uint32_t batch[64*1024-8];
struct drm_i915_gem_exec_object2 exec[256];
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index 49f7c5e..6ddf6f3 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -252,6 +252,12 @@ no_render_retire(struct kgem *kgem)
}
static void
+no_render_expire(struct kgem *kgem)
+{
+ (void)kgem;
+}
+
+static void
no_render_fini(struct sna *sna)
{
(void)sna;
@@ -282,6 +288,7 @@ void no_render_init(struct sna *sna)
sna->kgem.context_switch = no_render_context_switch;
sna->kgem.retire = no_render_retire;
+ sna->kgem.expire = no_render_expire;
if (sna->kgem.gen >= 60)
sna->kgem.ring = KGEM_BLT;
}
commit d806973e21cd46e605b3cd405323ae7a64c12798
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Thu Jun 21 12:57:13 2012 +0100
sna: Micro-optimise search_inactive_cache
Discard the unneeded next parameter to drop a memory reference in a hot
path, and don't wait for a retirement if we are looking in a larger
bucket than suits.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 90b4c96..0b5ca61 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -2558,7 +2558,7 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
uint32_t flags)
{
struct list *cache;
- struct kgem_bo *bo, *next;
+ struct kgem_bo *bo;
uint32_t pitch, untiled_pitch, tiled_height, size;
uint32_t handle;
int i, bucket, retry;
@@ -2847,7 +2847,7 @@ search_inactive:
/* Now just look for a close match and prefer any currently active */
assert(bucket < NUM_CACHE_BUCKETS);
cache = &kgem->inactive[bucket];
- list_for_each_entry_safe(bo, next, cache, list) {
+ list_for_each_entry(bo, cache, list) {
assert(bucket(bo) == bucket);
assert(bo->reusable);
@@ -2861,10 +2861,8 @@ search_inactive:
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
if (tiling != gem_set_tiling(kgem->fd,
bo->handle,
- tiling, pitch)) {
- kgem_bo_free(kgem, bo);
+ tiling, pitch))
continue;
- }
if (bo->map)
kgem_bo_release_map(kgem, bo);
@@ -2872,7 +2870,7 @@ search_inactive:
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
- continue;
+ break;
}
kgem_bo_remove_from_inactive(kgem, bo);
@@ -2903,6 +2901,7 @@ search_inactive:
if (--retry) {
bucket++;
+ flags &= ~CREATE_INACTIVE;
goto search_inactive;
}
commit d39fef0a7f3daf5c07686b44e4dea01c0f06c77a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Thu Jun 21 12:25:35 2012 +0100
sna: Tiles are only 128 bytes wide on gen2
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index bb1b77d..90b4c96 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -803,11 +803,12 @@ void kgem_get_tile_size(struct kgem *kgem, int tiling,
{
if (kgem->gen <= 30) {
if (tiling) {
- *tile_width = 512;
if (kgem->gen < 30) {
+ *tile_width = 128;
*tile_height = 16;
*tile_size = 2048;
} else {
+ *tile_width = 512;
*tile_height = 8;
*tile_size = 4096;
}
@@ -853,8 +854,13 @@ static uint32_t kgem_surface_size(struct kgem *kgem,
if (kgem->gen <= 30) {
if (tiling) {
- tile_width = 512;
- tile_height = kgem->gen < 30 ? 16 : 8;
+ if (kgem->gen < 30) {
+ tile_width = 128;
+ tile_height = 16;
+ } else {
+ tile_width = 512;
+ tile_height = 8;
+ }
} else {
tile_width = 2 * bpp >> 3;
tile_width = ALIGN(tile_width,
commit 4f2dde1fa3b04b27bae8fc0bca9c824bd362d23b
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Thu Jun 21 10:31:24 2012 +0100
sna/gen7: Eliminate the pipeline stall after a non-pipelined operation
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index b0f7cfc..726a67e 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -955,12 +955,12 @@ gen7_emit_vertex_elements(struct sna *sna,
}
inline static void
-gen7_emit_pipe_invalidate(struct sna *sna)
+gen7_emit_pipe_invalidate(struct sna *sna, bool stall)
{
OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH |
GEN7_PIPE_CONTROL_TC_FLUSH |
- GEN7_PIPE_CONTROL_CS_STALL);
+ (stall ? GEN7_PIPE_CONTROL_CS_STALL : 0));
OUT_BATCH(0);
OUT_BATCH(0);
}
@@ -1020,7 +1020,7 @@ gen7_emit_state(struct sna *sna,
need_stall &= gen7_emit_drawing_rectangle(sna, op);
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
- gen7_emit_pipe_invalidate(sna);
+ gen7_emit_pipe_invalidate(sna, need_stall);
kgem_clear_dirty(&sna->kgem);
kgem_bo_mark_dirty(op->dst.bo);
need_stall = false;
@@ -1042,7 +1042,7 @@ static void gen7_magic_ca_pass(struct sna *sna,
DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
sna->render.vertex_start, sna->render.vertex_index));
- gen7_emit_pipe_invalidate(sna);
+ gen7_emit_pipe_invalidate(sna, true);
gen7_emit_cc(sna, gen7_get_blend(PictOpAdd, TRUE, op->dst.format));
gen7_emit_wm(sna,
commit 3ef05a8d0833203e265aff392f225a11a11c2d01
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Thu Jun 21 09:36:42 2012 +0100
sna/gen7: Do not emit a pipeline stall after a non-pipelined command
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index e3c8269..b0f7cfc 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -850,7 +850,7 @@ gen7_emit_binding_table(struct sna *sna, uint16_t offset)
return true;
}
-static void
+static bool
gen7_emit_drawing_rectangle(struct sna *sna,
const struct sna_composite_op *op)
{
@@ -862,7 +862,7 @@ gen7_emit_drawing_rectangle(struct sna *sna,
if (sna->render_state.gen7.drawrect_limit == limit &&
sna->render_state.gen7.drawrect_offset == offset)
- return;
+ return true;
sna->render_state.gen7.drawrect_offset = offset;
sna->render_state.gen7.drawrect_limit = limit;
@@ -871,6 +871,7 @@ gen7_emit_drawing_rectangle(struct sna *sna,
OUT_BATCH(0);
OUT_BATCH(limit);
OUT_BATCH(offset);
+ return false;
}
static void
@@ -1016,7 +1017,7 @@ gen7_emit_state(struct sna *sna,
gen7_emit_vertex_elements(sna, op);
need_stall |= gen7_emit_binding_table(sna, wm_binding_table);
- gen7_emit_drawing_rectangle(sna, op);
+ need_stall &= gen7_emit_drawing_rectangle(sna, op);
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
gen7_emit_pipe_invalidate(sna);
commit 4501e131e6b737cb8f2581c8b1f7ea9d29a8e912
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Thu Jun 21 16:38:32 2012 +0100
sna/gen7: prefer using RENDER copy
Further testing and the balance of doubt swings in favour of using the
3D pipeline for copies.
For small copies the BLT unit is faster,
2.14M/sec vs 1.71M/sec for comppixwin10
And for large copies the RENDER pipeline is faster,
13000/sec vs 8000/sec for comppixwin500
I think the implication is that we are not efficiently utilising the EU
for small primitives - i.e. something that we might be able to improve.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 9c92e5a..e3c8269 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -2448,7 +2448,7 @@ try_blt(struct sna *sna,
PicturePtr dst, PicturePtr src,
int width, int height)
{
- if (prefer_blt_ring(sna)) {
+ if (sna->kgem.ring == KGEM_BLT) {
DBG(("%s: already performing BLT\n", __FUNCTION__));
return TRUE;
}
@@ -2462,8 +2462,6 @@ try_blt(struct sna *sna,
if (can_switch_rings(sna)) {
if (sna_picture_is_solid(src, NULL))
return TRUE;
- if (src->pDrawable)
- return TRUE;
}
return FALSE;
@@ -3329,7 +3327,7 @@ static inline bool prefer_blt_copy(struct sna *sna,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr dst, struct kgem_bo *dst_bo)
{
- return (prefer_blt_ring(sna) ||
+ return (sna->kgem.ring == KGEM_BLT ||
prefer_blt_bo(sna, src, src_bo) ||
prefer_blt_bo(sna, dst, dst_bo));
}
commit 3da56c48b7820ec77d704c5a16670eb86a6f673f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Wed Jun 20 15:14:23 2012 +0100
sna/gen7: Prefer using BLT rather than redirect for copies
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 3a9a856..9c92e5a 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -3400,6 +3400,32 @@ gen7_render_copy_boxes(struct sna *sna, uint8_t alu,
box, n))
return TRUE;
+ if ((too_large(dst->drawable.width, dst->drawable.height) ||
+ too_large(src->drawable.width, src->drawable.height)) &&
+ sna_blt_compare_depth(&src->drawable, &dst->drawable)) {
+ BoxRec extents = box[0];
+ int i;
+
+ for (i = 1; i < n; i++) {
+ if (box[i].x1 < extents.x1)
+ extents.x1 = box[i].x1;
+ if (box[i].y1 < extents.y1)
+ extents.y1 = box[i].y1;
+
+ if (box[i].x2 > extents.x2)
+ extents.x2 = box[i].x2;
+ if (box[i].y2 > extents.y2)
+ extents.y2 = box[i].y2;
+ }
+ if (too_large(extents.x2 - extents.x1, extents.y2 - extents.y1) &&
+ sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n))
+ return TRUE;
+ }
+
if (!(alu == GXcopy || alu == GXclear) ||
overlaps(src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
@@ -3449,6 +3475,7 @@ fallback_blt:
if (box[i].y2 > extents.y2)
extents.y2 = box[i].y2;
}
+
if (!sna_render_composite_redirect(sna, &tmp,
extents.x1 + dst_dx,
extents.y1 + dst_dy,
commit b1f8386db6e9b3eea1bdbf8cde90f33792640ce8
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Wed Jun 20 23:28:14 2012 +0100
sna/gen7: Emit a pipeline flush after every render operation
For whatever reason, this produces a 30% improvement with the fish-demo
(500 -> 660 fps on i7-3730qm at 1024x768). However, it does cause about
a 5% regression in aa10text. We can appear to alleviate that by only
doing the flush when the composite op != PictOpSrc.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index c474767..3a9a856 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -953,8 +953,8 @@ gen7_emit_vertex_elements(struct sna *sna,
}
}
-static void
-gen7_emit_flush(struct sna *sna)
+inline static void
+gen7_emit_pipe_invalidate(struct sna *sna)
{
OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH |
@@ -964,6 +964,25 @@ gen7_emit_flush(struct sna *sna)
OUT_BATCH(0);
}
+inline static void
+gen7_emit_pipe_flush(struct sna *sna)
+{
+ OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
+ OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+}
+
+inline static void
+gen7_emit_pipe_stall(struct sna *sna)
+{
+ OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
+ OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL |
+ GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+}
+
static void
gen7_emit_state(struct sna *sna,
const struct sna_composite_op *op,
@@ -1000,18 +1019,15 @@ gen7_emit_state(struct sna *sna,
gen7_emit_drawing_rectangle(sna, op);
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
- gen7_emit_flush(sna);
+ gen7_emit_pipe_invalidate(sna);
kgem_clear_dirty(&sna->kgem);
kgem_bo_mark_dirty(op->dst.bo);
need_stall = false;
}
- if (need_stall) {
- OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
- OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL |
- GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
- OUT_BATCH(0);
- OUT_BATCH(0);
- }
+ if (need_stall)
+ gen7_emit_pipe_stall(sna);
+
+ sna->render_state.gen7.emit_flush = op->op != PictOpSrc;
}
static void gen7_magic_ca_pass(struct sna *sna,
@@ -1025,7 +1041,7 @@ static void gen7_magic_ca_pass(struct sna *sna,
DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
sna->render.vertex_start, sna->render.vertex_index));
- gen7_emit_flush(sna);
+ gen7_emit_pipe_invalidate(sna);
gen7_emit_cc(sna, gen7_get_blend(PictOpAdd, TRUE, op->dst.format));
gen7_emit_wm(sna,
@@ -1055,6 +1071,11 @@ static void gen7_vertex_flush(struct sna *sna)
sna->kgem.batch[sna->render_state.gen7.vertex_offset] =
sna->render.vertex_index - sna->render.vertex_start;
sna->render_state.gen7.vertex_offset = 0;
+
+ if (sna->render_state.gen7.emit_flush) {
+ gen7_emit_pipe_flush(sna);
+ sna->render_state.gen7.emit_flush = false;
+ }
}
static int gen7_vertex_finish(struct sna *sna)
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index fd42b21..65ca359 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -461,6 +461,7 @@ struct gen7_render_state {
uint16_t surface_table;
Bool needs_invariant;
+ Bool emit_flush;
};
struct sna_static_stream {
commit d02e6d81420a114c9622bbdaf90fc3ae5d4b15a7
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Thu Jun 21 16:10:02 2012 +0100
Encode the third pipe using the HIGH_CRTC shift for vblanks
The original vblank interface only understood 2 pipes (primary and
secondary) and so selecting the third pipe (introduced with IvyBridge)
requires use of the HIGH_CRTC. Using the second pipe where we meant the
third pipe could result in some spurious timings when waiting on the
vblank.
Reported-by: Adam Jackson <ajax at redhat.com>
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/intel_dri.c b/src/intel_dri.c
index 88ab249..ed5078e 100644
--- a/src/intel_dri.c
+++ b/src/intel_dri.c
@@ -1238,6 +1238,16 @@ void I830DRI2FlipEventHandler(unsigned int frame, unsigned int tv_sec,
i830_dri2_del_frame_event(drawable, flip_info);
}
+static uint32_t pipe_select(int pipe)
+{
+ if (pipe > 1)
+ return pipe << DRM_VBLANK_HIGH_CRTC_SHIFT;
+ else if (pipe > 0)
+ return DRM_VBLANK_SECONDARY;
+ else
+ return 0;
+}
+
/*
* ScheduleSwap is responsible for requesting a DRM vblank event for the
* appropriate frame.
@@ -1307,9 +1317,7 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
I830DRI2ReferenceBuffer(back);
/* Get current count */
- vbl.request.type = DRM_VBLANK_RELATIVE;
- if (pipe > 0)
- vbl.request.type |= DRM_VBLANK_SECONDARY;
+ vbl.request.type = DRM_VBLANK_RELATIVE | pipe_select(pipe);
vbl.request.sequence = 0;
ret = drmWaitVBlank(intel->drmSubFD, &vbl);
if (ret) {
@@ -1345,9 +1353,8 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
if (flip && I830DRI2ScheduleFlip(intel, draw, swap_info))
return TRUE;
- vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT;
- if (pipe > 0)
- vbl.request.type |= DRM_VBLANK_SECONDARY;
+ vbl.request.type =
+ DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT | pipe_select(pipe);
/* If non-pageflipping, but blitting/exchanging, we need to use
* DRM_VBLANK_NEXTONMISS to avoid unreliable timestamping later
@@ -1355,8 +1362,6 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
*/
if (flip == 0)
vbl.request.type |= DRM_VBLANK_NEXTONMISS;
- if (pipe > 0)
- vbl.request.type |= DRM_VBLANK_SECONDARY;
/* If target_msc already reached or passed, set it to
* current_msc to ensure we return a reasonable value back
@@ -1386,11 +1391,10 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
* and we need to queue an event that will satisfy the divisor/remainder
* equation.
*/
- vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT;
+ vbl.request.type =
+ DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT | pipe_select(pipe);
if (flip == 0)
vbl.request.type |= DRM_VBLANK_NEXTONMISS;
- if (pipe > 0)
- vbl.request.type |= DRM_VBLANK_SECONDARY;
vbl.request.sequence = current_msc - (current_msc % divisor) +
remainder;
@@ -1463,9 +1467,7 @@ I830DRI2GetMSC(DrawablePtr draw, CARD64 *ust, CARD64 *msc)
return TRUE;
}
- vbl.request.type = DRM_VBLANK_RELATIVE;
- if (pipe > 0)
- vbl.request.type |= DRM_VBLANK_SECONDARY;
+ vbl.request.type = DRM_VBLANK_RELATIVE | pipe_select(pipe);
vbl.request.sequence = 0;
ret = drmWaitVBlank(intel->drmSubFD, &vbl);
@@ -1531,9 +1533,7 @@ I830DRI2ScheduleWaitMSC(ClientPtr client, DrawablePtr draw, CARD64 target_msc,
}
/* Get current count */
- vbl.request.type = DRM_VBLANK_RELATIVE;
- if (pipe > 0)
- vbl.request.type |= DRM_VBLANK_SECONDARY;
+ vbl.request.type = DRM_VBLANK_RELATIVE | pipe_select(pipe);
vbl.request.sequence = 0;
ret = drmWaitVBlank(intel->drmSubFD, &vbl);
if (ret) {
@@ -1564,9 +1564,8 @@ I830DRI2ScheduleWaitMSC(ClientPtr client, DrawablePtr draw, CARD64 target_msc,
*/
if (current_msc >= target_msc)
target_msc = current_msc;
- vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT;
- if (pipe > 0)
- vbl.request.type |= DRM_VBLANK_SECONDARY;
+ vbl.request.type =
+ DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT | pipe_select(pipe);
vbl.request.sequence = target_msc;
vbl.request.signal = (unsigned long)wait_info;
ret = drmWaitVBlank(intel->drmSubFD, &vbl);
@@ -1591,9 +1590,8 @@ I830DRI2ScheduleWaitMSC(ClientPtr client, DrawablePtr draw, CARD64 target_msc,
* If we get here, target_msc has already passed or we don't have one,
* so we queue an event that will satisfy the divisor/remainder equation.
*/
- vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT;
- if (pipe > 0)
- vbl.request.type |= DRM_VBLANK_SECONDARY;
+ vbl.request.type =
+ DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT | pipe_select(pipe);
vbl.request.sequence = current_msc - (current_msc % divisor) +
remainder;
diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index 5390b5a..46a43e9 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -1032,6 +1032,21 @@ can_flip(struct sna * sna,
return TRUE;
}
+inline static uint32_t pipe_select(int pipe)
+{
+ /* The third pipe was introduced with IvyBridge long after
+ * multiple pipe support was added to the kernel, hence
+ * we can safely ignore the capability check - if we have more
+ * than two pipes, we can assume that they are fully supported.
+ */
+ if (pipe > 1)
+ return pipe << DRM_VBLANK_HIGH_CRTC_SHIFT;
+ else if (pipe > 0)
+ return DRM_VBLANK_SECONDARY;
+ else
+ return 0;
+}
+
static void sna_dri_vblank_handle(int fd,
unsigned int frame, unsigned int tv_sec,
unsigned int tv_usec,
@@ -1086,9 +1101,8 @@ static void sna_dri_vblank_handle(int fd,
VG_CLEAR(vbl);
vbl.request.type =
DRM_VBLANK_RELATIVE |
- DRM_VBLANK_EVENT;
- if (info->pipe > 0)
- vbl.request.type |= DRM_VBLANK_SECONDARY;
+ DRM_VBLANK_EVENT |
+ pipe_select(info->pipe);
vbl.request.sequence = 1;
vbl.request.signal = (unsigned long)info;
if (!sna_wait_vblank(sna, &vbl))
@@ -1437,9 +1451,7 @@ sna_dri_schedule_flip(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
sna_dri_reference_buffer(back);
/* Get current count */
- vbl.request.type = DRM_VBLANK_RELATIVE;
- if (pipe > 0)
- vbl.request.type |= DRM_VBLANK_SECONDARY;
+ vbl.request.type = DRM_VBLANK_RELATIVE | pipe_select(pipe);
vbl.request.sequence = 0;
if (sna_wait_vblank(sna, &vbl)) {
sna_dri_frame_event_info_free(info);
@@ -1452,9 +1464,8 @@ sna_dri_schedule_flip(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
vbl.request.type =
DRM_VBLANK_ABSOLUTE |
- DRM_VBLANK_EVENT;
- if (pipe > 0)
- vbl.request.type |= DRM_VBLANK_SECONDARY;
+ DRM_VBLANK_EVENT |
+ pipe_select(pipe);
/*
* If divisor is zero, or current_msc is smaller than target_msc
@@ -1610,9 +1621,8 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
vbl.request.type =
DRM_VBLANK_RELATIVE |
DRM_VBLANK_NEXTONMISS |
- DRM_VBLANK_EVENT;
- if (pipe > 0)
- vbl.request.type |= DRM_VBLANK_SECONDARY;
+ DRM_VBLANK_EVENT |
+ pipe_select(pipe);
vbl.request.sequence = 0;
vbl.request.signal = (unsigned long)info;
if (sna_wait_vblank(sna, &vbl) == 0)
@@ -1625,9 +1635,7 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
}
/* Get current count */
- vbl.request.type = DRM_VBLANK_RELATIVE;
- if (pipe > 0)
- vbl.request.type |= DRM_VBLANK_SECONDARY;
+ vbl.request.type = DRM_VBLANK_RELATIVE | pipe_select(pipe);
vbl.request.sequence = 0;
if (sna_wait_vblank(sna, &vbl))
goto blit_fallback;
@@ -1651,9 +1659,8 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
vbl.request.type =
DRM_VBLANK_ABSOLUTE |
- DRM_VBLANK_EVENT;
- if (pipe > 0)
- vbl.request.type |= DRM_VBLANK_SECONDARY;
+ DRM_VBLANK_EVENT |
+ pipe_select(pipe);
vbl.request.sequence = *target_msc;
vbl.request.signal = (unsigned long)info;
if (sna_wait_vblank(sna, &vbl))
@@ -1674,9 +1681,10 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
(int)divisor));
vbl.request.type =
- DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT | DRM_VBLANK_NEXTONMISS;
- if (pipe > 0)
- vbl.request.type |= DRM_VBLANK_SECONDARY;
+ DRM_VBLANK_ABSOLUTE |
+ DRM_VBLANK_EVENT |
+ DRM_VBLANK_NEXTONMISS |
+ pipe_select(pipe);
vbl.request.sequence = current_msc - current_msc % divisor + remainder;
/*
@@ -1872,9 +1880,7 @@ sna_dri_get_msc(DrawablePtr draw, CARD64 *ust, CARD64 *msc)
VG_CLEAR(vbl);
- vbl.request.type = DRM_VBLANK_RELATIVE;
- if (pipe > 0)
- vbl.request.type |= DRM_VBLANK_SECONDARY;
+ vbl.request.type = DRM_VBLANK_RELATIVE | pipe_select(pipe);
vbl.request.sequence = 0;
if (sna_wait_vblank(sna, &vbl)) {
@@ -1924,9 +1930,7 @@ sna_dri_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc,
VG_CLEAR(vbl);
/* Get current count */
- vbl.request.type = DRM_VBLANK_RELATIVE;
- if (pipe > 0)
- vbl.request.type |= DRM_VBLANK_SECONDARY;
+ vbl.request.type = DRM_VBLANK_RELATIVE | pipe_select(pipe);
vbl.request.sequence = 0;
if (sna_wait_vblank(sna, &vbl))
goto out_complete;
@@ -1964,9 +1968,10 @@ sna_dri_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc,
* client.
*/
if (divisor == 0 || current_msc < target_msc) {
- vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT;
- if (pipe > 0)
- vbl.request.type |= DRM_VBLANK_SECONDARY;
+ vbl.request.type =
+ DRM_VBLANK_ABSOLUTE |
+ DRM_VBLANK_EVENT |
+ pipe_select(pipe);
vbl.request.sequence = target_msc;
vbl.request.signal = (unsigned long)info;
if (sna_wait_vblank(sna, &vbl))
@@ -1981,9 +1986,8 @@ sna_dri_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc,
* If we get here, target_msc has already passed or we don't have one,
* so we queue an event that will satisfy the divisor/remainder equation.
*/
- vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT;
- if (pipe > 0)
- vbl.request.type |= DRM_VBLANK_SECONDARY;
+ vbl.request.type =
+ DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT | pipe_select(pipe);
vbl.request.sequence = current_msc - current_msc % divisor + remainder;
@@ -2024,7 +2028,7 @@ Bool sna_dri_open(struct sna *sna, ScreenPtr screen)
DBG(("%s()\n", __FUNCTION__));
- if (sna->kgem.wedged) {
+ if (wedged(sna)) {
xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING,
"cannot enable DRI2 whilst the GPU is wedged\n");
return FALSE;
More information about the xorg-commit
mailing list