[Mesa-stable] [PATCH] nv50, nvc0: fix buffer clearing to respect engine alignment requirements
Ilia Mirkin
imirkin at alum.mit.edu
Sat Jan 30 16:10:07 CET 2016
It appears that the nvidia render engine is quite picky when it comes to
linear surfaces. It doesn't like non-256-byte aligned offsets, and
apparently doesn't even do non-256-byte strides.
This makes arb_clear_buffer_object-unaligned pass on both nv50 and nvc0.
As a side-effect this also allows RGB32 clears to work via GPU data
upload instead of synchronizing the buffer to the CPU (nvc0 only).
Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
Cc: mesa-stable at lists.freedesktop.org
---
src/gallium/drivers/nouveau/nv50/nv50_surface.c | 190 ++++++++++++++------
src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 229 ++++++++++++++++++------
2 files changed, 307 insertions(+), 112 deletions(-)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index 86be1b4..618c39c 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -595,6 +595,82 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
}
static void
+nv50_clear_buffer_push(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size,
+ const void *data, int data_size)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv04_resource *buf = nv04_resource(res);
+ unsigned count = (size + 3) / 4;
+ unsigned xcoord = offset & 0xff;
+ unsigned tmp, i;
+
+ if (data_size == 1) {
+ tmp = *(unsigned char *)data;
+ tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp;
+ data = &tmp;
+ data_size = 4;
+ } else if (data_size == 2) {
+ tmp = *(unsigned short *)data;
+ tmp = (tmp << 16) | tmp;
+ data = &tmp;
+ data_size = 4;
+ }
+
+ unsigned data_words = data_size / 4;
+
+ nouveau_bufctx_refn(nv50->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, nv50->bufctx);
+ nouveau_pushbuf_validate(push);
+
+ offset &= ~0xff;
+
+ BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
+ PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
+ PUSH_DATA (push, 262144);
+ PUSH_DATA (push, 65536);
+ PUSH_DATA (push, 1);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
+ BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);
+ PUSH_DATA (push, size);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, xcoord);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+
+ while (count) {
+ unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
+ unsigned nr = nr_data * data_words;
+
+ BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr);
+ for (i = 0; i < nr_data; i++)
+ PUSH_DATAp(push, data, data_words);
+
+ count -= nr;
+ }
+
+ if (buf->mm) {
+ nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
+ }
+
+ nouveau_bufctx_reset(nv50->bufctx, 0);
+}
+
+static void
nv50_clear_buffer(struct pipe_context *pipe,
struct pipe_resource *res,
unsigned offset, unsigned size,
@@ -643,77 +719,85 @@ nv50_clear_buffer(struct pipe_context *pipe,
assert(size % data_size == 0);
+ if (offset & 0xff) {
+ unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset);
+ assert(fixup_size % data_size == 0);
+ nv50_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size);
+ offset += fixup_size;
+ size -= fixup_size;
+ if (!size)
+ return;
+ }
+
elements = size / data_size;
height = (elements + 8191) / 8192;
width = elements / height;
+ width &= ~0xff;
- BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4);
- PUSH_DATAf(push, color.f[0]);
- PUSH_DATAf(push, color.f[1]);
- PUSH_DATAf(push, color.f[2]);
- PUSH_DATAf(push, color.f[3]);
+ if (width) {
+ BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4);
+ PUSH_DATAf(push, color.f[0]);
+ PUSH_DATAf(push, color.f[1]);
+ PUSH_DATAf(push, color.f[2]);
+ PUSH_DATAf(push, color.f[3]);
- if (nouveau_pushbuf_space(push, 32, 1, 0))
- return;
+ if (nouveau_pushbuf_space(push, 32, 1, 0))
+ return;
- PUSH_REFN(push, buf->bo, buf->domain | NOUVEAU_BO_WR);
+ PUSH_REFN(push, buf->bo, buf->domain | NOUVEAU_BO_WR);
- BEGIN_NV04(push, NV50_3D(SCREEN_SCISSOR_HORIZ), 2);
- PUSH_DATA (push, width << 16);
- PUSH_DATA (push, height << 16);
- BEGIN_NV04(push, NV50_3D(SCISSOR_HORIZ(0)), 2);
- PUSH_DATA (push, 8192 << 16);
- PUSH_DATA (push, 8192 << 16);
- nv50->scissors_dirty |= 1;
+ BEGIN_NV04(push, NV50_3D(SCREEN_SCISSOR_HORIZ), 2);
+ PUSH_DATA (push, width << 16);
+ PUSH_DATA (push, height << 16);
+ BEGIN_NV04(push, NV50_3D(SCISSOR_HORIZ(0)), 2);
+ PUSH_DATA (push, 8192 << 16);
+ PUSH_DATA (push, 8192 << 16);
+ nv50->scissors_dirty |= 1;
- BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
- PUSH_DATA (push, 1);
- BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 5);
- PUSH_DATAh(push, buf->bo->offset + buf->offset + offset);
- PUSH_DATA (push, buf->bo->offset + buf->offset + offset);
- PUSH_DATA (push, nv50_format_table[dst_fmt].rt);
- PUSH_DATA (push, 0);
- PUSH_DATA (push, 0);
- BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2);
- PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size));
- PUSH_DATA (push, height);
- BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
- PUSH_DATA (push, 0);
- BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
- PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 5);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
+ PUSH_DATA (push, nv50_format_table[dst_fmt].rt);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2);
+ PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size));
+ PUSH_DATA (push, height);
+ BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
+ PUSH_DATA (push, 0);
- /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */
+ /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */
- BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2);
- PUSH_DATA (push, (width << 16));
- PUSH_DATA (push, (height << 16));
+ BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2);
+ PUSH_DATA (push, (width << 16));
+ PUSH_DATA (push, (height << 16));
- BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
- PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
+
+ BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1);
+ PUSH_DATA (push, 0x3c);
- BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1);
- PUSH_DATA (push, 0x3c);
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, nv50->cond_condmode);
+
+ if (buf->mm) {
+ nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
+ }
+ }
if (width * height != elements) {
offset += width * height * data_size;
width = elements - width * height;
- height = 1;
- BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 2);
- PUSH_DATAh(push, buf->bo->offset + buf->offset + offset);
- PUSH_DATA (push, buf->bo->offset + buf->offset + offset);
- BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2);
- PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size));
- PUSH_DATA (push, height);
- BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1);
- PUSH_DATA (push, 0x3c);
+ nv50_clear_buffer_push(pipe, res, offset, width * data_size,
+ data, data_size);
}
- BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
- PUSH_DATA (push, nv50->cond_condmode);
-
- nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
- nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
-
nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index 4e43c4e..be4c531 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -357,27 +357,132 @@ nvc0_clear_render_target(struct pipe_context *pipe,
}
static void
-nvc0_clear_buffer_cpu(struct pipe_context *pipe,
- struct pipe_resource *res,
- unsigned offset, unsigned size,
- const void *data, int data_size)
+nvc0_clear_buffer_push_nvc0(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size,
+ const void *data, int data_size)
{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nv04_resource *buf = nv04_resource(res);
- struct pipe_transfer *pt;
- struct pipe_box box;
- unsigned elements, i;
+ unsigned i;
- elements = size / data_size;
+ nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, nvc0->bufctx);
+ nouveau_pushbuf_validate(push);
+
+ unsigned count = (size + 3) / 4;
+ unsigned data_words = data_size / 4;
+
+ while (count) {
+ unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
+ unsigned nr = nr_data * data_words;
- u_box_1d(offset, size, &box);
+ if (!PUSH_SPACE(push, nr + 9))
+ break;
+
+ BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
+ BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, MIN2(size, nr * 4));
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
+ PUSH_DATA (push, 0x100111);
- uint8_t *map = buf->vtbl->transfer_map(pipe, res, 0, PIPE_TRANSFER_WRITE,
- &box, &pt);
+ /* must not be interrupted (trap on QUERY fence, 0x50 works however) */
+ BEGIN_NIC0(push, NVC0_M2MF(DATA), nr);
+ for (i = 0; i < nr_data; i++)
+ PUSH_DATAp(push, data, data_words);
- for (i = 0; i < elements; ++i)
- memcpy(&map[i*data_size], data, data_size);
+ count -= nr;
+ offset += nr * 4;
+ size -= nr * 4;
+ }
+
+ if (buf->mm) {
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
+ }
+
+ nouveau_bufctx_reset(nvc0->bufctx, 0);
+}
+
+static void
+nvc0_clear_buffer_push_nve4(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size,
+ const void *data, int data_size)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nv04_resource *buf = nv04_resource(res);
+ unsigned i;
+
+ nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, nvc0->bufctx);
+ nouveau_pushbuf_validate(push);
+
+ unsigned count = (size + 3) / 4;
+ unsigned data_words = data_size / 4;
+
+ while (count) {
+ unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
+ unsigned nr = nr_data * data_words;
+
+ if (!PUSH_SPACE(push, nr + 10))
+ break;
+
+ BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
+ BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, MIN2(size, nr * 4));
+ PUSH_DATA (push, 1);
+ /* must not be interrupted (trap on QUERY fence, 0x50 works however) */
+ BEGIN_1IC0(push, NVE4_P2MF(UPLOAD_EXEC), nr + 1);
+ PUSH_DATA (push, 0x1001);
+ for (i = 0; i < nr_data; i++)
+ PUSH_DATAp(push, data, data_words);
+
+ count -= nr;
+ offset += nr * 4;
+ size -= nr * 4;
+ }
+
+ if (buf->mm) {
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
+ }
+
+ nouveau_bufctx_reset(nvc0->bufctx, 0);
+}
+
+static void
+nvc0_clear_buffer_push(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size,
+ const void *data, int data_size)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ unsigned tmp;
+
+ if (data_size == 1) {
+ tmp = *(unsigned char *)data;
+ tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp;
+ data = &tmp;
+ data_size = 4;
+ } else if (data_size == 2) {
+ tmp = *(unsigned short *)data;
+ tmp = (tmp << 16) | tmp;
+ data = &tmp;
+ data_size = 4;
+ }
- buf->vtbl->transfer_unmap(pipe, pt);
+ if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
+ nvc0_clear_buffer_push_nvc0(pipe, res, offset, size, data, data_size);
+ else
+ nvc0_clear_buffer_push_nve4(pipe, res, offset, size, data, data_size);
}
static void
@@ -402,10 +507,8 @@ nvc0_clear_buffer(struct pipe_context *pipe,
memcpy(&color.ui, data, 16);
break;
case 12:
- /* This doesn't work, RGB32 is not a valid RT format.
- * dst_fmt = PIPE_FORMAT_R32G32B32_UINT;
- * memcpy(&color.ui, data, 12);
- * memset(&color.ui[3], 0, 4);
+ /* RGB32 is not a valid RT format. This will be handled by the pushbuf
+ * uploader.
*/
break;
case 8:
@@ -437,67 +540,75 @@ nvc0_clear_buffer(struct pipe_context *pipe,
assert(size % data_size == 0);
if (data_size == 12) {
- /* TODO: Find a way to do this with the GPU! */
- nvc0_clear_buffer_cpu(pipe, res, offset, size, data, data_size);
+ nvc0_clear_buffer_push(pipe, res, offset, size, data, data_size);
return;
}
+ if (offset & 0xff) {
+ unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset);
+ assert(fixup_size % data_size == 0);
+ nvc0_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size);
+ offset += fixup_size;
+ size -= fixup_size;
+ if (!size)
+ return;
+ }
+
elements = size / data_size;
height = (elements + 16383) / 16384;
width = elements / height;
+ width &= ~0xff;
- if (!PUSH_SPACE(push, 40))
- return;
-
- PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
-
- BEGIN_NVC0(push, NVC0_3D(CLEAR_COLOR(0)), 4);
- PUSH_DATAf(push, color.f[0]);
- PUSH_DATAf(push, color.f[1]);
- PUSH_DATAf(push, color.f[2]);
- PUSH_DATAf(push, color.f[3]);
- BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
- PUSH_DATA (push, width << 16);
- PUSH_DATA (push, height << 16);
-
- IMMED_NVC0(push, NVC0_3D(RT_CONTROL), 1);
-
- BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 9);
- PUSH_DATAh(push, buf->address + offset);
- PUSH_DATA (push, buf->address + offset);
- PUSH_DATA (push, width * data_size);
- PUSH_DATA (push, height);
- PUSH_DATA (push, nvc0_format_table[dst_fmt].rt);
- PUSH_DATA (push, NVC0_3D_RT_TILE_MODE_LINEAR);
- PUSH_DATA (push, 1);
- PUSH_DATA (push, 0);
- PUSH_DATA (push, 0);
-
- IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0);
- IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 0);
+ if (width) {
+ if (!PUSH_SPACE(push, 40))
+ return;
- IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+ PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
- IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_COLOR(0)), 4);
+ PUSH_DATAf(push, color.f[0]);
+ PUSH_DATAf(push, color.f[1]);
+ PUSH_DATAf(push, color.f[2]);
+ PUSH_DATAf(push, color.f[3]);
+ BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
+ PUSH_DATA (push, width << 16);
+ PUSH_DATA (push, height << 16);
- if (width * height != elements) {
- offset += width * height * data_size;
- width = elements - width * height;
- height = 1;
+ IMMED_NVC0(push, NVC0_3D(RT_CONTROL), 1);
- BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 4);
+ BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 9);
PUSH_DATAh(push, buf->address + offset);
PUSH_DATA (push, buf->address + offset);
PUSH_DATA (push, width * data_size);
PUSH_DATA (push, height);
+ PUSH_DATA (push, nvc0_format_table[dst_fmt].rt);
+ PUSH_DATA (push, NVC0_3D_RT_TILE_MODE_LINEAR);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+
+ IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0);
+ IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 0);
+
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
+
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
+
+ if (buf->mm) {
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
+ }
}
- IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
+ if (width * height != elements) {
+ offset += width * height * data_size;
+ width = elements - width * height;
+ nvc0_clear_buffer_push(pipe, res, offset, width * data_size,
+ data, data_size);
+ }
- nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
- nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
}
--
2.4.10
More information about the mesa-stable
mailing list