[Mesa-dev] [PATCH v3 02/14] nvc0: bind constant buffers for compute on Fermi
Samuel Pitoiset
samuel.pitoiset at gmail.com
Sun Feb 21 17:53:30 UTC 2016
On 02/21/2016 06:49 PM, Ilia Mirkin wrote:
> On Wed, Feb 17, 2016 at 4:27 PM, Samuel Pitoiset
> <samuel.pitoiset at gmail.com> wrote:
>> Loosely based on 3D.
>>
>> Changs from v3:
>> - invalidate COMPUTE CBs after validating 3D CBs because they are
>> aliased
>>
>> Changes from v2:
>> - get rid of the 's' param to nvc0_cb_bo_push() because it doesn't
>> matter to upload constbufs for compute using the 3d chan
>>
>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
>> ---
>> src/gallium/drivers/nouveau/nvc0/nvc0_compute.c | 72 +++++++++++++++++++---
>> src/gallium/drivers/nouveau/nvc0/nvc0_context.c | 11 +++-
>> src/gallium/drivers/nouveau/nvc0/nvc0_screen.h | 2 +-
>> src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 4 +-
>> .../drivers/nouveau/nvc0/nvc0_state_validate.c | 5 ++
>> 5 files changed, 81 insertions(+), 13 deletions(-)
>>
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
>> index 5c7dc0e..0fe6353 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
>> @@ -138,11 +138,71 @@ nvc0_compute_validate_program(struct nvc0_context *nvc0)
>> return false;
>> }
>>
>> +static void
>> +nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
>> +{
>> + struct nouveau_pushbuf *push = nvc0->base.pushbuf;
>> + const int s = 5;
>> +
>> + while (nvc0->constbuf_dirty[s]) {
>> + int i = ffs(nvc0->constbuf_dirty[s]) - 1;
>> + nvc0->constbuf_dirty[s] &= ~(1 << i);
>> +
>> + if (nvc0->constbuf[s][i].user) {
>> + struct nouveau_bo *bo = nvc0->screen->uniform_bo;
>> + const unsigned base = s << 16;
>> + const unsigned size = nvc0->constbuf[s][0].size;
>> + assert(i == 0); /* we really only want OpenGL uniforms here */
>> + assert(nvc0->constbuf[s][0].u.data);
>> +
>> + if (nvc0->state.uniform_buffer_bound[s] < size) {
>> + nvc0->state.uniform_buffer_bound[s] = align(size, 0x100);
>> +
>> + BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
>> + PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]);
>> + PUSH_DATAh(push, bo->offset + base);
>> + PUSH_DATA (push, bo->offset + base);
>> + BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
>> + PUSH_DATA (push, (0 << 8) | 1);
>> + }
>> + nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
>> + base, nvc0->state.uniform_buffer_bound[s],
>> + 0, (size + 3) / 4,
>> + nvc0->constbuf[s][0].u.data);
>> + } else {
>> + struct nv04_resource *res =
>> + nv04_resource(nvc0->constbuf[s][i].u.buf);
>> + if (res) {
>> + BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
>> + PUSH_DATA (push, nvc0->constbuf[s][i].size);
>> + PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
>> + PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
>> + BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
>> + PUSH_DATA (push, (i << 8) | 1);
>> +
>> + BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);
>> +
>> + res->cb_bindings[s] |= 1 << i;
>> + } else {
>> + BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
>> + PUSH_DATA (push, (i << 8) | 0);
>> + }
>> + if (i == 0)
>> + nvc0->state.uniform_buffer_bound[s] = 0;
>> + }
>> + }
>> +
>> + BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
>> + PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
>> +}
>> +
>> static bool
>> nvc0_compute_state_validate(struct nvc0_context *nvc0)
>> {
>> if (!nvc0_compute_validate_program(nvc0))
>> return false;
>> + if (nvc0->dirty_cp & NVC0_NEW_CP_CONSTBUF)
>> + nvc0_compute_validate_constbufs(nvc0);
>>
>> /* TODO: textures, samplers, surfaces, global memory buffers */
>>
>> @@ -188,7 +248,7 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
>> struct nvc0_context *nvc0 = nvc0_context(pipe);
>> struct nouveau_pushbuf *push = nvc0->base.pushbuf;
>> struct nvc0_program *cp = nvc0->compprog;
>> - unsigned s, i;
>> + unsigned s;
>> int ret;
>>
>> ret = !nvc0_compute_state_validate(nvc0);
>> @@ -242,14 +302,10 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
>> BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
>> PUSH_DATA (push, 0x1);
>>
>> - /* rebind all the 3D constant buffers
>> - * (looks like binding a CB on COMPUTE clobbers 3D state) */
>> + /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
>> nvc0->dirty |= NVC0_NEW_CONSTBUF;
>> for (s = 0; s < 5; s++) {
>> - for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++)
>> - if (nvc0->constbuf[s][i].u.buf)
>> - nvc0->constbuf_dirty[s] |= 1 << i;
>> + nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s];
>> + nvc0->state.uniform_buffer_bound[s] = 0;
>> }
>> - memset(nvc0->state.uniform_buffer_bound, 0,
>> - sizeof(nvc0->state.uniform_buffer_bound));
>> }
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
>> index 547b8f5..4fed7b2 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
>> @@ -241,15 +241,20 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
>> }
>> }
>>
>> - for (s = 0; s < 5; ++s) {
>> + for (s = 0; s < 6; ++s) {
>> for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; ++i) {
>> if (!(nvc0->constbuf_valid[s] & (1 << i)))
>> continue;
>> if (!nvc0->constbuf[s][i].user &&
>> nvc0->constbuf[s][i].u.buf == res) {
>> - nvc0->dirty |= NVC0_NEW_CONSTBUF;
>> nvc0->constbuf_dirty[s] |= 1 << i;
>> - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i));
>> + if (unlikely(s == 5)) {
>> + nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF;
>> + nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_CB(i));
>> + } else {
>> + nvc0->dirty |= NVC0_NEW_CONSTBUF;
>> + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i));
>> + }
>> if (!--ref)
>> return ref;
>> }
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
>> index e86fe43..8487abc 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
>> @@ -51,7 +51,7 @@ struct nvc0_graph_state {
>> uint8_t c14_bound; /* whether immediate array constbuf is bound */
>> uint8_t clip_enable;
>> uint32_t clip_mode;
>> - uint32_t uniform_buffer_bound[5];
>> + uint32_t uniform_buffer_bound[6];
>> struct nvc0_transform_feedback_state *tfb;
>> bool seamless_cube_map;
>> };
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
>> index 448211f..157d628 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
>> @@ -839,7 +839,9 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
>> const unsigned i = index;
>>
>> if (unlikely(shader == PIPE_SHADER_COMPUTE)) {
>> - assert(!cb || !cb->user_buffer);
>> + if (nvc0->constbuf[s][i].user)
>> + nvc0->constbuf[s][i].u.buf = NULL;
>> + else
>> if (nvc0->constbuf[s][i].u.buf)
>> nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_CB(i));
>>
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
>> index 5ac3676..2a210e9 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
>> @@ -467,6 +467,11 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
>> }
>> }
>> }
>> +
>> + /* Invalidate all COMPUTE constbufs because they are aliased with 3D. */
>> + nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF;
>> + nvc0->constbuf_dirty[5] |= nvc0->constbuf_valid[5];
>> + nvc0->state.uniform_buffer_bound[5] = 0;
>
> I know that I gave you a R-b and you already pushed this, but.... it
> occurs to me that this should actually be more like
>
> nvc0->constbuf_dirty[5] |= nvc0->constbuf_valid[5];
> nvc0->constbuf_dirty[5] |= nvc0->constbuf_valid[s];
>
> Not only do you want the valid constbufs to be bound, but you also
> want the "old" ones to be properly *unbound*, since they will not be
> in the bufctx, and who knows what the hw will do even if the shader in
> question doesn't read from them. (And a similar change to the compute
> side of things.)
Mmmh yeah probably.
Anyway, we should also only dirty the CBs which are really overwritten,
this will avoid some re-binding.
I'll think more about this and send a patch.
>
> -ilia
>
More information about the mesa-dev
mailing list