[Mesa-dev] [PATCH 3/3] radeonsi: use TC write-back instead of full cache invalidation

Edward O'Callaghan funfunctor at folklore1984.net
Wed Oct 12 12:07:30 UTC 2016


yep!

Reviewed-by: Edward O'Callaghan <funfunctor at folklore1984.net>

On 10/12/2016 10:04 PM, Nicolai Hähnle wrote:
> That's a nice improvement. For the series:
> 
> Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
> 
> On 11.10.2016 16:48, Marek Olšák wrote:
>> From: Marek Olšák <marek.olsak at amd.com>
>>
>> ---
>>  src/gallium/drivers/radeonsi/si_compute.c    |  2 +-
>>  src/gallium/drivers/radeonsi/si_state.c      | 12 +++---------
>>  src/gallium/drivers/radeonsi/si_state_draw.c |  6 +++---
>>  3 files changed, 7 insertions(+), 13 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
>> index 632839f..e785106 100644
>> --- a/src/gallium/drivers/radeonsi/si_compute.c
>> +++ b/src/gallium/drivers/radeonsi/si_compute.c
>> @@ -694,21 +694,21 @@ static void si_launch_grid(
>>
>>      /* Add buffer sizes for memory checking in need_cs_space. */
>>      r600_context_add_resource_size(ctx, &program->shader.bo->b.b);
>>      /* TODO: add the scratch buffer */
>>
>>      if (info->indirect) {
>>          r600_context_add_resource_size(ctx, info->indirect);
>>
>>          /* The hw doesn't read the indirect buffer via TC L2. */
>>          if (r600_resource(info->indirect)->TC_L2_dirty) {
>> -            sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
>> +            sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
>>              r600_resource(info->indirect)->TC_L2_dirty = false;
>>          }
>>      }
>>
>>      si_need_cs_space(sctx);
>>
>>      if (!sctx->cs_shader_state.initialized)
>>          si_initialize_compute(sctx);
>>
>>      if (sctx->b.flags)
>> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
>> index 34f3ed7..ad65fc2 100644
>> --- a/src/gallium/drivers/radeonsi/si_state.c
>> +++ b/src/gallium/drivers/radeonsi/si_state.c
>> @@ -3390,35 +3390,29 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
>>           * automatically at end of shader, but the contents of other
>>           * L1 caches might still be stale. */
>>          sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1;
>>      }
>>
>>      if (flags & PIPE_BARRIER_INDEX_BUFFER) {
>>          /* Indices are read through TC L2 since VI.
>>           * L1 isn't used.
>>           */
>>          if (sctx->screen->b.chip_class <= CIK)
>> -            sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
>> +            sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
>>      }
>>
>>      if (flags & PIPE_BARRIER_FRAMEBUFFER)
>>          sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
>>
>>      if (flags & (PIPE_BARRIER_FRAMEBUFFER |
>> -             PIPE_BARRIER_INDIRECT_BUFFER)) {
>> -        /* Not sure if INV_GLOBAL_L2 is the best thing here.
>> -         *
>> -         * We need to make sure that TC L1 & L2 are written back to
>> -         * memory, because CB fetches don't consider TC, but there's
>> -         * no need to invalidate any TC cache lines. */
>> -        sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
>> -    }
>> +             PIPE_BARRIER_INDIRECT_BUFFER))
>> +        sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
>>  }
>>
>>  static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
>>  {
>>      struct pipe_blend_state blend;
>>
>>      memset(&blend, 0, sizeof(blend));
>>      blend.independent_blend_enable = true;
>>      blend.rt[0].colormask = 0xf;
>>      return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
>> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
>> index 33b6b23..c14e852 100644
>> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
>> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
>> @@ -1040,32 +1040,32 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
>>              if (!ib.buffer)
>>                  return;
>>              /* info->start will be added by the drawing code */
>>              ib.offset -= start_offset;
>>          }
>>      }
>>
>>      /* VI reads index buffers through TC L2. */
>>      if (info->indexed && sctx->b.chip_class <= CIK &&
>>          r600_resource(ib.buffer)->TC_L2_dirty) {
>> -        sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
>> +        sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
>>          r600_resource(ib.buffer)->TC_L2_dirty = false;
>>      }
>>
>>      if (info->indirect && r600_resource(info->indirect)->TC_L2_dirty) {
>> -        sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
>> +        sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
>>          r600_resource(info->indirect)->TC_L2_dirty = false;
>>      }
>>
>>      if (info->indirect_params &&
>>          r600_resource(info->indirect_params)->TC_L2_dirty) {
>> -        sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
>> +        sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
>>          r600_resource(info->indirect_params)->TC_L2_dirty = false;
>>      }
>>
>>      /* Add buffer sizes for memory checking in need_cs_space. */
>>      if (sctx->emit_scratch_reloc && sctx->scratch_buffer)
>>          r600_context_add_resource_size(ctx, &sctx->scratch_buffer->b.b);
>>      if (info->indirect)
>>          r600_context_add_resource_size(ctx, info->indirect);
>>
>>      si_need_cs_space(sctx);
>>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: OpenPGP digital signature
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20161012/2337c923/attachment.sig>


More information about the mesa-dev mailing list