[Mesa-dev] [PATCH 1/2] radeon/winsys: add dma ring support to winsys

Jerome Glisse j.glisse at gmail.com
Tue Jan 8 07:51:15 PST 2013


On Tue, Jan 8, 2013 at 10:15 AM, Marek Olšák <maraeo at gmail.com> wrote:
> On Mon, Jan 7, 2013 at 9:30 PM,  <j.glisse at gmail.com> wrote:
>> From: Jerome Glisse <jglisse at redhat.com>
>>
>> Signed-off-by: Jerome Glisse <jglisse at redhat.com>
>> ---
>>  src/gallium/drivers/r300/r300_context.c           |   2 +-
>>  src/gallium/drivers/r600/r600_pipe.c              |   2 +-
>>  src/gallium/drivers/radeonsi/radeonsi_pipe.c      |   2 +-
>>  src/gallium/winsys/radeon/drm/radeon_drm_bo.c     |   2 +-
>>  src/gallium/winsys/radeon/drm/radeon_drm_cs.c     | 104 +++++++++++++++-------
>>  src/gallium/winsys/radeon/drm/radeon_drm_cs.h     |   2 +-
>>  src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |   6 ++
>>  src/gallium/winsys/radeon/drm/radeon_winsys.h     |  21 ++++-
>>  8 files changed, 100 insertions(+), 41 deletions(-)
>>
>> diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
>> index b498454..f0d738e 100644
>> --- a/src/gallium/drivers/r300/r300_context.c
>> +++ b/src/gallium/drivers/r300/r300_context.c
>> @@ -376,7 +376,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
>>                       sizeof(struct pipe_transfer), 64,
>>                       UTIL_SLAB_SINGLETHREADED);
>>
>> -    r300->cs = rws->cs_create(rws);
>> +    r300->cs = rws->cs_create(rws, RING_GFX);
>>      if (r300->cs == NULL)
>>          goto fail;
>>
>> diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
>> index 29ef988..7c4ec44 100644
>> --- a/src/gallium/drivers/r600/r600_pipe.c
>> +++ b/src/gallium/drivers/r600/r600_pipe.c
>> @@ -289,7 +289,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
>>                 goto fail;
>>         }
>>
>> -       rctx->cs = rctx->ws->cs_create(rctx->ws);
>> +       rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX);
>>         rctx->ws->cs_set_flush_callback(rctx->cs, r600_flush_from_winsys, rctx);
>>
>>         rctx->uploader = u_upload_create(&rctx->context, 1024 * 1024, 256,
>> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
>> index d66e30f..cfa1ff7 100644
>> --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
>> +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
>> @@ -222,7 +222,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
>>         case TAHITI:
>>                 si_init_state_functions(rctx);
>>                 LIST_INITHEAD(&rctx->active_query_list);
>> -               rctx->cs = rctx->ws->cs_create(rctx->ws);
>> +               rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX);
>>                 rctx->max_db = 8;
>>                 si_init_config(rctx);
>>                 break;
>> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
>> index 897e962..6daafc3 100644
>> --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
>> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
>> @@ -453,7 +453,7 @@ static void *radeon_bo_map(struct radeon_winsys_cs_handle *buf,
>>                  } else {
>>                      /* Try to avoid busy-waiting in radeon_bo_wait. */
>>                      if (p_atomic_read(&bo->num_active_ioctls))
>> -                        radeon_drm_cs_sync_flush(cs);
>> +                        radeon_drm_cs_sync_flush(rcs);
>>                  }
>>
>>                  radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_READWRITE);
>> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
>> index c5e7f1e..5e2c471 100644
>> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
>> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
>> @@ -90,6 +90,10 @@
>>  #define RADEON_CS_RING_COMPUTE      1
>>  #endif
>>
>> +#ifndef RADEON_CS_RING_DMA
>> +#define RADEON_CS_RING_DMA          2
>> +#endif
>> +
>>  #ifndef RADEON_CS_END_OF_FRAME
>>  #define RADEON_CS_END_OF_FRAME      0x04
>>  #endif
>> @@ -161,7 +165,7 @@ static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
>>  DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE)
>>  static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param);
>>
>> -static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws)
>> +static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws, enum ring_type ring_type)
>>  {
>>      struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
>>      struct radeon_drm_cs *cs;
>> @@ -189,6 +193,7 @@ static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws)
>>      cs->csc = &cs->csc1;
>>      cs->cst = &cs->csc2;
>>      cs->base.buf = cs->csc->buf;
>> +    cs->base.ring_type = ring_type;
>>
>>      p_atomic_inc(&ws->num_cs);
>>      if (cs->ws->num_cpus > 1 && debug_get_option_thread())
>> @@ -246,24 +251,34 @@ int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
>>      return -1;
>>  }
>>
>> -static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
>> +static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
>>                                   struct radeon_bo *bo,
>>                                   enum radeon_bo_usage usage,
>>                                   enum radeon_bo_domain domains,
>>                                   enum radeon_bo_domain *added_domains)
>>  {
>> +    struct radeon_cs_context *csc = cs->csc;
>>      struct drm_radeon_cs_reloc *reloc;
>>      unsigned i;
>>      unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
>>      enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
>>      enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
>> +    bool update_hash = TRUE;
>>
>>      if (csc->is_handle_added[hash]) {
>>          i = csc->reloc_indices_hashlist[hash];
>>          reloc = &csc->relocs[i];
>>          if (reloc->handle == bo->handle) {
>> +            /* do not update the hash table if it's dma ring, so that first hash always point
>> +             * to first bo relocation which will the one used by the kernel. Following relocation
>> +             * will be ignore by the kernel memory placement (but still use by the kernel to
>> +             * update the cmd stream with proper buffer offset).
>> +             */
>> +            update_hash = FALSE;
>>              update_reloc_domains(reloc, rd, wd, added_domains);
>> -            return i;
>> +            if (cs->base.ring_type != RING_DMA) {
>> +                return i;
>> +            }
>>          }
>>
>>          /* Hash collision, look for the BO in the list of relocs linearly. */
>> @@ -271,11 +286,18 @@ static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
>>              --i;
>>              reloc = &csc->relocs[i];
>>              if (reloc->handle == bo->handle) {
>> +                /* do not update the hash table if it's dma ring, so that first hash always point
>> +                 * to first bo relocation which will the one used by the kernel. Following relocation
>> +                 * will be ignore by the kernel memory placement (but still use by the kernel to
>> +                 * update the cmd stream with proper buffer offset).
>> +                 */
>> +                update_hash = FALSE;
>>                  update_reloc_domains(reloc, rd, wd, added_domains);
>> -
>>                  csc->reloc_indices_hashlist[hash] = i;
>>                  /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
>> -                return i;
>> +                if (cs->base.ring_type != RING_DMA) {
>> +                    return i;
>> +                }
>
> All the changes in radeon_add_reloc don't make any sense. The hash
> table has nothing to do with the kernel nor does it change the
> ordering of relocations. It's only an optimization for a quick lookup
> of relocation indices which were added to the list before. The
> function behaves exactly like a linear search, except that it's way
> faster than that. It needs no changes at all.
>
> The rest looks good.
>
> Marek
>

It needs change for dma, for dma you have to reemit the relocation as
many time as the buffer is use. We can't use the dma nop packet to
store an index in the dma cmd stream. So the change i made are
necessary. I keep the hash for dma so that i can update the first
relocation of a bo and update properly the read/write domain with the
new read write domain if those were ever to be different in a same dma
cmd stream.

Cheers,
Jerome


More information about the mesa-dev mailing list