[Mesa-dev] [PATCH 1/2] radeon/winsys: add dma ring support to winsys

Marek Olšák maraeo at gmail.com
Tue Jan 8 08:31:12 PST 2013


On Tue, Jan 8, 2013 at 4:51 PM, Jerome Glisse <j.glisse at gmail.com> wrote:
> On Tue, Jan 8, 2013 at 10:15 AM, Marek Olšák <maraeo at gmail.com> wrote:
>> On Mon, Jan 7, 2013 at 9:30 PM,  <j.glisse at gmail.com> wrote:
>>> From: Jerome Glisse <jglisse at redhat.com>
>>>
>>> Signed-off-by: Jerome Glisse <jglisse at redhat.com>
>>> ---
>>>  src/gallium/drivers/r300/r300_context.c           |   2 +-
>>>  src/gallium/drivers/r600/r600_pipe.c              |   2 +-
>>>  src/gallium/drivers/radeonsi/radeonsi_pipe.c      |   2 +-
>>>  src/gallium/winsys/radeon/drm/radeon_drm_bo.c     |   2 +-
>>>  src/gallium/winsys/radeon/drm/radeon_drm_cs.c     | 104 +++++++++++++++-------
>>>  src/gallium/winsys/radeon/drm/radeon_drm_cs.h     |   2 +-
>>>  src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |   6 ++
>>>  src/gallium/winsys/radeon/drm/radeon_winsys.h     |  21 ++++-
>>>  8 files changed, 100 insertions(+), 41 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
>>> index b498454..f0d738e 100644
>>> --- a/src/gallium/drivers/r300/r300_context.c
>>> +++ b/src/gallium/drivers/r300/r300_context.c
>>> @@ -376,7 +376,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
>>>                       sizeof(struct pipe_transfer), 64,
>>>                       UTIL_SLAB_SINGLETHREADED);
>>>
>>> -    r300->cs = rws->cs_create(rws);
>>> +    r300->cs = rws->cs_create(rws, RING_GFX);
>>>      if (r300->cs == NULL)
>>>          goto fail;
>>>
>>> diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
>>> index 29ef988..7c4ec44 100644
>>> --- a/src/gallium/drivers/r600/r600_pipe.c
>>> +++ b/src/gallium/drivers/r600/r600_pipe.c
>>> @@ -289,7 +289,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
>>>                 goto fail;
>>>         }
>>>
>>> -       rctx->cs = rctx->ws->cs_create(rctx->ws);
>>> +       rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX);
>>>         rctx->ws->cs_set_flush_callback(rctx->cs, r600_flush_from_winsys, rctx);
>>>
>>>         rctx->uploader = u_upload_create(&rctx->context, 1024 * 1024, 256,
>>> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
>>> index d66e30f..cfa1ff7 100644
>>> --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
>>> +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
>>> @@ -222,7 +222,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
>>>         case TAHITI:
>>>                 si_init_state_functions(rctx);
>>>                 LIST_INITHEAD(&rctx->active_query_list);
>>> -               rctx->cs = rctx->ws->cs_create(rctx->ws);
>>> +               rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX);
>>>                 rctx->max_db = 8;
>>>                 si_init_config(rctx);
>>>                 break;
>>> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
>>> index 897e962..6daafc3 100644
>>> --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
>>> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
>>> @@ -453,7 +453,7 @@ static void *radeon_bo_map(struct radeon_winsys_cs_handle *buf,
>>>                  } else {
>>>                      /* Try to avoid busy-waiting in radeon_bo_wait. */
>>>                      if (p_atomic_read(&bo->num_active_ioctls))
>>> -                        radeon_drm_cs_sync_flush(cs);
>>> +                        radeon_drm_cs_sync_flush(rcs);
>>>                  }
>>>
>>>                  radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_READWRITE);
>>> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
>>> index c5e7f1e..5e2c471 100644
>>> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
>>> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
>>> @@ -90,6 +90,10 @@
>>>  #define RADEON_CS_RING_COMPUTE      1
>>>  #endif
>>>
>>> +#ifndef RADEON_CS_RING_DMA
>>> +#define RADEON_CS_RING_DMA          2
>>> +#endif
>>> +
>>>  #ifndef RADEON_CS_END_OF_FRAME
>>>  #define RADEON_CS_END_OF_FRAME      0x04
>>>  #endif
>>> @@ -161,7 +165,7 @@ static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
>>>  DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE)
>>>  static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param);
>>>
>>> -static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws)
>>> +static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws, enum ring_type ring_type)
>>>  {
>>>      struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
>>>      struct radeon_drm_cs *cs;
>>> @@ -189,6 +193,7 @@ static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws)
>>>      cs->csc = &cs->csc1;
>>>      cs->cst = &cs->csc2;
>>>      cs->base.buf = cs->csc->buf;
>>> +    cs->base.ring_type = ring_type;
>>>
>>>      p_atomic_inc(&ws->num_cs);
>>>      if (cs->ws->num_cpus > 1 && debug_get_option_thread())
>>> @@ -246,24 +251,34 @@ int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
>>>      return -1;
>>>  }
>>>
>>> -static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
>>> +static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
>>>                                   struct radeon_bo *bo,
>>>                                   enum radeon_bo_usage usage,
>>>                                   enum radeon_bo_domain domains,
>>>                                   enum radeon_bo_domain *added_domains)
>>>  {
>>> +    struct radeon_cs_context *csc = cs->csc;
>>>      struct drm_radeon_cs_reloc *reloc;
>>>      unsigned i;
>>>      unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
>>>      enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
>>>      enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
>>> +    bool update_hash = TRUE;
>>>
>>>      if (csc->is_handle_added[hash]) {
>>>          i = csc->reloc_indices_hashlist[hash];
>>>          reloc = &csc->relocs[i];
>>>          if (reloc->handle == bo->handle) {
>>> +            /* do not update the hash table if it's dma ring, so that first hash always point
>>> +             * to first bo relocation which will the one used by the kernel. Following relocation
>>> +             * will be ignore by the kernel memory placement (but still use by the kernel to
>>> +             * update the cmd stream with proper buffer offset).
>>> +             */
>>> +            update_hash = FALSE;
>>>              update_reloc_domains(reloc, rd, wd, added_domains);
>>> -            return i;
>>> +            if (cs->base.ring_type != RING_DMA) {
>>> +                return i;
>>> +            }
>>>          }
>>>
>>>          /* Hash collision, look for the BO in the list of relocs linearly. */
>>> @@ -271,11 +286,18 @@ static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
>>>              --i;
>>>              reloc = &csc->relocs[i];
>>>              if (reloc->handle == bo->handle) {
>>> +                /* do not update the hash table if it's dma ring, so that first hash always point
>>> +                 * to first bo relocation which will the one used by the kernel. Following relocation
>>> +                 * will be ignore by the kernel memory placement (but still use by the kernel to
>>> +                 * update the cmd stream with proper buffer offset).
>>> +                 */
>>> +                update_hash = FALSE;
>>>                  update_reloc_domains(reloc, rd, wd, added_domains);
>>> -
>>>                  csc->reloc_indices_hashlist[hash] = i;
>>>                  /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
>>> -                return i;
>>> +                if (cs->base.ring_type != RING_DMA) {
>>> +                    return i;
>>> +                }
>>
>> All the changes in radeon_add_reloc don't make any sense. The hash
>> table has nothing to do with the kernel nor does it change the
>> ordering of relocations. It's only an optimization for a quick lookup
>> of relocation indices which were added to the list before. The
>> function behaves exactly like a linear search, except that it's way
>> faster than that. It needs no changes at all.
>>
>> The rest looks good.
>>
>> Marek
>>
>
> It needs change for dma, for dma you have to reemit the relocation as
> many time as the buffer is use. We can't use the dma nop packet to
> store an index in the dma cmd stream. So the change i made are
> necessary. I keep the hash for dma so that i can update the first
> relocation of a bo and update properly the read/write domain with the
> new read write domain if those were ever to be different in a same dma
> cmd stream.

In that case, you should put what you just told me in the comment in
radeon_add_reloc, so that it's not so confusing.

Reviewed-by: Marek Olšák <maraeo at gmail.com>

Marek


More information about the mesa-dev mailing list