[Mesa-dev] [PATCH 1/2] radeon/winsys: add dma ring support to winsys

Christian König deathsimple at vodafone.de
Tue Jan 8 03:22:03 PST 2013


On 07.01.2013 21:30, j.glisse at gmail.com wrote:
> From: Jerome Glisse <jglisse at redhat.com>
>
> Signed-off-by: Jerome Glisse <jglisse at redhat.com>
Looks good to me.

Reviewed-by: Christian König <christian.koenig at amd.com>

> ---
>   src/gallium/drivers/r300/r300_context.c           |   2 +-
>   src/gallium/drivers/r600/r600_pipe.c              |   2 +-
>   src/gallium/drivers/radeonsi/radeonsi_pipe.c      |   2 +-
>   src/gallium/winsys/radeon/drm/radeon_drm_bo.c     |   2 +-
>   src/gallium/winsys/radeon/drm/radeon_drm_cs.c     | 104 +++++++++++++++-------
>   src/gallium/winsys/radeon/drm/radeon_drm_cs.h     |   2 +-
>   src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |   6 ++
>   src/gallium/winsys/radeon/drm/radeon_winsys.h     |  21 ++++-
>   8 files changed, 100 insertions(+), 41 deletions(-)
>
> diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
> index b498454..f0d738e 100644
> --- a/src/gallium/drivers/r300/r300_context.c
> +++ b/src/gallium/drivers/r300/r300_context.c
> @@ -376,7 +376,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
>                        sizeof(struct pipe_transfer), 64,
>                        UTIL_SLAB_SINGLETHREADED);
>   
> -    r300->cs = rws->cs_create(rws);
> +    r300->cs = rws->cs_create(rws, RING_GFX);
>       if (r300->cs == NULL)
>           goto fail;
>   
> diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
> index 29ef988..7c4ec44 100644
> --- a/src/gallium/drivers/r600/r600_pipe.c
> +++ b/src/gallium/drivers/r600/r600_pipe.c
> @@ -289,7 +289,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
>   		goto fail;
>   	}
>   
> -	rctx->cs = rctx->ws->cs_create(rctx->ws);
> +	rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX);
>   	rctx->ws->cs_set_flush_callback(rctx->cs, r600_flush_from_winsys, rctx);
>   
>   	rctx->uploader = u_upload_create(&rctx->context, 1024 * 1024, 256,
> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
> index d66e30f..cfa1ff7 100644
> --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
> +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
> @@ -222,7 +222,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
>   	case TAHITI:
>   		si_init_state_functions(rctx);
>   		LIST_INITHEAD(&rctx->active_query_list);
> -		rctx->cs = rctx->ws->cs_create(rctx->ws);
> +		rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX);
>   		rctx->max_db = 8;
>   		si_init_config(rctx);
>   		break;
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> index 897e962..6daafc3 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> @@ -453,7 +453,7 @@ static void *radeon_bo_map(struct radeon_winsys_cs_handle *buf,
>                   } else {
>                       /* Try to avoid busy-waiting in radeon_bo_wait. */
>                       if (p_atomic_read(&bo->num_active_ioctls))
> -                        radeon_drm_cs_sync_flush(cs);
> +                        radeon_drm_cs_sync_flush(rcs);
>                   }
>   
>                   radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_READWRITE);
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> index c5e7f1e..5e2c471 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> @@ -90,6 +90,10 @@
>   #define RADEON_CS_RING_COMPUTE      1
>   #endif
>   
> +#ifndef RADEON_CS_RING_DMA
> +#define RADEON_CS_RING_DMA          2
> +#endif
> +
>   #ifndef RADEON_CS_END_OF_FRAME
>   #define RADEON_CS_END_OF_FRAME      0x04
>   #endif
> @@ -161,7 +165,7 @@ static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
>   DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE)
>   static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param);
>   
> -static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws)
> +static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws, enum ring_type ring_type)
>   {
>       struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
>       struct radeon_drm_cs *cs;
> @@ -189,6 +193,7 @@ static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws)
>       cs->csc = &cs->csc1;
>       cs->cst = &cs->csc2;
>       cs->base.buf = cs->csc->buf;
> +    cs->base.ring_type = ring_type;
>   
>       p_atomic_inc(&ws->num_cs);
>       if (cs->ws->num_cpus > 1 && debug_get_option_thread())
> @@ -246,24 +251,34 @@ int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
>       return -1;
>   }
>   
> -static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
> +static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
>                                    struct radeon_bo *bo,
>                                    enum radeon_bo_usage usage,
>                                    enum radeon_bo_domain domains,
>                                    enum radeon_bo_domain *added_domains)
>   {
> +    struct radeon_cs_context *csc = cs->csc;
>       struct drm_radeon_cs_reloc *reloc;
>       unsigned i;
>       unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
>       enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
>       enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
> +    bool update_hash = TRUE;
>   
>       if (csc->is_handle_added[hash]) {
>           i = csc->reloc_indices_hashlist[hash];
>           reloc = &csc->relocs[i];
>           if (reloc->handle == bo->handle) {
> +            /* Do not update the hash table for the DMA ring, so that the hash always points
> +             * to the first relocation of the bo, which is the one used by the kernel. Later
> +             * relocations are ignored by the kernel for memory placement (but are still used by
> +             * the kernel to update the cmd stream with the proper buffer offset).
> +             */
> +            update_hash = FALSE;
>               update_reloc_domains(reloc, rd, wd, added_domains);
> -            return i;
> +            if (cs->base.ring_type != RING_DMA) {
> +                return i;
> +            }
>           }
>   
>           /* Hash collision, look for the BO in the list of relocs linearly. */
> @@ -271,11 +286,18 @@ static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
>               --i;
>               reloc = &csc->relocs[i];
>               if (reloc->handle == bo->handle) {
> +                /* Do not update the hash table for the DMA ring, so that the hash always points
> +                 * to the first relocation of the bo, which is the one used by the kernel. Later
> +                 * relocations are ignored by the kernel for memory placement (but are still used by
> +                 * the kernel to update the cmd stream with the proper buffer offset).
> +                 */
> +                update_hash = FALSE;
>                   update_reloc_domains(reloc, rd, wd, added_domains);
> -
>                   csc->reloc_indices_hashlist[hash] = i;
>                   /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
> -                return i;
> +                if (cs->base.ring_type != RING_DMA) {
> +                    return i;
> +                }
>               }
>           }
>       }
> @@ -305,7 +327,9 @@ static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
>       reloc->flags = 0;
>   
>       csc->is_handle_added[hash] = TRUE;
> -    csc->reloc_indices_hashlist[hash] = csc->crelocs;
> +    if (update_hash) {
> +        csc->reloc_indices_hashlist[hash] = csc->crelocs;
> +    }
>   
>       csc->chunks[1].length_dw += RELOC_DWORDS;
>   
> @@ -321,8 +345,7 @@ static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
>       struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
>       struct radeon_bo *bo = (struct radeon_bo*)buf;
>       enum radeon_bo_domain added_domains;
> -
> -    unsigned index = radeon_add_reloc(cs->csc, bo, usage, domains, &added_domains);
> +    unsigned index = radeon_add_reloc(cs, bo, usage, domains, &added_domains);
>   
>       if (added_domains & RADEON_DOMAIN_GTT)
>           cs->csc->used_gart += bo->base.size;
> @@ -373,7 +396,6 @@ static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
>   {
>       struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
>       struct radeon_bo *bo = (struct radeon_bo*)buf;
> -
>       unsigned index = radeon_get_reloc(cs->csc, bo);
>   
>       if (index == -1) {
> @@ -425,8 +447,10 @@ static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param)
>       return NULL;
>   }
>   
> -void radeon_drm_cs_sync_flush(struct radeon_drm_cs *cs)
> +void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
>   {
> +    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
> +
>       /* Wait for any pending ioctl to complete. */
>       if (cs->thread && cs->flush_started) {
>           pipe_semaphore_wait(&cs->flush_completed);
> @@ -445,7 +469,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
>          fprintf(stderr, "radeon: command stream overflowed\n");
>       }
>   
> -    radeon_drm_cs_sync_flush(cs);
> +    radeon_drm_cs_sync_flush(rcs);
>   
>       /* Flip command streams. */
>       tmp = cs->csc;
> @@ -453,8 +477,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
>       cs->cst = tmp;
>   
>       /* If the CS is not empty or overflowed, emit it in a separate thread. */
> -    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS &&
> -	!debug_get_option_noop()) {
> +    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
>           unsigned i, crelocs = cs->cst->crelocs;
>   
>           cs->cst->chunks[0].length_dw = cs->base.cdw;
> @@ -464,28 +487,40 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
>               p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
>           }
>   
> -        cs->cst->flags[0] = 0;
> -        cs->cst->flags[1] = RADEON_CS_RING_GFX;
> -        cs->cst->cs.num_chunks = 2;
> -        if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
> -            cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
> -            cs->cst->cs.num_chunks = 3;
> -        }
> -        if (cs->ws->info.r600_virtual_address) {
> -            cs->cst->flags[0] |= RADEON_CS_USE_VM;
> -            cs->cst->cs.num_chunks = 3;
> -        }
> -        if (flags & RADEON_FLUSH_END_OF_FRAME) {
> -            cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
> -            cs->cst->cs.num_chunks = 3;
> -        }
> -        if (flags & RADEON_FLUSH_COMPUTE) {
> -            cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
> +        switch (cs->base.ring_type) {
> +        case RING_DMA:
> +            cs->cst->flags[0] = 0;
> +            cs->cst->flags[1] = RADEON_CS_RING_DMA;
>               cs->cst->cs.num_chunks = 3;
> +            if (cs->ws->info.r600_virtual_address) {
> +                cs->cst->flags[0] |= RADEON_CS_USE_VM;
> +            }
> +            break;
> +        default:
> +        case RING_GFX:
> +            cs->cst->flags[0] = 0;
> +            cs->cst->flags[1] = RADEON_CS_RING_GFX;
> +            cs->cst->cs.num_chunks = 2;
> +            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
> +                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
> +                cs->cst->cs.num_chunks = 3;
> +            }
> +            if (cs->ws->info.r600_virtual_address) {
> +                cs->cst->flags[0] |= RADEON_CS_USE_VM;
> +                cs->cst->cs.num_chunks = 3;
> +            }
> +            if (flags & RADEON_FLUSH_END_OF_FRAME) {
> +                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
> +                cs->cst->cs.num_chunks = 3;
> +            }
> +            if (flags & RADEON_FLUSH_COMPUTE) {
> +                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
> +                cs->cst->cs.num_chunks = 3;
> +            }
> +            break;
>           }
>   
> -        if (cs->thread &&
> -            (flags & RADEON_FLUSH_ASYNC)) {
> +        if (cs->thread && (flags & RADEON_FLUSH_ASYNC)) {
>               cs->flush_started = 1;
>               pipe_semaphore_signal(&cs->flush_queued);
>           } else {
> @@ -503,7 +538,8 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
>   static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
>   {
>       struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
> -    radeon_drm_cs_sync_flush(cs);
> +
> +    radeon_drm_cs_sync_flush(rcs);
>       if (cs->thread) {
>           cs->kill_thread = 1;
>           pipe_semaphore_signal(&cs->flush_queued);
> @@ -525,6 +561,7 @@ static void radeon_drm_cs_set_flush(struct radeon_winsys_cs *rcs,
>                                       void *user)
>   {
>       struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
> +
>       cs->flush_cs = flush;
>       cs->flush_data = user;
>   }
> @@ -562,4 +599,5 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
>       ws->base.cs_flush = radeon_drm_cs_flush;
>       ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush;
>       ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
> +    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
>   }
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
> index 286eb6a..99d5fbb 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
> @@ -118,7 +118,7 @@ radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo)
>       return bo->num_cs_references != 0;
>   }
>   
> -void radeon_drm_cs_sync_flush(struct radeon_drm_cs *cs);
> +void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs);
>   void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws);
>   
>   #endif
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> index bcfb448..685af29 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> @@ -319,6 +319,12 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
>           break;
>       }
>   
> +    /* Check for dma */
> +    ws->info.r600_has_dma = FALSE;
> +    if (ws->info.chip_class >= R700 && ws->info.drm_minor >= 27) {
> +        ws->info.r600_has_dma = TRUE;
> +    }
> +
>       /* Get GEM info. */
>       retval = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_INFO,
>               &gem_info, sizeof(gem_info));
> diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
> index d0c4822..1d159dc 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
> +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
> @@ -138,12 +138,19 @@ enum chip_class {
>       TAHITI,
>   };
>   
> +enum ring_type {
> +    RING_GFX = 0,
> +    RING_DMA,
> +    RING_LAST,
> +};
> +
>   struct winsys_handle;
>   struct radeon_winsys_cs_handle;
>   
>   struct radeon_winsys_cs {
> -    unsigned cdw;  /* Number of used dwords. */
> -    uint32_t *buf; /* The command buffer. */
> +    unsigned                    cdw;  /* Number of used dwords. */
> +    uint32_t                    *buf; /* The command buffer. */
> +    enum ring_type              ring_type;
>   };
>   
>   struct radeon_info {
> @@ -170,6 +177,7 @@ struct radeon_info {
>       uint32_t                    r600_max_pipes;
>       boolean                     r600_backend_map_valid;
>       boolean                     r600_virtual_address;
> +    boolean                     r600_has_dma;
>   };
>   
>   enum radeon_feature_id {
> @@ -350,7 +358,7 @@ struct radeon_winsys {
>        *
>        * \param ws        The winsys this function is called from.
>        */
> -    struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws);
> +    struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws, enum ring_type ring_type);
>   
>       /**
>        * Destroy a command stream.
> @@ -435,6 +443,13 @@ struct radeon_winsys {
>                                     boolean enable);
>   
>       /**
> +     * Make sure all asynchronous flushes of the cs have completed.
> +     *
> +     * \param cs        A command stream.
> +     */
> +    void (*cs_sync_flush)(struct radeon_winsys_cs *cs);
> +
> +    /**
>        * Initialize surface
>        *
>        * \param ws        The winsys this function is called from.



More information about the mesa-dev mailing list