[Mesa-dev] [PATCH 13/13] radeonsi: Use CE for all descriptors.

Nicolai Hähnle nhaehnle at gmail.com
Thu Apr 14 18:08:07 UTC 2016


On 13.04.2016 20:35, Bas Nieuwenhuizen wrote:
> Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
> ---
>   src/gallium/drivers/radeonsi/si_descriptors.c | 46 +++++++++++++++++++++------
>   1 file changed, 36 insertions(+), 10 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 5e26760..5ddb168 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -60,6 +60,7 @@
>   #include "si_shader.h"
>   #include "sid.h"
>
> +#include "util/u_math.h"
>   #include "util/u_memory.h"
>   #include "util/u_suballoc.h"
>   #include "util/u_upload_mgr.h"
> @@ -104,7 +105,10 @@ static void si_init_descriptors(struct si_descriptors *desc,
>   {
>   	int i;
>
> -	assert(num_elements <= sizeof(desc->enabled_mask)*8);
> +	/* Ensure that desc->enabled_mask covers all descriptors. The + 1 is
> +	 * to ensure that u_bit_scan_consecutive_range64 never shifts the 1
> +	 * out of the variable while creating the clear mask. */
> +	assert(num_elements + 1 <= sizeof(desc->enabled_mask) * CHAR_BIT);

I see you considered the case that I complained about in patch #12.
Since others may be tempted to use that function, I still think you
should fix patch #12, and then you can leave the assert as is.

Thanks,
Nicolai

>   	desc->list = CALLOC(num_elements, element_dw_size * 4);
>   	desc->element_dw_size = element_dw_size;
> @@ -157,24 +161,46 @@ static bool si_upload_descriptors(struct si_context *sctx,
>   				  struct si_descriptors *desc)
>   {
>   	unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
> -	void *ptr;
>
>   	if (!desc->list_dirty)
>   		return true;
>
> -	u_upload_alloc(sctx->b.uploader, 0, list_size, 256,
> -		       &desc->buffer_offset,
> -		       (struct pipe_resource**)&desc->buffer, &ptr);
> -	if (!desc->buffer)
> -		return false; /* skip the draw call */
> +	if (sctx->ce_ib) {
> +		uint32_t const* list = (uint32_t const*)desc->list;
>
> -	util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
> +		while(desc->dirty_mask) {
> +			int begin, count;
> +			u_bit_scan_consecutive_range64(&desc->dirty_mask, &begin,
> +						       &count);
>
> -	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
> -			      RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
> +			begin *= desc->element_dw_size;
> +			count *= desc->element_dw_size;
> +
> +			radeon_emit(sctx->ce_ib,
> +				    PKT3(PKT3_WRITE_CONST_RAM, count, 0));
> +			radeon_emit(sctx->ce_ib, desc->ce_offset + begin * 4);
> +			radeon_emit_array(sctx->ce_ib, list + begin, count);
> +		}
> +
> +		if(!si_ce_upload(sctx, desc->ce_offset, list_size,
> +				 &desc->buffer_offset, &desc->buffer))
> +			return false;
> +	} else {
> +		void *ptr;
> +
> +		u_upload_alloc(sctx->b.uploader, 0, list_size, 256,
> +			&desc->buffer_offset,
> +			(struct pipe_resource**)&desc->buffer, &ptr);
> +		if (!desc->buffer)
> +			return false; /* skip the draw call */
>
> +		util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
> +	}
> +	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
> +		RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
>   	desc->list_dirty = false;
>   	desc->pointer_dirty = true;
> +	desc->dirty_mask = 0;
>   	si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
>   	return true;
>   }
>


More information about the mesa-dev mailing list