[Mesa-dev] [PATCH 04/10] gallium/radeon: don't call u_format helpers if we have that info already

Nicolai Hähnle nhaehnle at gmail.com
Mon Oct 31 21:04:12 UTC 2016


On 29.10.2016 13:17, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
>  src/gallium/drivers/radeon/r600_texture.c | 13 +++++--------
>  src/gallium/drivers/radeonsi/si_blit.c    |  5 +++--
>  2 files changed, 8 insertions(+), 10 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
> index 46281cb..364ed40 100644
> --- a/src/gallium/drivers/radeon/r600_texture.c
> +++ b/src/gallium/drivers/radeon/r600_texture.c
> @@ -46,22 +46,21 @@ bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
>  			       struct r600_texture *rdst,
>  			       unsigned dst_level, unsigned dstx,
>  			       unsigned dsty, unsigned dstz,
>  			       struct r600_texture *rsrc,
>  			       unsigned src_level,
>  			       const struct pipe_box *src_box)
>  {
>  	if (!rctx->dma.cs)
>  		return false;
>
> -	if (util_format_get_blocksizebits(rdst->resource.b.b.format) !=
> -	    util_format_get_blocksizebits(rsrc->resource.b.b.format))
> +	if (rdst->surface.bpe != rsrc->surface.bpe)
>  		return false;
>
>  	/* MSAA: Blits don't exist in the real world. */
>  	if (rsrc->resource.b.b.nr_samples > 1 ||
>  	    rdst->resource.b.b.nr_samples > 1)
>  		return false;
>
>  	/* Depth-stencil surfaces:
>  	 *   When dst is linear, the DB->CB copy preserves HTILE.
>  	 *   When dst is tiled, the 3D path must be used to update HTILE.
> @@ -174,26 +173,24 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600
>  	}
>
>  	rctx->dma_copy(ctx, dst, transfer->level,
>  		       transfer->box.x, transfer->box.y, transfer->box.z,
>  		       src, 0, &sbox);
>  }
>
>  static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned level,
>  					const struct pipe_box *box)
>  {
> -	enum pipe_format format = rtex->resource.b.b.format;
> -
>  	return rtex->surface.level[level].offset +
>  	       box->z * rtex->surface.level[level].slice_size +
> -	       box->y / util_format_get_blockheight(format) * rtex->surface.level[level].pitch_bytes +
> -	       box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
> +	       box->y / rtex->surface.blk_h * rtex->surface.level[level].pitch_bytes +
> +	       box->x / rtex->surface.blk_w * rtex->surface.bpe;
>  }
>
>  static int r600_init_surface(struct r600_common_screen *rscreen,
>  			     struct radeon_surf *surface,
>  			     const struct pipe_resource *ptex,
>  			     enum radeon_surf_mode array_mode,
>  			     unsigned pitch_in_bytes_override,
>  			     unsigned offset,
>  			     bool is_imported,
>  			     bool is_scanout,
> @@ -2188,21 +2185,21 @@ void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
>  /* FAST COLOR CLEAR */
>
>  static void evergreen_set_clear_color(struct r600_texture *rtex,
>  				      enum pipe_format surface_format,
>  				      const union pipe_color_union *color)
>  {
>  	union util_color uc;
>
>  	memset(&uc, 0, sizeof(uc));
>
> -	if (util_format_get_blocksizebits(surface_format) == 128) {
> +	if (rtex->surface.bpe == 16) {
>  		/* DCC fast clear only:
>  		 *   CLEAR_WORD0 = R = G = B
>  		 *   CLEAR_WORD1 = A
>  		 */
>  		assert(color->ui[0] == color->ui[1] &&
>  		       color->ui[0] == color->ui[2]);
>  		uc.ui[0] = color->ui[0];
>  		uc.ui[1] = color->ui[3];
>  	} else if (util_format_is_pure_uint(surface_format)) {
>  		util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
> @@ -2495,21 +2492,21 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
>  							  &clear_words_needed))
>  				continue;
>
>  			vi_dcc_clear_level(rctx, tex, 0, reset_value);
>
>  			if (clear_words_needed)
>  				tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
>  			tex->separate_dcc_dirty = true;
>  		} else {
>  			/* 128-bit formats are unusupported */
> -			if (util_format_get_blocksizebits(fb->cbufs[i]->format) > 64) {
> +			if (tex->surface.bpe > 8) {
>  				continue;
>  			}
>
>  			/* Stoney/RB+ doesn't work with CMASK fast clear. */
>  			if (rctx->family == CHIP_STONEY)
>  				continue;
>
>  			/* ensure CMASK is enabled */
>  			r600_texture_alloc_cmask_separate(rctx->screen, tex);
>  			if (tex->cmask.size == 0) {
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
> index 0f46d71..fe17f73 100644
> --- a/src/gallium/drivers/radeonsi/si_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> @@ -831,20 +831,21 @@ struct texture_orig_info {
>
>  void si_resource_copy_region(struct pipe_context *ctx,
>  			     struct pipe_resource *dst,
>  			     unsigned dst_level,
>  			     unsigned dstx, unsigned dsty, unsigned dstz,
>  			     struct pipe_resource *src,
>  			     unsigned src_level,
>  			     const struct pipe_box *src_box)
>  {
>  	struct si_context *sctx = (struct si_context *)ctx;
> +	struct r600_texture *rsrc = (struct r600_texture*)src;

I think this is technically undefined behavior since the check against 
PIPE_BUFFER only comes later.

Apart from, patches 1-4:

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

>  	struct pipe_surface *dst_view, dst_templ;
>  	struct pipe_sampler_view src_templ, *src_view;
>  	unsigned dst_width, dst_height, src_width0, src_height0;
>  	unsigned src_force_level = 0;
>  	struct pipe_box sbox, dstbox;
>
>  	/* Handle buffers first. */
>  	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
>  		si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
>  		return;
> @@ -860,21 +861,21 @@ void si_resource_copy_region(struct pipe_context *ctx,
>  	dst_width = u_minify(dst->width0, dst_level);
>  	dst_height = u_minify(dst->height0, dst_level);
>  	src_width0 = src->width0;
>  	src_height0 = src->height0;
>
>  	util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz);
>  	util_blitter_default_src_texture(&src_templ, src, src_level);
>
>  	if (util_format_is_compressed(src->format) ||
>  	    util_format_is_compressed(dst->format)) {
> -		unsigned blocksize = util_format_get_blocksize(src->format);
> +		unsigned blocksize = rsrc->surface.bpe;
>
>  		if (blocksize == 8)
>  			src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */
>  		else
>  			src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */
>  		dst_templ.format = src_templ.format;
>
>  		dst_width = util_format_get_nblocksx(dst->format, dst_width);
>  		dst_height = util_format_get_nblocksy(dst->format, dst_height);
>  		src_width0 = util_format_get_nblocksx(src->format, src_width0);
> @@ -902,21 +903,21 @@ void si_resource_copy_region(struct pipe_context *ctx,
>  			dst_width = util_format_get_nblocksx(dst->format, dst_width);
>  			src_width0 = util_format_get_nblocksx(src->format, src_width0);
>
>  			dstx = util_format_get_nblocksx(dst->format, dstx);
>
>  			sbox = *src_box;
>  			sbox.x = util_format_get_nblocksx(src->format, src_box->x);
>  			sbox.width = util_format_get_nblocksx(src->format, src_box->width);
>  			src_box = &sbox;
>  		} else {
> -			unsigned blocksize = util_format_get_blocksize(src->format);
> +			unsigned blocksize = rsrc->surface.bpe;
>
>  			switch (blocksize) {
>  			case 1:
>  				dst_templ.format = PIPE_FORMAT_R8_UNORM;
>  				src_templ.format = PIPE_FORMAT_R8_UNORM;
>  				break;
>  			case 2:
>  				dst_templ.format = PIPE_FORMAT_R8G8_UNORM;
>  				src_templ.format = PIPE_FORMAT_R8G8_UNORM;
>  				break;
>


More information about the mesa-dev mailing list