[Mesa-dev] [PATCH 1/3] gallium: add pipe_grid_info::last_block
Liu, Leo
Leo.Liu at amd.com
Fri Mar 1 21:51:49 UTC 2019
Series are:
Acked-by: Leo Liu <leo.liu at amd.com>
On 2/27/19 5:19 PM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> The OpenMAX state tracker will use this.
>
> RadeonSI is adapted to use pipe_grid_info::last_block instead of its
> internal state.
> ---
> src/gallium/auxiliary/util/u_screen.c | 3 +++
> src/gallium/docs/source/screen.rst | 2 ++
> src/gallium/drivers/radeonsi/si_compute.c | 2 +-
> .../drivers/radeonsi/si_compute_blit.c | 18 +++++----------
> src/gallium/drivers/radeonsi/si_get.c | 1 +
> src/gallium/drivers/radeonsi/si_pipe.h | 22 -------------------
> src/gallium/include/pipe/p_defines.h | 1 +
> src/gallium/include/pipe/p_state.h | 21 ++++++++++++++++++
> 8 files changed, 35 insertions(+), 35 deletions(-)
>
> diff --git a/src/gallium/auxiliary/util/u_screen.c b/src/gallium/auxiliary/util/u_screen.c
> index 50964f3b3ef..b902c083ad4 100644
> --- a/src/gallium/auxiliary/util/u_screen.c
> +++ b/src/gallium/auxiliary/util/u_screen.c
> @@ -334,14 +334,17 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen,
> return 2047;
>
> case PIPE_CAP_SURFACE_SAMPLE_COUNT:
> return 0;
> case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL:
> return 1;
>
> case PIPE_CAP_MAX_VARYINGS:
> return 8;
>
> + case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK:
> + return 0;
> +
> default:
> unreachable("bad PIPE_CAP_*");
> }
> }
> diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
> index 85ca5e1f5ce..60ba9bcbde0 100644
> --- a/src/gallium/docs/source/screen.rst
> +++ b/src/gallium/docs/source/screen.rst
> @@ -485,20 +485,22 @@ The integer capabilities:
> * ``PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND``: True if the driver needs blend state to use zero/one instead of destination alpha for RGB/XRGB formats.
> * ``PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS``: True if the driver wants TESSINNER and TESSOUTER to be inputs (rather than system values) for tessellation evaluation shaders.
> * ``PIPE_CAP_DEST_SURFACE_SRGB_CONTROL``: Indicates whether the drivers
> supports switching the format between sRGB and linear for a surface that is
> used as destination in draw and blit calls.
> * ``PIPE_CAP_NIR_COMPACT_ARRAYS``: True if the compiler backend supports NIR's compact array feature, for all shader stages.
> * ``PIPE_CAP_MAX_VARYINGS``: The maximum number of fragment shader
> varyings. This will generally correspond to
> ``PIPE_SHADER_CAP_MAX_INPUTS`` for the fragment shader, but in some
> cases may be a smaller number.
> +* ``PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK``: Whether pipe_grid_info::last_block
> + is implemented by the driver. See struct pipe_grid_info for more details.
>
> .. _pipe_capf:
>
> PIPE_CAPF_*
> ^^^^^^^^^^^^^^^^
>
> The floating-point capabilities are:
>
> * ``PIPE_CAPF_MAX_LINE_WIDTH``: The maximum width of a regular line.
> * ``PIPE_CAPF_MAX_LINE_WIDTH_AA``: The maximum width of a smoothed line.
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
> index 87addd53976..6c2269d903a 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -797,21 +797,21 @@ static void si_emit_dispatch_packets(struct si_context *sctx,
> radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
> compute_resource_limits);
>
> unsigned dispatch_initiator =
> S_00B800_COMPUTE_SHADER_EN(1) |
> S_00B800_FORCE_START_AT_000(1) |
> /* If the KMD allows it (there is a KMD hw register for it),
> * allow launching waves out-of-order. (same as Vulkan) */
> S_00B800_ORDER_MODE(sctx->chip_class >= CIK);
>
> - uint *last_block = sctx->compute_last_block;
> + uint *last_block = info->last_block;
> bool partial_block_en = last_block[0] || last_block[1] || last_block[2];
>
> radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
>
> if (partial_block_en) {
> unsigned partial[3];
>
> /* If no partial_block, these should be an entire block size, not 0. */
> partial[0] = last_block[0] ? last_block[0] : info->block[0];
> partial[1] = last_block[1] ? last_block[1] : info->block[1];
> diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c
> index f5e9c02dd10..a7453099ac6 100644
> --- a/src/gallium/drivers/radeonsi/si_compute_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c
> @@ -374,45 +374,42 @@ void si_compute_copy_image(struct si_context *sctx,
> ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, image);
>
> struct pipe_grid_info info = {0};
>
> if (dst->target == PIPE_TEXTURE_1D_ARRAY && src->target == PIPE_TEXTURE_1D_ARRAY) {
> if (!sctx->cs_copy_image_1d_array)
> sctx->cs_copy_image_1d_array =
> si_create_copy_image_compute_shader_1d_array(ctx);
> ctx->bind_compute_state(ctx, sctx->cs_copy_image_1d_array);
> info.block[0] = 64;
> - sctx->compute_last_block[0] = width % 64;
> + info.last_block[0] = width % 64;
> info.block[1] = 1;
> info.block[2] = 1;
> info.grid[0] = DIV_ROUND_UP(width, 64);
> info.grid[1] = depth;
> info.grid[2] = 1;
> } else {
> if (!sctx->cs_copy_image)
> sctx->cs_copy_image = si_create_copy_image_compute_shader(ctx);
> ctx->bind_compute_state(ctx, sctx->cs_copy_image);
> info.block[0] = 8;
> - sctx->compute_last_block[0] = width % 8;
> + info.last_block[0] = width % 8;
> info.block[1] = 8;
> - sctx->compute_last_block[1] = height % 8;
> + info.last_block[1] = height % 8;
> info.block[2] = 1;
> info.grid[0] = DIV_ROUND_UP(width, 8);
> info.grid[1] = DIV_ROUND_UP(height, 8);
> info.grid[2] = depth;
> }
>
> ctx->launch_grid(ctx, &info);
>
> - sctx->compute_last_block[0] = 0;
> - sctx->compute_last_block[1] = 0;
> -
> sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
> (sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) |
> si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
> ctx->bind_compute_state(ctx, saved_cs);
> ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, saved_image);
> ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &saved_cb);
> si_compute_internal_end(sctx);
> }
>
> void si_init_compute_blit_functions(struct si_context *sctx)
> @@ -476,44 +473,41 @@ void si_compute_clear_render_target(struct pipe_context *ctx,
>
> ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &image);
>
> struct pipe_grid_info info = {0};
>
> if (dstsurf->texture->target != PIPE_TEXTURE_1D_ARRAY) {
> if (!sctx->cs_clear_render_target)
> sctx->cs_clear_render_target = si_clear_render_target_shader(ctx);
> ctx->bind_compute_state(ctx, sctx->cs_clear_render_target);
> info.block[0] = 8;
> - sctx->compute_last_block[0] = width % 8;
> + info.last_block[0] = width % 8;
> info.block[1] = 8;
> - sctx->compute_last_block[1] = height % 8;
> + info.last_block[1] = height % 8;
> info.block[2] = 1;
> info.grid[0] = DIV_ROUND_UP(width, 8);
> info.grid[1] = DIV_ROUND_UP(height, 8);
> info.grid[2] = num_layers;
> } else {
> if (!sctx->cs_clear_render_target_1d_array)
> sctx->cs_clear_render_target_1d_array =
> si_clear_render_target_shader_1d_array(ctx);
> ctx->bind_compute_state(ctx, sctx->cs_clear_render_target_1d_array);
> info.block[0] = 64;
> - sctx->compute_last_block[0] = width % 64;
> + info.last_block[0] = width % 64;
> info.block[1] = 1;
> info.block[2] = 1;
> info.grid[0] = DIV_ROUND_UP(width, 64);
> info.grid[1] = num_layers;
> info.grid[2] = 1;
> }
>
> ctx->launch_grid(ctx, &info);
>
> - sctx->compute_last_block[0] = 0;
> - sctx->compute_last_block[1] = 0;
> -
> sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
> (sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) |
> si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
> ctx->bind_compute_state(ctx, saved_cs);
> ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &saved_image);
> ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &saved_cb);
> si_compute_internal_end(sctx);
> }
> diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
> index a5cb209b59e..6fa67087c7d 100644
> --- a/src/gallium/drivers/radeonsi/si_get.c
> +++ b/src/gallium/drivers/radeonsi/si_get.c
> @@ -153,20 +153,21 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
> case PIPE_CAP_INT64:
> case PIPE_CAP_INT64_DIVMOD:
> case PIPE_CAP_TGSI_CLOCK:
> case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
> case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
> case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
> case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
> case PIPE_CAP_TGSI_BALLOT:
> case PIPE_CAP_TGSI_VOTE:
> case PIPE_CAP_TGSI_FS_FBFETCH:
> + case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK:
> return 1;
>
> case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
> return !SI_BIG_ENDIAN && sscreen->info.has_userptr;
>
> case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
> return sscreen->info.has_gpu_reset_status_query ||
> sscreen->info.has_gpu_reset_counter_query;
>
> case PIPE_CAP_TEXTURE_MULTISAMPLE:
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index b3198d45ea6..b6858b46ec0 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -914,42 +914,20 @@ struct si_context {
> struct pipe_resource *gsvs_ring;
> struct pipe_resource *tess_rings;
> union pipe_color_union *border_color_table; /* in CPU memory, any endian */
> struct si_resource *border_color_buffer;
> union pipe_color_union *border_color_map; /* in VRAM (slow access), little endian */
> unsigned border_color_count;
> unsigned num_vs_blit_sgprs;
> uint32_t vs_blit_sh_data[SI_VS_BLIT_SGPRS_POS_TEXCOORD];
> uint32_t cs_user_data[4];
>
> - /**
> - * last_block allows disabling threads at the farthermost grid boundary.
> - * Full blocks as specified by "block" are launched, but the threads
> - * outside of "last_block" dimensions are disabled.
> - *
> - * If a block touches the grid boundary in the i-th axis, threads with
> - * THREAD_ID[i] >= last_block[i] are disabled.
> - *
> - * If last_block[i] is 0, it has the same behavior as last_block[i] = block[i],
> - * meaning no effect.
> - *
> - * It's equivalent to doing this at the beginning of the compute shader:
> - *
> - * for (i = 0; i < 3; i++) {
> - * if (block_id[i] == grid[i] - 1 &&
> - * last_block[i] && last_block[i] >= thread_id[i])
> - * return;
> - * }
> - * (this could be moved into pipe_grid_info)
> - */
> - uint compute_last_block[3];
> -
> /* Vertex and index buffers. */
> bool vertex_buffers_dirty;
> bool vertex_buffer_pointer_dirty;
> struct pipe_vertex_buffer vertex_buffer[SI_NUM_VERTEX_BUFFERS];
>
> /* MSAA config state. */
> int ps_iter_samples;
> bool ps_uses_fbfetch;
> bool smoothing_enabled;
>
> diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
> index e2b0104ce43..d4732dc257f 100644
> --- a/src/gallium/include/pipe/p_defines.h
> +++ b/src/gallium/include/pipe/p_defines.h
> @@ -851,20 +851,21 @@ enum pipe_cap
> PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS,
> PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET,
> PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET,
> PIPE_CAP_SURFACE_SAMPLE_COUNT,
> PIPE_CAP_TGSI_ATOMFADD,
> PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE,
> PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND,
> PIPE_CAP_DEST_SURFACE_SRGB_CONTROL,
> PIPE_CAP_NIR_COMPACT_ARRAYS,
> PIPE_CAP_MAX_VARYINGS,
> + PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK,
> };
>
> /**
> * Possible bits for PIPE_CAP_CONTEXT_PRIORITY_MASK param, which should
> * return a bitmask of the supported priorities. If the driver does not
> * support prioritized contexts, it can return 0.
> *
> * Note that these match __DRI2_RENDER_HAS_CONTEXT_PRIORITY_*
> */
> #define PIPE_CONTEXT_PRIORITY_LOW (1 << 0)
> diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
> index 38052e5fd3d..3a91ddd71b5 100644
> --- a/src/gallium/include/pipe/p_state.h
> +++ b/src/gallium/include/pipe/p_state.h
> @@ -831,20 +831,41 @@ struct pipe_grid_info
> * clEnqueueNDRangeKernel. Note block[] and grid[] must be padded with
> * 1 for non-used dimensions.
> */
> uint work_dim;
>
> /**
> * Determine the layout of the working block (in thread units) to be used.
> */
> uint block[3];
>
> + /**
> + * last_block allows disabling threads at the farthermost grid boundary.
> + * Full blocks as specified by "block" are launched, but the threads
> + * outside of "last_block" dimensions are disabled.
> + *
> + * If a block touches the grid boundary in the i-th axis, threads with
> + * THREAD_ID[i] >= last_block[i] are disabled.
> + *
> + * If last_block[i] is 0, it has the same behavior as last_block[i] = block[i],
> + * meaning no effect.
> + *
> + * It's equivalent to doing this at the beginning of the compute shader:
> + *
> + * for (i = 0; i < 3; i++) {
> + * if (block_id[i] == grid[i] - 1 &&
> + * last_block[i] && thread_id[i] >= last_block[i])
> + * return;
> + * }
> + */
> + uint last_block[3];
> +
> /**
> * Determine the layout of the grid (in block units) to be used.
> */
> uint grid[3];
>
> /* Indirect compute parameters resource: If not NULL, block sizes are taken
> * from this buffer instead, which is laid out as follows:
> *
> * struct {
> * uint32_t num_blocks_x;
More information about the mesa-dev
mailing list