[Mesa-dev] [PATCH 3/4] radeonsi: create load_const_buffer_desc_fast_path() helper
Marek Olšák
maraeo at gmail.com
Fri Mar 30 02:29:44 UTC 2018
The driver should ignore the nir option on SI with LLVM <= 0x0500.
Marek
On Tue, Mar 27, 2018 at 12:19 AM, Timothy Arceri <tarceri at itsqueeze.com>
wrote:
> This will be shared by the TGSI and NIR backends. For simplicity
> we leave the SI LLVM 5.0 and lower work around only in the TGSI
> backend.
> ---
> src/gallium/drivers/radeonsi/si_shader.c | 88
> ++++++++++++++++++--------------
> 1 file changed, 49 insertions(+), 39 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
> b/src/gallium/drivers/radeonsi/si_shader.c
> index d5607a99d32..62cb7ea7eb5 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -2322,6 +2322,49 @@ void si_tgsi_declare_compute_memory(struct
> si_shader_context *ctx,
> si_declare_compute_memory(ctx);
> }
>
> +static LLVMValueRef load_const_buffer_desc_fast_path(struct
> si_shader_context *ctx)
> +{
> + LLVMValueRef ptr =
> + LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_
> buffers);
> + struct si_shader_selector *sel = ctx->shader->selector;
> +
> + /* Do the bounds checking with a descriptor, because
> + * doing computation and manual bounds checking of 64-bit
> + * addresses generates horrible VALU code with very high
> + * VGPR usage and very low SIMD occupancy.
> + */
> + ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, "");
> +
> + LLVMValueRef desc0, desc1;
> + if (HAVE_32BIT_POINTERS) {
> + desc0 = ptr;
> + desc1 = LLVMConstInt(ctx->i32,
> + S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi),
> 0);
> + } else {
> + ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->v2i32,
> "");
> + desc0 = LLVMBuildExtractElement(ctx->ac.builder, ptr,
> ctx->i32_0, "");
> + desc1 = LLVMBuildExtractElement(ctx->ac.builder, ptr,
> ctx->i32_1, "");
> + /* Mask out all bits except BASE_ADDRESS_HI. */
> + desc1 = LLVMBuildAnd(ctx->ac.builder, desc1,
> + LLVMConstInt(ctx->i32,
> ~C_008F04_BASE_ADDRESS_HI, 0), "");
> + }
> +
> + LLVMValueRef desc_elems[] = {
> + desc0,
> + desc1,
> + LLVMConstInt(ctx->i32, (sel->info.const_file_max[0] + 1) *
> 16, 0),
> + LLVMConstInt(ctx->i32,
> + S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
> + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
> + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
> + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
> + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT)
> |
> + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32),
> 0)
> + };
> +
> + return ac_build_gather_values(&ctx->ac, desc_elems, 4);
> +}
> +
> static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx,
> int i)
> {
> LLVMValueRef list_ptr = LLVMGetParam(ctx->main_fn,
> @@ -2400,8 +2443,6 @@ static LLVMValueRef fetch_constant(
> /* Fast path when user data SGPRs point to constant buffer 0
> directly. */
> if (sel->info.const_buffers_declared == 1 &&
> sel->info.shader_buffers_declared == 0) {
> - LLVMValueRef ptr =
> - LLVMGetParam(ctx->main_fn,
> ctx->param_const_and_shader_buffers);
>
> /* This enables use of s_load_dword and flat_load_dword
> for const buffer 0
> * loads, and up to x4 load opcode merging. However, it
> leads to horrible
> @@ -2416,48 +2457,17 @@ static LLVMValueRef fetch_constant(
> * s_buffer_load_dword (that we have to prevent) is when
> we use use
> * a literal offset where we don't need bounds checking.
> */
> - if (ctx->screen->info.chip_class == SI &&
> - HAVE_LLVM < 0x0600 &&
> - !reg->Register.Indirect) {
> + if (ctx->screen->info.chip_class == SI && HAVE_LLVM <
> 0x0600 &&
> + !reg->Register.Indirect) {
> + LLVMValueRef ptr =
> + LLVMGetParam(ctx->main_fn,
> ctx->param_const_and_shader_buffers);
> +
> addr = LLVMBuildLShr(ctx->ac.builder, addr,
> LLVMConstInt(ctx->i32, 2, 0), "");
> LLVMValueRef result =
> ac_build_load_invariant(&ctx->ac, ptr, addr);
> return bitcast(bld_base, type, result);
> }
>
> - /* Do the bounds checking with a descriptor, because
> - * doing computation and manual bounds checking of 64-bit
> - * addresses generates horrible VALU code with very high
> - * VGPR usage and very low SIMD occupancy.
> - */
> - ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr,
> ctx->ac.intptr, "");
> -
> - LLVMValueRef desc0, desc1;
> - if (HAVE_32BIT_POINTERS) {
> - desc0 = ptr;
> - desc1 = LLVMConstInt(ctx->i32,
> - S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi),
> 0);
> - } else {
> - ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
> ctx->v2i32, "");
> - desc0 = LLVMBuildExtractElement(ctx->ac.builder,
> ptr, ctx->i32_0, "");
> - desc1 = LLVMBuildExtractElement(ctx->ac.builder,
> ptr, ctx->i32_1, "");
> - /* Mask out all bits except BASE_ADDRESS_HI. */
> - desc1 = LLVMBuildAnd(ctx->ac.builder, desc1,
> - LLVMConstInt(ctx->i32,
> ~C_008F04_BASE_ADDRESS_HI, 0), "");
> - }
> -
> - LLVMValueRef desc_elems[] = {
> - desc0,
> - desc1,
> - LLVMConstInt(ctx->i32,
> (sel->info.const_file_max[0] + 1) * 16, 0),
> - LLVMConstInt(ctx->i32,
> - S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
> - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
> - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
> - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
> - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT)
> |
> - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32),
> 0)
> - };
> - LLVMValueRef desc = ac_build_gather_values(&ctx->ac,
> desc_elems, 4);
> + LLVMValueRef desc = load_const_buffer_desc_fast_path(ctx);
> LLVMValueRef result = buffer_load_const(ctx, desc, addr);
> return bitcast(bld_base, type, result);
> }
> --
> 2.14.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180329/a7f7c6be/attachment.html>
More information about the mesa-dev
mailing list