[Mesa-dev] [PATCH 03/20] radeonsi: add shared memory

Nicolai Hähnle nhaehnle at gmail.com
Tue Apr 5 23:33:27 UTC 2016


On 02.04.2016 08:10, Bas Nieuwenhuizen wrote:
> Declares the shared memory as a global variable so that
> LLVM is aware of it and it does not conflict with passes
> like AMDGPUPromoteAlloca.
>
> Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
> ---
>   src/gallium/drivers/radeon/radeon_llvm.h           |  3 ++
>   .../drivers/radeon/radeon_setup_tgsi_llvm.c        |  4 +++
>   src/gallium/drivers/radeonsi/si_shader.c           | 35 ++++++++++++++++++++++
>   src/gallium/drivers/radeonsi/si_shader.h           |  3 ++
>   4 files changed, 45 insertions(+)
>
> diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
> index 0a164bb..3e11b36 100644
> --- a/src/gallium/drivers/radeon/radeon_llvm.h
> +++ b/src/gallium/drivers/radeon/radeon_llvm.h
> @@ -68,6 +68,9 @@ struct radeon_llvm_context {
>   			unsigned index,
>   			const struct tgsi_full_declaration *decl);
>
> +	void (*declare_memory_region)(struct radeon_llvm_context *,
> +			const struct tgsi_full_declaration *decl);
> +
>   	/** This array contains the input values for the shader.  Typically these
>   	  * values will be in the form of a target intrinsic that will inform the
>   	  * backend how to load the actual inputs to the shader.
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index fb883cb..5a3b586 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -366,6 +366,10 @@ static void emit_declaration(
>   		break;
>   	}
>
> +	case TGSI_FILE_MEMORY:
> +		if (ctx->declare_memory_region)
> +			ctx->declare_memory_region(ctx, decl);
> +

I think you should drop the if-statement. TGSI_FILE_MEMORY should only 
occur when it makes sense and the function exists.

Also, add a break; — as written, the new case falls through into the 
default case below.

>   	default:
>   		break;
>   	}
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 2c44b14..2ce37ca 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -121,6 +121,9 @@ struct si_shader_context
>   	LLVMTypeRef v4i32;
>   	LLVMTypeRef v4f32;
>   	LLVMTypeRef v8i32;
> +
> +	unsigned memory_region_count;
> +	LLVMValueRef *memory_regions;
>   };
>
>   static struct si_shader_context *si_shader_context(
> @@ -1320,6 +1323,37 @@ static void declare_system_value(
>   	radeon_bld->system_values[index] = value;
>   }
>
> +static void declare_compute_memory(struct radeon_llvm_context *radeon_bld,
> +                                   const struct tgsi_full_declaration *decl)
> +{
> +	struct si_shader_context *ctx =
> +		si_shader_context(&radeon_bld->soa.bld_base);
> +	struct si_shader_selector *sel = ctx->shader->selector;
> +	struct gallivm_state *gallivm = &radeon_bld->gallivm;
> +
> +	LLVMTypeRef i8 = LLVMInt8TypeInContext(gallivm->context);
> +	LLVMTypeRef i8p = LLVMPointerType(i8, LOCAL_ADDR_SPACE);
> +	LLVMValueRef var;
> +
> +	assert(decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED);
> +	assert(decl->Range.First == decl->Range.Last);
> +
> +	if (ctx->memory_region_count <= decl->Range.Last) {
> +		ctx->memory_regions = realloc(ctx->memory_regions,
> +		                (decl->Range.Last + 1) * sizeof(LLVMValueRef));
> +
> +		ctx->memory_region_count = decl->Range.Last + 1;
> +	}
> +	var = LLVMAddGlobalInAddressSpace(gallivm->module,
> +	                                  LLVMArrayType(i8, sel->local_size),
> +	                                  "compute_lds",
> +	                                  LOCAL_ADDR_SPACE);
> +	LLVMSetAlignment(var, 4);
> +
> +	ctx->memory_regions[decl->Range.First] =
> +	               LLVMBuildBitCast(gallivm->builder, var, i8p, "");
> +}

The entire logic here seems a bit off. For example, realloc doesn't 
zero-initialize the newly grown region, so any array slots between the 
old count and Range.Last that aren't explicitly assigned are left 
holding garbage.

Do we ever get TGSI shaders with more than one memory declaration?

I guess not, otherwise local_size doesn't really make sense. In that 
case, just forget about the whole realloc business, have a single 
LLVMValueRef in si_shader_context and add asserts for Range.First/Last.

Cheers,
Nicolai

> +
>   static LLVMValueRef fetch_constant(
>   	struct lp_build_tgsi_context *bld_base,
>   	const struct tgsi_full_src_register *reg,
> @@ -5778,6 +5812,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
>   			bld_base->emit_epilogue = si_llvm_return_fs_outputs;
>   		break;
>   	case TGSI_PROCESSOR_COMPUTE:
> +		ctx.radeon_bld.declare_memory_region = declare_compute_memory;
>   		break;
>   	default:
>   		assert(!"Unsupported shader type");
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index 5043d43..70ae46f 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -222,6 +222,9 @@ struct si_shader_selector {
>   	 */
>   	unsigned	colors_written_4bit;
>
> +	/* CS parameters */
> +	unsigned local_size;
> +
>   	/* masks of "get_unique_index" bits */
>   	uint64_t	outputs_written;
>   	uint32_t	patch_outputs_written;
>


More information about the mesa-dev mailing list