[Mesa-dev] [PATCH 1/5] amd/common: pass new enum ac_image_dim to ac_build_image_opcode

Wed Apr 11 11:13:53 UTC 2018

From: Nicolai Hähnle <nicolai.haehnle at amd.com>

This is in preparation for the new, dimension-aware LLVM image
intrinsics.
---
 src/amd/common/ac_llvm_build.c                    | 10 ++++-
 src/amd/common/ac_llvm_build.h                    | 13 +++++-
 src/amd/common/ac_nir_to_llvm.c                   | 54 +++++++++++++++++++----
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 50 ++++++++++++++++++++-
 4 files changed, 114 insertions(+), 13 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 32d8a02f562..2bf38f809bb 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1433,36 +1433,42 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
 				   struct ac_image_args *a)
 {
 	LLVMValueRef args[11];
 	unsigned num_args = 0;
 	const char *name = NULL;
 	char intr_name[128], type[64];
 
 	bool sample = a->opcode == ac_image_sample ||
 		      a->opcode == ac_image_gather4 ||
 		      a->opcode == ac_image_get_lod;
+	bool da = a->dim == ac_image_cube ||
+		  a->dim == ac_image_1darray ||
+		  a->dim == ac_image_2darray ||
+		  a->dim == ac_image_2darraymsaa;
+	if (a->opcode == ac_image_get_lod)
+		da = false;
 
 	if (sample)
 		args[num_args++] = ac_to_float(ctx, a->addr);
 	else
 		args[num_args++] = a->addr;
 
 	args[num_args++] = a->resource;
 	if (sample)
 		args[num_args++] = a->sampler;
 	args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
 	if (sample)
 		args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0);
 	args[num_args++] = ctx->i1false; /* glc */
 	args[num_args++] = ctx->i1false; /* slc */
 	args[num_args++] = ctx->i1false; /* lwe */
-	args[num_args++] = LLVMConstInt(ctx->i1, a->da, 0);
+	args[num_args++] = LLVMConstInt(ctx->i1, da, 0);
 
 	switch (a->opcode) {
 	case ac_image_sample:
 		name = "llvm.amdgcn.image.sample";
 		break;
 	case ac_image_gather4:
 		name = "llvm.amdgcn.image.gather4";
 		break;
 	case ac_image_load:
 		name = "llvm.amdgcn.image.load";
@@ -2450,21 +2456,21 @@ LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param,
  * The sample index should be adjusted as follows:
  *   addr[sample_index] = (fmask >> (addr[sample_index] * 4)) & 0xF;
  */
 void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
 			      LLVMValueRef *addr, bool is_array_tex)
 {
 	struct ac_image_args fmask_load = {};
 	fmask_load.opcode = ac_image_load;
 	fmask_load.resource = fmask;
 	fmask_load.dmask = 0xf;
-	fmask_load.da = is_array_tex;
+	fmask_load.dim = is_array_tex ? ac_image_2darray : ac_image_2d;
 
 	LLVMValueRef fmask_addr[4];
 	memcpy(fmask_addr, addr, sizeof(fmask_addr[0]) * 3);
 	fmask_addr[3] = LLVMGetUndef(ac->i32);
 
 	fmask_load.addr = ac_build_gather_values(ac, fmask_addr,
 						 is_array_tex ? 4 : 2);
 
 	LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load);
 	fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 8b35028a314..a51390794a7 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -310,35 +310,46 @@ void ac_build_export_null(struct ac_llvm_context *ctx);
 
 enum ac_image_opcode {
 	ac_image_sample,
 	ac_image_gather4,
 	ac_image_load,
 	ac_image_load_mip,
 	ac_image_get_lod,
 	ac_image_get_resinfo,
 };
 
+enum ac_image_dim { /* image dimensionality, stored in ac_image_args.dim */
+	ac_image_1d,
+	ac_image_2d,
+	ac_image_3d,
+	ac_image_cube, /* includes cube arrays; DA bit set except for get_lod */
+	ac_image_1darray, /* DA bit set except for get_lod */
+	ac_image_2darray, /* DA bit set except for get_lod */
+	ac_image_2dmsaa,
+	ac_image_2darraymsaa, /* DA bit set except for get_lod */
+};
+
 struct ac_image_args {
 	enum ac_image_opcode opcode;
+	enum ac_image_dim dim; /* ac_build_image_opcode derives the DA bit from this */
 	bool level_zero;
 	bool bias;
 	bool lod;
 	bool deriv;
 	bool compare;
 	bool offset;
 
 	LLVMValueRef resource;
 	LLVMValueRef sampler;
 	LLVMValueRef addr;
 	unsigned dmask;
 	bool unorm;
-	bool da;
 };
 
 LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
 				   struct ac_image_args *a);
 LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
 				    LLVMValueRef args[2]);
 LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
 				     LLVMValueRef args[2]);
 LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
 				     LLVMValueRef args[2]);
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 7c2bd5c0cca..ce7afaf96b4 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -69,20 +69,59 @@ build_store_values_extended(struct ac_llvm_context *ac,
 	unsigned i;
 
 	for (i = 0; i < value_count; i++) {
 		LLVMValueRef ptr = values[i * value_stride];
 		LLVMValueRef index = LLVMConstInt(ac->i32, i, false);
 		LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
 		LLVMBuildStore(builder, value, ptr);
 	}
 }
 
+static enum ac_image_dim
+get_ac_sampler_dim(const struct ac_llvm_context *ctx, enum glsl_sampler_dim dim,
+		   bool is_array)
+{
+	switch (dim) { /* GLSL sampler dim + arrayness -> ac_image_dim */
+	case GLSL_SAMPLER_DIM_3D:
+		return ac_image_3d;
+	case GLSL_SAMPLER_DIM_CUBE:
+		return ac_image_cube; /* is_array is not consulted for cubes */
+	case GLSL_SAMPLER_DIM_MS:
+	case GLSL_SAMPLER_DIM_SUBPASS_MS:
+		return is_array ? ac_image_2darraymsaa : ac_image_2dmsaa;
+	case GLSL_SAMPLER_DIM_1D:
+		if (ctx->chip_class < GFX9)
+			return is_array ? ac_image_1darray : ac_image_1d;
+		/* fallthrough - on GFX9 and newer, 1D is reported as 2D */
+	case GLSL_SAMPLER_DIM_2D:
+	case GLSL_SAMPLER_DIM_RECT:
+	case GLSL_SAMPLER_DIM_SUBPASS:
+	case GLSL_SAMPLER_DIM_EXTERNAL:
+		return is_array ? ac_image_2darray : ac_image_2d;
+	default:
+		unreachable("bad sampler dim");
+	}
+}
+
+static enum ac_image_dim
+get_ac_image_dim(const struct ac_llvm_context *ctx, enum glsl_sampler_dim sdim,
+		 bool is_array)
+{
+	const enum ac_image_dim base = get_ac_sampler_dim(ctx, sdim, is_array);
+	/* Cubes always, and 3D on chips up to VI, are returned as 2D arrays. */
+	if (base == ac_image_cube)
+		return ac_image_2darray;
+	if (ctx->chip_class <= VI && base == ac_image_3d)
+		return ac_image_2darray;
+	return base;
+}
+
 static LLVMTypeRef get_def_type(struct ac_nir_context *ctx,
                                 const nir_ssa_def *def)
 {
 	LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
 	if (def->num_components > 1) {
 		type = LLVMVectorType(type, def->num_components);
 	}
 	return type;
 }
 
@@ -1117,21 +1156,21 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx,
 	LLVMValueRef half_texel[2];
 	LLVMValueRef compare_cube_wa = NULL;
 	LLVMValueRef result;
 	int c;
 	unsigned coord_vgpr_index = (unsigned)args->offset + (unsigned)args->compare;
 
 	//TODO Rect
 	{
 		struct ac_image_args txq_args = { 0 };
 
-		txq_args.da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
+		txq_args.dim = get_ac_sampler_dim(ctx, instr->sampler_dim, instr->is_array);
 		txq_args.opcode = ac_image_get_resinfo;
 		txq_args.dmask = 0xf;
 		txq_args.addr = ctx->i32_0;
 		txq_args.resource = args->resource;
 		LLVMValueRef size = ac_build_image_opcode(ctx, &txq_args);
 
 		for (c = 0; c < 2; c++) {
 			half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
 								LLVMConstInt(ctx->i32, c, false), "");
 			half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
@@ -2048,21 +2087,21 @@ static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
 	fmask_load_address[0] = coord_x;
 	fmask_load_address[1] = coord_y;
 	if (coord_z) {
 		fmask_load_address[2] = coord_z;
 		fmask_load_address[3] = LLVMGetUndef(ctx->i32);
 	}
 
 	struct ac_image_args args = {0};
 
 	args.opcode = ac_image_load;
-	args.da = coord_z ? true : false;
+	args.dim = coord_z ? ac_image_2darray : ac_image_2d;
 	args.resource = fmask_desc_ptr;
 	args.dmask = 0xf;
 	args.addr = ac_build_gather_values(ctx, fmask_load_address, coord_z ? 4 : 2);
 
 	res = ac_build_image_opcode(ctx, &args);
 
 	res = ac_to_integer(ctx, res);
 	LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
 	LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);
 
@@ -2390,21 +2429,22 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
 	return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32, params, param_count, 0);
 }
 
 static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx,
 					const nir_intrinsic_instr *instr)
 {
 	const nir_variable *var = instr->variables[0]->var;
 	const struct glsl_type *type = glsl_without_array(var->type);
 
 	struct ac_image_args args = { 0 };
-	args.da = glsl_is_array_image(type);
+	args.dim = get_ac_sampler_dim(&ctx->ac, glsl_get_sampler_dim(type),
+				      glsl_sampler_type_is_array(type));
 	args.dmask = 0xf;
 	args.resource = get_sampler_desc(ctx, instr->variables[0],
 					 AC_DESC_IMAGE, NULL, true, false);
 	args.opcode = ac_image_get_resinfo;
 	args.addr = ctx->ac.i32_0;
 
 	return ac_build_image_opcode(&ctx->ac, &args);
 }
 
 static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
@@ -2414,21 +2454,22 @@ static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
 	const nir_variable *var = instr->variables[0]->var;
 	const struct glsl_type *type = glsl_without_array(var->type);
 
 	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF)
 		return get_buffer_size(ctx,
 			get_sampler_desc(ctx, instr->variables[0],
 					 AC_DESC_BUFFER, NULL, true, false), true);
 
 	struct ac_image_args args = { 0 };
 
-	args.da = glsl_is_array_image(type);
+	args.dim = get_ac_image_dim(&ctx->ac, glsl_get_sampler_dim(type),
+				    glsl_sampler_type_is_array(type));
 	args.dmask = 0xf;
 	args.resource = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false);
 	args.opcode = ac_image_get_resinfo;
 	args.addr = ctx->ac.i32_0;
 
 	res = ac_build_image_opcode(&ctx->ac, &args);
 
 	LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
 
 	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
@@ -3161,44 +3202,41 @@ static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
 
 static void set_tex_fetch_args(struct ac_llvm_context *ctx,
 			       struct ac_image_args *args,
 			       const nir_tex_instr *instr,
 			       nir_texop op,
 			       LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
 			       LLVMValueRef *param, unsigned count,
 			       unsigned dmask)
 {
 	unsigned is_rect = 0;
-	bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
 
-	if (op == nir_texop_lod)
-		da = false;
 	/* Pad to power of two vector */
 	while (count < util_next_power_of_two(count))
 		param[count++] = LLVMGetUndef(ctx->i32);
 
 	if (count > 1)
 		args->addr = ac_build_gather_values(ctx, param, count);
 	else
 		args->addr = param[0];
 
 	args->resource = res_ptr;
 	args->sampler = samp_ptr;
 
 	if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) {
 		args->addr = param[0];
 		return;
 	}
 
 	args->dmask = dmask;
 	args->unorm = is_rect;
-	args->da = da;
+	args->dim = get_ac_sampler_dim(ctx, instr->sampler_dim, instr->is_array);
 }
 
 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
  *
  * SI-CI:
  *   If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
  *   filtering manually. The driver sets img7 to a mask clearing
  *   MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
  *     s_and_b32 samp0, samp0, img7
  *
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 6a307c4ddba..a54db9e8596 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -94,20 +94,58 @@ shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
 static bool tgsi_is_array_image(unsigned target)
 {
 	return target == TGSI_TEXTURE_3D ||
 	       target == TGSI_TEXTURE_CUBE ||
 	       target == TGSI_TEXTURE_1D_ARRAY ||
 	       target == TGSI_TEXTURE_2D_ARRAY ||
 	       target == TGSI_TEXTURE_CUBE_ARRAY ||
 	       target == TGSI_TEXTURE_2D_ARRAY_MSAA;
 }
 
+static enum ac_image_dim
+ac_texture_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type target)
+{
+	/* Translate a TGSI texture target into an ac_image_dim. Shadow
+	 * targets map like their plain counterparts, and on GFX9 and newer
+	 * the 1D targets are reported as the matching 2D dimensions.
+	 */
+	switch (target) {
+	case TGSI_TEXTURE_3D:
+		return ac_image_3d;
+	case TGSI_TEXTURE_2D_MSAA:
+		return ac_image_2dmsaa;
+	case TGSI_TEXTURE_2D_ARRAY_MSAA:
+		return ac_image_2darraymsaa;
+	case TGSI_TEXTURE_1D:
+	case TGSI_TEXTURE_SHADOW1D:
+		return screen->info.chip_class >= GFX9 ? ac_image_2d : ac_image_1d;
+	case TGSI_TEXTURE_1D_ARRAY:
+	case TGSI_TEXTURE_SHADOW1D_ARRAY:
+		return screen->info.chip_class >= GFX9 ? ac_image_2darray : ac_image_1darray;
+	case TGSI_TEXTURE_2D:
+	case TGSI_TEXTURE_SHADOW2D:
+	case TGSI_TEXTURE_RECT:
+	case TGSI_TEXTURE_SHADOWRECT:
+		return ac_image_2d;
+	case TGSI_TEXTURE_2D_ARRAY:
+	case TGSI_TEXTURE_SHADOW2D_ARRAY:
+		return ac_image_2darray;
+	case TGSI_TEXTURE_CUBE:
+	case TGSI_TEXTURE_SHADOWCUBE:
+	case TGSI_TEXTURE_CUBE_ARRAY:
+	case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+		return ac_image_cube;
+	default:
+		unreachable("unhandled texture type");
+	}
+}
+
 /**
  * Given a 256-bit resource descriptor, force the DCC enable bit to off.
  *
  * At least on Tonga, executing image stores on images with DCC enabled and
  * non-trivial can eventually lead to lockups. This can occur when an
  * application binds an image as read-only but then uses a shader that writes
  * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
  * program termination) in this case, but it doesn't cost much to be a bit
  * nicer: disabling DCC in the shader still leads to undefined results but
  * avoids the lockup.
@@ -979,26 +1017,26 @@ static void set_tex_fetch_args(struct si_shader_context *ctx,
 
 	/* Pad to power of two vector */
 	while (count < util_next_power_of_two(count))
 		param[count++] = LLVMGetUndef(ctx->i32);
 
 	if (count > 1)
 		args.addr = lp_build_gather_values(&ctx->gallivm, param, count);
 	else
 		args.addr = param[0];
 
+	args.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target);
 	args.resource = res_ptr;
 	args.sampler = samp_ptr;
 	args.dmask = dmask;
 	args.unorm = target == TGSI_TEXTURE_RECT ||
 		     target == TGSI_TEXTURE_SHADOWRECT;
-	args.da = tgsi_is_array_sampler(target);
 
 	/* Ugly, but we seem to have no other choice right now. */
 	STATIC_ASSERT(sizeof(args) <= sizeof(emit_data->args));
 	memcpy(emit_data->args, &args, sizeof(args));
 }
 
 static LLVMValueRef fix_resinfo(struct si_shader_context *ctx,
 				unsigned target, LLVMValueRef out)
 {
 	LLVMBuilderRef builder = ctx->ac.builder;
@@ -1918,21 +1956,29 @@ static void si_llvm_emit_fbfetch(const struct lp_build_tgsi_action *action,
 
 		ac_apply_fmask_to_sample(&ctx->ac, fmask, addr, false);
 	}
 
 	addr_vec = ac_build_gather_values(&ctx->ac, addr, ARRAY_SIZE(addr));
 
 	args.opcode = ac_image_load;
 	args.resource = image;
 	args.addr = addr_vec;
 	args.dmask = 0xf;
-	args.da = ctx->shader->key.mono.u.ps.fbfetch_layered;
+	if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
+		args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
+			ac_image_2darraymsaa : ac_image_2dmsaa;
+	else if (ctx->shader->key.mono.u.ps.fbfetch_is_1D)
+		args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
+			ac_image_1darray : ac_image_1d;
+	else
+		args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
+			ac_image_2darray : ac_image_2d;
 
 	emit_data->output[emit_data->chan] =
 		ac_build_image_opcode(&ctx->ac, &args);
 }
 
 static const struct lp_build_tgsi_action tex_action = {
 	.fetch_args = tex_fetch_args,
 	.emit = build_tex_intrinsic,
 };
 
-- 
2.14.1