[Mesa-dev] [PATCH 2/2] radeonsi: move instance divisors into a constant buffer

Marek Olšák maraeo at gmail.com
Tue Jun 27 00:14:54 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

Shader key size: 107 -> 47

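Instead of the full per-input divisor array, the shader key now stores two
16-bit per-input bitmasks: instance_divisor_is_one (divisor == 1) and
instance_divisor_is_fetched (divisor >= 2). If neither bit is set for an
input, its divisor is 0. Divisors >= 2 are loaded from a constant buffer by
the VS prolog.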

The shader code doing the division by a non-constant divisor is huge (see the
disassembly below). Is it something we need to worry about? Does any app use
instance divisors >= 2?

VS prolog disassembly:
    s_load_dwordx4 s[12:15], s[0:1], 0x80  ; C00A0300 00000080
    s_nop 0                                ; BF800000
    s_waitcnt lgkmcnt(0)                   ; BF8C007F
    s_buffer_load_dword s14, s[12:15], 0x4 ; C0220386 00000004
    s_waitcnt lgkmcnt(0)                   ; BF8C007F
    v_cvt_f32_u32_e32 v4, s14              ; 7E080C0E
    v_rcp_iflag_f32_e32 v4, v4             ; 7E084704
    v_mul_f32_e32 v4, 0x4f800000, v4       ; 0A0808FF 4F800000
    v_cvt_u32_f32_e32 v4, v4               ; 7E080F04
    v_mul_hi_u32 v5, v4, s14               ; D2860005 00001D04
    v_mul_lo_i32 v6, v4, s14               ; D2850006 00001D04
    v_cmp_eq_u32_e64 s[12:13], 0, v5       ; D0CA000C 00020A80
    v_sub_i32_e32 v5, vcc, 0, v6           ; 340A0C80
    v_cndmask_b32_e64 v5, v6, v5, s[12:13] ; D1000005 00320B06
    v_mul_hi_u32 v5, v5, v4                ; D2860005 00020905
    v_add_i32_e32 v6, vcc, v5, v4          ; 320C0905
    v_subrev_i32_e32 v4, vcc, v5, v4       ; 36080905
    v_cndmask_b32_e64 v4, v4, v6, s[12:13] ; D1000004 00320D04
    v_mul_hi_u32 v5, v4, v1                ; D2860005 00020304
    v_add_i32_e32 v4, vcc, s8, v0          ; 32080008
    v_mul_lo_i32 v6, v5, s14               ; D2850006 00001D05
    v_add_i32_e32 v7, vcc, 1, v5           ; 320E0A81
    v_cmp_ge_u32_e64 s[12:13], v1, v6      ; D0CE000C 00020D01
    v_sub_i32_e32 v6, vcc, v1, v6          ; 340C0D01
    v_cmp_le_u32_e32 vcc, s14, v6          ; 7D960C0E
    v_cndmask_b32_e64 v8, 0, -1, s[12:13]  ; D1000008 00318280
    v_cndmask_b32_e64 v6, 0, -1, vcc       ; D1000006 01A98280
    v_and_b32_e32 v6, v8, v6               ; 260C0D08
    v_cmp_eq_u32_e32 vcc, 0, v6            ; 7D940C80
    v_cndmask_b32_e32 v6, v7, v5, vcc      ; 000C0B07
    v_add_i32_e32 v5, vcc, -1, v5          ; 320A0AC1
    v_cmp_eq_u32_e32 vcc, 0, v8            ; 7D941080
    v_cndmask_b32_e32 v5, v6, v5, vcc      ; 000A0B06
    v_add_i32_e32 v5, vcc, s9, v5          ; 320A0A09
---
 src/gallium/drivers/radeonsi/si_descriptors.c   |  2 +
 src/gallium/drivers/radeonsi/si_pipe.c          |  2 +
 src/gallium/drivers/radeonsi/si_shader.c        | 78 +++++++++++++++++--------
 src/gallium/drivers/radeonsi/si_shader.h        |  9 ++-
 src/gallium/drivers/radeonsi/si_state.c         | 15 +++++
 src/gallium/drivers/radeonsi/si_state.h         |  3 +
 src/gallium/drivers/radeonsi/si_state_shaders.c |  7 ++-
 7 files changed, 88 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 75d2a1d..88f7dce 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2185,20 +2185,22 @@ void si_emit_graphics_shader_userdata(struct si_context *sctx,
 					       R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS);
 			si_emit_shader_pointer(sctx, descs,
 					       R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS);
 		} else {
 			si_emit_shader_pointer(sctx, descs,
 					       R_00B230_SPI_SHADER_USER_DATA_GS_0);
 			si_emit_shader_pointer(sctx, descs,
 					       R_00B330_SPI_SHADER_USER_DATA_ES_0);
 			si_emit_shader_pointer(sctx, descs,
 					       R_00B430_SPI_SHADER_USER_DATA_HS_0);
+			si_emit_shader_pointer(sctx, descs,
+					       R_00B530_SPI_SHADER_USER_DATA_LS_0);
 		}
 	}
 
 	mask = sctx->shader_pointers_dirty &
 	       u_bit_consecutive(SI_DESCS_FIRST_SHADER,
 				 SI_DESCS_FIRST_COMPUTE - SI_DESCS_FIRST_SHADER);
 
 	while (mask) {
 		unsigned i = u_bit_scan(&mask);
 		unsigned shader = (i - SI_DESCS_FIRST_SHADER) / SI_NUM_SHADER_DESCS;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 4088849..a940bb8 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -301,20 +301,22 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 
 		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
 			for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
 				sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i,
 							      &sctx->null_const_buf);
 			}
 		}
 
 		si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS,
 				 &sctx->null_const_buf);
+		si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS,
+				 &sctx->null_const_buf);
 		si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES,
 				 &sctx->null_const_buf);
 		si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE,
 				 &sctx->null_const_buf);
 		si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS,
 				 &sctx->null_const_buf);
 
 		/* Clear the NULL constant buffer, because loads should return zeros. */
 		sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
 				     sctx->null_const_buf.buffer->width0, 0,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 42b08bf..55d1232 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -305,31 +305,30 @@ get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx)
 	LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
 
 	return LLVMBuildAdd(gallivm->builder, patch0_patch_data_offset,
 			    LLVMBuildMul(gallivm->builder, patch_stride,
 					 rel_patch_id, ""),
 			    "");
 }
 
 static LLVMValueRef get_instance_index_for_fetch(
 	struct si_shader_context *ctx,
-	unsigned param_start_instance, unsigned divisor)
+	unsigned param_start_instance, LLVMValueRef divisor)
 {
 	struct gallivm_state *gallivm = &ctx->gallivm;
 
 	LLVMValueRef result = LLVMGetParam(ctx->main_fn,
 					   ctx->param_instance_id);
 
 	/* The division must be done before START_INSTANCE is added. */
-	if (divisor > 1)
-		result = LLVMBuildUDiv(gallivm->builder, result,
-				LLVMConstInt(ctx->i32, divisor, 0), "");
+	if (divisor != ctx->i32_1)
+		result = LLVMBuildUDiv(gallivm->builder, result, divisor, "");
 
 	return LLVMBuildAdd(gallivm->builder, result,
 			    LLVMGetParam(ctx->main_fn, param_start_instance), "");
 }
 
 /* Bitcast <4 x float> to <2 x double>, extract the component, and convert
  * to float. */
 static LLVMValueRef extract_double_to_float(struct si_shader_context *ctx,
 					    LLVMValueRef vec4,
 					    unsigned double_index)
@@ -5275,26 +5274,24 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
 		FREE(shader);
 		shader = NULL;
 	}
 	return shader;
 }
 
 static void si_dump_shader_key_vs(const struct si_shader_key *key,
 				  const struct si_vs_prolog_bits *prolog,
 				  const char *prefix, FILE *f)
 {
-	fprintf(f, "  %s.instance_divisors = {", prefix);
-	for (int i = 0; i < ARRAY_SIZE(prolog->instance_divisors); i++) {
-		fprintf(f, !i ? "%u" : ", %u",
-			prolog->instance_divisors[i]);
-	}
-	fprintf(f, "}\n");
+	fprintf(f, "  %s.instance_divisor_is_one = %u\n",
+		prefix, prolog->instance_divisor_is_one);
+	fprintf(f, "  %s.instance_divisor_is_fetched = %u\n",
+		prefix, prolog->instance_divisor_is_fetched);
 
 	fprintf(f, "  mono.vs.fix_fetch = {");
 	for (int i = 0; i < SI_MAX_ATTRIBS; i++)
 		fprintf(f, !i ? "%u" : ", %u", key->mono.vs_fix_fetch[i]);
 	fprintf(f, "}\n");
 }
 
 static void si_dump_shader_key(unsigned processor, const struct si_shader *shader,
 			       FILE *f)
 {
@@ -5596,24 +5593,26 @@ static void si_get_vs_prolog_key(const struct tgsi_shader_info *info,
 	key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;
 	key->vs_prolog.as_ls = shader_out->key.as_ls;
 
 	if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL) {
 		key->vs_prolog.as_ls = 1;
 		key->vs_prolog.num_merged_next_stage_vgprs = 2;
 	} else if (shader_out->selector->type == PIPE_SHADER_GEOMETRY) {
 		key->vs_prolog.num_merged_next_stage_vgprs = 5;
 	}
 
-	/* Set the instanceID flag. */
-	for (unsigned i = 0; i < info->num_inputs; i++)
-		if (key->vs_prolog.states.instance_divisors[i])
-			shader_out->info.uses_instanceid = true;
+	/* Enable loading the InstanceID VGPR. */
+	uint16_t input_mask = u_bit_consecutive(0, info->num_inputs);
+
+	if ((key->vs_prolog.states.instance_divisor_is_one |
+	     key->vs_prolog.states.instance_divisor_is_fetched) & input_mask)
+		shader_out->info.uses_instanceid = true;
 }
 
 /**
  * Compute the PS prolog key, which contains all the information needed to
  * build the PS prolog function, and set related bits in shader->config.
  */
 static void si_get_ps_prolog_key(struct si_shader *shader,
 				 union si_shader_part_key *key,
 				 bool separate_prolog)
 {
@@ -6520,20 +6519,35 @@ si_get_shader_part(struct si_screen *sscreen,
 
 	result->next = *list;
 	*list = result;
 
 out:
 	si_llvm_dispose(&ctx);
 	mtx_unlock(&sscreen->shader_parts_mutex);
 	return result;
 }
 
+static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx)
+{
+	struct gallivm_state *gallivm = &ctx->gallivm;
+	LLVMValueRef ptr[2], list;
+
+	/* Get the pointer to rw buffers. */
+	ptr[0] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS);
+	ptr[1] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS_HI);
+	list = lp_build_gather_values(gallivm, ptr, 2);
+	list = LLVMBuildBitCast(gallivm->builder, list, ctx->i64, "");
+	list = LLVMBuildIntToPtr(gallivm->builder, list,
+				 si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS), "");
+	return list;
+}
+
 /**
  * Build the vertex shader prolog function.
  *
  * The inputs are the same as VS (a lot of SGPRs and 4 VGPR system values).
  * All inputs are returned unmodified. The vertex load indices are
  * stored after them, which will be used by the API VS for fetching inputs.
  *
  * For example, the expected outputs for instance_divisors[] = {0, 1, 2} are:
  *   input_v0,
  *   input_v1,
@@ -6602,25 +6616,47 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
 		LLVMValueRef p = LLVMGetParam(func, i);
 		ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
 	}
 	for (; i < num_params; i++) {
 		LLVMValueRef p = LLVMGetParam(func, i);
 		p = LLVMBuildBitCast(gallivm->builder, p, ctx->f32, "");
 		ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
 	}
 
 	/* Compute vertex load indices from instance divisors. */
+	LLVMValueRef instance_divisor_constbuf = NULL;
+
+	if (key->vs_prolog.states.instance_divisor_is_fetched) {
+		LLVMValueRef list = si_prolog_get_rw_buffers(ctx);
+		LLVMValueRef buf_index =
+			LLVMConstInt(ctx->i32, SI_VS_CONST_INSTANCE_DIVISORS, 0);
+		instance_divisor_constbuf =
+			ac_build_indexed_load_const(&ctx->ac, list, buf_index);
+	}
+
 	for (i = 0; i <= key->vs_prolog.last_input; i++) {
-		unsigned divisor = key->vs_prolog.states.instance_divisors[i];
+		bool divisor_is_one =
+			key->vs_prolog.states.instance_divisor_is_one & (1u << i);
+		bool divisor_is_fetched =
+			key->vs_prolog.states.instance_divisor_is_fetched & (1u << i);
 		LLVMValueRef index;
 
-		if (divisor) {
+		if (divisor_is_one || divisor_is_fetched) {
+			LLVMValueRef divisor = ctx->i32_1;
+
+			if (divisor_is_fetched) {
+				divisor = buffer_load_const(ctx, instance_divisor_constbuf,
+							    LLVMConstInt(ctx->i32, i * 4, 0));
+				divisor = LLVMBuildBitCast(gallivm->builder, divisor,
+							   ctx->i32, "");
+			}
+
 			/* InstanceID / Divisor + StartInstance */
 			index = get_instance_index_for_fetch(ctx,
 							     user_sgpr_base +
 							     SI_SGPR_START_INSTANCE,
 							     divisor);
 		} else {
 			/* VertexID + BaseVertex */
 			index = LLVMBuildAdd(gallivm->builder,
 					     LLVMGetParam(func, ctx->param_vertex_id),
 					     LLVMGetParam(func, user_sgpr_base +
@@ -6859,29 +6895,21 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx,
 	for (i = 0; i < num_params; i++) {
 		LLVMValueRef p = LLVMGetParam(func, i);
 		ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
 	}
 
 	/* Polygon stippling. */
 	if (key->ps_prolog.states.poly_stipple) {
 		/* POS_FIXED_PT is always last. */
 		unsigned pos = key->ps_prolog.num_input_sgprs +
 			       key->ps_prolog.num_input_vgprs - 1;
-		LLVMValueRef ptr[2], list;
-
-		/* Get the pointer to rw buffers. */
-		ptr[0] = LLVMGetParam(func, SI_SGPR_RW_BUFFERS);
-		ptr[1] = LLVMGetParam(func, SI_SGPR_RW_BUFFERS_HI);
-		list = lp_build_gather_values(gallivm, ptr, 2);
-		list = LLVMBuildBitCast(gallivm->builder, list, ctx->i64, "");
-		list = LLVMBuildIntToPtr(gallivm->builder, list,
-					  si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS), "");
+		LLVMValueRef list = si_prolog_get_rw_buffers(ctx);
 
 		si_llvm_emit_polygon_stipple(ctx, list, pos);
 	}
 
 	if (key->ps_prolog.states.bc_optimize_for_persp ||
 	    key->ps_prolog.states.bc_optimize_for_linear) {
 		unsigned i, base = key->ps_prolog.num_input_sgprs;
 		LLVMValueRef center[2], centroid[2], tmp, bc_optimize;
 
 		/* The shader should do: if (PRIM_MASK[31]) CENTROID = CENTER;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 6432126..a10067d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -378,21 +378,28 @@ struct si_shader_selector {
  * -> = merged with the next stage
  */
 
 /* Use the byte alignment for all following structure members for optimal
  * shader key memory footprint.
  */
 #pragma pack(push, 1)
 
 /* Common VS bits between the shader key and the prolog key. */
 struct si_vs_prolog_bits {
-	unsigned	instance_divisors[SI_MAX_ATTRIBS];
+	/* - If neither "is_one" nor "is_fetched" has a bit set, the instance
+	 *   divisor is 0.
+	 * - If "is_one" has a bit set, the instance divisor is 1.
+	 * - If "is_fetched" has a bit set, the instance divisor will be loaded
+	 *   from the constant buffer.
+	 */
+	uint16_t	instance_divisor_is_one;     /* bitmask of inputs */
+	uint16_t	instance_divisor_is_fetched; /* bitmask of inputs */
 };
 
 /* Common TCS bits between the shader key and the epilog key. */
 struct si_tcs_epilog_bits {
 	unsigned	prim_mode:3;
 	unsigned	tes_reads_tess_factors:1;
 };
 
 struct si_gs_prolog_bits {
 	unsigned	tri_strip_adj_fix:1;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index a674a60..7e3d1a0 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3766,20 +3766,25 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
 		unsigned char swizzle[4];
 
 		if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
 			FREE(v);
 			return NULL;
 		}
 
 		if (elements[i].instance_divisor) {
 			v->uses_instance_divisors = true;
 			v->instance_divisors[i] = elements[i].instance_divisor;
+
+			if (v->instance_divisors[i] == 1)
+				v->instance_divisor_is_one |= 1u << i;
+			else
+				v->instance_divisor_is_fetched |= 1u << i;
 		}
 
 		if (!used[vbo_index]) {
 			v->first_vb_use_mask |= 1 << i;
 			used[vbo_index] = true;
 		}
 
 		desc = util_format_description(elements[i].src_format);
 		first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
 		data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
@@ -3894,20 +3899,30 @@ static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
 	sctx->vertex_elements = v;
 	sctx->vertex_buffers_dirty = true;
 
 	if (v &&
 	    (!old ||
 	     old->count != v->count ||
 	     old->uses_instance_divisors != v->uses_instance_divisors ||
 	     v->uses_instance_divisors || /* we don't check which divisors changed */
 	     memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) * v->count)))
 		sctx->do_update_shaders = true;
+
+	if (v && v->instance_divisor_is_fetched) {
+		struct pipe_constant_buffer cb;
+
+		cb.buffer = NULL;
+		cb.user_buffer = v->instance_divisors;
+		cb.buffer_offset = 0;
+		cb.buffer_size = sizeof(uint32_t) * v->count;
+		si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS, &cb);
+	}
 }
 
 static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
 	if (sctx->vertex_elements == state)
 		sctx->vertex_elements = NULL;
 	FREE(state);
 }
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index c9e0770..ec28aba 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -108,20 +108,22 @@ struct si_vertex_elements
 	uint8_t				fix_fetch[SI_MAX_ATTRIBS];
 	uint8_t				format_size[SI_MAX_ATTRIBS];
 	uint8_t				vertex_buffer_index[SI_MAX_ATTRIBS];
 
 	uint8_t				count;
 	bool				uses_instance_divisors;
 
 	uint16_t			first_vb_use_mask;
 	/* Vertex buffer descriptor list size aligned for optimal prefetch. */
 	uint16_t			desc_list_byte_size;
+	uint16_t			instance_divisor_is_one; /* bitmask of inputs */
+	uint16_t			instance_divisor_is_fetched;  /* bitmask of inputs */
 };
 
 union si_state {
 	struct {
 		struct si_state_blend		*blend;
 		struct si_state_rasterizer	*rasterizer;
 		struct si_state_dsa		*dsa;
 		struct si_pm4_state		*poly_offset;
 		struct si_pm4_state		*ls;
 		struct si_pm4_state		*hs;
@@ -175,20 +177,21 @@ enum {
 	SI_GS_RING_ESGS,
 
 	SI_RING_GSVS,
 
 	SI_VS_STREAMOUT_BUF0,
 	SI_VS_STREAMOUT_BUF1,
 	SI_VS_STREAMOUT_BUF2,
 	SI_VS_STREAMOUT_BUF3,
 
 	SI_HS_CONST_DEFAULT_TESS_LEVELS,
+	SI_VS_CONST_INSTANCE_DIVISORS,
 	SI_VS_CONST_CLIP_PLANES,
 	SI_PS_CONST_POLY_STIPPLE,
 	SI_PS_CONST_SAMPLE_POSITIONS,
 
 	SI_NUM_RW_BUFFERS,
 };
 
 /* Indices into sctx->descriptors, laid out so that gfx and compute pipelines
  * are contiguous:
  *
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 4eb3b75..63cc746 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1180,24 +1180,27 @@ static unsigned si_get_alpha_test_func(struct si_context *sctx)
 }
 
 static void si_shader_selector_key_vs(struct si_context *sctx,
 				      struct si_shader_selector *vs,
 				      struct si_shader_key *key,
 				      struct si_vs_prolog_bits *prolog_key)
 {
 	if (!sctx->vertex_elements)
 		return;
 
+	prolog_key->instance_divisor_is_one =
+		sctx->vertex_elements->instance_divisor_is_one;
+	prolog_key->instance_divisor_is_fetched =
+		sctx->vertex_elements->instance_divisor_is_fetched;
+
 	unsigned count = MIN2(vs->info.num_inputs,
 			      sctx->vertex_elements->count);
-	memcpy(prolog_key->instance_divisors,
-	       sctx->vertex_elements->instance_divisors, count * 4);
 	memcpy(key->mono.vs_fix_fetch, sctx->vertex_elements->fix_fetch, count);
 }
 
 static void si_shader_selector_key_hw_vs(struct si_context *sctx,
 					 struct si_shader_selector *vs,
 					 struct si_shader_key *key)
 {
 	struct si_shader_selector *ps = sctx->ps_shader.cso;
 
 	key->opt.clip_disable =
-- 
2.7.4



More information about the mesa-dev mailing list