[Mesa-dev] [PATCH] Remove 3D registers from compute command stream

Tom Stellard tstellar at gmail.com
Mon Aug 20 08:55:29 PDT 2012


On Mon, Aug 13, 2012 at 09:05:29PM +0100, archibald wrote:
> Hi list,
> 
> Here is my attempt at solving the task "Remove 3D registers from
> compute
> command stream" on http://dri.freedesktop.org/wiki/R600ToDo. It's my
> first attempt at a patch for mesa, so I'd appreciate any comments or
> advice that people might have.
> 
> I don't have a Cayman card, so I'm not able to test on that, so that
> part
> is officially untested.
> 
> I ran the opencl-example programs to test the opencl aspect and
> there was
> no difference in the number of passed and failed tests (67:4) before
> and
> after the patch. OpenArena and my desktop session ran fine
> afterwards, but
> I'm having `fun' trying to get piglit to behave so I couldn't do a
> full regression test.
> 
> Thanks,
> Archibald

The non-Cayman parts of this patch have been committed as
59361d76a5b0b6b77d6e6bc976e02df2e8df9ec3.  I wasn't able to test this
patch thoroughly on Cayman due to hanging piglit tests, so I dropped
that part of the patch.  I'll take a look at the Cayman changes again
once compute support is more mature.

Thanks for the patch!

-Tom

> diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
> index 0d6eb4e..acf91ba 100644
> --- a/src/gallium/drivers/r600/evergreen_compute.c
> +++ b/src/gallium/drivers/r600/evergreen_compute.c
> @@ -325,20 +325,10 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
>  	struct evergreen_compute_resource *resources =
>  					ctx->cs_shader_state.shader->resources;
>  
> -	/* Initialize all the registers common to both 3D and compute.  Some
> -	 * 3D only register will be initialized by this atom as well, but
> -	 * this is OK for now.
> -	 *
> -	 * See evergreen_init_atom_start_cs() or cayman_init_atom_start_cs() in
> -	 * evergreen_state.c for the list of registers that are intialized by
> -	 * the start_cs_cmd atom.
> -	 */
> -	r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);
> -
> -	/* Initialize all the compute specific registers.
> +	/* Initialize all the compute-related registers.
>  	 *
>  	 * See evergreen_init_atom_start_compute_cs() in this file for the list
> -	 * of registers initialized by the start_compuet_cs_cmd atom.
> +	 * of registers initialized by the start_compute_cs_cmd atom.
>  	 */
>  	r600_emit_atom(ctx, &ctx->start_compute_cs_cmd.atom);
>  
> @@ -590,11 +580,10 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
>  	int num_threads;
>  	int num_stack_entries;
>  
> -	/* We aren't passing the EMIT_EARLY flag as the third argument
> -	 * because we will be emitting this atom manually in order to
> -	 * ensure it gets emitted after the start_cs_cmd atom.
> +	/* since all required registers are initialised in the
> +	 * start_compute_cs_cmd atom, we can EMIT_EARLY here.
>  	 */
> -	r600_init_command_buffer(cb, 256, 0);
> +	r600_init_command_buffer(cb, 256, EMIT_EARLY);
>  	cb->pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
>  
>  	switch (ctx->family) {
> @@ -643,6 +632,8 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
>  	}
>  
>  	/* Config Registers */
> +	evergreen_init_common_regs(cb, ctx->chip_class
> +			, ctx->family, ctx->screen->info.drm_minor);
>  
>  	/* The primitive type always needs to be POINTLIST for compute. */
>  	r600_store_config_reg(cb, R_008958_VGT_PRIMITIVE_TYPE,
> diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
> index 67ae7d3..addc36a 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -1901,19 +1901,13 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
>  	r600_store_value(cb, 0x80000000);
>  	r600_store_value(cb, 0x80000000);
>  
> +	cayman_init_common_regs(cb);
> +
>  	r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
>  	r600_store_value(cb, S_008C00_EXPORT_SRC_C(1)); /* R_008C00_SQ_CONFIG */
>  	/* always set the temp clauses */
>  	r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(4)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
>  
> -	r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
> -	r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
> -	r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
> -
> -	r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
> -
> -	r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
> -
>  	r600_store_context_reg_seq(cb, R_028A10_VGT_OUTPUT_PATH_CNTL, 13);
>  	r600_store_value(cb, 0); /* R_028A10_VGT_OUTPUT_PATH_CNTL */
>  	r600_store_value(cb, 0); /* R_028A14_VGT_HOS_CNTL */
> @@ -1929,16 +1923,77 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
>  	r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */
>  	r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE */
>  
> -	r600_store_context_reg_seq(cb, R_028B94_VGT_STRMOUT_CONFIG, 2);
> -	r600_store_value(cb, 0); /* R_028B94_VGT_STRMOUT_CONFIG */
> -	r600_store_value(cb, 0); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */
> -
>  	r600_store_context_reg_seq(cb, R_028AB4_VGT_REUSE_OFF, 2);
>  	r600_store_value(cb, 0); /* R_028AB4_VGT_REUSE_OFF */
>  	r600_store_value(cb, 0); /* R_028AB8_VGT_VTX_CNT_EN */
>  
>  	r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1);
>  
> +	r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
> +
> +	r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
> +	r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
> +	r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */
> +
> +	r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0);
> +
> +	r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0);
> +	r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
> +
> +	r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 0x0000043F);
> +	r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0);
> +
> +	r600_store_context_reg_seq(cb, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
> +	r600_store_value(cb, 0x00000400); /* CM_R_028BDC_PA_SC_LINE_CNTL */
> +	r600_store_value(cb, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
> +
> +	r600_store_context_reg_seq(cb, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
> +	r600_store_value(cb, 0x3F800000); /* CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
> +	r600_store_value(cb, 0x3F800000); /* CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
> +	r600_store_value(cb, 0x3F800000); /* CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
> +	r600_store_value(cb, 0x3F800000); /* CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
> +
> +	r600_store_context_reg_seq(cb, R_028240_PA_SC_GENERIC_SCISSOR_TL, 2);
> +	r600_store_value(cb, 0); /* R_028240_PA_SC_GENERIC_SCISSOR_TL */
> +	r600_store_value(cb, S_028244_BR_X(16384) | S_028244_BR_Y(16384)); /* R_028244_PA_SC_GENERIC_SCISSOR_BR */
> +
> +	r600_store_context_reg_seq(cb, R_028030_PA_SC_SCREEN_SCISSOR_TL, 2);
> +	r600_store_value(cb, 0); /* R_028030_PA_SC_SCREEN_SCISSOR_TL */
> +	r600_store_value(cb, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); /* R_028034_PA_SC_SCREEN_SCISSOR_BR */
> +
> +	r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0);
> +
> +	r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
> +	if (rctx->screen->has_streamout) {
> +		r600_store_context_reg(cb, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
> +	}
> +
> +	eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF);
> +	eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF);
> +}
> +
> +void cayman_init_common_regs(struct r600_command_buffer *cb)
> +{
> +	r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
> +	r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
> +	r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
> +
> +	r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
> +
> +	r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
> +
> +	r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
> +	r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
> +	r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
> +
> +	r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
> +
> +	r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
> +
> +	r600_store_context_reg_seq(cb, R_028B94_VGT_STRMOUT_CONFIG, 2);
> +	r600_store_value(cb, 0); /* R_028B94_VGT_STRMOUT_CONFIG */
> +	r600_store_value(cb, 0); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */
> +
>  	r600_store_context_reg(cb, CM_R_028AA8_IA_MULTI_VGT_PARAM, S_028AA8_SWITCH_ON_EOP(1) | S_028AA8_PARTIAL_VS_WAVE_ON(1) | S_028AA8_PRIMGROUP_SIZE(63));
>  
>  	r600_store_context_reg_seq(cb, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
> @@ -1987,75 +2042,43 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
>  	r600_store_value(cb, ~0); /* R_028400_VGT_MAX_VTX_INDX */
>  	r600_store_value(cb, 0); /* R_028404_VGT_MIN_VTX_INDX */
>  
> -	r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
> -
>  	r600_store_context_reg_seq(cb, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
>  	r600_store_value(cb, ~0); /* CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0 */
>  	r600_store_value(cb, ~0); /* CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1 */
>  
> -	r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
> -	r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
> -	r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */
> -
> -	r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0);
> -
>  	r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3);
>  	r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */
>  	r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */
>  	r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */
>  
> -	r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0);
> -	r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
> -
>  	r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
>  	r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */
>  	r600_store_value(cb, 0x3F800000); /* R_0282D4_PA_SC_VPORT_ZMAX_0 */
>  
>  	r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
> -	r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 0x0000043F);
> -	r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0);
> -	r600_store_context_reg(cb, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00);
> -
> -	r600_store_context_reg_seq(cb, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
> -	r600_store_value(cb, 0x00000400); /* CM_R_028BDC_PA_SC_LINE_CNTL */
> -	r600_store_value(cb, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
> -
> -	r600_store_context_reg_seq(cb, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
> -	r600_store_value(cb, 0x3F800000); /* CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
> -	r600_store_value(cb, 0x3F800000); /* CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
> -	r600_store_value(cb, 0x3F800000); /* CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
> -	r600_store_value(cb, 0x3F800000); /* CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
> -
> -	r600_store_context_reg_seq(cb, R_028240_PA_SC_GENERIC_SCISSOR_TL, 2);
> -	r600_store_value(cb, 0); /* R_028240_PA_SC_GENERIC_SCISSOR_TL */
> -	r600_store_value(cb, S_028244_BR_X(16384) | S_028244_BR_Y(16384)); /* R_028244_PA_SC_GENERIC_SCISSOR_BR */
>  
> -	r600_store_context_reg_seq(cb, R_028030_PA_SC_SCREEN_SCISSOR_TL, 2);
> -	r600_store_value(cb, 0); /* R_028030_PA_SC_SCREEN_SCISSOR_TL */
> -	r600_store_value(cb, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); /* R_028034_PA_SC_SCREEN_SCISSOR_BR */
> +	r600_store_context_reg(cb, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00);
>  
>  	r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
>  	r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
> -	r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0);
>  
>  	r600_store_context_reg(cb, R_028354_SX_SURFACE_SYNC, S_028354_SURFACE_SYNC_MASK(0xf));
> -	r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
> -	if (rctx->screen->has_streamout) {
> -		r600_store_context_reg(cb, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
> -	}
> -
> -	eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF);
> -	eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF);
>  }
>  
> -void evergreen_init_atom_start_cs(struct r600_context *rctx)
> +void evergreen_init_common_regs(struct r600_command_buffer *cb
> +	, enum chip_class ctx_chip_class
> +	, enum radeon_family ctx_family
> +	, int ctx_drm_minor)
>  {
> -	struct r600_command_buffer *cb = &rctx->start_cs_cmd;
>  	int ps_prio;
>  	int vs_prio;
>  	int gs_prio;
>  	int es_prio;
> -	int hs_prio, cs_prio, ls_prio;
> +
> +	int hs_prio;
> +	int cs_prio;
> +	int ls_prio;
> +
>  	int num_ps_gprs;
>  	int num_vs_gprs;
>  	int num_gs_gprs;
> @@ -2063,12 +2086,213 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
>  	int num_hs_gprs;
>  	int num_ls_gprs;
>  	int num_temp_gprs;
> +
> +	unsigned tmp;
> +
> +	if (ctx_chip_class == CAYMAN) {
> +		cayman_init_common_regs(cb);
> +		return;
> +	}
> +
> +	ps_prio = 0;
> +	vs_prio = 1;
> +	gs_prio = 2;
> +	es_prio = 3;
> +	hs_prio = 0;
> +	ls_prio = 0;
> +	cs_prio = 0;
> +
> +	switch (ctx_family) {
> +	case CHIP_CEDAR:
> +	default:
> +		num_ps_gprs = 93;
> +		num_vs_gprs = 46;
> +		num_temp_gprs = 4;
> +		num_gs_gprs = 31;
> +		num_es_gprs = 31;
> +		num_hs_gprs = 23;
> +		num_ls_gprs = 23;
> +		break;
> +	case CHIP_REDWOOD:
> +		num_ps_gprs = 93;
> +		num_vs_gprs = 46;
> +		num_temp_gprs = 4;
> +		num_gs_gprs = 31;
> +		num_es_gprs = 31;
> +		num_hs_gprs = 23;
> +		num_ls_gprs = 23;
> +		break;
> +	case CHIP_JUNIPER:
> +		num_ps_gprs = 93;
> +		num_vs_gprs = 46;
> +		num_temp_gprs = 4;
> +		num_gs_gprs = 31;
> +		num_es_gprs = 31;
> +		num_hs_gprs = 23;
> +		num_ls_gprs = 23;
> +		break;
> +	case CHIP_CYPRESS:
> +	case CHIP_HEMLOCK:
> +		num_ps_gprs = 93;
> +		num_vs_gprs = 46;
> +		num_temp_gprs = 4;
> +		num_gs_gprs = 31;
> +		num_es_gprs = 31;
> +		num_hs_gprs = 23;
> +		num_ls_gprs = 23;
> +		break;
> +	case CHIP_PALM:
> +		num_ps_gprs = 93;
> +		num_vs_gprs = 46;
> +		num_temp_gprs = 4;
> +		num_gs_gprs = 31;
> +		num_es_gprs = 31;
> +		num_hs_gprs = 23;
> +		num_ls_gprs = 23;
> +		break;
> +	case CHIP_SUMO:
> +		num_ps_gprs = 93;
> +		num_vs_gprs = 46;
> +		num_temp_gprs = 4;
> +		num_gs_gprs = 31;
> +		num_es_gprs = 31;
> +		num_hs_gprs = 23;
> +		num_ls_gprs = 23;
> +		break;
> +	case CHIP_SUMO2:
> +		num_ps_gprs = 93;
> +		num_vs_gprs = 46;
> +		num_temp_gprs = 4;
> +		num_gs_gprs = 31;
> +		num_es_gprs = 31;
> +		num_hs_gprs = 23;
> +		num_ls_gprs = 23;
> +		break;
> +	case CHIP_BARTS:
> +		num_ps_gprs = 93;
> +		num_vs_gprs = 46;
> +		num_temp_gprs = 4;
> +		num_gs_gprs = 31;
> +		num_es_gprs = 31;
> +		num_hs_gprs = 23;
> +		num_ls_gprs = 23;
> +		break;
> +	case CHIP_TURKS:
> +		num_ps_gprs = 93;
> +		num_vs_gprs = 46;
> +		num_temp_gprs = 4;
> +		num_gs_gprs = 31;
> +		num_es_gprs = 31;
> +		num_hs_gprs = 23;
> +		num_ls_gprs = 23;
> +		break;
> +	case CHIP_CAICOS:
> +		num_ps_gprs = 93;
> +		num_vs_gprs = 46;
> +		num_temp_gprs = 4;
> +		num_gs_gprs = 31;
> +		num_es_gprs = 31;
> +		num_hs_gprs = 23;
> +		num_ls_gprs = 23;
> +		break;
> +	}
> +
> +	tmp = 0;
> +	switch (ctx_family) {
> +	case CHIP_CEDAR:
> +	case CHIP_PALM:
> +	case CHIP_SUMO:
> +	case CHIP_SUMO2:
> +	case CHIP_CAICOS:
> +		break;
> +	default:
> +		tmp |= S_008C00_VC_ENABLE(1);
> +		break;
> +	}
> +	tmp |= S_008C00_EXPORT_SRC_C(1);
> +	tmp |= S_008C00_CS_PRIO(cs_prio);
> +	tmp |= S_008C00_LS_PRIO(ls_prio);
> +	tmp |= S_008C00_HS_PRIO(hs_prio);
> +	tmp |= S_008C00_PS_PRIO(ps_prio);
> +	tmp |= S_008C00_VS_PRIO(vs_prio);
> +	tmp |= S_008C00_GS_PRIO(gs_prio);
> +	tmp |= S_008C00_ES_PRIO(es_prio);
> +
> +	/* enable dynamic GPR resource management */
> +	if (ctx_drm_minor >= 7) {
> +		r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
> +		r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
> +		/* always set temp clauses */
> +		r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
> +		r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
> +		r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
> +		r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
> +		r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
> +		r600_store_context_reg(cb, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
> +					S_028838_PS_GPRS(0x1e) |
> +					S_028838_VS_GPRS(0x1e) |
> +					S_028838_GS_GPRS(0x1e) |
> +					S_028838_ES_GPRS(0x1e) |
> +					S_028838_HS_GPRS(0x1e) |
> +					S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
> +	} else {
> +		r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 4);
> +		r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
> +
> +		tmp = S_008C04_NUM_PS_GPRS(num_ps_gprs);
> +		tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
> +		tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
> +		r600_store_value(cb, tmp); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
> +
> +		tmp = S_008C08_NUM_GS_GPRS(num_gs_gprs);
> +		tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
> +		r600_store_value(cb, tmp); /* R_008C08_SQ_GPR_RESOURCE_MGMT_2 */
> +
> +		tmp = S_008C0C_NUM_HS_GPRS(num_hs_gprs);
> +		tmp |= S_008C0C_NUM_HS_GPRS(num_ls_gprs);
> +		r600_store_value(cb, tmp); /* R_008C0C_SQ_GPR_RESOURCE_MGMT_3 */
> +	}
> +
> +	r600_store_config_reg(cb, R_008E2C_SQ_LDS_RESOURCE_MGMT,
> +			      S_008E2C_NUM_PS_LDS(0x1000) | S_008E2C_NUM_LS_LDS(0x1000));
> +
> +	r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
> +
> +	r600_store_context_reg_seq(cb, R_028B94_VGT_STRMOUT_CONFIG, 2);
> +	r600_store_value(cb, 0); /* R_028B94_VGT_STRMOUT_CONFIG */
> +	r600_store_value(cb, 0); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */
> +
> +	r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
> +
> +	r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
> +	r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */
> +	r600_store_value(cb, 0x3F800000); /* R_0282D4_PA_SC_VPORT_ZMAX_0 */
> +
> +	r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3);
> +	r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */
> +	r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */
> +	r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */
> +
> +	r600_store_context_reg(cb, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00);
> +
> +	r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
> +	r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
> +
> +	r600_store_context_reg(cb, R_028354_SX_SURFACE_SYNC, S_028354_SURFACE_SYNC_MASK(0xf));
> +
> +	return;
> +}
> +
> +void evergreen_init_atom_start_cs(struct r600_context *rctx)
> +{
> +	struct r600_command_buffer *cb = &rctx->start_cs_cmd;
>  	int num_ps_threads;
>  	int num_vs_threads;
>  	int num_gs_threads;
>  	int num_es_threads;
>  	int num_hs_threads;
>  	int num_ls_threads;
> +
>  	int num_ps_stack_entries;
>  	int num_vs_stack_entries;
>  	int num_gs_stack_entries;
> @@ -2090,25 +2314,13 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
>  	r600_store_value(cb, 0x80000000);
>  	r600_store_value(cb, 0x80000000);
>  
> -	family = rctx->family;
> -	ps_prio = 0;
> -	vs_prio = 1;
> -	gs_prio = 2;
> -	es_prio = 3;
> -	hs_prio = 0;
> -	ls_prio = 0;
> -	cs_prio = 0;
> +	evergreen_init_common_regs(cb, rctx->chip_class
> +			, rctx->family, rctx->screen->info.drm_minor);
>  
> +	family = rctx->family;
>  	switch (family) {
>  	case CHIP_CEDAR:
>  	default:
> -		num_ps_gprs = 93;
> -		num_vs_gprs = 46;
> -		num_temp_gprs = 4;
> -		num_gs_gprs = 31;
> -		num_es_gprs = 31;
> -		num_hs_gprs = 23;
> -		num_ls_gprs = 23;
>  		num_ps_threads = 96;
>  		num_vs_threads = 16;
>  		num_gs_threads = 16;
> @@ -2123,13 +2335,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
>  		num_ls_stack_entries = 42;
>  		break;
>  	case CHIP_REDWOOD:
> -		num_ps_gprs = 93;
> -		num_vs_gprs = 46;
> -		num_temp_gprs = 4;
> -		num_gs_gprs = 31;
> -		num_es_gprs = 31;
> -		num_hs_gprs = 23;
> -		num_ls_gprs = 23;
>  		num_ps_threads = 128;
>  		num_vs_threads = 20;
>  		num_gs_threads = 20;
> @@ -2144,13 +2349,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
>  		num_ls_stack_entries = 42;
>  		break;
>  	case CHIP_JUNIPER:
> -		num_ps_gprs = 93;
> -		num_vs_gprs = 46;
> -		num_temp_gprs = 4;
> -		num_gs_gprs = 31;
> -		num_es_gprs = 31;
> -		num_hs_gprs = 23;
> -		num_ls_gprs = 23;
>  		num_ps_threads = 128;
>  		num_vs_threads = 20;
>  		num_gs_threads = 20;
> @@ -2166,13 +2364,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
>  		break;
>  	case CHIP_CYPRESS:
>  	case CHIP_HEMLOCK:
> -		num_ps_gprs = 93;
> -		num_vs_gprs = 46;
> -		num_temp_gprs = 4;
> -		num_gs_gprs = 31;
> -		num_es_gprs = 31;
> -		num_hs_gprs = 23;
> -		num_ls_gprs = 23;
>  		num_ps_threads = 128;
>  		num_vs_threads = 20;
>  		num_gs_threads = 20;
> @@ -2187,13 +2378,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
>  		num_ls_stack_entries = 85;
>  		break;
>  	case CHIP_PALM:
> -		num_ps_gprs = 93;
> -		num_vs_gprs = 46;
> -		num_temp_gprs = 4;
> -		num_gs_gprs = 31;
> -		num_es_gprs = 31;
> -		num_hs_gprs = 23;
> -		num_ls_gprs = 23;
>  		num_ps_threads = 96;
>  		num_vs_threads = 16;
>  		num_gs_threads = 16;
> @@ -2208,13 +2392,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
>  		num_ls_stack_entries = 42;
>  		break;
>  	case CHIP_SUMO:
> -		num_ps_gprs = 93;
> -		num_vs_gprs = 46;
> -		num_temp_gprs = 4;
> -		num_gs_gprs = 31;
> -		num_es_gprs = 31;
> -		num_hs_gprs = 23;
> -		num_ls_gprs = 23;
>  		num_ps_threads = 96;
>  		num_vs_threads = 25;
>  		num_gs_threads = 25;
> @@ -2229,13 +2406,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
>  		num_ls_stack_entries = 42;
>  		break;
>  	case CHIP_SUMO2:
> -		num_ps_gprs = 93;
> -		num_vs_gprs = 46;
> -		num_temp_gprs = 4;
> -		num_gs_gprs = 31;
> -		num_es_gprs = 31;
> -		num_hs_gprs = 23;
> -		num_ls_gprs = 23;
>  		num_ps_threads = 96;
>  		num_vs_threads = 25;
>  		num_gs_threads = 25;
> @@ -2250,13 +2420,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
>  		num_ls_stack_entries = 85;
>  		break;
>  	case CHIP_BARTS:
> -		num_ps_gprs = 93;
> -		num_vs_gprs = 46;
> -		num_temp_gprs = 4;
> -		num_gs_gprs = 31;
> -		num_es_gprs = 31;
> -		num_hs_gprs = 23;
> -		num_ls_gprs = 23;
>  		num_ps_threads = 128;
>  		num_vs_threads = 20;
>  		num_gs_threads = 20;
> @@ -2271,13 +2434,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
>  		num_ls_stack_entries = 85;
>  		break;
>  	case CHIP_TURKS:
> -		num_ps_gprs = 93;
> -		num_vs_gprs = 46;
> -		num_temp_gprs = 4;
> -		num_gs_gprs = 31;
> -		num_es_gprs = 31;
> -		num_hs_gprs = 23;
> -		num_ls_gprs = 23;
>  		num_ps_threads = 128;
>  		num_vs_threads = 20;
>  		num_gs_threads = 20;
> @@ -2292,13 +2448,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
>  		num_ls_stack_entries = 42;
>  		break;
>  	case CHIP_CAICOS:
> -		num_ps_gprs = 93;
> -		num_vs_gprs = 46;
> -		num_temp_gprs = 4;
> -		num_gs_gprs = 31;
> -		num_es_gprs = 31;
> -		num_hs_gprs = 23;
> -		num_ls_gprs = 23;
>  		num_ps_threads = 128;
>  		num_vs_threads = 10;
>  		num_gs_threads = 10;
> @@ -2314,66 +2463,11 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
>  		break;
>  	}
>  
> -	tmp = 0;
> -	switch (family) {
> -	case CHIP_CEDAR:
> -	case CHIP_PALM:
> -	case CHIP_SUMO:
> -	case CHIP_SUMO2:
> -	case CHIP_CAICOS:
> -		break;
> -	default:
> -		tmp |= S_008C00_VC_ENABLE(1);
> -		break;
> -	}
> -	tmp |= S_008C00_EXPORT_SRC_C(1);
> -	tmp |= S_008C00_CS_PRIO(cs_prio);
> -	tmp |= S_008C00_LS_PRIO(ls_prio);
> -	tmp |= S_008C00_HS_PRIO(hs_prio);
> -	tmp |= S_008C00_PS_PRIO(ps_prio);
> -	tmp |= S_008C00_VS_PRIO(vs_prio);
> -	tmp |= S_008C00_GS_PRIO(gs_prio);
> -	tmp |= S_008C00_ES_PRIO(es_prio);
> -
> -	/* enable dynamic GPR resource management */
> -	if (rctx->screen->info.drm_minor >= 7) {
> -		r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
> -		r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
> -		/* always set temp clauses */
> -		r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
> -		r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
> -		r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
> -		r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
> -		r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
> -		r600_store_context_reg(cb, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
> -					S_028838_PS_GPRS(0x1e) |
> -					S_028838_VS_GPRS(0x1e) |
> -					S_028838_GS_GPRS(0x1e) |
> -					S_028838_ES_GPRS(0x1e) |
> -					S_028838_HS_GPRS(0x1e) |
> -					S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
> -	} else {
> -		r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 4);
> -		r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
> -
> -		tmp = S_008C04_NUM_PS_GPRS(num_ps_gprs);
> -		tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
> -		tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
> -		r600_store_value(cb, tmp); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
> -
> -		tmp = S_008C08_NUM_GS_GPRS(num_gs_gprs);
> -		tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
> -		r600_store_value(cb, tmp); /* R_008C08_SQ_GPR_RESOURCE_MGMT_2 */
> -
> -		tmp = S_008C0C_NUM_HS_GPRS(num_hs_gprs);
> -		tmp |= S_008C0C_NUM_HS_GPRS(num_ls_gprs);
> -		r600_store_value(cb, tmp); /* R_008C0C_SQ_GPR_RESOURCE_MGMT_3 */
> -	}
> -
>  	tmp = S_008C18_NUM_PS_THREADS(num_ps_threads);
>  	tmp |= S_008C18_NUM_VS_THREADS(num_vs_threads);
>  	tmp |= S_008C18_NUM_GS_THREADS(num_gs_threads);
>  	tmp |= S_008C18_NUM_ES_THREADS(num_es_threads);
> +
>  	r600_store_config_reg_seq(cb, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 5);
>  	r600_store_value(cb, tmp); /* R_008C18_SQ_THREAD_RESOURCE_MGMT_1 */
>  
> @@ -2393,14 +2487,9 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
>  	tmp |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries);
>  	r600_store_value(cb, tmp); /* R_008C28_SQ_STACK_RESOURCE_MGMT_3 */
>  
> -	r600_store_config_reg(cb, R_008E2C_SQ_LDS_RESOURCE_MGMT,
> -			      S_008E2C_NUM_PS_LDS(0x1000) | S_008E2C_NUM_LS_LDS(0x1000));
> -
>  	r600_store_config_reg(cb, R_009100_SPI_CONFIG_CNTL, 0);
>  	r600_store_config_reg(cb, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4));
>  
> -	r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
> -
>  	r600_store_context_reg_seq(cb, R_028900_SQ_ESGS_RING_ITEMSIZE, 6);
>  	r600_store_value(cb, 0); /* R_028900_SQ_ESGS_RING_ITEMSIZE */
>  	r600_store_value(cb, 0); /* R_028904_SQ_GSVS_RING_ITEMSIZE */
> @@ -2430,10 +2519,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
>  	r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */
>  	r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE */
>  
> -	r600_store_context_reg_seq(cb, R_028B94_VGT_STRMOUT_CONFIG, 2);
> -	r600_store_value(cb, 0); /* R_028B94_VGT_STRMOUT_CONFIG */
> -	r600_store_value(cb, 0); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */
> -
>  	r600_store_context_reg_seq(cb, R_028AB4_VGT_REUSE_OFF, 2);
>  	r600_store_value(cb, 0); /* R_028AB4_VGT_REUSE_OFF */
>  	r600_store_value(cb, 0); /* R_028AB8_VGT_VTX_CNT_EN */
> @@ -2484,23 +2569,11 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
>  
>  	r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0);
>  	r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
> -	r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
> -
> -	r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
> -	r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */
> -	r600_store_value(cb, 0x3F800000); /* R_0282D4_PA_SC_VPORT_ZMAX_0 */
>  
>  	r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0);
>  	r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 0x0000043F);
>  	r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0);
>  
> -	r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3);
> -	r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */
> -	r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */
> -	r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */
> -
> -	r600_store_context_reg(cb, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00);
> -
>  	r600_store_context_reg_seq(cb, R_028C00_PA_SC_LINE_CNTL, 2);
>  	r600_store_value(cb, 0x00000400); /* R_028C00_PA_SC_LINE_CNTL */
>  	r600_store_value(cb, 0); /* R_028C04_PA_SC_AA_CONFIG */
> @@ -2522,11 +2595,8 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
>  	r600_store_value(cb, 0); /* R_028030_PA_SC_SCREEN_SCISSOR_TL */
>  	r600_store_value(cb, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); /* R_028034_PA_SC_SCREEN_SCISSOR_BR */
>  
> -	r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
> -	r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
>  	r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0);
>  
> -	r600_store_context_reg(cb, R_028354_SX_SURFACE_SYNC, S_028354_SURFACE_SYNC_MASK(0xf));
>  	r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
>  	if (rctx->screen->has_streamout) {
>  		r600_store_context_reg(cb, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
> diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
> index 0464183..cf2e61e 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -447,6 +447,13 @@ static INLINE void r600_atom_dirty(struct r600_context *rctx, struct r600_atom *
>  }
>  
>  /* evergreen_state.c */
> +void cayman_init_common_regs(struct r600_command_buffer *cb);
> +
> +void evergreen_init_common_regs(struct r600_command_buffer *cb,
> +				enum chip_class ctx_chip_class,
> +				enum radeon_family ctx_family,
> +				int ctx_drm_minor);
> +
>  void evergreen_init_state_functions(struct r600_context *rctx);
>  void evergreen_init_atom_start_cs(struct r600_context *rctx);
>  void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader);

> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev



More information about the mesa-dev mailing list