[Mesa-dev] [PATCH] Remove 3D registers from compute command stream
Tom Stellard
tstellar at gmail.com
Mon Aug 20 08:55:29 PDT 2012
On Mon, Aug 13, 2012 at 09:05:29PM +0100, archibald wrote:
> Hi list,
>
> Here is my attempt at solving the task "Remove 3D registers from
> compute
> command stream" on http://dri.freedesktop.org/wiki/R600ToDo. It's my
> first attempt at a patch for mesa, so I'd appreciate any comments or
> advice that people might have.
>
> I don't have a Cayman card, so I'm not able to test on that, so that
> part
> is officially untested.
>
> I ran the opencl-example programs to test the opencl aspect and
> there was
> no difference in the number of passed and failed tests (67:4) before
> and
> after the patch. OpenArena and my desktop session ran fine
> afterwards, but
> I'm having `fun' trying to get piglit to behave so I couldn't do a
> full regression test.
>
> Thanks,
> Archibald
The non-Cayman parts of this patch have been committed as
59361d76a5b0b6b77d6e6bc976e02df2e8df9ec3. I wasn't able to test this
patch thoroughly on Cayman due to hanging piglit tests, so I dropped
that part of the patch. I'll take a look at the Cayman changes again
once compute support is more mature.
Thanks for the patch!
-Tom
> diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
> index 0d6eb4e..acf91ba 100644
> --- a/src/gallium/drivers/r600/evergreen_compute.c
> +++ b/src/gallium/drivers/r600/evergreen_compute.c
> @@ -325,20 +325,10 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
> struct evergreen_compute_resource *resources =
> ctx->cs_shader_state.shader->resources;
>
> - /* Initialize all the registers common to both 3D and compute. Some
> - * 3D only register will be initialized by this atom as well, but
> - * this is OK for now.
> - *
> - * See evergreen_init_atom_start_cs() or cayman_init_atom_start_cs() in
> - * evergreen_state.c for the list of registers that are intialized by
> - * the start_cs_cmd atom.
> - */
> - r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);
> -
> - /* Initialize all the compute specific registers.
> + /* Initialize all the compute-related registers.
> *
> * See evergreen_init_atom_start_compute_cs() in this file for the list
> - * of registers initialized by the start_compuet_cs_cmd atom.
> + * of registers initialized by the start_compute_cs_cmd atom.
> */
> r600_emit_atom(ctx, &ctx->start_compute_cs_cmd.atom);
>
> @@ -590,11 +580,10 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
> int num_threads;
> int num_stack_entries;
>
> - /* We aren't passing the EMIT_EARLY flag as the third argument
> - * because we will be emitting this atom manually in order to
> - * ensure it gets emitted after the start_cs_cmd atom.
> + /* since all required registers are initialised in the
> + * start_compute_cs_cmd atom, we can EMIT_EARLY here.
> */
> - r600_init_command_buffer(cb, 256, 0);
> + r600_init_command_buffer(cb, 256, EMIT_EARLY);
> cb->pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
>
> switch (ctx->family) {
> @@ -643,6 +632,8 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
> }
>
> /* Config Registers */
> + evergreen_init_common_regs(cb, ctx->chip_class
> + , ctx->family, ctx->screen->info.drm_minor);
>
> /* The primitive type always needs to be POINTLIST for compute. */
> r600_store_config_reg(cb, R_008958_VGT_PRIMITIVE_TYPE,
> diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
> index 67ae7d3..addc36a 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -1901,19 +1901,13 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
> r600_store_value(cb, 0x80000000);
> r600_store_value(cb, 0x80000000);
>
> + cayman_init_common_regs(cb);
> +
> r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
> r600_store_value(cb, S_008C00_EXPORT_SRC_C(1)); /* R_008C00_SQ_CONFIG */
> /* always set the temp clauses */
> r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(4)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
>
> - r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
> - r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
> - r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
> -
> - r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
> -
> - r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
> -
> r600_store_context_reg_seq(cb, R_028A10_VGT_OUTPUT_PATH_CNTL, 13);
> r600_store_value(cb, 0); /* R_028A10_VGT_OUTPUT_PATH_CNTL */
> r600_store_value(cb, 0); /* R_028A14_VGT_HOS_CNTL */
> @@ -1929,16 +1923,77 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
> r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */
> r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE */
>
> - r600_store_context_reg_seq(cb, R_028B94_VGT_STRMOUT_CONFIG, 2);
> - r600_store_value(cb, 0); /* R_028B94_VGT_STRMOUT_CONFIG */
> - r600_store_value(cb, 0); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */
> -
> r600_store_context_reg_seq(cb, R_028AB4_VGT_REUSE_OFF, 2);
> r600_store_value(cb, 0); /* R_028AB4_VGT_REUSE_OFF */
> r600_store_value(cb, 0); /* R_028AB8_VGT_VTX_CNT_EN */
>
> r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1);
>
> + r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
> +
> + r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
> + r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
> + r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */
> +
> + r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0);
> +
> + r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0);
> + r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
> +
> + r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 0x0000043F);
> + r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0);
> +
> + r600_store_context_reg_seq(cb, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
> + r600_store_value(cb, 0x00000400); /* CM_R_028BDC_PA_SC_LINE_CNTL */
> + r600_store_value(cb, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
> +
> + r600_store_context_reg_seq(cb, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
> + r600_store_value(cb, 0x3F800000); /* CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
> + r600_store_value(cb, 0x3F800000); /* CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
> + r600_store_value(cb, 0x3F800000); /* CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
> + r600_store_value(cb, 0x3F800000); /* CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
> +
> + r600_store_context_reg_seq(cb, R_028240_PA_SC_GENERIC_SCISSOR_TL, 2);
> + r600_store_value(cb, 0); /* R_028240_PA_SC_GENERIC_SCISSOR_TL */
> + r600_store_value(cb, S_028244_BR_X(16384) | S_028244_BR_Y(16384)); /* R_028244_PA_SC_GENERIC_SCISSOR_BR */
> +
> + r600_store_context_reg_seq(cb, R_028030_PA_SC_SCREEN_SCISSOR_TL, 2);
> + r600_store_value(cb, 0); /* R_028030_PA_SC_SCREEN_SCISSOR_TL */
> + r600_store_value(cb, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); /* R_028034_PA_SC_SCREEN_SCISSOR_BR */
> +
> + r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0);
> +
> + r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
> + if (rctx->screen->has_streamout) {
> + r600_store_context_reg(cb, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
> + }
> +
> + eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF);
> + eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF);
> +}
> +
> +void cayman_init_common_regs(struct r600_command_buffer *cb)
> +{
> + r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
> + r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
> + r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
> +
> + r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
> +
> + r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
> +
> + r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
> + r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
> + r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
> +
> + r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
> +
> + r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
> +
> + r600_store_context_reg_seq(cb, R_028B94_VGT_STRMOUT_CONFIG, 2);
> + r600_store_value(cb, 0); /* R_028B94_VGT_STRMOUT_CONFIG */
> + r600_store_value(cb, 0); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */
> +
> r600_store_context_reg(cb, CM_R_028AA8_IA_MULTI_VGT_PARAM, S_028AA8_SWITCH_ON_EOP(1) | S_028AA8_PARTIAL_VS_WAVE_ON(1) | S_028AA8_PRIMGROUP_SIZE(63));
>
> r600_store_context_reg_seq(cb, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
> @@ -1987,75 +2042,43 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
> r600_store_value(cb, ~0); /* R_028400_VGT_MAX_VTX_INDX */
> r600_store_value(cb, 0); /* R_028404_VGT_MIN_VTX_INDX */
>
> - r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
> -
> r600_store_context_reg_seq(cb, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
> r600_store_value(cb, ~0); /* CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0 */
> r600_store_value(cb, ~0); /* CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1 */
>
> - r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
> - r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
> - r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */
> -
> - r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0);
> -
> r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3);
> r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */
> r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */
> r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */
>
> - r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0);
> - r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
> -
> r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
> r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */
> r600_store_value(cb, 0x3F800000); /* R_0282D4_PA_SC_VPORT_ZMAX_0 */
>
> r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
> - r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 0x0000043F);
> - r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0);
> - r600_store_context_reg(cb, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00);
> -
> - r600_store_context_reg_seq(cb, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
> - r600_store_value(cb, 0x00000400); /* CM_R_028BDC_PA_SC_LINE_CNTL */
> - r600_store_value(cb, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
> -
> - r600_store_context_reg_seq(cb, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
> - r600_store_value(cb, 0x3F800000); /* CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
> - r600_store_value(cb, 0x3F800000); /* CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
> - r600_store_value(cb, 0x3F800000); /* CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
> - r600_store_value(cb, 0x3F800000); /* CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
> -
> - r600_store_context_reg_seq(cb, R_028240_PA_SC_GENERIC_SCISSOR_TL, 2);
> - r600_store_value(cb, 0); /* R_028240_PA_SC_GENERIC_SCISSOR_TL */
> - r600_store_value(cb, S_028244_BR_X(16384) | S_028244_BR_Y(16384)); /* R_028244_PA_SC_GENERIC_SCISSOR_BR */
>
> - r600_store_context_reg_seq(cb, R_028030_PA_SC_SCREEN_SCISSOR_TL, 2);
> - r600_store_value(cb, 0); /* R_028030_PA_SC_SCREEN_SCISSOR_TL */
> - r600_store_value(cb, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); /* R_028034_PA_SC_SCREEN_SCISSOR_BR */
> + r600_store_context_reg(cb, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00);
>
> r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
> r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
> - r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0);
>
> r600_store_context_reg(cb, R_028354_SX_SURFACE_SYNC, S_028354_SURFACE_SYNC_MASK(0xf));
> - r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
> - if (rctx->screen->has_streamout) {
> - r600_store_context_reg(cb, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
> - }
> -
> - eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF);
> - eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF);
> }
>
> -void evergreen_init_atom_start_cs(struct r600_context *rctx)
> +void evergreen_init_common_regs(struct r600_command_buffer *cb
> + , enum chip_class ctx_chip_class
> + , enum radeon_family ctx_family
> + , int ctx_drm_minor)
> {
> - struct r600_command_buffer *cb = &rctx->start_cs_cmd;
> int ps_prio;
> int vs_prio;
> int gs_prio;
> int es_prio;
> - int hs_prio, cs_prio, ls_prio;
> +
> + int hs_prio;
> + int cs_prio;
> + int ls_prio;
> +
> int num_ps_gprs;
> int num_vs_gprs;
> int num_gs_gprs;
> @@ -2063,12 +2086,213 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
> int num_hs_gprs;
> int num_ls_gprs;
> int num_temp_gprs;
> +
> + unsigned tmp;
> +
> + if (ctx_chip_class == CAYMAN) {
> + cayman_init_common_regs(cb);
> + return;
> + }
> +
> + ps_prio = 0;
> + vs_prio = 1;
> + gs_prio = 2;
> + es_prio = 3;
> + hs_prio = 0;
> + ls_prio = 0;
> + cs_prio = 0;
> +
> + switch (ctx_family) {
> + case CHIP_CEDAR:
> + default:
> + num_ps_gprs = 93;
> + num_vs_gprs = 46;
> + num_temp_gprs = 4;
> + num_gs_gprs = 31;
> + num_es_gprs = 31;
> + num_hs_gprs = 23;
> + num_ls_gprs = 23;
> + break;
> + case CHIP_REDWOOD:
> + num_ps_gprs = 93;
> + num_vs_gprs = 46;
> + num_temp_gprs = 4;
> + num_gs_gprs = 31;
> + num_es_gprs = 31;
> + num_hs_gprs = 23;
> + num_ls_gprs = 23;
> + break;
> + case CHIP_JUNIPER:
> + num_ps_gprs = 93;
> + num_vs_gprs = 46;
> + num_temp_gprs = 4;
> + num_gs_gprs = 31;
> + num_es_gprs = 31;
> + num_hs_gprs = 23;
> + num_ls_gprs = 23;
> + break;
> + case CHIP_CYPRESS:
> + case CHIP_HEMLOCK:
> + num_ps_gprs = 93;
> + num_vs_gprs = 46;
> + num_temp_gprs = 4;
> + num_gs_gprs = 31;
> + num_es_gprs = 31;
> + num_hs_gprs = 23;
> + num_ls_gprs = 23;
> + break;
> + case CHIP_PALM:
> + num_ps_gprs = 93;
> + num_vs_gprs = 46;
> + num_temp_gprs = 4;
> + num_gs_gprs = 31;
> + num_es_gprs = 31;
> + num_hs_gprs = 23;
> + num_ls_gprs = 23;
> + break;
> + case CHIP_SUMO:
> + num_ps_gprs = 93;
> + num_vs_gprs = 46;
> + num_temp_gprs = 4;
> + num_gs_gprs = 31;
> + num_es_gprs = 31;
> + num_hs_gprs = 23;
> + num_ls_gprs = 23;
> + break;
> + case CHIP_SUMO2:
> + num_ps_gprs = 93;
> + num_vs_gprs = 46;
> + num_temp_gprs = 4;
> + num_gs_gprs = 31;
> + num_es_gprs = 31;
> + num_hs_gprs = 23;
> + num_ls_gprs = 23;
> + break;
> + case CHIP_BARTS:
> + num_ps_gprs = 93;
> + num_vs_gprs = 46;
> + num_temp_gprs = 4;
> + num_gs_gprs = 31;
> + num_es_gprs = 31;
> + num_hs_gprs = 23;
> + num_ls_gprs = 23;
> + break;
> + case CHIP_TURKS:
> + num_ps_gprs = 93;
> + num_vs_gprs = 46;
> + num_temp_gprs = 4;
> + num_gs_gprs = 31;
> + num_es_gprs = 31;
> + num_hs_gprs = 23;
> + num_ls_gprs = 23;
> + break;
> + case CHIP_CAICOS:
> + num_ps_gprs = 93;
> + num_vs_gprs = 46;
> + num_temp_gprs = 4;
> + num_gs_gprs = 31;
> + num_es_gprs = 31;
> + num_hs_gprs = 23;
> + num_ls_gprs = 23;
> + break;
> + }
> +
> + tmp = 0;
> + switch (ctx_family) {
> + case CHIP_CEDAR:
> + case CHIP_PALM:
> + case CHIP_SUMO:
> + case CHIP_SUMO2:
> + case CHIP_CAICOS:
> + break;
> + default:
> + tmp |= S_008C00_VC_ENABLE(1);
> + break;
> + }
> + tmp |= S_008C00_EXPORT_SRC_C(1);
> + tmp |= S_008C00_CS_PRIO(cs_prio);
> + tmp |= S_008C00_LS_PRIO(ls_prio);
> + tmp |= S_008C00_HS_PRIO(hs_prio);
> + tmp |= S_008C00_PS_PRIO(ps_prio);
> + tmp |= S_008C00_VS_PRIO(vs_prio);
> + tmp |= S_008C00_GS_PRIO(gs_prio);
> + tmp |= S_008C00_ES_PRIO(es_prio);
> +
> + /* enable dynamic GPR resource management */
> + if (ctx_drm_minor >= 7) {
> + r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
> + r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
> + /* always set temp clauses */
> + r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
> + r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
> + r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
> + r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
> + r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
> + r600_store_context_reg(cb, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
> + S_028838_PS_GPRS(0x1e) |
> + S_028838_VS_GPRS(0x1e) |
> + S_028838_GS_GPRS(0x1e) |
> + S_028838_ES_GPRS(0x1e) |
> + S_028838_HS_GPRS(0x1e) |
> + S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
> + } else {
> + r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 4);
> + r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
> +
> + tmp = S_008C04_NUM_PS_GPRS(num_ps_gprs);
> + tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
> + tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
> + r600_store_value(cb, tmp); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
> +
> + tmp = S_008C08_NUM_GS_GPRS(num_gs_gprs);
> + tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
> + r600_store_value(cb, tmp); /* R_008C08_SQ_GPR_RESOURCE_MGMT_2 */
> +
> + tmp = S_008C0C_NUM_HS_GPRS(num_hs_gprs);
> + tmp |= S_008C0C_NUM_HS_GPRS(num_ls_gprs);
> + r600_store_value(cb, tmp); /* R_008C0C_SQ_GPR_RESOURCE_MGMT_3 */
> + }
> +
> + r600_store_config_reg(cb, R_008E2C_SQ_LDS_RESOURCE_MGMT,
> + S_008E2C_NUM_PS_LDS(0x1000) | S_008E2C_NUM_LS_LDS(0x1000));
> +
> + r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
> +
> + r600_store_context_reg_seq(cb, R_028B94_VGT_STRMOUT_CONFIG, 2);
> + r600_store_value(cb, 0); /* R_028B94_VGT_STRMOUT_CONFIG */
> + r600_store_value(cb, 0); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */
> +
> + r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
> +
> + r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
> + r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */
> + r600_store_value(cb, 0x3F800000); /* R_0282D4_PA_SC_VPORT_ZMAX_0 */
> +
> + r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3);
> + r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */
> + r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */
> + r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */
> +
> + r600_store_context_reg(cb, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00);
> +
> + r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
> + r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
> +
> + r600_store_context_reg(cb, R_028354_SX_SURFACE_SYNC, S_028354_SURFACE_SYNC_MASK(0xf));
> +
> + return;
> +}
> +
> +void evergreen_init_atom_start_cs(struct r600_context *rctx)
> +{
> + struct r600_command_buffer *cb = &rctx->start_cs_cmd;
> int num_ps_threads;
> int num_vs_threads;
> int num_gs_threads;
> int num_es_threads;
> int num_hs_threads;
> int num_ls_threads;
> +
> int num_ps_stack_entries;
> int num_vs_stack_entries;
> int num_gs_stack_entries;
> @@ -2090,25 +2314,13 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
> r600_store_value(cb, 0x80000000);
> r600_store_value(cb, 0x80000000);
>
> - family = rctx->family;
> - ps_prio = 0;
> - vs_prio = 1;
> - gs_prio = 2;
> - es_prio = 3;
> - hs_prio = 0;
> - ls_prio = 0;
> - cs_prio = 0;
> + evergreen_init_common_regs(cb, rctx->chip_class
> + , rctx->family, rctx->screen->info.drm_minor);
>
> + family = rctx->family;
> switch (family) {
> case CHIP_CEDAR:
> default:
> - num_ps_gprs = 93;
> - num_vs_gprs = 46;
> - num_temp_gprs = 4;
> - num_gs_gprs = 31;
> - num_es_gprs = 31;
> - num_hs_gprs = 23;
> - num_ls_gprs = 23;
> num_ps_threads = 96;
> num_vs_threads = 16;
> num_gs_threads = 16;
> @@ -2123,13 +2335,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
> num_ls_stack_entries = 42;
> break;
> case CHIP_REDWOOD:
> - num_ps_gprs = 93;
> - num_vs_gprs = 46;
> - num_temp_gprs = 4;
> - num_gs_gprs = 31;
> - num_es_gprs = 31;
> - num_hs_gprs = 23;
> - num_ls_gprs = 23;
> num_ps_threads = 128;
> num_vs_threads = 20;
> num_gs_threads = 20;
> @@ -2144,13 +2349,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
> num_ls_stack_entries = 42;
> break;
> case CHIP_JUNIPER:
> - num_ps_gprs = 93;
> - num_vs_gprs = 46;
> - num_temp_gprs = 4;
> - num_gs_gprs = 31;
> - num_es_gprs = 31;
> - num_hs_gprs = 23;
> - num_ls_gprs = 23;
> num_ps_threads = 128;
> num_vs_threads = 20;
> num_gs_threads = 20;
> @@ -2166,13 +2364,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
> break;
> case CHIP_CYPRESS:
> case CHIP_HEMLOCK:
> - num_ps_gprs = 93;
> - num_vs_gprs = 46;
> - num_temp_gprs = 4;
> - num_gs_gprs = 31;
> - num_es_gprs = 31;
> - num_hs_gprs = 23;
> - num_ls_gprs = 23;
> num_ps_threads = 128;
> num_vs_threads = 20;
> num_gs_threads = 20;
> @@ -2187,13 +2378,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
> num_ls_stack_entries = 85;
> break;
> case CHIP_PALM:
> - num_ps_gprs = 93;
> - num_vs_gprs = 46;
> - num_temp_gprs = 4;
> - num_gs_gprs = 31;
> - num_es_gprs = 31;
> - num_hs_gprs = 23;
> - num_ls_gprs = 23;
> num_ps_threads = 96;
> num_vs_threads = 16;
> num_gs_threads = 16;
> @@ -2208,13 +2392,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
> num_ls_stack_entries = 42;
> break;
> case CHIP_SUMO:
> - num_ps_gprs = 93;
> - num_vs_gprs = 46;
> - num_temp_gprs = 4;
> - num_gs_gprs = 31;
> - num_es_gprs = 31;
> - num_hs_gprs = 23;
> - num_ls_gprs = 23;
> num_ps_threads = 96;
> num_vs_threads = 25;
> num_gs_threads = 25;
> @@ -2229,13 +2406,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
> num_ls_stack_entries = 42;
> break;
> case CHIP_SUMO2:
> - num_ps_gprs = 93;
> - num_vs_gprs = 46;
> - num_temp_gprs = 4;
> - num_gs_gprs = 31;
> - num_es_gprs = 31;
> - num_hs_gprs = 23;
> - num_ls_gprs = 23;
> num_ps_threads = 96;
> num_vs_threads = 25;
> num_gs_threads = 25;
> @@ -2250,13 +2420,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
> num_ls_stack_entries = 85;
> break;
> case CHIP_BARTS:
> - num_ps_gprs = 93;
> - num_vs_gprs = 46;
> - num_temp_gprs = 4;
> - num_gs_gprs = 31;
> - num_es_gprs = 31;
> - num_hs_gprs = 23;
> - num_ls_gprs = 23;
> num_ps_threads = 128;
> num_vs_threads = 20;
> num_gs_threads = 20;
> @@ -2271,13 +2434,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
> num_ls_stack_entries = 85;
> break;
> case CHIP_TURKS:
> - num_ps_gprs = 93;
> - num_vs_gprs = 46;
> - num_temp_gprs = 4;
> - num_gs_gprs = 31;
> - num_es_gprs = 31;
> - num_hs_gprs = 23;
> - num_ls_gprs = 23;
> num_ps_threads = 128;
> num_vs_threads = 20;
> num_gs_threads = 20;
> @@ -2292,13 +2448,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
> num_ls_stack_entries = 42;
> break;
> case CHIP_CAICOS:
> - num_ps_gprs = 93;
> - num_vs_gprs = 46;
> - num_temp_gprs = 4;
> - num_gs_gprs = 31;
> - num_es_gprs = 31;
> - num_hs_gprs = 23;
> - num_ls_gprs = 23;
> num_ps_threads = 128;
> num_vs_threads = 10;
> num_gs_threads = 10;
> @@ -2314,66 +2463,11 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
> break;
> }
>
> - tmp = 0;
> - switch (family) {
> - case CHIP_CEDAR:
> - case CHIP_PALM:
> - case CHIP_SUMO:
> - case CHIP_SUMO2:
> - case CHIP_CAICOS:
> - break;
> - default:
> - tmp |= S_008C00_VC_ENABLE(1);
> - break;
> - }
> - tmp |= S_008C00_EXPORT_SRC_C(1);
> - tmp |= S_008C00_CS_PRIO(cs_prio);
> - tmp |= S_008C00_LS_PRIO(ls_prio);
> - tmp |= S_008C00_HS_PRIO(hs_prio);
> - tmp |= S_008C00_PS_PRIO(ps_prio);
> - tmp |= S_008C00_VS_PRIO(vs_prio);
> - tmp |= S_008C00_GS_PRIO(gs_prio);
> - tmp |= S_008C00_ES_PRIO(es_prio);
> -
> - /* enable dynamic GPR resource management */
> - if (rctx->screen->info.drm_minor >= 7) {
> - r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
> - r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
> - /* always set temp clauses */
> - r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
> - r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
> - r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
> - r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
> - r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
> - r600_store_context_reg(cb, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
> - S_028838_PS_GPRS(0x1e) |
> - S_028838_VS_GPRS(0x1e) |
> - S_028838_GS_GPRS(0x1e) |
> - S_028838_ES_GPRS(0x1e) |
> - S_028838_HS_GPRS(0x1e) |
> - S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
> - } else {
> - r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 4);
> - r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
> -
> - tmp = S_008C04_NUM_PS_GPRS(num_ps_gprs);
> - tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
> - tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
> - r600_store_value(cb, tmp); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
> -
> - tmp = S_008C08_NUM_GS_GPRS(num_gs_gprs);
> - tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
> - r600_store_value(cb, tmp); /* R_008C08_SQ_GPR_RESOURCE_MGMT_2 */
> -
> - tmp = S_008C0C_NUM_HS_GPRS(num_hs_gprs);
> - tmp |= S_008C0C_NUM_HS_GPRS(num_ls_gprs);
> - r600_store_value(cb, tmp); /* R_008C0C_SQ_GPR_RESOURCE_MGMT_3 */
> - }
> -
> tmp = S_008C18_NUM_PS_THREADS(num_ps_threads);
> tmp |= S_008C18_NUM_VS_THREADS(num_vs_threads);
> tmp |= S_008C18_NUM_GS_THREADS(num_gs_threads);
> tmp |= S_008C18_NUM_ES_THREADS(num_es_threads);
> +
> r600_store_config_reg_seq(cb, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 5);
> r600_store_value(cb, tmp); /* R_008C18_SQ_THREAD_RESOURCE_MGMT_1 */
>
> @@ -2393,14 +2487,9 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
> tmp |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries);
> r600_store_value(cb, tmp); /* R_008C28_SQ_STACK_RESOURCE_MGMT_3 */
>
> - r600_store_config_reg(cb, R_008E2C_SQ_LDS_RESOURCE_MGMT,
> - S_008E2C_NUM_PS_LDS(0x1000) | S_008E2C_NUM_LS_LDS(0x1000));
> -
> r600_store_config_reg(cb, R_009100_SPI_CONFIG_CNTL, 0);
> r600_store_config_reg(cb, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4));
>
> - r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
> -
> r600_store_context_reg_seq(cb, R_028900_SQ_ESGS_RING_ITEMSIZE, 6);
> r600_store_value(cb, 0); /* R_028900_SQ_ESGS_RING_ITEMSIZE */
> r600_store_value(cb, 0); /* R_028904_SQ_GSVS_RING_ITEMSIZE */
> @@ -2430,10 +2519,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
> r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */
> r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE */
>
> - r600_store_context_reg_seq(cb, R_028B94_VGT_STRMOUT_CONFIG, 2);
> - r600_store_value(cb, 0); /* R_028B94_VGT_STRMOUT_CONFIG */
> - r600_store_value(cb, 0); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */
> -
> r600_store_context_reg_seq(cb, R_028AB4_VGT_REUSE_OFF, 2);
> r600_store_value(cb, 0); /* R_028AB4_VGT_REUSE_OFF */
> r600_store_value(cb, 0); /* R_028AB8_VGT_VTX_CNT_EN */
> @@ -2484,23 +2569,11 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
>
> r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0);
> r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
> - r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
> -
> - r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
> - r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */
> - r600_store_value(cb, 0x3F800000); /* R_0282D4_PA_SC_VPORT_ZMAX_0 */
>
> r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0);
> r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 0x0000043F);
> r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0);
>
> - r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3);
> - r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */
> - r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */
> - r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */
> -
> - r600_store_context_reg(cb, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00);
> -
> r600_store_context_reg_seq(cb, R_028C00_PA_SC_LINE_CNTL, 2);
> r600_store_value(cb, 0x00000400); /* R_028C00_PA_SC_LINE_CNTL */
> r600_store_value(cb, 0); /* R_028C04_PA_SC_AA_CONFIG */
> @@ -2522,11 +2595,8 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
> r600_store_value(cb, 0); /* R_028030_PA_SC_SCREEN_SCISSOR_TL */
> r600_store_value(cb, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); /* R_028034_PA_SC_SCREEN_SCISSOR_BR */
>
> - r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
> - r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
> r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0);
>
> - r600_store_context_reg(cb, R_028354_SX_SURFACE_SYNC, S_028354_SURFACE_SYNC_MASK(0xf));
> r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
> if (rctx->screen->has_streamout) {
> r600_store_context_reg(cb, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
> diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
> index 0464183..cf2e61e 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -447,6 +447,13 @@ static INLINE void r600_atom_dirty(struct r600_context *rctx, struct r600_atom *
> }
>
> /* evergreen_state.c */
> +void cayman_init_common_regs(struct r600_command_buffer *cb);
> +
> +void evergreen_init_common_regs(struct r600_command_buffer *cb,
> + enum chip_class ctx_chip_class,
> + enum radeon_family ctx_family,
> + int ctx_drm_minor);
> +
> void evergreen_init_state_functions(struct r600_context *rctx);
> void evergreen_init_atom_start_cs(struct r600_context *rctx);
> void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader);
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list