[Mesa-dev] [PATCH 1/8] radeonsi: mask out high VM address bits in registers where needed

Marek Olšák maraeo at gmail.com
Mon Feb 26 11:04:01 UTC 2018


I don't know if it fixes anything, but better safe than sorry.

Marek

On Mon, Feb 26, 2018 at 11:55 AM, Samuel Pitoiset
<samuel.pitoiset at gmail.com> wrote:
>
>
> On 02/26/2018 10:28 AM, Ernst Sjöstrand wrote:
>>
>> I guess this fixes something since it's nominated for stable?
> >> If so, the commit message could say what it fixes.
>
>
> Yeah, would be nice to know why this is needed, thanks!
>
>
>>
>> Regards
>> //Ernst
>>
>> 2018-02-25 2:02 GMT+01:00 Marek Olšák <maraeo at gmail.com>:
>>>
>>> From: Marek Olšák <marek.olsak at amd.com>
>>>
>>> Cc: 17.3 18.0 <mesa-stable at lists.freedesktop.org>
>>> ---
>>>   src/gallium/drivers/radeonsi/si_compute.c       |  4 ++--
>>>   src/gallium/drivers/radeonsi/si_state.c         | 24
>>> +++++++++++++-----------
>>>   src/gallium/drivers/radeonsi/si_state_shaders.c | 18 +++++++++---------
>>>   3 files changed, 24 insertions(+), 22 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/radeonsi/si_compute.c
>>> b/src/gallium/drivers/radeonsi/si_compute.c
>>> index 4192798..46873cc 100644
>>> --- a/src/gallium/drivers/radeonsi/si_compute.c
>>> +++ b/src/gallium/drivers/radeonsi/si_compute.c
>>> @@ -323,21 +323,21 @@ static void si_initialize_compute(struct si_context
>>> *sctx)
>>>                  radeon_set_sh_reg(cs, R_00B82C_COMPUTE_MAX_WAVE_ID,
>>>                                    0x190 /* Default value */);
>>>          }
>>>
>>>          /* Set the pointer to border colors. */
>>>          bc_va = sctx->border_color_buffer->gpu_address;
>>>
>>>          if (sctx->b.chip_class >= CIK) {
>>>                  radeon_set_uconfig_reg_seq(cs,
>>> R_030E00_TA_CS_BC_BASE_ADDR, 2);
>>>                  radeon_emit(cs, bc_va >> 8);  /*
>>> R_030E00_TA_CS_BC_BASE_ADDR */
>>> -               radeon_emit(cs, bc_va >> 40); /*
>>> R_030E04_TA_CS_BC_BASE_ADDR_HI */
>>> +               radeon_emit(cs, S_030E04_ADDRESS(bc_va >> 40)); /*
>>> R_030E04_TA_CS_BC_BASE_ADDR_HI */
>>>          } else {
>>>                  if (sctx->screen->info.drm_major == 3 ||
>>>                      (sctx->screen->info.drm_major == 2 &&
>>>                       sctx->screen->info.drm_minor >= 48)) {
>>>                          radeon_set_config_reg(cs,
>>> R_00950C_TA_CS_BC_BASE_ADDR,
>>>                                                bc_va >> 8);
>>>                  }
>>>          }
>>>
>>>          sctx->cs_shader_state.emitted_program = NULL;
>>> @@ -460,21 +460,21 @@ static bool si_switch_compute_shader(struct
>>> si_context *sctx,
>>>                  /* Shader code is placed after the amd_kernel_code_t
>>>                   * struct. */
>>>                  shader_va += sizeof(amd_kernel_code_t);
>>>          }
>>>
>>>          radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, shader->bo,
>>>                                    RADEON_USAGE_READ,
>>> RADEON_PRIO_SHADER_BINARY);
>>>
>>>          radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2);
>>>          radeon_emit(cs, shader_va >> 8);
>>> -       radeon_emit(cs, shader_va >> 40);
>>> +       radeon_emit(cs, S_00B834_DATA(shader_va >> 40));
>>>
>>>          radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
>>>          radeon_emit(cs, config->rsrc1);
>>>          radeon_emit(cs, config->rsrc2);
>>>
>>>          COMPUTE_DBG(sctx->screen, "COMPUTE_PGM_RSRC1: 0x%08x "
>>>                  "COMPUTE_PGM_RSRC2: 0x%08x\n", config->rsrc1,
>>> config->rsrc2);
>>>
>>>          radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
>>>                    S_00B860_WAVES(sctx->scratch_waves)
>>> diff --git a/src/gallium/drivers/radeonsi/si_state.c
>>> b/src/gallium/drivers/radeonsi/si_state.c
>>> index a6ec427..6c82257 100644
>>> --- a/src/gallium/drivers/radeonsi/si_state.c
>>> +++ b/src/gallium/drivers/radeonsi/si_state.c
>>> @@ -3039,34 +3039,34 @@ static void si_emit_framebuffer_state(struct
>>> si_context *sctx, struct r600_atom
>>>                          cb_color_base |= tex->surface.tile_swizzle;
>>>                          if (!tex->fmask.size)
>>>                                  cb_color_fmask = cb_color_base;
>>>                          cb_color_attrib |=
>>> S_028C74_COLOR_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode) |
>>>
>>> S_028C74_FMASK_SW_MODE(tex->surface.u.gfx9.fmask.swizzle_mode) |
>>>
>>> S_028C74_RB_ALIGNED(meta.rb_aligned) |
>>>
>>> S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
>>>
>>>                          radeon_set_context_reg_seq(cs,
>>> R_028C60_CB_COLOR0_BASE + i * 0x3C, 15);
>>>                          radeon_emit(cs, cb_color_base);         /*
>>> CB_COLOR0_BASE */
>>> -                       radeon_emit(cs, cb_color_base >> 32);   /*
>>> CB_COLOR0_BASE_EXT */
>>> +                       radeon_emit(cs, S_028C64_BASE_256B(cb_color_base
>>> >> 32)); /* CB_COLOR0_BASE_EXT */
>>>                          radeon_emit(cs, cb->cb_color_attrib2);  /*
>>> CB_COLOR0_ATTRIB2 */
>>>                          radeon_emit(cs, cb->cb_color_view);     /*
>>> CB_COLOR0_VIEW */
>>>                          radeon_emit(cs, cb_color_info);         /*
>>> CB_COLOR0_INFO */
>>>                          radeon_emit(cs, cb_color_attrib);       /*
>>> CB_COLOR0_ATTRIB */
>>>                          radeon_emit(cs, cb->cb_dcc_control);    /*
>>> CB_COLOR0_DCC_CONTROL */
>>>                          radeon_emit(cs, tex->cmask.base_address_reg); /*
>>> CB_COLOR0_CMASK */
>>> -                       radeon_emit(cs, tex->cmask.base_address_reg >>
>>> 32); /* CB_COLOR0_CMASK_BASE_EXT */
>>> +                       radeon_emit(cs,
>>> S_028C80_BASE_256B(tex->cmask.base_address_reg >> 32)); /*
>>> CB_COLOR0_CMASK_BASE_EXT */
>>>                          radeon_emit(cs, cb_color_fmask);        /*
>>> CB_COLOR0_FMASK */
>>> -                       radeon_emit(cs, cb_color_fmask >> 32);  /*
>>> CB_COLOR0_FMASK_BASE_EXT */
>>> +                       radeon_emit(cs, S_028C88_BASE_256B(cb_color_fmask
>>> >> 32)); /* CB_COLOR0_FMASK_BASE_EXT */
>>>                          radeon_emit(cs, tex->color_clear_value[0]); /*
>>> CB_COLOR0_CLEAR_WORD0 */
>>>                          radeon_emit(cs, tex->color_clear_value[1]); /*
>>> CB_COLOR0_CLEAR_WORD1 */
>>>                          radeon_emit(cs, cb_dcc_base);           /*
>>> CB_COLOR0_DCC_BASE */
>>> -                       radeon_emit(cs, cb_dcc_base >> 32);     /*
>>> CB_COLOR0_DCC_BASE_EXT */
>>> +                       radeon_emit(cs, S_028C98_BASE_256B(cb_dcc_base >>
>>> 32)); /* CB_COLOR0_DCC_BASE_EXT */
>>>
>>>                          radeon_set_context_reg(cs,
>>> R_0287A0_CB_MRT0_EPITCH + i * 4,
>>>
>>> S_0287A0_EPITCH(tex->surface.u.gfx9.surf.epitch));
>>>                  } else {
>>>                          /* Compute mutable surface parameters
>>> (SI-CI-VI). */
>>>                          const struct legacy_surf_level *level_info =
>>>
>>> &tex->surface.u.legacy.level[cb->base.u.tex.level];
>>>                          unsigned pitch_tile_max, slice_tile_max,
>>> tile_mode_index;
>>>                          unsigned cb_color_pitch, cb_color_slice,
>>> cb_color_fmask_slice;
>>>
>>> @@ -3133,35 +3133,35 @@ static void si_emit_framebuffer_state(struct
>>> si_context *sctx, struct r600_atom
>>>
>>>                  radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
>>>                                        &rtex->resource,
>>> RADEON_USAGE_READWRITE,
>>>                                        zb->base.texture->nr_samples > 1 ?
>>>
>>> RADEON_PRIO_DEPTH_BUFFER_MSAA :
>>>                                                RADEON_PRIO_DEPTH_BUFFER);
>>>
>>>                  if (sctx->b.chip_class >= GFX9) {
>>>                          radeon_set_context_reg_seq(cs,
>>> R_028014_DB_HTILE_DATA_BASE, 3);
>>>                          radeon_emit(cs, zb->db_htile_data_base);
>>> /* DB_HTILE_DATA_BASE */
>>> -                       radeon_emit(cs, zb->db_htile_data_base >> 32);
>>> /* DB_HTILE_DATA_BASE_HI */
>>> +                       radeon_emit(cs,
>>> S_028018_BASE_HI(zb->db_htile_data_base >> 32)); /* DB_HTILE_DATA_BASE_HI */
>>>                          radeon_emit(cs, zb->db_depth_size);
>>> /* DB_DEPTH_SIZE */
>>>
>>>                          radeon_set_context_reg_seq(cs,
>>> R_028038_DB_Z_INFO, 10);
>>>                          radeon_emit(cs, zb->db_z_info |
>>> /* DB_Z_INFO */
>>>
>>> S_028038_ZRANGE_PRECISION(rtex->depth_clear_value != 0));
>>>                          radeon_emit(cs, zb->db_stencil_info);
>>> /* DB_STENCIL_INFO */
>>>                          radeon_emit(cs, zb->db_depth_base);
>>> /* DB_Z_READ_BASE */
>>> -                       radeon_emit(cs, zb->db_depth_base >> 32);
>>> /* DB_Z_READ_BASE_HI */
>>> +                       radeon_emit(cs,
>>> S_028044_BASE_HI(zb->db_depth_base >> 32)); /* DB_Z_READ_BASE_HI */
>>>                          radeon_emit(cs, zb->db_stencil_base);
>>> /* DB_STENCIL_READ_BASE */
>>> -                       radeon_emit(cs, zb->db_stencil_base >> 32);
>>> /* DB_STENCIL_READ_BASE_HI */
>>> +                       radeon_emit(cs,
>>> S_02804C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_READ_BASE_HI */
>>>                          radeon_emit(cs, zb->db_depth_base);
>>> /* DB_Z_WRITE_BASE */
>>> -                       radeon_emit(cs, zb->db_depth_base >> 32);
>>> /* DB_Z_WRITE_BASE_HI */
>>> +                       radeon_emit(cs,
>>> S_028054_BASE_HI(zb->db_depth_base >> 32)); /* DB_Z_WRITE_BASE_HI */
>>>                          radeon_emit(cs, zb->db_stencil_base);
>>> /* DB_STENCIL_WRITE_BASE */
>>> -                       radeon_emit(cs, zb->db_stencil_base >> 32);
>>> /* DB_STENCIL_WRITE_BASE_HI */
>>> +                       radeon_emit(cs,
>>> S_02805C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */
>>>
>>>                          radeon_set_context_reg_seq(cs,
>>> R_028068_DB_Z_INFO2, 2);
>>>                          radeon_emit(cs, zb->db_z_info2);        /*
>>> DB_Z_INFO2 */
>>>                          radeon_emit(cs, zb->db_stencil_info2);  /*
>>> DB_STENCIL_INFO2 */
>>>                  } else {
>>>                          radeon_set_context_reg(cs,
>>> R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
>>>
>>>                          radeon_set_context_reg_seq(cs,
>>> R_02803C_DB_DEPTH_INFO, 9);
>>>                          radeon_emit(cs, zb->db_depth_info);     /*
>>> DB_DEPTH_INFO */
>>>                          radeon_emit(cs, zb->db_z_info |         /*
>>> DB_Z_INFO */
>>> @@ -5026,22 +5026,24 @@ static void si_init_config(struct si_context
>>> *sctx)
>>>                      sctx->b.family >= CHIP_POLARIS10)
>>>                          vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
>>>
>>>                  si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
>>> vgt_tess_distribution);
>>>          } else if (!has_clear_state) {
>>>                  si_pm4_set_reg(pm4,
>>> R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
>>>                  si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
>>>          }
>>>
>>>          si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >>
>>> 8);
>>> -       if (sctx->b.chip_class >= CIK)
>>> -               si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI,
>>> border_color_va >> 40);
>>> +       if (sctx->b.chip_class >= CIK) {
>>> +               si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI,
>>> +                              S_028084_ADDRESS(border_color_va >> 40));
>>> +       }
>>>          si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
>>>                        RADEON_PRIO_BORDER_COLORS);
>>>
>>>          if (sctx->b.chip_class >= GFX9) {
>>>                  unsigned num_se = sscreen->info.max_se;
>>>                  unsigned pc_lines = 0;
>>>
>>>                  switch (sctx->b.family) {
>>>                  case CHIP_VEGA10:
>>>                          pc_lines = 4096;
>>> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c
>>> b/src/gallium/drivers/radeonsi/si_state_shaders.c
>>> index 56e058f..b9c712d 100644
>>> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
>>> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
>>> @@ -485,21 +485,21 @@ static void si_shader_ls(struct si_screen *sscreen,
>>> struct si_shader *shader)
>>>          va = shader->bo->gpu_address;
>>>          si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ,
>>> RADEON_PRIO_SHADER_BINARY);
>>>
>>>          /* We need at least 2 components for LS.
>>>           * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0,
>>> InstanceID).
>>>           * StepRate0 is set to 1. so that VGPR3 doesn't have to be
>>> loaded.
>>>           */
>>>          vgpr_comp_cnt = shader->info.uses_instanceid ? 2 : 1;
>>>
>>>          si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
>>> -       si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
>>> +       si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS,
>>> S_00B524_MEM_BASE(va >> 40));
>>>
>>>          shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs
>>> - 1) / 4) |
>>>                             S_00B528_SGPRS((shader->config.num_sgprs - 1)
>>> / 8) |
>>>                             S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
>>>                             S_00B528_DX10_CLAMP(1) |
>>>
>>> S_00B528_FLOAT_MODE(shader->config.float_mode);
>>>          shader->config.rsrc2 =
>>> S_00B52C_USER_SGPR(si_get_num_vs_user_sgprs(SI_VS_NUM_USER_SGPR)) |
>>>
>>> S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
>>>   }
>>>
>>> @@ -511,38 +511,38 @@ static void si_shader_hs(struct si_screen *sscreen,
>>> struct si_shader *shader)
>>>
>>>          pm4 = si_get_shader_pm4_state(shader);
>>>          if (!pm4)
>>>                  return;
>>>
>>>          va = shader->bo->gpu_address;
>>>          si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ,
>>> RADEON_PRIO_SHADER_BINARY);
>>>
>>>          if (sscreen->info.chip_class >= GFX9) {
>>>                  si_pm4_set_reg(pm4, R_00B410_SPI_SHADER_PGM_LO_LS, va >>
>>> 8);
>>> -               si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS, va >>
>>> 40);
>>> +               si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS,
>>> S_00B414_MEM_BASE(va >> 40));
>>>
>>>                  /* We need at least 2 components for LS.
>>>                   * VGPR0-3: (VertexID, RelAutoindex, InstanceID /
>>> StepRate0, InstanceID).
>>>                   * StepRate0 is set to 1. so that VGPR3 doesn't have to
>>> be loaded.
>>>                   */
>>>                  ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 2 : 1;
>>>
>>>                  unsigned num_user_sgprs =
>>>
>>> si_get_num_vs_user_sgprs(GFX9_TCS_NUM_USER_SGPR);
>>>
>>>                  shader->config.rsrc2 =
>>>                          S_00B42C_USER_SGPR(num_user_sgprs) |
>>>                          S_00B42C_USER_SGPR_MSB(num_user_sgprs >> 5) |
>>>
>>> S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
>>>          } else {
>>>                  si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >>
>>> 8);
>>> -               si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >>
>>> 40);
>>> +               si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS,
>>> S_00B424_MEM_BASE(va >> 40));
>>>
>>>                  shader->config.rsrc2 =
>>>                          S_00B42C_USER_SGPR(GFX6_TCS_NUM_USER_SGPR) |
>>>                          S_00B42C_OC_LDS_EN(1) |
>>>
>>> S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
>>>          }
>>>
>>>          si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
>>>                         S_00B428_VGPRS((shader->config.num_vgprs - 1) /
>>> 4) |
>>>                         S_00B428_SGPRS((shader->config.num_sgprs - 1) /
>>> 8) |
>>> @@ -581,21 +581,21 @@ static void si_shader_es(struct si_screen *sscreen,
>>> struct si_shader *shader)
>>>                  vgpr_comp_cnt = shader->selector->info.uses_primid ? 3 :
>>> 2;
>>>                  num_user_sgprs = SI_TES_NUM_USER_SGPR;
>>>          } else
>>>                  unreachable("invalid shader selector type");
>>>
>>>          oc_lds_en = shader->selector->type == PIPE_SHADER_TESS_EVAL ? 1
>>> : 0;
>>>
>>>          si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
>>>                         shader->selector->esgs_itemsize / 4);
>>>          si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
>>> -       si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40);
>>> +       si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES,
>>> S_00B324_MEM_BASE(va >> 40));
>>>          si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
>>>                         S_00B328_VGPRS((shader->config.num_vgprs - 1) /
>>> 4) |
>>>                         S_00B328_SGPRS((shader->config.num_sgprs - 1) /
>>> 8) |
>>>                         S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
>>>                         S_00B328_DX10_CLAMP(1) |
>>>                         S_00B328_FLOAT_MODE(shader->config.float_mode));
>>>          si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
>>>                         S_00B32C_USER_SGPR(num_user_sgprs) |
>>>                         S_00B32C_OC_LDS_EN(oc_lds_en) |
>>>
>>> S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
>>> @@ -785,21 +785,21 @@ static void si_shader_gs(struct si_screen *sscreen,
>>> struct si_shader *shader)
>>>
>>>                  unsigned num_user_sgprs;
>>>                  if (es_type == PIPE_SHADER_VERTEX)
>>>                          num_user_sgprs =
>>> si_get_num_vs_user_sgprs(GFX9_VSGS_NUM_USER_SGPR);
>>>                  else
>>>                          num_user_sgprs = GFX9_TESGS_NUM_USER_SGPR;
>>>
>>>                  gfx9_get_gs_info(shader->key.part.gs.es, sel, &gs_info);
>>>
>>>                  si_pm4_set_reg(pm4, R_00B210_SPI_SHADER_PGM_LO_ES, va >>
>>> 8);
>>> -               si_pm4_set_reg(pm4, R_00B214_SPI_SHADER_PGM_HI_ES, va >>
>>> 40);
>>> +               si_pm4_set_reg(pm4, R_00B214_SPI_SHADER_PGM_HI_ES,
>>> S_00B214_MEM_BASE(va >> 40));
>>>
>>>                  si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
>>>                                 S_00B228_VGPRS((shader->config.num_vgprs
>>> - 1) / 4) |
>>>                                 S_00B228_SGPRS((shader->config.num_sgprs
>>> - 1) / 8) |
>>>                                 S_00B228_DX10_CLAMP(1) |
>>>
>>> S_00B228_FLOAT_MODE(shader->config.float_mode) |
>>>
>>> S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt));
>>>                  si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
>>>                                 S_00B22C_USER_SGPR(num_user_sgprs) |
>>>                                 S_00B22C_USER_SGPR_MSB(num_user_sgprs >>
>>> 5) |
>>> @@ -817,21 +817,21 @@ static void si_shader_gs(struct si_screen *sscreen,
>>> struct si_shader *shader)
>>>                  si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
>>>                                 shader->key.part.gs.es->esgs_itemsize /
>>> 4);
>>>
>>>                  if (es_type == PIPE_SHADER_TESS_EVAL)
>>>                          si_set_tesseval_regs(sscreen,
>>> shader->key.part.gs.es, pm4);
>>>
>>>                  polaris_set_vgt_vertex_reuse(sscreen,
>>> shader->key.part.gs.es,
>>>                                               NULL, pm4);
>>>          } else {
>>>                  si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >>
>>> 8);
>>> -               si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >>
>>> 40);
>>> +               si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS,
>>> S_00B224_MEM_BASE(va >> 40));
>>>
>>>                  si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
>>>                                 S_00B228_VGPRS((shader->config.num_vgprs
>>> - 1) / 4) |
>>>                                 S_00B228_SGPRS((shader->config.num_sgprs
>>> - 1) / 8) |
>>>                                 S_00B228_DX10_CLAMP(1) |
>>>
>>> S_00B228_FLOAT_MODE(shader->config.float_mode));
>>>                  si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
>>>                                 S_00B22C_USER_SGPR(GFX6_GS_NUM_USER_SGPR)
>>> |
>>>
>>> S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
>>>          }
>>> @@ -928,21 +928,21 @@ static void si_shader_vs(struct si_screen *sscreen,
>>> struct si_shader *shader,
>>>
>>> S_02870C_POS2_EXPORT_FORMAT(shader->info.nr_pos_exports > 2 ?
>>>
>>> V_02870C_SPI_SHADER_4COMP :
>>>
>>> V_02870C_SPI_SHADER_NONE) |
>>>
>>> S_02870C_POS3_EXPORT_FORMAT(shader->info.nr_pos_exports > 3 ?
>>>
>>> V_02870C_SPI_SHADER_4COMP :
>>>
>>> V_02870C_SPI_SHADER_NONE));
>>>
>>>          oc_lds_en = shader->selector->type == PIPE_SHADER_TESS_EVAL ? 1
>>> : 0;
>>>
>>>          si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
>>> -       si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40);
>>> +       si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS,
>>> S_00B124_MEM_BASE(va >> 40));
>>>          si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
>>>                         S_00B128_VGPRS((shader->config.num_vgprs - 1) /
>>> 4) |
>>>                         S_00B128_SGPRS((shader->config.num_sgprs - 1) /
>>> 8) |
>>>                         S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
>>>                         S_00B128_DX10_CLAMP(1) |
>>>                         S_00B128_FLOAT_MODE(shader->config.float_mode));
>>>          si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS,
>>>                         S_00B12C_USER_SGPR(num_user_sgprs) |
>>>                         S_00B12C_OC_LDS_EN(oc_lds_en) |
>>>
>>> S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
>>> @@ -1108,21 +1108,21 @@ static void si_shader_ps(struct si_shader
>>> *shader)
>>>                         ac_get_spi_shader_z_format(info->writes_z,
>>>                                                    info->writes_stencil,
>>>
>>> info->writes_samplemask));
>>>
>>>          si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT,
>>> spi_shader_col_format);
>>>          si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, cb_shader_mask);
>>>
>>>          va = shader->bo->gpu_address;
>>>          si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ,
>>> RADEON_PRIO_SHADER_BINARY);
>>>          si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
>>> -       si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);
>>> +       si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS,
>>> S_00B024_MEM_BASE(va >> 40));
>>>
>>>          si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
>>>                         S_00B028_VGPRS((shader->config.num_vgprs - 1) /
>>> 4) |
>>>                         S_00B028_SGPRS((shader->config.num_sgprs - 1) /
>>> 8) |
>>>                         S_00B028_DX10_CLAMP(1) |
>>>                         S_00B028_FLOAT_MODE(shader->config.float_mode));
>>>          si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
>>>                         S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size)
>>> |
>>>                         S_00B02C_USER_SGPR(SI_PS_NUM_USER_SGPR) |
>>>
>>> S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
>>> @@ -3015,21 +3015,21 @@ static void si_init_tess_factor_ring(struct
>>> si_context *sctx)
>>>                               sctx->screen->tess_offchip_ring_size;
>>>
>>>          /* Append these registers to the init config state. */
>>>          if (sctx->b.chip_class >= CIK) {
>>>                  si_pm4_set_reg(sctx->init_config,
>>> R_030938_VGT_TF_RING_SIZE,
>>>
>>> S_030938_SIZE(sctx->screen->tess_factor_ring_size / 4));
>>>                  si_pm4_set_reg(sctx->init_config,
>>> R_030940_VGT_TF_MEMORY_BASE,
>>>                                 factor_va >> 8);
>>>                  if (sctx->b.chip_class >= GFX9)
>>>                          si_pm4_set_reg(sctx->init_config,
>>> R_030944_VGT_TF_MEMORY_BASE_HI,
>>> -                                      factor_va >> 40);
>>> +                                      S_030944_BASE_HI(factor_va >>
>>> 40));
>>>                  si_pm4_set_reg(sctx->init_config,
>>> R_03093C_VGT_HS_OFFCHIP_PARAM,
>>>                                 sctx->screen->vgt_hs_offchip_param);
>>>          } else {
>>>                  si_pm4_set_reg(sctx->init_config,
>>> R_008988_VGT_TF_RING_SIZE,
>>>
>>> S_008988_SIZE(sctx->screen->tess_factor_ring_size / 4));
>>>                  si_pm4_set_reg(sctx->init_config,
>>> R_0089B8_VGT_TF_MEMORY_BASE,
>>>                                 factor_va >> 8);
>>>                  si_pm4_set_reg(sctx->init_config,
>>> R_0089B0_VGT_HS_OFFCHIP_PARAM,
>>>                                 sctx->screen->vgt_hs_offchip_param);
>>>          }
>>> --
>>> 2.7.4
>>>
>>> _______________________________________________
>>> mesa-dev mailing list
>>> mesa-dev at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>


More information about the mesa-dev mailing list