<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<style type="text/css" style="display:none;"><!-- P {margin-top:0;margin-bottom:0;} --></style>
</head>
<body dir="ltr">
<div id="divtagdefaultwrapper" style="font-size:12pt;color:#000000;background-color:#FFFFFF;font-family:Calibri,Arial,Helvetica,sans-serif;">
<p>Yup, Nicolai pointed that out.  I have posted a v2 to the list (sorry, I forgot to update the subject line, but "v2" is in the patch comments).</p>
<p><br>
</p>
<p>Also, Nicolai requested that I apply s/0xFFFFFFFF/0xffffffff/g to remain consistent with the rest of the parameters.</p>
<p><br>
</p>
<p>In that case I'll post a v3 of patch #1 shortly.</p>
<p><br>
</p>
<p>Tom</p>
<br>
<br>
<div style="color: rgb(0, 0, 0);">
<div>
<hr tabindex="-1" style="display:inline-block; width:98%">
<div id="x_divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" color="#000000" style="font-size:11pt"><b>From:</b> Deucher, Alexander<br>
<b>Sent:</b> Wednesday, June 29, 2016 09:46<br>
<b>To:</b> 'Tom St Denis'; amd-gfx@lists.freedesktop.org<br>
<b>Cc:</b> StDenis, Tom<br>
<b>Subject:</b> RE: [PATCH 1/3] drm/amd/gfx: add instance field to select_se_sh</font>
<div> </div>
</div>
</div>
<font size="2"><span style="font-size:10pt;">
<div class="PlainText">> -----Original Message-----<br>
> From: amd-gfx [<a href="mailto:amd-gfx-bounces@lists.freedesktop.org">mailto:amd-gfx-bounces@lists.freedesktop.org</a>] On Behalf<br>
> Of Tom St Denis<br>
> Sent: Tuesday, June 28, 2016 10:44 AM<br>
> To: amd-gfx@lists.freedesktop.org<br>
> Cc: StDenis, Tom<br>
> Subject: [PATCH 1/3] drm/amd/gfx: add instance field to select_se_sh<br>
> <br>
> Add ability to specify instance in select_se_sh callback.<br>
> Defaults to 0 all over the driver.<br>
<br>
I think it should default to 0xffffffff and you should set instance to broadcast in that case.<br>
<br>
Alex<br>
<br>
> <br>
> Signed-off-by: Tom St Denis &lt;tom.stdenis@amd.com&gt;<br>
> ---<br>
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  4 ++--<br>
>  drivers/gpu/drm/amd/amdgpu/cik.c      |  4 ++--<br>
>  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 25 +++++++++++++------------<br>
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 19 ++++++++++---------<br>
>  drivers/gpu/drm/amd/amdgpu/vi.c       |  4 ++--<br>
>  5 files changed, 29 insertions(+), 27 deletions(-)<br>
> <br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
> index 780a830b2bbd..d7efbd0ee983 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
> @@ -1158,7 +1158,7 @@ struct amdgpu_cu_info {<br>
>  struct amdgpu_gfx_funcs {<br>
>        /* get the gpu clock counter */<br>
>        uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);<br>
> -     void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, u32<br>
> sh_num);<br>
> +     void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, u32<br>
> sh_num, u32 instance);<br>
>  };<br>
> <br>
>  struct amdgpu_gfx {<br>
> @@ -2295,7 +2295,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring<br>
> *ring)<br>
>  #define amdgpu_dpm_vblank_too_short(adev) (adev)->pm.funcs-<br>
> >vblank_too_short((adev))<br>
>  #define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs-<br>
> >enable_bapm((adev), (e))<br>
>  #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs-<br>
> >get_gpu_clock_counter((adev))<br>
> -#define amdgpu_gfx_select_se_sh(adev, se, sh) (adev)->gfx.funcs-<br>
> >select_se_sh((adev), (se), (sh))<br>
> +#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)-<br>
> >gfx.funcs->select_se_sh((adev), (se), (sh), (instance))<br>
> <br>
>  #define amdgpu_dpm_get_temperature(adev) \<br>
>        ((adev)->pp_enabled ?                                           \<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c<br>
> b/drivers/gpu/drm/amd/amdgpu/cik.c<br>
> index ebc8c256286b..1799612b8299 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/cik.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/cik.c<br>
> @@ -1036,12 +1036,12 @@ static uint32_t cik_read_indexed_register(struct<br>
> amdgpu_device *adev,<br>
> <br>
>        mutex_lock(&adev->grbm_idx_mutex);<br>
>        if (se_num != 0xffffffff || sh_num != 0xffffffff)<br>
> -             amdgpu_gfx_select_se_sh(adev, se_num, sh_num);<br>
> +             amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0);<br>
> <br>
>        val = RREG32(reg_offset);<br>
> <br>
>        if (se_num != 0xffffffff || sh_num != 0xffffffff)<br>
> -             amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff);<br>
> +             amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);<br>
>        mutex_unlock(&adev->grbm_idx_mutex);<br>
>        return val;<br>
>  }<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c<br>
> b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c<br>
> index 9edec8a98ee2..44def1665f0c 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c<br>
> @@ -1584,10 +1584,11 @@ static void<br>
> gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)<br>
>   * broadcast to all SEs or SHs (CIK).<br>
>   */<br>
>  static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,<br>
> -                               u32 se_num, u32 sh_num)<br>
> +                               u32 se_num, u32 sh_num, u32 instance)<br>
>  {<br>
>        u32 data =<br>
> GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK;<br>
> <br>
> +     data = REG_SET_FIELD(data, GRBM_GFX_INDEX, INSTANCE_INDEX,<br>
> instance);<br>
>        if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))<br>
>                data |=<br>
> GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |<br>
> <br>
>        GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;<br>
> @@ -1660,13 +1661,13 @@ static void gfx_v7_0_setup_rb(struct<br>
> amdgpu_device *adev)<br>
>        mutex_lock(&adev->grbm_idx_mutex);<br>
>        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {<br>
>                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {<br>
> -                     gfx_v7_0_select_se_sh(adev, i, j);<br>
> +                     gfx_v7_0_select_se_sh(adev, i, j, 0);<br>
>                        data = gfx_v7_0_get_rb_active_bitmap(adev);<br>
>                        active_rbs |= data << ((i * adev-<br>
> >gfx.config.max_sh_per_se + j) *<br>
>                                               rb_bitmap_width_per_sh);<br>
>                }<br>
>        }<br>
> -     gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);<br>
> +     gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);<br>
>        mutex_unlock(&adev->grbm_idx_mutex);<br>
> <br>
>        adev->gfx.config.backend_enable_mask = active_rbs;<br>
> @@ -1747,7 +1748,7 @@ static void gfx_v7_0_gpu_init(struct<br>
> amdgpu_device *adev)<br>
>         * making sure that the following register writes will be broadcasted<br>
>         * to all the shaders<br>
>         */<br>
> -     gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);<br>
> +     gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);<br>
> <br>
>        /* XXX SH_MEM regs */<br>
>        /* where to put LDS, scratch, GPUVM in FSA64 space */<br>
> @@ -3381,7 +3382,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct<br>
> amdgpu_device *adev)<br>
>        mutex_lock(&adev->grbm_idx_mutex);<br>
>        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {<br>
>                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {<br>
> -                     gfx_v7_0_select_se_sh(adev, i, j);<br>
> +                     gfx_v7_0_select_se_sh(adev, i, j, 0);<br>
>                        for (k = 0; k < adev->usec_timeout; k++) {<br>
>                                if<br>
> (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)<br>
>                                        break;<br>
> @@ -3389,7 +3390,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct<br>
> amdgpu_device *adev)<br>
>                        }<br>
>                }<br>
>        }<br>
> -     gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);<br>
> +     gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);<br>
>        mutex_unlock(&adev->grbm_idx_mutex);<br>
> <br>
>        mask =<br>
> RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |<br>
> @@ -3549,7 +3550,7 @@ static int gfx_v7_0_rlc_resume(struct<br>
> amdgpu_device *adev)<br>
>        WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);<br>
> <br>
>        mutex_lock(&adev->grbm_idx_mutex);<br>
> -     gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);<br>
> +     gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);<br>
>        WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);<br>
>        WREG32(mmRLC_LB_PARAMS, 0x00600408);<br>
>        WREG32(mmRLC_LB_CNTL, 0x80000004);<br>
> @@ -3589,7 +3590,7 @@ static void gfx_v7_0_enable_cgcg(struct<br>
> amdgpu_device *adev, bool enable)<br>
>                tmp = gfx_v7_0_halt_rlc(adev);<br>
> <br>
>                mutex_lock(&adev->grbm_idx_mutex);<br>
> -             gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);<br>
> +             gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);<br>
>                WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK,<br>
> 0xffffffff);<br>
>                WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK,<br>
> 0xffffffff);<br>
>                tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |<br>
> @@ -3640,7 +3641,7 @@ static void gfx_v7_0_enable_mgcg(struct<br>
> amdgpu_device *adev, bool enable)<br>
>                tmp = gfx_v7_0_halt_rlc(adev);<br>
> <br>
>                mutex_lock(&adev->grbm_idx_mutex);<br>
> -             gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);<br>
> +             gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);<br>
>                WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK,<br>
> 0xffffffff);<br>
>                WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK,<br>
> 0xffffffff);<br>
>                data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |<br>
> @@ -3691,7 +3692,7 @@ static void gfx_v7_0_enable_mgcg(struct<br>
> amdgpu_device *adev, bool enable)<br>
>                tmp = gfx_v7_0_halt_rlc(adev);<br>
> <br>
>                mutex_lock(&adev->grbm_idx_mutex);<br>
> -             gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);<br>
> +             gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);<br>
>                WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK,<br>
> 0xffffffff);<br>
>                WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK,<br>
> 0xffffffff);<br>
>                data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |<br>
> RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;<br>
> @@ -5055,7 +5056,7 @@ static void gfx_v7_0_get_cu_info(struct<br>
> amdgpu_device *adev)<br>
>                        mask = 1;<br>
>                        ao_bitmap = 0;<br>
>                        counter = 0;<br>
> -                     gfx_v7_0_select_se_sh(adev, i, j);<br>
> +                     gfx_v7_0_select_se_sh(adev, i, j, 0);<br>
>                        bitmap = gfx_v7_0_get_cu_active_bitmap(adev);<br>
>                        cu_info->bitmap[i][j] = bitmap;<br>
> <br>
> @@ -5071,7 +5072,7 @@ static void gfx_v7_0_get_cu_info(struct<br>
> amdgpu_device *adev)<br>
>                        ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));<br>
>                }<br>
>        }<br>
> -     gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);<br>
> +     gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);<br>
>        mutex_unlock(&adev->grbm_idx_mutex);<br>
> <br>
>        cu_info->number = active_cu_number;<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c<br>
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c<br>
> index 9aa30bc06e4a..45c3ad52d21c 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c<br>
> @@ -3447,10 +3447,11 @@ static void<br>
> gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)<br>
>  }<br>
> <br>
>  static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,<br>
> -                               u32 se_num, u32 sh_num)<br>
> +                               u32 se_num, u32 sh_num, u32 instance)<br>
>  {<br>
>        u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX,<br>
> INSTANCE_BROADCAST_WRITES, 1);<br>
> <br>
> +     data = REG_SET_FIELD(data, GRBM_GFX_INDEX, INSTANCE_INDEX,<br>
> instance);<br>
>        if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {<br>
>                data = REG_SET_FIELD(data, GRBM_GFX_INDEX,<br>
> SH_BROADCAST_WRITES, 1);<br>
>                data = REG_SET_FIELD(data, GRBM_GFX_INDEX,<br>
> SE_BROADCAST_WRITES, 1);<br>
> @@ -3499,13 +3500,13 @@ static void gfx_v8_0_setup_rb(struct<br>
> amdgpu_device *adev)<br>
>        mutex_lock(&adev->grbm_idx_mutex);<br>
>        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {<br>
>                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {<br>
> -                     gfx_v8_0_select_se_sh(adev, i, j);<br>
> +                     gfx_v8_0_select_se_sh(adev, i, j, 0);<br>
>                        data = gfx_v8_0_get_rb_active_bitmap(adev);<br>
>                        active_rbs |= data << ((i * adev-<br>
> >gfx.config.max_sh_per_se + j) *<br>
>                                               rb_bitmap_width_per_sh);<br>
>                }<br>
>        }<br>
> -     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);<br>
> +     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);<br>
>        mutex_unlock(&adev->grbm_idx_mutex);<br>
> <br>
>        adev->gfx.config.backend_enable_mask = active_rbs;<br>
> @@ -3609,7 +3610,7 @@ static void gfx_v8_0_gpu_init(struct<br>
> amdgpu_device *adev)<br>
>         * making sure that the following register writes will be broadcasted<br>
>         * to all the shaders<br>
>         */<br>
> -     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);<br>
> +     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);<br>
> <br>
>        WREG32(mmPA_SC_FIFO_SIZE,<br>
>                   (adev->gfx.config.sc_prim_fifo_size_frontend <<<br>
> @@ -3632,7 +3633,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct<br>
> amdgpu_device *adev)<br>
>        mutex_lock(&adev->grbm_idx_mutex);<br>
>        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {<br>
>                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {<br>
> -                     gfx_v8_0_select_se_sh(adev, i, j);<br>
> +                     gfx_v8_0_select_se_sh(adev, i, j, 0);<br>
>                        for (k = 0; k < adev->usec_timeout; k++) {<br>
>                                if<br>
> (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)<br>
>                                        break;<br>
> @@ -3640,7 +3641,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct<br>
> amdgpu_device *adev)<br>
>                        }<br>
>                }<br>
>        }<br>
> -     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);<br>
> +     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);<br>
>        mutex_unlock(&adev->grbm_idx_mutex);<br>
> <br>
>        mask =<br>
> RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |<br>
> @@ -5409,7 +5410,7 @@ static void gfx_v8_0_send_serdes_cmd(struct<br>
> amdgpu_device *adev,<br>
>  {<br>
>        uint32_t data;<br>
> <br>
> -     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);<br>
> +     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);<br>
> <br>
>        WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);<br>
>        WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);<br>
> @@ -6501,7 +6502,7 @@ static void gfx_v8_0_get_cu_info(struct<br>
> amdgpu_device *adev)<br>
>                        mask = 1;<br>
>                        ao_bitmap = 0;<br>
>                        counter = 0;<br>
> -                     gfx_v8_0_select_se_sh(adev, i, j);<br>
> +                     gfx_v8_0_select_se_sh(adev, i, j, 0);<br>
>                        bitmap = gfx_v8_0_get_cu_active_bitmap(adev);<br>
>                        cu_info->bitmap[i][j] = bitmap;<br>
> <br>
> @@ -6517,7 +6518,7 @@ static void gfx_v8_0_get_cu_info(struct<br>
> amdgpu_device *adev)<br>
>                        ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));<br>
>                }<br>
>        }<br>
> -     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);<br>
> +     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);<br>
>        mutex_unlock(&adev->grbm_idx_mutex);<br>
> <br>
>        cu_info->number = active_cu_number;<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c<br>
> b/drivers/gpu/drm/amd/amdgpu/vi.c<br>
> index c628a09f84d7..c7762a554e6b 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/vi.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c<br>
> @@ -534,12 +534,12 @@ static uint32_t vi_read_indexed_register(struct<br>
> amdgpu_device *adev, u32 se_num,<br>
> <br>
>        mutex_lock(&adev->grbm_idx_mutex);<br>
>        if (se_num != 0xffffffff || sh_num != 0xffffffff)<br>
> -             amdgpu_gfx_select_se_sh(adev, se_num, sh_num);<br>
> +             amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0);<br>
> <br>
>        val = RREG32(reg_offset);<br>
> <br>
>        if (se_num != 0xffffffff || sh_num != 0xffffffff)<br>
> -             amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff);<br>
> +             amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);<br>
>        mutex_unlock(&adev->grbm_idx_mutex);<br>
>        return val;<br>
>  }<br>
> --<br>
> 2.9.0<br>
> <br>
> _______________________________________________<br>
> amd-gfx mailing list<br>
> amd-gfx@lists.freedesktop.org<br>
> <a href="https://lists.freedesktop.org/mailman/listinfo/amd-gfx">https://lists.freedesktop.org/mailman/listinfo/amd-gfx</a><br>
</div>
</span></font></div>
</div>
</body>
</html>