<div dir="ltr"><div>I think TCCs are global, because all memory traffic from gfx engines+cp+sdma has to go through TCCs, e.g. memory requests from different SEs accessing the same memory address go to the same TCC.<br></div><div><br></div><div>Marek<br></div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Tue, Sep 24, 2019 at 10:58 PM Alex Deucher &lt;<a href="mailto:alexdeucher@gmail.com" target="_blank">alexdeucher@gmail.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">On Tue, Sep 24, 2019 at 6:29 PM Marek Olšák &lt;<a href="mailto:maraeo@gmail.com" target="_blank">maraeo@gmail.com</a>&gt; wrote:<br>
><br>
> From: Marek Olšák &lt;<a href="mailto:marek.olsak@amd.com" target="_blank">marek.olsak@amd.com</a>&gt;<br>
><br>
> UMDs need this for correct programming of harvested chips.<br>
><br>
> Signed-off-by: Marek Olšák &lt;<a href="mailto:marek.olsak@amd.com" target="_blank">marek.olsak@amd.com</a>&gt;<br>
> ---<br>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-<br>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +<br>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  2 ++<br>
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 11 +++++++++++<br>
>  include/uapi/drm/amdgpu_drm.h           |  2 ++<br>
>  5 files changed, 18 insertions(+), 1 deletion(-)<br>
><br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c<br>
> index f82d634cf3f9..b70b30378c20 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c<br>
> @@ -75,23 +75,24 @@<br>
>   * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).<br>
>   * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.<br>
>   * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.<br>
>   * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES<br>
>   * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID<br>
>   * - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE.<br>
>   * - 3.31.0 - Add support for per-flip tiling attribute changes with DC<br>
>   * - 3.32.0 - Add syncobj timeline support to AMDGPU_CS.<br>
>   * - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS.<br>
>   * - 3.34.0 - Non-DC can flip correctly between buffers with different pitches<br>
> + * - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask<br>
>   */<br>
>  #define KMS_DRIVER_MAJOR       3<br>
> -#define KMS_DRIVER_MINOR       34<br>
> +#define KMS_DRIVER_MINOR       35<br>
>  #define KMS_DRIVER_PATCHLEVEL  0<br>
><br>
>  #define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256<br>
><br>
>  int amdgpu_vram_limit = 0;<br>
>  int amdgpu_vis_vram_limit = 0;<br>
>  int amdgpu_gart_size = -1; /* auto */<br>
>  int amdgpu_gtt_size = -1; /* auto */<br>
>  int amdgpu_moverate = -1; /* auto */<br>
>  int amdgpu_benchmarking = 0;<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h<br>
> index 59c5464c96be..88dccff41dff 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h<br>
> @@ -158,20 +158,21 @@ struct amdgpu_gfx_config {<br>
>         struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE];<br>
><br>
>         /* gfx configure feature */<br>
>         uint32_t double_offchip_lds_buf;<br>
>         /* cached value of DB_DEBUG2 */<br>
>         uint32_t db_debug2;<br>
>         /* gfx10 specific config */<br>
>         uint32_t num_sc_per_sh;<br>
>         uint32_t num_packer_per_sc;<br>
>         uint32_t pa_sc_tile_steering_override;<br>
> +       uint64_t tcc_disabled_mask;<br>
>  };<br>
><br>
>  struct amdgpu_cu_info {<br>
>         uint32_t simd_per_cu;<br>
>         uint32_t max_waves_per_simd;<br>
>         uint32_t wave_front_size;<br>
>         uint32_t max_scratch_slots_per_cu;<br>
>         uint32_t lds_size;<br>
><br>
>         /* total active CU number */<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c<br>
> index 91f5aaf99861..7356efe7e2d3 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c<br>
> @@ -775,20 +775,22 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file<br>
>                 dev_info.num_cu_per_sh = adev->gfx.config.max_cu_per_sh;<br>
>                 dev_info.num_tcc_blocks = adev->gfx.config.max_texture_channel_caches;<br>
>                 dev_info.gs_vgt_table_depth = adev->gfx.config.gs_vgt_table_depth;<br>
>                 dev_info.gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth;<br>
>                 dev_info.max_gs_waves_per_vgt = adev->gfx.config.max_gs_threads;<br>
><br>
>                 if (adev->family >= AMDGPU_FAMILY_NV)<br>
>                         dev_info.pa_sc_tile_steering_override =<br>
>                                 adev->gfx.config.pa_sc_tile_steering_override;<br>
><br>
> +               dev_info.tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask;<br>
> +<br>
>                 return copy_to_user(out, &dev_info,<br>
>                                     min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0;<br>
>         }<br>
>         case AMDGPU_INFO_VCE_CLOCK_TABLE: {<br>
>                 unsigned i;<br>
>                 struct drm_amdgpu_info_vce_clock_table vce_clk_table = {};<br>
>                 struct amd_vce_state *vce_state;<br>
><br>
>                 for (i = 0; i < AMDGPU_VCE_CLOCK_TABLE_ENTRIES; i++) {<br>
>                         vce_state = amdgpu_dpm_get_vce_clock_state(adev, i);<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c<br>
> index cfc0952f6175..ca01643fa0c8 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c<br>
> @@ -1684,31 +1684,42 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)<br>
>                                 tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask);<br>
>                                 WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp);<br>
>                         }<br>
>                 }<br>
><br>
>                 gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);<br>
>                 mutex_unlock(&adev->grbm_idx_mutex);<br>
>         }<br>
>  }<br>
><br>
> +static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev)<br>
> +{<br>
> +       uint32_t tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE) |<br>
> +                              RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE);<br>
> +<br>
> +       adev->gfx.config.tcc_disabled_mask =<br>
> +               REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |<br>
> +               (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);<br>
> +}<br>
<br>
Are TCCs per SE/SH?  If so, you'll need to walk each instance and<br>
create a mask from each instance like we do for setup_rb and cu_info.<br>
<br>
Alex<br>
<br>
> +<br>
>  static void gfx_v10_0_constants_init(struct amdgpu_device *adev)<br>
>  {<br>
>         u32 tmp;<br>
>         int i;<br>
><br>
>         WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);<br>
><br>
>         gfx_v10_0_tiling_mode_table_init(adev);<br>
><br>
>         gfx_v10_0_setup_rb(adev);<br>
>         gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info);<br>
> +       gfx_v10_0_get_tcc_info(adev);<br>
>         adev->gfx.config.pa_sc_tile_steering_override =<br>
>                 gfx_v10_0_init_pa_sc_tile_steering_override(adev);<br>
><br>
>         /* XXX SH_MEM regs */<br>
>         /* where to put LDS, scratch, GPUVM in FSA64 space */<br>
>         mutex_lock(&adev->srbm_mutex);<br>
>         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {<br>
>                 nv_grbm_select(adev, 0, 0, 0, i);<br>
>                 /* CP and shaders */<br>
>                 WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);<br>
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h<br>
> index f3ad429173e3..a69e31929155 100644<br>
> --- a/include/uapi/drm/amdgpu_drm.h<br>
> +++ b/include/uapi/drm/amdgpu_drm.h<br>
> @@ -1001,20 +1001,22 @@ struct drm_amdgpu_info_device {<br>
>         __u32 max_gs_waves_per_vgt;<br>
>         __u32 _pad1;<br>
>         /* always on cu bitmap */<br>
>         __u32 cu_ao_bitmap[4][4];<br>
>         /** Starting high virtual address for UMDs. */<br>
>         __u64 high_va_offset;<br>
>         /** The maximum high virtual address */<br>
>         __u64 high_va_max;<br>
>         /* gfx10 pa_sc_tile_steering_override */<br>
>         __u32 pa_sc_tile_steering_override;<br>
> +       /* disabled TCCs */<br>
> +       __u64 tcc_disabled_mask;<br>
>  };<br>
><br>
>  struct drm_amdgpu_info_hw_ip {<br>
>         /** Version of h/w IP */<br>
>         __u32  hw_ip_version_major;<br>
>         __u32  hw_ip_version_minor;<br>
>         /** Capabilities */<br>
>         __u64  capabilities_flags;<br>
>         /** command buffer address start alignment*/<br>
>         __u32  ib_start_alignment;<br>
> --<br>
> 2.17.1<br>
><br>
> _______________________________________________<br>
> amd-gfx mailing list<br>
> <a href="mailto:amd-gfx@lists.freedesktop.org" target="_blank">amd-gfx@lists.freedesktop.org</a><br>
> <a href="https://lists.freedesktop.org/mailman/listinfo/amd-gfx" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/amd-gfx</a><br>
</blockquote></div>