[Intel-gfx] [PATCH v6 2/2] drm/i915/icl: Define MOCS table for Icelake

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Mon Dec 10 15:17:29 UTC 2018


On 07/11/2018 15:16, Tomasz Lis wrote:
> The table has been unified across OSes to minimize virtualization overhead.
> 
> The MOCS table is now published as part of bspec, and versioned. Entries
> are supposed to never be modified, but new ones can be added. Adding
> entries increases table version. The patch includes version 1 entries.
> 
> Meaning of each entry is now explained in bspec, and user mode clients
> are expected to know what each entry means. The 3 entries used for previous
> platforms are still compatible with their legacy definitions, but that is
> not guaranteed to be true for future platforms.
> 
> v2: Fixed SCC values, improved commit comment (Daniele)
> v3: Improved MOCS table comment (Daniele)
> v4: Moved new entries below gen9 ones. Put common entries into
>      definition to be used in multiple arrays. (Lucas)
> v5: Made defines for or-ing flags. Renamed macros from MOCS_TABLE
>      to MOCS_ENTRIES. Switched LE_CoS to upper case. (Joonas)
> v6: Removed definitions of reserved entries. (Michal)
>      Increased limit of entries sent to the hardware on gen11+.
> 
> BSpec: 34007
> BSpec: 560
> Signed-off-by: Tomasz Lis <tomasz.lis at intel.com>
> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com> (v4)

R-b upgrade needed here as well.

> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Zhenyu Wang <zhenyuw at linux.intel.com>
> Cc: Zhi A Wang <zhi.a.wang at intel.com>
> Cc: Anuj Phogat <anuj.phogat at intel.com>
> Cc: Adam Cetnerowski <adam.cetnerowski at intel.com>
> Cc: Piotr Rozenfeld <piotr.rozenfeld at intel.com>
> Cc: Lucas De Marchi <lucas.demarchi at intel.com>
> ---
>   drivers/gpu/drm/i915/intel_mocs.c | 222 +++++++++++++++++++++++++++++++++-----
>   1 file changed, 197 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
> index 8d08a7b..4eb05c6 100644
> --- a/drivers/gpu/drm/i915/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/intel_mocs.c
> @@ -44,6 +44,8 @@ struct drm_i915_mocs_table {
>   #define LE_SCC(value)		((value) << 8)
>   #define LE_PFM(value)		((value) << 11)
>   #define LE_SCF(value)		((value) << 14)
> +#define LE_COS(value)		((value) << 15)
> +#define LE_SSE(value)		((value) << 17)
>   
>   /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
>   #define L3_ESC(value)		((value) << 0)
> @@ -52,6 +54,10 @@ struct drm_i915_mocs_table {
>   
>   /* Helper defines */
>   #define GEN9_NUM_MOCS_ENTRIES	62  /* 62 out of 64 - 63 & 64 are reserved. */
> +#define GEN11_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
> +
> +#define NUM_MOCS_ENTRIES(i915) \
> +	(INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : GEN11_NUM_MOCS_ENTRIES)

I have a suggestion to make this a bit more elegant by avoiding a 
sprinkle of conditionals throughout the code - since I count ten 
non-debug call sites of this macros.

Since the MOCS code seems nicely driven from struct drm_i915_mocs_table, 
the suggestion is to store both the used and maximum valid number of 
entries per platform in that structure.

It's all nicely consolidated in get_mocs_settings, where all the sanity 
checks could be moved as well. Other bits of the code would then just 
trust the table.

>   
>   /* (e)LLC caching options */
>   #define LE_PAGETABLE		0
> @@ -80,21 +86,21 @@ struct drm_i915_mocs_table {
>    * LNCFCMOCS0 - LNCFCMOCS32 registers.
>    *
>    * These tables are intended to be kept reasonably consistent across
> - * platforms. However some of the fields are not applicable to all of
> - * them.
> + * HW platforms, and for ICL+, be identical across OSes. To achieve
> + * that, for Icelake and above, list of entries is published as part
> + * of bspec.
>    *
>    * Entries not part of the following tables are undefined as far as
> - * userspace is concerned and shouldn't be relied upon.  For the time
> - * being they will be implicitly initialized to the strictest caching
> - * configuration (uncached) to guarantee forwards compatibility with
> - * userspace programs written against more recent kernels providing
> - * additional MOCS entries.
> + * userspace is concerned and shouldn't be relied upon.
> + *
> + * The last two entries are reserved by the hardware. For ICL+ they
> + * should be initialized according to bspec and never used, for older
> + * platforms they should never be written to.
>    *
> - * NOTE: These tables MUST start with being uncached and the length
> - *       MUST be less than 63 as the last two registers are reserved
> - *       by the hardware.  These tables are part of the kernel ABI and
> - *       may only be updated incrementally by adding entries at the
> - *       end.
> + * NOTE: These tables are part of bspec and defined as part of hardware
> + *       interface for ICL+. For older platforms, they are part of kernel
> + *       ABI. It is expected that existing entries will remain constant
> + *       and the tables will only be updated by adding new entries.
>    */
>   
>   #define MOCS_CONTROL_VALUE(lecc, tc, lrum, daom, ersc, scc, pfm, scf) \
> @@ -147,6 +153,167 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
>   #undef MOCS_CONTROL_VALUE
>   #undef MOCS_L3CC_VALUE
>   
> +#define MOCS_CONTROL_VALUE(lecc, tc, lrum, daom, ersc, scc, pfm, scf, cos, sse) \
> +	(LE_CACHEABILITY(lecc) | LE_TGT_CACHE(tc) | \
> +	LE_LRUM(lrum) | LE_AOM(daom) | LE_RSC(ersc) | LE_SCC(scc) | \
> +	LE_PFM(pfm) | LE_SCF(scf) | LE_COS(cos) | LE_SSE(sse))
> +
> +#define MOCS_L3CC_VALUE(esc, scc, l3cc) \
> +	(L3_ESC(esc) | L3_SCC(scc) | L3_CACHEABILITY(l3cc))
> +
> +#define GEN11_MOCS_ENTRIES \
> +	[0] = { \
> +	  /* Base - Uncached (Deprecated) */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_UC, LE_TC_LLC, \
> +					      0, 0, 0, 0, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
> +	}, \
> +	[1] = { \
> +	  /* Base - L3 + LeCC:PAT (Deprecated) */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_PAGETABLE, LE_TC_LLC, \
> +					      0, 0, 0, 0, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
> +	}, \
> +	[2] = { \
> +	  /* Base - L3 + LLC */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
> +					      3, 0, 0, 0, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
> +	}, \
> +	[3] = { \
> +	  /* Base - Uncached */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_UC, LE_TC_LLC, \
> +					      0, 0, 0, 0, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
> +	}, \
> +	[4] = { \
> +	  /* Base - L3 */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_UC, LE_TC_LLC, \
> +					      0, 0, 0, 0, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
> +	}, \
> +	[5] = { \
> +	  /* Base - LLC */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
> +					      3, 0, 0, 0, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
> +	}, \
> +	[6] = { \
> +	  /* Age 0 - LLC */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
> +					      1, 0, 0, 0, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
> +	}, \
> +	[7] = { \
> +	  /* Age 0 - L3 + LLC */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
> +					      1, 0, 0, 0, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
> +	}, \
> +	[8] = { \
> +	  /* Age: Don't Chg. - LLC */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
> +					      2, 0, 0, 0, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
> +	}, \
> +	[9] = { \
> +	  /* Age: Don't Chg. - L3 + LLC */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
> +					      2, 0, 0, 0, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
> +	}, \
> +	[10] = { \
> +	  /* No AOM - LLC */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
> +					      3, 1, 0, 0, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
> +	}, \
> +	[11] = { \
> +	  /* No AOM - L3 + LLC */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
> +					      3, 1, 0, 0, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
> +	}, \
> +	[12] = { \
> +	  /* No AOM; Age 0 - LLC */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
> +					      1, 1, 0, 0, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
> +	}, \
> +	[13] = { \
> +	  /* No AOM; Age 0 - L3 + LLC */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
> +					      1, 1, 0, 0, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
> +	}, \
> +	[14] = { \
> +	  /* No AOM; Age:DC - LLC */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
> +					      2, 1, 0, 0, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
> +	}, \
> +	[15] = { \
> +	  /* No AOM; Age:DC - L3 + LLC */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
> +					      2, 1, 0, 0, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
> +	}, \
> +	[18] = { \
> +	  /* Self-Snoop - L3 + LLC */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
> +					      3, 0, 0, 0, 0, 0, 0, 3), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
> +	}, \
> +	[19] = { \
> +	  /* Skip Caching - L3 + LLC(12.5%) */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
> +					      3, 0, 0, 7, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
> +	}, \
> +	[20] = { \
> +	  /* Skip Caching - L3 + LLC(25%) */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
> +					      3, 0, 0, 3, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
> +	}, \
> +	[21] = { \
> +	  /* Skip Caching - L3 + LLC(50%) */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
> +					      3, 0, 0, 1, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
> +	}, \
> +	[22] = { \
> +	  /* Skip Caching - L3 + LLC(75%) */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
> +					      3, 0, 1, 3, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
> +	}, \
> +	[23] = { \
> +	  /* Skip Caching - L3 + LLC(87.5%) */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
> +					      3, 0, 1, 7, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
> +	}, \
> +	[62] = { \
> +	  /* HW Reserved - SW program but never use */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
> +					      3, 0, 0, 0, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
> +	}, \
> +	[63] = { \
> +	  /* HW Reserved - SW program but never use */ \
> +	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
> +					      3, 0, 0, 0, 0, 0, 0, 0), \
> +	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
> +	},
> +
> +static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
> +	GEN11_MOCS_ENTRIES
> +};
> +
> +#undef MOCS_CONTROL_VALUE
> +#undef MOCS_L3CC_VALUE
> +
>   /**
>    * get_mocs_settings()
>    * @dev_priv:	i915 device.
> @@ -164,8 +331,11 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv,
>   {
>   	bool result = false;
>   
> -	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) ||
> -	    IS_ICELAKE(dev_priv)) {
> +	if (IS_ICELAKE(dev_priv)) {
> +		table->size  = ARRAY_SIZE(icelake_mocs_table);
> +		table->table = icelake_mocs_table;
> +		result = true;
> +	} else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
>   		table->size  = ARRAY_SIZE(skylake_mocs_table);
>   		table->table = skylake_mocs_table;
>   		result = true;

So initialize table max here as well.

> @@ -228,7 +398,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>   	if (!get_mocs_settings(dev_priv, &table))
>   		return;
>   
> -	GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES);
> +	GEM_BUG_ON(table.size > NUM_MOCS_ENTRIES(dev_priv));

Then this can go.

>   
>   	for (index = 0; index < table.size; index++)
>   		I915_WRITE(mocs_register(engine->id, index),
> @@ -242,7 +412,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>   	 * Entry 0 in the table is uncached - so we are just writing
>   	 * that value to all the used entries.
>   	 */
> -	for (; index < GEN9_NUM_MOCS_ENTRIES; index++)
> +	for (; index < NUM_MOCS_ENTRIES(dev_priv); index++)
This can use table->max (or something).

>   		I915_WRITE(mocs_register(engine->id, index),
>   			   table.table[0].control_value);
>   }
> @@ -260,18 +430,19 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>   static int emit_mocs_control_table(struct i915_request *rq,
>   				   const struct drm_i915_mocs_table *table)
>   {
> +	struct drm_i915_private *i915 = rq->i915;
>   	enum intel_engine_id engine = rq->engine->id;
>   	unsigned int index;
>   	u32 *cs;
>   
> -	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
> +	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>   		return -ENODEV;

This can then go as well. Or at least it can be GEM_WARN_ON since it 
cannot leave development in the broken state.

>   
> -	cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES);
> +	cs = intel_ring_begin(rq, 2 + 2 * NUM_MOCS_ENTRIES(i915));
>   	if (IS_ERR(cs))
>   		return PTR_ERR(cs);
>   
> -	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES);
> +	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915));

These two can be table->max as well then.

Etc to the end.

See what you think.

Regards,

Tvrtko

>   
>   	for (index = 0; index < table->size; index++) {
>   		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> @@ -286,7 +457,7 @@ static int emit_mocs_control_table(struct i915_request *rq,
>   	 * Entry 0 in the table is uncached - so we are just writing
>   	 * that value to all the used entries.
>   	 */
> -	for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
> +	for (; index < NUM_MOCS_ENTRIES(i915); index++) {
>   		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>   		*cs++ = table->table[0].control_value;
>   	}
> @@ -319,17 +490,18 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
>   static int emit_mocs_l3cc_table(struct i915_request *rq,
>   				const struct drm_i915_mocs_table *table)
>   {
> +	struct drm_i915_private *i915 = rq->i915;
>   	unsigned int i;
>   	u32 *cs;
>   
> -	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
> +	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>   		return -ENODEV;
>   
> -	cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES);
> +	cs = intel_ring_begin(rq, 2 + NUM_MOCS_ENTRIES(i915));
>   	if (IS_ERR(cs))
>   		return PTR_ERR(cs);
>   
> -	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2);
> +	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915) / 2);
>   
>   	for (i = 0; i < table->size/2; i++) {
>   		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
> @@ -348,7 +520,7 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
>   	 * this will be uncached. Leave the last pair uninitialised as
>   	 * they are reserved by the hardware.
>   	 */
> -	for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
> +	for (; i < NUM_MOCS_ENTRIES(i915) / 2; i++) {
>   		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
>   		*cs++ = l3cc_combine(table, 0, 0);
>   	}
> @@ -395,7 +567,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
>   	 * this will be uncached. Leave the last pair as initialised as
>   	 * they are reserved by the hardware.
>   	 */
> -	for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++)
> +	for (; i < (NUM_MOCS_ENTRIES(dev_priv) / 2); i++)
>   		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0));
>   }
>   
> 


More information about the Intel-gfx mailing list