[Intel-gfx] [PATCH v7 4/4] drm/i915: cache number of MOCS entries

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Mon Jan 7 10:26:08 UTC 2019


On 04/01/2019 23:47, Lucas De Marchi wrote:
> On Fri, Dec 21, 2018 at 11:56:43AM +0000, Tvrtko Ursulin wrote:
>>
>> On 14/12/2018 18:20, Lucas De Marchi wrote:
>>> Instead of checking the gen number every time we need to know the max
>>> number of entries, just save it into the table struct so we don't need
>>> extra branches throughout the code.
>>>
>>> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/intel_mocs.c | 31 ++++++++++++++-----------------
>>>    1 file changed, 14 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
>>> index dfc4edea020f..22c5f576a3c2 100644
>>> --- a/drivers/gpu/drm/i915/intel_mocs.c
>>> +++ b/drivers/gpu/drm/i915/intel_mocs.c
>>> @@ -32,6 +32,7 @@ struct drm_i915_mocs_entry {
>>>    struct drm_i915_mocs_table {
>>>    	u32 size;
>>> +	u32 n_entries;
>>
>> While at it I'd convert both counts to normal unsigned int.
>>
>> Another nitpick: I'd also suggest more descriptive names, since I read
>> n_entries and size as the opposite of what they actually mean in the code.
>> Maybe just s/n_entries/max_entries/ to keep the diff small, or even
>> consider s/size/used_entries/ or something?
> 
> size = ARRAY_SIZE() -- we use that to bound accesses to the table so we
> don't read beyond the array.
> 
> n_entries = GEN[x]_NUM_MOCS_ENTRIES -- we use that to track the number of
> entries we will program.
> 
> So the names look reasonable to me.

It was a minor comment, so feel free to keep the naming. I was only 
pointing out that, as a reader, it wasn't immediately obvious to me 
what each count refers to: the table object in memory, or the table 
as supported by the underlying platform.
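
Purely as an illustration of what I had in mind (nothing I insist on, and
with the unsigned int change folded in), something like:

	struct drm_i915_mocs_table {
		/* Entries actually defined in the platform table. */
		unsigned int used_entries;
		/* Entries the platform supports and we must program. */
		unsigned int max_entries;
		const struct drm_i915_mocs_entry *table;
	};

Then used_entries <= max_entries reads the right way round at the call
sites.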

>>
>>>    	const struct drm_i915_mocs_entry *table;
>>>    };
>>> @@ -56,9 +57,6 @@ struct drm_i915_mocs_table {
>>>    #define GEN9_NUM_MOCS_ENTRIES	62  /* 62 out of 64 - 63 & 64 are reserved. */
>>>    #define GEN11_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
>>> -#define NUM_MOCS_ENTRIES(i915) \
>>> -	(INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : GEN11_NUM_MOCS_ENTRIES)
>>> -
>>
>> Do you want to go the "patch 3 adds this, patch 4 removes it" route, or
>> why not just squash them into one?
> 
> I considered doing that, but then thought the split approach would be
> more logical, since this patch is about caching the number of entries.
> Squashing it into the previous patch creates more noise there and an
> additional headache since I'm not the original author. Not having this
> macro in the previous patch basically means squashing this entire patch
> into it.
> 
> So in the end I decided to do it on top. Is that okay with you?

Yes, that's okay.

Regards,

Tvrtko

> 
> 
>>
>>>    /* (e)LLC caching options */
>>>    #define LE_0_PAGETABLE		_LE_CACHEABILITY(0)
>>>    #define LE_1_UC			_LE_CACHEABILITY(1)
>>> @@ -283,14 +281,17 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv,
>>>    	if (IS_ICELAKE(dev_priv)) {
>>>    		table->size  = ARRAY_SIZE(icelake_mocs_table);
>>> +		table->n_entries = GEN11_NUM_MOCS_ENTRIES;
>>>    		table->table = icelake_mocs_table;
>>>    		result = true;
>>>    	} else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
>>>    		table->size  = ARRAY_SIZE(skylake_mocs_table);
>>> +		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
>>>    		table->table = skylake_mocs_table;
>>>    		result = true;
>>>    	} else if (IS_GEN9_LP(dev_priv)) {
>>>    		table->size  = ARRAY_SIZE(broxton_mocs_table);
>>> +		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
>>>    		table->table = broxton_mocs_table;
>>>    		result = true;
>>>    	} else {
>>> @@ -348,8 +349,6 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>>>    	if (!get_mocs_settings(dev_priv, &table))
>>>    		return;
>>> -	GEM_BUG_ON(table.size > NUM_MOCS_ENTRIES(dev_priv));
>>> -
>>>    	for (index = 0; index < table.size; index++)
>>>    		I915_WRITE(mocs_register(engine->id, index),
>>>    			   table.table[index].control_value);
>>> @@ -362,7 +361,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>>>    	 * Entry 0 in the table is uncached - so we are just writing
>>>    	 * that value to all the used entries.
>>>    	 */
>>> -	for (; index < NUM_MOCS_ENTRIES(dev_priv); index++)
>>> +	for (; index < table.n_entries; index++)
>>>    		I915_WRITE(mocs_register(engine->id, index),
>>>    			   table.table[0].control_value);
>>>    }
>>> @@ -380,19 +379,18 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>>>    static int emit_mocs_control_table(struct i915_request *rq,
>>>    				   const struct drm_i915_mocs_table *table)
>>>    {
>>> -	struct drm_i915_private *i915 = rq->i915;
>>>    	enum intel_engine_id engine = rq->engine->id;
>>>    	unsigned int index;
>>>    	u32 *cs;
>>> -	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>>> +	if (GEM_WARN_ON(table->size > table->n_entries))
>>>    		return -ENODEV;
>>
>> This (and the other one below) could go to get_mocs_settings(), which is
>> now the authority for setting things up correctly. So fire a warn from
>> there and say no mocs for you.
> 
> Yep, this seems reasonable. I will change that in the next version after
> this one settles.
> 
> Lucas De Marchi
> 
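
For the record, what I had in mind is roughly the below - an untested
sketch only, reusing the names from this patch:

	static bool get_mocs_settings(struct drm_i915_private *dev_priv,
				      struct drm_i915_mocs_table *table)
	{
		bool result = false;

		if (IS_ICELAKE(dev_priv)) {
			table->size = ARRAY_SIZE(icelake_mocs_table);
			table->n_entries = GEN11_NUM_MOCS_ENTRIES;
			table->table = icelake_mocs_table;
			result = true;
		}
		/* ... remaining platforms exactly as in this patch ... */

		/*
		 * Catch a table which claims more defined entries than
		 * the platform can program, once, here, instead of
		 * warning in every caller.
		 */
		if (GEM_WARN_ON(table->size > table->n_entries))
			return false;

		return result;
	}

With that, both emit_mocs_control_table() and emit_mocs_l3cc_table() can
drop their checks and trust the table.
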
>>
>>> -	cs = intel_ring_begin(rq, 2 + 2 * NUM_MOCS_ENTRIES(i915));
>>> +	cs = intel_ring_begin(rq, 2 + 2 * table->n_entries);
>>>    	if (IS_ERR(cs))
>>>    		return PTR_ERR(cs);
>>> -	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915));
>>> +	*cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
>>>    	for (index = 0; index < table->size; index++) {
>>>    		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>>> @@ -407,7 +405,7 @@ static int emit_mocs_control_table(struct i915_request *rq,
>>>    	 * Entry 0 in the table is uncached - so we are just writing
>>>    	 * that value to all the used entries.
>>>    	 */
>>> -	for (; index < NUM_MOCS_ENTRIES(i915); index++) {
>>> +	for (; index < table->n_entries; index++) {
>>>    		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>>>    		*cs++ = table->table[0].control_value;
>>>    	}
>>> @@ -440,18 +438,17 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
>>>    static int emit_mocs_l3cc_table(struct i915_request *rq,
>>>    				const struct drm_i915_mocs_table *table)
>>>    {
>>> -	struct drm_i915_private *i915 = rq->i915;
>>>    	unsigned int i;
>>>    	u32 *cs;
>>> -	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>>> +	if (GEM_WARN_ON(table->size > table->n_entries))
>>>    		return -ENODEV;
>>> -	cs = intel_ring_begin(rq, 2 + NUM_MOCS_ENTRIES(i915));
>>> +	cs = intel_ring_begin(rq, 2 + table->n_entries);
>>>    	if (IS_ERR(cs))
>>>    		return PTR_ERR(cs);
>>> -	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915) / 2);
>>> +	*cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
>>>    	for (i = 0; i < table->size/2; i++) {
>>>    		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
>>> @@ -470,7 +467,7 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
>>>    	 * this will be uncached. Leave the last pair uninitialised as
>>>    	 * they are reserved by the hardware.
>>>    	 */
>>> -	for (; i < NUM_MOCS_ENTRIES(i915) / 2; i++) {
>>> +	for (; i < table->n_entries / 2; i++) {
>>>    		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
>>>    		*cs++ = l3cc_combine(table, 0, 0);
>>>    	}
>>> @@ -517,7 +514,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
>>>    	 * this will be uncached. Leave the last pair as initialised as
>>>    	 * they are reserved by the hardware.
>>>    	 */
>>> -	for (; i < (NUM_MOCS_ENTRIES(dev_priv) / 2); i++)
>>> +	for (; i < table.n_entries / 2; i++)
>>>    		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0));
>>>    }
>>>
>>
>> Otherwise looks good - thanks for agreeing the suggestion makes sense!
>>
>> Regards,
>>
>> Tvrtko
> 

