[Intel-gfx] [PATCH v3] drm/i915/mocs: Program MOCS for all engines on init

Chris Wilson chris at chris-wilson.co.uk
Fri Mar 11 20:32:36 UTC 2016


On Fri, Mar 11, 2016 at 02:00:22PM +0000, Peter Antoine wrote:
> Allow for the MOCS to be programmed for all engines.
> Currently we program the MOCS when the first render batch
> goes through. This works on most platforms but fails on
> platforms that do not run a render batch early,
> i.e. headless servers. The patch now programs all initialised engines
> on init and the RCS is programmed again within the initial batch. This
> is done for predictable consistency with regards to the hardware
> context.
> 
> Hardware context loading sets the values of the MOCS for RCS
> and L3CC. Programming them from within the batch makes sure that
> the render context is valid, no matter what the previous state of
> the saved-context was.
> 
> v2: posted correct version to the mailing list.
> v3: moved programming to within engine->init_hw() (Chris Wilson)
> 
> Signed-off-by: Peter Antoine <peter.antoine at intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c   |   3 +
>  drivers/gpu/drm/i915/intel_lrc.c  |   4 ++
>  drivers/gpu/drm/i915/intel_mocs.c | 120 +++++++++++++++++++++++++++++++++-----
>  drivers/gpu/drm/i915/intel_mocs.h |   2 +
>  4 files changed, 114 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b854af2..3e18cbd 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -32,6 +32,7 @@
>  #include "i915_vgpu.h"
>  #include "i915_trace.h"
>  #include "intel_drv.h"
> +#include "intel_mocs.h"
>  #include <linux/shmem_fs.h>
>  #include <linux/slab.h>
>  #include <linux/swap.h>
> @@ -4882,6 +4883,8 @@ i915_gem_init_hw(struct drm_device *dev)
>  			goto out;
>  	}
>  
> +	program_mocs_l3cc_table(dev);

intel_mocs_init_l3cc_table(dev_priv);

> +
>  	/* We can't enable contexts until all firmware is loaded */
>  	if (HAS_GUC_UCODE(dev)) {
>  		ret = intel_guc_ucode_load(dev);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 27c9ee3..2ef7a161 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1603,6 +1603,8 @@ static int gen8_init_common_ring(struct intel_engine_cs *ring)
>  
>  	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
>  
> +	program_mocs_control_table(ring);

ret = intel_mocs_init_engine(ring);

Check and propagate the errors!

> +
>  	return 0;
>  }
>  
> @@ -2151,6 +2153,8 @@ static int logical_render_ring_init(struct drm_device *dev)
>  			  ret);
>  	}
>  
> +	program_mocs_control_table(ring);

Due to the splitting inside intel_lrc, it actually makes sense to do a
ret = engine->init_hw(engine) call here. But for the time being, adding
the setup here is redundant.

>  /**
> + * program_mocs_control_table() - emit the mocs control table
> + * @ring:	The engine for whom to emit the registers.
> + *
> + * This function simply emits a MI_LOAD_REGISTER_IMM command for the
> + * given table starting at the given address.
> + *
> + * Return: 0 on success, otherwise the error status.
> + */
> +int program_mocs_control_table(struct intel_engine_cs *ring)
> +{
> +	struct drm_device *dev = ring->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_i915_mocs_table table;
> +	unsigned int index;
> +
> +	if (get_mocs_settings(dev, &table)) {

if (!get_mocs_setttings())
	return 0;

And then we can lose one level of indentation.

> +		if (WARN_ON(table.size > GEN9_NUM_MOCS_ENTRIES))
> +			return -ENODEV;
> +
> +		for (index = 0; index < table.size; index++) {
> +			I915_WRITE(mocs_register(ring->id, index),
> +				table.table[index].control_value);

Whitespace makes this easier to read. Please align continuation lines
with the opening bracket.

> +		}
> +
> +		/*
> +		 * Ok, now set the unused entries to uncached. These entries
> +		 * are officially undefined and no contract for the contents
> +		 * and settings is given for these entries.
> +		 *
> +		 * Entry 0 in the table is uncached - so we are just writing
> +		 * that value to all the used entries.
> +		 */
> +		for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
> +			I915_WRITE(mocs_register(ring->id, index),
> +						table.table[0].control_value);

Especially here.

> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +/**
>   * emit_mocs_control_table() - emit the mocs control table
>   * @req:	Request to set up the MOCS table for.
>   * @table:	The values to program into the control regs.
> @@ -210,7 +252,8 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req,
>  				MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES));
>  
>  	for (index = 0; index < table->size; index++) {
> -		intel_logical_ring_emit_reg(ringbuf, mocs_register(ring, index));
> +		intel_logical_ring_emit_reg(ringbuf,
> +					mocs_register(ring, index));
>  		intel_logical_ring_emit(ringbuf,
>  					table->table[index].control_value);
>  	}
> @@ -224,8 +267,10 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req,
>  	 * that value to all the used entries.
>  	 */
>  	for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
> -		intel_logical_ring_emit_reg(ringbuf, mocs_register(ring, index));
> -		intel_logical_ring_emit(ringbuf, table->table[0].control_value);
> +		intel_logical_ring_emit_reg(ringbuf,
> +						mocs_register(ring, index));
> +		intel_logical_ring_emit(ringbuf,
> +						table->table[0].control_value);
>  	}

I'll pretend I never saw these spurious chunks.

>  
>  	intel_logical_ring_emit(ringbuf, MI_NOOP);
> @@ -302,6 +347,57 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req,
>  }
>  
>  /**
> + * program_mocs_l3cc_table() - program the mocs control table
> + * @dev:      The the device to be programmed.
> + * @table:    The values to program into the control regs.
> + *
> + * This function simply programs the mocs registers for the given table
> + * starting at the given address. This register set is  programmed in pairs.
> + *
> + * These registers may get programmed more than once, it is simpler to
> + * re-program 32 registers than maintain the state of when they were programmed.
> + * We are always reprogramming with the same values and this only on context
> + * start.
> + *
> + * Return: Nothing.
> + */
> +void program_mocs_l3cc_table(struct drm_device *dev)
> +{
> +	unsigned int count;
> +	unsigned int i;
> +	u32 value;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_i915_mocs_table table;
> +
> +	if (get_mocs_settings(dev, &table)) {

Indentation, whitespace, passing around dev but using dev_priv

> +		u32 filler = (table.table[0].l3cc_value & 0xffff) |
> +				((table.table[0].l3cc_value & 0xffff) << 16);

l3cc_value is 16bit so the masking is garbage.

static inline u32 l3cc_combine(u16 lower, u16 upper)
{
	return lower | upper << 16;
}

for (i = 0; i < table.size/2; i++) 
	I915_WRITE(GEN9_LNCFCMOCS(i),
		   l3cc_combine(table.table[2*i].l3cc_value,
				table.table[2*i+1].l3cc_value));

if (table.size & 1) {
	I915_WRITE(GEN9_LNCFCMOCS(i),
		   l3cc_combine(table.table[2*i].l3cc_value,
				table.table[0].l3cc_value));
	i++;
}

> +		/*
> +		 * Now set the rest of the table to uncached - use entry 0 as
> +		 * this will be uncached. Leave the last pair as initialised as
> +		 * they are reserved by the hardware.
> +		 */
for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
	I915_WRITE(GEN9_LNCFCMOCS(i),
		   l3cc_combine(table.table[0].l3cc_value,
				table.table[0].l3cc_value));

is easier on the eyes.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


More information about the Intel-gfx mailing list