[Intel-gfx] [PATCH v3] drm/i915/mocs: Program MOCS for all engines on init
Chris Wilson
chris at chris-wilson.co.uk
Fri Mar 11 20:32:36 UTC 2016
On Fri, Mar 11, 2016 at 02:00:22PM +0000, Peter Antoine wrote:
> Allow for the MOCS to be programmed for all engines.
> Currently we program the MOCS when the first render batch
> goes through. This works on most platforms but fails on
> platforms that do not run a render batch early,
> i.e. headless servers. The patch now programs all initialised engines
> on init and the RCS is programmed again within the initial batch. This
> is done for predictable consistency with regards to the hardware
> context.
>
> Hardware context loading sets the values of the MOCS for RCS
> and L3CC. Programming them from within the batch makes sure that
> the render context is valid, no matter what the previous state of
> the saved-context was.
>
> v2: posted correct version to the mailing list.
> v3: moved programming to within engine->init_hw() (Chris Wilson)
>
> Signed-off-by: Peter Antoine <peter.antoine at intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 3 +
> drivers/gpu/drm/i915/intel_lrc.c | 4 ++
> drivers/gpu/drm/i915/intel_mocs.c | 120 +++++++++++++++++++++++++++++++++-----
> drivers/gpu/drm/i915/intel_mocs.h | 2 +
> 4 files changed, 114 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b854af2..3e18cbd 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -32,6 +32,7 @@
> #include "i915_vgpu.h"
> #include "i915_trace.h"
> #include "intel_drv.h"
> +#include "intel_mocs.h"
> #include <linux/shmem_fs.h>
> #include <linux/slab.h>
> #include <linux/swap.h>
> @@ -4882,6 +4883,8 @@ i915_gem_init_hw(struct drm_device *dev)
> goto out;
> }
>
> + program_mocs_l3cc_table(dev);
intel_mocs_init_l3cc_table(dev_priv);
> +
> /* We can't enable contexts until all firmware is loaded */
> if (HAS_GUC_UCODE(dev)) {
> ret = intel_guc_ucode_load(dev);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 27c9ee3..2ef7a161 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1603,6 +1603,8 @@ static int gen8_init_common_ring(struct intel_engine_cs *ring)
>
> memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
>
> + program_mocs_control_table(ring);
ret = intel_mocs_init_engine(ring);
Check and propagate the errors!
> +
> return 0;
> }
>
> @@ -2151,6 +2153,8 @@ static int logical_render_ring_init(struct drm_device *dev)
> ret);
> }
>
> + program_mocs_control_table(ring);
Due to the splitting inside intel_lrc, it actually makes sense to do a
ret = engine->init_hw(engine) call here. But for the time being, adding
the setup here is redundant.
> /**
> + * program_mocs_control_table() - emit the mocs control table
> + * @ring: The engine for whom to emit the registers.
> + *
> + * This function simply emits a MI_LOAD_REGISTER_IMM command for the
> + * given table starting at the given address.
> + *
> + * Return: 0 on success, otherwise the error status.
> + */
> +int program_mocs_control_table(struct intel_engine_cs *ring)
> +{
> + struct drm_device *dev = ring->dev;
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + struct drm_i915_mocs_table table;
> + unsigned int index;
> +
> + if (get_mocs_settings(dev, &table)) {
if (!get_mocs_setttings())
return 0;
And then we can lose one level of indentation.
> + if (WARN_ON(table.size > GEN9_NUM_MOCS_ENTRIES))
> + return -ENODEV;
> +
> + for (index = 0; index < table.size; index++) {
> + I915_WRITE(mocs_register(ring->id, index),
> + table.table[index].control_value);
Whitespace makes this easier to read. Please align continuation lines
with the opening bracket.
> + }
> +
> + /*
> + * Ok, now set the unused entries to uncached. These entries
> + * are officially undefined and no contract for the contents
> + * and settings is given for these entries.
> + *
> + * Entry 0 in the table is uncached - so we are just writing
> + * that value to all the used entries.
> + */
> + for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
> + I915_WRITE(mocs_register(ring->id, index),
> + table.table[0].control_value);
Especially here.
> + }
> + }
> +
> + return 0;
> +}
> +
> +/**
> * emit_mocs_control_table() - emit the mocs control table
> * @req: Request to set up the MOCS table for.
> * @table: The values to program into the control regs.
> @@ -210,7 +252,8 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req,
> MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES));
>
> for (index = 0; index < table->size; index++) {
> - intel_logical_ring_emit_reg(ringbuf, mocs_register(ring, index));
> + intel_logical_ring_emit_reg(ringbuf,
> + mocs_register(ring, index));
> intel_logical_ring_emit(ringbuf,
> table->table[index].control_value);
> }
> @@ -224,8 +267,10 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req,
> * that value to all the used entries.
> */
> for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
> - intel_logical_ring_emit_reg(ringbuf, mocs_register(ring, index));
> - intel_logical_ring_emit(ringbuf, table->table[0].control_value);
> + intel_logical_ring_emit_reg(ringbuf,
> + mocs_register(ring, index));
> + intel_logical_ring_emit(ringbuf,
> + table->table[0].control_value);
> }
I'll pretend I never saw these spurious chunks.
>
> intel_logical_ring_emit(ringbuf, MI_NOOP);
> @@ -302,6 +347,57 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req,
> }
>
> /**
> + * program_mocs_l3cc_table() - program the mocs control table
> + * @dev: The the device to be programmed.
> + * @table: The values to program into the control regs.
> + *
> + * This function simply programs the mocs registers for the given table
> + * starting at the given address. This register set is programmed in pairs.
> + *
> + * These registers may get programmed more than once, it is simpler to
> + * re-program 32 registers than maintain the state of when they were programmed.
> + * We are always reprogramming with the same values and this only on context
> + * start.
> + *
> + * Return: Nothing.
> + */
> +void program_mocs_l3cc_table(struct drm_device *dev)
> +{
> + unsigned int count;
> + unsigned int i;
> + u32 value;
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + struct drm_i915_mocs_table table;
> +
> + if (get_mocs_settings(dev, &table)) {
Indentation, whitespace, passing around dev but using dev_priv
> + u32 filler = (table.table[0].l3cc_value & 0xffff) |
> + ((table.table[0].l3cc_value & 0xffff) << 16);
l3cc_value is 16bit so the masking is garbage.
static inline u32 l3cc_combine(u16 lower, u16 upper)
{
return lower | upper << 16;
}
for (i = 0; i < table.size/2; i++)
I915_WRITE(GEN9_LNCFCMOCS(i),
l3cc_combine(table.table[2*i].l3cc_value,
table.table[2*i+1].l3cc_value));
if (table.size & 1) {
I915_WRITE(GEN9_LNCFCMOCS(i),
l3cc_combine(table.table[2*i].l3cc_value,
table.table[0].l3cc_value));
i++;
}
> + /*
> + * Now set the rest of the table to uncached - use entry 0 as
> + * this will be uncached. Leave the last pair as initialised as
> + * they are reserved by the hardware.
> + */
for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
I915_WRITE(GEN9_LNCFCMOCS(i),
l3cc_combine(table.table[0].l3cc_value,
table.table[0].l3cc_value));
is easier on the eyes.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
More information about the Intel-gfx
mailing list