[Intel-gfx] [PATCH] drm/i915 : Added Programming of the MOCS
Matt Turner
mattst88 at gmail.com
Thu Jun 4 14:33:07 PDT 2015
On Thu, Jun 4, 2015 at 11:27 AM, Peter Antoine <peter.antoine at intel.com> wrote:
> This change adds the programming of the MOCS registers to the gen 9+
> platforms. This change set programs the MOCS register values to a set
> of values that are defined to be optimal.
>
> It creates a fixed register set that is programmed across the different
> engines so that all engines have the same table. This is done as the
> main RCS context only holds the registers for itself and the shared
> L3 values. By trying to keep the registers consistent across the
> different engines it should make the programming for the registers
> consistent.
>
> Signed-off-by: Peter Antoine <peter.antoine at intel.com>
> ---
> drivers/gpu/drm/i915/Makefile | 3 +-
> drivers/gpu/drm/i915/i915_reg.h | 9 ++
> drivers/gpu/drm/i915/intel_lrc.c | 68 +++++++++++
> drivers/gpu/drm/i915/intel_mocs.c | 241 ++++++++++++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/intel_mocs.h | 101 ++++++++++++++++
> 5 files changed, 421 insertions(+), 1 deletion(-)
> create mode 100644 drivers/gpu/drm/i915/intel_mocs.c
> create mode 100644 drivers/gpu/drm/i915/intel_mocs.h
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index b7ddf48..cd7b910 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -36,7 +36,8 @@ i915-y += i915_cmd_parser.o \
> i915_trace_points.o \
> intel_lrc.o \
> intel_ringbuffer.o \
> - intel_uncore.o
> + intel_uncore.o \
> + intel_mocs.o
>
> # autogenerated null render state
> i915-y += intel_renderstate_gen6.o \
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 7213224..3a435b5 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7829,4 +7829,13 @@ enum skl_disp_power_wells {
> #define _PALETTE_A (dev_priv->info.display_mmio_offset + 0xa000)
> #define _PALETTE_B (dev_priv->info.display_mmio_offset + 0xa800)
>
> +/* MOCS (Memory Object Control State) registers */
> +#define GEN9_LNCFCMOCS0 (0xB020) /* L3 Cache Control base */
> +
> +#define GEN9_GFX_MOCS_0 (0xc800) /* Graphics MOCS base register*/
> +#define GEN9_MFX0_MOCS_0 (0xc900) /* Media 0 MOCS base register*/
> +#define GEN9_MFX1_MOCS_0 (0xcA00) /* Media 1 MOCS base register*/
> +#define GEN9_VEBOX_MOCS_0 (0xcB00) /* Video MOCS base register*/
> +#define GEN9_BLT_MOCS_0 (0xcc00) /* Blitter MOCS base register*/
> +
> #endif /* _I915_REG_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 9f5485d..c875569 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -135,6 +135,7 @@
> #include <drm/drmP.h>
> #include <drm/i915_drm.h>
> #include "i915_drv.h"
> +#include "intel_mocs.h"
>
> #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
> #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
> @@ -1370,6 +1371,67 @@ out:
> return ret;
> }
>
> +/*
> + * i915_gem_program_mocs() - program the MOCS register.
> + *
> + * ring: The ring that the programming batch will be run in.
> + * ctx: The intel_context to be used.
> + *
> + * This function will emit a batch buffer with the values required for
> + * programming the MOCS register values for all the currently supported
> + * rings.
> + *
> + * Return: 0 on success, otherwise the error status.
> + */
> +static int i915_gem_program_mocs(struct intel_engine_cs *ring,
> + struct intel_context *ctx)
> +{
> + int ret = 0;
> +
> + struct drm_i915_mocs_table t;
> + struct drm_device *dev = ring->dev;
> + struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
> +
> + if (get_mocs_settings(dev, &t)) {
> + u32 table_size;
> +
> + /*
> + * OK. For each supported ring:
> + * table_size * 2 dwords for each control_value
> + * plus table/2 dwords for l3cc values.
> + *
> + * Plus 1 for the load command and 1 for the NOOP per ring
> + * and the l3cc programming.
> + */
> + table_size = GEN9_NUM_MOCS_RINGS * ((2 * t.size) + 2) +
> + t.size + 2;
> + ret = intel_logical_ring_begin(ringbuf, ctx, table_size);
> + if (ret) {
> + DRM_ERROR("intel_logical_ring_begin failed %d\n", ret);
> + return ret;
> + }
> +
> + /* program the control registers */
> + emit_mocs_control_table(ringbuf, &t, GEN9_GFX_MOCS_0);
> + emit_mocs_control_table(ringbuf, &t, GEN9_MFX0_MOCS_0);
> + emit_mocs_control_table(ringbuf, &t, GEN9_MFX1_MOCS_0);
> + emit_mocs_control_table(ringbuf, &t, GEN9_VEBOX_MOCS_0);
> + emit_mocs_control_table(ringbuf, &t, GEN9_BLT_MOCS_0);
> +
> + /* now program the l3cc registers */
> + emit_mocs_l3cc_table(ringbuf, &t);
> +
> + intel_logical_ring_advance(ringbuf);
> +
> + DRM_INFO("MOCS: Table set in Context\n");
> + } else {
> + DRM_INFO("MOCS: Table Not supported on platform\n");
> + }
> +
> + return ret;
> +}
> +
> +
> static int gen8_init_rcs_context(struct intel_engine_cs *ring,
> struct intel_context *ctx)
> {
> @@ -1379,6 +1441,12 @@ static int gen8_init_rcs_context(struct intel_engine_cs *ring,
> if (ret)
> return ret;
>
> + /*
> + * Failing to program the MOCS is non-fatal. The system will not
> + * run at peak performance. So generate a warning and carry on.
> + */
> + WARN_ON(i915_gem_program_mocs(ring, ctx) != 0);
> +
> return intel_lr_context_render_state_init(ring, ctx);
> }
>
> diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
> new file mode 100644
> index 0000000..20c9736
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_mocs.c
> @@ -0,0 +1,241 @@
> +/*
> + * Copyright (c) 2015 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions: *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + *
> + * Authors:
> + * Peter Antoine <peter.antoine at intel.com>
> + */
> +
> +#include "intel_mocs.h"
> +#include "intel_lrc.h"
> +#include "intel_ringbuffer.h"
> +
> +/*
> + * MOCS tables
> + *
> + * These are the MOCS tables that are programmed across all the rings.
> + * The control value is programmed to all the rings that support the
> + * MOCS registers. While the l3cc_values are only programmed to the
> + * LNCFCMOCS0 - LNCFCMOCS32 registers.
> + *
> + * NOTE: These tables MUST start with being uncached {0,0} and
> + * the length MUST be less than 63 as the last two registers are
> + * reserved by the hardware.
> + */
> +struct drm_i915_mocs_entry skylake_mocs_table[] = {
Presumably you want this...
> + /* {0x00000009, 0x0010} */
> + {(MOCS_CACHEABILITY(1) | MOCS_TGT_CACHE(2) | MOCS_LRUM(0) |
> + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) |
> + MOCS_SCF(0)),
> + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(1))},
> + /* {0x0000003b, 0x0030} */
> + {(MOCS_CACHEABILITY(3) | MOCS_TGT_CACHE(2) | MOCS_LRUM(3) |
> + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) |
> + MOCS_SCF(0)),
> + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(3))},
> + /* {0x00000039, 0x0010} */
> + {(MOCS_CACHEABILITY(1) | MOCS_TGT_CACHE(2) | MOCS_LRUM(3) |
> + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) |
> + MOCS_SCF(0)),
> + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(1))},
> + /* {0x00000017, 0x0030} */
> + {(MOCS_CACHEABILITY(3) | MOCS_TGT_CACHE(1) | MOCS_LRUM(1) |
> + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) |
> + MOCS_SCF(0)),
> + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(3))},
> + /* {0x00000017, 0x0010} */
> + {(MOCS_CACHEABILITY(3) | MOCS_TGT_CACHE(1) | MOCS_LRUM(1) |
> + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) |
> + MOCS_SCF(0)),
> + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(1))},
> + /* {0x00000019, 0x0010} */
> + {(MOCS_CACHEABILITY(1) | MOCS_TGT_CACHE(2) | MOCS_LRUM(1) |
> + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) |
> + MOCS_SCF(0)),
> + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(1))},
> + /* {0x00000037, 0x0030} */
> + {(MOCS_CACHEABILITY(3) | MOCS_TGT_CACHE(1) | MOCS_LRUM(3) |
> + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) |
> + MOCS_SCF(0)),
> + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(3))},
> + /* {0x0000003b, 0x0010} */
> + {(MOCS_CACHEABILITY(3) | MOCS_TGT_CACHE(2) | MOCS_LRUM(3) |
> + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) |
> + MOCS_SCF(0)),
> + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(1))},
> +};
> +
> +struct drm_i915_mocs_entry broxton_mocs_table[] = {
... and this to be "static const"?
> + /* {0x00000001, 0x0010} */
> + {(MOCS_CACHEABILITY(1) | MOCS_TGT_CACHE(0) | MOCS_LRUM(0) |
> + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) |
> + MOCS_SCF(0)),
> + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(1))},
> + /* {0x00004001, 0x0010} */
> + {(MOCS_CACHEABILITY(1) | MOCS_TGT_CACHE(0) | MOCS_LRUM(0) |
> + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) |
> + MOCS_SCF(1)),
> + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(1))},
> + /* {0x0000403b, 0x0030} */
> + {(MOCS_CACHEABILITY(3) | MOCS_TGT_CACHE(2) | MOCS_LRUM(3) |
> + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) |
> + MOCS_SCF(1)),
> + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(3))},
> + /* {0x0000401b, 0x0030} */
> + {(MOCS_CACHEABILITY(3) | MOCS_TGT_CACHE(2) | MOCS_LRUM(1) |
> + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) |
> + MOCS_SCF(1)),
> + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(3))},
> + /* {0x00000017, 0x0010} */
> + {(MOCS_CACHEABILITY(3) | MOCS_TGT_CACHE(1) | MOCS_LRUM(1) |
> + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) |
> + MOCS_SCF(0)),
> + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(1))},
> + /* {0x00000019, 0x0010} */
> + {(MOCS_CACHEABILITY(1) | MOCS_TGT_CACHE(2) | MOCS_LRUM(1) |
> + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) |
> + MOCS_SCF(0)),
> + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(1))},
> + /* {0x00000037, 0x0030} */
> + {(MOCS_CACHEABILITY(3) | MOCS_TGT_CACHE(1) | MOCS_LRUM(3) |
> + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) |
> + MOCS_SCF(0)),
> + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(3))},
> + /* {0x0000003b, 0x0010} */
> + {(MOCS_CACHEABILITY(3) | MOCS_TGT_CACHE(2) | MOCS_LRUM(3) |
> + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) |
> + MOCS_SCF(0)),
> + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(1))},
> +};
> +
> +/**
> + * get_mocs_settings
> + *
> + * This function will return the values of the MOCS table that needs to
> + * be programmed for the platform. It will return the values that need
> + * to be programmed and if they need to be programmed.
> + *
> + * If the return value is false then the registers do not need programming.
> + */
> +bool get_mocs_settings(struct drm_device *dev,
> + struct drm_i915_mocs_table *table) {
> + bool result = false;
> +
> + if (IS_SKYLAKE(dev)) {
> + table->size = ARRAY_SIZE(skylake_mocs_table);
> + table->table = skylake_mocs_table;
> + result = true;
> + } else if (IS_BROXTON(dev)) {
> + table->size = ARRAY_SIZE(broxton_mocs_table);
> + table->table = broxton_mocs_table;
> + result = true;
> + } else {
> + /* Platform that should have a MOCS table does not */
> + WARN_ON(INTEL_INFO(dev)->gen >= 9);
> + }
> +
> + return result;
> +}
> +
> +/**
> + * emit_mocs_control_table() - emit the mocs control table
> + * @ringbuf: The ringbuffer that the commands are emitted into.
> + * @table: The values to program into the control regs.
> + * @reg_base: The base for the Engine that needs to be programmed.
> + *
> + * This function simply emits a MI_LOAD_REGISTER_IMM command for the
> + * given table starting at the given address.
> + *
> + * Return: Nothing.
> + */
> +void emit_mocs_control_table(struct intel_ringbuffer *ringbuf,
> + struct drm_i915_mocs_table *table,
> + u32 reg_base)
> +{
> + unsigned int index;
> +
> + intel_logical_ring_emit(ringbuf,
> + MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES));
> +
> + for (index = 0; index < table->size; index++) {
> + intel_logical_ring_emit(ringbuf, reg_base + (index * 4));
> + intel_logical_ring_emit(ringbuf,
> + table->table[index].control_value);
> + }
> +
> + /*
> + * Ok, now set the unused entries to uncached. These entries are
> + * officially undefined and no contract is given for the contents
> + * and settings of these entries.
> + *
> + * Entry 0 in the table is uncached - so we just write that
> + * value to all the unused entries.
> + */
> + for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
> + intel_logical_ring_emit(ringbuf, reg_base + (index * 4));
> + intel_logical_ring_emit(ringbuf, table->table[0].control_value);
> + }
> +
> + intel_logical_ring_emit(ringbuf, MI_NOOP);
> +}
> +
> +/**
> + * emit_mocs_l3cc_table() - emit the mocs l3cc table
> + * @ringbuf: The ringbuffer that the commands are emitted into.
> + * @table: The values to program into the control regs.
> + *
> + * This function simply emits a MI_LOAD_REGISTER_IMM command for the
> + * given table starting at the given address. This register set is programmed
> + * in pairs.
> + *
> + * Return: Nothing.
> + */
> +void emit_mocs_l3cc_table(struct intel_ringbuffer *ringbuf,
> + struct drm_i915_mocs_table *table) {
> + unsigned int count;
> + unsigned int index;
> +
> + intel_logical_ring_emit(ringbuf,
> + MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES/2));
I think the style is to always put spaces around operators (the /2).
Occurs twice in the two for-loops below as well.
> +
> + for (index = 0, count = 0; index < table->size/2; index++, count += 2) {
> + u32 value = (table->table[count].l3cc_value & 0xffff) |
> + ((table->table[count + 1].l3cc_value & 0xffff) << 16);
> +
> + intel_logical_ring_emit(ringbuf, GEN9_LNCFCMOCS0 + (index * 4));
> + intel_logical_ring_emit(ringbuf, value);
> + }
> +
> + /*
> + * Now set the rest of the table to uncached - use entry 0 as this
> + * will be uncached. Leave the last pair initialised as reserved by
> + * the hardware.
> + */
> + for (; index < GEN9_NUM_MOCS_ENTRIES/2; index++) {
> + u32 value = (table->table[0].l3cc_value & 0xffff) |
> + ((table->table[0].l3cc_value & 0xffff) << 16);
> +
> + intel_logical_ring_emit(ringbuf, GEN9_LNCFCMOCS0 + (index * 4));
> + intel_logical_ring_emit(ringbuf, value);
> + }
More information about the Intel-gfx
mailing list