[Intel-gfx] [PATCH 3/4] drm/i915: HSW GT3 Slices: exec flag to warn kernel that userspace is using predication
Rodrigo Vivi
rodrigo.vivi at gmail.com
Wed Oct 16 14:12:46 CEST 2013
Hi Chris,
Thank you very much for all suggestions. Will do the changes. But for
that question below I don't have the clear answer:
On Wed, Oct 16, 2013 at 6:15 AM, Chris Wilson <chris at chris-wilson.co.uk> wrote:
> On Tue, Oct 15, 2013 at 05:50:49PM -0300, Rodrigo Vivi wrote:
>> If Userspace isn't using MI_PREDICATE all slices must be enabled for
>> backward compatibility.
>>
>> If I915_EXEC_USE_PREDICATE isn't set and defaul is set to half, kernel will force
>> all slices on.
>>
>> v2: fix the inverted logic for backwards compatibility
>> USE_PREDICATE unset force gt_full when defaul is half
>> instead of GT_FULL flag.
>>
>> CC: Eric Anholt <eric at anholt.net>
>> CC: Kenneth Graunke <kenneth at whitecape.org>
>> Signed-off-by: Rodrigo Vivi <rodrigo.vivi at gmail.com>
>> ---
>> drivers/gpu/drm/i915/i915_drv.h | 8 ++++
>> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 67 ++++++++++++++++++++++++++++++
>> drivers/gpu/drm/i915/i915_reg.h | 11 +++++
>> drivers/gpu/drm/i915/i915_sysfs.c | 11 ++++-
>> drivers/gpu/drm/i915/intel_display.c | 6 +++
>> drivers/gpu/drm/i915/intel_drv.h | 1 +
>> drivers/gpu/drm/i915/intel_pm.c | 39 ++++++++++++++++-
>> include/uapi/drm/i915_drm.h | 8 +++-
>> 8 files changed, 146 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 685fb1d..bd4774e 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1219,6 +1219,12 @@ struct i915_package_c8 {
>> } regsave;
>> };
>>
>> +struct i915_gt_slices {
>> + int state_default;
>
> Either use it as an integer (a count of slices enabled by default) or
> make it an enum.
>
>> + int forcing_full;
>
> bool legacy_userspace_busy; ?
>
>> + struct mutex m_lock;
>
> Ugh. Just struct mutex lock; /* locks all access to this struct and
> slice registers */ will suffice
>
>> +};
>> +
>> typedef struct drm_i915_private {
>> struct drm_device *dev;
>> struct kmem_cache *slab;
>> @@ -1418,6 +1424,8 @@ typedef struct drm_i915_private {
>>
>> struct i915_package_c8 pc8;
>>
>> + struct i915_gt_slices gt_slices;
>> +
>> /* Old dri1 support infrastructure, beware the dragons ya fools entering
>> * here! */
>> struct i915_dri1_state dri1;
>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> index 0ce0d47..eb09a51 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> @@ -923,6 +923,66 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
>> }
>>
>> static int
>> +i915_gt_full_immediate(struct drm_device *dev, struct intel_ring_buffer *ring)
>
> That's a misnomer.
>
>> +{
>> + drm_i915_private_t *dev_priv = dev->dev_private;
>> + int ret;
>> +
>> + if (!HAS_SLICE_SHUTDOWN(dev) || ring == &dev_priv->ring[BCS])
>> + return 0;
>> +
>> + ret = intel_ring_begin(ring, 3);
>> + if (ret)
>> + return ret;
>> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> + intel_ring_emit(ring, HSW_GT_SLICE_INFO);
>> + intel_ring_emit(ring, SLICE_SEL_BOTH);
>> + intel_ring_advance(ring);
>> +
>> + ret = intel_ring_begin(ring, 3);
>> + if (ret)
>> + return ret;
>> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> + intel_ring_emit(ring, MI_PREDICATE_RESULT_2);
>> + intel_ring_emit(ring, LOWER_SLICE_ENABLED);
>> + intel_ring_advance(ring);
>> +
>> + ret = intel_ring_begin(ring, 3);
>> + if (ret)
>> + return ret;
>> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> + intel_ring_emit(ring, HSW_SLICESHUTDOWN);
>> + intel_ring_emit(ring, ~SLICE_SHUTDOWN);
>> + intel_ring_advance(ring);
>> +
>> + ret = intel_ring_begin(ring, 3);
>> + if (ret)
>> + return ret;
>> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> + intel_ring_emit(ring, RC_IDLE_MAX_COUNT);
>> + intel_ring_emit(ring, CS_IDLE_COUNT_1US);
>> + intel_ring_advance(ring);
>> +
>> + ret = intel_ring_begin(ring, 3);
>> + if (ret)
>> + return ret;
>> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> + intel_ring_emit(ring, WAIT_FOR_RC6_EXIT);
>> + intel_ring_emit(ring, WAIT_RC6_EXIT | MASK_WAIT_RC6_EXIT);
>> + intel_ring_advance(ring);
>> +
>> + ret = intel_ring_begin(ring, 3);
>> + if (ret)
>> + return ret;
>> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> + intel_ring_emit(ring, RC_IDLE_MAX_COUNT);
>> + intel_ring_emit(ring, CS_IDLE_COUNT_5US);
>> + intel_ring_advance(ring);
>> +
>> + return 0;
>> +}
>> +
>> +static int
>> i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>> struct drm_file *file,
>> struct drm_i915_gem_execbuffer2 *args,
>> @@ -999,6 +1059,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>> return -EINVAL;
>> }
>>
>> + if ((args->flags & I915_EXEC_USE_PREDICATE) == 0 &&
>> + dev_priv->gt_slices.state_default == 0 &&
>> + !dev_priv->gt_slices.forcing_full) {
>
> Always set legacy_userspace_busy here so that a change in state_default
> cannot possibly break currently active users i.e. they are independent
> bookkeeping.
>
>> + dev_priv->gt_slices.forcing_full = true;
>> + i915_gt_full_immediate(dev, ring);
>
> What happens when this fails? If it only partially emits the commands,
> etc.
This is a very good question. If it fails, my only concern would be
the sync between HSW_GT_SLICE_INFO and MI_PREDICATE_RESULT_2. But I
don't know how to do this during the exec buf without delaying the
execution. Do you have any suggestion?
>
>> + }
>> +
>> mode = args->flags & I915_EXEC_CONSTANTS_MASK;
>> mask = I915_EXEC_CONSTANTS_MASK;
>> switch (mode) {
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index 497c441..0146bef 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -277,6 +277,17 @@
>> #define SLICE_STATUS_MAIN_ON (2<<0)
>> #define SLICE_STATUS_BOTH_ON (3<<0)
>>
>> +#define HSW_SLICESHUTDOWN 0xA190
>> +#define SLICE_SHUTDOWN (1<<0)
>> +
>> +#define RC_IDLE_MAX_COUNT 0x2054
>> +#define CS_IDLE_COUNT_1US (1<<1)
>> +#define CS_IDLE_COUNT_5US (1<<3)
>> +
>> +#define WAIT_FOR_RC6_EXIT 0x20CC
>> +#define WAIT_RC6_EXIT (1<<0)
>> +#define MASK_WAIT_RC6_EXIT (1<<16)
>> +
>> /*
>> * 3D instructions used by the kernel
>> */
>> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
>> index 86ccd52..25b0c5b 100644
>> --- a/drivers/gpu/drm/i915/i915_sysfs.c
>> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
>> @@ -124,9 +124,13 @@ static ssize_t gt_slice_config_show(struct device *kdev,
>> struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev);
>> struct drm_device *dev = minor->dev;
>> struct drm_i915_private *dev_priv = dev->dev_private;
>> + bool full;
>>
>> - return sprintf(buf, "%s\n", I915_READ(MI_PREDICATE_RESULT_2) ==
>> - LOWER_SLICE_ENABLED ? "full" : "half");
>> + mutex_lock(&dev_priv->gt_slices.m_lock);
>> + full = I915_READ(MI_PREDICATE_RESULT_2) == LOWER_SLICE_ENABLED;
>> + mutex_unlock(&dev_priv->gt_slices.m_lock);
>
> This locking is not stopping any races - it is superfluous.
>
>> + return sprintf(buf, "%s\n", full ? "full" : "half");
>> }
>>
>> static ssize_t gt_slice_config_store(struct device *kdev,
>> @@ -135,16 +139,19 @@ static ssize_t gt_slice_config_store(struct device *kdev,
>> {
>> struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev);
>> struct drm_device *dev = minor->dev;
>> + struct drm_i915_private *dev_priv = dev->dev_private;
>> int ret;
>>
>> if (!strncmp(buf, "full", sizeof("full") - 1)) {
>> ret = intel_set_gt_full(dev);
>> if (ret)
>> return ret;
>> + dev_priv->gt_slices.state_default = 1;
>> } else if (!strncmp(buf, "half", sizeof("half") - 1)) {
>> ret = intel_set_gt_half(dev);
>> if (ret)
>> return ret;
>> + dev_priv->gt_slices.state_default = 0;
>> } else
>> return -EINVAL;
>> return count;
>> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
>> index 4f1b636..1a2f8bc 100644
>> --- a/drivers/gpu/drm/i915/intel_display.c
>> +++ b/drivers/gpu/drm/i915/intel_display.c
>> @@ -7778,6 +7778,12 @@ void intel_mark_idle(struct drm_device *dev)
>>
>> if (dev_priv->info->gen >= 6)
>> gen6_rps_idle(dev->dev_private);
>> +
>> + if (HAS_SLICE_SHUTDOWN(dev) && dev_priv->gt_slices.forcing_full &&
>> + dev_priv->gt_slices.state_default == 0) {
>> + intel_set_gt_half_async(dev);
>> + dev_priv->gt_slices.forcing_full = false;
>
> Again, you want to be ignoring state_default when changing
> legacy_userspace_busy.
>
> The lower level slice config handling code should be making the judgment
> based on all parameters after one changes.
> -Chris
>
> --
> Chris Wilson, Intel Open Source Technology Centre
--
Rodrigo Vivi
Blog: http://blog.vivi.eng.br
More information about the Intel-gfx
mailing list