[Intel-gfx] [PATCH] drm/i915: HSW GT3 Slices: exec flag to warn kernel that userspace is using predication

Tue Oct 22 19:33:16 CEST 2013

On Tue, Oct 22, 2013 at 5:47 AM, Chris Wilson <chris at chris-wilson.co.uk> wrote:
> On Mon, Oct 21, 2013 at 07:00:18PM -0200, Rodrigo Vivi wrote:
>>  static int
>> +i915_legacy_userspace_busy(struct drm_device *dev,
>> +                        struct intel_ring_buffer *ring)
>
> s/i915_legacy_userspace_busy/gt_legacy_userspace_busy/
>
> As that is a bit more distinctive.

ok, makes sense.

>
>> +{
>> +     drm_i915_private_t *dev_priv = dev->dev_private;
>> +     int ret;
>> +
>> +     if (!HAS_SLICE_SHUTDOWN(dev) || ring == &dev_priv->ring[BCS])
>> +             return -ENODEV;
>> +
>> +     if (dev_priv->gt_slices.state_default == 1)
>> +             return -EBADE;
>
> This needs to be replaced.
>
> if (dev_priv->gt_slices.config == 2)
>   return 0;
>
>> +
>> +     ret = intel_ring_begin(ring, 3);
>
> The dword count must be even, or else the hardware chokes.
> However, since we cannot interrupt this sequence and leave the hw in an
> inconsistent state, we need to emit this entire sequence in a single
> block.

makes sense... changed and worked locally here.

>
>> +     if (ret)
>> +             return ret;
>> +     intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> +     intel_ring_emit(ring, HSW_GT_SLICE_INFO);
>> +     intel_ring_emit(ring, SLICE_SEL_BOTH);
>> +     intel_ring_advance(ring);
>> +
>> +     ret = intel_ring_begin(ring, 3);
>> +     if (ret)
>> +             return ret;
>> +     intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> +     intel_ring_emit(ring, MI_PREDICATE_RESULT_2);
>> +     intel_ring_emit(ring, LOWER_SLICE_ENABLED);
>> +     intel_ring_advance(ring);
>> +
>> +     ret = intel_ring_begin(ring, 3);
>> +     if (ret)
>> +             return ret;
>> +     intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> +     intel_ring_emit(ring, HSW_SLICESHUTDOWN);
>> +     intel_ring_emit(ring, ~SLICE_SHUTDOWN);
>> +     intel_ring_advance(ring);
>> +
>> +     ret = intel_ring_begin(ring, 3);
>> +     if (ret)
>> +             return ret;
>> +     intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> +     intel_ring_emit(ring, RC_IDLE_MAX_COUNT);
>> +     intel_ring_emit(ring, CS_IDLE_COUNT_1US);
>> +     intel_ring_advance(ring);
>> +     ret = intel_ring_begin(ring, 3);
>> +     if (ret)
>> +             return ret;
>> +     intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> +     intel_ring_emit(ring, WAIT_FOR_RC6_EXIT);
>> +     intel_ring_emit(ring, WAIT_RC6_EXIT | MASK_WAIT_RC6_EXIT);
>
> _MASKED_BIT_ENABLE(WAIT_RC6_EXIT)

thanks

>
>> +     intel_ring_advance(ring);
>> +
>> +     ret = intel_ring_begin(ring, 3);
>> +     if (ret)
>> +             return ret;
>> +     intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> +     intel_ring_emit(ring, RC_IDLE_MAX_COUNT);
>> +     intel_ring_emit(ring, CS_IDLE_COUNT_5US);
>> +     intel_ring_advance(ring);
>> +
>     dev_priv->gt_slices.config = 2;
>> +     return 0;
>> +}
>> +
>> +static int
>>  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>>                      struct drm_file *file,
>>                      struct drm_i915_gem_execbuffer2 *args,
>> @@ -999,6 +1063,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>>               return -EINVAL;
>>       }
>>
>> +     if ((args->flags & I915_EXEC_USE_PREDICATE) == 0 &&
>> +         !dev_priv->gt_slices.legacy_userspace_busy)
>> +             if (i915_legacy_userspace_busy(dev, ring) == 0)
>> +                     dev_priv->gt_slices.legacy_userspace_busy = true;
>
> Now here we cannot just ignore a failure to switch slice configuration.
>
>  static bool gt_legacy_userspace(struct intel_ring_buffer *ring,
>                                  struct drm_i915_gem_execbuffer2 *args)
>  {
>     if (ring->id == BCS)
>        return false;
>
>     if (!HAS_SLICE_SHUTDOWN(ring->dev))
>        return false;
>
>     return (args->flags & I915_EXEC_USE_PREDICATE) == 0;
>  }
>
>  if (gt_legacy_userspace(ring, args)) {
>     ret = gt_legacy_userspace_busy(ring);
>     if (ret)
>        return ret;
>
>     dev_priv->gt_slices.legacy_userspace_busy = true;
>  }

I liked all this simplification. Thanks.
>
>
>> +
>>       mode = args->flags & I915_EXEC_CONSTANTS_MASK;
>>       mask = I915_EXEC_CONSTANTS_MASK;
>>       switch (mode) {
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index 497c441..0146bef 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -277,6 +277,17 @@
>>  #define   SLICE_STATUS_MAIN_ON       (2<<0)
>>  #define   SLICE_STATUS_BOTH_ON       (3<<0)
>>
>> +#define HSW_SLICESHUTDOWN    0xA190
>> +#define   SLICE_SHUTDOWN     (1<<0)
>> +
>> +#define RC_IDLE_MAX_COUNT    0x2054
>> +#define   CS_IDLE_COUNT_1US  (1<<1)
>> +#define   CS_IDLE_COUNT_5US  (1<<3)
>> +
>> +#define WAIT_FOR_RC6_EXIT    0x20CC
>> +#define   WAIT_RC6_EXIT              (1<<0)
>> +#define   MASK_WAIT_RC6_EXIT (1<<16)
>> +
>>  /*
>>   * 3D instructions used by the kernel
>>   */
>> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
>> index 86ccd52..1dd57fb 100644
>> --- a/drivers/gpu/drm/i915/i915_sysfs.c
>> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
>> @@ -125,8 +125,7 @@ static ssize_t gt_slice_config_show(struct device *kdev,
>>       struct drm_device *dev = minor->dev;
>>       struct drm_i915_private *dev_priv = dev->dev_private;
>>
>> -     return sprintf(buf, "%s\n", I915_READ(MI_PREDICATE_RESULT_2) ==
>> -                    LOWER_SLICE_ENABLED ? "full" : "half");
>> +     return sprintf(buf, "%s\n", I915_READ(MI_PREDICATE_RESULT_2) == LOWER_SLICE_ENABLED ? "full" : "half");
>>  }
>>
>>  static ssize_t gt_slice_config_store(struct device *kdev,
>> @@ -135,16 +134,19 @@ static ssize_t gt_slice_config_store(struct device *kdev,
>>  {
>>       struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev);
>>       struct drm_device *dev = minor->dev;
>> +     struct drm_i915_private *dev_priv = dev->dev_private;
>>       int ret;
>
> This cannot change state whilst legacy_userspace_busy.

We cannot change to half while in legacy_userspace_busy and this check
was already inside
gt_slices_set_half(dev). So just the state_default was changed so on
next idleness it would come to half anyway.

>>
>>       if (!strncmp(buf, "full", sizeof("full") - 1)) {
>>               ret = intel_set_gt_full(dev);
>>               if (ret)
>>                       return ret;
>> +             dev_priv->gt_slices.state_default = 1;
> dev_priv->gt_slices.max_config = 2;
>>       } else if (!strncmp(buf, "half", sizeof("half") - 1)) {
>>               ret = intel_set_gt_half(dev);
>>               if (ret)
>>                       return ret;
>> +             dev_priv->gt_slices.state_default = 0;
> dev_priv->gt_slices.max_config = 1;
>>       } else
>>               return -EINVAL;
>
> (void)intel_gt_update_slice_config(dev);

This remove the syncrony you had asked. Without it testcase for sysfs
change is impossible because we don't know if it is in _busy... so we
cannot simply check state after request a change.

>
> where
>
>   int intel_gt_update_slice_config(struct drm_device *dev)
>   {
>      int config;
>
>      if (!HAS_SLICE_SHUTDOWN(dev))
>        return -ENODEV;
>
>      if (dev_priv-gt_slices.legacy_userspace_busy)
>        return -EBUSY;
>
>      config = min(dev_priv->gt_slices.config,
>                   dev_priv->gt_slices.max_config);
>      if (config == dev_priv->gt_slices.config)

I still think state_default is working, enough and simpler...
or do you really want me to use config and max_config instead?

>        return 0;
>
>      /* slice_set_config(dev, config); ? */
>      switch (config) {
>      case 1: gt_slice_set_half(dev); break;
>      case 2: gt_slice_set_full(dev); break;
>      default: return -EINVAL;
>      }
>
>      dev_priv->gt_slices.config = config;
>      return 0;
>   }
>
>>       return count;
>> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
>> index 4f1b636..3810ecf 100644
>> --- a/drivers/gpu/drm/i915/intel_display.c
>> +++ b/drivers/gpu/drm/i915/intel_display.c
>> @@ -7778,6 +7778,12 @@ void intel_mark_idle(struct drm_device *dev)
>>
>>       if (dev_priv->info->gen >= 6)
>>               gen6_rps_idle(dev->dev_private);
>> +
>> +     if (HAS_SLICE_SHUTDOWN(dev) &&
>> +         dev_priv->gt_slices.legacy_userspace_busy) {
>> +             intel_set_gt_half_async(dev);
>> +     }
>
> Just if (dev_priv->gt_slices.legacy_userspace_busy) {
>          dev_priv->gt_slices.legacy_userspace_busy = false;
>          intel_gt_update_slice_config(dev);
>      }
>
> --
> Chris Wilson, Intel Open Source Technology Centre

Thank you very much.

-- 
Rodrigo Vivi
Blog: http://blog.vivi.eng.br