[Intel-gfx] [PATCH] drm/i915: read EU powergating status from command streamer
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Thu Jun 21 13:14:52 UTC 2018
On 21/06/18 12:54, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2018-06-21 12:27:12)
>> Powergating of the EU array is configured as part of the context
>> image. This seems to imply we need a context to have run before we can
>> read the slice/subslice/EU powergating status.
>>
>> This change captures the values of the powergating status registers
>> from the command streamer to ensure a valid we get valid values. This
>> is currently used only on gen9+ but someone could rightfully argue to
>> go as far as gen8.
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103484
>> ---
>> drivers/gpu/drm/i915/i915_debugfs.c | 177 ++++++++++++++++++++++++----
>> 1 file changed, 155 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>> index c400f42a54ec..0da8ce8acbcb 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -4310,14 +4310,120 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
>> #undef SS_MAX
>> }
>>
>> -static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
>> - struct sseu_dev_info *sseu)
>> +static struct drm_i915_gem_object *
>> +gen9_read_registers(struct drm_i915_private *i915,
>> + u32 *register_offsets,
>> + u32 n_registers)
> unsigned int n_register (or even unsigned long for total pedantry). I
> don't see why you want to specify a bitwidth.
Sure.
>
>> +{
>> + struct drm_i915_gem_object *bo;
>> + struct i915_request *rq;
>> + struct i915_vma *vma;
>> + u32 *cs, bo_size = PAGE_SIZE * DIV_ROUND_UP(n_registers * 4, PAGE_SIZE);
>> + int ret, r;
>> +
>> + ret = i915_mutex_lock_interruptible(&i915->drm);
>> + if (ret)
>> + return ERR_PTR(ret);
>> +
>> + bo = i915_gem_object_create(i915, bo_size);
> create_internal() (just be sure you don't read back uninitialised data)
>
> bo_size = PAGE_ALIGN(sizeof(u32) * n_registers);
Thanks.
>
>> + if (IS_ERR(bo)) {
>> + ret = PTR_ERR(bo);
>> + goto unlock;
>> + }
>> +
>> + ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
>> + if (ret)
>> + goto put_bo;
>> +
>> +
>> + vma = i915_vma_instance(bo,
>> + &i915->kernel_context->ppgtt->vm,
>> + NULL);
>> + if (IS_ERR(vma)) {
>> + ret = PTR_ERR(vma);
>> + goto put_bo;
>> + }
>> +
>> + ret = i915_vma_pin(vma, 0, GEN8_LR_CONTEXT_ALIGN, PIN_USER);
> CONTEXT_ALIGN?
Dammit... copy-paste...
>
>> + if (ret)
>> + goto vma_unpin;
>> +
>> +
>> + rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context);
>> + if (IS_ERR(rq)) {
>> + ret = PTR_ERR(rq);
>> + goto vma_unpin;
>> + }
>> +
>> + cs = intel_ring_begin(rq, n_registers * 4);
>> + if (IS_ERR(cs)) {
>> + i915_request_add(rq);
>> + ret = PTR_ERR(cs);
>> + goto vma_unpin;
>> + }
>> +
>> + for (r = 0; r < n_registers; r++) {
>> + u64 offset = vma->node.start + r * 4;
>> +
>> + *cs++ = MI_STORE_REGISTER_MEM_GEN8;
>> + *cs++ = register_offsets[r];
>> + *cs++ = lower_32_bits(offset);
>> + *cs++ = upper_32_bits(offset);
>> + }
> I think you should i915_vma_move_to_active() for safety and give it
> an active reference.
Indeed.
>
>> + intel_ring_advance(rq, cs);
>> +
>> + i915_request_add(rq);
>> +
>> + mutex_unlock(&i915->drm.struct_mutex);
>> +
>> + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
> For sanity, check for an error. (Hence why using vma_move_to_active
> becomes relevant.)
Thanks!
>
>> + return bo;
>> +
>> +vma_unpin:
>> + i915_vma_unpin(vma);
>> +put_bo:
>> + i915_gem_object_put(bo);
>> +unlock:
>> + mutex_unlock(&i915->drm.struct_mutex);
>> + return bo;
> Report the ERR_PTR(ret) (otherwise, a neat use-after-free).
Oops my bad.
>
>> +}
>> +
>> +
>> +static int gen10_sseu_device_status(struct drm_i915_private *dev_priv,
>> + struct sseu_dev_info *sseu)
>> {
>> #define SS_MAX 6
>> const struct intel_device_info *info = INTEL_INFO(dev_priv);
>> - u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2];
>> + struct drm_i915_gem_object *bo;
>> + struct {
>> + u32 s_reg[SS_MAX];
>> + u32 eu_reg[2 * SS_MAX];
>> + } reg_offsets, *reg_data;
>> + u32 eu_mask[2];
>> int s, ss;
>>
>> + for (s = 0; s < info->sseu.max_slices; s++) {
>> + reg_offsets.s_reg[s] =
>> + i915_mmio_reg_offset(GEN10_SLICE_PGCTL_ACK(s));
>> + reg_offsets.eu_reg[2 * s] =
>> + i915_mmio_reg_offset(GEN10_SS01_EU_PGCTL_ACK(s));
>> + reg_offsets.eu_reg[2 * s] =
>> + i915_mmio_reg_offset(GEN10_SS23_EU_PGCTL_ACK(s));
> I started by wishing you used i915_mmio_t, but I can appreciate the
> logic of having offset/data tied together in the same layout.
>
>> + }
>> +
>> + bo = gen9_read_registers(dev_priv, reg_offsets.s_reg,
>> + sizeof(reg_offsets) / sizeof(u32));
>> + if (IS_ERR(bo))
>> + return PTR_ERR(bo);
> I'd prefer this to use i915_gem_object_to_cpu_domain() but there's no
> advantage to that, and you've set it up to just work. So leave it.
>
>> + reg_data = i915_gem_object_pin_map(bo, I915_MAP_WB);
>> + if (IS_ERR(reg_data)) {
>> + i915_gem_object_put(bo);
>> + return PTR_ERR(reg_data);
>> + }
> Other than questioning your sanity here at doing this rather than just
> deleting the debugfs, there's nothing inherently broken. I would do it
> for gen8 as well, no point leaving the odd one out.
We have an igt tests checking these values and because you haven't
brought up deleting the test before, I assumed we still wanted it fixed.
But now that you brought it up, I'm equally fine deleting this debugfs
code and removing the igt test.
>
> How about if we just hexdumped the kernel_context image and let the
> debugger inspect any and all registers at their discretion?
Hmm... these Ack registers aren't saved in the context image I think.
> -Chris
>
More information about the Intel-gfx
mailing list