[Intel-gfx] [PATCH 2/2] drm/i915: Make for_each_engine_masked() more compact and quicker
Mika Kuoppala
mika.kuoppala at linux.intel.com
Wed Aug 17 15:12:50 UTC 2016
Chris Wilson <chris at chris-wilson.co.uk> writes:
> Rather than walk the full array of engines checking whether each is in
> the mask in turn, we can use the mask to jump to the right engines. This
> should quicker for a sparse array of engines or mask, whilst generating
> smaller code:
>
I just checked that with this patch the bsfl was in the assembly. Could
be faster but our number of engines is quite small.
Checkpatch complains that 'unsigned's should be 'unsigned int's.
Otherwise this looks ok for me. Noticed that Joonas and you were
wrestling about this a little on the irc and I didn't quite get
what the concerns were from the irc logs.
-Mika
> text data bss dec hex filename
> 1251010 4579 800 1256389 132bc5 drivers/gpu/drm/i915/i915.ko
> 1250530 4579 800 1255909 1329e5 drivers/gpu/drm/i915/i915.ko
>
> The downside is that we have to pass in a temporary, alas no C99
> iterators yet.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 15 +++++++++------
> drivers/gpu/drm/i915/i915_gem_request.c | 3 ++-
> drivers/gpu/drm/i915/i915_guc_submission.c | 3 ++-
> drivers/gpu/drm/i915/i915_irq.c | 3 ++-
> drivers/gpu/drm/i915/intel_uncore.c | 9 ++++++---
> 5 files changed, 21 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index f4985279182c..2beb83023213 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2110,13 +2110,16 @@ static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc)
> for_each_if (((id__) = (engine__)->id, \
> intel_engine_initialized(engine__)))
>
> +#define __mask_next_bit(mask) ({ \
> + int __idx = ffs(mask) - 1; \
> + mask &= ~BIT(__idx); \
> + __idx; \
> +})
> +
> /* Iterator over subset of engines selected by mask */
> -#define for_each_engine_masked(engine__, dev_priv__, mask__) \
> - for ((engine__) = &(dev_priv__)->engine[0]; \
> - (engine__) < &(dev_priv__)->engine[I915_NUM_ENGINES]; \
> - (engine__)++) \
> - for_each_if (((mask__) & intel_engine_flag(engine__)) && \
> - intel_engine_initialized(engine__))
> +#define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \
> + for (tmp__ = mask__ & INTEL_INFO(dev_priv__)->ring_mask; \
> + tmp__ ? (engine__ = &(dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : 0; )
>
> enum hdmi_force_audio {
> HDMI_AUDIO_OFF_DVI = -2, /* no aux data for HDMI-DVI converter */
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 02242736e492..44b464a20eb4 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -744,6 +744,7 @@ static bool engine_retire_requests(struct intel_engine_cs *engine)
> void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
> {
> struct intel_engine_cs *engine;
> + unsigned tmp;
>
> lockdep_assert_held(&dev_priv->drm.struct_mutex);
>
> @@ -752,7 +753,7 @@ void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
>
> GEM_BUG_ON(!dev_priv->gt.awake);
>
> - for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines) {
> + for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines, tmp) {
> if (engine_retire_requests(engine))
> dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
> }
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index bb4079223e39..6475bee90730 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -330,6 +330,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
> struct i915_gem_context *ctx = client->owner;
> struct guc_context_desc desc;
> struct sg_table *sg;
> + unsigned tmp;
> u32 gfx_addr;
>
> memset(&desc, 0, sizeof(desc));
> @@ -339,7 +340,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
> desc.priority = client->priority;
> desc.db_id = client->doorbell_id;
>
> - for_each_engine_masked(engine, dev_priv, client->engines) {
> + for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
> struct intel_context *ce = &ctx->engine[engine->id];
> uint32_t guc_engine_id = engine->guc_id;
> struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id];
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 7a236dd337fa..f4f0c410f72b 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -3140,6 +3140,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>
> if (hung) {
> char msg[80];
> + unsigned tmp;
> int len;
>
> /* If some rings hung but others were still busy, only
> @@ -3149,7 +3150,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
> hung &= ~stuck;
> len = scnprintf(msg, sizeof(msg),
> "%s on ", stuck == hung ? "No progress" : "Hang");
> - for_each_engine_masked(engine, dev_priv, hung)
> + for_each_engine_masked(engine, dev_priv, hung, tmp)
> len += scnprintf(msg + len, sizeof(msg) - len,
> "%s, ", engine->name);
> msg[len-2] = '\0';
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index a6b04da4bf21..3a797c35d253 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1597,8 +1597,10 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv,
> if (engine_mask == ALL_ENGINES) {
> hw_mask = GEN6_GRDOM_FULL;
> } else {
> + unsigned tmp;
> +
> hw_mask = 0;
> - for_each_engine_masked(engine, dev_priv, engine_mask)
> + for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
> hw_mask |= hw_engine_mask[engine->id];
> }
>
> @@ -1714,15 +1716,16 @@ static int gen8_reset_engines(struct drm_i915_private *dev_priv,
> unsigned engine_mask)
> {
> struct intel_engine_cs *engine;
> + unsigned tmp;
>
> - for_each_engine_masked(engine, dev_priv, engine_mask)
> + for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
> if (gen8_request_engine_reset(engine))
> goto not_ready;
>
> return gen6_reset_engines(dev_priv, engine_mask);
>
> not_ready:
> - for_each_engine_masked(engine, dev_priv, engine_mask)
> + for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
> gen8_unrequest_engine_reset(engine);
>
> return -EIO;
> --
> 2.8.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
More information about the Intel-gfx
mailing list