[Intel-gfx] [PATCH v2] drm/i915: Trim gen11_gt_irq_handler

Daniele Ceraolo Spurio daniele.ceraolospurio at intel.com
Fri Mar 9 19:14:53 UTC 2018



On 08/03/18 17:33, Chris Wilson wrote:
> Give the compiler a helping hand in mapping (bank,bit) to our struct
> intel_engine_cs by trading object code size for data cache:
> 
> add/remove: 2/0 grow/shrink: 0/1 up/down: 48/-135 (-87)
> Function                                     old     new   delta
> bank1_map                                      -      32     +32
> bank0_map                                      -      16     +16
> gen11_irq_handler                            706     571    -135
> 
> v2: u8 arrays for tighter packing
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_irq.c | 53 +++++++++++------------------------------
>   1 file changed, 14 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index c8c29d8ecbab..e423ec58e5d2 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2762,44 +2762,6 @@ static void __fini_wedge(struct wedge_me *w)
>   	     (W)->i915;							\
>   	     __fini_wedge((W)))
>   
> -static void
> -gen11_gt_engine_irq_handler(struct drm_i915_private * const i915,
> -			    const unsigned int bank,
> -			    const unsigned int engine_n,
> -			    const u16 iir)
> -{
> -	struct intel_engine_cs ** const engine = i915->engine;
> -
> -	switch (bank) {
> -	case 0:
> -		switch (engine_n) {
> -
> -		case GEN11_RCS0:
> -			return gen8_cs_irq_handler(engine[RCS], iir);
> -
> -		case GEN11_BCS:
> -			return gen8_cs_irq_handler(engine[BCS], iir);
> -		}
> -	case 1:
> -		switch (engine_n) {
> -
> -		case GEN11_VCS(0):
> -			return gen8_cs_irq_handler(engine[_VCS(0)], iir);
> -		case GEN11_VCS(1):
> -			return gen8_cs_irq_handler(engine[_VCS(1)], iir);
> -		case GEN11_VCS(2):
> -			return gen8_cs_irq_handler(engine[_VCS(2)], iir);
> -		case GEN11_VCS(3):
> -			return gen8_cs_irq_handler(engine[_VCS(3)], iir);
> -
> -		case GEN11_VECS(0):
> -			return gen8_cs_irq_handler(engine[_VECS(0)], iir);
> -		case GEN11_VECS(1):
> -			return gen8_cs_irq_handler(engine[_VECS(1)], iir);
> -		}
> -	}
> -}
> -
>   static u32
>   gen11_gt_engine_intr(struct drm_i915_private * const i915,
>   		     const unsigned int bank, const unsigned int bit)
> @@ -2836,10 +2798,23 @@ static void
>   gen11_gt_irq_handler(struct drm_i915_private * const i915,
>   		     const u32 master_ctl)
>   {
> +	static const u8 bank0_map[] = {
> +		[GEN11_RCS0] = RCS,
> +		[GEN11_BCS]  = BCS,
> +	};
> +	static const u8 bank1_map[] = {
> +		[GEN11_VCS(0)]  = _VCS(0),
> +		[GEN11_VCS(1)]  = _VCS(1),
> +		[GEN11_VCS(2)]  = _VCS(2),
> +		[GEN11_VCS(3)]  = _VCS(3),
> +		[GEN11_VECS(0)] = _VECS(0),
> +		[GEN11_VECS(1)] = _VECS(1),
> +	};
>   	void __iomem * const regs = i915->regs;
>   	unsigned int bank;
>   
>   	for (bank = 0; bank < 2; bank++) {
> +		const u8 *map = bank ? bank1_map : bank0_map;
>   		unsigned long intr_dw;
>   		unsigned int bit;
>   
> @@ -2859,7 +2834,7 @@ gen11_gt_irq_handler(struct drm_i915_private * const i915,
>   			if (unlikely(!iir))
>   				continue;
>   
> -			gen11_gt_engine_irq_handler(i915, bank, bit, iir);
> +			gen8_cs_irq_handler(i915->engine[map[bit]], iir);

With guc and PM interrupts we get a couple of non-engine interrupts on 
bank0, so this may not scale very well depending how we approach it.

My vote still goes to deriving class and instance from the register 
(bits 16-18 and 20-25 respectively). If we return the full register 
value from gen11_gt_engine_intr, we can then do something like:

for_each_set_bit(bit, &intr_dw, 32) {
         const u32 ident = gen11_gt_engine_intr(i915, bank, bit);
         u16 iir = ident & GEN11_INTR_ENGINE_MASK;
         u8 class, instance;

         if (unlikely(!iir))
                 continue;

         class = (ident >> 16) & (BIT(3) - 1);
         instance = (ident >> 20) &
                 (BIT(GEN11_ENGINE_INSTANCE_WIDTH) - 1);

         gen8_cs_irq_handler(i915->engine_class[class][instance], iir);
}

Which saves a bit more code and is more adaptable to non-engine 
interrupts IMHO, since we have the class and all non-engine interrupts 
come under class 4.

add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-137 (-137)
Function                                     old     new   delta
gen11_irq_handler                            711     574    -137
Total: Before=1487912, After=1487775, chg -0.01%

Downside is that we have a few more instructions in an hot path.

Thanks,
Daniele

>   		}
>   
>   		/* Clear must be after shared has been served for engine */
> 


More information about the Intel-gfx mailing list