[Intel-gfx] [RFC] drm/i915: store all mmio bases in intel_engines
Daniele Ceraolo Spurio
daniele.ceraolospurio at intel.com
Thu Mar 8 18:46:08 UTC 2018
On 08/03/18 01:31, Tvrtko Ursulin wrote:
>
> On 07/03/2018 19:45, Daniele Ceraolo Spurio wrote:
>> The mmio bases we're currently storing in the intel_engines array are
>> only valid for a subset of gens, so we need to ignore them and use
>> different values in some cases. Instead of doing that, we can have a
>> table of [starting gen, mmio base] pairs for each engine in
>> intel_engines and select the correct one based on the gen we're running
>> on in a consistent way.
>>
>> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
>> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>> ---
>> drivers/gpu/drm/i915/intel_engine_cs.c | 77
>> +++++++++++++++++++++------------
>> drivers/gpu/drm/i915/intel_ringbuffer.c | 1 -
>> 2 files changed, 49 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c
>> b/drivers/gpu/drm/i915/intel_engine_cs.c
>> index 4ba139c27fba..1dd92cac8d67 100644
>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>> @@ -81,12 +81,16 @@ static const struct engine_class_info
>> intel_engine_classes[] = {
>> },
>> };
>> +#define MAX_MMIO_BASES 3
>> struct engine_info {
>> unsigned int hw_id;
>> unsigned int uabi_id;
>> u8 class;
>> u8 instance;
>> - u32 mmio_base;
>> + struct engine_mmio_base {
>> + u32 gen : 8;
>> + u32 base : 24;
>> + } mmio_bases[MAX_MMIO_BASES];
>> unsigned irq_shift;
>> };
>
> It's not bad, but I am just wondering if it is too complicated versus
> simply duplicating the tables.
>
> Duplicated tables would certainly be much less code and complexity, but
> what about the size of pure data?
>
> In this patch sizeof(struct engine_info) grows by MAX_MMIO_BASES *
> sizeof(u32), so 12, to the total of 30 if I am not mistaken. Times
> NUM_ENGINES equals 240 bytes for intel_engines[] array with this scheme.
>
we remove a u32 (the old mmio base) so we only grow 8 per engine, but
the compiler rounds up to a multiple of u32 so 28 per engine, for a
total of 224.
> Separate tables per gens would be:
>
> sizeof(struct engine_info) is 18 bytes.
>
> For < gen6 we'd need 3 engines * 18 = 54
> < gen11 = 5 engines * 18 = 90
> >= gen11 = 8 engines * 18 = 144
>
> 54 + 90 + 144 = 288 bytes
>
> So a little bit bigger. Hm. Plus we would need to refactor so
> intel_engines[] is not indexed by engine->id but is contiguous array
> with engine->id in the elements. Code to lookup the compact array should
> be simpler than combined new code from this patch, especially if you add
> the test as Chris suggested. So separate engine info arrays would win I
> think overall - when considering size of text + size of data + size of
> source code.
>
Not sure. I would exclude the selftest, which is usually not compiled
for released kernels, which leads to:
add/remove: 0/0 grow/shrink: 3/1 up/down: 100/-7 (93)
Function old new delta
intel_engines 160 224 +64
__func__ 13891 13910 +19
intel_engines_init_mmio 1247 1264 +17
intel_init_bsd_ring_buffer 142 135 -7
Total: Before=1479626, After=1479719, chg +0.01%
Total growth is 93, which is less then your estimation for the growth
introduced by replicating the table.
> But on the other hand your solution might be more future proof. So I
> don't know. Use the crystal ball to decide? :)
>
I think we should expect that the total engine count could grow with
future gens. In this case to me adding a new entry to a unified table
seems much cleaner (and uses less data) than adding a completely new
table each time.
Daniele
> Regards,
>
> Tvrtko
>
>
>> @@ -96,7 +100,9 @@ static const struct engine_info intel_engines[] = {
>> .uabi_id = I915_EXEC_RENDER,
>> .class = RENDER_CLASS,
>> .instance = 0,
>> - .mmio_base = RENDER_RING_BASE,
>> + .mmio_bases = {
>> + { .gen = 1, .base = RENDER_RING_BASE }
>> + },
>> .irq_shift = GEN8_RCS_IRQ_SHIFT,
>> },
>> [BCS] = {
>> @@ -104,7 +110,9 @@ static const struct engine_info intel_engines[] = {
>> .uabi_id = I915_EXEC_BLT,
>> .class = COPY_ENGINE_CLASS,
>> .instance = 0,
>> - .mmio_base = BLT_RING_BASE,
>> + .mmio_bases = {
>> + { .gen = 6, .base = BLT_RING_BASE }
>> + },
>> .irq_shift = GEN8_BCS_IRQ_SHIFT,
>> },
>> [VCS] = {
>> @@ -112,7 +120,11 @@ static const struct engine_info intel_engines[] = {
>> .uabi_id = I915_EXEC_BSD,
>> .class = VIDEO_DECODE_CLASS,
>> .instance = 0,
>> - .mmio_base = GEN6_BSD_RING_BASE,
>> + .mmio_bases = {
>> + { .gen = 11, .base = GEN11_BSD_RING_BASE },
>> + { .gen = 6, .base = GEN6_BSD_RING_BASE },
>> + { .gen = 4, .base = BSD_RING_BASE }
>> + },
>> .irq_shift = GEN8_VCS1_IRQ_SHIFT,
>> },
>> [VCS2] = {
>> @@ -120,7 +132,10 @@ static const struct engine_info intel_engines[] = {
>> .uabi_id = I915_EXEC_BSD,
>> .class = VIDEO_DECODE_CLASS,
>> .instance = 1,
>> - .mmio_base = GEN8_BSD2_RING_BASE,
>> + .mmio_bases = {
>> + { .gen = 11, .base = GEN11_BSD2_RING_BASE },
>> + { .gen = 8, .base = GEN8_BSD2_RING_BASE }
>> + },
>> .irq_shift = GEN8_VCS2_IRQ_SHIFT,
>> },
>> [VCS3] = {
>> @@ -128,7 +143,9 @@ static const struct engine_info intel_engines[] = {
>> .uabi_id = I915_EXEC_BSD,
>> .class = VIDEO_DECODE_CLASS,
>> .instance = 2,
>> - .mmio_base = GEN11_BSD3_RING_BASE,
>> + .mmio_bases = {
>> + { .gen = 11, .base = GEN11_BSD3_RING_BASE }
>> + },
>> .irq_shift = 0, /* not used */
>> },
>> [VCS4] = {
>> @@ -136,7 +153,9 @@ static const struct engine_info intel_engines[] = {
>> .uabi_id = I915_EXEC_BSD,
>> .class = VIDEO_DECODE_CLASS,
>> .instance = 3,
>> - .mmio_base = GEN11_BSD4_RING_BASE,
>> + .mmio_bases = {
>> + { .gen = 11, .base = GEN11_BSD4_RING_BASE }
>> + },
>> .irq_shift = 0, /* not used */
>> },
>> [VECS] = {
>> @@ -144,7 +163,10 @@ static const struct engine_info intel_engines[] = {
>> .uabi_id = I915_EXEC_VEBOX,
>> .class = VIDEO_ENHANCEMENT_CLASS,
>> .instance = 0,
>> - .mmio_base = VEBOX_RING_BASE,
>> + .mmio_bases = {
>> + { .gen = 11, .base = GEN11_VEBOX_RING_BASE },
>> + { .gen = 7, .base = VEBOX_RING_BASE }
>> + },
>> .irq_shift = GEN8_VECS_IRQ_SHIFT,
>> },
>> [VECS2] = {
>> @@ -152,7 +174,9 @@ static const struct engine_info intel_engines[] = {
>> .uabi_id = I915_EXEC_VEBOX,
>> .class = VIDEO_ENHANCEMENT_CLASS,
>> .instance = 1,
>> - .mmio_base = GEN11_VEBOX2_RING_BASE,
>> + .mmio_bases = {
>> + { .gen = 11, .base = GEN11_VEBOX2_RING_BASE }
>> + },
>> .irq_shift = 0, /* not used */
>> },
>> };
>> @@ -223,6 +247,21 @@ __intel_engine_context_size(struct
>> drm_i915_private *dev_priv, u8 class)
>> }
>> }
>> +static u32 __engine_mmio_base(struct drm_i915_private *i915,
>> + const struct engine_mmio_base* bases)
>> +{
>> + int i;
>> +
>> + for (i = 0; i < MAX_MMIO_BASES; i++)
>> + if (INTEL_GEN(i915) >= bases[i].gen)
>> + break;
>> +
>> + GEM_BUG_ON(i == MAX_MMIO_BASES);
>> + GEM_BUG_ON(!bases[i].base);
>> +
>> + return bases[i].base;
>> +}
>> +
>> static int
>> intel_engine_setup(struct drm_i915_private *dev_priv,
>> enum intel_engine_id id)
>> @@ -257,25 +296,7 @@ intel_engine_setup(struct drm_i915_private
>> *dev_priv,
>> class_info->name, info->instance) >=
>> sizeof(engine->name));
>> engine->hw_id = engine->guc_id = info->hw_id;
>> - if (INTEL_GEN(dev_priv) >= 11) {
>> - switch (engine->id) {
>> - case VCS:
>> - engine->mmio_base = GEN11_BSD_RING_BASE;
>> - break;
>> - case VCS2:
>> - engine->mmio_base = GEN11_BSD2_RING_BASE;
>> - break;
>> - case VECS:
>> - engine->mmio_base = GEN11_VEBOX_RING_BASE;
>> - break;
>> - default:
>> - /* take the original value for all other engines */
>> - engine->mmio_base = info->mmio_base;
>> - break;
>> - }
>> - } else {
>> - engine->mmio_base = info->mmio_base;
>> - }
>> + engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases);
>> engine->irq_shift = info->irq_shift;
>> engine->class = info->class;
>> engine->instance = info->instance;
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> index 1d599524a759..2e4408477ab5 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> @@ -2079,7 +2079,6 @@ int intel_init_bsd_ring_buffer(struct
>> intel_engine_cs *engine)
>> engine->emit_flush = gen6_bsd_ring_flush;
>> engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
>> } else {
>> - engine->mmio_base = BSD_RING_BASE;
>> engine->emit_flush = bsd_ring_flush;
>> if (IS_GEN5(dev_priv))
>> engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
>>
More information about the Intel-gfx
mailing list