[PATCH 3/3] drm/i915: Optimize GGTT PTE encode a bit
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri Jul 7 12:38:43 UTC 2023
From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
The gen8 and mtl PTE encode vfuncs are basically just remapping bits, so
we can express that without any conditionals and, at the same time,
shrink the function sizes.
Before:
00000000000000f0 <gen8_ggtt_pte_encode>:
f0: e8 00 00 00 00 call f5 <gen8_ggtt_pte_encode+0x5>
f5: 48 89 f8 mov %rdi,%rax
f8: 48 83 cf 03 or $0x3,%rdi
fc: 48 83 c8 01 or $0x1,%rax
100: 83 e2 02 and $0x2,%edx
103: 48 0f 45 c7 cmovne %rdi,%rax
107: c3 ret
0000000000000250 <mtl_ggtt_pte_encode>:
250: e8 00 00 00 00 call 255 <mtl_ggtt_pte_encode+0x5>
255: 48 b8 ff 0f 00 00 00 movabs $0xffffc00000000fff,%rax
25c: c0 ff ff
25f: 48 85 c7 test %rax,%rdi
262: 75 28 jne 28c <mtl_ggtt_pte_encode+0x3c>
264: 48 89 f8 mov %rdi,%rax
267: 48 83 cf 03 or $0x3,%rdi
26b: 48 83 c8 01 or $0x1,%rax
26f: 83 e2 02 and $0x2,%edx
272: 48 0f 45 c7 cmovne %rdi,%rax
276: 40 f6 c6 01 test $0x1,%sil
27a: 74 05 je 281 <mtl_ggtt_pte_encode+0x31>
27c: 48 0f ba e8 34 bts $0x34,%rax
281: 83 e6 02 and $0x2,%esi
284: 74 05 je 28b <mtl_ggtt_pte_encode+0x3b>
286: 48 0f ba e8 35 bts $0x35,%rax
28b: c3 ret
After:
00000000000000f0 <gen8_ggtt_pte_encode>:
f0: e8 00 00 00 00 call f5 <gen8_ggtt_pte_encode+0x5>
f5: 83 e2 02 and $0x2,%edx
f8: 48 09 fa or %rdi,%rdx
fb: 48 89 d0 mov %rdx,%rax
fe: 48 83 c8 01 or $0x1,%rax
102: c3 ret
0000000000000250 <mtl_ggtt_pte_encode>:
250: e8 00 00 00 00 call 255 <mtl_ggtt_pte_encode+0x5>
255: 48 b8 ff 0f 00 00 00 movabs $0xffffc00000000fff,%rax
25c: c0 ff ff
25f: 48 85 c7 test %rax,%rdi
262: 75 17 jne 27b <mtl_ggtt_pte_encode+0x2b>
264: 89 f0 mov %esi,%eax
266: 83 e2 02 and $0x2,%edx
269: 83 e0 03 and $0x3,%eax
26c: 48 c1 e0 34 shl $0x34,%rax
270: 48 09 d0 or %rdx,%rax
273: 48 09 f8 or %rdi,%rax
276: 48 83 c8 01 or $0x1,%rax
27a: c3 ret
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
drivers/gpu/drm/i915/gt/intel_ggtt.c | 13 +++----------
1 file changed, 3 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index dd0ed941441a..7237dc3e9d15 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -228,14 +228,8 @@ static u64 mtl_ggtt_pte_encode(dma_addr_t addr,
WARN_ON_ONCE(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
- if (flags & PTE_LM)
- pte |= GEN12_GGTT_PTE_LM;
-
- if (pat_index & BIT(0))
- pte |= MTL_GGTT_PTE_PAT0;
-
- if (pat_index & BIT(1))
- pte |= MTL_GGTT_PTE_PAT1;
+ pte |= (flags & PTE_LM) << (ilog2(GEN12_GGTT_PTE_LM) - ilog2(PTE_LM));
+ pte |= (pat_index & (BIT(0) | BIT(1))) << (ilog2(MTL_GGTT_PTE_PAT0));
return pte;
}
@@ -246,8 +240,7 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
{
gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
- if (flags & PTE_LM)
- pte |= GEN12_GGTT_PTE_LM;
+ pte |= (flags & PTE_LM) << (ilog2(GEN12_GGTT_PTE_LM) - ilog2(PTE_LM));
return pte;
}
--
2.39.2
More information about the Intel-gfx-trybot
mailing list