[PATCH 2/3] drm/i915: Optimize PTE encode a bit
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri Jul 7 12:38:42 UTC 2023
From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
The gen12 pte encode vfunc is basically just remapping bits, so we can
express that with masks and shifts instead of a conditional per bit (only
the read-only check remains) and at the same time shrink function sizes.
Also use the opportunity to split the mtl vfunc out from the gen12 one to
avoid running pointless code on older platforms.
Before:
0000000000000060 <gen12_pte_encode>:
60: e8 00 00 00 00 call 65 <gen12_pte_encode+0x5>
65: 48 89 f9 mov %rdi,%rcx
68: 48 89 f8 mov %rdi,%rax
6b: 48 83 e1 fd and $0xfffffffffffffffd,%rcx
6f: 48 83 c8 03 or $0x3,%rax
73: 48 83 c9 01 or $0x1,%rcx
77: f6 c2 01 test $0x1,%dl
7a: 48 0f 45 c1 cmovne %rcx,%rax
7e: 48 89 c1 mov %rax,%rcx
81: 80 cd 08 or $0x8,%ch
84: 83 e2 02 and $0x2,%edx
87: 48 0f 45 c1 cmovne %rcx,%rax
8b: 48 89 c2 mov %rax,%rdx
8e: 48 83 ca 08 or $0x8,%rdx
92: 40 f6 c6 01 test $0x1,%sil
96: 48 0f 45 c2 cmovne %rdx,%rax
9a: 48 89 c2 mov %rax,%rdx
9d: 48 83 ca 10 or $0x10,%rdx
a1: 40 f6 c6 02 test $0x2,%sil
a5: 48 0f 45 c2 cmovne %rdx,%rax
a9: 48 89 c2 mov %rax,%rdx
ac: 80 ca 80 or $0x80,%dl
af: 40 f6 c6 04 test $0x4,%sil
b3: 48 0f 45 c2 cmovne %rdx,%rax
b7: 83 e6 08 and $0x8,%esi
ba: 74 05 je c1 <gen12_pte_encode+0x61>
bc: 48 0f ba e8 3e bts $0x3e,%rax
c1: c3 ret
After:
0000000000001360 <mtl_pte_encode>:
1360: e8 00 00 00 00 call 1365 <mtl_pte_encode+0x5>
1365: 48 89 f8 mov %rdi,%rax
1368: 48 83 cf 03 or $0x3,%rdi
136c: 89 f1 mov %esi,%ecx
136e: 48 83 c8 01 or $0x1,%rax
1372: f6 c2 01 test $0x1,%dl
1375: 48 0f 45 f8 cmovne %rax,%rdi
1379: 89 f0 mov %esi,%eax
137b: 83 e6 04 and $0x4,%esi
137e: 83 e0 03 and $0x3,%eax
1381: 48 c1 e6 05 shl $0x5,%rsi
1385: 48 c1 e0 03 shl $0x3,%rax
1389: 83 e2 02 and $0x2,%edx
138c: 83 e1 08 and $0x8,%ecx
138f: 48 c1 e2 0a shl $0xa,%rdx
1393: 48 09 f0 or %rsi,%rax
1396: 48 c1 e1 3b shl $0x3b,%rcx
139a: 48 09 d0 or %rdx,%rax
139d: 48 09 c8 or %rcx,%rax
13a0: 48 09 f8 or %rdi,%rax
13a3: c3 ret
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 38 +++++++++++++++++-----------
1 file changed, 23 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index c8568e5d1147..1f7cb5f3caa3 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -32,10 +32,12 @@ static u64 gen8_pte_encode(dma_addr_t addr,
unsigned int pat_index,
u32 flags)
{
- gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+ gen8_pte_t pte = addr;
if (unlikely(flags & PTE_READ_ONLY))
- pte &= ~GEN8_PAGE_RW;
+ pte |= GEN8_PAGE_PRESENT;
+ else
+ pte |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
/*
* For pre-gen12 platforms pat_index is the same as enum
@@ -61,25 +63,29 @@ static u64 gen12_pte_encode(dma_addr_t addr,
unsigned int pat_index,
u32 flags)
{
- gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+ gen8_pte_t pte = addr;
if (unlikely(flags & PTE_READ_ONLY))
- pte &= ~GEN8_PAGE_RW;
+ pte |= GEN8_PAGE_PRESENT;
+ else
+ pte |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
- if (flags & PTE_LM)
- pte |= GEN12_PPGTT_PTE_LM;
+ pte |= (flags & PTE_LM) << (ilog2(GEN12_PPGTT_PTE_LM) - ilog2(PTE_LM));
- if (pat_index & BIT(0))
- pte |= GEN12_PPGTT_PTE_PAT0;
+ pte |= (pat_index & (BIT(0) | BIT(1))) << (ilog2(GEN12_PPGTT_PTE_PAT0));
+ pte |= (pat_index & BIT(2)) << (ilog2(GEN12_PPGTT_PTE_PAT2) - 2);
- if (pat_index & BIT(1))
- pte |= GEN12_PPGTT_PTE_PAT1;
+ return pte;
+}
- if (pat_index & BIT(2))
- pte |= GEN12_PPGTT_PTE_PAT2;
+static u64 mtl_pte_encode(dma_addr_t addr,
+ unsigned int pat_index,
+ u32 flags)
+{
+ gen8_pte_t pte;
- if (pat_index & BIT(3))
- pte |= MTL_PPGTT_PTE_PAT3;
+ pte = gen12_pte_encode(addr, pat_index, flags);
+ pte |= (u64)(pat_index & BIT(3)) << (ilog2(MTL_PPGTT_PTE_PAT3) - 3);
return pte;
}
@@ -995,7 +1001,9 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
*/
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
- if (GRAPHICS_VER(gt->i915) >= 12)
+ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+ ppgtt->vm.pte_encode = mtl_pte_encode;
+ else if (GRAPHICS_VER(gt->i915) >= 12)
ppgtt->vm.pte_encode = gen12_pte_encode;
else
ppgtt->vm.pte_encode = gen8_pte_encode;
--
2.39.2
More information about the Intel-gfx-trybot
mailing list