[PATCH 3/3] drm/i915: Optimize GGTT PTE encode a bit

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Fri Jul 7 12:38:43 UTC 2023


From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

The gen8 and mtl GGTT PTE encode vfuncs are basically just remapping
bits from the flags and pat_index arguments into the PTE. We can express
that with shifts instead of conditionals and at the same time shrink the
function sizes.

Before:

00000000000000f0 <gen8_ggtt_pte_encode>:
      f0:       e8 00 00 00 00          call   f5 <gen8_ggtt_pte_encode+0x5>
      f5:       48 89 f8                mov    %rdi,%rax
      f8:       48 83 cf 03             or     $0x3,%rdi
      fc:       48 83 c8 01             or     $0x1,%rax
     100:       83 e2 02                and    $0x2,%edx
     103:       48 0f 45 c7             cmovne %rdi,%rax
     107:       c3                      ret

0000000000000250 <mtl_ggtt_pte_encode>:
     250:       e8 00 00 00 00          call   255 <mtl_ggtt_pte_encode+0x5>
     255:       48 b8 ff 0f 00 00 00    movabs $0xffffc00000000fff,%rax
     25c:       c0 ff ff
     25f:       48 85 c7                test   %rax,%rdi
     262:       75 28                   jne    28c <mtl_ggtt_pte_encode+0x3c>
     264:       48 89 f8                mov    %rdi,%rax
     267:       48 83 cf 03             or     $0x3,%rdi
     26b:       48 83 c8 01             or     $0x1,%rax
     26f:       83 e2 02                and    $0x2,%edx
     272:       48 0f 45 c7             cmovne %rdi,%rax
     276:       40 f6 c6 01             test   $0x1,%sil
     27a:       74 05                   je     281 <mtl_ggtt_pte_encode+0x31>
     27c:       48 0f ba e8 34          bts    $0x34,%rax
     281:       83 e6 02                and    $0x2,%esi
     284:       74 05                   je     28b <mtl_ggtt_pte_encode+0x3b>
     286:       48 0f ba e8 35          bts    $0x35,%rax
     28b:       c3                      ret

After:

00000000000000f0 <gen8_ggtt_pte_encode>:
      f0:       e8 00 00 00 00          call   f5 <gen8_ggtt_pte_encode+0x5>
      f5:       83 e2 02                and    $0x2,%edx
      f8:       48 09 fa                or     %rdi,%rdx
      fb:       48 89 d0                mov    %rdx,%rax
      fe:       48 83 c8 01             or     $0x1,%rax
     102:       c3                      ret

0000000000000250 <mtl_ggtt_pte_encode>:
     250:       e8 00 00 00 00          call   255 <mtl_ggtt_pte_encode+0x5>
     255:       48 b8 ff 0f 00 00 00    movabs $0xffffc00000000fff,%rax
     25c:       c0 ff ff
     25f:       48 85 c7                test   %rax,%rdi
     262:       75 17                   jne    27b <mtl_ggtt_pte_encode+0x2b>
     264:       89 f0                   mov    %esi,%eax
     266:       83 e2 02                and    $0x2,%edx
     269:       83 e0 03                and    $0x3,%eax
     26c:       48 c1 e0 34             shl    $0x34,%rax
     270:       48 09 d0                or     %rdx,%rax
     273:       48 09 f8                or     %rdi,%rax
     276:       48 83 c8 01             or     $0x1,%rax
     27a:       c3                      ret

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index dd0ed941441a..7237dc3e9d15 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -228,14 +228,8 @@ static u64 mtl_ggtt_pte_encode(dma_addr_t addr,
 
 	WARN_ON_ONCE(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
 
-	if (flags & PTE_LM)
-		pte |= GEN12_GGTT_PTE_LM;
-
-	if (pat_index & BIT(0))
-		pte |= MTL_GGTT_PTE_PAT0;
-
-	if (pat_index & BIT(1))
-		pte |= MTL_GGTT_PTE_PAT1;
+	pte |= (flags & PTE_LM) << (ilog2(GEN12_GGTT_PTE_LM) - ilog2(PTE_LM));
+	pte |= (pat_index & (BIT(0) | BIT(1))) << (ilog2(MTL_GGTT_PTE_PAT0));
 
 	return pte;
 }
@@ -246,8 +240,7 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
 {
 	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
 
-	if (flags & PTE_LM)
-		pte |= GEN12_GGTT_PTE_LM;
+	pte |= (flags & PTE_LM) << (ilog2(GEN12_GGTT_PTE_LM) - ilog2(PTE_LM));
 
 	return pte;
 }
-- 
2.39.2



More information about the Intel-gfx-trybot mailing list