[PATCH 2/3] drm/i915: Optimize PTE encode a bit

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Fri Jul 7 12:38:42 UTC 2023


From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

If we notice that the gen12 PTE encode vfunc is basically just remapping
bits, we can express that without any conditionals and at the same time
shrink the function size.

Also use the opportunity to split the gen12 vfunc out from the MTL one, to
avoid running pointless code on older platforms.

Before:

0000000000000060 <gen12_pte_encode>:
      60:       e8 00 00 00 00          call   65 <gen12_pte_encode+0x5>
      65:       48 89 f9                mov    %rdi,%rcx
      68:       48 89 f8                mov    %rdi,%rax
      6b:       48 83 e1 fd             and    $0xfffffffffffffffd,%rcx
      6f:       48 83 c8 03             or     $0x3,%rax
      73:       48 83 c9 01             or     $0x1,%rcx
      77:       f6 c2 01                test   $0x1,%dl
      7a:       48 0f 45 c1             cmovne %rcx,%rax
      7e:       48 89 c1                mov    %rax,%rcx
      81:       80 cd 08                or     $0x8,%ch
      84:       83 e2 02                and    $0x2,%edx
      87:       48 0f 45 c1             cmovne %rcx,%rax
      8b:       48 89 c2                mov    %rax,%rdx
      8e:       48 83 ca 08             or     $0x8,%rdx
      92:       40 f6 c6 01             test   $0x1,%sil
      96:       48 0f 45 c2             cmovne %rdx,%rax
      9a:       48 89 c2                mov    %rax,%rdx
      9d:       48 83 ca 10             or     $0x10,%rdx
      a1:       40 f6 c6 02             test   $0x2,%sil
      a5:       48 0f 45 c2             cmovne %rdx,%rax
      a9:       48 89 c2                mov    %rax,%rdx
      ac:       80 ca 80                or     $0x80,%dl
      af:       40 f6 c6 04             test   $0x4,%sil
      b3:       48 0f 45 c2             cmovne %rdx,%rax
      b7:       83 e6 08                and    $0x8,%esi
      ba:       74 05                   je     c1 <gen12_pte_encode+0x61>
      bc:       48 0f ba e8 3e          bts    $0x3e,%rax
      c1:       c3                      ret

After:

0000000000001360 <mtl_pte_encode>:
    1360:       e8 00 00 00 00          call   1365 <mtl_pte_encode+0x5>
    1365:       48 89 f8                mov    %rdi,%rax
    1368:       48 83 cf 03             or     $0x3,%rdi
    136c:       89 f1                   mov    %esi,%ecx
    136e:       48 83 c8 01             or     $0x1,%rax
    1372:       f6 c2 01                test   $0x1,%dl
    1375:       48 0f 45 f8             cmovne %rax,%rdi
    1379:       89 f0                   mov    %esi,%eax
    137b:       83 e6 04                and    $0x4,%esi
    137e:       83 e0 03                and    $0x3,%eax
    1381:       48 c1 e6 05             shl    $0x5,%rsi
    1385:       48 c1 e0 03             shl    $0x3,%rax
    1389:       83 e2 02                and    $0x2,%edx
    138c:       83 e1 08                and    $0x8,%ecx
    138f:       48 c1 e2 0a             shl    $0xa,%rdx
    1393:       48 09 f0                or     %rsi,%rax
    1396:       48 c1 e1 3b             shl    $0x3b,%rcx
    139a:       48 09 d0                or     %rdx,%rax
    139d:       48 09 c8                or     %rcx,%rax
    13a0:       48 09 f8                or     %rdi,%rax
    13a3:       c3                      ret

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 38 +++++++++++++++++-----------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index c8568e5d1147..1f7cb5f3caa3 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -32,10 +32,12 @@ static u64 gen8_pte_encode(dma_addr_t addr,
 			   unsigned int pat_index,
 			   u32 flags)
 {
-	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+	gen8_pte_t pte = addr;
 
 	if (unlikely(flags & PTE_READ_ONLY))
-		pte &= ~GEN8_PAGE_RW;
+		pte |= GEN8_PAGE_PRESENT;
+	else
+		pte |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
 
 	/*
 	 * For pre-gen12 platforms pat_index is the same as enum
@@ -61,25 +63,29 @@ static u64 gen12_pte_encode(dma_addr_t addr,
 			    unsigned int pat_index,
 			    u32 flags)
 {
-	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+	gen8_pte_t pte = addr;
 
 	if (unlikely(flags & PTE_READ_ONLY))
-		pte &= ~GEN8_PAGE_RW;
+		pte |= GEN8_PAGE_PRESENT;
+	else
+		pte |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
 
-	if (flags & PTE_LM)
-		pte |= GEN12_PPGTT_PTE_LM;
+	pte |= (flags & PTE_LM) << (ilog2(GEN12_PPGTT_PTE_LM) - ilog2(PTE_LM));
 
-	if (pat_index & BIT(0))
-		pte |= GEN12_PPGTT_PTE_PAT0;
+	pte |= (pat_index & (BIT(0) | BIT(1))) << (ilog2(GEN12_PPGTT_PTE_PAT0));
+	pte |= (pat_index & BIT(2)) << (ilog2(GEN12_PPGTT_PTE_PAT2) - 2);
 
-	if (pat_index & BIT(1))
-		pte |= GEN12_PPGTT_PTE_PAT1;
+	return pte;
+}
 
-	if (pat_index & BIT(2))
-		pte |= GEN12_PPGTT_PTE_PAT2;
+static u64 mtl_pte_encode(dma_addr_t addr,
+			  unsigned int pat_index,
+			  u32 flags)
+{
+	gen8_pte_t pte;
 
-	if (pat_index & BIT(3))
-		pte |= MTL_PPGTT_PTE_PAT3;
+	pte = gen12_pte_encode(addr, pat_index, flags);
+	pte |= (u64)(pat_index & BIT(3)) << (ilog2(MTL_PPGTT_PTE_PAT3) - 3);
 
 	return pte;
 }
@@ -995,7 +1001,9 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 	 */
 	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-	if (GRAPHICS_VER(gt->i915) >= 12)
+	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+		ppgtt->vm.pte_encode = mtl_pte_encode;
+	else if (GRAPHICS_VER(gt->i915) >= 12)
 		ppgtt->vm.pte_encode = gen12_pte_encode;
 	else
 		ppgtt->vm.pte_encode = gen8_pte_encode;
-- 
2.39.2



More information about the Intel-gfx-trybot mailing list