[Mesa-dev] [PATCH v3] R600/SI: Add pattern for AMDGPUurecip
Tom Stellard
tom at stellard.net
Fri Apr 19 21:06:28 PDT 2013
On Thu, Apr 11, 2013 at 10:12:01AM +0200, Christian König wrote:
> Am 10.04.2013 18:50, schrieb Tom Stellard:
> >On Wed, Apr 10, 2013 at 05:59:48PM +0200, Michel Dänzer wrote:
> >>[SNIP]
> >We should start using the updated pattern syntax for all new patterns.
> >This means replacing register classes with types for the input patterns
> >and omitting the type in the output pattern:
> >
> >def : Pat <
> > (AMDGPUurecip i32:$src0),
> > (V_CVT_U32_F32_e32
> > (V_MUL_F32_e32 CONST.FP_UINT_MAX_PLUS_1,
> > (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
> >
> >With that change:
> >
> >Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
>
> BTW: I created the attached patches two weeks ago. They rework most
> of the existing patterns on SI to use the new format, but I
> currently don't have time to rebase, test & commit them. They
> shouldn't change anything in functionality, so if you guys think
> they are ok then please review and commit them.
>
Thanks for doing this. I've thrown these patches into a branch along
with changes to the R600 patterns. I will try to test them next week.
Is there any reason why we can't squash all these patches together before
we commit?
-Tom
> Thanks,
> Christian.
> From f0175c616db5f6d3f1024137edbd8773c118f7dc Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Thu, 28 Mar 2013 12:50:55 +0100
> Subject: [PATCH 1/9] R600/SI: remove nonsense select pattern
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Fortunately this pattern never matched, otherwise
> we would have generated incorrect code.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> lib/Target/R600/SIInstructions.td | 9 +--------
> 1 file changed, 1 insertion(+), 8 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index eb410d7..e37003e 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1019,18 +1019,11 @@ def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
> def S_CSELECT_B32 : SOP2 <
> 0x0000000a, (outs SReg_32:$dst),
> (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
> - [(set (i32 SReg_32:$dst), (select (i1 SCCReg:$scc),
> - SReg_32:$src0, SReg_32:$src1))]
> + []
> >;
>
> def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
>
> -// f32 pattern for S_CSELECT_B32
> -def : Pat <
> - (f32 (select (i1 SCCReg:$scc), SReg_32:$src0, SReg_32:$src1)),
> - (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc)
> ->;
> -
> def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
>
> def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
> --
> 1.7.10.4
>
> From 7a2c0f084fa9ac949084a2c719d9944dd680a866 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Thu, 28 Mar 2013 11:18:00 +0100
> Subject: [PATCH 2/9] R600/SI: start reworking patterns
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> We don't need register classes in patterns any longer.
> Let's start with the indirect addressing patterns.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> lib/Target/R600/SIInstructions.td | 36 ++++++++++++++----------------------
> 1 file changed, 14 insertions(+), 22 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index e37003e..6ee3923 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1542,45 +1542,37 @@ defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
> /********** Indirect adressing **********/
> /********** ====================== **********/
>
> -multiclass SI_INDIRECT_Pattern <RegisterClass rc, ValueType vt,
> - SI_INDIRECT_DST IndDst> {
> +multiclass SI_INDIRECT_Pattern <ValueType vt, SI_INDIRECT_DST IndDst> {
> +
> // 1. Extract with offset
> def : Pat<
> - (vector_extract (vt rc:$vec),
> - (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
> - ),
> - (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off))
> + (vector_extract vt:$vec, (i64 (zext (add i32:$idx, imm:$off)))),
> + (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), vt:$vec, VReg_32:$idx, imm:$off))
> >;
>
> // 2. Extract without offset
> def : Pat<
> - (vector_extract (vt rc:$vec),
> - (i64 (zext (i32 VReg_32:$idx)))
> - ),
> - (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0))
> + (vector_extract vt:$vec, (i64 (zext i32:$idx))),
> + (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), vt:$vec, i32:$idx, 0))
> >;
>
> // 3. Insert with offset
> def : Pat<
> - (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
> - (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
> - ),
> - (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off, VReg_32:$val))
> + (vector_insert vt:$vec, f32:$val, (i64 (zext (add i32:$idx, imm:$off)))),
> + (IndDst (IMPLICIT_DEF), vt:$vec, i32:$idx, imm:$off, f32:$val)
> >;
>
> // 4. Insert without offset
> def : Pat<
> - (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
> - (i64 (zext (i32 VReg_32:$idx)))
> - ),
> - (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0, VReg_32:$val))
> + (vector_insert vt:$vec, f32:$val, (i64 (zext i32:$idx))),
> + (IndDst (IMPLICIT_DEF), vt:$vec, i32:$idx, 0, f32:$val)
> >;
> }
>
> -defm : SI_INDIRECT_Pattern <VReg_64, v2f32, SI_INDIRECT_DST_V2>;
> -defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>;
> -defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>;
> -defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>;
> +defm : SI_INDIRECT_Pattern <v2f32, SI_INDIRECT_DST_V2>;
> +defm : SI_INDIRECT_Pattern <v4f32, SI_INDIRECT_DST_V4>;
> +defm : SI_INDIRECT_Pattern <v8f32, SI_INDIRECT_DST_V8>;
> +defm : SI_INDIRECT_Pattern <v16f32, SI_INDIRECT_DST_V16>;
>
> /********** =============== **********/
> /********** Conditions **********/
> --
> 1.7.10.4
>
> From 3c102c001b7e707cadc1261caabf5ef0a01e0434 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Thu, 28 Mar 2013 11:22:53 +0100
> Subject: [PATCH 3/9] R600/SI: remove reg classes from constant load patterns
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> lib/Target/R600/SIInstructions.td | 13 +++++++------
> 1 file changed, 7 insertions(+), 6 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 6ee3923..48e1698 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1514,22 +1514,23 @@ def : Pat <(f32 (fadd (fmul VSrc_32:$src0, VSrc_32:$src1), VSrc_32:$src2)),
> /********** ================== **********/
>
> multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
> +
> // 1. Offset as 8bit DWORD immediate
> def : Pat <
> - (constant_load (SIadd64bit32bit SReg_64:$sbase, IMM8bitDWORD:$offset)),
> - (vt (Instr_IMM SReg_64:$sbase, IMM8bitDWORD:$offset))
> + (constant_load (SIadd64bit32bit i64:$sbase, IMM8bitDWORD:$offset)),
> + (vt (Instr_IMM i64:$sbase, IMM8bitDWORD:$offset))
> >;
>
> // 2. Offset loaded in an 32bit SGPR
> def : Pat <
> - (constant_load (SIadd64bit32bit SReg_64:$sbase, imm:$offset)),
> - (vt (Instr_SGPR SReg_64:$sbase, (S_MOV_B32 imm:$offset)))
> + (constant_load (SIadd64bit32bit i64:$sbase, imm:$offset)),
> + (vt (Instr_SGPR i64:$sbase, (S_MOV_B32 imm:$offset)))
> >;
>
> // 3. No offset at all
> def : Pat <
> - (constant_load SReg_64:$sbase),
> - (vt (Instr_IMM SReg_64:$sbase, 0))
> + (constant_load i64:$sbase),
> + (vt (Instr_IMM i64:$sbase, 0))
> >;
> }
>
> --
> 1.7.10.4
>
> From c4aad71aa012d62b5a6760f7660b20446b9a184e Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Thu, 28 Mar 2013 11:26:31 +0100
> Subject: [PATCH 4/9] R600/SI: remove reg classes from VOP3 patterns
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> lib/Target/R600/SIInstructions.td | 7 ++++---
> 1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 48e1698..a33ee69 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1505,9 +1505,10 @@ def : Pat <
> /********** VOP3 Patterns **********/
> /********** ================== **********/
>
> -def : Pat <(f32 (fadd (fmul VSrc_32:$src0, VSrc_32:$src1), VSrc_32:$src2)),
> - (V_MAD_F32 VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2,
> - 0, 0, 0, 0)>;
> +def : Pat <
> + (f32 (fadd (fmul f32:$src0, f32:$src1), f32:$src2)),
> + (V_MAD_F32 f32:$src0, f32:$src1, f32:$src2, 0, 0, 0, 0)
> +>;
>
> /********** ================== **********/
> /********** SMRD Patterns **********/
> --
> 1.7.10.4
>
> From fd8fe9d538267aa578ecea740c9eb53f78e3b923 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Thu, 28 Mar 2013 11:33:38 +0100
> Subject: [PATCH 5/9] R600/SI: remove reg classes from instrinsic patterns
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> lib/Target/R600/SIInstructions.td | 58 ++++++++++++++++++-------------------
> 1 file changed, 29 insertions(+), 29 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index a33ee69..3e3974e 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1438,67 +1438,67 @@ def : Pat <
> def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32, VReg_32>;
>
> def : Pat <
> - (int_AMDGPU_div VSrc_32:$src0, VSrc_32:$src1),
> - (V_MUL_LEGACY_F32_e32 VSrc_32:$src0, (V_RCP_LEGACY_F32_e32 VSrc_32:$src1))
> + (int_AMDGPU_div f32:$src0, f32:$src1),
> + (V_MUL_LEGACY_F32_e32 f32:$src0, (V_RCP_LEGACY_F32_e32 f32:$src1))
> >;
>
> def : Pat<
> - (fdiv VSrc_32:$src0, VSrc_32:$src1),
> - (V_MUL_F32_e32 VSrc_32:$src0, (V_RCP_F32_e32 VSrc_32:$src1))
> + (fdiv f32:$src0, f32:$src1),
> + (V_MUL_F32_e32 f32:$src0, (V_RCP_F32_e32 f32:$src1))
> >;
>
> def : Pat <
> - (fcos VSrc_32:$src0),
> - (V_COS_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
> + (fcos f32:$src0),
> + (V_COS_F32_e32 (V_MUL_F32_e32 f32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
> >;
>
> def : Pat <
> - (fsin VSrc_32:$src0),
> - (V_SIN_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
> + (fsin f32:$src0),
> + (V_SIN_F32_e32 (V_MUL_F32_e32 f32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
> >;
>
> def : Pat <
> - (int_AMDGPU_cube VReg_128:$src),
> + (int_AMDGPU_cube v4f32:$src),
> (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
> - (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
> - (EXTRACT_SUBREG VReg_128:$src, sub1),
> - (EXTRACT_SUBREG VReg_128:$src, sub2),
> + (V_CUBETC_F32 (EXTRACT_SUBREG v4f32:$src, sub0),
> + (EXTRACT_SUBREG v4f32:$src, sub1),
> + (EXTRACT_SUBREG v4f32:$src, sub2),
> 0, 0, 0, 0), sub0),
> - (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
> - (EXTRACT_SUBREG VReg_128:$src, sub1),
> - (EXTRACT_SUBREG VReg_128:$src, sub2),
> + (V_CUBESC_F32 (EXTRACT_SUBREG v4f32:$src, sub0),
> + (EXTRACT_SUBREG v4f32:$src, sub1),
> + (EXTRACT_SUBREG v4f32:$src, sub2),
> 0, 0, 0, 0), sub1),
> - (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
> - (EXTRACT_SUBREG VReg_128:$src, sub1),
> - (EXTRACT_SUBREG VReg_128:$src, sub2),
> + (V_CUBEMA_F32 (EXTRACT_SUBREG v4f32:$src, sub0),
> + (EXTRACT_SUBREG v4f32:$src, sub1),
> + (EXTRACT_SUBREG v4f32:$src, sub2),
> 0, 0, 0, 0), sub2),
> - (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
> - (EXTRACT_SUBREG VReg_128:$src, sub1),
> - (EXTRACT_SUBREG VReg_128:$src, sub2),
> + (V_CUBEID_F32 (EXTRACT_SUBREG v4f32:$src, sub0),
> + (EXTRACT_SUBREG v4f32:$src, sub1),
> + (EXTRACT_SUBREG v4f32:$src, sub2),
> 0, 0, 0, 0), sub3)
> >;
>
> def : Pat <
> - (i32 (sext (i1 SReg_64:$src0))),
> - (V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0)
> + (i32 (sext i1:$src0)),
> + (V_CNDMASK_B32_e64 (i32 0), (i32 -1), i1:$src0)
> >;
>
> // 1. Offset as 8bit DWORD immediate
> def : Pat <
> - (int_SI_load_const SReg_128:$sbase, IMM8bitDWORD:$offset),
> - (S_BUFFER_LOAD_DWORD_IMM SReg_128:$sbase, IMM8bitDWORD:$offset)
> + (int_SI_load_const v16i8:$sbase, IMM8bitDWORD:$offset),
> + (S_BUFFER_LOAD_DWORD_IMM v16i8:$sbase, IMM8bitDWORD:$offset)
> >;
>
> // 2. Offset loaded in an 32bit SGPR
> def : Pat <
> - (int_SI_load_const SReg_128:$sbase, imm:$offset),
> - (S_BUFFER_LOAD_DWORD_SGPR SReg_128:$sbase, (S_MOV_B32 imm:$offset))
> + (int_SI_load_const v16i8:$sbase, imm:$offset),
> + (S_BUFFER_LOAD_DWORD_SGPR v16i8:$sbase, (S_MOV_B32 imm:$offset))
> >;
>
> // 3. Offset in an 32Bit VGPR
> def : Pat <
> - (int_SI_load_const SReg_128:$sbase, VReg_32:$voff),
> - (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0)
> + (int_SI_load_const v16i8:$sbase, i32:$voff),
> + (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, i32:$voff, v16i8:$sbase, 0, 0, 0)
> >;
>
> /********** ================== **********/
> --
> 1.7.10.4
>
> From e009091046c06e55dba8381ccb1bc3fac27c726b Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Thu, 28 Mar 2013 11:37:24 +0100
> Subject: [PATCH 6/9] R600/SI: remove reg classes from interpolation patterns
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> lib/Target/R600/SIInstructions.td | 14 +++++++-------
> 1 file changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 3e3974e..8db15c5 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1418,16 +1418,16 @@ def : Pat <
> /********** ===================== **********/
>
> def : Pat <
> - (int_SI_fs_constant imm:$attr_chan, imm:$attr, M0Reg:$params),
> - (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, M0Reg:$params)
> + (int_SI_fs_constant imm:$attr_chan, imm:$attr, i32:$params),
> + (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, i32:$params)
> >;
>
> def : Pat <
> - (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, VReg_64:$ij),
> - (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG VReg_64:$ij, sub0),
> - imm:$attr_chan, imm:$attr, M0Reg:$params),
> - (EXTRACT_SUBREG VReg_64:$ij, sub1),
> - imm:$attr_chan, imm:$attr, M0Reg:$params)
> + (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, v2i32:$ij),
> + (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0),
> + imm:$attr_chan, imm:$attr, i32:$params),
> + (EXTRACT_SUBREG v2i32:$ij, sub1),
> + imm:$attr_chan, imm:$attr, i32:$params)
> >;
>
> /********** ================== **********/
> --
> 1.7.10.4
>
> From 7ee8131bbb98e6e13be8ae8e0df248550cb72ded Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Thu, 28 Mar 2013 11:40:14 +0100
> Subject: [PATCH 7/9] R600/SI: remove register classes from modifier patterns
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> lib/Target/R600/SIInstructions.td | 12 ++++++------
> 1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 8db15c5..0988653 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1364,20 +1364,20 @@ def : BitConvert <f32, i32, VReg_32>;
> /********** =================== **********/
>
> def : Pat <
> - (int_AMDIL_clamp VReg_32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
> - (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
> + (int_AMDIL_clamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
> + (V_ADD_F32_e64 f32:$src, (i32 0 /* SRC1 */),
> 0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
> >;
>
> def : Pat <
> - (fabs VReg_32:$src),
> - (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
> + (fabs f32:$src),
> + (V_ADD_F32_e64 f32:$src, (i32 0 /* SRC1 */),
> 1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
> >;
>
> def : Pat <
> - (fneg VReg_32:$src),
> - (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
> + (fneg f32:$src),
> + (V_ADD_F32_e64 f32:$src, (i32 0 /* SRC1 */),
> 0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */)
> >;
>
> --
> 1.7.10.4
>
> From 11c3926424284acb50587b9038f7cb05afe53849 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Thu, 28 Mar 2013 12:29:45 +0100
> Subject: [PATCH 8/9] R600/SI: remove register classes from image sampling
> patterns
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> lib/Target/R600/SIInstructions.td | 92 +++++++++++++++++--------------------
> 1 file changed, 41 insertions(+), 51 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 0988653..4f58081 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1203,77 +1203,67 @@ def : Pat <
> VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3)
> >;
>
> +/********** ======================= **********/
> +/********** Image sampling patterns **********/
> +/********** ======================= **********/
>
> /* int_SI_sample for simple 1D texture lookup */
> def : Pat <
> - (int_SI_sample VReg_32:$addr, SReg_256:$rsrc, SReg_128:$sampler, imm),
> - (IMAGE_SAMPLE 0xf, 0, 0, 0, 0, 0, 0, 0, VReg_32:$addr,
> - SReg_256:$rsrc, SReg_128:$sampler)
> + (int_SI_sample v1i32:$addr, v32i8:$rsrc, v16i8:$sampler, imm),
> + (IMAGE_SAMPLE 0xf, 0, 0, 0, 0, 0, 0, 0, v1i32:$addr,
> + v32i8:$rsrc, v16i8:$sampler)
> >;
>
> -class SamplePattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
> - ValueType addr_type> : Pat <
> - (name (addr_type addr_class:$addr),
> - SReg_256:$rsrc, SReg_128:$sampler, imm),
> - (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr,
> - SReg_256:$rsrc, SReg_128:$sampler)
> +class SamplePattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
> + (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, imm),
> + (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, vt:$addr, v32i8:$rsrc, v16i8:$sampler)
> >;
>
> -class SampleRectPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
> - ValueType addr_type> : Pat <
> - (name (addr_type addr_class:$addr),
> - SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT),
> - (opcode 0xf, 1, 0, 0, 0, 0, 0, 0, addr_class:$addr,
> - SReg_256:$rsrc, SReg_128:$sampler)
> +class SampleRectPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
> + (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_RECT),
> + (opcode 0xf, 1, 0, 0, 0, 0, 0, 0, vt:$addr, v32i8:$rsrc, v16i8:$sampler)
> >;
>
> -class SampleArrayPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
> - ValueType addr_type> : Pat <
> - (name (addr_type addr_class:$addr),
> - SReg_256:$rsrc, SReg_128:$sampler, TEX_ARRAY),
> - (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr,
> - SReg_256:$rsrc, SReg_128:$sampler)
> +class SampleArrayPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
> + (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_ARRAY),
> + (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, vt:$addr, v32i8:$rsrc, v16i8:$sampler)
> >;
>
> class SampleShadowPattern<Intrinsic name, MIMG opcode,
> - RegisterClass addr_class, ValueType addr_type> : Pat <
> - (name (addr_type addr_class:$addr),
> - SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW),
> - (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr,
> - SReg_256:$rsrc, SReg_128:$sampler)
> + ValueType vt> : Pat <
> + (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW),
> + (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, vt:$addr, v32i8:$rsrc, v16i8:$sampler)
> >;
>
> class SampleShadowArrayPattern<Intrinsic name, MIMG opcode,
> - RegisterClass addr_class, ValueType addr_type> : Pat <
> - (name (addr_type addr_class:$addr),
> - SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW_ARRAY),
> - (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr,
> - SReg_256:$rsrc, SReg_128:$sampler)
> + ValueType vt> : Pat <
> + (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW_ARRAY),
> + (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, vt:$addr, v32i8:$rsrc, v16i8:$sampler)
> >;
>
> /* int_SI_sample* for texture lookups consuming more address parameters */
> -multiclass SamplePatterns<RegisterClass addr_class, ValueType addr_type> {
> - def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
> - def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
> - def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
> - def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>;
> - def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>;
> -
> - def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>;
> - def : SampleArrayPattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>;
> - def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>;
> - def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>;
> -
> - def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>;
> - def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>;
> - def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>;
> - def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>;
> +multiclass SamplePatterns<ValueType addr_type> {
> + def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
> + def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
> + def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
> + def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_type>;
> + def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_type>;
> +
> + def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_type>;
> + def : SampleArrayPattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_type>;
> + def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_type>;
> + def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_type>;
> +
> + def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>;
> + def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>;
> + def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>;
> + def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>;
> }
>
> -defm : SamplePatterns<VReg_64, v2i32>;
> -defm : SamplePatterns<VReg_128, v4i32>;
> -defm : SamplePatterns<VReg_256, v8i32>;
> -defm : SamplePatterns<VReg_512, v16i32>;
> +defm : SamplePatterns<v2i32>;
> +defm : SamplePatterns<v4i32>;
> +defm : SamplePatterns<v8i32>;
> +defm : SamplePatterns<v16i32>;
>
> /********** ============================================ **********/
> /********** Extraction, Insertion, Building and Casting **********/
> --
> 1.7.10.4
>
> From b39b969f73f1766e3fd4615364f0015589012a52 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Thu, 28 Mar 2013 13:01:28 +0100
> Subject: [PATCH 9/9] R600/SI: remove register classes from the remaining
> patterns
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> lib/Target/R600/SIInstructions.td | 39 ++++++++++++++++++-------------------
> 1 file changed, 19 insertions(+), 20 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 4f58081..b81a3bd 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -793,8 +793,8 @@ def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst),
>
> //f32 pattern for V_CNDMASK_B32_e64
> def : Pat <
> - (f32 (select (i1 SSrc_64:$src2), VSrc_32:$src1, VSrc_32:$src0)),
> - (V_CNDMASK_B32_e64 VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2)
> + (f32 (select i1:$src2, f32:$src1, f32:$src0)),
> + (V_CNDMASK_B32_e64 f32:$src0, f32:$src1, i1:$src2)
> >;
>
> defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
> @@ -983,18 +983,18 @@ def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
> } // isCommutable = 1
>
> def : Pat <
> - (mul VSrc_32:$src0, VReg_32:$src1),
> - (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
> + (mul i32:$src0, i32:$src1),
> + (V_MUL_LO_I32 i32:$src0, i32:$src1, (i32 0), 0, 0, 0, 0)
> >;
>
> def : Pat <
> - (mulhu VSrc_32:$src0, VReg_32:$src1),
> - (V_MUL_HI_U32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
> + (mulhu i32:$src0, i32:$src1),
> + (V_MUL_HI_U32 i32:$src0, i32:$src1, (i32 0), 0, 0, 0, 0)
> >;
>
> def : Pat <
> - (mulhs VSrc_32:$src0, VReg_32:$src1),
> - (V_MUL_HI_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
> + (mulhs i32:$src0, i32:$src1),
> + (V_MUL_HI_I32 i32:$src0, i32:$src1, (i32 0), 0, 0, 0, 0)
> >;
>
> def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
> @@ -1031,15 +1031,15 @@ def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
> >;
>
> def : Pat <
> - (i1 (and SSrc_64:$src0, SSrc_64:$src1)),
> - (S_AND_B64 SSrc_64:$src0, SSrc_64:$src1)
> + (i1 (and i1:$src0, i1:$src1)),
> + (S_AND_B64 i1:$src0, i1:$src1)
> >;
>
> def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
> def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
> def : Pat <
> - (i1 (or SSrc_64:$src0, SSrc_64:$src1)),
> - (S_OR_B64 SSrc_64:$src0, SSrc_64:$src1)
> + (i1 (or i1:$src0, i1:$src1)),
> + (S_OR_B64 i1:$src0, i1:$src1)
> >;
> def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
> def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>;
> @@ -1177,8 +1177,8 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
> } // end IsCodeGenOnly, isPseudo
>
> def : Pat<
> - (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2),
> - (V_CNDMASK_B32_e64 VReg_32:$src2, VReg_32:$src1, (V_CMP_GT_F32_e64 0, VReg_32:$src0))
> + (int_AMDGPU_cndlt f32:$src0, f32:$src1, f32:$src2),
> + (V_CNDMASK_B32_e64 f32:$src2, f32:$src1, (V_CMP_GT_F32_e64 0, f32:$src0))
> >;
>
> def : Pat <
> @@ -1188,19 +1188,18 @@ def : Pat <
>
> /* int_SI_vs_load_input */
> def : Pat<
> - (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
> - VReg_32:$buf_idx_vgpr),
> + (int_SI_vs_load_input v16i8:$tlst, IMM12bit:$attr_offset,
> + i32:$buf_idx_vgpr),
> (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0,
> - VReg_32:$buf_idx_vgpr, SReg_128:$tlst,
> - 0, 0, 0)
> + i32:$buf_idx_vgpr, v16i8:$tlst, 0, 0, 0)
> >;
>
> /* int_SI_export */
> def : Pat <
> (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
> - VReg_32:$src0,VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
> + f32:$src0, f32:$src1, f32:$src2, f32:$src3),
> (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
> - VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3)
> + f32:$src0, f32:$src1, f32:$src2, f32:$src3)
> >;
>
> /********** ======================= **********/
> --
> 1.7.10.4
>
More information about the mesa-dev
mailing list