[Mesa-dev] [PATCH 4/4] R600/SI: Support AMDGPU.ddx/y intrinsics
Paul Berry
stereotype441 at gmail.com
Fri Feb 22 09:05:11 PST 2013
On 22 February 2013 08:02, Michel Dänzer <michel at daenzer.net> wrote:
> From: Michel Dänzer <michel.daenzer at amd.com>
>
> Use LDS for calculating the deltas between neighbouring pixels. Not sure the
> sign of the delta is correct for both dimensions (the sign doesn't seem to
> matter for the relevant piglit tests), but it'll be easy to fix that up if
> not.
>
Is this the back-end code that implements the dFdx() and dFdy() GLSL
functions? If so, then there is a piglit test that verifies that the sign
of the delta is correct for both dimensions. It's in tests/fbo/fbo-deriv.c.
>
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
> lib/Target/R600/SIISelLowering.cpp | 84
> ++++++++++++++++++++++++++++++++++++++
> lib/Target/R600/SIISelLowering.h | 2 +
> lib/Target/R600/SIInstructions.td | 29 ++++++++++++-
> 3 files changed, 113 insertions(+), 2 deletions(-)
>
> diff --git a/lib/Target/R600/SIISelLowering.cpp
> b/lib/Target/R600/SIISelLowering.cpp
> index 212e3f2..b21a7e8 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -75,6 +75,9 @@ MachineBasicBlock *
> SITargetLowering::EmitInstrWithCustomInserter(
> MI->eraseFromParent();
> break;
>
> + case AMDGPU::SI_DD:
> + LowerSI_DD(MI, *BB, I, MRI);
> + break;
> case AMDGPU::SI_INTERP:
> LowerSI_INTERP(MI, *BB, I, MRI);
> break;
> @@ -93,6 +96,87 @@ void SITargetLowering::LowerSI_WQM(MachineInstr *MI,
> MachineBasicBlock &BB,
> MI->eraseFromParent();
> }
>
> +void SITargetLowering::LowerSI_DD(MachineInstr *MI, MachineBasicBlock &BB,
> + MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
> + unsigned mbcnt_lo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
> + unsigned mbcnt = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
> + unsigned tid = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
> + unsigned tid0 = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
> + unsigned tid1 = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
> + unsigned coord0 = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
> + unsigned coord1 = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
> + MachineOperand dst = MI->getOperand(0);
> + MachineOperand coord = MI->getOperand(1);
> + MachineOperand incr = MI->getOperand(2);
> +
> + // Get this thread's ID
> + BuildMI(BB, I, BB.findDebugLoc(I),
> TII->get(AMDGPU::V_MBCNT_LO_U32_B32_e64), mbcnt_lo)
> + .addImm(0xffffffff)
> + .addImm(0x80) // Inline constant 0
> + .addImm(0)
> + .addImm(0)
> + .addImm(0)
> + .addImm(0);
> + BuildMI(BB, I, BB.findDebugLoc(I),
> TII->get(AMDGPU::V_MBCNT_HI_U32_B32_e32), mbcnt)
> + .addImm(0xffffffff)
> + .addReg(mbcnt_lo);
> +
> + // Multiply by 4 to get a DWORD offset
> + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_LSHL_B32_e64),
> tid)
> + .addReg(mbcnt)
> + .addImm(0x82) // Inline constant 2
> + .addImm(0)
> + .addImm(0)
> + .addImm(0)
> + .addImm(0);
> +
> + // Write this thread's coordinate to LDS
> + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::DS_WRITE_B32))
> + .addOperand(coord)
> + .addImm(0) // LDS
> + .addReg(tid)
> + .addOperand(coord)
> + .addOperand(coord)
> + .addImm(0)
> + .addImm(0);
> +
> + // Get bottom left thread ID * 4
> + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_AND_B32_e32),
> tid0)
> + .addImm(0xfffffff0)
> + .addReg(tid);
> +
> + // Read bottom left thread's coordinate from LDS
> + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::DS_READ_B32),
> coord0)
> + .addImm(0) // LDS
> + .addReg(tid0)
> + .addReg(tid0)
> + .addReg(tid0)
> + .addImm(0)
> + .addImm(0);
> +
> + // Get bottom right / top left thread ID * 4
> + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_ADD_I32_e32),
> tid1)
> + .addOperand(incr)
> + .addReg(tid0);
> +
> + // Read bottom right / top left thread's coordinate from LDS
> + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::DS_READ_B32),
> coord1)
> + .addImm(0) // LDS
> + .addReg(tid1)
> + .addReg(tid1)
> + .addReg(tid1)
> + .addImm(0)
> + .addImm(0);
> +
> + // Subtract bottom left coordinate from bottom right / top left
> coordinate
> + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_SUB_F32_e32))
> + .addOperand(dst)
> + .addReg(coord1)
> + .addReg(coord0);
> +
> + MI->eraseFromParent();
> +}
> +
> void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock
> &BB,
> MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
> unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
> diff --git a/lib/Target/R600/SIISelLowering.h
> b/lib/Target/R600/SIISelLowering.h
> index 5d048f8..905a43e 100644
> --- a/lib/Target/R600/SIISelLowering.h
> +++ b/lib/Target/R600/SIISelLowering.h
> @@ -25,6 +25,8 @@ class SITargetLowering : public AMDGPUTargetLowering {
>
> void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB,
> MachineBasicBlock::iterator I, unsigned Opocde) const;
> + void LowerSI_DD(MachineInstr *MI, MachineBasicBlock &BB,
> + MachineBasicBlock::iterator I, MachineRegisterInfo & MRI)
> const;
> void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
> MachineBasicBlock::iterator I, MachineRegisterInfo & MRI)
> const;
> void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
> diff --git a/lib/Target/R600/SIInstructions.td
> b/lib/Target/R600/SIInstructions.td
> index 7152c49..490fb99 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -806,8 +806,8 @@ defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
> defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
> defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
> //defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
> -//defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32",
> []>;
> -//defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32",
> []>;
> +defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
> +defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
> let Defs = [VCC] in { // Carry-out goes to VCC
> defm V_ADD_I32 : VOP2_32 <0x00000025, "V_ADD_I32",
> [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
> @@ -996,6 +996,13 @@ def LOAD_CONST : AMDGPUShaderInst <
>
> let usesCustomInserter = 1 in {
>
> +def SI_DD : InstSI <
> + (outs VReg_32:$dst),
> + (ins VReg_32:$src, i32imm:$incr),
> + "SI_DD $src, $incr",
> + []
> +>;
> +
> def SI_INTERP : InstSI <
> (outs VReg_32:$dst),
> (ins VReg_32:$i, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr,
> SReg_32:$params),
> @@ -1396,6 +1403,24 @@ def : Pat <
> (V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0)
> >;
>
> +def : Pat <
> + (int_AMDGPU_ddx VReg_128:$src, imm, imm, imm),
> + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32
> (IMPLICIT_DEF)),
> + (SI_DD (EXTRACT_SUBREG VReg_128:$src, sub0), 4), sub0),
> + (SI_DD (EXTRACT_SUBREG VReg_128:$src, sub1), 4), sub1),
> + (SI_DD (EXTRACT_SUBREG VReg_128:$src, sub2), 4), sub2),
> + (SI_DD (EXTRACT_SUBREG VReg_128:$src, sub3), 4), sub3)
> +>;
> +
> +def : Pat <
> + (int_AMDGPU_ddy VReg_128:$src, imm, imm, imm),
> + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32
> (IMPLICIT_DEF)),
> + (SI_DD (EXTRACT_SUBREG VReg_128:$src, sub0), 8), sub0),
> + (SI_DD (EXTRACT_SUBREG VReg_128:$src, sub1), 8), sub1),
> + (SI_DD (EXTRACT_SUBREG VReg_128:$src, sub2), 8), sub2),
> + (SI_DD (EXTRACT_SUBREG VReg_128:$src, sub3), 8), sub3)
> +>;
> +
> /********** ================== **********/
> /********** VOP3 Patterns **********/
> /********** ================== **********/
> --
> 1.8.1.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20130222/60b64e3c/attachment-0001.html>
More information about the mesa-dev mailing list