Mesa (master): nv50/ir/gk110: add implementations of div u32/s32

Ilia Mirkin imirkin at kemper.freedesktop.org
Tue Mar 18 09:58:23 UTC 2014


Module: Mesa
Branch: master
Commit: 48a9ba63f5c9751052e472f8d7fb195ce874199d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=48a9ba63f5c9751052e472f8d7fb195ce874199d

Author: Ilia Mirkin <imirkin at alum.mit.edu>
Date:   Wed Mar 12 12:00:58 2014 -0400

nv50/ir/gk110: add implementations of div u32/s32

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>

---

 .../drivers/nouveau/codegen/target_lib_nvf0.asm    |   86 ++++++++++++++++++++
 .../drivers/nouveau/codegen/target_lib_nvf0.asm.h  |   81 ++++++++++++++++--
 2 files changed, 162 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/target_lib_nvf0.asm b/src/gallium/drivers/nouveau/codegen/target_lib_nvf0.asm
new file mode 100644
index 0000000..a0c5497
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/target_lib_nvf0.asm
@@ -0,0 +1,86 @@
+//
+// DIV U32
+//
+// UNR recurrence (q = a / b):
+// look for z such that 2^32 - b <= b * z < 2^32
+// then q - 1 <= (a * z) / 2^32 <= q
+//
+// INPUT:   $r0: dividend, $r1: divisor
+// OUTPUT:  $r0: result, $r1: modulus
+// CLOBBER: $r2 - $r3, $p0 - $p1
+// SIZE:    30 * 8 bytes (26 instructions + 4 sched words)
+//
+sched 0x28282804280428  // scheduling word for the next 7 instructions (NOTE(review): per-byte meaning not shown here)
+bfind u32 $r2 $r1  // $r2 = bit index found in divisor b (presumably the highest set bit; confirm against ISA docs)
+xor b32 $r2 $r2 0x1f  // $r2 ^= 31, which equals 31 - index for an index in 0..31
+mov b32 $r3 0x1  // $r3 = 1
+shl b32 $r2 $r3 clamp $r2  // $r2 = 1 << $r2: initial estimate of z
+cvt u32 $r1 neg u32 $r1  // $r1 = -b; the divisor stays negated through the recurrence
+mul $r3 u32 $r1 u32 $r2  // step 1: $r3 = low 32 bits of (-b * z)
+add $r2 (mul high u32 $r2 u32 $r3) $r2  // step 1: z += high 32 bits of (z * $r3)
+sched 0x28282828282828
+mul $r3 u32 $r1 u32 $r2  // step 2
+add $r2 (mul high u32 $r2 u32 $r3) $r2
+mul $r3 u32 $r1 u32 $r2  // step 3
+add $r2 (mul high u32 $r2 u32 $r3) $r2
+mul $r3 u32 $r1 u32 $r2  // step 4
+add $r2 (mul high u32 $r2 u32 $r3) $r2
+mul $r3 u32 $r1 u32 $r2  // step 5 (its add follows the next sched word)
+sched 0x042c2828042804
+add $r2 (mul high u32 $r2 u32 $r3) $r2  // z is final after this fifth refinement
+mov b32 $r3 $r0  // keep dividend a in $r3; $r0 is overwritten next
+mul high $r0 u32 $r0 u32 $r2  // q = high 32 bits of (a * z)
+cvt u32 $r2 neg u32 $r1  // $r2 = -(-b) = b, needed for the compares below
+add $r1 (mul u32 $r1 u32 $r0) $r3  // $r1 = (-b * q) + a = a - b * q, the remainder for this q
+set $p0 0x1 ge u32 $r1 $r2  // $p0 = (remainder >= b), unsigned
+$p0 sub b32 $r1 $r1 $r2  // first correction: remainder -= b
+sched 0x20282e20042c28  // NOTE(review): only 5 instructions of this routine follow; the group appears to continue into DIV S32
+$p0 add b32 $r0 $r0 0x1  // first correction: q += 1
+$p0 set $p0 0x1 ge u32 $r1 $r2  // only if the first correction ran: re-test remainder >= b
+$p0 sub b32 $r1 $r1 $r2  // second correction: remainder -= b
+$p0 add b32 $r0 $r0 0x1  // second correction: q += 1
+ret  // $r0 = quotient, $r1 = remainder
+//
+// DIV S32, like DIV U32 after taking ABS(inputs)
+//
+// INPUT:   $r0: dividend, $r1: divisor
+// OUTPUT:  $r0: result (negated if input signs differ), $r1: modulus (takes the sign of the dividend)
+// CLOBBER: $r2 - $r3, $p0 - $p3
+//
+set $p2 0x1 lt s32 $r0 0x0  // $p2 = (dividend < 0)
+set $p3 0x1 lt s32 $r1 0x0 xor $p2  // $p3 = (divisor < 0) xor $p2: set when the input signs differ
+sched 0x28042804282820
+cvt s32 $r0 abs s32 $r0  // a = |dividend|
+cvt s32 $r1 abs s32 $r1  // b = |divisor|
+bfind u32 $r2 $r1  // $r2 = bit index found in b (presumably the highest set bit; confirm against ISA docs)
+xor b32 $r2 $r2 0x1f  // $r2 ^= 31, which equals 31 - index for an index in 0..31
+mov b32 $r3 0x1  // $r3 = 1
+shl b32 $r2 $r3 clamp $r2  // $r2 = 1 << $r2: initial estimate of z
+cvt u32 $r1 neg u32 $r1  // $r1 = -b; the divisor stays negated through the recurrence
+sched 0x28282828282828
+mul $r3 u32 $r1 u32 $r2  // step 1: $r3 = low 32 bits of (-b * z)
+add $r2 (mul high u32 $r2 u32 $r3) $r2  // step 1: z += high 32 bits of (z * $r3)
+mul $r3 u32 $r1 u32 $r2  // step 2
+add $r2 (mul high u32 $r2 u32 $r3) $r2
+mul $r3 u32 $r1 u32 $r2  // step 3
+add $r2 (mul high u32 $r2 u32 $r3) $r2
+mul $r3 u32 $r1 u32 $r2  // step 4 (its add follows the next sched word)
+sched 0x28280428042828
+add $r2 (mul high u32 $r2 u32 $r3) $r2
+mul $r3 u32 $r1 u32 $r2  // step 5
+add $r2 (mul high u32 $r2 u32 $r3) $r2  // z is final after this fifth refinement
+mov b32 $r3 $r0  // keep a in $r3; $r0 is overwritten next
+mul high $r0 u32 $r0 u32 $r2  // q = high 32 bits of (a * z)
+cvt u32 $r2 neg u32 $r1  // $r2 = -(-b) = b, needed for the compares below
+add $r1 (mul u32 $r1 u32 $r0) $r3  // $r1 = (-b * q) + a = a - b * q, the remainder for this q
+sched 0x2028042c28042c
+set $p0 0x1 ge u32 $r1 $r2  // $p0 = (remainder >= b), unsigned
+$p0 sub b32 $r1 $r1 $r2  // first correction: remainder -= b
+$p0 add b32 $r0 $r0 0x1  // first correction: q += 1
+$p0 set $p0 0x1 ge u32 $r1 $r2  // only if the first correction ran: re-test remainder >= b
+$p0 sub b32 $r1 $r1 $r2  // second correction: remainder -= b
+$p0 add b32 $r0 $r0 0x1  // second correction: q += 1
+$p3 cvt s32 $r0 neg s32 $r0  // negate the quotient when the input signs differed
+sched 0x2c200428042e04
+$p2 cvt s32 $r1 neg s32 $r1  // negate the remainder when the dividend was negative
+ret  // $r0 = signed quotient, $r1 = signed remainder
diff --git a/src/gallium/drivers/nouveau/codegen/target_lib_nvf0.asm.h b/src/gallium/drivers/nouveau/codegen/target_lib_nvf0.asm.h
index d10b6b0..02c1ec6 100644
--- a/src/gallium/drivers/nouveau/codegen/target_lib_nvf0.asm.h
+++ b/src/gallium/drivers/nouveau/codegen/target_lib_nvf0.asm.h
@@ -1,13 +1,84 @@
 
+// Assembled from target_lib_nvf0.asm by envyas -m gk110 -W.
+
 static const uint64_t nvf0_builtin_code[] =
 {
-   0x19000000001c003cULL,
+// DIV U32 (byte offset 0x0000; 30 words = 0xf0 bytes)
+0x08a0a0a010a010a0ULL, // sched 0x28282804280428
+0xe1800000009c000aULL, // bfind u32 $r2 $r1
+0x220000000f9c0808ULL, // xor b32 $r2 $r2 0x1f
+0x74000000009fc00eULL, // mov b32 $r3 0x1
+0xe2400000011c0c0aULL, // shl b32 $r2 $r3 clamp $r2
+0xe6010000009c2806ULL, // cvt u32 $r1 neg u32 $r1
+0xe1c00000011c040eULL, // mul $r3 u32 $r1 u32 $r2
+0xd2000800019c080aULL, // add $r2 (mul high u32 $r2 u32 $r3) $r2
+0x08a0a0a0a0a0a0a0ULL, // sched 0x28282828282828
+0xe1c00000011c040eULL, // mul
+0xd2000800019c080aULL, // add (mul high)
+0xe1c00000011c040eULL, // mul
+0xd2000800019c080aULL, // add (mul high)
+0xe1c00000011c040eULL, // mul
+0xd2000800019c080aULL, // add (mul high)
+0xe1c00000011c040eULL, // mul
+0x0810b0a0a010a010ULL, // sched 0x042c2828042804
+0xd2000800019c080aULL, // add (mul high)
+0xe4c03c00001c000eULL, // mov b32 $r3 $r0
+0xe1c00400011c0002ULL, // mul high $r0 u32 $r0 u32 $r2
+0xe6010000009c280aULL, // cvt u32 $r2 neg u32 $r1
+0xd0000c00001c0406ULL, // add $r1 (mul u32 $r1 u32 $r0) $r3
+0xdb601c00011c041eULL, // set $p0 0x1 ge u32 $r1 $r2
+0xe088000001000406ULL, // $p0 sub b32 $r1 $r1 $r2
+0x0880a0b88010b0a0ULL, // sched 0x20282e20042c28
+0x4000000000800001ULL, // $p0 add b32 $r0 $r0 0x1
+0xdb601c000100041eULL, // $p0 set $p0 0x1 ge u32 $r1 $r2
+0xe088000001000406ULL, // $p0 sub b32 $r1 $r1 $r2
+0x4000000000800001ULL, // $p0 add b32 $r0 $r0 0x1
+0x19000000001c003cULL, // ret (byte offset 0x00e8)
+// DIV S32 (byte offset 0x00f0; 37 words)
+0xdb181c007f9c005eULL, // set $p2 0x1 lt s32 $r0 0x0
+0xdb1a08007f9c047eULL, // set $p3 0x1 lt s32 $r1 0x0 xor $p2
+0x08a010a010a0a080ULL, // sched 0x28042804282820
+0xe6100000001ce802ULL, // cvt s32 $r0 abs s32 $r0
+0xe6100000009ce806ULL, // cvt s32 $r1 abs s32 $r1
+0xe1800000009c000aULL, // bfind u32 $r2 $r1
+0x220000000f9c0808ULL, // xor b32 $r2 $r2 0x1f
+0x74000000009fc00eULL, // mov b32 $r3 0x1
+0xe2400000011c0c0aULL, // shl b32 $r2 $r3 clamp $r2
+0xe6010000009c2806ULL, // cvt u32 $r1 neg u32 $r1
+0x08a0a0a0a0a0a0a0ULL, // sched 0x28282828282828
+0xe1c00000011c040eULL, // mul $r3 u32 $r1 u32 $r2
+0xd2000800019c080aULL, // add $r2 (mul high u32 $r2 u32 $r3) $r2
+0xe1c00000011c040eULL, // mul
+0xd2000800019c080aULL, // add (mul high)
+0xe1c00000011c040eULL, // mul
+0xd2000800019c080aULL, // add (mul high)
+0xe1c00000011c040eULL, // mul
+0x08a0a010a010a0a0ULL, // sched 0x28280428042828
+0xd2000800019c080aULL, // add (mul high)
+0xe1c00000011c040eULL, // mul
+0xd2000800019c080aULL, // add (mul high)
+0xe4c03c00001c000eULL, // mov b32 $r3 $r0
+0xe1c00400011c0002ULL, // mul high $r0 u32 $r0 u32 $r2
+0xe6010000009c280aULL, // cvt u32 $r2 neg u32 $r1
+0xd0000c00001c0406ULL, // add $r1 (mul u32 $r1 u32 $r0) $r3
+0x0880a010b0a010b0ULL, // sched 0x2028042c28042c
+0xdb601c00011c041eULL, // set $p0 0x1 ge u32 $r1 $r2
+0xe088000001000406ULL, // $p0 sub b32 $r1 $r1 $r2
+0x4000000000800001ULL, // $p0 add b32 $r0 $r0 0x1
+0xdb601c000100041eULL, // $p0 set $p0 0x1 ge u32 $r1 $r2
+0xe088000001000406ULL, // $p0 sub b32 $r1 $r1 $r2
+0x4000000000800001ULL, // $p0 add b32 $r0 $r0 0x1
+0xe6010000000ce802ULL, // $p3 cvt s32 $r0 neg s32 $r0
+0x08b08010a010b810ULL, // sched 0x2c200428042e04
+0xe60100000088e806ULL, // $p2 cvt s32 $r1 neg s32 $r1
+0x19000000001c003cULL, // ret
 };
 
 static const uint16_t nvf0_builtin_offsets[NVC0_BUILTIN_COUNT] =
 {
-   0,
-   0,
-   0,
-   0
+   0x0000, // DIV U32: first word of nvf0_builtin_code
+   0x00f0, // DIV S32: byte offset after the 30 64-bit words of DIV U32 (30 * 8 = 0xf0)
+   /* Just point at a ret instruction for now: 0x00f0 - 8 = 0x00e8, the ret that ends DIV U32. */
+   0x00f0 - 8, // NOTE(review): which builtin this slot serves is not shown in this file
+   0x00f0 - 8 // NOTE(review): which builtin this slot serves is not shown in this file
 };




More information about the mesa-commit mailing list