[Beignet] [PATCH] support 64bit-integer multiplication
Zhigang Gong
zhigang.gong at linux.intel.com
Tue Aug 13 03:02:29 PDT 2013
LGTM, pushed, thanks.
On Tue, Aug 13, 2013 at 11:05:28AM +0800, Homer Hsing wrote:
> also add test case
>
> Signed-off-by: Homer Hsing <homer.xing at intel.com>
> ---
> backend/src/backend/gen_context.cpp | 46 ++++++++++++++++++++
> backend/src/backend/gen_context.hpp | 3 ++
> .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
> backend/src/backend/gen_insn_selection.cpp | 23 +++++++---
> backend/src/backend/gen_insn_selection.hxx | 1 +
> kernels/compiler_long_mult.cl | 7 ++++
> utests/CMakeLists.txt | 1 +
> utests/compiler_long_mult.cpp | 49 ++++++++++++++++++++++
> 8 files changed, 126 insertions(+), 5 deletions(-)
> create mode 100644 kernels/compiler_long_mult.cl
> create mode 100644 utests/compiler_long_mult.cpp
>
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index 406cb80..86f3555 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -598,6 +598,52 @@ namespace gbe
> p->pop();
> }
>
> + void GenContext::I32FullMult(GenRegister high, GenRegister low, GenRegister src0, GenRegister src1) {
> + GenRegister acc = GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD);
> + int execWidth = p->curr.execWidth;
> + p->push();
> + p->curr.execWidth = 8;
> + for(int i = 0; i < execWidth; i += 8) {
> + p->MUL(acc, src0, src1);
> + p->curr.accWrEnable = 1;
> + p->MACH(high, src0, src1);
> + p->curr.accWrEnable = 0;
> + p->MOV(low, acc);
> + src0 = GenRegister::suboffset(src0, 8);
> + src1 = GenRegister::suboffset(src1, 8);
> + high = GenRegister::suboffset(high, 8);
> + low = GenRegister::suboffset(low, 8);
> + }
> + p->pop();
> + }
> +
> + void GenContext::emitI64MULInstruction(const SelectionInstruction &insn) {
> + GenRegister dest = ra->genReg(insn.dst(0));
> + GenRegister x = ra->genReg(insn.src(0));
> + GenRegister y = ra->genReg(insn.src(1));
> + GenRegister a = ra->genReg(insn.dst(1));
> + GenRegister b = ra->genReg(insn.dst(2));
> + GenRegister c = ra->genReg(insn.dst(3));
> + GenRegister d = ra->genReg(insn.dst(4));
> + GenRegister e = ra->genReg(insn.dst(5));
> + GenRegister f = ra->genReg(insn.dst(6));
> + a.type = b.type = c.type = d.type = e.type = f.type = GEN_TYPE_UD;
> + loadTopHalf(a, x);
> + loadBottomHalf(b, x);
> + loadTopHalf(c, y);
> + loadBottomHalf(d, y);
> + p->push();
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + I32FullMult(GenRegister::null(), e, b, c);
> + I32FullMult(GenRegister::null(), f, a, d);
> + p->ADD(e, e, f);
> + I32FullMult(f, a, b, d);
> + p->ADD(e, e, f);
> + p->pop();
> + storeTopHalf(dest, e);
> + storeBottomHalf(dest, a);
> + }
> +
> void GenContext::emitTernaryInstruction(const SelectionInstruction &insn) {
> const GenRegister dst = ra->genReg(insn.dst(0));
> const GenRegister src0 = ra->genReg(insn.src(0));
> diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
> index b294042..1531961 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -86,6 +86,7 @@ namespace gbe
>
> void addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1);
> void subWithBorrow(GenRegister dest, GenRegister src0, GenRegister src1);
> + void I32FullMult(GenRegister high, GenRegister low, GenRegister src0, GenRegister src1);
>
> /*! Final Gen ISA emission helper functions */
> void emitLabelInstruction(const SelectionInstruction &insn);
> @@ -116,8 +117,10 @@ namespace gbe
> void emitSpillRegInstruction(const SelectionInstruction &insn);
> void emitUnSpillRegInstruction(const SelectionInstruction &insn);
> void emitGetImageInfoInstruction(const SelectionInstruction &insn);
> + void emitI64MULInstruction(const SelectionInstruction &insn);
> void scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
> void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
> +
> /*! Implements base class */
> virtual Kernel *allocateKernel(void);
> /*! Store the position of each label instruction in the Gen ISA stream */
> diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index 4879b66..7f214ac 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -27,3 +27,4 @@ DECL_GEN7_SCHEDULE(SpillReg, 80, 1, 1)
> DECL_GEN7_SCHEDULE(UnSpillReg, 80, 1, 1)
> DECL_GEN7_SCHEDULE(GetImageInfo, 20, 4, 2)
> DECL_GEN7_SCHEDULE(Atomic, 80, 1, 1)
> +DECL_GEN7_SCHEDULE(I64MUL, 20, 4, 2)
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index 929a3bd..9e3c535 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -523,6 +523,8 @@ namespace gbe
> void TYPED_WRITE(GenRegister *src, uint32_t srcNum, GenRegister *msgs, uint32_t msgNum, uint32_t bti);
> /*! Get image information */
> void GET_IMAGE_INFO(uint32_t type, GenRegister *dst, uint32_t dst_num, uint32_t bti);
> + /*! Multiply 64-bit integers */
> + void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]);
> /*! Use custom allocators */
> GBE_CLASS(Opaque);
> friend class SelectionBlock;
> @@ -1003,6 +1005,15 @@ namespace gbe
> insn->extra.function = function;
> }
>
> + void Selection::Opaque::I64MUL(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) {
> + SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MUL, 7, 2);
> + insn->dst(0) = dst;
> + insn->src(0) = src0;
> + insn->src(1) = src1;
> + for(int i = 0; i < 6; i++)
> + insn->dst(i + 1) = tmp[i];
> + }
> +
> void Selection::Opaque::ALU1(SelectionOpcode opcode, Reg dst, Reg src) {
> SelectionInstruction *insn = this->appendInsn(opcode, 1, 1);
> insn->dst(0) = dst;
> @@ -1610,12 +1621,14 @@ namespace gbe
> if (type == TYPE_U32 || type == TYPE_S32) {
> sel.pop();
> return false;
> - }
> - else {
> - GBE_ASSERTM((type != TYPE_S64 && type != TYPE_U64), "64bit integer not supported yet!" );
> + } else if (type == TYPE_S64 || type == TYPE_U64) {
> + GenRegister tmp[6];
> + for(int i = 0; i < 6; i++)
> + tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
> + sel.I64MUL(dst, src0, src1, tmp);
> + } else
> sel.MUL(dst, src0, src1);
> - }
> - break;
> + break;
> case OP_HADD: {
> GenRegister temp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_D);
> sel.HADD(dst, src0, src1, temp);
> diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
> index 06469ca..6ef50b8 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -30,6 +30,7 @@ DECL_SELECTION_IR(ADD, BinaryInstruction)
> DECL_SELECTION_IR(I64ADD, BinaryWithTempInstruction)
> DECL_SELECTION_IR(I64SUB, BinaryWithTempInstruction)
> DECL_SELECTION_IR(MUL, BinaryInstruction)
> +DECL_SELECTION_IR(I64MUL, I64MULInstruction)
> DECL_SELECTION_IR(ATOMIC, AtomicInstruction)
> DECL_SELECTION_IR(MACH, BinaryInstruction)
> DECL_SELECTION_IR(CMP, CompareInstruction)
> diff --git a/kernels/compiler_long_mult.cl b/kernels/compiler_long_mult.cl
> new file mode 100644
> index 0000000..5b96d74
> --- /dev/null
> +++ b/kernels/compiler_long_mult.cl
> @@ -0,0 +1,7 @@
> +kernel void compiler_long_mult(global long *src1, global long *src2, global long *dst) {
> + int i = get_global_id(0);
> + if(i < 3)
> + dst[i] = src1[i] + src2[i];
> + else
> + dst[i] = src1[i] * src2[i];
> +}
> diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
> index b3d039e..746d77b 100644
> --- a/utests/CMakeLists.txt
> +++ b/utests/CMakeLists.txt
> @@ -129,6 +129,7 @@ set (utests_sources
> compiler_long_shl.cpp
> compiler_long_shr.cpp
> compiler_long_asr.cpp
> + compiler_long_mult.cpp
> utest_assert.cpp
> utest.cpp
> utest_file_map.cpp
> diff --git a/utests/compiler_long_mult.cpp b/utests/compiler_long_mult.cpp
> new file mode 100644
> index 0000000..06070f7
> --- /dev/null
> +++ b/utests/compiler_long_mult.cpp
> @@ -0,0 +1,49 @@
> +#include <cstdint>
> +#include <cstring>
> +#include <iostream>
> +#include "utest_helper.hpp"
> +
> +void compiler_long_mult(void)
> +{
> + const size_t n = 16;
> + int64_t src1[n], src2[n];
> +
> + // Setup kernel and buffers
> + OCL_CREATE_KERNEL("compiler_long_mult");
> + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int64_t), NULL);
> + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL);
> + OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int64_t), NULL);
> + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
> + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
> + OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
> + globals[0] = n;
> + locals[0] = 16;
> +
> + // Run random tests
> + for (int32_t i = 0; i < (int32_t) n; ++i) {
> + src1[i] = 0x77665544FFEEDDCCLL;
> + src2[i] = ((int64_t)rand() << 32) + rand();
> + }
> + OCL_MAP_BUFFER(0);
> + OCL_MAP_BUFFER(1);
> + memcpy(buf_data[0], src1, sizeof(src1));
> + memcpy(buf_data[1], src2, sizeof(src2));
> + OCL_UNMAP_BUFFER(0);
> + OCL_UNMAP_BUFFER(1);
> +
> + // Run the kernel on GPU
> + OCL_NDRANGE(1);
> +
> + // Compare
> + OCL_MAP_BUFFER(2);
> + for (int32_t i = 0; i < (int32_t) n; ++i) {
> + //printf("%lx\n", ((int64_t *)buf_data[2])[i]);
> + if (i < 3)
> + OCL_ASSERT(src1[i] + src2[i] == ((int64_t *)buf_data[2])[i]);
> + else
> + OCL_ASSERT(src1[i] * src2[i] == ((int64_t *)buf_data[2])[i]);
> + }
> + OCL_UNMAP_BUFFER(2);
> +}
> +
> +MAKE_UTEST_FROM_FUNCTION(compiler_long_mult);
> --
> 1.8.1.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list