[Beignet] [PATCH] Add memory fence before barrier to support global memory barrier.
Zhigang Gong
zhigang.gong at linux.intel.com
Mon Jun 17 02:34:10 PDT 2013
This patch looks good to me. And it can pass the global memory barrier
case. Thanks for the patch, I will push it latter.
As to the local memory fence, according to the bspec, we don't need to
issue a fence to ensure the memory access ordering. I heard from you
that you tried to modify the test case to use local memory and local fence,
it also had problem. Could you submit your modification as a new test
case for local memory barrier? Then we can all take a look at that case
and investigate the underlying problem.
On Mon, Jun 17, 2013 at 03:13:05PM +0800, Yang Rong wrote:
>
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
> backend/src/backend/gen_context.cpp | 6 ++++++
> backend/src/backend/gen_context.hpp | 1 +
> backend/src/backend/gen_defs.hpp | 14 ++++++++++++++
> backend/src/backend/gen_encoder.cpp | 9 +++++++++
> backend/src/backend/gen_encoder.hpp | 2 ++
> .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
> backend/src/backend/gen_insn_scheduling.cpp | 20 ++++++++++++++------
> backend/src/backend/gen_insn_selection.cpp | 19 +++++++++++++++----
> backend/src/backend/gen_insn_selection.hxx | 1 +
> 9 files changed, 63 insertions(+), 10 deletions(-)
>
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index 055c8fc..af651e7 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -193,6 +193,12 @@ namespace gbe
> p->BARRIER(src);
> }
>
> + void GenContext::emitFenceInstruction(const SelectionInstruction &insn) {
> + const GenRegister dst = ra->genReg(insn.dst(0));
> + p->FENCE(dst);
> + p->MOV(dst, dst);
> + }
> +
> void GenContext::emitMathInstruction(const SelectionInstruction &insn) {
> const GenRegister dst = ra->genReg(insn.dst(0));
> const GenRegister src0 = ra->genReg(insn.src(0));
> diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
> index 7c28bdf..1566cbb 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -85,6 +85,7 @@ namespace gbe
> void emitNoOpInstruction(const SelectionInstruction &insn);
> void emitWaitInstruction(const SelectionInstruction &insn);
> void emitBarrierInstruction(const SelectionInstruction &insn);
> + void emitFenceInstruction(const SelectionInstruction &insn);
> void emitMathInstruction(const SelectionInstruction &insn);
> void emitUntypedReadInstruction(const SelectionInstruction &insn);
> void emitUntypedWriteInstruction(const SelectionInstruction &insn);
> diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
> index c7a1581..f4e4938 100644
> --- a/backend/src/backend/gen_defs.hpp
> +++ b/backend/src/backend/gen_defs.hpp
> @@ -765,6 +765,20 @@ struct GenInstruction
> uint32_t end_of_thread:1;
> } gen7_typed_rw;
>
> + /*! Memory fence */
> + struct {
> + uint32_t bti:8;
> + uint32_t ingored:5;
> + uint32_t commit_enable:1;
> + uint32_t msg_type:4;
> + uint32_t pad2:1;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad3:2;
> + uint32_t end_of_thread:1;
> + } gen7_memory_fence;
> +
> struct {
> uint32_t src1_subreg_nr_high:1;
> uint32_t src1_reg_nr:8;
> diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
> index b65cc94..859a1b9 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -707,6 +707,15 @@ namespace gbe
> insn->bits3.msg_gateway.sub_function_id = GEN_BARRIER_MSG;
> insn->bits3.msg_gateway.notify = 0x1;
> }
> + void GenEncoder::FENCE(GenRegister dst) {
> + GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> + this->setHeader(insn);
> + this->setDst(insn, dst);
> + this->setSrc0(insn, dst);
> + setMessageDescriptor(this, insn, GEN_SFID_DATAPORT_DATA_CACHE, 1, 1, 1);
> + insn->bits3.gen7_memory_fence.msg_type = GEN_MEM_FENCE;
> + insn->bits3.gen7_memory_fence.commit_enable = 0x1;
> + }
>
> void GenEncoder::JMPI(GenRegister src) {
> alu2(this, GEN_OPCODE_JMPI, GenRegister::ip(), GenRegister::ip(), src);
> diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
> index 83d83d2..c98774f 100644
> --- a/backend/src/backend/gen_encoder.hpp
> +++ b/backend/src/backend/gen_encoder.hpp
> @@ -118,6 +118,8 @@ namespace gbe
> #undef ALU3
> /*! Barrier message (to synchronize threads of a workgroup) */
> void BARRIER(GenRegister src);
> + /*! Memory fence message (to order loads and stores between threads) */
> + void FENCE(GenRegister dst);
> /*! Jump indexed instruction */
> void JMPI(GenRegister src);
> /*! Compare instructions */
> diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index a2c0fba..098d9ec 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -11,6 +11,7 @@ DECL_GEN7_SCHEDULE(NoOp, 20, 2, 2)
> DECL_GEN7_SCHEDULE(Wait, 20, 2, 2)
> DECL_GEN7_SCHEDULE(Math, 20, 4, 2)
> DECL_GEN7_SCHEDULE(Barrier, 80, 1, 1)
> +DECL_GEN7_SCHEDULE(Fence, 80, 1, 1)
> DECL_GEN7_SCHEDULE(UntypedRead, 80, 1, 1)
> DECL_GEN7_SCHEDULE(UntypedWrite, 80, 1, 1)
> DECL_GEN7_SCHEDULE(ByteGather, 80, 1, 1)
> diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp
> index 95eedfe..cb990be 100644
> --- a/backend/src/backend/gen_insn_scheduling.cpp
> +++ b/backend/src/backend/gen_insn_scheduling.cpp
> @@ -1,4 +1,4 @@
> -/*
> +/*
> * Copyright © 2012 Intel Corporation
> *
> * This library is free software; you can redistribute it and/or
> @@ -305,7 +305,7 @@ namespace gbe
> return simdWidth == 8 ? physical.nr : physical.nr / 2;
> }
> // We use virtual registers since allocation is not done yet
> - else
> + else
> return reg.value.reg;
> }
>
> @@ -345,7 +345,9 @@ namespace gbe
> }
>
> // Consider barriers and wait write to memory
> - if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
> + if (insn.opcode == SEL_OP_BARRIER ||
> + insn.opcode == SEL_OP_FENCE ||
> + insn.opcode == SEL_OP_WAIT) {
> const uint32_t local = this->getIndex(0xfe);
> const uint32_t global = this->getIndex(0x00);
> this->nodes[local] = this->nodes[global] = node;
> @@ -424,7 +426,9 @@ namespace gbe
> }
>
> // Consider barriers and wait are reading memory (local and global)
> - if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
> + if (insn.opcode == SEL_OP_BARRIER ||
> + insn.opcode == SEL_OP_FENCE ||
> + insn.opcode == SEL_OP_WAIT) {
> const uint32_t local = tracker.getIndex(0xfe);
> const uint32_t global = tracker.getIndex(0x00);
> tracker.addDependency(node, local);
> @@ -450,7 +454,9 @@ namespace gbe
> }
>
> // Consider barriers and wait are writing memory (local and global)
> - if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
> + if (insn.opcode == SEL_OP_BARRIER ||
> + insn.opcode == SEL_OP_FENCE ||
> + insn.opcode == SEL_OP_WAIT) {
> const uint32_t local = tracker.getIndex(0xfe);
> const uint32_t global = tracker.getIndex(0x00);
> tracker.addDependency(node, local);
> @@ -482,7 +488,9 @@ namespace gbe
> }
>
> // Consider barriers and wait are reading memory (local and global)
> - if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
> + if (insn.opcode == SEL_OP_BARRIER ||
> + insn.opcode == SEL_OP_FENCE ||
> + insn.opcode == SEL_OP_WAIT) {
> const uint32_t local = tracker.getIndex(0xfe);
> const uint32_t global = tracker.getIndex(0x00);
> tracker.addDependency(local, node);
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index 88f9e94..4e7cebd 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -431,6 +431,8 @@ namespace gbe
> #undef ALU3
> /*! Encode a barrier instruction */
> void BARRIER(GenRegister src);
> + /*! Encode a barrier instruction */
> + void FENCE(GenRegister dst);
> /*! Encode a label instruction */
> void LABEL(ir::LabelIndex label);
> /*! Jump indexed instruction */
> @@ -682,6 +684,11 @@ namespace gbe
> insn->src(0) = src;
> }
>
> + void Selection::Opaque::FENCE(GenRegister dst) {
> + SelectionInstruction *insn = this->appendInsn(SEL_OP_FENCE, 1, 0);
> + insn->dst(0) = dst;
> + }
> +
> void Selection::Opaque::JMPI(Reg src, ir::LabelIndex index) {
> SelectionInstruction *insn = this->appendInsn(SEL_OP_JMPI, 0, 1);
> insn->src(0) = src;
> @@ -1607,17 +1614,21 @@ namespace gbe
> INLINE bool emitOne(Selection::Opaque &sel, const ir::SyncInstruction &insn) const
> {
> using namespace ir;
> - const uint32_t params = insn.getParameters();
> - GBE_ASSERTM(params == syncLocalBarrier,
> - "Only barrier(CLK_LOCAL_MEM_FENCE) is supported right now "
> - "for the synchronization primitives");
> const ir::Register reg = sel.reg(FAMILY_DWORD);
>
> + const uint32_t params = insn.getParameters();
> + //need to double check local barrier whether need fence or not
> + if(params == syncGlobalBarrier) {
> + const ir::Register fenceDst = sel.reg(FAMILY_DWORD);
> + sel.FENCE(sel.selReg(fenceDst, ir::TYPE_U32));
> + }
> +
> sel.push();
> sel.curr.predicate = GEN_PREDICATE_NONE;
> sel.curr.execWidth = 8;
> sel.curr.physicalFlag = 0;
> sel.curr.noMask = 1;
> +
> sel.SHL(GenRegister::ud8grf(reg),
> GenRegister::ud1grf(ocl::threadn),
> GenRegister::immud(0x9));
> diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
> index 455bb92..789c81c 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -29,6 +29,7 @@ DECL_SELECTION_IR(NOP, NoOpInstruction)
> DECL_SELECTION_IR(WAIT, WaitInstruction)
> DECL_SELECTION_IR(MATH, MathInstruction)
> DECL_SELECTION_IR(BARRIER, BarrierInstruction)
> +DECL_SELECTION_IR(FENCE, FenceInstruction)
> DECL_SELECTION_IR(UNTYPED_READ, UntypedReadInstruction)
> DECL_SELECTION_IR(UNTYPED_WRITE, UntypedWriteInstruction)
> DECL_SELECTION_IR(BYTE_GATHER, ByteGatherInstruction)
> --
> 1.7.10.4
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list