[Beignet] [PATCH] Add memory fence before barrier to support global memory barrier.

Zhigang Gong zhigang.gong at linux.intel.com
Mon Jun 17 02:34:10 PDT 2013


This patch looks good to me. And it can pass the global memory barrier
case. Thanks for the patch, I will push it latter.
As to the local memory fence, according to the bspec, we don't need to
issue a fence to ensure the memory access ordering. I heard from you
that you tried to modify the test case to use local memory and local fence,
it also had problem. Could you submit your modification as a new test
case for local memory barrier? Then we can all take a look at that case
and investigate the underlying problem.

On Mon, Jun 17, 2013 at 03:13:05PM +0800, Yang Rong wrote:
> 
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
>  backend/src/backend/gen_context.cpp                |    6 ++++++
>  backend/src/backend/gen_context.hpp                |    1 +
>  backend/src/backend/gen_defs.hpp                   |   14 ++++++++++++++
>  backend/src/backend/gen_encoder.cpp                |    9 +++++++++
>  backend/src/backend/gen_encoder.hpp                |    2 ++
>  .../src/backend/gen_insn_gen7_schedule_info.hxx    |    1 +
>  backend/src/backend/gen_insn_scheduling.cpp        |   20 ++++++++++++++------
>  backend/src/backend/gen_insn_selection.cpp         |   19 +++++++++++++++----
>  backend/src/backend/gen_insn_selection.hxx         |    1 +
>  9 files changed, 63 insertions(+), 10 deletions(-)
> 
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index 055c8fc..af651e7 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -193,6 +193,12 @@ namespace gbe
>      p->BARRIER(src);
>    }
>  
> +  void GenContext::emitFenceInstruction(const SelectionInstruction &insn) {
> +    const GenRegister dst = ra->genReg(insn.dst(0));
> +    p->FENCE(dst);
> +    p->MOV(dst, dst);
> +  }
> +
>    void GenContext::emitMathInstruction(const SelectionInstruction &insn) {
>      const GenRegister dst = ra->genReg(insn.dst(0));
>      const GenRegister src0 = ra->genReg(insn.src(0));
> diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
> index 7c28bdf..1566cbb 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -85,6 +85,7 @@ namespace gbe
>      void emitNoOpInstruction(const SelectionInstruction &insn);
>      void emitWaitInstruction(const SelectionInstruction &insn);
>      void emitBarrierInstruction(const SelectionInstruction &insn);
> +    void emitFenceInstruction(const SelectionInstruction &insn);
>      void emitMathInstruction(const SelectionInstruction &insn);
>      void emitUntypedReadInstruction(const SelectionInstruction &insn);
>      void emitUntypedWriteInstruction(const SelectionInstruction &insn);
> diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
> index c7a1581..f4e4938 100644
> --- a/backend/src/backend/gen_defs.hpp
> +++ b/backend/src/backend/gen_defs.hpp
> @@ -765,6 +765,20 @@ struct GenInstruction
>        uint32_t end_of_thread:1;
>      } gen7_typed_rw;
>  
> +    /*! Memory fence */
> +    struct {
> +      uint32_t bti:8;
> +      uint32_t ingored:5;
> +      uint32_t commit_enable:1;
> +      uint32_t msg_type:4;
> +      uint32_t pad2:1;
> +      uint32_t header_present:1;
> +      uint32_t response_length:5;
> +      uint32_t msg_length:4;
> +      uint32_t pad3:2;
> +      uint32_t end_of_thread:1;
> +    } gen7_memory_fence;
> +
>      struct {
>        uint32_t src1_subreg_nr_high:1;
>        uint32_t src1_reg_nr:8;
> diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
> index b65cc94..859a1b9 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -707,6 +707,15 @@ namespace gbe
>       insn->bits3.msg_gateway.sub_function_id = GEN_BARRIER_MSG;
>       insn->bits3.msg_gateway.notify = 0x1;
>    }
> +  void GenEncoder::FENCE(GenRegister dst) {
> +    GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> +    this->setHeader(insn);
> +    this->setDst(insn, dst);
> +    this->setSrc0(insn, dst);
> +    setMessageDescriptor(this, insn, GEN_SFID_DATAPORT_DATA_CACHE, 1, 1, 1);
> +    insn->bits3.gen7_memory_fence.msg_type = GEN_MEM_FENCE;
> +    insn->bits3.gen7_memory_fence.commit_enable = 0x1;
> +  }
>  
>    void GenEncoder::JMPI(GenRegister src) {
>      alu2(this, GEN_OPCODE_JMPI, GenRegister::ip(), GenRegister::ip(), src);
> diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
> index 83d83d2..c98774f 100644
> --- a/backend/src/backend/gen_encoder.hpp
> +++ b/backend/src/backend/gen_encoder.hpp
> @@ -118,6 +118,8 @@ namespace gbe
>  #undef ALU3
>      /*! Barrier message (to synchronize threads of a workgroup) */
>      void BARRIER(GenRegister src);
> +    /*! Memory fence message (to order loads and stores between threads) */
> +    void FENCE(GenRegister dst);
>      /*! Jump indexed instruction */
>      void JMPI(GenRegister src);
>      /*! Compare instructions */
> diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index a2c0fba..098d9ec 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -11,6 +11,7 @@ DECL_GEN7_SCHEDULE(NoOp,            20,        2,        2)
>  DECL_GEN7_SCHEDULE(Wait,            20,        2,        2)
>  DECL_GEN7_SCHEDULE(Math,            20,        4,        2)
>  DECL_GEN7_SCHEDULE(Barrier,         80,        1,        1)
> +DECL_GEN7_SCHEDULE(Fence,           80,        1,        1)
>  DECL_GEN7_SCHEDULE(UntypedRead,     80,        1,        1)
>  DECL_GEN7_SCHEDULE(UntypedWrite,    80,        1,        1)
>  DECL_GEN7_SCHEDULE(ByteGather,      80,        1,        1)
> diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp
> index 95eedfe..cb990be 100644
> --- a/backend/src/backend/gen_insn_scheduling.cpp
> +++ b/backend/src/backend/gen_insn_scheduling.cpp
> @@ -1,4 +1,4 @@
> -/* 
> +/*
>   * Copyright © 2012 Intel Corporation
>   *
>   * This library is free software; you can redistribute it and/or
> @@ -305,7 +305,7 @@ namespace gbe
>        return simdWidth == 8 ? physical.nr : physical.nr / 2;
>      }
>      // We use virtual registers since allocation is not done yet
> -    else 
> +    else
>        return reg.value.reg;
>    }
>  
> @@ -345,7 +345,9 @@ namespace gbe
>      }
>  
>      // Consider barriers and wait write to memory
> -    if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
> +    if (insn.opcode == SEL_OP_BARRIER ||
> +        insn.opcode == SEL_OP_FENCE ||
> +        insn.opcode == SEL_OP_WAIT) {
>        const uint32_t local = this->getIndex(0xfe);
>        const uint32_t global = this->getIndex(0x00);
>        this->nodes[local] = this->nodes[global] = node;
> @@ -424,7 +426,9 @@ namespace gbe
>        }
>  
>        // Consider barriers and wait are reading memory (local and global)
> -      if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
> +    if (insn.opcode == SEL_OP_BARRIER ||
> +        insn.opcode == SEL_OP_FENCE ||
> +        insn.opcode == SEL_OP_WAIT) {
>          const uint32_t local = tracker.getIndex(0xfe);
>          const uint32_t global = tracker.getIndex(0x00);
>          tracker.addDependency(node, local);
> @@ -450,7 +454,9 @@ namespace gbe
>        }
>  
>        // Consider barriers and wait are writing memory (local and global)
> -      if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
> +    if (insn.opcode == SEL_OP_BARRIER ||
> +        insn.opcode == SEL_OP_FENCE ||
> +        insn.opcode == SEL_OP_WAIT) {
>          const uint32_t local = tracker.getIndex(0xfe);
>          const uint32_t global = tracker.getIndex(0x00);
>          tracker.addDependency(node, local);
> @@ -482,7 +488,9 @@ namespace gbe
>        }
>  
>        // Consider barriers and wait are reading memory (local and global)
> -      if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
> +      if (insn.opcode == SEL_OP_BARRIER ||
> +          insn.opcode == SEL_OP_FENCE ||
> +          insn.opcode == SEL_OP_WAIT) {
>          const uint32_t local = tracker.getIndex(0xfe);
>          const uint32_t global = tracker.getIndex(0x00);
>          tracker.addDependency(local, node);
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index 88f9e94..4e7cebd 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -431,6 +431,8 @@ namespace gbe
>  #undef ALU3
>      /*! Encode a barrier instruction */
>      void BARRIER(GenRegister src);
> +    /*! Encode a barrier instruction */
> +    void FENCE(GenRegister dst);
>      /*! Encode a label instruction */
>      void LABEL(ir::LabelIndex label);
>      /*! Jump indexed instruction */
> @@ -682,6 +684,11 @@ namespace gbe
>      insn->src(0) = src;
>    }
>  
> +  void Selection::Opaque::FENCE(GenRegister dst) {
> +    SelectionInstruction *insn = this->appendInsn(SEL_OP_FENCE, 1, 0);
> +    insn->dst(0) = dst;
> +  }
> +
>    void Selection::Opaque::JMPI(Reg src, ir::LabelIndex index) {
>      SelectionInstruction *insn = this->appendInsn(SEL_OP_JMPI, 0, 1);
>      insn->src(0) = src;
> @@ -1607,17 +1614,21 @@ namespace gbe
>      INLINE bool emitOne(Selection::Opaque &sel, const ir::SyncInstruction &insn) const
>      {
>        using namespace ir;
> -      const uint32_t params = insn.getParameters();
> -      GBE_ASSERTM(params == syncLocalBarrier,
> -                  "Only barrier(CLK_LOCAL_MEM_FENCE) is supported right now "
> -                  "for the synchronization primitives");
>        const ir::Register reg = sel.reg(FAMILY_DWORD);
>  
> +      const uint32_t params = insn.getParameters();
> +      //need to double check local barrier whether need fence or not
> +      if(params == syncGlobalBarrier) {
> +        const ir::Register fenceDst = sel.reg(FAMILY_DWORD);
> +        sel.FENCE(sel.selReg(fenceDst, ir::TYPE_U32));
> +      }
> +
>        sel.push();
>          sel.curr.predicate = GEN_PREDICATE_NONE;
>          sel.curr.execWidth = 8;
>          sel.curr.physicalFlag = 0;
>          sel.curr.noMask = 1;
> +
>          sel.SHL(GenRegister::ud8grf(reg),
>                  GenRegister::ud1grf(ocl::threadn),
>                  GenRegister::immud(0x9));
> diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
> index 455bb92..789c81c 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -29,6 +29,7 @@ DECL_SELECTION_IR(NOP, NoOpInstruction)
>  DECL_SELECTION_IR(WAIT, WaitInstruction)
>  DECL_SELECTION_IR(MATH, MathInstruction)
>  DECL_SELECTION_IR(BARRIER, BarrierInstruction)
> +DECL_SELECTION_IR(FENCE, FenceInstruction)
>  DECL_SELECTION_IR(UNTYPED_READ, UntypedReadInstruction)
>  DECL_SELECTION_IR(UNTYPED_WRITE, UntypedWriteInstruction)
>  DECL_SELECTION_IR(BYTE_GATHER, ByteGatherInstruction)
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list