[Beignet] [V2 PATCH 5/6] Make the surface typed write work for HSW

junyan.he at inbox.com junyan.he at inbox.com
Wed May 7 03:03:10 PDT 2014


From: Junyan He <junyan.he at linux.intel.com>

1. Modify the typed write so that the surface write uses GEN_SFID_DATAPORT_DATA_CACHE.
2. Add the channel selects to the surface state setup.
3. Correct the slot setting in the send message descriptor.

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 backend/src/backend/gen75_encoder.cpp | 117 ++++++++++++++++++++++++++++++++++
 backend/src/backend/gen75_encoder.hpp |   4 ++
 backend/src/backend/gen_encoder.hpp   |  14 ++--
 src/intel/intel_defines.h             |   7 ++
 src/intel/intel_driver.c              |   2 +-
 src/intel/intel_gpgpu.c               |  74 +++++++++++++++------
 src/intel/intel_gpgpu.h               |   2 +-
 src/intel/intel_structs.h             |  11 +++-
 8 files changed, 202 insertions(+), 29 deletions(-)

diff --git a/backend/src/backend/gen75_encoder.cpp b/backend/src/backend/gen75_encoder.cpp
index ede9d55..d1a8542 100644
--- a/backend/src/backend/gen75_encoder.cpp
+++ b/backend/src/backend/gen75_encoder.cpp
@@ -27,8 +27,40 @@
 
 #include "backend/gen75_encoder.hpp"
 
+static const uint32_t untypedRWMask[] = { /* rgba argument for setDPUntypedRW, indexed by elemNum */
+  GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN|GEN_UNTYPED_RED, /* [0]: all four flags set */
+  GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN, /* [1]: RED cleared */
+  GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE, /* [2]: RED and GREEN cleared */
+  GEN_UNTYPED_ALPHA, /* [3]: only ALPHA still set */
+  0 /* [4]: no flag set; presumably a set flag masks its channel out -- TODO confirm */
+};
+
 namespace gbe
 {
+  void Gen75Encoder::setHeader(GenNativeInstruction *insn) {
+    /* Encode the current execution width; only 1, 4, 8 and 16 are handled. */
+    switch (this->curr.execWidth) {
+      case 1:  insn->header.execution_size = GEN_WIDTH_1;  break;
+      case 4:  insn->header.execution_size = GEN_WIDTH_4;  break;
+      case 8:  insn->header.execution_size = GEN_WIDTH_8;  break;
+      case 16: insn->header.execution_size = GEN_WIDTH_16; break;
+      default: NOT_IMPLEMENTED; break;
+    }
+    /* Copy the remaining controls of the current state into the instruction. */
+    insn->header.acc_wr_control = this->curr.accWrEnable;
+    insn->header.quarter_control = this->curr.quarterControl;
+    insn->header.mask_control = this->curr.noMask;
+    insn->header.saturate = this->curr.saturate;
+    insn->bits1.ia1.nib_ctrl = this->curr.nibControl;
+    insn->bits2.ia1.flag_reg_nr = this->curr.flag;
+    insn->bits2.ia1.flag_sub_reg_nr = this->curr.subFlag;
+    /* The predication fields are only written when a predicate is active. */
+    if (GEN_PREDICATE_NONE != this->curr.predicate) {
+      insn->header.predicate_control = this->curr.predicate;
+      insn->header.predicate_inverse = this->curr.inversePredicate;
+    }
+  }
+
   void Gen75Encoder::setDPUntypedRW(GenNativeInstruction *insn,
                                     uint32_t bti,
                                     uint32_t rgba,
@@ -60,4 +92,89 @@ namespace gbe
     /* Always using the low 8 slots here. */
     insn->bits3.gen7_typed_rw.slot = 1;
   }
+
+  void Gen75Encoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum) {
+    /* Emit a SEND that performs atomic operation `function` on the surface bound at `bti` and returns data in dst. */
+    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+    uint32_t msg_length = 0, response_length = 0;
+
+    if (this->curr.execWidth == 8) {
+      msg_length = srcNum; /* srcNum payload registers in SIMD8 */
+      response_length = 1;
+    } else if (this->curr.execWidth == 16) {
+      msg_length = 2 * srcNum; /* both lengths double in SIMD16 */
+      response_length = 2;
+    } else
+      NOT_IMPLEMENTED;
+
+    this->setHeader(insn);
+    this->setDst(insn, GenRegister::uw16grf(dst.nr, 0));
+    this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0));
+    this->setSrc1(insn, GenRegister::immud(0));
+
+    const GenMessageTarget sfid = GEN_SFID_DATAPORT1_DATA_CACHE;
+    setMessageDescriptor(insn, sfid, msg_length, response_length);
+    insn->bits3.gen7_atomic_op.msg_type = GEN75_P1_UNTYPED_ATOMIC_OP; /* untyped, like UNTYPED_READ/WRITE; NOTE(review): confirm the constant in gen_defs.hpp */
+    insn->bits3.gen7_atomic_op.bti = bti;
+    insn->bits3.gen7_atomic_op.return_data = 1; /* always request the returned value */
+    insn->bits3.gen7_atomic_op.aop_type = function;
+
+    if (this->curr.execWidth == 8)
+      insn->bits3.gen7_atomic_op.simd_mode = GEN_ATOMIC_SIMD8;
+    else if (this->curr.execWidth == 16)
+      insn->bits3.gen7_atomic_op.simd_mode = GEN_ATOMIC_SIMD16;
+    else
+      NOT_SUPPORTED;
+  }
+
+  void Gen75Encoder::UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum) {
+    /* Emit a SEND that reads elemNum (1 to 4) channels into dst from the surface bound at bti. */
+    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+    assert(elemNum >= 1 && elemNum <= 4); /* both bounds must hold: elemNum indexes untypedRWMask */
+    uint32_t msg_length = 0, response_length = 0;
+    if (this->curr.execWidth == 8) {
+      msg_length = 1;
+      response_length = elemNum; /* one response register per channel in SIMD8 */
+    } else if (this->curr.execWidth == 16) {
+      msg_length = 2;
+      response_length = 2 * elemNum; /* two per channel in SIMD16 */
+    } else
+      NOT_IMPLEMENTED;
+
+    this->setHeader(insn);
+    this->setDst(insn,  GenRegister::uw16grf(dst.nr, 0));
+    this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0));
+    this->setSrc1(insn, GenRegister::immud(0));
+    setDPUntypedRW(insn,
+                   bti,
+                   untypedRWMask[elemNum],
+                   GEN75_P1_UNTYPED_READ,
+                   msg_length,
+                   response_length);
+  }
+
+  void Gen75Encoder::UNTYPED_WRITE(GenRegister msg, uint32_t bti, uint32_t elemNum) {
+    /* Emit a SEND that writes elemNum (1 to 4) channels from the msg payload to the surface bound at bti. */
+    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+    assert(elemNum >= 1 && elemNum <= 4); /* both bounds must hold: elemNum indexes untypedRWMask */
+    uint32_t msg_length = 0, response_length = 0; /* response_length stays 0: nothing is returned */
+    this->setHeader(insn);
+    if (this->curr.execWidth == 8) {
+      this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
+      msg_length = 1 + elemNum; /* one extra register on top of the elemNum data registers */
+    } else if (this->curr.execWidth == 16) {
+      this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
+      msg_length = 2 * (1 + elemNum); /* doubled in SIMD16 */
+    }
+    else
+      NOT_IMPLEMENTED;
+    this->setSrc0(insn, GenRegister::ud8grf(msg.nr, 0));
+    this->setSrc1(insn, GenRegister::immud(0));
+    setDPUntypedRW(insn,
+                   bti,
+                   untypedRWMask[elemNum],
+                   GEN75_P1_UNTYPED_SURFACE_WRITE,
+                   msg_length,
+                   response_length);
+  }
 } /* End of the name space. */
diff --git a/backend/src/backend/gen75_encoder.hpp b/backend/src/backend/gen75_encoder.hpp
index 53db3a7..a107202 100644
--- a/backend/src/backend/gen75_encoder.hpp
+++ b/backend/src/backend/gen75_encoder.hpp
@@ -34,6 +34,10 @@ namespace gbe
     Gen75Encoder(uint32_t simdWidth, uint32_t gen, uint32_t deviceID)
          : GenEncoder(simdWidth, gen, deviceID, 8) { };
 
+    virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum);
+    virtual void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
+    virtual void UNTYPED_WRITE(GenRegister src, uint32_t bti, uint32_t elemNum);
+    virtual void setHeader(GenNativeInstruction *insn);
     virtual void setDPUntypedRW(GenNativeInstruction *insn, uint32_t bti, uint32_t rgba,
                    uint32_t msg_type, uint32_t msg_length, uint32_t response_length);
     virtual void setTypedWriteMessage(GenNativeInstruction *insn, unsigned char bti,
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 31d7f4e..d44d323 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -161,15 +161,15 @@ namespace gbe
     /*! Wait instruction (used for the barrier) */
     void WAIT(void);
     /*! Atomic instructions */
-    void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum);
+    virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum);
     /*! Read 64-bits float/int arrays */
     void READ64(GenRegister dst, GenRegister tmp, GenRegister addr, GenRegister src, uint32_t bti, uint32_t elemNum);
     /*! Write 64-bits float/int arrays */
     void WRITE64(GenRegister src, GenRegister data, uint32_t bti, uint32_t elemNum, bool is_scalar);
     /*! Untyped read (upto 4 channels) */
-    void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
+    virtual void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
     /*! Untyped write (upto 4 channels) */
-    void UNTYPED_WRITE(GenRegister src, uint32_t bti, uint32_t elemNum);
+    virtual void UNTYPED_WRITE(GenRegister src, uint32_t bti, uint32_t elemNum);
     /*! Byte gather (for unaligned bytes, shorts and ints) */
     void BYTE_GATHER(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemSize);
     /*! Byte scatter (for unaligned bytes, shorts and ints) */
@@ -193,9 +193,9 @@ namespace gbe
                 bool isLD);
 
     /*! TypedWrite instruction for texture */
-    void TYPED_WRITE(GenRegister header,
-                     bool header_present,
-                     unsigned char bti);
+    virtual void TYPED_WRITE(GenRegister header,
+                             bool header_present,
+                             unsigned char bti);
     /*! Extended math function (2 sources) */
     void MATH(GenRegister dst, uint32_t function, GenRegister src0, GenRegister src1);
     /*! Extended math function (1 source) */
@@ -207,6 +207,7 @@ namespace gbe
     ////////////////////////////////////////////////////////////////////////
     // Helper functions to encode
     ////////////////////////////////////////////////////////////////////////
+    virtual void setHeader(GenNativeInstruction *insn);
     virtual void setDPUntypedRW(GenNativeInstruction *insn, uint32_t bti, uint32_t rgba,
                                 uint32_t msg_type, uint32_t msg_length,
                                 uint32_t response_length);
@@ -216,7 +217,6 @@ namespace gbe
     void setMessageDescriptor(GenNativeInstruction *inst, enum GenMessageTarget sfid,
                               unsigned msg_length, unsigned response_length,
                               bool header_present = false, bool end_of_thread = false);
-    void setHeader(GenNativeInstruction *insn);
     void setDst(GenNativeInstruction *insn, GenRegister dest);
     void setSrc0(GenNativeInstruction *insn, GenRegister reg);
     void setSrc1(GenNativeInstruction *insn, GenRegister reg);
diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h
index e5015ec..5139e43 100644
--- a/src/intel/intel_defines.h
+++ b/src/intel/intel_defines.h
@@ -288,6 +288,13 @@
 #define I965_TILEWALK_XMAJOR                 0
 #define I965_TILEWALK_YMAJOR                 1
 
+#define I965_SURCHAN_SELECT_ZERO             0 /* channel select values for the surface state ss7.shader_r/g/b/a fields */
+#define I965_SURCHAN_SELECT_ONE              1 /* presumably forces the channel to a constant one -- TODO confirm */
+#define I965_SURCHAN_SELECT_RED              4 /* stored in ss7.shader_r by intel_gpgpu_bind_image_gen75 */
+#define I965_SURCHAN_SELECT_GREEN            5 /* stored in ss7.shader_g */
+#define I965_SURCHAN_SELECT_BLUE             6 /* stored in ss7.shader_b */
+#define I965_SURCHAN_SELECT_ALPHA            7 /* stored in ss7.shader_a */
+
 #define URB_SIZE(intel)         (IS_IGDNG(intel->device_id) ? 1024 : \
                                  IS_G4X(intel->device_id) ? 384 : 256)
 
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index 7fd2bf3..ef97835 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -687,5 +687,5 @@ intel_setup_callbacks(void)
   cl_buffer_subdata = (cl_buffer_subdata_cb *) drm_intel_bo_subdata;
   cl_buffer_wait_rendering = (cl_buffer_wait_rendering_cb *) drm_intel_bo_wait_rendering;
   cl_buffer_get_fd = (cl_buffer_get_fd_cb *) drm_intel_bo_gem_export_to_prime;
-  intel_set_gpgpu_callbacks();
+  intel_set_gpgpu_callbacks(intel_get_device_id());
 }
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index fbeef11..3d6fd30 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -629,6 +629,53 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
   ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
   intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset);
   gpgpu->binded_img[index - gpgpu->img_index_base] = obj_bo;
+
+  assert(index < GEN_MAX_SURFACES);
+}
+
+static void
+intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
+                              uint32_t index,
+                              dri_bo* obj_bo,
+                              uint32_t obj_bo_offset,
+                              uint32_t format,
+                              cl_mem_object_type type,
+                              int32_t w,
+                              int32_t h,
+                              int32_t depth,
+                              int32_t pitch,
+                              int32_t tiling)
+{
+  surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
+  gen7_surface_state_t *ss;
+  assert(index < GEN_MAX_SURFACES); /* reject a bad slot before the heap entry is touched, not after */
+  ss = (gen7_surface_state_t *) heap->surface[index];
+  memset(ss, 0, sizeof(*ss));
+  /* Fill the surface state for the image in slot `index`; sizes are stored minus one. */
+  ss->ss0.surface_type = intel_get_surface_type(type);
+  ss->ss0.surface_format = format;
+  ss->ss1.base_addr = obj_bo->offset;
+  ss->ss2.width = w - 1;
+  ss->ss2.height = h - 1;
+  ss->ss3.depth = depth - 1;
+  ss->ss4.not_str_buf.rt_view_extent = depth - 1;
+  ss->ss4.not_str_buf.min_array_element = 0;
+  ss->ss3.pitch = pitch - 1;
+  ss->ss5.cache_control = cc_llc_l3;
+  ss->ss7.shader_r = I965_SURCHAN_SELECT_RED; /* each shader channel selects the same-named surface channel */
+  ss->ss7.shader_g = I965_SURCHAN_SELECT_GREEN;
+  ss->ss7.shader_b = I965_SURCHAN_SELECT_BLUE;
+  ss->ss7.shader_a = I965_SURCHAN_SELECT_ALPHA;
+  if (tiling == GPGPU_TILE_X) {
+    ss->ss0.tiled_surface = 1;
+    ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
+  } else if (tiling == GPGPU_TILE_Y) {
+    ss->ss0.tiled_surface = 1;
+    ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
+  } /* any other tiling value leaves the tiled fields zeroed by the memset */
+  ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
+  intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset);
+  gpgpu->binded_img[index - gpgpu->img_index_base] = obj_bo; /* remember the bo bound to this image slot */
+}
 
 static void
@@ -668,23 +715,6 @@ intel_gpgpu_set_stack(intel_gpgpu_t *gpgpu, uint32_t offset, uint32_t size, uint
 }
 
 static void
-intel_gpgpu_bind_image(intel_gpgpu_t *gpgpu,
-                       uint32_t index,
-                       cl_buffer *obj_bo,
-                       uint32_t obj_bo_offset,
-                       uint32_t format,
-                       cl_mem_object_type type,
-                       int32_t w,
-                       int32_t h,
-                       int32_t depth,
-                       int32_t pitch,
-                       cl_gpgpu_tiling tiling)
-{
-  intel_gpgpu_bind_image_gen7(gpgpu, index, (drm_intel_bo*) obj_bo, obj_bo_offset, format, type, w, h, depth, pitch, tiling);
-  assert(index < GEN_MAX_SURFACES);
-}
-
-static void
 intel_gpgpu_build_idrt(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
 {
   gen6_interface_descriptor_t *desc;
@@ -1053,12 +1083,11 @@ intel_gpgpu_event_get_exec_timestamp(intel_event_t *event,
 }
 
 LOCAL void
-intel_set_gpgpu_callbacks(void)
+intel_set_gpgpu_callbacks(int device_id)
 {
   cl_gpgpu_new = (cl_gpgpu_new_cb *) intel_gpgpu_new;
   cl_gpgpu_delete = (cl_gpgpu_delete_cb *) intel_gpgpu_delete;
   cl_gpgpu_sync = (cl_gpgpu_sync_cb *) intel_gpgpu_sync;
-  cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image;
   cl_gpgpu_bind_buf = (cl_gpgpu_bind_buf_cb *) intel_gpgpu_bind_buf;
   cl_gpgpu_set_stack = (cl_gpgpu_set_stack_cb *) intel_gpgpu_set_stack;
   cl_gpgpu_state_init = (cl_gpgpu_state_init_cb *) intel_gpgpu_state_init;
@@ -1083,5 +1112,12 @@ intel_set_gpgpu_callbacks(void)
   cl_gpgpu_event_get_gpu_cur_timestamp = (cl_gpgpu_event_get_gpu_cur_timestamp_cb *)intel_gpgpu_event_get_gpu_cur_timestamp;
   cl_gpgpu_ref_batch_buf = (cl_gpgpu_ref_batch_buf_cb *)intel_gpgpu_ref_batch_buf;
   cl_gpgpu_unref_batch_buf = (cl_gpgpu_unref_batch_buf_cb *)intel_gpgpu_unref_batch_buf;
+
+  if (IS_HASWELL(device_id))
+    cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen75;
+  else if (IS_IVYBRIDGE(device_id))
+    cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen7;
+  else
+    assert(0);
 }
 
diff --git a/src/intel/intel_gpgpu.h b/src/intel/intel_gpgpu.h
index 9918b35..d593ac7 100644
--- a/src/intel/intel_gpgpu.h
+++ b/src/intel/intel_gpgpu.h
@@ -28,7 +28,7 @@
 #include <stdint.h>
 
 /* Set the gpgpu related call backs */
-extern void intel_set_gpgpu_callbacks(void);
+extern void intel_set_gpgpu_callbacks(int device_id);
 
 #endif /* __INTEL_GPGPU_H__ */
 
diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h
index 36b5971..59a9810 100644
--- a/src/intel/intel_structs.h
+++ b/src/intel/intel_structs.h
@@ -234,7 +234,16 @@ typedef struct gen7_surface_state
   } ss5;
 
   uint32_t ss6; /* unused */
-  uint32_t ss7; /* unused */
+
+  struct {
+    uint32_t min_lod:12;
+    uint32_t pad0:4;
+    uint32_t shader_a:3;
+    uint32_t shader_b:3;
+    uint32_t shader_g:3;
+    uint32_t shader_r:3;
+    uint32_t pad1:4;
+  } ss7;
 } gen7_surface_state_t;
 
 STATIC_ASSERT(sizeof(gen6_surface_state_t) == sizeof(gen7_surface_state_t));
-- 
1.8.3.2



More information about the Beignet mailing list