[Beignet] [PATCH 6/7] Make the surface typed write work for HSW.
junyan.he at inbox.com
junyan.he at inbox.com
Tue Apr 15 17:56:37 PDT 2014
From: Junyan He <junyan.he at linux.intel.com>
1. Modify the typed write so that the state write uses GEN_SFID_DATAPORT_DATA_CACHE.
2. Add the channel select to the surface state setting.
3. Correct the slot setting in the send message descriptor.
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
backend/src/backend/gen75_encoder.cpp | 116 +++++++++++++++++++++++++++++++++
backend/src/backend/gen75_encoder.hpp | 4 ++
backend/src/backend/gen_defs.hpp | 3 +-
backend/src/backend/gen_encoder.hpp | 14 ++--
src/intel/intel_defines.h | 7 ++
src/intel/intel_gpgpu.c | 73 ++++++++++++++++-----
src/intel/intel_structs.h | 11 +++-
7 files changed, 200 insertions(+), 28 deletions(-)
diff --git a/backend/src/backend/gen75_encoder.cpp b/backend/src/backend/gen75_encoder.cpp
index bb6d622..d1d1292 100644
--- a/backend/src/backend/gen75_encoder.cpp
+++ b/backend/src/backend/gen75_encoder.cpp
@@ -27,8 +27,39 @@
#include "backend/gen75_encoder.hpp"
+static const uint32_t untypedRWMask[] = { // indexed by elemNum in UNTYPED_READ / UNTYPED_WRITE
+ GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN|GEN_UNTYPED_RED, // [0]: all four channel bits set
+ GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN, // [1]: RED bit cleared
+ GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE, // [2]: RED and GREEN bits cleared
+ GEN_UNTYPED_ALPHA, // [3]: only the ALPHA bit left
+ 0 // [4]: no channel bits set
+}; // NOTE(review): a set bit presumably masks that channel out (fewer bits as elemNum grows) — confirm
+
namespace gbe
{
+ void Gen75Encoder::setHeader(GenInstruction *insn) {
+ // Encode the current SIMD width as the header's execution size.
+ switch (this->curr.execWidth) {
+ case 8: insn->header.execution_size = GEN_WIDTH_8; break;
+ case 16: insn->header.execution_size = GEN_WIDTH_16; break;
+ case 1: insn->header.execution_size = GEN_WIDTH_1; break;
+ case 4: insn->header.execution_size = GEN_WIDTH_4; break;
+ default: NOT_IMPLEMENTED;
+ }
+ // Header controls copied straight from the current encoder state.
+ insn->header.acc_wr_control = this->curr.accWrEnable;
+ insn->header.quarter_control = this->curr.quarterControl;
+ insn->header.mask_control = this->curr.noMask;
+ insn->header.saturate = this->curr.saturate;
+ // Nibble control and flag register selection live outside the header word.
+ insn->bits1.ia1.nib_ctrl = this->curr.nibControl;
+ insn->bits2.ia1.flag_reg_nr = this->curr.flag;
+ insn->bits2.ia1.flag_sub_reg_nr = this->curr.subFlag;
+ if (this->curr.predicate != GEN_PREDICATE_NONE) {
+ // Predicate fields are written only when predication is enabled.
+ insn->header.predicate_control = this->curr.predicate;
+ insn->header.predicate_inverse = this->curr.inversePredicate;
+ }
void Gen75Encoder::setDPUntypedRW(GenInstruction *insn,
uint32_t bti,
@@ -62,6 +93,91 @@ namespace gbe
insn->bits3.gen7_typed_rw.slot = 1;
}
+ void Gen75Encoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum) {
+ GenInstruction *insn = this->next(GEN_OPCODE_SEND); // the atomic is emitted as a SEND instruction
+ uint32_t msg_length = 0;
+ uint32_t response_length = 0;
+ // SIMD16 uses twice the message and response length of SIMD8; other widths are rejected.
+ if (this->curr.execWidth == 8) {
+ msg_length = srcNum;
+ response_length = 1;
+ } else if (this->curr.execWidth == 16) {
+ msg_length = 2*srcNum;
+ response_length = 2;
+ } else
+ NOT_IMPLEMENTED;
+ // Operands are rebuilt from dst.nr / src.nr only; src1 is a zero immediate.
+ this->setHeader(insn);
+ this->setDst(insn, GenRegister::uw16grf(dst.nr, 0));
+ this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0));
+ this->setSrc1(insn, GenRegister::immud(0));
+ // NOTE(review): msg_type below is the *typed* atomic op while the sibling overrides use untyped messages — confirm this is intended.
+ const GenMessageTarget sfid = GEN_SFID_DATAPORT1_DATA_CACHE;
+ setMessageDescriptor(insn, sfid, msg_length, response_length); // descriptor fields below are set after this call
+ insn->bits3.gen7_atomic_op.msg_type = GEN75_P1_TYPED_ATOMIC_OP;
+ insn->bits3.gen7_atomic_op.bti = bti;
+ insn->bits3.gen7_atomic_op.return_data = 1;
+ insn->bits3.gen7_atomic_op.aop_type = function; // caller-supplied atomic operation code
+ // Mirror the SIMD width in the descriptor; the fallback here is NOT_SUPPORTED, not NOT_IMPLEMENTED as above.
+ if (this->curr.execWidth == 8)
+ insn->bits3.gen7_atomic_op.simd_mode = GEN_ATOMIC_SIMD8;
+ else if (this->curr.execWidth == 16)
+ insn->bits3.gen7_atomic_op.simd_mode = GEN_ATOMIC_SIMD16;
+ else
+ NOT_SUPPORTED;
+ }
+ /*! Gen7.5 untyped read of elemNum (1 to 4) channels, emitted as a SEND with message type GEN75_P1_UNTYPED_READ */
+ void Gen75Encoder::UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum) {
+ GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ assert(elemNum >= 1 && elemNum <= 4); // both bounds must hold: elemNum indexes untypedRWMask[]
+ uint32_t msg_length = 0;
+ uint32_t response_length = 0;
+ if (this->curr.execWidth == 8) {
+ msg_length = 1;
+ response_length = elemNum;
+ } else if (this->curr.execWidth == 16) {
+ msg_length = 2;
+ response_length = 2*elemNum;
+ } else
+ NOT_IMPLEMENTED;
+ // SIMD16 doubles both lengths; operands are rebuilt from dst.nr / src.nr, src1 is a zero immediate.
+ this->setHeader(insn);
+ this->setDst(insn, GenRegister::uw16grf(dst.nr, 0));
+ this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0));
+ this->setSrc1(insn, GenRegister::immud(0));
+ setDPUntypedRW(insn,
+ bti,
+ untypedRWMask[elemNum],
+ GEN75_P1_UNTYPED_READ,
+ msg_length,
+ response_length);
+ }
+ /*! Gen7.5 untyped write of elemNum (1 to 4) channels, emitted as a SEND with a null destination and no response */
+ void Gen75Encoder::UNTYPED_WRITE(GenRegister msg, uint32_t bti, uint32_t elemNum) {
+ GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ assert(elemNum >= 1 && elemNum <= 4); // both bounds must hold: elemNum indexes untypedRWMask[]
+ uint32_t msg_length = 0;
+ uint32_t response_length = 0; // stays 0: nothing is read back
+ this->setHeader(insn);
+ if (this->curr.execWidth == 8) {
+ this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
+ msg_length = 1+elemNum; // one extra register on top of the elemNum data registers
+ } else if (this->curr.execWidth == 16) {
+ this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
+ msg_length = 2*(1+elemNum); // SIMD16 doubles the SIMD8 message length
+ }
+ else
+ NOT_IMPLEMENTED;
+ this->setSrc0(insn, GenRegister::ud8grf(msg.nr, 0));
+ this->setSrc1(insn, GenRegister::immud(0));
+ setDPUntypedRW(insn,
+ bti,
+ untypedRWMask[elemNum],
+ GEN75_P1_UNTYPED_SURFACE_WRITE,
+ msg_length,
+ response_length);
+ }
+
void Gen75Encoder::patchJMPI(uint32_t insnID, int32_t jumpDistance) {
GenInstruction &insn = this->store[insnID];
GBE_ASSERT(insnID < this->store.size());
diff --git a/backend/src/backend/gen75_encoder.hpp b/backend/src/backend/gen75_encoder.hpp
index bdd294a..1bbdd2c 100644
--- a/backend/src/backend/gen75_encoder.hpp
+++ b/backend/src/backend/gen75_encoder.hpp
@@ -32,6 +32,10 @@ namespace gbe
{
public:
Gen75Encoder(uint32_t simdWidth, uint32_t gen) : GenEncoder(simdWidth, gen) { };
+ virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum);
+ virtual void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
+ virtual void UNTYPED_WRITE(GenRegister src, uint32_t bti, uint32_t elemNum);
+ virtual void setHeader(GenInstruction *insn);
virtual void setDPUntypedRW(GenInstruction *insn, uint32_t bti, uint32_t rgba,
uint32_t msg_type, uint32_t msg_length, uint32_t response_length);
virtual void setTypedWriteMessage(GenInstruction *insn, unsigned char bti,
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index f74b82c..3e0e8fb 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -857,8 +857,7 @@ struct GenInstruction
struct {
uint32_t bti:8;
uint32_t chan_mask:4;
- uint32_t pad:1;
- uint32_t slot:1;
+ uint32_t slot:2;
uint32_t msg_type:4;
uint32_t pad2:1;
uint32_t header_present:1;
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index cd3dfdd..c82d7c6 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -145,15 +145,15 @@ namespace gbe
/*! Wait instruction (used for the barrier) */
void WAIT(void);
/*! Atomic instructions */
- void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum);
+ virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum);
/*! Read 64-bits float/int arrays */
void READ64(GenRegister dst, GenRegister tmp, GenRegister addr, GenRegister src, uint32_t bti, uint32_t elemNum);
/*! Write 64-bits float/int arrays */
void WRITE64(GenRegister src, GenRegister data, uint32_t bti, uint32_t elemNum, bool is_scalar);
/*! Untyped read (upto 4 channels) */
- void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
+ virtual void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
/*! Untyped write (upto 4 channels) */
- void UNTYPED_WRITE(GenRegister src, uint32_t bti, uint32_t elemNum);
+ virtual void UNTYPED_WRITE(GenRegister src, uint32_t bti, uint32_t elemNum);
/*! Byte gather (for unaligned bytes, shorts and ints) */
void BYTE_GATHER(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemSize);
/*! Byte scatter (for unaligned bytes, shorts and ints) */
@@ -176,9 +176,9 @@ namespace gbe
uint32_t return_format);
/*! TypedWrite instruction for texture */
- void TYPED_WRITE(GenRegister header,
- bool header_present,
- unsigned char bti);
+ virtual void TYPED_WRITE(GenRegister header,
+ bool header_present,
+ unsigned char bti);
/*! Extended math function (2 sources) */
void MATH(GenRegister dst, uint32_t function, GenRegister src0, GenRegister src1);
/*! Extended math function (1 source) */
@@ -190,7 +190,7 @@ namespace gbe
////////////////////////////////////////////////////////////////////////
// Helper functions to encode
////////////////////////////////////////////////////////////////////////
- void setHeader(GenInstruction *insn);
+ virtual void setHeader(GenInstruction *insn);
virtual void setDPUntypedRW(GenInstruction *insn, uint32_t bti, uint32_t rgba,
uint32_t msg_type, uint32_t msg_length,
uint32_t response_length);
diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h
index e5015ec..5139e43 100644
--- a/src/intel/intel_defines.h
+++ b/src/intel/intel_defines.h
@@ -288,6 +288,13 @@
#define I965_TILEWALK_XMAJOR 0
#define I965_TILEWALK_YMAJOR 1
+#define I965_SURCHAN_SELECT_ZERO 0 /* channel-select encodings; the gen75 image binding stores them in ss7.shader_{r,g,b,a} */
+#define I965_SURCHAN_SELECT_ONE 1 /* NOTE(review): ZERO/ONE presumably select a constant 0 / 1 — confirm */
+#define I965_SURCHAN_SELECT_RED 4 /* encodings 2 and 3 are not defined here */
+#define I965_SURCHAN_SELECT_GREEN 5
+#define I965_SURCHAN_SELECT_BLUE 6
+#define I965_SURCHAN_SELECT_ALPHA 7
+
#define URB_SIZE(intel) (IS_IGDNG(intel->device_id) ? 1024 : \
IS_G4X(intel->device_id) ? 384 : 256)
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 4cb9e0b..2696d68 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -629,6 +629,54 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset);
gpgpu->binded_img[index - gpgpu->img_index_base] = obj_bo;
+
+ assert(index < GEN_MAX_SURFACES);
+}
+
+/* Haswell image binding: fill surface-state slot `index` (including the ss7 shader channel
+ * selects), pass the bo to intel_gpgpu_set_buf_reloc_gen7 and record it in binded_img. */
+static void
+intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
+ uint32_t index,
+ dri_bo* obj_bo,
+ uint32_t obj_bo_offset,
+ uint32_t format,
+ cl_mem_object_type type,
+ int32_t w,
+ int32_t h,
+ int32_t depth,
+ int32_t pitch,
+ int32_t tiling)
+{
+ surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
+ gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index];
+ assert(index < GEN_MAX_SURFACES); /* validate the slot before it is written, not after */
+ memset(ss, 0, sizeof(*ss));
+ ss->ss0.surface_type = intel_get_surface_type(type);
+ ss->ss0.surface_format = format;
+ ss->ss1.base_addr = obj_bo->offset;
+ ss->ss2.width = w - 1;
+ ss->ss2.height = h - 1;
+ ss->ss3.depth = depth - 1;
+ ss->ss4.not_str_buf.rt_view_extent = depth - 1;
+ ss->ss4.not_str_buf.min_array_element = 0;
+ ss->ss3.pitch = pitch - 1;
+ ss->ss5.cache_control = cc_llc_l3;
+ /* Map every shader channel to the same-named surface channel (R->R, G->G, B->B, A->A). */
+ ss->ss7.shader_r = I965_SURCHAN_SELECT_RED;
+ ss->ss7.shader_g = I965_SURCHAN_SELECT_GREEN;
+ ss->ss7.shader_b = I965_SURCHAN_SELECT_BLUE;
+ ss->ss7.shader_a = I965_SURCHAN_SELECT_ALPHA;
+ if (tiling == GPGPU_TILE_X) {
+ ss->ss0.tiled_surface = 1;
+ ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
+ } else if (tiling == GPGPU_TILE_Y) {
+ ss->ss0.tiled_surface = 1;
+ ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
+ }
+ ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
+ intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset);
+ gpgpu->binded_img[index - gpgpu->img_index_base] = obj_bo;
}
static void
@@ -668,23 +716,6 @@ intel_gpgpu_set_stack(intel_gpgpu_t *gpgpu, uint32_t offset, uint32_t size, uint
}
static void
-intel_gpgpu_bind_image(intel_gpgpu_t *gpgpu,
- uint32_t index,
- cl_buffer *obj_bo,
- uint32_t obj_bo_offset,
- uint32_t format,
- cl_mem_object_type type,
- int32_t w,
- int32_t h,
- int32_t depth,
- int32_t pitch,
- cl_gpgpu_tiling tiling)
-{
- intel_gpgpu_bind_image_gen7(gpgpu, index, (drm_intel_bo*) obj_bo, obj_bo_offset, format, type, w, h, depth, pitch, tiling);
- assert(index < GEN_MAX_SURFACES);
-}
-
-static void
intel_gpgpu_build_idrt(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
{
gen6_interface_descriptor_t *desc;
@@ -1071,7 +1102,6 @@ intel_set_gpgpu_callbacks(int device_id)
cl_gpgpu_new = (cl_gpgpu_new_cb *) intel_gpgpu_new;
cl_gpgpu_delete = (cl_gpgpu_delete_cb *) intel_gpgpu_delete;
cl_gpgpu_sync = (cl_gpgpu_sync_cb *) intel_gpgpu_sync;
- cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image;
cl_gpgpu_bind_buf = (cl_gpgpu_bind_buf_cb *) intel_gpgpu_bind_buf;
cl_gpgpu_set_stack = (cl_gpgpu_set_stack_cb *) intel_gpgpu_set_stack;
cl_gpgpu_state_init = (cl_gpgpu_state_init_cb *) intel_gpgpu_state_init;
@@ -1096,5 +1126,12 @@ intel_set_gpgpu_callbacks(int device_id)
cl_gpgpu_event_get_gpu_cur_timestamp = (cl_gpgpu_event_get_gpu_cur_timestamp_cb *)intel_gpgpu_event_get_gpu_cur_timestamp;
cl_gpgpu_ref_batch_buf = (cl_gpgpu_ref_batch_buf_cb *)intel_gpgpu_ref_batch_buf;
cl_gpgpu_unref_batch_buf = (cl_gpgpu_unref_batch_buf_cb *)intel_gpgpu_unref_batch_buf;
+
+ if (IS_HASWELL(device_id))
+ cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen75;
+ else if (IS_IVYBRIDGE(device_id))
+ cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen7;
+ else
+ assert(0);
}
diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h
index 36b5971..59a9810 100644
--- a/src/intel/intel_structs.h
+++ b/src/intel/intel_structs.h
@@ -234,7 +234,16 @@ typedef struct gen7_surface_state
} ss5;
uint32_t ss6; /* unused */
- uint32_t ss7; /* unused */
+
+ struct {
+ uint32_t min_lod:12;
+ uint32_t pad0:4;
+ uint32_t shader_a:3;
+ uint32_t shader_b:3;
+ uint32_t shader_g:3;
+ uint32_t shader_r:3;
+ uint32_t pad1:4;
+ } ss7;
} gen7_surface_state_t;
STATIC_ASSERT(sizeof(gen6_surface_state_t) == sizeof(gen7_surface_state_t));
--
1.7.9.5
More information about the Beignet
mailing list