[Beignet] [V2 PATCH 5/6] Make the surface typed write work for HSW
junyan.he at inbox.com
junyan.he at inbox.com
Wed May 7 03:03:10 PDT 2014
From: Junyan He <junyan.he at linux.intel.com>
1. Modify the typed write for state write using GEN_SFID_DATAPORT_DATA_CACHE.
2. Add the channel select for surface state setting.
3. Correct the send message for setting slot in send description.
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
backend/src/backend/gen75_encoder.cpp | 117 ++++++++++++++++++++++++++++++++++
backend/src/backend/gen75_encoder.hpp | 4 ++
backend/src/backend/gen_encoder.hpp | 14 ++--
src/intel/intel_defines.h | 7 ++
src/intel/intel_driver.c | 2 +-
src/intel/intel_gpgpu.c | 74 +++++++++++++++------
src/intel/intel_gpgpu.h | 2 +-
src/intel/intel_structs.h | 11 +++-
8 files changed, 202 insertions(+), 29 deletions(-)
diff --git a/backend/src/backend/gen75_encoder.cpp b/backend/src/backend/gen75_encoder.cpp
index ede9d55..d1a8542 100644
--- a/backend/src/backend/gen75_encoder.cpp
+++ b/backend/src/backend/gen75_encoder.cpp
@@ -27,8 +27,40 @@
#include "backend/gen75_encoder.hpp"
+static const uint32_t untypedRWMask[] = { /* 5 entries, indexed by elemNum in UNTYPED_READ/UNTYPED_WRITE */
+ GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN|GEN_UNTYPED_RED,
+ GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN,
+ GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE,
+ GEN_UNTYPED_ALPHA,
+ 0
+};
+
namespace gbe
{
+ void Gen75Encoder::setHeader(GenNativeInstruction *insn) { /* copy the current encoder state (this->curr) into insn's control fields */
+ if (this->curr.execWidth == 8)
+ insn->header.execution_size = GEN_WIDTH_8;
+ else if (this->curr.execWidth == 16)
+ insn->header.execution_size = GEN_WIDTH_16;
+ else if (this->curr.execWidth == 1)
+ insn->header.execution_size = GEN_WIDTH_1;
+ else if (this->curr.execWidth == 4)
+ insn->header.execution_size = GEN_WIDTH_4;
+ else
+ NOT_IMPLEMENTED; /* only execWidth 1, 4, 8 and 16 are handled */
+ insn->header.acc_wr_control = this->curr.accWrEnable;
+ insn->header.quarter_control = this->curr.quarterControl;
+ insn->bits1.ia1.nib_ctrl = this->curr.nibControl;
+ insn->header.mask_control = this->curr.noMask;
+ insn->bits2.ia1.flag_reg_nr = this->curr.flag;
+ insn->bits2.ia1.flag_sub_reg_nr = this->curr.subFlag;
+ if (this->curr.predicate != GEN_PREDICATE_NONE) { /* predicate fields are written only when a predicate is set */
+ insn->header.predicate_control = this->curr.predicate;
+ insn->header.predicate_inverse = this->curr.inversePredicate;
+ }
+ insn->header.saturate = this->curr.saturate;
+ }
+
void Gen75Encoder::setDPUntypedRW(GenNativeInstruction *insn,
uint32_t bti,
uint32_t rgba,
@@ -60,4 +92,89 @@ namespace gbe
/* Always using the low 8 slots here. */
insn->bits3.gen7_typed_rw.slot = 1;
}
+
+ void Gen75Encoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum) { /* emit a SEND whose descriptor carries atomic op `function` and `bti` */
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+ uint32_t msg_length = 0;
+ uint32_t response_length = 0;
+
+ if (this->curr.execWidth == 8) { /* execWidth 8: lengths are srcNum / 1 */
+ msg_length = srcNum;
+ response_length = 1;
+ } else if (this->curr.execWidth == 16) { /* execWidth 16: both lengths are doubled */
+ msg_length = 2 * srcNum;
+ response_length = 2;
+ } else
+ NOT_IMPLEMENTED; /* only execWidth 8 and 16 are handled */
+
+ this->setHeader(insn);
+ this->setDst(insn, GenRegister::uw16grf(dst.nr, 0));
+ this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0));
+ this->setSrc1(insn, GenRegister::immud(0));
+
+ const GenMessageTarget sfid = GEN_SFID_DATAPORT1_DATA_CACHE;
+ setMessageDescriptor(insn, sfid, msg_length, response_length);
+ insn->bits3.gen7_atomic_op.msg_type = GEN75_P1_TYPED_ATOMIC_OP; /* NOTE(review): TYPED variant used by an otherwise untyped-looking path - confirm */
+ insn->bits3.gen7_atomic_op.bti = bti;
+ insn->bits3.gen7_atomic_op.return_data = 1;
+ insn->bits3.gen7_atomic_op.aop_type = function;
+
+ if (this->curr.execWidth == 8)
+ insn->bits3.gen7_atomic_op.simd_mode = GEN_ATOMIC_SIMD8;
+ else if (this->curr.execWidth == 16)
+ insn->bits3.gen7_atomic_op.simd_mode = GEN_ATOMIC_SIMD16;
+ else
+ NOT_SUPPORTED;
+ }
+
+ void Gen75Encoder::UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum) { /* emit a SEND for a GEN75_P1_UNTYPED_READ of elemNum elements */
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+ assert(elemNum >= 1 && elemNum <= 4); /* elemNum indexes untypedRWMask[]; '||' made this check always true */
+ uint32_t msg_length = 0;
+ uint32_t response_length = 0;
+ if (this->curr.execWidth == 8) { /* execWidth 8: lengths are 1 / elemNum */
+ msg_length = 1;
+ response_length = elemNum;
+ } else if (this->curr.execWidth == 16) { /* execWidth 16: both lengths are doubled */
+ msg_length = 2;
+ response_length = 2 * elemNum;
+ } else
+ NOT_IMPLEMENTED; /* only execWidth 8 and 16 are handled */
+
+ this->setHeader(insn);
+ this->setDst(insn, GenRegister::uw16grf(dst.nr, 0));
+ this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0));
+ this->setSrc1(insn, GenRegister::immud(0));
+ setDPUntypedRW(insn,
+ bti,
+ untypedRWMask[elemNum],
+ GEN75_P1_UNTYPED_READ,
+ msg_length,
+ response_length);
+ }
+
+ void Gen75Encoder::UNTYPED_WRITE(GenRegister msg, uint32_t bti, uint32_t elemNum) { /* emit a SEND for a GEN75_P1_UNTYPED_SURFACE_WRITE of elemNum elements */
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+ assert(elemNum >= 1 && elemNum <= 4); /* elemNum indexes untypedRWMask[]; '||' made this check always true */
+ uint32_t msg_length = 0;
+ uint32_t response_length = 0; /* never modified: the write is encoded with a zero response length */
+ this->setHeader(insn);
+ if (this->curr.execWidth == 8) {
+ this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
+ msg_length = 1 + elemNum;
+ } else if (this->curr.execWidth == 16) {
+ this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
+ msg_length = 2 * (1 + elemNum);
+ }
+ else
+ NOT_IMPLEMENTED; /* only execWidth 8 and 16 are handled */
+ this->setSrc0(insn, GenRegister::ud8grf(msg.nr, 0));
+ this->setSrc1(insn, GenRegister::immud(0));
+ setDPUntypedRW(insn,
+ bti,
+ untypedRWMask[elemNum],
+ GEN75_P1_UNTYPED_SURFACE_WRITE,
+ msg_length,
+ response_length);
+ }
} /* End of the name space. */
diff --git a/backend/src/backend/gen75_encoder.hpp b/backend/src/backend/gen75_encoder.hpp
index 53db3a7..a107202 100644
--- a/backend/src/backend/gen75_encoder.hpp
+++ b/backend/src/backend/gen75_encoder.hpp
@@ -34,6 +34,10 @@ namespace gbe
Gen75Encoder(uint32_t simdWidth, uint32_t gen, uint32_t deviceID)
: GenEncoder(simdWidth, gen, deviceID, 8) { };
+ virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum);
+ virtual void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
+ virtual void UNTYPED_WRITE(GenRegister src, uint32_t bti, uint32_t elemNum);
+ virtual void setHeader(GenNativeInstruction *insn);
virtual void setDPUntypedRW(GenNativeInstruction *insn, uint32_t bti, uint32_t rgba,
uint32_t msg_type, uint32_t msg_length, uint32_t response_length);
virtual void setTypedWriteMessage(GenNativeInstruction *insn, unsigned char bti,
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 31d7f4e..d44d323 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -161,15 +161,15 @@ namespace gbe
/*! Wait instruction (used for the barrier) */
void WAIT(void);
/*! Atomic instructions */
- void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum);
+ virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum);
/*! Read 64-bits float/int arrays */
void READ64(GenRegister dst, GenRegister tmp, GenRegister addr, GenRegister src, uint32_t bti, uint32_t elemNum);
/*! Write 64-bits float/int arrays */
void WRITE64(GenRegister src, GenRegister data, uint32_t bti, uint32_t elemNum, bool is_scalar);
/*! Untyped read (upto 4 channels) */
- void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
+ virtual void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
/*! Untyped write (upto 4 channels) */
- void UNTYPED_WRITE(GenRegister src, uint32_t bti, uint32_t elemNum);
+ virtual void UNTYPED_WRITE(GenRegister src, uint32_t bti, uint32_t elemNum);
/*! Byte gather (for unaligned bytes, shorts and ints) */
void BYTE_GATHER(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemSize);
/*! Byte scatter (for unaligned bytes, shorts and ints) */
@@ -193,9 +193,9 @@ namespace gbe
bool isLD);
/*! TypedWrite instruction for texture */
- void TYPED_WRITE(GenRegister header,
- bool header_present,
- unsigned char bti);
+ virtual void TYPED_WRITE(GenRegister header,
+ bool header_present,
+ unsigned char bti);
/*! Extended math function (2 sources) */
void MATH(GenRegister dst, uint32_t function, GenRegister src0, GenRegister src1);
/*! Extended math function (1 source) */
@@ -207,6 +207,7 @@ namespace gbe
////////////////////////////////////////////////////////////////////////
// Helper functions to encode
////////////////////////////////////////////////////////////////////////
+ virtual void setHeader(GenNativeInstruction *insn);
virtual void setDPUntypedRW(GenNativeInstruction *insn, uint32_t bti, uint32_t rgba,
uint32_t msg_type, uint32_t msg_length,
uint32_t response_length);
@@ -216,7 +217,6 @@ namespace gbe
void setMessageDescriptor(GenNativeInstruction *inst, enum GenMessageTarget sfid,
unsigned msg_length, unsigned response_length,
bool header_present = false, bool end_of_thread = false);
- void setHeader(GenNativeInstruction *insn);
void setDst(GenNativeInstruction *insn, GenRegister dest);
void setSrc0(GenNativeInstruction *insn, GenRegister reg);
void setSrc1(GenNativeInstruction *insn, GenRegister reg);
diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h
index e5015ec..5139e43 100644
--- a/src/intel/intel_defines.h
+++ b/src/intel/intel_defines.h
@@ -288,6 +288,13 @@
#define I965_TILEWALK_XMAJOR 0
#define I965_TILEWALK_YMAJOR 1
+#define I965_SURCHAN_SELECT_ZERO 0
+#define I965_SURCHAN_SELECT_ONE 1
+#define I965_SURCHAN_SELECT_RED 4
+#define I965_SURCHAN_SELECT_GREEN 5
+#define I965_SURCHAN_SELECT_BLUE 6
+#define I965_SURCHAN_SELECT_ALPHA 7
+
#define URB_SIZE(intel) (IS_IGDNG(intel->device_id) ? 1024 : \
IS_G4X(intel->device_id) ? 384 : 256)
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index 7fd2bf3..ef97835 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -687,5 +687,5 @@ intel_setup_callbacks(void)
cl_buffer_subdata = (cl_buffer_subdata_cb *) drm_intel_bo_subdata;
cl_buffer_wait_rendering = (cl_buffer_wait_rendering_cb *) drm_intel_bo_wait_rendering;
cl_buffer_get_fd = (cl_buffer_get_fd_cb *) drm_intel_bo_gem_export_to_prime;
- intel_set_gpgpu_callbacks();
+ intel_set_gpgpu_callbacks(intel_get_device_id());
}
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index fbeef11..3d6fd30 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -629,6 +629,53 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset);
gpgpu->binded_img[index - gpgpu->img_index_base] = obj_bo;
+
+ assert(index < GEN_MAX_SURFACES);
+}
+
+static void
+intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
+ uint32_t index,
+ dri_bo* obj_bo,
+ uint32_t obj_bo_offset,
+ uint32_t format,
+ cl_mem_object_type type,
+ int32_t w,
+ int32_t h,
+ int32_t depth,
+ int32_t pitch,
+ int32_t tiling)
+{
+ surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
+ gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index];
+
+ assert(index < GEN_MAX_SURFACES); /* validate the slot before anything is written through ss */
+ memset(ss, 0, sizeof(*ss));
+
+ ss->ss0.surface_type = intel_get_surface_type(type);
+ ss->ss0.surface_format = format;
+ ss->ss1.base_addr = obj_bo->offset;
+ ss->ss2.width = w - 1;
+ ss->ss2.height = h - 1;
+ ss->ss3.depth = depth - 1;
+ ss->ss4.not_str_buf.rt_view_extent = depth - 1;
+ ss->ss4.not_str_buf.min_array_element = 0;
+ ss->ss3.pitch = pitch - 1;
+ ss->ss5.cache_control = cc_llc_l3;
+ ss->ss7.shader_r = I965_SURCHAN_SELECT_RED; /* ss7 channel selects: the only fields the gen7 variant does not set */
+ ss->ss7.shader_g = I965_SURCHAN_SELECT_GREEN;
+ ss->ss7.shader_b = I965_SURCHAN_SELECT_BLUE;
+ ss->ss7.shader_a = I965_SURCHAN_SELECT_ALPHA;
+ if (tiling == GPGPU_TILE_X) {
+ ss->ss0.tiled_surface = 1;
+ ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
+ } else if (tiling == GPGPU_TILE_Y) {
+ ss->ss0.tiled_surface = 1;
+ ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
+ }
+ ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
+ intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset);
+ gpgpu->binded_img[index - gpgpu->img_index_base] = obj_bo;
}
static void
@@ -668,23 +715,6 @@ intel_gpgpu_set_stack(intel_gpgpu_t *gpgpu, uint32_t offset, uint32_t size, uint
}
static void
-intel_gpgpu_bind_image(intel_gpgpu_t *gpgpu,
- uint32_t index,
- cl_buffer *obj_bo,
- uint32_t obj_bo_offset,
- uint32_t format,
- cl_mem_object_type type,
- int32_t w,
- int32_t h,
- int32_t depth,
- int32_t pitch,
- cl_gpgpu_tiling tiling)
-{
- intel_gpgpu_bind_image_gen7(gpgpu, index, (drm_intel_bo*) obj_bo, obj_bo_offset, format, type, w, h, depth, pitch, tiling);
- assert(index < GEN_MAX_SURFACES);
-}
-
-static void
intel_gpgpu_build_idrt(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
{
gen6_interface_descriptor_t *desc;
@@ -1053,12 +1083,11 @@ intel_gpgpu_event_get_exec_timestamp(intel_event_t *event,
}
LOCAL void
-intel_set_gpgpu_callbacks(void)
+intel_set_gpgpu_callbacks(int device_id)
{
cl_gpgpu_new = (cl_gpgpu_new_cb *) intel_gpgpu_new;
cl_gpgpu_delete = (cl_gpgpu_delete_cb *) intel_gpgpu_delete;
cl_gpgpu_sync = (cl_gpgpu_sync_cb *) intel_gpgpu_sync;
- cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image;
cl_gpgpu_bind_buf = (cl_gpgpu_bind_buf_cb *) intel_gpgpu_bind_buf;
cl_gpgpu_set_stack = (cl_gpgpu_set_stack_cb *) intel_gpgpu_set_stack;
cl_gpgpu_state_init = (cl_gpgpu_state_init_cb *) intel_gpgpu_state_init;
@@ -1083,5 +1112,12 @@ intel_set_gpgpu_callbacks(void)
cl_gpgpu_event_get_gpu_cur_timestamp = (cl_gpgpu_event_get_gpu_cur_timestamp_cb *)intel_gpgpu_event_get_gpu_cur_timestamp;
cl_gpgpu_ref_batch_buf = (cl_gpgpu_ref_batch_buf_cb *)intel_gpgpu_ref_batch_buf;
cl_gpgpu_unref_batch_buf = (cl_gpgpu_unref_batch_buf_cb *)intel_gpgpu_unref_batch_buf;
+
+ if (IS_HASWELL(device_id))
+ cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen75;
+ else if (IS_IVYBRIDGE(device_id))
+ cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen7;
+ else
+ assert(0);
}
diff --git a/src/intel/intel_gpgpu.h b/src/intel/intel_gpgpu.h
index 9918b35..d593ac7 100644
--- a/src/intel/intel_gpgpu.h
+++ b/src/intel/intel_gpgpu.h
@@ -28,7 +28,7 @@
#include <stdint.h>
/* Set the gpgpu related call backs */
-extern void intel_set_gpgpu_callbacks(void);
+extern void intel_set_gpgpu_callbacks(int device_id);
#endif /* __INTEL_GPGPU_H__ */
diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h
index 36b5971..59a9810 100644
--- a/src/intel/intel_structs.h
+++ b/src/intel/intel_structs.h
@@ -234,7 +234,16 @@ typedef struct gen7_surface_state
} ss5;
uint32_t ss6; /* unused */
- uint32_t ss7; /* unused */
+
+ struct {
+ uint32_t min_lod:12;
+ uint32_t pad0:4;
+ uint32_t shader_a:3;
+ uint32_t shader_b:3;
+ uint32_t shader_g:3;
+ uint32_t shader_r:3;
+ uint32_t pad1:4;
+ } ss7;
} gen7_surface_state_t;
STATIC_ASSERT(sizeof(gen6_surface_state_t) == sizeof(gen7_surface_state_t));
--
1.8.3.2
More information about the Beignet
mailing list