[Mesa-dev] [PATCH 16/24] i965/gen7: Implement code generation for untyped atomic instructions.
Francisco Jerez
currojerez at riseup.net
Sun Sep 15 00:10:42 PDT 2013
---
src/mesa/drivers/dri/i965/brw_defines.h | 2 +
src/mesa/drivers/dri/i965/brw_eu.h | 9 +++++
src/mesa/drivers/dri/i965/brw_eu_emit.c | 62 +++++++++++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +
src/mesa/drivers/dri/i965/brw_fs.h | 5 +++
src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 21 ++++++++++
src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 +
src/mesa/drivers/dri/i965/brw_vec4.h | 5 +++
src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 22 ++++++++++
9 files changed, 130 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index e9e0c4a..ccb4ce4 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -770,6 +770,8 @@ enum opcode {
SHADER_OPCODE_SHADER_TIME_ADD,
+ SHADER_OPCODE_UNTYPED_ATOMIC,
+
FS_OPCODE_DDX,
FS_OPCODE_DDY,
FS_OPCODE_PIXEL_X,
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 720bc74..212d916 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -422,6 +422,15 @@ void brw_CMP(struct brw_compile *p,
struct brw_reg src0,
struct brw_reg src1);
+void /* Emit an untyped atomic message send; the definition is added in brw_eu_emit.c. */
+brw_untyped_atomic(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg mrf,
+ GLuint atomic_op,
+ GLuint bind_table_index,
+ GLuint msg_length,
+ GLuint response_length);
+
/***********************************************************************
* brw_eu_util.c:
*/
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index cce8752..f39bf99 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2465,6 +2465,68 @@ brw_svb_write(struct brw_compile *p,
send_commit_msg); /* send_commit_msg */
}
+static void /* Fill in insn's message descriptor (bits3) for an untyped atomic data port message. */
+brw_set_dp_untyped_atomic_message(struct brw_compile *p,
+ struct brw_instruction *insn,
+ GLuint atomic_op,
+ GLuint bind_table_index,
+ GLuint msg_length,
+ GLuint response_length,
+ bool header_present)
+{
+ if (p->brw->is_haswell) { /* Haswell path: uses the HSW_SFID_DATAPORT_DATA_CACHE_1 shared function */
+ brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
+ msg_length, response_length,
+ header_present, false);
+
+
+ if (insn->header.access_mode == BRW_ALIGN_1) { /* Align1: bit 12 is set unless the instruction is 16-wide */
+ if (insn->header.execution_size != BRW_EXECUTE_16)
+ insn->bits3.ud |= 1 << 12; /* SIMD8 mode */
+
+ insn->bits3.gen7_dp.msg_type =
+ HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
+ } else { /* any other access mode selects the SIMD4X2 message type; bit 12 is left alone */
+ insn->bits3.gen7_dp.msg_type =
+ HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
+ }
+
+ } else { /* non-Haswell path: one message type, and bit 12 does not depend on the access mode */
+ brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
+ msg_length, response_length,
+ header_present, false);
+
+ insn->bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
+
+ if (insn->header.execution_size != BRW_EXECUTE_16)
+ insn->bits3.ud |= 1 << 12; /* SIMD8 mode */
+ }
+
+ if (response_length)
+ insn->bits3.ud |= 1 << 13; /* Return data expected */
+
+ insn->bits3.gen7_dp.binding_table_index = bind_table_index;
+ insn->bits3.ud |= atomic_op << 8; /* atomic operation code is ORed in starting at bit 8; it is not masked here */
+}
+
+void /* Emit a BRW_OPCODE_SEND instruction that carries an untyped atomic message. */
+brw_untyped_atomic(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg mrf,
+ GLuint atomic_op,
+ GLuint bind_table_index,
+ GLuint msg_length,
+ GLuint response_length) {
+ struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
+
+ brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD)); /* destination is retyped to UD */
+ brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD)); /* message register is retyped to UD */
+ brw_set_src1(p, insn, brw_imm_d(0));
+ brw_set_dp_untyped_atomic_message(
+ p, insn, atomic_op, bind_table_index, msg_length, response_length,
+ insn->header.access_mode == BRW_ALIGN_1); /* header_present argument: true only in Align1 mode */
+}
+
/**
* This instruction is generated as a single-channel align1 instruction by
* both the VS and FS stages when using INTEL_DEBUG=shader_time.
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index a98e7c7..4f1a665 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -738,6 +738,8 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
return inst->mlen;
case FS_OPCODE_SPILL:
return 2;
+ case SHADER_OPCODE_UNTYPED_ATOMIC:
+ return 0;
default:
assert(!"not reached");
return inst->mlen;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index e78267e..27a47fa 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -556,6 +556,11 @@ private:
struct brw_reg offset,
struct brw_reg value);
+ void generate_untyped_atomic(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg atomic_op,
+ struct brw_reg surf_index);
+
void mark_surface_used(unsigned surf_index);
void patch_discard_jumps_to_fb_writes();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index bfb3d33..cf30fcb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -1066,6 +1066,23 @@ fs_generator::generate_shader_time_add(fs_inst *inst,
}
void
+fs_generator::generate_untyped_atomic(fs_inst *inst, struct brw_reg dst,
+ struct brw_reg atomic_op,
+ struct brw_reg surf_index)
+{ /* Emit the send for SHADER_OPCODE_UNTYPED_ATOMIC; called from the generate_code() opcode switch. */
+ assert(atomic_op.file == BRW_IMMEDIATE_VALUE &&
+ atomic_op.type == BRW_REGISTER_TYPE_UD &&
+ surf_index.file == BRW_IMMEDIATE_VALUE &&
+ surf_index.type == BRW_REGISTER_TYPE_UD); /* atomic op and surface index must both be UD immediates */
+
+ brw_untyped_atomic(p, dst, brw_message_reg(inst->base_mrf),
+ atomic_op.dw1.ud, surf_index.dw1.ud,
+ inst->mlen, dispatch_width / 8); /* msg_length = inst->mlen, response_length = dispatch_width / 8 */
+
+ mark_surface_used(surf_index.dw1.ud);
+}
+
+void
fs_generator::generate_code(exec_list *instructions)
{
int last_native_insn_offset = p->next_insn_offset;
@@ -1439,6 +1456,10 @@ fs_generator::generate_code(exec_list *instructions)
generate_shader_time_add(inst, src[0], src[1], src[2]);
break;
+ case SHADER_OPCODE_UNTYPED_ATOMIC:
+ generate_untyped_atomic(inst, dst, src[0], src[1]);
+ break;
+
case FS_OPCODE_SET_SIMD4X2_OFFSET:
generate_set_simd4x2_offset(inst, dst, src[0]);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 2c1f541..590c0a5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -271,6 +271,8 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
case SHADER_OPCODE_TXF_MS:
case SHADER_OPCODE_TXS:
return inst->header_present ? 1 : 0;
+ case SHADER_OPCODE_UNTYPED_ATOMIC:
+ return 0;
default:
assert(!"not reached");
return inst->mlen;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 13c9166..233f233 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -620,6 +620,11 @@ private:
void generate_unpack_flags(vec4_instruction *inst,
struct brw_reg dst);
+ void generate_untyped_atomic(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg atomic_op,
+ struct brw_reg surf_index);
+
void mark_surface_used(unsigned surf_index);
struct brw_context *brw;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 6916134..05c5806 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -835,6 +835,24 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
mark_surface_used(surf_index.dw1.ud);
}
+void /* Emit the send for SHADER_OPCODE_UNTYPED_ATOMIC; called from generate_vec4_instruction(). */
+vec4_generator::generate_untyped_atomic(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg atomic_op,
+ struct brw_reg surf_index)
+{
+ assert(atomic_op.file == BRW_IMMEDIATE_VALUE &&
+ atomic_op.type == BRW_REGISTER_TYPE_UD &&
+ surf_index.file == BRW_IMMEDIATE_VALUE &&
+ surf_index.type == BRW_REGISTER_TYPE_UD); /* atomic op and surface index must both be UD immediates */
+
+ brw_untyped_atomic(p, dst, brw_message_reg(inst->base_mrf),
+ atomic_op.dw1.ud, surf_index.dw1.ud,
+ inst->mlen, 1); /* msg_length = inst->mlen, response_length is fixed at 1 */
+
+ mark_surface_used(surf_index.dw1.ud);
+}
+
/**
* Generate assembly for a Vec4 IR instruction.
*
@@ -1096,6 +1114,10 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
mark_surface_used(SURF_INDEX_VEC4_SHADER_TIME);
break;
+ case SHADER_OPCODE_UNTYPED_ATOMIC:
+ generate_untyped_atomic(inst, dst, src[0], src[1]);
+ break;
+
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
generate_unpack_flags(inst, dst);
break;
--
1.8.3.4
More information about the mesa-dev mailing list