Mesa (master): r600g: Compute support for Cayman

Tom Stellard tstellar at kemper.freedesktop.org
Wed Jun 6 20:52:38 UTC 2012


Module: Mesa
Branch: master
Commit: 0c4b19ac63efa41242c515824301e6161aceeea5
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=0c4b19ac63efa41242c515824301e6161aceeea5

Author: Tom Stellard <thomas.stellard at amd.com>
Date:   Tue Jun  5 13:11:11 2012 -0400

r600g: Compute support for Cayman

---

 src/gallium/drivers/r600/evergreen_compute.c       |   47 +++++++---
 .../drivers/r600/evergreen_compute_internal.c      |   16 +++-
 src/gallium/drivers/r600/evergreend.h              |    3 +
 src/gallium/drivers/r600/r600_shader.c             |    3 +
 src/gallium/drivers/radeon/R600Instructions.td     |   92 +++++++++----------
 5 files changed, 95 insertions(+), 66 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 7aeb403..ceb3b3a 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -165,8 +165,10 @@ static void evergreen_bind_compute_state(struct pipe_context *ctx_, void *state)
 	struct evergreen_compute_resource* res = get_empty_res(ctx->cs_shader,
 						COMPUTE_RESOURCE_SHADER, 0);
 
-	evergreen_reg_set(res, R_008C0C_SQ_GPR_RESOURCE_MGMT_3,
+	if (ctx->chip_class < CAYMAN) {
+		evergreen_reg_set(res, R_008C0C_SQ_GPR_RESOURCE_MGMT_3,
 			S_008C0C_NUM_LS_GPRS(ctx->cs_shader->bc.ngpr));
+	}
 
 	///maybe we can use it later
 	evergreen_reg_set(res, R_0286C8_SPI_THREAD_GROUPING, 0);
@@ -606,31 +608,48 @@ void evergreen_compute_init_config(struct r600_context *ctx)
 
 	evergreen_reg_set(res, R_008C04_SQ_GPR_RESOURCE_MGMT_1,
 				S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
-	evergreen_reg_set(res, R_008C08_SQ_GPR_RESOURCE_MGMT_2, 0);
+	if (ctx->chip_class < CAYMAN) {
+		evergreen_reg_set(res, R_008C08_SQ_GPR_RESOURCE_MGMT_2, 0);
+	}
 	evergreen_reg_set(res, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0);
 	evergreen_reg_set(res, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0);
 	evergreen_reg_set(res, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
+
 	/* workaround for hw issues with dyn gpr - must set all limits to 240
 	 * instead of 0, 0x1e == 240/8 */
-	evergreen_reg_set(res, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
+	if (ctx->chip_class < CAYMAN) {
+		evergreen_reg_set(res, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
 				S_028838_PS_GPRS(0x1e) |
 				S_028838_VS_GPRS(0x1e) |
 				S_028838_GS_GPRS(0x1e) |
 				S_028838_ES_GPRS(0x1e) |
 				S_028838_HS_GPRS(0x1e) |
 				S_028838_LS_GPRS(0x1e));
+	} else {
+		evergreen_reg_set(res, 0x286f8,
+				S_028838_PS_GPRS(0x1e) |
+				S_028838_VS_GPRS(0x1e) |
+				S_028838_GS_GPRS(0x1e) |
+				S_028838_ES_GPRS(0x1e) |
+				S_028838_HS_GPRS(0x1e) |
+				S_028838_LS_GPRS(0x1e));
+	}
 
-
-	evergreen_reg_set(res, R_008E20_SQ_STATIC_THREAD_MGMT1, 0xFFFFFFFF);
-	evergreen_reg_set(res, R_008E24_SQ_STATIC_THREAD_MGMT2, 0xFFFFFFFF);
-	evergreen_reg_set(res, R_008E28_SQ_STATIC_THREAD_MGMT3, 0xFFFFFFFF);
-	evergreen_reg_set(res, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 0);
-	tmp = S_008C1C_NUM_LS_THREADS(num_threads);
-	evergreen_reg_set(res, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp);
-	evergreen_reg_set(res, R_008C20_SQ_STACK_RESOURCE_MGMT_1, 0);
-	evergreen_reg_set(res, R_008C24_SQ_STACK_RESOURCE_MGMT_2, 0);
-	tmp = S_008C28_NUM_LS_STACK_ENTRIES(num_stack_entries);
-	evergreen_reg_set(res, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp);
+	if (ctx->chip_class < CAYMAN) {
+
+		evergreen_reg_set(res, R_008E20_SQ_STATIC_THREAD_MGMT1, 0xFFFFFFFF);
+		evergreen_reg_set(res, R_008E24_SQ_STATIC_THREAD_MGMT2, 0xFFFFFFFF);
+		evergreen_reg_set(res, R_008E20_SQ_STATIC_THREAD_MGMT1, 0xFFFFFFFF);
+		evergreen_reg_set(res, R_008E24_SQ_STATIC_THREAD_MGMT2, 0xFFFFFFFF);
+		evergreen_reg_set(res, R_008E28_SQ_STATIC_THREAD_MGMT3, 0xFFFFFFFF);
+		evergreen_reg_set(res, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 0);
+		tmp = S_008C1C_NUM_LS_THREADS(num_threads);
+		evergreen_reg_set(res, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp);
+		evergreen_reg_set(res, R_008C20_SQ_STACK_RESOURCE_MGMT_1, 0);
+		evergreen_reg_set(res, R_008C24_SQ_STACK_RESOURCE_MGMT_2, 0);
+		tmp = S_008C28_NUM_LS_STACK_ENTRIES(num_stack_entries);
+		evergreen_reg_set(res, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp);
+	}
 	evergreen_reg_set(res, R_0286CC_SPI_PS_IN_CONTROL_0, S_0286CC_LINEAR_GRADIENT_ENA(1));
 	evergreen_reg_set(res, R_0286D0_SPI_PS_IN_CONTROL_1, 0);
 	evergreen_reg_set(res, R_0286E4_SPI_PS_IN_CONTROL_2, 0);
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c
index 209f064..d846cbe 100644
--- a/src/gallium/drivers/r600/evergreen_compute_internal.c
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.c
@@ -438,8 +438,13 @@ void evergreen_set_lds(
 	struct evergreen_compute_resource* res =
 		get_empty_res(pipe, COMPUTE_RESOURCE_LDS, 0);
 
-	evergreen_reg_set(res, R_008E2C_SQ_LDS_RESOURCE_MGMT,
-		S_008E2C_NUM_LS_LDS(num_lds));
+	if (pipe->ctx->chip_class < CAYMAN) {
+		evergreen_reg_set(res, R_008E2C_SQ_LDS_RESOURCE_MGMT,
+			S_008E2C_NUM_LS_LDS(num_lds));
+	} else {
+		evergreen_reg_set(res, CM_R_0286FC_SPI_LDS_MGMT,
+					S_0286FC_NUM_LS_LDS(num_lds));
+	}
 	evergreen_reg_set(res, CM_R_0288E8_SQ_LDS_ALLOC, size | num_waves << 14);
 }
 
@@ -620,6 +625,7 @@ void evergreen_set_vtx_resource(
 	assert(id < 16);
 	uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
 	struct number_type_and_format fmt;
+	uint64_t va;
 
 	fmt.format = 0;
 
@@ -639,11 +645,13 @@ void evergreen_set_vtx_resource(
 //	size = (size * util_format_get_blockwidth(bo->b.b.b.format) *
 //		util_format_get_blocksize(bo->b.b.b.format));
 
+	va = r600_resource_va(&pipe->ctx->screen->screen, &bo->b.b) + offset;
+
 	COMPUTE_DBG("id: %i vtx size: %i byte,	width0: %i elem\n",
 		id, size, bo->b.b.width0);
 
 	sq_vtx_constant_word2 =
-		S_030008_BASE_ADDRESS_HI(offset >> 32) |
+		S_030008_BASE_ADDRESS_HI(va >> 32) |
 		S_030008_STRIDE(stride) |
 		S_030008_DATA_FORMAT(fmt.format) |
 		S_030008_NUM_FORMAT_ALL(fmt.num_format_all) |
@@ -662,7 +670,7 @@ void evergreen_set_vtx_resource(
 
 	evergreen_emit_raw_value(res, PKT3C(PKT3_SET_RESOURCE, 8, 0));
 	evergreen_emit_raw_value(res, (id+816)*32 >> 2);
-	evergreen_emit_raw_value(res, (unsigned)((offset) & 0xffffffff));
+	evergreen_emit_raw_value(res, (unsigned)((va) & 0xffffffff));
 	evergreen_emit_raw_value(res, size - 1);
 	evergreen_emit_raw_value(res, sq_vtx_constant_word2);
 	evergreen_emit_raw_value(res, sq_vtx_constant_word3);
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index 3b6d730..5d57ce3 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -2129,6 +2129,9 @@
 #define ENDIAN_8IN32	2
 #define ENDIAN_8IN64	3
 
+#define CM_R_0286FC_SPI_LDS_MGMT                     0x286fc
+#define   S_0286FC_NUM_PS_LDS(x)                     ((x) & 0xff)
+#define   S_0286FC_NUM_LS_LDS(x)                     (((x) & 0xff) << 8) /* low 8 bits of x placed in bits 15:8; outer parens keep the shift intact inside larger expressions */
 #define CM_R_0288E8_SQ_LDS_ALLOC                     0x000288E8
 
 #define CM_R_028804_DB_EQAA                          0x00028804
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 8a80dba..63b9a03 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -247,6 +247,9 @@ int r600_compute_shader_create(struct pipe_context * ctx,
 	r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family);
 	shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE;
 	r600_bytecode_from_byte_stream(&shader_ctx, bytes, byte_count);
+	if (shader_ctx.bc->chip_class == CAYMAN) {
+		cm_bytecode_add_cf_end(shader_ctx.bc);
+	}
 	r600_bytecode_build(shader_ctx.bc);
 	if (dump) {
 		r600_bytecode_dump(shader_ctx.bc);
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
index 9caaf1c..12900fb 100644
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@@ -784,54 +784,6 @@ class TRIG_HELPER_r700 <InstR600 trig_inst>: Pat <
 >;
 */
 
-/* ---------------------- */
-/* Evergreen Instructions */
-/* ---------------------- */
-
-
-let Predicates = [isEG] in {
-
-let usesCustomInserter = 1 in {
-
-def RAT_WRITE_CACHELESS_eg : EG_CF_RAT <0x57, 0x2, 0, (outs),
-  (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr),
-  "RAT_WRITE_CACHELESS_eg $rw_gpr, $index_gpr",
-  [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)]>
-{
-  let RIM         = 0;
-  /* XXX: Have a separate instruction for non-indexed writes. */
-  let TYPE        = 1;
-  let RW_REL      = 0;
-  let ELEM_SIZE   = 0;
-
-  let ARRAY_SIZE  = 0;
-  let COMP_MASK   = 1;
-  let BURST_COUNT = 0;
-  let VPM         = 0;
-  let EOP         = 0;
-  let MARK        = 0;
-  let BARRIER     = 1;
-}
-
-} // End usesCustomInserter = 1
-
-class VTX_READ_eg <int buffer_id, list<dag> pattern> : InstR600ISA <
-  (outs R600_TReg32_X:$dst),
-  (ins MEMxi:$ptr),
-  "VTX_READ_eg $dst, $ptr",
-  pattern
->;
-
-def VTX_READ_PARAM_eg : VTX_READ_eg <0,
-  [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
->;
-
-def VTX_READ_GLOBAL_eg : VTX_READ_eg <1,
-  [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))]
->;
-
-} // End isEG Predicate
-
 /* ------------------------------- */
 /* Evergreen / Cayman Instructions */
 /* ------------------------------- */
@@ -893,6 +845,50 @@ class TRIG_eg <InstR600 trig, Intrinsic intr> : Pat<
 
   def : Pat<(fp_to_uint R600_Reg32:$src),
     (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src))>;
+
+//===----------------------------------------------------------------------===//
+// Memory read/write instructions
+//===----------------------------------------------------------------------===//
+
+let usesCustomInserter = 1 in {
+
+def RAT_WRITE_CACHELESS_eg : EG_CF_RAT <0x57, 0x2, 0, (outs),
+  (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr),
+  "RAT_WRITE_CACHELESS_eg $rw_gpr, $index_gpr",
+  [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)]>
+{
+  let RIM         = 0;
+  /* XXX: Have a separate instruction for non-indexed writes. */
+  let TYPE        = 1;
+  let RW_REL      = 0;
+  let ELEM_SIZE   = 0;
+
+  let ARRAY_SIZE  = 0;
+  let COMP_MASK   = 1;
+  let BURST_COUNT = 0;
+  let VPM         = 0;
+  let EOP         = 0;
+  let MARK        = 0;
+  let BARRIER     = 1;
+}
+
+} // End usesCustomInserter = 1
+
+class VTX_READ_eg <int buffer_id, list<dag> pattern> : InstR600ISA <
+  (outs R600_TReg32_X:$dst),
+  (ins MEMxi:$ptr),
+  "VTX_READ_eg $dst, $ptr",
+  pattern
+>;
+
+def VTX_READ_PARAM_eg : VTX_READ_eg <0,
+  [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
+>;
+
+def VTX_READ_GLOBAL_eg : VTX_READ_eg <1,
+  [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))]
+>;
+
 }
 
 let Predicates = [isCayman] in {




More information about the mesa-commit mailing list