[PATCH libdrm] amdgpu: Add explicit dependency test.

Tue Nov 28 15:32:58 UTC 2017

The test is as following:

1) Create context A & B
2) Send a command submission using context A which fires up a compute shader.
3) The shader wait a bit and then write a value to a memory location.
4) Send a command submission using context B which writes another value to the same memory location, but having an explicit dependency on the first command submission.
5) Wait with the CPU for both submissions to finish and inspect the written value.

Test passes if the value seen in the memory location after both submissions is from command B.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
---
 tests/amdgpu/amdgpu_test.c |  18 ++++
 tests/amdgpu/basic_tests.c | 264 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 282 insertions(+)

diff --git a/tests/amdgpu/amdgpu_test.c b/tests/amdgpu/amdgpu_test.c
index 50da17c..8fa3399 100644
--- a/tests/amdgpu/amdgpu_test.c
+++ b/tests/amdgpu/amdgpu_test.c
@@ -49,6 +49,7 @@
 #include "CUnit/Basic.h"
 
 #include "amdgpu_test.h"
+#include "amdgpu_internal.h"
 
 /* Test suit names */
 #define BASIC_TESTS_STR "Basic Tests"
@@ -401,9 +402,20 @@ static int amdgpu_find_device(uint8_t bus, uint16_t dev)
 
 static void amdgpu_disable_suits()
 {
+	amdgpu_device_handle device_handle;
+	uint32_t major_version, minor_version, family_id;
 	int i;
 	int size = sizeof(suites_active_stat) / sizeof(suites_active_stat[0]);
 
+	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
+				   &minor_version, &device_handle))
+		return;
+
+	family_id = device_handle->info.family_id;
+
+	if (amdgpu_device_deinitialize(device_handle))
+		return;
+
 	/* Set active status for suits based on their policies */
 	for (i = 0; i < size; ++i)
 		if (amdgpu_set_suite_active(suites_active_stat[i].pName,
@@ -420,6 +432,12 @@ static void amdgpu_disable_suits()
 
 	if (amdgpu_set_test_active(BO_TESTS_STR, "Metadata", CU_FALSE))
 		fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg());
+
+
+	/* This test was ran on GFX8 and GFX9 only */
+	if (family_id < AMDGPU_FAMILY_VI || family_id > AMDGPU_FAMILY_RV)
+		if (amdgpu_set_test_active(BASIC_TESTS_STR, "Sync dependency Test", CU_FALSE))
+			fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg());
 }
 
 /* The main() function for setting up and running the tests.
diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c
index e7f48e3..a78cf52 100644
--- a/tests/amdgpu/basic_tests.c
+++ b/tests/amdgpu/basic_tests.c
@@ -50,6 +50,7 @@ static void amdgpu_command_submission_multi_fence(void);
 static void amdgpu_command_submission_sdma(void);
 static void amdgpu_userptr_test(void);
 static void amdgpu_semaphore_test(void);
+static void amdgpu_sync_dependency_test(void);
 
 static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
@@ -63,6 +64,7 @@ CU_TestInfo basic_tests[] = {
 	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
 	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
 	{ "SW semaphore Test",  amdgpu_semaphore_test },
+	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
 	CU_TEST_INFO_NULL,
 };
 #define BUFFER_SIZE (8 * 1024)
@@ -226,6 +228,60 @@ CU_TestInfo basic_tests[] = {
 		 */
 #              define PACKET3_DMA_DATA_SI_CP_SYNC     (1 << 31)
 
+
+#define PKT3_CONTEXT_CONTROL                   0x28
+#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
+#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
+#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)
+
+#define PKT3_CLEAR_STATE                       0x12
+
+#define PKT3_SET_SH_REG                        0x76
+#define		PACKET3_SET_SH_REG_START			0x00002c00
+
+#define	PACKET3_DISPATCH_DIRECT				0x15
+
+
+/* gfx 8 */
+#define mmCOMPUTE_PGM_LO                                                        0x2e0c
+#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
+#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
+#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
+#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
+#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
+#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07
+
+
+
+#define SWAP_32(num) ((num>>24)&0xff) | \
+			((num<<8)&0xff0000) | \
+			((num>>8)&0xff00) | \
+			((num<<24)&0xff000000)
+
+
+/* Shader code
+ * void main()
+{
+
+	float x = some_input;
+		for (unsigned i = 0; i < 1000000; i++)
+  	x = sin(x);
+
+	u[0] = 42u;
+}
+*/
+
+static  uint32_t shader_bin[] = {
+	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
+	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
+	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
+	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
+};
+
+#define CODE_OFFSET 512
+#define DATA_OFFSET 1024
+
+
 int suite_basic_tests_init(void)
 {
 	struct amdgpu_gpu_info gpu_info = {0};
@@ -1386,3 +1442,211 @@ static void amdgpu_userptr_test(void)
 
 	wait(NULL);
 }
+
+static void amdgpu_sync_dependency_test(void)
+{
+	amdgpu_context_handle context_handle[2];
+	amdgpu_bo_handle ib_result_handle;
+	void *ib_result_cpu;
+	uint64_t ib_result_mc_address;
+	struct amdgpu_cs_request ibs_request;
+	struct amdgpu_cs_ib_info ib_info;
+	struct amdgpu_cs_fence fence_status;
+	uint32_t expired;
+	int i, j, r, instance;
+	amdgpu_bo_list_handle bo_list;
+	amdgpu_va_handle va_handle;
+	static uint32_t *ptr;
+	uint64_t seq_no;
+
+	CU_ASSERT_EQUAL(r, 0);
+
+	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
+	CU_ASSERT_EQUAL(r, 0);
+	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
+	CU_ASSERT_EQUAL(r, 0);
+
+	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
+			AMDGPU_GEM_DOMAIN_GTT, 0,
+						    &ib_result_handle, &ib_result_cpu,
+						    &ib_result_mc_address, &va_handle);
+	CU_ASSERT_EQUAL(r, 0);
+
+	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
+			       &bo_list);
+	CU_ASSERT_EQUAL(r, 0);
+
+	ptr = ib_result_cpu;
+	i = 0;
+
+	memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));
+
+	/* Dispatch minimal init config and verify it's executed */
+	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
+	ptr[i++] = 0x80000000;
+	ptr[i++] = 0x80000000;
+
+	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
+	ptr[i++] = 0x80000000;
+
+
+	/* Program compute regs */
+	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
+	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
+	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
+	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
+
+
+	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
+	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
+	/*
+	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
+	                                      SGPRS = 1
+	                                      PRIORITY = 0
+	                                      FLOAT_MODE = 192 (0xc0)
+	                                      PRIV = 0
+	                                      DX10_CLAMP = 1
+	                                      DEBUG_MODE = 0
+	                                      IEEE_MODE = 0
+	                                      BULKY = 0
+	                                      CDBG_USER = 0
+	 *
+	 */
+	ptr[i++] = 0x002c0040;
+
+
+	/*
+	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
+	                                      USER_SGPR = 8
+	                                      TRAP_PRESENT = 0
+	                                      TGID_X_EN = 0
+	                                      TGID_Y_EN = 0
+	                                      TGID_Z_EN = 0
+	                                      TG_SIZE_EN = 0
+	                                      TIDIG_COMP_CNT = 0
+	                                      EXCP_EN_MSB = 0
+	                                      LDS_SIZE = 0
+	                                      EXCP_EN = 0
+	 *
+	 */
+	ptr[i++] = 0x00000010;
+
+
+/*
+ * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
+                                         WAVESIZE = 0
+ *
+ */
+	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
+	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
+	ptr[i++] = 0x00000100;
+
+	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
+	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
+	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
+	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
+
+	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
+	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
+	ptr[i++] = 0;
+
+	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
+	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
+	ptr[i++] = 1;
+	ptr[i++] = 1;
+	ptr[i++] = 1;
+
+
+	/* Dispatch */
+	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+	ptr[i++] = 1;
+	ptr[i++] = 1;
+	ptr[i++] = 1;
+	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
+
+
+	while (i & 7)
+		ptr[i++] =  0xffff1000; /* type3 nop packet */
+
+	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
+	ib_info.ib_mc_address = ib_result_mc_address;
+	ib_info.size = i;
+
+	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
+	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
+	ibs_request.ring = 0;
+	ibs_request.number_of_ibs = 1;
+	ibs_request.ibs = &ib_info;
+	ibs_request.resources = bo_list;
+	ibs_request.fence_info.handle = NULL;
+
+	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1);
+	CU_ASSERT_EQUAL(r, 0);
+	seq_no = ibs_request.seq_no;
+
+
+
+	/* Prepare second command with dependency on the first */
+	j = i;
+	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
+	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+	ptr[i++] = 0xfffffffc & ib_result_mc_address + DATA_OFFSET * 4;
+	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
+	ptr[i++] = 99;
+
+	while (i & 7)
+		ptr[i++] =  0xffff1000; /* type3 nop packet */
+
+	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
+	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
+	ib_info.size = i - j;
+
+	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
+	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
+	ibs_request.ring = 0;
+	ibs_request.number_of_ibs = 1;
+	ibs_request.ibs = &ib_info;
+	ibs_request.resources = bo_list;
+	ibs_request.fence_info.handle = NULL;
+
+	ibs_request.number_of_dependencies = 1;
+
+	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
+	ibs_request.dependencies[0].context = context_handle[1];
+	ibs_request.dependencies[0].ip_instance = 0;
+	ibs_request.dependencies[0].ring = 0;
+	ibs_request.dependencies[0].fence = seq_no;
+
+
+	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1);
+	CU_ASSERT_EQUAL(r, 0);
+
+
+	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
+	fence_status.context = context_handle[0];
+	fence_status.ip_type = AMDGPU_HW_IP_GFX;
+	fence_status.ip_instance = 0;
+	fence_status.ring = 0;
+	fence_status.fence = ibs_request.seq_no;
+
+	r = amdgpu_cs_query_fence_status(&fence_status,
+		       AMDGPU_TIMEOUT_INFINITE,0, &expired);
+	CU_ASSERT_EQUAL(r, 0);
+
+	/* Expect the second command to wait for shader to complete */
+	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
+
+	r = amdgpu_bo_list_destroy(bo_list);
+	CU_ASSERT_EQUAL(r, 0);
+
+	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
+				     ib_result_mc_address, 4096);
+	CU_ASSERT_EQUAL(r, 0);
+
+	r = amdgpu_cs_ctx_free(context_handle[0]);
+	CU_ASSERT_EQUAL(r, 0);
+	r = amdgpu_cs_ctx_free(context_handle[1]);
+	CU_ASSERT_EQUAL(r, 0);
+
+	free(ibs_request.dependencies);
+}
-- 
2.7.4