[Intel-gfx] [PATCH v3] igt/gem_trtt: Exercise the TRTT hardware
Goel, Akash
akash.goel at intel.com
Thu Mar 3 15:38:25 UTC 2016
Thanks for the review.
On 3/3/2016 3:34 PM, Chris Wilson wrote:
> On Thu, Mar 03, 2016 at 10:25:59AM +0530, akash.goel at intel.com wrote:
>> +static bool uses_full_ppgtt(int fd, int min)
> gem_gtt_type()
Fine, will change it like this:
gem_gtt_type(fd) > 2
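i.e. the helper reduces to something like this (a sketch; gem_gtt_type()
reports the PPGTT level from the kernel, where a value above 2 means full
48-bit PPGTT, which TR-TT needs):

static bool uses_full_ppgtt(int fd)
{
	/* 0: no PPGTT, 1: aliasing, 2: full, 3: full 48-bit */
	return gem_gtt_type(fd) > 2;
}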
>
>> +static bool has_softpin_support(int fd)
>
> gem_has_softpin()
Fine, will use gem_has_softpin().
>
>> +static int emit_store_dword(int fd, uint32_t *cmd_buf, uint32_t dw_offset,
>> + uint64_t vaddr, uint32_t data)
>> +{
>> + /* Check that softpin addresses are in the correct form */
>> + assert(vaddr == gen8_canonical_addr(vaddr));
>> +
>> + /* SDI cannot write to unaligned addresses */
>> + assert((vaddr & 3) == 0);
>> +
>> + cmd_buf[dw_offset++] = MI_STORE_DWORD_IMM;
>> + cmd_buf[dw_offset++] = vaddr & 0xFFFFFFFC;
>> + cmd_buf[dw_offset++] = (vaddr >> 32) & 0xFFFF; /* bits 32:47 */
>> + cmd_buf[dw_offset++] = data;
>> +
>> + return dw_offset;
>> +}
>> +
>> +/* emit_store_qword
>> + * populate batch buffer with MI_STORE_DWORD_IMM command
>> + * @fd: drm file descriptor
>> + * @cmd_buf: batch buffer
>> + * @dw_offset: write offset in batch buffer
>> + * @vaddr: destination Virtual address
>> + * @data: u64 data to be stored at destination
>> + */
>> +static int emit_store_qword(int fd, uint32_t *cmd_buf, uint32_t dw_offset,
>> + uint64_t vaddr, uint64_t data)
>> +{
>> + /* Check that softpin addresses are in the correct form */
>> + assert(vaddr == gen8_canonical_addr(vaddr));
>> +
>> + /* SDI cannot write to unaligned addresses */
>> + assert((vaddr & 3) == 0);
>> +
>> + cmd_buf[dw_offset++] = MI_STORE_DWORD_IMM | 0x3;
>> + cmd_buf[dw_offset++] = vaddr & 0xFFFFFFFC;
>
> You just asserted above that the low bits aren't set.
>
>> + cmd_buf[dw_offset++] = (vaddr >> 32) & 0xFFFF; /* bits 32:47 */
>
> This conflicts with the value that would be set by the kernel (if it had
> to do the relocation). Who is correct? If not required, please don't raise
> the spectre of devastating bugs.
>
Sorry, will modify it like this:
cmd_buf[dw_offset++] = (uint32_t)vaddr;
cmd_buf[dw_offset++] = (uint32_t)(vaddr >> 32);
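With that, the full helper would look roughly like (a sketch):

static int emit_store_qword(int fd, uint32_t *cmd_buf, uint32_t dw_offset,
			    uint64_t vaddr, uint64_t data)
{
	/* Check that softpin addresses are in the correct form */
	assert(vaddr == gen8_canonical_addr(vaddr));

	/* SDI cannot write to unaligned addresses */
	assert((vaddr & 3) == 0);

	/* the 0x3 flag selects a qword write */
	cmd_buf[dw_offset++] = MI_STORE_DWORD_IMM | 0x3;
	cmd_buf[dw_offset++] = (uint32_t)vaddr;
	cmd_buf[dw_offset++] = (uint32_t)(vaddr >> 32);
	cmd_buf[dw_offset++] = (uint32_t)data;
	cmd_buf[dw_offset++] = (uint32_t)(data >> 32);

	return dw_offset;
}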
>> + cmd_buf[dw_offset++] = data;
>> + cmd_buf[dw_offset++] = data >> 32;
>> +
>> + return dw_offset;
>> +}
>> +
>> +/* emit_bb_end
>> + * populate batch buffer with MI_BATCH_BUFFER_END command
>> + * @fd: drm file descriptor
>> + * @cmd_buf: batch buffer
>> + * @dw_offset: write offset in batch buffer
>> + */
>> +static int emit_bb_end(int fd, uint32_t *cmd_buf, uint32_t dw_offset)
>> +{
>> + cmd_buf[dw_offset++] = MI_BATCH_BUFFER_END;
>> + cmd_buf[dw_offset++] = 0;
>
> Why? execbuf.batch_len must be aligned, but the CS parser doesn't care,
> and there is no guarantee that you are aligned coming into
> emit_bb_end().
>
Sorry, will change it like this:
dw_offset = ALIGN(dw_offset, 2);
cmd_buf[dw_offset++] = MI_BATCH_BUFFER_END;
dw_offset++;
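i.e. the complete helper becomes something like:

static int emit_bb_end(int fd, uint32_t *cmd_buf, uint32_t dw_offset)
{
	/* align to a qword, as execbuf.batch_len must be aligned */
	dw_offset = ALIGN(dw_offset, 2);
	cmd_buf[dw_offset++] = MI_BATCH_BUFFER_END;
	dw_offset++;

	return dw_offset;
}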
>> +
>> + return dw_offset;
>> +}
>> +
>> +/* setup_execbuffer
>> + * helper for buffer execution
>> + * @execbuf - pointer to execbuffer
>> + * @exec_object - pointer to exec object2 struct
>> + * @ring - ring to be used
>> + * @buffer_count - how many buffers to submit
>> + * @batch_length - length of batch buffer
>> + */
>> +static void setup_execbuffer(struct drm_i915_gem_execbuffer2 *execbuf,
>> + struct drm_i915_gem_exec_object2 *exec_object,
>> + uint32_t ctx_id, int ring, int buffer_count, int batch_length)
>> +{
>> + memset(execbuf, 0, sizeof(*execbuf));
>> +
>> + execbuf->buffers_ptr = (unsigned long)exec_object;
>> + execbuf->buffer_count = buffer_count;
>> + execbuf->batch_len = batch_length;
>> + execbuf->flags = ring;
>> + i915_execbuffer2_set_context_id(*execbuf, ctx_id);
>> +}
>> +
>> +#define TABLE_SIZE 0x1000
>> +#define TILE_SIZE 0x10000
>> +
>> +#define TRTT_SEGMENT_SIZE (1ULL << 44)
>> +#define PPGTT_SIZE (1ULL << 48)
>> +
>> +#define NULL_TILE_PATTERN 0xFFFFFFFF
>> +#define INVALID_TILE_PATTERN 0xFFFFFFFE
>> +
>> +struct local_i915_gem_context_trtt_param {
>> + uint64_t segment_base_addr;
>> + uint64_t l3_table_address;
>> + uint32_t invd_tile_val;
>> + uint32_t null_tile_val;
>> +};
>> +
>> +/* setup_trtt
>> + * Helper function to request KMD to enable TRTT
>> + * @fd - drm fd
>> + * @ctx_id - id of the context for which TRTT is to be enabled
>> + * @l3_table_address - GFX address of the L3 table
>> + * @segment_base_addr - offset of the TRTT segment in PPGTT space
>> + */
>> +static void
>> +setup_trtt(int fd, uint32_t ctx_id, uint64_t l3_table_address,
>> + uint64_t segment_base_addr)
>> +{
>> + struct local_i915_gem_context_param ctx_param;
>> + struct local_i915_gem_context_trtt_param trtt_param;
>> +
>> + memset(&ctx_param, 0, sizeof(ctx_param));
>> +
>> + trtt_param.null_tile_val = NULL_TILE_PATTERN;
>> + trtt_param.invd_tile_val = INVALID_TILE_PATTERN;
>> + trtt_param.l3_table_address = l3_table_address;
>> + trtt_param.segment_base_addr = segment_base_addr;
>> +
>> + ctx_param.context = ctx_id;
>> + ctx_param.size = sizeof(trtt_param);
>> + ctx_param.param = LOCAL_CONTEXT_PARAM_TRTT;
>> + ctx_param.value = (uint64_t)&trtt_param;
>> +
>> + gem_context_set_param(fd, &ctx_param);
>> +}
>> +
>> +/* bo_alloc_setup
>> + * allocate bo and populate exec object
>> + * @exec_object2 - pointer to exec object
>> + * @bo_size - buffer size
>> + * @flags - exec flags
>> + * @bo_offset - pointer to the current PPGTT offset
>> + */
>> +static void bo_alloc_setup(int fd, struct drm_i915_gem_exec_object2 *exec_object2,
>> + uint64_t bo_size, uint64_t flags, uint64_t *bo_offset)
>> +{
>> + memset(exec_object2, 0, sizeof(*exec_object2));
>> + exec_object2->handle = gem_create(fd, bo_size);
>> + exec_object2->flags = flags;
>> +
>> + if (bo_offset)
>> + {
>> + exec_object2->offset = *bo_offset;
>> + *bo_offset += bo_size;
>> + }
>> +}
>> +
>> +/* submit_trtt_context
>> + * This helper function will create a new context if the TR-TT segment
>> + * base address is not zero, allocate an L3 table page, 2 pages apiece
>> + * for the L2/L1 tables, and a couple of 64 KB data buffers, matching the
>> + * tile size. The 2 data buffers will be mapped to the 2 ends of the TRTT
>> + * virtual space. A series of MI_STORE_DWORD_IMM commands will be added to
>> + * the batch buffer, first to update the TR-TT table entries and then to
>> + * update the data buffers via their TR-TT VA, exercising the table
>> + * programming done previously.
>> + * Invoke CONTEXT_SETPARAM ioctl to request KMD to enable TRTT.
>> + * Invoke execbuffer to submit the batch buffer.
>> + * Verify value of first DWORD in the 2 data buffer matches the data asked
>> + * to be written by the GPU.
>> + */
>> +static void submit_trtt_context(int fd, uint64_t segment_base_addr)
>> +{
>> + enum {
>> + L3_TBL,
>> + L2_TBL1,
>> + L2_TBL2,
>> + L1_TBL1,
>> + L1_TBL2,
>> + DATA1,
>> + DATA2,
>> + BATCH,
>> + NUM_BUFFERS,
>> + };
>> +
>> + int ring, len = 0;
>> + uint32_t *ptr;
>> + struct drm_i915_gem_execbuffer2 execbuf;
>> + struct drm_i915_gem_exec_object2 exec_object2[NUM_BUFFERS];
>> + uint32_t batch_buffer[BO_SIZE];
>> + uint32_t ctx_id, data, last_entry_offset;
>> + uint64_t cur_ppgtt_off, exec_flags;
>> + uint64_t first_tile_addr, last_tile_addr;
>> +
>> + first_tile_addr = segment_base_addr;
>> + last_tile_addr = first_tile_addr + TRTT_SEGMENT_SIZE - TILE_SIZE;
>> +
>> + if (segment_base_addr == 0) {
>> + /* Use the default context for first iteration */
>> + ctx_id = 0;
>
> Seems like a variable the callee wants to control. (Otherwise you are
> missing a significant test for non-default contexts). It also implies
> that we don't test what happens when we call set-trtt-context twice on a
> context.
>
As per the current logic, the callee uses a non-default context for any
non-zero TR-TT segment start location.
Should a new subtest be added to check the case where set-trtt-context is
called twice? That second call is expected to fail; see the sketch below.
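A rough sketch (__setup_trtt() is a hypothetical variant of setup_trtt()
above that returns the ioctl error instead of asserting; the exact errno of
the second call depends on the KMD implementation):

static void test_set_trtt_twice(int fd, uint64_t l3_table_address)
{
	uint32_t ctx_id = gem_context_create(fd);

	/* first enabling of TR-TT on the context should succeed */
	igt_assert(__setup_trtt(fd, ctx_id, l3_table_address, 0) == 0);
	/* TR-TT params are expected to be immutable once set */
	igt_assert(__setup_trtt(fd, ctx_id, l3_table_address, 0) != 0);

	gem_context_destroy(fd, ctx_id);
}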
>> + /* To avoid conflict with the TR-TT segment */
>> + cur_ppgtt_off = TRTT_SEGMENT_SIZE;
>> + } else {
>> + /* Create a new context to have different TRTT settings
>> + * on every iteration.
>> + */
>> + ctx_id = gem_context_create(fd);
>> + cur_ppgtt_off = 0;
>> + }
>> +
>> + exec_flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>> +
>> + /* first allocate Batch buffer BO */
>> + bo_alloc_setup(fd, &exec_object2[BATCH], BO_SIZE, exec_flags, NULL);
>> +
>> + /* table BOs and data buffer BOs are written by GPU and are soft pinned */
>> + exec_flags |= (EXEC_OBJECT_WRITE | EXEC_OBJECT_PINNED);
>> +
>> + /* Allocate a L3 table BO */
>> + bo_alloc_setup(fd, &exec_object2[L3_TBL], TABLE_SIZE, exec_flags, &cur_ppgtt_off);
>> +
>> + /* Allocate two L2 table BOs */
>> + bo_alloc_setup(fd, &exec_object2[L2_TBL1], TABLE_SIZE, exec_flags, &cur_ppgtt_off);
>> + bo_alloc_setup(fd, &exec_object2[L2_TBL2], TABLE_SIZE, exec_flags, &cur_ppgtt_off);
>> +
>> + /* Allocate two L1 table BOs */
>> + bo_alloc_setup(fd, &exec_object2[L1_TBL1], TABLE_SIZE, exec_flags, &cur_ppgtt_off);
>> + bo_alloc_setup(fd, &exec_object2[L1_TBL2], TABLE_SIZE, exec_flags, &cur_ppgtt_off);
>> +
>> + /* Align the PPGTT offsets for the 2 data buffers to next 64 KB boundary */
>> + cur_ppgtt_off = ALIGN(cur_ppgtt_off, TILE_SIZE);
>> +
>> + /* Allocate two Data buffer BOs */
>> + bo_alloc_setup(fd, &exec_object2[DATA1], TILE_SIZE, exec_flags, &cur_ppgtt_off);
>> + bo_alloc_setup(fd, &exec_object2[DATA2], TILE_SIZE, exec_flags, &cur_ppgtt_off);
>> +
>> + /* Add commands to update the two L3 table entries to point them to the L2 tables*/
>> + last_entry_offset = 511*sizeof(uint64_t);
>> +
>> + len = emit_store_qword(fd, batch_buffer, len,
>> + exec_object2[L3_TBL].offset,
>> + exec_object2[L2_TBL1].offset);
>> +
>> + len = emit_store_qword(fd, batch_buffer, len,
>> + exec_object2[L3_TBL].offset + last_entry_offset,
>> + exec_object2[L2_TBL2].offset);
>> +
>> + /* Add commands to update an entry of 2 L2 tables to point them to the L1 tables*/
>> + len = emit_store_qword(fd, batch_buffer, len,
>> + exec_object2[L2_TBL1].offset,
>> + exec_object2[L1_TBL1].offset);
>> +
>> + len = emit_store_qword(fd, batch_buffer, len,
>> + exec_object2[L2_TBL2].offset + last_entry_offset,
>> + exec_object2[L1_TBL2].offset);
>> +
>> + /* Add commands to update an entry of 2 L1 tables to point them to the data buffers*/
>> + last_entry_offset = 1023*sizeof(uint32_t);
>> +
>> + len = emit_store_dword(fd, batch_buffer, len,
>> + exec_object2[L1_TBL1].offset,
>> + exec_object2[DATA1].offset >> 16);
>> +
>> + len = emit_store_dword(fd, batch_buffer, len,
>> + exec_object2[L1_TBL2].offset + last_entry_offset,
>> + exec_object2[DATA2].offset >> 16);
>> +
>> + /* Add commands to update the 2 data buffers, using their TRTT VA */
>> + data = 0x12345678;
>> + len = emit_store_dword(fd, batch_buffer, len, first_tile_addr, data);
>> + len = emit_store_dword(fd, batch_buffer, len, last_tile_addr, data);
>> +
>> + len = emit_bb_end(fd, batch_buffer, len);
>> + gem_write(fd, exec_object2[BATCH].handle, 0, batch_buffer, len*4);
>> +
>> + /* Request KMD to setup the TR-TT */
>> + setup_trtt(fd, ctx_id, exec_object2[L3_TBL].offset, first_tile_addr);
>> +
>> + ring = I915_EXEC_RENDER;
>> + setup_execbuffer(&execbuf, exec_object2, ctx_id, ring, NUM_BUFFERS, len*4);
>> +
>> + /* submit command buffer */
>> + gem_execbuf(fd, &execbuf);
>> +
>> + /* read the 2 data buffers to check for the value written by the GPU */
>> + ptr = mmap_bo(fd, exec_object2[DATA1].handle, TILE_SIZE);
>> + igt_fail_on_f(ptr[0] != data,
>> + "\nCPU read does not match GPU write,\
>> + expected: 0x%x, got: 0x%x\n",
>> + data, ptr[0]);
>
> Just igt_assert_eq_u32(ptr[0], expected);
Fine, will change.
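i.e. for each of the 2 data buffers the check reduces to:

ptr = mmap_bo(fd, exec_object2[DATA1].handle, TILE_SIZE);
igt_assert_eq_u32(ptr[0], data);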
>
>> +
>> + ptr = mmap_bo(fd, exec_object2[DATA2].handle, TILE_SIZE);
>> + igt_fail_on_f(ptr[0] != data,
>> + "\nCPU read does not match GPU write,\
>> + expected: 0x%x, got: 0x%x\n",
>> + data, ptr[0]);
>> +
>> + gem_close(fd, exec_object2[L3_TBL].handle);
>> + gem_close(fd, exec_object2[L2_TBL1].handle);
>> + gem_close(fd, exec_object2[L2_TBL2].handle);
>> + gem_close(fd, exec_object2[L1_TBL1].handle);
>> + gem_close(fd, exec_object2[L1_TBL2].handle);
>> + gem_close(fd, exec_object2[DATA1].handle);
>> + gem_close(fd, exec_object2[DATA2].handle);
>> + gem_close(fd, exec_object2[BATCH].handle);
>
> Before we destroy the context (or exit), how about a query_trtt().
> We should also query after create to ensure that the defaults are set.
> Just thinking that is better doing the query after several steps (i.e.
> the execbuf) rather than immediately after the set in order to give time
> for something to go wrong. We should also ensure that everything remains
> set after a GPU hang and suspend/resume.
Nice suggestion. Currently get-trtt-context will just retrieve the TR-TT
params stored by the driver for a given context; a sketch of such a query
helper is below.
Should the context image itself be read to ensure that the settings are
intact or not?
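For the driver-side params, the query helper could look like this (a
sketch, assuming the KMD implements the get side of
LOCAL_CONTEXT_PARAM_TRTT symmetrically to the set side):

static void query_trtt(int fd, uint32_t ctx_id, uint64_t l3_table_address,
		       uint64_t segment_base_addr)
{
	struct local_i915_gem_context_param ctx_param;
	struct local_i915_gem_context_trtt_param trtt_param;

	memset(&trtt_param, 0, sizeof(trtt_param));
	memset(&ctx_param, 0, sizeof(ctx_param));

	ctx_param.context = ctx_id;
	ctx_param.size = sizeof(trtt_param);
	ctx_param.param = LOCAL_CONTEXT_PARAM_TRTT;
	ctx_param.value = (uint64_t)&trtt_param;

	gem_context_get_param(fd, &ctx_param);

	/* check the settings survived execbuf/GPU hang/suspend-resume */
	igt_assert(trtt_param.l3_table_address == l3_table_address);
	igt_assert(trtt_param.segment_base_addr == segment_base_addr);
	igt_assert(trtt_param.null_tile_val == NULL_TILE_PATTERN);
	igt_assert(trtt_param.invd_tile_val == INVALID_TILE_PATTERN);
}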
Best regards
Akash
> -Chris
>