[Intel-xe] [PATCH v4 4/9] drm/xe/xe_migrate: Use NULL 1G PTE mapped at 255GiB VA for ccs clear

Ghimiray, Himal Prasad himal.prasad.ghimiray at intel.com
Fri Dec 8 04:10:25 UTC 2023


On 07-12-2023 05:07, Matt Roper wrote:
> On Wed, Dec 06, 2023 at 10:01:21AM +0530, Himal Prasad Ghimiray wrote:
>> Get rid of the cleared bo, instead use null 1G PTE mapped at 255GiB
>> offset, this can be used for both dgfx and igfx.
>>
>> v2:
>>   - Remove xe_migrate::cleared_bo.
>>   - Add a comment for NULL mapping.(Thomas)
>>
>> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
>> Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_migrate.c | 65 ++++++---------------------------
>>   1 file changed, 11 insertions(+), 54 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
>> index 7ef068451b59..b4dd1b6d78f0 100644
>> --- a/drivers/gpu/drm/xe/xe_migrate.c
>> +++ b/drivers/gpu/drm/xe/xe_migrate.c
>> @@ -46,16 +46,12 @@ struct xe_migrate {
>>   	struct mutex job_mutex;
>>   	/** @pt_bo: Page-table buffer object. */
>>   	struct xe_bo *pt_bo;
>> -	/**
>> -	 * @cleared_bo: Zeroed out bo used as a source for CCS metadata clears
>> -	 */
>> -	struct xe_bo *cleared_bo;
>>   	/** @batch_base_ofs: VM offset of the migration batch buffer */
>>   	u64 batch_base_ofs;
>>   	/** @usm_batch_base_ofs: VM offset of the usm batch buffer */
>>   	u64 usm_batch_base_ofs;
>> -	/** @cleared_vram_ofs: VM offset of @cleared_bo. */
>> -	u64 cleared_vram_ofs;
>> +	/** @cleared_mem_ofs: VM offset of @cleared_bo. */
>> +	u64 cleared_mem_ofs;
>>   	/**
>>   	 * @fence: dma-fence representing the last migration job batch.
>>   	 * Protected by @job_mutex.
>> @@ -93,13 +89,9 @@ static void xe_migrate_fini(struct drm_device *dev, void *arg)
>>   
>>   	xe_vm_lock(m->q->vm, false);
>>   	xe_bo_unpin(m->pt_bo);
>> -	if (m->cleared_bo)
>> -		xe_bo_unpin(m->cleared_bo);
>>   	xe_vm_unlock(m->q->vm);
>>   
>>   	dma_fence_put(m->fence);
>> -	if (m->cleared_bo)
>> -		xe_bo_put(m->cleared_bo);
>>   	xe_bo_put(m->pt_bo);
>>   	drm_suballoc_manager_fini(&m->vm_update_sa);
>>   	mutex_destroy(&m->job_mutex);
>> @@ -125,41 +117,6 @@ static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr)
>>   	return addr + (256ULL << xe_pt_shift(2));
>>   }
>>   
>> -/*
>> - * For flat CCS clearing we need a cleared chunk of memory to copy from,
>> - * since the CCS clearing mode of XY_FAST_COLOR_BLT appears to be buggy
>> - * (it clears on only 14 bytes in each chunk of 16).
> Is XY_FAST_COLOR_BLT still broken on Xe2_LPG?  If not, have we checked
> to see whether there's any performance difference between doing a NULL
> copy vs a regular fill?
Will try to figure out this info.
>
> In general, this pre-existing comment should have been labeled with a
> workaround number so that we could properly track which platforms and
> steppings it applies to and know if/when we could potentially go back to
> using the dedicated fill instruction.
>
>> - * If clearing the main surface one can use the part of the main surface
>> - * already cleared, but for clearing as part of copying non-compressed
>> - * data out of system memory, we don't readily have a cleared part of
>> - * VRAM to copy from, so create one to use for that case.
>> - */
>> -static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm)
>> -{
>> -	struct xe_tile *tile = m->tile;
>> -	struct xe_device *xe = vm->xe;
>> -	size_t cleared_size;
>> -	u64 vram_addr;
>> -
>> -	if (!xe_device_has_flat_ccs(xe))
>> -		return 0;
>> -
>> -	cleared_size = xe_device_ccs_bytes(xe, MAX_PREEMPTDISABLE_TRANSFER);
>> -	cleared_size = PAGE_ALIGN(cleared_size);
>> -	m->cleared_bo = xe_bo_create_pin_map(xe, tile, vm, cleared_size,
>> -					     ttm_bo_type_kernel,
>> -					     XE_BO_CREATE_VRAM_IF_DGFX(tile) |
>> -					     XE_BO_CREATE_PINNED_BIT);
>> -	if (IS_ERR(m->cleared_bo))
>> -		return PTR_ERR(m->cleared_bo);
>> -
>> -	xe_map_memset(xe, &m->cleared_bo->vmap, 0, 0x00, cleared_size);
>> -	vram_addr = xe_bo_addr(m->cleared_bo, 0, XE_PAGE_SIZE);
>> -	m->cleared_vram_ofs = xe_migrate_vram_ofs(xe, vram_addr);
>> -
>> -	return 0;
>> -}
>> -
>>   static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
>>   				 struct xe_vm *vm)
>>   {
>> @@ -170,7 +127,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
>>   	u32 map_ofs, level, i;
>>   	struct xe_bo *bo, *batch = tile->mem.kernel_bb_pool->bo;
>>   	u64 entry;
>> -	int ret;
>>   
>>   	/* Can't bump NUM_PT_SLOTS too high */
>>   	BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/XE_PAGE_SIZE);
>> @@ -190,12 +146,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
>>   	if (IS_ERR(bo))
>>   		return PTR_ERR(bo);
>>   
>> -	ret = xe_migrate_create_cleared_bo(m, vm);
>> -	if (ret) {
>> -		xe_bo_put(bo);
>> -		return ret;
>> -	}
>> -
>>   	entry = vm->pt_ops->pde_encode_bo(bo, bo->size - XE_PAGE_SIZE, pat_index);
>>   	xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry);
>>   
>> @@ -265,6 +215,13 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
>>   			  (i + 1) * 8, u64, entry);
>>   	}
>>   
>> +	/* Set up a 1GiB NULL mapping at 255GiB offset. */
>> +	level = 2;
>> +	xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level + 255 * 8, u64,
>> +		  vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0)
>> +		  | XE_PTE_NULL);
>> +	m->cleared_mem_ofs = (255ULL << xe_pt_shift(level));
>> +
>>   	/* Identity map the entire vram at 256GiB offset */
>>   	if (IS_DGFX(xe)) {
>>   		u64 pos, ofs, flags;
>> @@ -618,7 +575,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
>>   		 * Otherwise if the bo doesn't have any CCS metadata attached,
>>   		 * we still need to clear it for security reasons.
>>   		 */
>> -		u64 ccs_src_ofs =  src_is_vram ? src_ofs : m->cleared_vram_ofs;
>> +		u64 ccs_src_ofs =  src_is_vram ? src_ofs : m->cleared_mem_ofs;
> Is the 'src_is_vram' check even correct for an igpu?  It seems like
> we're _always_ going to be copying the NULL CCS data now and will never
> be able to copy the real CCS data.
This check is being enhanced for igfx in [PATCH v4 7/9].
>
>
> Matt
>
>>   
>>   		emit_copy_ccs(gt, bb,
>>   			      dst_ofs, true,
>> @@ -1006,7 +963,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
>>   			   clear_vram);
>>   		if (xe_device_has_flat_ccs(xe) && clear_vram) {
>>   			emit_copy_ccs(gt, bb, clear_L0_ofs, true,
>> -				      m->cleared_vram_ofs, false, clear_L0);
>> +				      m->cleared_mem_ofs, false, clear_L0);
>>   			flush_flags = MI_FLUSH_DW_CCS;
>>   		}
>>   
>> -- 
>> 2.25.1
>>


More information about the Intel-xe mailing list