[PATCH v4 1/2] intel: 48b ppgtt support (EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag)

Michel Thierry michel.thierry at intel.com
Mon Oct 5 07:03:46 PDT 2015


On 9/14/2015 2:54 PM, Michał Winiarski wrote:
> On Thu, Sep 03, 2015 at 03:23:58PM +0100, Michel Thierry wrote:
>> Gen8+ supports 48-bit virtual addresses, but some objects must always be
>> allocated inside the 32-bit address range.
>>
>> In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
>> General State Heap (GSH) or Instruction State Heap (ISH) must be in a
>> 32-bit range, because the General State Offset and Instruction State Offset
>> are limited to 32-bits.
>>
>> The i915 driver has been modified to provide a flag to set when the 4GB
>> limit is not necessary in a given bo (EXEC_OBJECT_SUPPORTS_48B_ADDRESS).
>> 48-bit range will only be used when explicitly requested.
>>
>> Callers to the existing drm_intel_bo_emit_reloc function should set the
>> use_48b_address_range flag beforehand, in order to use full ppgtt range.
>>
>> v2: Make set/clear functions nops on pre-gen8 platforms, and use them
>>      internally in emit_reloc functions (Ben)
>>      s/48BADDRESS/48B_ADDRESS/ (Dave)
>> v3: Keep set/clear functions internal, no-one needs to use them directly.
>> v4: Don't set 48bit-support flag in emit reloc, check for ppgtt type
>>      before enabling set/clear function, print full offsets in debug
>>      statements, using port of lower_32_bits and upper_32_bits from linux
>>      kernel (Michał)
>>
>> References: http://lists.freedesktop.org/archives/intel-gfx/2015-July/072612.html
>> Cc: Ben Widawsky <ben at bwidawsk.net>
>> Cc: Michał Winiarski <michal.winiarski at intel.com>
>
> +Kristian
>
> LGTM.
> Acked-by: Michał Winiarski <michal.winiarski at intel.com>
>
>> Signed-off-by: Michel Thierry <michel.thierry at intel.com>


Hi Kristian,

More comments on this?
I've resent the mesa patch with the last changes 
(http://lists.freedesktop.org/archives/dri-devel/2015-October/091752.html).

Michał has agreed with the interface and will use it for his work.
If mesa doesn't plan to use this for now, it's ok. The kernel changes 
have been done to prevent any regressions when the support 48-bit flag 
is not set.

Thanks,

-Michel

>> ---
>>   include/drm/i915_drm.h    |  3 +-
>>   intel/intel_bufmgr.c      | 11 ++++++
>>   intel/intel_bufmgr.h      |  1 +
>>   intel/intel_bufmgr_gem.c  | 88 +++++++++++++++++++++++++++++++++++++----------
>>   intel/intel_bufmgr_priv.h | 14 ++++++++
>>   5 files changed, 97 insertions(+), 20 deletions(-)
>>
>> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
>> index ded43b1..426b25c 100644
>> --- a/include/drm/i915_drm.h
>> +++ b/include/drm/i915_drm.h
>> @@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 {
>>   #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
>>   #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
>>   #define EXEC_OBJECT_WRITE	(1<<2)
>> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
>> +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
>> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
>>   	__u64 flags;
>>
>>   	__u64 rsvd1;
>> diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
>> index 14ea9f9..0856e60 100644
>> --- a/intel/intel_bufmgr.c
>> +++ b/intel/intel_bufmgr.c
>> @@ -293,6 +293,17 @@ drm_intel_bo_madvise(drm_intel_bo *bo, int madv)
>>   }
>>
>>   int
>> +drm_intel_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable)
>> +{
>> +	if (bo->bufmgr->bo_use_48b_address_range) {
>> +		bo->bufmgr->bo_use_48b_address_range(bo, enable);
>> +		return 0;
>> +	}
>> +
>> +	return -ENODEV;
>> +}
>> +
>> +int
>>   drm_intel_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
>>   {
>>   	return bo->bufmgr->bo_references(bo, target_bo);
>> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
>> index 95eecb8..a14c78f 100644
>> --- a/intel/intel_bufmgr.h
>> +++ b/intel/intel_bufmgr.h
>> @@ -164,6 +164,7 @@ int drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
>>   int drm_intel_bo_flink(drm_intel_bo *bo, uint32_t * name);
>>   int drm_intel_bo_busy(drm_intel_bo *bo);
>>   int drm_intel_bo_madvise(drm_intel_bo *bo, int madv);
>> +int drm_intel_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable);
>>
>>   int drm_intel_bo_disable_reuse(drm_intel_bo *bo);
>>   int drm_intel_bo_is_reusable(drm_intel_bo *bo);
>> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
>> index 2723e21..09d82d2 100644
>> --- a/intel/intel_bufmgr_gem.c
>> +++ b/intel/intel_bufmgr_gem.c
>> @@ -83,6 +83,22 @@
>>   #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
>>   #define MAX2(A, B) ((A) > (B) ? (A) : (B))
>>
>> +/**
>> + * upper_32_bits - return bits 32-63 of a number
>> + * @n: the number we're accessing
>> + *
>> + * A basic shift-right of a 64- or 32-bit quantity.  Use this to suppress
>> + * the "right shift count >= width of type" warning when that quantity is
>> + * 32-bits.
>> + */
>> +#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))
>> +
>> +/**
>> + * lower_32_bits - return bits 0-31 of a number
>> + * @n: the number we're accessing
>> + */
>> +#define lower_32_bits(n) ((__u32)(n))
>> +
>>   typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
>>
>>   struct drm_intel_gem_bo_bucket {
>> @@ -237,6 +253,15 @@ struct _drm_intel_bo_gem {
>>   	bool is_userptr;
>>
>>   	/**
>> +	 * Boolean of whether this buffer can be placed in the full 48-bit
>> +	 * address range on gen8+.
>> +	 *
>> +	 * By default, buffers will be keep in a 32-bit range, unless this
>> +	 * flag is explicitly set.
>> +	 */
>> +	bool use_48b_address_range;
>> +
>> +	/**
>>   	 * Size in bytes of this buffer and its relocation descendents.
>>   	 *
>>   	 * Used to avoid costly tree walking in
>> @@ -395,14 +420,16 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
>>   			drm_intel_bo_gem *target_gem =
>>   			    (drm_intel_bo_gem *) target_bo;
>>
>> -			DBG("%2d: %d (%s)@0x%08llx -> "
>> -			    "%d (%s)@0x%08lx + 0x%08x\n",
>> +			DBG("%2d: %d (%s)@0x%08x %08x -> "
>> +			    "%d (%s)@0x%08x %08x + 0x%08x\n",
>>   			    i,
>>   			    bo_gem->gem_handle, bo_gem->name,
>> -			    (unsigned long long)bo_gem->relocs[j].offset,
>> +			    upper_32_bits(bo_gem->relocs[j].offset),
>> +			    lower_32_bits(bo_gem->relocs[j].offset),
>>   			    target_gem->gem_handle,
>>   			    target_gem->name,
>> -			    target_bo->offset64,
>> +			    upper_32_bits(target_bo->offset64),
>> +			    lower_32_bits(target_bo->offset64),
>>   			    bo_gem->relocs[j].delta);
>>   		}
>>   	}
>> @@ -468,11 +495,15 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
>>   	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
>>   	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
>>   	int index;
>> +	int flags = 0;
>> +
>> +	if (need_fence)
>> +		flags |= EXEC_OBJECT_NEEDS_FENCE;
>> +	if (bo_gem->use_48b_address_range)
>> +		flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>>
>>   	if (bo_gem->validate_index != -1) {
>> -		if (need_fence)
>> -			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
>> -				EXEC_OBJECT_NEEDS_FENCE;
>> +		bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags;
>>   		return;
>>   	}
>>
>> @@ -501,13 +532,9 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
>>   	bufmgr_gem->exec2_objects[index].alignment = bo->align;
>>   	bufmgr_gem->exec2_objects[index].offset = 0;
>>   	bufmgr_gem->exec_bos[index] = bo;
>> -	bufmgr_gem->exec2_objects[index].flags = 0;
>> +	bufmgr_gem->exec2_objects[index].flags = flags;
>>   	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
>>   	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
>> -	if (need_fence) {
>> -		bufmgr_gem->exec2_objects[index].flags |=
>> -			EXEC_OBJECT_NEEDS_FENCE;
>> -	}
>>   	bufmgr_gem->exec_count++;
>>   }
>>
>> @@ -780,6 +807,7 @@ retry:
>>   	bo_gem->used_as_reloc_target = false;
>>   	bo_gem->has_error = false;
>>   	bo_gem->reusable = true;
>> +	bo_gem->use_48b_address_range = false;
>>
>>   	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment);
>>
>> @@ -926,6 +954,7 @@ drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
>>   	bo_gem->used_as_reloc_target = false;
>>   	bo_gem->has_error = false;
>>   	bo_gem->reusable = false;
>> +	bo_gem->use_48b_address_range = false;
>>
>>   	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
>>
>> @@ -1081,6 +1110,7 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
>>   	bo_gem->bo.handle = open_arg.handle;
>>   	bo_gem->global_name = handle;
>>   	bo_gem->reusable = false;
>> +	bo_gem->use_48b_address_range = false;
>>
>>   	memclear(get_tiling);
>>   	get_tiling.handle = bo_gem->gem_handle;
>> @@ -1930,6 +1960,13 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>>   	return 0;
>>   }
>>
>> +static void
>> +drm_intel_gem_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable)
>> +{
>> +	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>> +	bo_gem->use_48b_address_range = enable;
>> +}
>> +
>>   static int
>>   drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>>   			    drm_intel_bo *target_bo, uint32_t target_offset,
>> @@ -2073,10 +2110,12 @@ drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
>>
>>   		/* Update the buffer offset */
>>   		if (bufmgr_gem->exec_objects[i].offset != bo->offset64) {
>> -			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
>> -			    bo_gem->gem_handle, bo_gem->name, bo->offset64,
>> -			    (unsigned long long)bufmgr_gem->exec_objects[i].
>> -			    offset);
>> +			DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
>> +			    bo_gem->gem_handle, bo_gem->name,
>> +			    upper_32_bits(bo->offset64),
>> +			    lower_32_bits(bo->offset64),
>> +			    upper_32_bits(bufmgr_gem->exec_objects[i].offset),
>> +			    lower_32_bits(bufmgr_gem->exec_objects[i].offset));
>>   			bo->offset64 = bufmgr_gem->exec_objects[i].offset;
>>   			bo->offset = bufmgr_gem->exec_objects[i].offset;
>>   		}
>> @@ -2094,9 +2133,12 @@ drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
>>
>>   		/* Update the buffer offset */
>>   		if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
>> -			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
>> -			    bo_gem->gem_handle, bo_gem->name, bo->offset64,
>> -			    (unsigned long long)bufmgr_gem->exec2_objects[i].offset);
>> +			DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
>> +			    bo_gem->gem_handle, bo_gem->name,
>> +			    upper_32_bits(bo->offset64),
>> +			    lower_32_bits(bo->offset64),
>> +			    upper_32_bits(bufmgr_gem->exec2_objects[i].offset),
>> +			    lower_32_bits(bufmgr_gem->exec2_objects[i].offset));
>>   			bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
>>   			bo->offset = bufmgr_gem->exec2_objects[i].offset;
>>   		}
>> @@ -2481,6 +2523,7 @@ drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int s
>>   	bo_gem->used_as_reloc_target = false;
>>   	bo_gem->has_error = false;
>>   	bo_gem->reusable = false;
>> +	bo_gem->use_48b_address_range = false;
>>
>>   	DRMINITLISTHEAD(&bo_gem->vma_list);
>>   	DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
>> @@ -3278,6 +3321,13 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
>>   		}
>>   	}
>>
>> +	if (bufmgr_gem->gen >= 8) {
>> +		gp.param = I915_PARAM_HAS_ALIASING_PPGTT;
>> +		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
>> +		if (ret == 0 && *gp.value == 3)
>> +			bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_intel_gem_bo_use_48b_address_range;
>> +	}
>> +
>>   	/* Let's go with one relocation per every 2 dwords (but round down a bit
>>   	 * since a power of two will mean an extra page allocation for the reloc
>>   	 * buffer).
>> diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
>> index 59ebd18..5c17ffb 100644
>> --- a/intel/intel_bufmgr_priv.h
>> +++ b/intel/intel_bufmgr_priv.h
>> @@ -152,6 +152,20 @@ struct _drm_intel_bufmgr {
>>   	void (*destroy) (drm_intel_bufmgr *bufmgr);
>>
>>   	/**
>> +	 * Indicate if the buffer can be placed anywhere in the full ppgtt
>> +	 * address range (2^48).
>> +	 *
>> +	 * Any resource used with flat/heapless (0x00000000-0xfffff000)
>> +	 * General State Heap (GSH) or Intructions State Heap (ISH) must
>> +	 * be in a 32-bit range. 48-bit range will only be used when explicitly
>> +	 * requested.
>> +	 *
>> +	 * \param bo Buffer to set the use_48b_address_range flag.
>> +	 * \param enable The flag value.
>> +	 */
>> +	void (*bo_use_48b_address_range) (drm_intel_bo *bo, uint32_t enable);
>> +
>> +	/**
>>   	 * Add relocation entry in reloc_buf, which will be updated with the
>>   	 * target buffer's real offset on on command submission.
>>   	 *
>> --
>> 2.5.0
>>


More information about the dri-devel mailing list