[Intel-gfx] [PATCH v5 17/19] drm/i915: Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset
Goel, Akash
akash.goel at intel.com
Mon Jul 27 07:34:50 PDT 2015
On 7/16/2015 3:03 PM, Michel Thierry wrote:
> There are some allocations that must be only referenced by 32-bit
> offsets. To limit the chances of having the first 4GB already full,
> objects not requiring this workaround use DRM_MM_SEARCH_BELOW/
> DRM_MM_CREATE_TOP flags
>
> In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
> General State Heap (GSH) or Instruction State Heap (ISH) must be in a
> 32-bit range, because the General State Offset and Instruction State
> Offset are limited to 32-bits.
>
> Objects must have EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag to indicate if
> they can be allocated above the 32-bit address range. To limit the
> chances of having the first 4GB already full, objects will use
> DRM_MM_SEARCH_BELOW + DRM_MM_CREATE_TOP flags when possible.
>
> v2: Changed flag logic from neeeds_32b, to supports_48b.
> v3: Moved 48-bit support flag back to exec_object. (Chris, Daniel)
> v4: Split pin flags into PIN_ZONE_4G and PIN_HIGH; update PIN_OFFSET_MASK
> to use last PIN_ defined instead of hard-coded value; use correct limit
> check in eb_vma_misplaced. (Chris)
> v5: Don't touch PIN_OFFSET_MASK and update workaround comment (Chris)
> v6: Apply pin-high for ggtt too (Chris)
>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk> (v4)
> Signed-off-by: Michel Thierry <michel.thierry at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 2 ++
> drivers/gpu/drm/i915/i915_gem.c | 14 ++++++++++++--
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 +++++++++++++
> include/uapi/drm/i915_drm.h | 3 ++-
> 4 files changed, 29 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 1dbbbf0..f79cc7b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2771,6 +2771,8 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
> #define PIN_OFFSET_BIAS (1<<3)
> #define PIN_USER (1<<4)
> #define PIN_UPDATE (1<<5)
> +#define PIN_ZONE_4G (1<<6)
> +#define PIN_HIGH (1<<7)
> #define PIN_OFFSET_MASK (~4095)
> int __must_check
> i915_gem_object_pin(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 76b7612..cd7e4b6 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3728,6 +3728,8 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> struct drm_i915_private *dev_priv = dev->dev_private;
> u32 fence_alignment, unfenced_alignment;
> u64 size, fence_size;
> + u32 search_flag = DRM_MM_SEARCH_DEFAULT;
> + u32 alloc_flag = DRM_MM_CREATE_DEFAULT;
> u64 start =
> flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
> u64 end =
> @@ -3771,6 +3773,14 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
> }
>
> + if (flags & PIN_HIGH) {
> + search_flag = DRM_MM_SEARCH_BELOW;
> + alloc_flag = DRM_MM_CREATE_TOP;
> + }
> +
> + if (flags & PIN_ZONE_4G)
> + end = (1ULL << 32);
Would this be fine for platforms, where only 2 GB of GGTT space is
available ? For GEN7 & older platforms, only GGTT would be used.
Shouldn't this check for PIN_ZONE_4G flag, be done only for PPGTT vm ?
For GGTT we have to obey the PIN_MAPPABLE flag, if set then 'end' will
be 256 MB.
If both PIN_MAPPABLE & PIN_ZONE_4G flags are set, the 'end' should
still be 256 MB, for GGTT vm.
So we need to mindful in defining the 'end' for platforms where
only GGTT would be used.
Best Regards
Akash
> +
> if (alignment == 0)
> alignment = flags & PIN_MAPPABLE ? fence_alignment :
> unfenced_alignment;
> @@ -3811,8 +3821,8 @@ search_free:
> size, alignment,
> obj->cache_level,
> start, end,
> - DRM_MM_SEARCH_DEFAULT,
> - DRM_MM_CREATE_DEFAULT);
> + search_flag,
> + alloc_flag);
> if (ret) {
> ret = i915_gem_evict_something(dev, vm, size, alignment,
> obj->cache_level,
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 923a3c4..209e8e2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -589,11 +589,20 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
> if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
> flags |= PIN_GLOBAL;
>
> + /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
> + * limit address to the first 4GBs for unflagged objects.
> + */
> + flags |= PIN_ZONE_4G;
> + if (entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS)
> + flags &= ~PIN_ZONE_4G;
> +
> if (!drm_mm_node_allocated(&vma->node)) {
> if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
> flags |= PIN_GLOBAL | PIN_MAPPABLE;
> if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
> flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
> + if ((flags & PIN_MAPPABLE) == 0)
> + flags |= PIN_HIGH;
> }
>
> ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
> @@ -671,6 +680,10 @@ eb_vma_misplaced(struct i915_vma *vma)
> if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
> return !only_mappable_for_reloc(entry->flags);
>
> + if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
> + (vma->node.start + vma->node.size) >= (1ULL << 32))
> + return true;
> +
> return false;
> }
>
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index e7c29f1..e4471e8 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -686,7 +686,8 @@ struct drm_i915_gem_exec_object2 {
> #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
> #define EXEC_OBJECT_NEEDS_GTT (1<<1)
> #define EXEC_OBJECT_WRITE (1<<2)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
> +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
> __u64 flags;
>
> __u64 rsvd1;
>
More information about the Intel-gfx
mailing list