[Intel-gfx] [PATCH 22/26] drm/i915: Split out i915_gem_object_set_tiling()
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Jan 9 15:43:46 UTC 2017
On 31/12/2016 12:06, Chris Wilson wrote:
> Expose an interface for changing the tiling and stride on an object,
> that includes the complexity of checking for conflicting bindings and
> fence registers.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_gem_object.h | 3 +
> drivers/gpu/drm/i915/i915_gem_tiling.c | 238 +++++++++++++++++----------------
> 2 files changed, 128 insertions(+), 113 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
> index 6a368de9d81e..789bb44c9149 100644
> --- a/drivers/gpu/drm/i915/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> @@ -317,6 +317,9 @@ i915_gem_object_get_stride(struct drm_i915_gem_object *obj)
> return obj->tiling_and_stride & STRIDE_MASK;
> }
>
> +int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
> + unsigned int tiling, unsigned int stride);
> +
> static inline struct intel_engine_cs *
> i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
> {
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index 6c5b8a0e8325..4ec2620b7bf9 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -60,61 +60,56 @@
>
> /* Check pitch constriants for all chips & tiling formats */
> static bool
> -i915_tiling_ok(struct drm_i915_private *dev_priv,
> - int stride, int size, int tiling_mode)
> +i915_tiling_ok(struct drm_i915_gem_object *obj,
> + unsigned int tiling, unsigned int stride)
Should it, or should it not, now have an _obj(ect)_ somewhere in the
function name? For example i915_gem_object_tiling_ok(), or
i915_gem_tiling_ok_for_object()?
> {
> - int tile_width;
> + struct drm_i915_private *i915 = to_i915(obj->base.dev);
> + unsigned int tile_width;
>
> /* Linear is always fine */
> - if (tiling_mode == I915_TILING_NONE)
> + if (tiling == I915_TILING_NONE)
> return true;
>
> - if (tiling_mode > I915_TILING_LAST)
> + if (tiling > I915_TILING_LAST)
> return false;
>
> - if (IS_GEN2(dev_priv) ||
> - (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev_priv)))
> - tile_width = 128;
> - else
> - tile_width = 512;
> -
> /* check maximum stride & object size */
> /* i965+ stores the end address of the gtt mapping in the fence
> * reg, so dont bother to check the size */
> - if (INTEL_GEN(dev_priv) >= 7) {
> + if (INTEL_GEN(i915) >= 7) {
> if (stride / 128 > GEN7_FENCE_MAX_PITCH_VAL)
> return false;
> - } else if (INTEL_GEN(dev_priv) >= 4) {
> + } else if (INTEL_GEN(i915) >= 4) {
> if (stride / 128 > I965_FENCE_MAX_PITCH_VAL)
> return false;
> } else {
> if (stride > 8192)
> return false;
>
> - if (IS_GEN3(dev_priv)) {
> - if (size > I830_FENCE_MAX_SIZE_VAL << 20)
> + if (IS_GEN3(i915)) {
> + if (obj->base.size > I830_FENCE_MAX_SIZE_VAL << 20)
> return false;
> } else {
> - if (size > I830_FENCE_MAX_SIZE_VAL << 19)
> + if (obj->base.size > I830_FENCE_MAX_SIZE_VAL << 19)
> return false;
> }
> }
>
> - if (stride < tile_width)
> + if (IS_GEN2(i915) ||
> + (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(i915)))
> + tile_width = 128;
> + else
> + tile_width = 512;
> +
> + if (stride & (tile_width - 1))
> return false;
>
> /* 965+ just needs multiples of tile width */
> - if (INTEL_GEN(dev_priv) >= 4) {
> - if (stride & (tile_width - 1))
> - return false;
> + if (INTEL_GEN(i915) >= 4)
> return true;
> - }
>
> /* Pre-965 needs power of two tile widths */
Is this comment actually referring to the stride? tile_width is always a
power of two since it is set above.
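For reference, tile_width can only be 128 or 512 here, both powers of two,
so the power-of-two requirement can only be about the stride. Just to
illustrate what the pre-965 constraint boils down to - this helper is not
from the patch, purely a sketch:

	/* Pre-965 fence registers need a power-of-two stride. */
	static inline bool pre_i965_stride_ok(unsigned int stride)
	{
		/* Equivalent to is_power_of_2(stride): exactly one bit set. */
		return stride != 0 && (stride & (stride - 1)) == 0;
	}

So perhaps the comment should say "Pre-965 needs power of two strides".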
> - if (stride & (stride - 1))
> - return false;
> -
> - return true;
> + return is_power_of_2(stride);
> }
Looks OK, but it would have been better to leave the optimising bits out
of this patch.
> static bool i915_vma_fence_prepare(struct i915_vma *vma,
> @@ -163,6 +158,99 @@ i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj,
> return 0;
> }
>
> +int
> +i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
> + unsigned int tiling, unsigned int stride)
> +{
> + struct drm_i915_private *i915 = to_i915(obj->base.dev);
> + struct i915_vma *vma;
> + int err;
> +
> + /* Make sure we don't cross-contaminate obj->tiling_and_stride */
> + BUILD_BUG_ON(I915_TILING_LAST & STRIDE_MASK);
> +
> + GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride));
> + GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE));
Why is i915_tiling_ok() not checking for this and handling it gracefully?
It might even be user-triggerable; not sure, since I don't see the whole
function in the diff.
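Something along these lines near the top of i915_tiling_ok() is what I
have in mind - only a sketch, the exact placement and wording are
illustrative:

	/* A linear object must have no stride, a tiled one must have one. */
	if ((stride == 0) != (tiling == I915_TILING_NONE))
		return false;

That way a mismatched (tiling, stride) pair coming in via the ioctl would
be rejected with -EINVAL rather than relying on a GEM_BUG_ON on an
internal path. Whether that is safe against the existing ABI (the ioctl
zeroes args->stride for the untiled case only after i915_tiling_ok() has
run) would need checking.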
> + lockdep_assert_held(&i915->drm.struct_mutex);
> +
> + if ((tiling | stride) == obj->tiling_and_stride)
> + return 0;
> +
> + if (obj->pin_display || obj->framebuffer_references)
> + return -EBUSY;
> +
> + /* We need to rebind the object if its current allocation
> + * no longer meets the alignment restrictions for its new
> + * tiling mode. Otherwise we can just leave it alone, but
> + * need to ensure that any fence register is updated before
> + * the next fenced (either through the GTT or by the BLT unit
> + * on older GPUs) access.
> + *
> + * After updating the tiling parameters, we then flag whether
> + * we need to update an associated fence register. Note this
> + * has to also include the unfenced register the GPU uses
> + * whilst executing a fenced command for an untiled object.
> + */
> +
> + err = i915_gem_object_fence_prepare(obj, tiling, stride);
> + if (err)
> + return err;
> +
> + /* If the memory has unknown (i.e. varying) swizzling, we pin the
> + * pages to prevent them being swapped out and causing corruption
> + * due to the change in swizzling.
> + */
> + mutex_lock(&obj->mm.lock);
> + if (obj->mm.pages &&
> + obj->mm.madv == I915_MADV_WILLNEED &&
> + i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
> + if (tiling == I915_TILING_NONE) {
> + GEM_BUG_ON(!obj->mm.quirked);
> + __i915_gem_object_unpin_pages(obj);
> + obj->mm.quirked = false;
> + }
> + if (!i915_gem_object_is_tiled(obj)) {
> + GEM_BUG_ON(!obj->mm.quirked);
> + __i915_gem_object_pin_pages(obj);
> + obj->mm.quirked = true;
> + }
> + }
> + mutex_unlock(&obj->mm.lock);
> +
> + list_for_each_entry(vma, &obj->vma_list, obj_link) {
> + if (!i915_vma_is_ggtt(vma))
> + break;
> +
> + vma->fence_size =
> + i915_gem_get_ggtt_size(i915, vma->size,
> + tiling, stride);
> + vma->fence_alignment =
> + i915_gem_get_ggtt_alignment(i915, vma->size,
> + tiling, stride);
> +
> + if (vma->fence)
> + vma->fence->dirty = true;
> + }
> +
> + obj->tiling_and_stride = tiling | stride;
> +
> + /* Force the fence to be reacquired for GTT access */
> + i915_gem_release_mmap(obj);
> +
> + /* Try to preallocate memory required to save swizzling on put-pages */
> + if (i915_gem_object_needs_bit17_swizzle(obj)) {
> + if (!obj->bit_17) {
> + obj->bit_17 = kcalloc(BITS_TO_LONGS(obj->base.size >> PAGE_SHIFT),
> + sizeof(long), GFP_KERNEL);
> + }
> + } else {
> + kfree(obj->bit_17);
> + obj->bit_17 = NULL;
> + }
> +
> + return 0;
> +}
> +
> /**
> * i915_gem_set_tiling_ioctl - IOCTL handler to set tiling mode
> * @dev: DRM device
> @@ -182,26 +270,15 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file)
> {
> struct drm_i915_gem_set_tiling *args = data;
> - struct drm_i915_private *dev_priv = to_i915(dev);
> struct drm_i915_gem_object *obj;
> - int err = 0;
> -
> - /* Make sure we don't cross-contaminate obj->tiling_and_stride */
> - BUILD_BUG_ON(I915_TILING_LAST & STRIDE_MASK);
> + int err;
>
> obj = i915_gem_object_lookup(file, args->handle);
> if (!obj)
> return -ENOENT;
>
> - if (!i915_tiling_ok(dev_priv,
> - args->stride, obj->base.size, args->tiling_mode)) {
> - i915_gem_object_put(obj);
> - return -EINVAL;
> - }
> -
> - mutex_lock(&dev->struct_mutex);
> - if (obj->pin_display || obj->framebuffer_references) {
> - err = -EBUSY;
> + if (!i915_tiling_ok(obj, args->tiling_mode, args->stride)) {
> + err = -EINVAL;
> goto err;
> }
>
> @@ -210,9 +287,9 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
> args->stride = 0;
> } else {
> if (args->tiling_mode == I915_TILING_X)
> - args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
> + args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_x;
> else
> - args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
> + args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_y;
>
> /* Hide bit 17 swizzling from the user. This prevents old Mesa
> * from aborting the application on sw fallbacks to bit 17,
> @@ -234,84 +311,19 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
> }
> }
>
> - if (args->tiling_mode != i915_gem_object_get_tiling(obj) ||
> - args->stride != i915_gem_object_get_stride(obj)) {
> - /* We need to rebind the object if its current allocation
> - * no longer meets the alignment restrictions for its new
> - * tiling mode. Otherwise we can just leave it alone, but
> - * need to ensure that any fence register is updated before
> - * the next fenced (either through the GTT or by the BLT unit
> - * on older GPUs) access.
> - *
> - * After updating the tiling parameters, we then flag whether
> - * we need to update an associated fence register. Note this
> - * has to also include the unfenced register the GPU uses
> - * whilst executing a fenced command for an untiled object.
> - */
> + err = mutex_lock_interruptible(&dev->struct_mutex);
> + if (err)
> + goto err;
>
> - err = i915_gem_object_fence_prepare(obj,
> - args->tiling_mode,
> - args->stride);
> - if (!err) {
> - struct i915_vma *vma;
> -
> - mutex_lock(&obj->mm.lock);
> - if (obj->mm.pages &&
> - obj->mm.madv == I915_MADV_WILLNEED &&
> - dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
> - if (args->tiling_mode == I915_TILING_NONE) {
> - GEM_BUG_ON(!obj->mm.quirked);
> - __i915_gem_object_unpin_pages(obj);
> - obj->mm.quirked = false;
> - }
> - if (!i915_gem_object_is_tiled(obj)) {
> - GEM_BUG_ON(!obj->mm.quirked);
> - __i915_gem_object_pin_pages(obj);
> - obj->mm.quirked = true;
> - }
> - }
> - mutex_unlock(&obj->mm.lock);
> -
> - list_for_each_entry(vma, &obj->vma_list, obj_link) {
> - if (!i915_vma_is_ggtt(vma))
> - break;
> -
> - vma->fence_size = i915_gem_get_ggtt_size(dev_priv, vma->size,
> - args->tiling_mode,
> - args->stride);
> - vma->fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, vma->size,
> - args->tiling_mode,
> - args->stride);
> -
> - if (vma->fence)
> - vma->fence->dirty = true;
> - }
> - obj->tiling_and_stride =
> - args->stride | args->tiling_mode;
> -
> - /* Force the fence to be reacquired for GTT access */
> - i915_gem_release_mmap(obj);
> - }
> - }
> - /* we have to maintain this existing ABI... */
> + err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride);
> + mutex_unlock(&dev->struct_mutex);
> +
> + /* We have to maintain this existing ABI... */
> args->stride = i915_gem_object_get_stride(obj);
> args->tiling_mode = i915_gem_object_get_tiling(obj);
>
> - /* Try to preallocate memory required to save swizzling on put-pages */
> - if (i915_gem_object_needs_bit17_swizzle(obj)) {
> - if (obj->bit_17 == NULL) {
> - obj->bit_17 = kcalloc(BITS_TO_LONGS(obj->base.size >> PAGE_SHIFT),
> - sizeof(long), GFP_KERNEL);
> - }
> - } else {
> - kfree(obj->bit_17);
> - obj->bit_17 = NULL;
> - }
> -
> err:
> i915_gem_object_put(obj);
> - mutex_unlock(&dev->struct_mutex);
> -
> return err;
> }
>
>
Regards,
Tvrtko