[Intel-gfx] [PATCH 1/4] drm/i915: Clearing buffer objects via blitter engine
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Jul 1 07:54:55 PDT 2015
Hi,
On 07/01/2015 10:25 AM, ankitprasad.r.sharma at intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma at intel.com>
>
> This patch adds support for clearing buffer objects via blitter
> engines. This is particularly useful for clearing out the memory
> from stolen region.
Because the CPU cannot access it? I would put that into the commit message,
since I think the cover letter does not go into the git history.
> v2: Add support for using execlists & PPGTT
>
> v3: Fix issues in legacy ringbuffer submission mode
>
> v4: Rebased to the latest drm-intel-nightly (Ankit)
>
> testcase: igt/gem_stolen
>
Nitpick: usually it is "Testcase:" and all tags grouped together.
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Signed-off-by: Deepak S <deepak.s at linux.intel.com>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma at intel.com>
> ---
> drivers/gpu/drm/i915/Makefile | 1 +
> drivers/gpu/drm/i915/i915_drv.h | 4 +
> drivers/gpu/drm/i915/i915_gem_exec.c | 201 ++++++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/intel_lrc.c | 4 +-
> drivers/gpu/drm/i915/intel_lrc.h | 3 +
> drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +-
> drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
> 7 files changed, 213 insertions(+), 3 deletions(-)
> create mode 100644 drivers/gpu/drm/i915/i915_gem_exec.c
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index de21965..1959314 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -24,6 +24,7 @@ i915-y += i915_cmd_parser.o \
> i915_gem_debug.o \
> i915_gem_dmabuf.o \
> i915_gem_evict.o \
> + i915_gem_exec.o \
> i915_gem_execbuffer.o \
> i915_gem_gtt.o \
> i915_gem.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index ea9caf2..d1e151e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3082,6 +3082,10 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
> int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
> int i915_gem_evict_everything(struct drm_device *dev);
>
> +/* i915_gem_exec.c */
> +int i915_gem_exec_clear_object(struct drm_i915_gem_object *obj,
> + struct drm_i915_file_private *file_priv);
> +
> /* belongs in i915_gem_gtt.h */
> static inline void i915_gem_chipset_flush(struct drm_device *dev)
> {
> diff --git a/drivers/gpu/drm/i915/i915_gem_exec.c b/drivers/gpu/drm/i915/i915_gem_exec.c
> new file mode 100644
> index 0000000..a07fda0
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_gem_exec.c
> @@ -0,0 +1,201 @@
> +/*
> + * Copyright © 2013 Intel Corporation
Is the year correct?
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + * Chris Wilson <chris at chris-wilson.co.uk>
And author?
> + *
> + */
> +
> +#include <drm/drmP.h>
> +#include <drm/i915_drm.h>
> +#include "i915_drv.h"
> +
> +#define GEN8_COLOR_BLT_CMD (2<<29 | 0x50<<22)
> +
> +#define BPP_8 0
> +#define BPP_16 (1<<24)
> +#define BPP_32 (1<<25 | 1<<24)
> +
> +#define ROP_FILL_COPY (0xf0 << 16)
> +
> +static int i915_gem_exec_flush_object(struct drm_i915_gem_object *obj,
> + struct intel_engine_cs *ring,
> + struct intel_context *ctx,
> + struct drm_i915_gem_request **req)
> +{
> + int ret;
> +
> + ret = i915_gem_object_sync(obj, ring, req);
> + if (ret)
> + return ret;
> +
> + if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) {
> + if (i915_gem_clflush_object(obj, false))
> + i915_gem_chipset_flush(obj->base.dev);
> + obj->base.write_domain &= ~I915_GEM_DOMAIN_CPU;
> + }
> + if (obj->base.write_domain & I915_GEM_DOMAIN_GTT) {
> + wmb();
> + obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
> + }
All this could be replaced with i915_gem_object_set_to_gtt_domain, no?
> +
> + return i915.enable_execlists ?
> + logical_ring_invalidate_all_caches(*req) :
> + intel_ring_invalidate_all_caches(*req);
And this is done on actual submission for you by the lower levels so you
don't need to call it directly.
> +}
> +
> +static void i915_gem_exec_dirty_object(struct drm_i915_gem_object *obj,
> + struct intel_engine_cs *ring,
> + struct i915_address_space *vm,
> + struct drm_i915_gem_request *req)
> +{
> + i915_gem_request_assign(&obj->last_write_req, req);
> + obj->base.read_domains = I915_GEM_DOMAIN_RENDER;
> + obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
> + i915_vma_move_to_active(i915_gem_obj_to_vma(obj, vm), req);
> + obj->dirty = 1;
> +
> + ring->gpu_caches_dirty = true;
> +}
> +
> +int i915_gem_exec_clear_object(struct drm_i915_gem_object *obj,
> + struct drm_i915_file_private *file_priv)
> +{
Function needs some good kerneldoc.
> + struct drm_device *dev = obj->base.dev;
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + struct intel_engine_cs *ring;
> + struct intel_context *ctx;
> + struct intel_ringbuffer *ringbuf;
> + struct i915_address_space *vm;
> + struct drm_i915_gem_request *req;
> + int ret = 0;
> +
> + lockdep_assert_held(&dev->struct_mutex);
I think there was some guidance that lockdep_assert_held is compiled
out when lockdep is not in the kernel and that WARN_ON is preferred. In
this case that would probably be WARN_ON_ONCE and return an error.
> +
> + ring = &dev_priv->ring[HAS_BLT(dev) ? BCS : RCS];
> + ctx = i915_gem_context_get(file_priv, DEFAULT_CONTEXT_HANDLE);
> + /* Allocate a request for this operation nice and early. */
> + ret = i915_gem_request_alloc(ring, ctx, &req);
> + if (ret)
> + return ret;
> +
> + if (ctx->ppgtt)
> + vm = &ctx->ppgtt->base;
> + else
> + vm = &dev_priv->gtt.base;
> +
> + if (i915.enable_execlists && !ctx->engine[ring->id].state) {
> + ret = intel_lr_context_deferred_create(ctx, ring);
i915_gem_context_get above and this call are very similar to what
i915_gem_validate_context does. It seems to me it would be better to
call the latter function here.
> + if (ret)
> + return ret;
This failure path (and some below) leaks the request. As I understand it,
i915_gem_request_cancel is the new API to be called here.
> + }
> +
> + ringbuf = ctx->engine[ring->id].ringbuf;
> +
> + ret = i915_gem_object_pin(obj, vm, PAGE_SIZE, 0);
> + if (ret)
> + return ret;
> +
> + if (obj->tiling_mode && INTEL_INFO(dev)->gen <= 3) {
> + ret = i915_gem_object_put_fence(obj);
> + if (ret)
> + goto unpin;
> + }
Why is this needed?
Could it be called unconditionally and still work?
At least I would recommend a comment explaining it.
> + ret = i915_gem_exec_flush_object(obj, ring, ctx, &req);
> + if (ret)
> + goto unpin;
As I said above one call to i915_gem_object_set_to_gtt_domain would be
enough I think.
> + if (i915.enable_execlists) {
> + if (dev_priv->info.gen >= 8) {
> + ret = intel_logical_ring_begin(req, 8);
> + if (ret)
> + goto unpin;
> +
> + intel_logical_ring_emit(ringbuf, GEN8_COLOR_BLT_CMD |
> + BLT_WRITE_RGBA |
> + (7-2));
> + intel_logical_ring_emit(ringbuf, BPP_32 |
> + ROP_FILL_COPY |
> + PAGE_SIZE);
> + intel_logical_ring_emit(ringbuf, 0);
> + intel_logical_ring_emit(ringbuf,
> + obj->base.size >> PAGE_SHIFT
> + << 16 | PAGE_SIZE / 4);
> + intel_logical_ring_emit(ringbuf,
> + i915_gem_obj_offset(obj, vm));
> + intel_logical_ring_emit(ringbuf, 0);
> + intel_logical_ring_emit(ringbuf, 0);
> + intel_logical_ring_emit(ringbuf, MI_NOOP);
> +
> + intel_logical_ring_advance(ringbuf);
> + } else {
> + DRM_ERROR("Execlists not supported for gen %d\n",
> + dev_priv->info.gen);
> + ret = -EINVAL;
I would put a WARN_ON_ONCE here, or even just return -EINVAL. If the
driver is so messed up in general that execlists are enabled < gen8 I
think there is no point logging errors about it from here. Would also
save you one indentation level.
> + goto unpin;
> + }
> + } else {
> + if (IS_GEN8(dev)) {
> + ret = intel_ring_begin(req, 8);
> + if (ret)
> + goto unpin;
> +
> + intel_ring_emit(ring, GEN8_COLOR_BLT_CMD |
> + BLT_WRITE_RGBA | (7-2));
> + intel_ring_emit(ring, BPP_32 |
> + ROP_FILL_COPY | PAGE_SIZE);
> + intel_ring_emit(ring, 0);
> + intel_ring_emit(ring,
> + obj->base.size >> PAGE_SHIFT << 16 |
> + PAGE_SIZE / 4);
> + intel_ring_emit(ring, i915_gem_obj_offset(obj, vm));
> + intel_ring_emit(ring, 0);
> + intel_ring_emit(ring, 0);
> + intel_ring_emit(ring, MI_NOOP);
Such a pity that these two emit blocks need to be duplicated, but I
guess it is what it is now.
> + } else {
> + ret = intel_ring_begin(req, 6);
> + if (ret)
> + goto unpin;
> +
> + intel_ring_emit(ring, COLOR_BLT_CMD |
> + BLT_WRITE_RGBA);
> + intel_ring_emit(ring, BPP_32 |
> + ROP_FILL_COPY | PAGE_SIZE);
> + intel_ring_emit(ring,
> + obj->base.size >> PAGE_SHIFT << 16 |
> + PAGE_SIZE);
> + intel_ring_emit(ring, i915_gem_obj_offset(obj, vm));
> + intel_ring_emit(ring, 0);
> + intel_ring_emit(ring, MI_NOOP);
> + }
> +
> + __intel_ring_advance(ring);
> + }
> +
> + i915_gem_exec_dirty_object(obj, ring, vm, req);
Where is this request actually submitted?
Regards,
Tvrtko
More information about the Intel-gfx
mailing list