[Intel-gfx] [PATCH v3 12/37] drm/i915/blt: support copying objects
Chris Wilson
chris at chris-wilson.co.uk
Sat Aug 10 10:45:04 UTC 2019
Quoting Matthew Auld (2019-08-09 23:26:18)
> +struct i915_vma *intel_emit_vma_copy_blt(struct intel_engine_pool_node **p,
> + struct intel_context *ce,
> + struct i915_vma *src,
> + struct i915_vma *dst)
> +{
> + struct drm_i915_private *i915 = ce->vm->i915;
> + const u32 block_size = S16_MAX * PAGE_SIZE;
> + struct intel_engine_pool_node *pool;
> + struct i915_vma *batch;
> + u64 src_offset, dst_offset;
> + u64 count;
> + u64 rem;
> + u32 size;
> + u32 *cmd;
> + int err;
> +
> + GEM_BUG_ON(src->size != dst->size);
> +
> + count = div_u64(dst->size, block_size);
> + size = (1 + 11 * count) * sizeof(u32);
> + size = round_up(size, PAGE_SIZE);
> + pool = intel_engine_pool_get(&ce->engine->pool, size);
> + if (IS_ERR(pool))
> + return ERR_CAST(pool);
> +
> + cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
> + if (IS_ERR(cmd)) {
> + err = PTR_ERR(cmd);
> + goto out_put;
> + }
> +
> + rem = src->size;
> + src_offset = src->node.start;
> + dst_offset = dst->node.start;
> +
> + do {
> + u32 size = min_t(u64, rem, block_size);
> +
> + GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
> +
> + if (INTEL_GEN(i915) >= 9) {
> + *cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
> + *cmd++ = BLT_DEPTH_32 | PAGE_SIZE;
> + *cmd++ = 0;
> + *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
> + *cmd++ = lower_32_bits(dst_offset);
> + *cmd++ = upper_32_bits(dst_offset);
> + *cmd++ = 0;
> + *cmd++ = PAGE_SIZE;
> + *cmd++ = lower_32_bits(src_offset);
> + *cmd++ = upper_32_bits(src_offset);
> + } else if (INTEL_GEN(i915) >= 8) {
> + *cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
> + *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
> + *cmd++ = 0;
> + *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
> + *cmd++ = lower_32_bits(dst_offset);
> + *cmd++ = upper_32_bits(dst_offset);
> + *cmd++ = 0;
> + *cmd++ = PAGE_SIZE;
> + *cmd++ = lower_32_bits(src_offset);
> + *cmd++ = upper_32_bits(src_offset);
> + } else {
> + *cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
> + *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
> + *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
> + *cmd++ = dst_offset;
> + *cmd++ = PAGE_SIZE;
> + *cmd++ = src_offset;
> + }
> +
> + /* Allow ourselves to be preempted in between blocks. */
> + *cmd++ = MI_ARB_CHECK;
> +
> + src_offset += size;
> + dst_offset += size;
> + rem -= size;
> + } while (rem);
> +
> + *cmd = MI_BATCH_BUFFER_END;
> + intel_gt_chipset_flush(ce->vm->gt);
> +
> + i915_gem_object_unpin_map(pool->obj);
> +
> + batch = i915_vma_instance(pool->obj, ce->vm, NULL);
> + if (IS_ERR(batch)) {
> + err = PTR_ERR(batch);
> + goto out_put;
> + }
> +
> + err = i915_vma_pin(batch, 0, 0, PIN_USER);
> + if (unlikely(err))
> + goto out_put;
> +
> + *p = pool;
> + return batch;
> +
> +out_put:
> + intel_engine_pool_put(pool);
> + return ERR_PTR(err);
> +}
> +
> +int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
> + struct drm_i915_gem_object *dst,
> + struct intel_context *ce)
> +{
> + struct drm_gem_object *objs[] = { &src->base, &dst->base };
> + struct i915_address_space *vm = ce->vm;
> + struct intel_engine_pool_node *pool;
> + struct ww_acquire_ctx acquire;
> + struct i915_vma *vma_src, *vma_dst;
> + struct i915_vma *batch;
> + struct i915_request *rq;
> + int err;
> +
> + vma_src = i915_vma_instance(src, vm, NULL);
> + if (IS_ERR(vma_src))
> + return PTR_ERR(vma_src);
> +
> + err = i915_vma_pin(vma_src, 0, 0, PIN_USER);
> + if (unlikely(err))
> + return err;
> +
> + vma_dst = i915_vma_instance(dst, vm, NULL);
> + if (IS_ERR(vma_dst))
> + goto out_unpin_src;
> +
> + err = i915_vma_pin(vma_dst, 0, 0, PIN_USER);
> + if (unlikely(err))
> + goto out_unpin_src;
> +
> + intel_engine_pm_get(ce->engine);
> + batch = intel_emit_vma_copy_blt(&pool, ce, vma_src, vma_dst);
> + if (IS_ERR(batch)) {
> + err = PTR_ERR(batch);
> + goto out_unpin_dst;
> + }
> +
> + rq = intel_context_create_request(ce);
> + if (IS_ERR(rq)) {
> + err = PTR_ERR(rq);
> + goto out_batch;
> + }
> +
> + i915_vma_lock(batch);
> + err = i915_vma_move_to_active(batch, rq, 0);
> + i915_vma_unlock(batch);
> + if (unlikely(err))
> + goto out_request;
> +
> + err = intel_engine_pool_mark_active(pool, rq);
> + if (unlikely(err))
> + goto out_request;
> +
> + err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
> + if (unlikely(err))
> + goto out_request;
> +
> + if (src->cache_dirty & ~src->cache_coherent)
> + i915_gem_clflush_object(src, 0);
> +
> + if (dst->cache_dirty & ~dst->cache_coherent)
> + i915_gem_clflush_object(dst, 0);
> +
> + err = i915_request_await_object(rq, src, false);
> + if (unlikely(err))
> + goto out_unlock;
> +
> + err = i915_vma_move_to_active(vma_src, rq, 0);
> + if (unlikely(err))
> + goto out_unlock;
> +
> + err = i915_request_await_object(rq, dst, true);
> + if (unlikely(err))
> + goto out_unlock;
> +
> + err = i915_vma_move_to_active(vma_dst, rq, EXEC_OBJECT_WRITE);
> + if (unlikely(err))
> + goto out_unlock;
Strictly speaking, wait on all of the objects first, and only then set up
all of the signals. That avoids any nasty cycles in the dependency graph,
such as when someone passes in src == dst. Time for another selftest ;)
	for (i = 0; i < ARRAY_SIZE(obj); i++) {
		clflush_object(obj[i]);
		await_object(rq, obj[i]);
	}
	for (i = 0; i < ARRAY_SIZE(obj); i++)
		move_to_active(obj[i]);
> +
> + if (ce->engine->emit_init_breadcrumb) {
> + err = ce->engine->emit_init_breadcrumb(rq);
> + if (unlikely(err))
> + goto out_unlock;
> + }
> +
> + err = ce->engine->emit_bb_start(rq,
> + batch->node.start, batch->node.size,
> + 0);
> +out_unlock:
> + drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
> +out_request:
> + if (unlikely(err))
> + i915_request_skip(rq, err);
> +
> + i915_request_add(rq);
> +out_batch:
> + i915_vma_unpin(batch);
> + intel_engine_pool_put(pool);
> +out_unpin_dst:
> + i915_vma_unpin(vma_dst);
> + intel_engine_pm_put(ce->engine);
> +out_unpin_src:
> + i915_vma_unpin(vma_src);
> + return err;
> +}
More information about the Intel-gfx
mailing list