[PATCH 6/7] drm/vmwgfx: Abstract placement selection
Thomas Zimmermann
tzimmermann at suse.de
Fri Jan 27 13:42:49 UTC 2023
Hi
Am 26.01.23 um 18:38 schrieb Zack Rusin:
> From: Zack Rusin <zackr at vmware.com>
>
> Problem with explicit placement selection in vmwgfx is that by the time
> the buffer object needs to be validated the information about which
> placement was supposed to be used is lost. To workaround this the driver
> had a bunch of state in various places e.g. as_mob or cpu_blit to
> somehow convey the information on which placement was intended.
>
> Fix is properly by allowing the buffer object to hold their preferred
Typo in the commit message: "Fix is properly" should read "Fix it properly".
> placement so it can be reused whenever needed. This makes the entire
> validation pipeline a lot easier both to understand and maintain.
>
> Signed-off-by: Zack Rusin <zackr at vmware.com>
> ---
> drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 156 +++++++--
> drivers/gpu/drm/vmwgfx/vmwgfx_bo.h | 26 +-
> drivers/gpu/drm/vmwgfx/vmwgfx_context.c | 9 +-
> drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c | 11 +-
> drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 3 +-
> drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 2 -
> drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 36 ++-
> drivers/gpu/drm/vmwgfx/vmwgfx_gem.c | 5 +-
> drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 22 +-
> drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 21 +-
> drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 11 +-
> drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h | 3 +-
> drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c | 13 +-
> drivers/gpu/drm/vmwgfx/vmwgfx_shader.c | 15 +-
> drivers/gpu/drm/vmwgfx/vmwgfx_so.c | 4 +-
> drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 304 ++----------------
> drivers/gpu/drm/vmwgfx/vmwgfx_streamoutput.c | 3 +-
> drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 6 +-
> drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 47 ---
> drivers/gpu/drm/vmwgfx/vmwgfx_va.c | 4 +-
> drivers/gpu/drm/vmwgfx/vmwgfx_validation.c | 74 ++---
> drivers/gpu/drm/vmwgfx/vmwgfx_validation.h | 6 +-
> 22 files changed, 325 insertions(+), 456 deletions(-)
>
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> index fa289e67143d..b94c32a59689 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> @@ -148,11 +148,17 @@ int vmw_bo_pin_in_vram_or_gmr(struct vmw_private *dev_priv,
> goto out_unreserve;
> }
>
> - ret = ttm_bo_validate(bo, &vmw_vram_gmr_placement, &ctx);
> + vmw_bo_placement_set(buf,
> + vmw_bo_domain_gmr | vmw_bo_domain_vram,
> + vmw_bo_domain_gmr);
> + ret = ttm_bo_validate(bo, &buf->placement, &ctx);
> if (likely(ret == 0) || ret == -ERESTARTSYS)
> goto out_unreserve;
>
> - ret = ttm_bo_validate(bo, &vmw_vram_placement, &ctx);
> + vmw_bo_placement_set(buf,
> + vmw_bo_domain_vram,
> + vmw_bo_domain_vram);
> + ret = ttm_bo_validate(bo, &buf->placement, &ctx);
>
> out_unreserve:
> if (!ret)
> @@ -203,17 +209,8 @@ int vmw_bo_pin_in_start_of_vram(struct vmw_private *dev_priv,
> {
> struct ttm_operation_ctx ctx = {interruptible, false };
> struct ttm_buffer_object *bo = &buf->base;
> - struct ttm_placement placement;
> - struct ttm_place place;
> int ret = 0;
>
> - place = vmw_vram_placement.placement[0];
> - place.lpfn = PFN_UP(bo->resource->size);
> - placement.num_placement = 1;
> - placement.placement = &place;
> - placement.num_busy_placement = 1;
> - placement.busy_placement = &place;
> -
> vmw_execbuf_release_pinned_bo(dev_priv);
> ret = ttm_bo_reserve(bo, interruptible, false, NULL);
> if (unlikely(ret != 0))
> @@ -229,14 +226,21 @@ int vmw_bo_pin_in_start_of_vram(struct vmw_private *dev_priv,
> bo->resource->start > 0 &&
> buf->base.pin_count == 0) {
> ctx.interruptible = false;
> - (void) ttm_bo_validate(bo, &vmw_sys_placement, &ctx);
> + vmw_bo_placement_set(buf,
> + vmw_bo_domain_sys,
> + vmw_bo_domain_sys);
> + (void) ttm_bo_validate(bo, &buf->placement, &ctx);
> }
>
> + vmw_bo_placement_set(buf,
> + vmw_bo_domain_vram,
> + vmw_bo_domain_vram);
> + buf->places[0].lpfn = PFN_UP(bo->resource->size);
> if (buf->base.pin_count > 0)
> - ret = ttm_resource_compat(bo->resource, &placement)
> + ret = ttm_resource_compat(bo->resource, &buf->placement)
> ? 0 : -EINVAL;
> else
> - ret = ttm_bo_validate(bo, &placement, &ctx);
> + ret = ttm_bo_validate(bo, &buf->placement, &ctx);
>
> /* For some reason we didn't end up at the start of vram */
> WARN_ON(ret == 0 && bo->resource->start != 0);
> @@ -444,7 +448,7 @@ int vmw_bo_create_kernel(struct vmw_private *dev_priv, unsigned long size,
> }
>
> int vmw_bo_create(struct vmw_private *vmw,
> - size_t size, struct ttm_placement *placement,
> + size_t size, u32 domain, u32 busy_domain,
> bool interruptible, bool pin,
> struct vmw_bo **p_bo)
> {
> @@ -457,7 +461,8 @@ int vmw_bo_create(struct vmw_private *vmw,
> }
>
> ret = vmw_bo_init(vmw, *p_bo, size,
> - placement, interruptible, pin);
> + domain, busy_domain,
> + interruptible, pin);
> if (unlikely(ret != 0))
> goto out_error;
>
> @@ -474,7 +479,8 @@ int vmw_bo_create(struct vmw_private *vmw,
> * @dev_priv: Pointer to the device private struct
> * @vmw_bo: Pointer to the struct vmw_bo to initialize.
> * @size: Buffer object size in bytes.
> - * @placement: Initial placement.
> + * @domain: Domain to put the bo in.
> + * @busy_domain: Domain to put the bo if busy.
> * @interruptible: Whether waits should be performed interruptible.
> * @pin: If the BO should be created pinned at a fixed location.
> * Returns: Zero on success, negative error code on error.
> @@ -483,7 +489,9 @@ int vmw_bo_create(struct vmw_private *vmw,
> */
> int vmw_bo_init(struct vmw_private *dev_priv,
> struct vmw_bo *vmw_bo,
> - size_t size, struct ttm_placement *placement,
> + size_t size,
> + u32 domain,
> + u32 busy_domain,
> bool interruptible, bool pin)
> {
> struct ttm_operation_ctx ctx = {
> @@ -502,8 +510,9 @@ int vmw_bo_init(struct vmw_private *dev_priv,
> size = ALIGN(size, PAGE_SIZE);
> drm_gem_private_object_init(vdev, &vmw_bo->base.base, size);
>
> + vmw_bo_placement_set(vmw_bo, domain, busy_domain);
> ret = ttm_bo_init_reserved(bdev, &vmw_bo->base, ttm_bo_type_device,
> - placement, 0, &ctx, NULL, NULL, vmw_bo_free);
> + &vmw_bo->placement, 0, &ctx, NULL, NULL, vmw_bo_free);
> if (unlikely(ret)) {
> return ret;
> }
> @@ -838,3 +847,112 @@ void vmw_bo_move_notify(struct ttm_buffer_object *bo,
> if (mem->mem_type != VMW_PL_MOB && bo->resource->mem_type == VMW_PL_MOB)
> vmw_resource_unbind_list(vbo);
> }
> +
> +static u32
> +set_placement_list(struct ttm_place *pl, u32 domain)
> +{
> + u32 n = 0;
> +
> + /*
> + * The placements are ordered according to our preferences
> + */
> + if (domain & vmw_bo_domain_mob) {
> + pl[n].mem_type = VMW_PL_MOB;
> + pl[n].flags = 0;
> + pl[n].fpfn = 0;
> + pl[n].lpfn = 0;
> + n++;
> + }
> + if (domain & vmw_bo_domain_gmr) {
> + pl[n].mem_type = VMW_PL_GMR;
> + pl[n].flags = 0;
> + pl[n].fpfn = 0;
> + pl[n].lpfn = 0;
> + n++;
> + }
> + if (domain & vmw_bo_domain_vram) {
> + pl[n].mem_type = TTM_PL_VRAM;
> + pl[n].flags = 0;
> + pl[n].fpfn = 0;
> + pl[n].lpfn = 0;
> + n++;
> + }
> + WARN_ON((domain & vmw_bo_domain_waitable_sys) != 0);
> + if (domain & vmw_bo_domain_waitable_sys) {
> + pl[n].mem_type = VMW_PL_SYSTEM;
> + pl[n].flags = 0;
> + pl[n].fpfn = 0;
> + pl[n].lpfn = 0;
> + n++;
> + }
> + if (domain & vmw_bo_domain_sys) {
> + pl[n].mem_type = TTM_PL_SYSTEM;
> + pl[n].flags = 0;
> + pl[n].fpfn = 0;
> + pl[n].lpfn = 0;
> + n++;
> + }
> +
> + /*
> + * In case of vmw_bo_domain_any the buffer
> + * should initially reside in the system memory,
> + * it will be validated/migrated to the correct
> + * placement later
> + */
> + if (!n) {
> + pl[n].mem_type = TTM_PL_SYSTEM;
> + pl[n].flags = 0;
> + pl[n].fpfn = 0;
> + pl[n].lpfn = 0;
> + n++;
> + }
> + return n;
> +}
> +
> +void vmw_bo_placement_set(struct vmw_bo *bo, u32 domain, u32 busy_domain)
> +{
> + struct ttm_device *bdev = bo->base.bdev;
> + struct vmw_private *vmw =
> + container_of(bdev, struct vmw_private, bdev);
> + struct ttm_placement *pl = &bo->placement;
> + bool mem_compatible = false;
> + u32 i;
> +
> + pl->placement = bo->places;
> + pl->num_placement = set_placement_list(bo->places, domain);
> +
> + if (drm_debug_enabled(DRM_UT_DRIVER) && bo->base.resource) {
> + for (i = 0; i < pl->num_placement; ++i) {
> + if (bo->base.resource->mem_type == TTM_PL_SYSTEM ||
> + bo->base.resource->mem_type == pl->placement[i].mem_type)
> + mem_compatible = true;
> + }
> + if (!mem_compatible)
> + drm_warn(&vmw->drm,
> + "%s: Incompatible transition from "
> + "bo->base.resource->mem_type = %u to domain = %u\n",
> + __func__, bo->base.resource->mem_type, domain);
> + }
> +
> + pl->busy_placement = bo->busy_places;
> + pl->num_busy_placement = set_placement_list(bo->busy_places, busy_domain);
> +}
> +
> +
> +void vmw_bo_placement_set_default_accelerated(struct vmw_bo *bo)
> +{
> + struct ttm_device *bdev = bo->base.bdev;
> + struct vmw_private *vmw =
> + container_of(bdev, struct vmw_private, bdev);
> + u32 domain = vmw_bo_domain_any;
> + u32 busy_domain = vmw_bo_domain_any;
> +
> + if (vmw->has_mob) {
> + domain = vmw_bo_domain_mob;
> + busy_domain = domain;
> + } else {
> + domain = vmw_bo_domain_gmr | vmw_bo_domain_vram;
> + busy_domain = domain;
> + }
> + vmw_bo_placement_set(bo, domain, busy_domain);
> +}
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
> index 4dcf37235cb0..3c7549ed42b0 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
> @@ -31,6 +31,7 @@
> #include "device_include/svga_reg.h"
>
> #include <drm/ttm/ttm_bo_api.h>
> +#include <drm/ttm/ttm_placement.h>
>
> #include <linux/rbtree_types.h>
> #include <linux/types.h>
> @@ -40,6 +41,15 @@ struct vmw_fence_obj;
> struct vmw_private;
> struct vmw_resource;
>
> +typedef enum {
No typedefs, please; just use 'enum vmw_bo_domain'. See
Documentation/process/coding-style.rst, Sec. 5. It talks about structs,
pointers and ints, but the same rules also apply to enums, I think.
> + vmw_bo_domain_any = 0,
> + vmw_bo_domain_sys = 1 << 0,
> + vmw_bo_domain_waitable_sys = 1 << 1,
> + vmw_bo_domain_vram = 1 << 2,
> + vmw_bo_domain_gmr = 1 << 3,
> + vmw_bo_domain_mob = 1 << 4,
Those constants should be in capital letters. Initialize them with BIT(),
like this:
	VMW_BO_DOMAIN_SYS = BIT(0),
Wouldn't it be better to define
	#define VMW_BO_DOMAIN_ANY INT_MAX
and have all bits set? I'd do that outside of the regular vmw_bo_domain
enum.
But what's the point of _domain_any? Most of the respective _context_func
fields should probably be initialized with the correct bits, and the use
in vmw_bo_placement_set_default_accelerated() looks somewhat bogus to me.
Best regards
Thomas
> +} vmw_bo_domain;
> +
> /**
> * struct vmw_bo - TTM buffer object with vmwgfx additions
> * @base: The TTM buffer object
> @@ -53,6 +63,11 @@ struct vmw_resource;
> */
> struct vmw_bo {
> struct ttm_buffer_object base;
> +
> + struct ttm_placement placement;
> + struct ttm_place places[5];
> + struct ttm_place busy_places[5];
> +
> struct rb_root res_tree;
>
> atomic_t cpu_writers;
> @@ -64,17 +79,24 @@ struct vmw_bo {
> struct vmw_bo_dirty *dirty;
> };
>
> +void vmw_bo_placement_set(struct vmw_bo *bo, u32 domain, u32 busy_domain);
> +void vmw_bo_placement_set_default_accelerated(struct vmw_bo *bo);
> +
> int vmw_bo_create_kernel(struct vmw_private *dev_priv,
> unsigned long size,
> struct ttm_placement *placement,
> struct ttm_buffer_object **p_bo);
> int vmw_bo_create(struct vmw_private *dev_priv,
> - size_t size, struct ttm_placement *placement,
> + size_t size,
> + u32 domain,
> + u32 busy_domain,
> bool interruptible, bool pin,
> struct vmw_bo **p_bo);
> int vmw_bo_init(struct vmw_private *dev_priv,
> struct vmw_bo *vmw_bo,
> - size_t size, struct ttm_placement *placement,
> + size_t size,
> + u32 domain,
> + u32 busy_domain,
> bool interruptible, bool pin);
> int vmw_bo_unref_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file_priv);
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
> index cc02be6a9884..e514639364cf 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
> @@ -76,7 +76,8 @@ static const struct vmw_res_func vmw_legacy_context_func = {
> .needs_backup = false,
> .may_evict = false,
> .type_name = "legacy contexts",
> - .backup_placement = NULL,
> + .domain = vmw_bo_domain_any,
> + .busy_domain = vmw_bo_domain_any,
> .create = NULL,
> .destroy = NULL,
> .bind = NULL,
> @@ -90,7 +91,8 @@ static const struct vmw_res_func vmw_gb_context_func = {
> .prio = 3,
> .dirty_prio = 3,
> .type_name = "guest backed contexts",
> - .backup_placement = &vmw_mob_placement,
> + .domain = vmw_bo_domain_mob,
> + .busy_domain = vmw_bo_domain_mob,
> .create = vmw_gb_context_create,
> .destroy = vmw_gb_context_destroy,
> .bind = vmw_gb_context_bind,
> @@ -104,7 +106,8 @@ static const struct vmw_res_func vmw_dx_context_func = {
> .prio = 3,
> .dirty_prio = 3,
> .type_name = "dx contexts",
> - .backup_placement = &vmw_mob_placement,
> + .domain = vmw_bo_domain_mob,
> + .busy_domain = vmw_bo_domain_mob,
> .create = vmw_dx_context_create,
> .destroy = vmw_dx_context_destroy,
> .bind = vmw_dx_context_bind,
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
> index 9193faae8dab..e2a7131ca528 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
> @@ -136,7 +136,8 @@ static const struct vmw_res_func vmw_cotable_func = {
> .prio = 3,
> .dirty_prio = 3,
> .type_name = "context guest backed object tables",
> - .backup_placement = &vmw_mob_placement,
> + .domain = vmw_bo_domain_mob,
> + .busy_domain = vmw_bo_domain_mob,
> .create = vmw_cotable_create,
> .destroy = vmw_cotable_destroy,
> .bind = vmw_cotable_bind,
> @@ -424,7 +425,8 @@ static int vmw_cotable_resize(struct vmw_resource *res, size_t new_size)
> * for the new COTable. Initially pin the buffer object to make sure
> * we can use tryreserve without failure.
> */
> - ret = vmw_bo_create(dev_priv, new_size, &vmw_mob_placement,
> + ret = vmw_bo_create(dev_priv, new_size,
> + vmw_bo_domain_mob, vmw_bo_domain_mob,
> true, true, &buf);
> if (ret) {
> DRM_ERROR("Failed initializing new cotable MOB.\n");
> @@ -465,7 +467,10 @@ static int vmw_cotable_resize(struct vmw_resource *res, size_t new_size)
> }
>
> /* Unpin new buffer, and switch backup buffers. */
> - ret = ttm_bo_validate(bo, &vmw_mob_placement, &ctx);
> + vmw_bo_placement_set(buf,
> + vmw_bo_domain_mob,
> + vmw_bo_domain_mob);
> + ret = ttm_bo_validate(bo, &buf->placement, &ctx);
> if (unlikely(ret != 0)) {
> DRM_ERROR("Failed validating new COTable backup buffer.\n");
> goto out_wait;
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> index 60d08185a71f..71cab2144e5c 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> @@ -399,7 +399,8 @@ static int vmw_dummy_query_bo_create(struct vmw_private *dev_priv)
> * user of the bo currently.
> */
> ret = vmw_bo_create(dev_priv, PAGE_SIZE,
> - &vmw_sys_placement, false, true, &vbo);
> + vmw_bo_domain_sys, vmw_bo_domain_sys,
> + false, true, &vbo);
> if (unlikely(ret != 0))
> return ret;
>
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
> index 6a9c5ad4f16e..a9157b2a94b5 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
> @@ -930,9 +930,7 @@ extern struct ttm_placement vmw_vram_placement;
> extern struct ttm_placement vmw_vram_sys_placement;
> extern struct ttm_placement vmw_vram_gmr_placement;
> extern struct ttm_placement vmw_sys_placement;
> -extern struct ttm_placement vmw_srf_placement;
> extern struct ttm_placement vmw_mob_placement;
> -extern struct ttm_placement vmw_nonfixed_placement;
> extern struct ttm_device_funcs vmw_bo_driver;
> extern const struct vmw_sg_table *
> vmw_bo_sg_table(struct ttm_buffer_object *bo);
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
> index 687c6926bc00..8ddb273d1e2f 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
> @@ -477,9 +477,13 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv,
> struct vmw_bo *dx_query_mob;
>
> dx_query_mob = vmw_context_get_dx_query_mob(ctx);
> - if (dx_query_mob)
> + if (dx_query_mob) {
> + vmw_bo_placement_set(dx_query_mob,
> + vmw_bo_domain_mob,
> + vmw_bo_domain_mob);
> ret = vmw_validation_add_bo(sw_context->ctx,
> - dx_query_mob, true, false);
> + dx_query_mob);
> + }
> }
>
> mutex_unlock(&dev_priv->binding_mutex);
> @@ -1035,17 +1039,18 @@ static int vmw_query_bo_switch_prepare(struct vmw_private *dev_priv,
>
> if (unlikely(sw_context->cur_query_bo != NULL)) {
> sw_context->needs_post_query_barrier = true;
> + vmw_bo_placement_set_default_accelerated(sw_context->cur_query_bo);
> ret = vmw_validation_add_bo(sw_context->ctx,
> - sw_context->cur_query_bo,
> - dev_priv->has_mob, false);
> + sw_context->cur_query_bo);
> if (unlikely(ret != 0))
> return ret;
> }
> sw_context->cur_query_bo = new_query_bo;
>
> +
> + vmw_bo_placement_set_default_accelerated(dev_priv->dummy_query_bo);
> ret = vmw_validation_add_bo(sw_context->ctx,
> - dev_priv->dummy_query_bo,
> - dev_priv->has_mob, false);
> + dev_priv->dummy_query_bo);
> if (unlikely(ret != 0))
> return ret;
> }
> @@ -1157,7 +1162,8 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv,
> drm_dbg(&dev_priv->drm, "Could not find or use MOB buffer.\n");
> return PTR_ERR(vmw_bo);
> }
> - ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo, true, false);
> + vmw_bo_placement_set(vmw_bo, vmw_bo_domain_mob, vmw_bo_domain_mob);
> + ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo);
> ttm_bo_put(&vmw_bo->base);
> if (unlikely(ret != 0))
> return ret;
> @@ -1211,7 +1217,9 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
> drm_dbg(&dev_priv->drm, "Could not find or use GMR region.\n");
> return PTR_ERR(vmw_bo);
> }
> - ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo, false, false);
> + vmw_bo_placement_set(vmw_bo, vmw_bo_domain_gmr | vmw_bo_domain_vram,
> + vmw_bo_domain_gmr | vmw_bo_domain_vram);
> + ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo);
> ttm_bo_put(&vmw_bo->base);
> if (unlikely(ret != 0))
> return ret;
> @@ -4361,13 +4369,17 @@ void __vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv,
> if (dev_priv->pinned_bo == NULL)
> goto out_unlock;
>
> - ret = vmw_validation_add_bo(&val_ctx, dev_priv->pinned_bo, false,
> - false);
> + vmw_bo_placement_set(dev_priv->pinned_bo,
> + vmw_bo_domain_gmr | vmw_bo_domain_vram,
> + vmw_bo_domain_gmr | vmw_bo_domain_vram);
> + ret = vmw_validation_add_bo(&val_ctx, dev_priv->pinned_bo);
> if (ret)
> goto out_no_reserve;
>
> - ret = vmw_validation_add_bo(&val_ctx, dev_priv->dummy_query_bo, false,
> - false);
> + vmw_bo_placement_set(dev_priv->dummy_query_bo,
> + vmw_bo_domain_gmr | vmw_bo_domain_vram,
> + vmw_bo_domain_gmr | vmw_bo_domain_vram);
> + ret = vmw_validation_add_bo(&val_ctx, dev_priv->dummy_query_bo);
> if (ret)
> goto out_no_reserve;
>
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
> index 10f4da34f73e..9798a11db508 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
> @@ -134,9 +134,8 @@ int vmw_gem_object_create_with_handle(struct vmw_private *dev_priv,
> int ret;
>
> ret = vmw_bo_create(dev_priv, size,
> - (dev_priv->has_mob) ?
> - &vmw_sys_placement :
> - &vmw_vram_sys_placement,
> + (dev_priv->has_mob) ? vmw_bo_domain_sys : vmw_bo_domain_vram,
> + vmw_bo_domain_sys,
> true, false, p_vbo);
>
> (*p_vbo)->base.base.funcs = &vmw_gem_object_funcs;
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
> index e83286e08837..5e2d501953cf 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
> @@ -1270,9 +1270,9 @@ int vmw_kms_readback(struct vmw_private *dev_priv,
> user_fence_rep, vclips, num_clips,
> NULL);
> case vmw_du_screen_target:
> - return vmw_kms_stdu_dma(dev_priv, file_priv, vfb,
> - user_fence_rep, NULL, vclips, num_clips,
> - 1, false, true, NULL);
> + return vmw_kms_stdu_readback(dev_priv, file_priv, vfb,
> + user_fence_rep, NULL, vclips, num_clips,
> + 1, NULL);
> default:
> WARN_ONCE(true,
> "Readback called with invalid display system.\n");
> @@ -2999,8 +2999,20 @@ int vmw_du_helper_plane_update(struct vmw_du_update_plane *update)
> struct vmw_framebuffer_bo *vfbbo =
> container_of(update->vfb, typeof(*vfbbo), base);
>
> - ret = vmw_validation_add_bo(&val_ctx, vfbbo->buffer, false,
> - update->cpu_blit);
> + /*
> + * For screen targets we want a mappable bo, for everything else we want
> + * accelerated i.e. host backed (vram or gmr) bo. If the display unit
> + * is not screen target then mob's shouldn't be available.
> + */
> + if (update->dev_priv->active_display_unit == vmw_du_screen_target)
> + vmw_bo_placement_set(vfbbo->buffer,
> + vmw_bo_domain_sys | vmw_bo_domain_mob | vmw_bo_domain_gmr,
> + vmw_bo_domain_sys | vmw_bo_domain_mob | vmw_bo_domain_gmr);
> + else {
> + WARN_ON(update->dev_priv->has_mob);
> + vmw_bo_placement_set_default_accelerated(vfbbo->buffer);
> + }
> + ret = vmw_validation_add_bo(&val_ctx, vfbbo->buffer);
> } else {
> struct vmw_framebuffer_surface *vfbs =
> container_of(update->vfb, typeof(*vfbs), base);
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
> index 7a97e53e8e51..1bdf601e7c35 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
> @@ -126,7 +126,6 @@ struct vmw_du_update_plane {
> struct vmw_framebuffer *vfb;
> struct vmw_fence_obj **out_fence;
> struct mutex *mutex;
> - bool cpu_blit;
> bool intr;
> };
>
> @@ -564,17 +563,15 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv,
> unsigned num_clips, int inc,
> struct vmw_fence_obj **out_fence,
> struct drm_crtc *crtc);
> -int vmw_kms_stdu_dma(struct vmw_private *dev_priv,
> - struct drm_file *file_priv,
> - struct vmw_framebuffer *vfb,
> - struct drm_vmw_fence_rep __user *user_fence_rep,
> - struct drm_clip_rect *clips,
> - struct drm_vmw_rect *vclips,
> - uint32_t num_clips,
> - int increment,
> - bool to_surface,
> - bool interruptible,
> - struct drm_crtc *crtc);
> +int vmw_kms_stdu_readback(struct vmw_private *dev_priv,
> + struct drm_file *file_priv,
> + struct vmw_framebuffer *vfb,
> + struct drm_vmw_fence_rep __user *user_fence_rep,
> + struct drm_clip_rect *clips,
> + struct drm_vmw_rect *vclips,
> + uint32_t num_clips,
> + int increment,
> + struct drm_crtc *crtc);
>
> int vmw_du_helper_plane_update(struct vmw_du_update_plane *update);
>
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> index 54e412f8c2d1..6780a36e6171 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> @@ -332,7 +332,7 @@ static int vmw_resource_buf_alloc(struct vmw_resource *res,
> }
>
> ret = vmw_bo_create(res->dev_priv, res->backup_size,
> - res->func->backup_placement,
> + res->func->domain, res->func->busy_domain,
> interruptible, false, &backup);
> if (unlikely(ret != 0))
> goto out_no_bo;
> @@ -529,8 +529,10 @@ vmw_resource_check_buffer(struct ww_acquire_ctx *ticket,
> return 0;
>
> backup_dirty = res->backup_dirty;
> + vmw_bo_placement_set(res->backup, res->func->domain,
> + res->func->busy_domain);
> ret = ttm_bo_validate(&res->backup->base,
> - res->func->backup_placement,
> + &res->backup->placement,
> &ctx);
>
> if (unlikely(ret != 0))
> @@ -968,9 +970,12 @@ int vmw_resource_pin(struct vmw_resource *res, bool interruptible)
> if (ret)
> goto out_no_validate;
> if (!vbo->base.pin_count) {
> + vmw_bo_placement_set(vbo,
> + res->func->domain,
> + res->func->busy_domain);
> ret = ttm_bo_validate
> (&vbo->base,
> - res->func->backup_placement,
> + &vbo->placement,
> &ctx);
> if (ret) {
> ttm_bo_unreserve(&vbo->base);
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h
> index 3b7438b2d289..2c24e0929faa 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h
> @@ -83,7 +83,8 @@ struct vmw_res_func {
> enum vmw_res_type res_type;
> bool needs_backup;
> const char *type_name;
> - struct ttm_placement *backup_placement;
> + u32 domain;
> + u32 busy_domain;
> bool may_evict;
> u32 prio;
> u32 dirty_prio;
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
> index a04897f04c13..2dfc732c98f8 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
> @@ -445,7 +445,8 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane,
> */
> vmw_overlay_pause_all(dev_priv);
> ret = vmw_bo_create(dev_priv, size,
> - &vmw_vram_placement,
> + vmw_bo_domain_vram,
> + vmw_bo_domain_vram,
> false, true, &vps->bo);
> vmw_overlay_resume_all(dev_priv);
> if (ret) {
> @@ -547,7 +548,6 @@ static int vmw_sou_plane_update_bo(struct vmw_private *dev_priv,
> bo_update.base.vfb = vfb;
> bo_update.base.out_fence = out_fence;
> bo_update.base.mutex = NULL;
> - bo_update.base.cpu_blit = false;
> bo_update.base.intr = true;
>
> bo_update.base.calc_fifo_size = vmw_sou_bo_fifo_size;
> @@ -708,7 +708,6 @@ static int vmw_sou_plane_update_surface(struct vmw_private *dev_priv,
> srf_update.base.vfb = vfb;
> srf_update.base.out_fence = out_fence;
> srf_update.base.mutex = &dev_priv->cmdbuf_mutex;
> - srf_update.base.cpu_blit = false;
> srf_update.base.intr = true;
>
> srf_update.base.calc_fifo_size = vmw_sou_surface_fifo_size;
> @@ -1224,7 +1223,9 @@ int vmw_kms_sou_do_bo_dirty(struct vmw_private *dev_priv,
> DECLARE_VAL_CONTEXT(val_ctx, NULL, 0);
> int ret;
>
> - ret = vmw_validation_add_bo(&val_ctx, buf, false, false);
> + vmw_bo_placement_set(buf, vmw_bo_domain_gmr | vmw_bo_domain_vram,
> + vmw_bo_domain_gmr | vmw_bo_domain_vram);
> + ret = vmw_validation_add_bo(&val_ctx, buf);
> if (ret)
> return ret;
>
> @@ -1330,7 +1331,9 @@ int vmw_kms_sou_readback(struct vmw_private *dev_priv,
> DECLARE_VAL_CONTEXT(val_ctx, NULL, 0);
> int ret;
>
> - ret = vmw_validation_add_bo(&val_ctx, buf, false, false);
> + vmw_bo_placement_set(buf, vmw_bo_domain_gmr | vmw_bo_domain_vram,
> + vmw_bo_domain_gmr | vmw_bo_domain_vram);
> + ret = vmw_validation_add_bo(&val_ctx, buf);
> if (ret)
> return ret;
>
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
> index b186d0993d83..07d4b41a87fb 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
> @@ -94,7 +94,8 @@ static const struct vmw_res_func vmw_gb_shader_func = {
> .prio = 3,
> .dirty_prio = 3,
> .type_name = "guest backed shaders",
> - .backup_placement = &vmw_mob_placement,
> + .domain = vmw_bo_domain_mob,
> + .busy_domain = vmw_bo_domain_mob,
> .create = vmw_gb_shader_create,
> .destroy = vmw_gb_shader_destroy,
> .bind = vmw_gb_shader_bind,
> @@ -108,7 +109,8 @@ static const struct vmw_res_func vmw_dx_shader_func = {
> .prio = 3,
> .dirty_prio = 3,
> .type_name = "dx shaders",
> - .backup_placement = &vmw_mob_placement,
> + .domain = vmw_bo_domain_mob,
> + .busy_domain = vmw_bo_domain_mob,
> .create = vmw_dx_shader_create,
> /*
> * The destroy callback is only called with a committed resource on
> @@ -893,7 +895,9 @@ int vmw_compat_shader_add(struct vmw_private *dev_priv,
> if (!vmw_shader_id_ok(user_key, shader_type))
> return -EINVAL;
>
> - ret = vmw_bo_create(dev_priv, size, &vmw_sys_placement,
> + ret = vmw_bo_create(dev_priv, size,
> + vmw_bo_domain_sys,
> + vmw_bo_domain_sys,
> true, true, &buf);
> if (unlikely(ret != 0))
> goto out;
> @@ -913,7 +917,10 @@ int vmw_compat_shader_add(struct vmw_private *dev_priv,
> WARN_ON(is_iomem);
>
> ttm_bo_kunmap(&map);
> - ret = ttm_bo_validate(&buf->base, &vmw_sys_placement, &ctx);
> + vmw_bo_placement_set(buf,
> + vmw_bo_domain_sys,
> + vmw_bo_domain_sys);
> + ret = ttm_bo_validate(&buf->base, &buf->placement, &ctx);
> WARN_ON(ret != 0);
> ttm_bo_unreserve(&buf->base);
>
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_so.c b/drivers/gpu/drm/vmwgfx/vmwgfx_so.c
> index 4ea32b01efc0..7afdb36045a7 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_so.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_so.c
> @@ -24,6 +24,7 @@
> *
> **************************************************************************/
>
> +#include "vmwgfx_bo.h"
> #include "vmwgfx_drv.h"
> #include "vmwgfx_resource_priv.h"
> #include "vmwgfx_so.h"
> @@ -84,7 +85,8 @@ static const struct vmw_res_func vmw_view_func = {
> .needs_backup = false,
> .may_evict = false,
> .type_name = "DX view",
> - .backup_placement = NULL,
> + .domain = vmw_bo_domain_any,
> + .busy_domain = vmw_bo_domain_any,
> .create = vmw_view_create,
> .commit_notify = vmw_view_commit_notify,
> };
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
> index 4745537fed25..0d711ee1dc36 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
> @@ -66,7 +66,6 @@ enum stdu_content_type {
> */
> struct vmw_stdu_dirty {
> struct vmw_kms_dirty base;
> - SVGA3dTransferType transfer;
> s32 left, right, top, bottom;
> s32 fb_left, fb_top;
> u32 pitch;
> @@ -137,12 +136,6 @@ static void vmw_stdu_destroy(struct vmw_screen_target_display_unit *stdu);
> * Screen Target Display Unit CRTC Functions
> *****************************************************************************/
>
> -static bool vmw_stdu_use_cpu_blit(const struct vmw_private *vmw)
> -{
> - return !(vmw->capabilities & SVGA_CAP_3D) || vmw->vram_size < (32 * 1024 * 1024);
> -}
> -
> -
> /**
> * vmw_stdu_crtc_destroy - cleans up the STDU
> *
> @@ -451,93 +444,6 @@ static void vmw_stdu_crtc_atomic_disable(struct drm_crtc *crtc,
> }
> }
>
> -/**
> - * vmw_stdu_bo_clip - Callback to encode a suface DMA command cliprect
> - *
> - * @dirty: The closure structure.
> - *
> - * Encodes a surface DMA command cliprect and updates the bounding box
> - * for the DMA.
> - */
> -static void vmw_stdu_bo_clip(struct vmw_kms_dirty *dirty)
> -{
> - struct vmw_stdu_dirty *ddirty =
> - container_of(dirty, struct vmw_stdu_dirty, base);
> - struct vmw_stdu_dma *cmd = dirty->cmd;
> - struct SVGA3dCopyBox *blit = (struct SVGA3dCopyBox *) &cmd[1];
> -
> - blit += dirty->num_hits;
> - blit->srcx = dirty->fb_x;
> - blit->srcy = dirty->fb_y;
> - blit->x = dirty->unit_x1;
> - blit->y = dirty->unit_y1;
> - blit->d = 1;
> - blit->w = dirty->unit_x2 - dirty->unit_x1;
> - blit->h = dirty->unit_y2 - dirty->unit_y1;
> - dirty->num_hits++;
> -
> - if (ddirty->transfer != SVGA3D_WRITE_HOST_VRAM)
> - return;
> -
> - /* Destination bounding box */
> - ddirty->left = min_t(s32, ddirty->left, dirty->unit_x1);
> - ddirty->top = min_t(s32, ddirty->top, dirty->unit_y1);
> - ddirty->right = max_t(s32, ddirty->right, dirty->unit_x2);
> - ddirty->bottom = max_t(s32, ddirty->bottom, dirty->unit_y2);
> -}
> -
> -/**
> - * vmw_stdu_bo_fifo_commit - Callback to fill in and submit a DMA command.
> - *
> - * @dirty: The closure structure.
> - *
> - * Fills in the missing fields in a DMA command, and optionally encodes
> - * a screen target update command, depending on transfer direction.
> - */
> -static void vmw_stdu_bo_fifo_commit(struct vmw_kms_dirty *dirty)
> -{
> - struct vmw_stdu_dirty *ddirty =
> - container_of(dirty, struct vmw_stdu_dirty, base);
> - struct vmw_screen_target_display_unit *stdu =
> - container_of(dirty->unit, typeof(*stdu), base);
> - struct vmw_stdu_dma *cmd = dirty->cmd;
> - struct SVGA3dCopyBox *blit = (struct SVGA3dCopyBox *) &cmd[1];
> - SVGA3dCmdSurfaceDMASuffix *suffix =
> - (SVGA3dCmdSurfaceDMASuffix *) &blit[dirty->num_hits];
> - size_t blit_size = sizeof(*blit) * dirty->num_hits + sizeof(*suffix);
> -
> - if (!dirty->num_hits) {
> - vmw_cmd_commit(dirty->dev_priv, 0);
> - return;
> - }
> -
> - cmd->header.id = SVGA_3D_CMD_SURFACE_DMA;
> - cmd->header.size = sizeof(cmd->body) + blit_size;
> - vmw_bo_get_guest_ptr(&ddirty->buf->base, &cmd->body.guest.ptr);
> - cmd->body.guest.pitch = ddirty->pitch;
> - cmd->body.host.sid = stdu->display_srf->res.id;
> - cmd->body.host.face = 0;
> - cmd->body.host.mipmap = 0;
> - cmd->body.transfer = ddirty->transfer;
> - suffix->suffixSize = sizeof(*suffix);
> - suffix->maximumOffset = ddirty->buf->base.base.size;
> -
> - if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM) {
> - blit_size += sizeof(struct vmw_stdu_update);
> -
> - vmw_stdu_populate_update(&suffix[1], stdu->base.unit,
> - ddirty->left, ddirty->right,
> - ddirty->top, ddirty->bottom);
> - }
> -
> - vmw_cmd_commit(dirty->dev_priv, sizeof(*cmd) + blit_size);
> -
> - stdu->display_srf->res.res_dirty = true;
> - ddirty->left = ddirty->top = S32_MAX;
> - ddirty->right = ddirty->bottom = S32_MIN;
> -}
> -
> -
> /**
> * vmw_stdu_bo_cpu_clip - Callback to encode a CPU blit
> *
> @@ -598,62 +504,21 @@ static void vmw_stdu_bo_cpu_commit(struct vmw_kms_dirty *dirty)
> return;
>
> /* Assume we are blitting from Guest (bo) to Host (display_srf) */
> - dst_pitch = stdu->display_srf->metadata.base_size.width * stdu->cpp;
> - dst_bo = &stdu->display_srf->res.backup->base;
> - dst_offset = ddirty->top * dst_pitch + ddirty->left * stdu->cpp;
> -
> - src_pitch = ddirty->pitch;
> - src_bo = &ddirty->buf->base;
> - src_offset = ddirty->fb_top * src_pitch + ddirty->fb_left * stdu->cpp;
> + src_pitch = stdu->display_srf->metadata.base_size.width * stdu->cpp;
> + src_bo = &stdu->display_srf->res.backup->base;
> + src_offset = ddirty->top * dst_pitch + ddirty->left * stdu->cpp;
>
> - /* Swap src and dst if the assumption was wrong. */
> - if (ddirty->transfer != SVGA3D_WRITE_HOST_VRAM) {
> - swap(dst_pitch, src_pitch);
> - swap(dst_bo, src_bo);
> - swap(src_offset, dst_offset);
> - }
> + dst_pitch = ddirty->pitch;
> + dst_bo = &ddirty->buf->base;
> + dst_offset = ddirty->fb_top * src_pitch + ddirty->fb_left * stdu->cpp;
>
> (void) vmw_bo_cpu_blit(dst_bo, dst_offset, dst_pitch,
> src_bo, src_offset, src_pitch,
> width * stdu->cpp, height, &diff);
> -
> - if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM &&
> - drm_rect_visible(&diff.rect)) {
> - struct vmw_private *dev_priv;
> - struct vmw_stdu_update *cmd;
> - struct drm_clip_rect region;
> - int ret;
> -
> - /* We are updating the actual surface, not a proxy */
> - region.x1 = diff.rect.x1;
> - region.x2 = diff.rect.x2;
> - region.y1 = diff.rect.y1;
> - region.y2 = diff.rect.y2;
> - ret = vmw_kms_update_proxy(&stdu->display_srf->res, &region,
> - 1, 1);
> - if (ret)
> - goto out_cleanup;
> -
> -
> - dev_priv = vmw_priv(stdu->base.crtc.dev);
> - cmd = VMW_CMD_RESERVE(dev_priv, sizeof(*cmd));
> - if (!cmd)
> - goto out_cleanup;
> -
> - vmw_stdu_populate_update(cmd, stdu->base.unit,
> - region.x1, region.x2,
> - region.y1, region.y2);
> -
> - vmw_cmd_commit(dev_priv, sizeof(*cmd));
> - }
> -
> -out_cleanup:
> - ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX;
> - ddirty->right = ddirty->bottom = S32_MIN;
> }
>
> /**
> - * vmw_kms_stdu_dma - Perform a DMA transfer between a buffer-object backed
> + * vmw_kms_stdu_readback - Perform a readback from a buffer-object backed
> * framebuffer and the screen target system.
> *
> * @dev_priv: Pointer to the device private structure.
> @@ -666,9 +531,6 @@ static void vmw_stdu_bo_cpu_commit(struct vmw_kms_dirty *dirty)
> * be NULL.
> * @num_clips: Number of clip rects in @clips or @vclips.
> * @increment: Increment to use when looping over @clips or @vclips.
> - * @to_surface: Whether to DMA to the screen target system as opposed to
> - * from the screen target system.
> - * @interruptible: Whether to perform waits interruptible if possible.
> * @crtc: If crtc is passed, perform stdu dma on that crtc only.
> *
> * If DMA-ing till the screen target system, the function will also notify
> @@ -677,59 +539,49 @@ static void vmw_stdu_bo_cpu_commit(struct vmw_kms_dirty *dirty)
> * Returns 0 on success, negative error code on failure. -ERESTARTSYS if
> * interrupted.
> */
> -int vmw_kms_stdu_dma(struct vmw_private *dev_priv,
> - struct drm_file *file_priv,
> - struct vmw_framebuffer *vfb,
> - struct drm_vmw_fence_rep __user *user_fence_rep,
> - struct drm_clip_rect *clips,
> - struct drm_vmw_rect *vclips,
> - uint32_t num_clips,
> - int increment,
> - bool to_surface,
> - bool interruptible,
> - struct drm_crtc *crtc)
> +int vmw_kms_stdu_readback(struct vmw_private *dev_priv,
> + struct drm_file *file_priv,
> + struct vmw_framebuffer *vfb,
> + struct drm_vmw_fence_rep __user *user_fence_rep,
> + struct drm_clip_rect *clips,
> + struct drm_vmw_rect *vclips,
> + uint32_t num_clips,
> + int increment,
> + struct drm_crtc *crtc)
> {
> struct vmw_bo *buf =
> container_of(vfb, struct vmw_framebuffer_bo, base)->buffer;
> struct vmw_stdu_dirty ddirty;
> int ret;
> - bool cpu_blit = vmw_stdu_use_cpu_blit(dev_priv);
> DECLARE_VAL_CONTEXT(val_ctx, NULL, 0);
>
> /*
> - * VMs without 3D support don't have the surface DMA command and
> - * we'll be using a CPU blit, and the framebuffer should be moved out
> - * of VRAM.
> + * The GMR domain might seem confusing because it might seem like it should
> + * never happen with screen targets but e.g. the xorg vmware driver issues
> + * CMD_SURFACE_DMA for various pixmap updates which might transition our bo to
> + * a GMR. Instead of forcing another transition we can optimize the readback
> + * by reading directly from the GMR.
> */
> - ret = vmw_validation_add_bo(&val_ctx, buf, false, cpu_blit);
> + vmw_bo_placement_set(buf,
> + vmw_bo_domain_mob | vmw_bo_domain_sys | vmw_bo_domain_gmr,
> + vmw_bo_domain_mob | vmw_bo_domain_sys | vmw_bo_domain_gmr);
> + ret = vmw_validation_add_bo(&val_ctx, buf);
> if (ret)
> return ret;
>
> - ret = vmw_validation_prepare(&val_ctx, NULL, interruptible);
> + ret = vmw_validation_prepare(&val_ctx, NULL, true);
> if (ret)
> goto out_unref;
>
> - ddirty.transfer = (to_surface) ? SVGA3D_WRITE_HOST_VRAM :
> - SVGA3D_READ_HOST_VRAM;
> ddirty.left = ddirty.top = S32_MAX;
> ddirty.right = ddirty.bottom = S32_MIN;
> ddirty.fb_left = ddirty.fb_top = S32_MAX;
> ddirty.pitch = vfb->base.pitches[0];
> ddirty.buf = buf;
> - ddirty.base.fifo_commit = vmw_stdu_bo_fifo_commit;
> - ddirty.base.clip = vmw_stdu_bo_clip;
> - ddirty.base.fifo_reserve_size = sizeof(struct vmw_stdu_dma) +
> - num_clips * sizeof(SVGA3dCopyBox) +
> - sizeof(SVGA3dCmdSurfaceDMASuffix);
> - if (to_surface)
> - ddirty.base.fifo_reserve_size += sizeof(struct vmw_stdu_update);
> -
> -
> - if (cpu_blit) {
> - ddirty.base.fifo_commit = vmw_stdu_bo_cpu_commit;
> - ddirty.base.clip = vmw_stdu_bo_cpu_clip;
> - ddirty.base.fifo_reserve_size = 0;
> - }
> +
> + ddirty.base.fifo_commit = vmw_stdu_bo_cpu_commit;
> + ddirty.base.clip = vmw_stdu_bo_cpu_clip;
> + ddirty.base.fifo_reserve_size = 0;
>
> ddirty.base.crtc = crtc;
>
> @@ -1161,11 +1013,8 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
> /*
> * This should only happen if the buffer object is too large to create a
> * proxy surface for.
> - * If we are a 2D VM with a buffer object then we have to use CPU blit
> - * so cache these mappings
> */
> - if (vps->content_fb_type == SEPARATE_BO &&
> - vmw_stdu_use_cpu_blit(dev_priv))
> + if (vps->content_fb_type == SEPARATE_BO)
> vps->cpp = new_fb->pitches[0] / new_fb->width;
>
> return 0;
> @@ -1175,14 +1024,6 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
> return ret;
> }
>
> -static uint32_t vmw_stdu_bo_fifo_size(struct vmw_du_update_plane *update,
> - uint32_t num_hits)
> -{
> - return sizeof(struct vmw_stdu_dma) + sizeof(SVGA3dCopyBox) * num_hits +
> - sizeof(SVGA3dCmdSurfaceDMASuffix) +
> - sizeof(struct vmw_stdu_update);
> -}
> -
> static uint32_t vmw_stdu_bo_fifo_size_cpu(struct vmw_du_update_plane *update,
> uint32_t num_hits)
> {
> @@ -1190,68 +1031,6 @@ static uint32_t vmw_stdu_bo_fifo_size_cpu(struct vmw_du_update_plane *update,
> sizeof(struct vmw_stdu_update);
> }
>
> -static uint32_t vmw_stdu_bo_populate_dma(struct vmw_du_update_plane *update,
> - void *cmd, uint32_t num_hits)
> -{
> - struct vmw_screen_target_display_unit *stdu;
> - struct vmw_framebuffer_bo *vfbbo;
> - struct vmw_stdu_dma *cmd_dma = cmd;
> -
> - stdu = container_of(update->du, typeof(*stdu), base);
> - vfbbo = container_of(update->vfb, typeof(*vfbbo), base);
> -
> - cmd_dma->header.id = SVGA_3D_CMD_SURFACE_DMA;
> - cmd_dma->header.size = sizeof(cmd_dma->body) +
> - sizeof(struct SVGA3dCopyBox) * num_hits +
> - sizeof(SVGA3dCmdSurfaceDMASuffix);
> - vmw_bo_get_guest_ptr(&vfbbo->buffer->base, &cmd_dma->body.guest.ptr);
> - cmd_dma->body.guest.pitch = update->vfb->base.pitches[0];
> - cmd_dma->body.host.sid = stdu->display_srf->res.id;
> - cmd_dma->body.host.face = 0;
> - cmd_dma->body.host.mipmap = 0;
> - cmd_dma->body.transfer = SVGA3D_WRITE_HOST_VRAM;
> -
> - return sizeof(*cmd_dma);
> -}
> -
> -static uint32_t vmw_stdu_bo_populate_clip(struct vmw_du_update_plane *update,
> - void *cmd, struct drm_rect *clip,
> - uint32_t fb_x, uint32_t fb_y)
> -{
> - struct SVGA3dCopyBox *box = cmd;
> -
> - box->srcx = fb_x;
> - box->srcy = fb_y;
> - box->srcz = 0;
> - box->x = clip->x1;
> - box->y = clip->y1;
> - box->z = 0;
> - box->w = drm_rect_width(clip);
> - box->h = drm_rect_height(clip);
> - box->d = 1;
> -
> - return sizeof(*box);
> -}
> -
> -static uint32_t vmw_stdu_bo_populate_update(struct vmw_du_update_plane *update,
> - void *cmd, struct drm_rect *bb)
> -{
> - struct vmw_screen_target_display_unit *stdu;
> - struct vmw_framebuffer_bo *vfbbo;
> - SVGA3dCmdSurfaceDMASuffix *suffix = cmd;
> -
> - stdu = container_of(update->du, typeof(*stdu), base);
> - vfbbo = container_of(update->vfb, typeof(*vfbbo), base);
> -
> - suffix->suffixSize = sizeof(*suffix);
> - suffix->maximumOffset = vfbbo->buffer->base.base.size;
> -
> - vmw_stdu_populate_update(&suffix[1], stdu->base.unit, bb->x1, bb->x2,
> - bb->y1, bb->y2);
> -
> - return sizeof(*suffix) + sizeof(struct vmw_stdu_update);
> -}
> -
> static uint32_t vmw_stdu_bo_pre_clip_cpu(struct vmw_du_update_plane *update,
> void *cmd, uint32_t num_hits)
> {
> @@ -1369,24 +1148,12 @@ static int vmw_stdu_plane_update_bo(struct vmw_private *dev_priv,
> bo_update.base.vfb = vfb;
> bo_update.base.out_fence = out_fence;
> bo_update.base.mutex = NULL;
> - bo_update.base.cpu_blit = vmw_stdu_use_cpu_blit(dev_priv);
> bo_update.base.intr = false;
>
> - /*
> - * VM without 3D support don't have surface DMA command and framebuffer
> - * should be moved out of VRAM.
> - */
> - if (bo_update.base.cpu_blit) {
> - bo_update.base.calc_fifo_size = vmw_stdu_bo_fifo_size_cpu;
> - bo_update.base.pre_clip = vmw_stdu_bo_pre_clip_cpu;
> - bo_update.base.clip = vmw_stdu_bo_clip_cpu;
> - bo_update.base.post_clip = vmw_stdu_bo_populate_update_cpu;
> - } else {
> - bo_update.base.calc_fifo_size = vmw_stdu_bo_fifo_size;
> - bo_update.base.pre_clip = vmw_stdu_bo_populate_dma;
> - bo_update.base.clip = vmw_stdu_bo_populate_clip;
> - bo_update.base.post_clip = vmw_stdu_bo_populate_update;
> - }
> + bo_update.base.calc_fifo_size = vmw_stdu_bo_fifo_size_cpu;
> + bo_update.base.pre_clip = vmw_stdu_bo_pre_clip_cpu;
> + bo_update.base.clip = vmw_stdu_bo_clip_cpu;
> + bo_update.base.post_clip = vmw_stdu_bo_populate_update_cpu;
>
> return vmw_du_helper_plane_update(&bo_update.base);
> }
> @@ -1549,7 +1316,6 @@ static int vmw_stdu_plane_update_surface(struct vmw_private *dev_priv,
> srf_update.vfb = vfb;
> srf_update.out_fence = out_fence;
> srf_update.mutex = &dev_priv->cmdbuf_mutex;
> - srf_update.cpu_blit = false;
> srf_update.intr = true;
>
> if (vfbs->is_bo_proxy)
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_streamoutput.c b/drivers/gpu/drm/vmwgfx/vmwgfx_streamoutput.c
> index 71ce89150ba7..ea6457ae2206 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_streamoutput.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_streamoutput.c
> @@ -66,7 +66,8 @@ static const struct vmw_res_func vmw_dx_streamoutput_func = {
> .needs_backup = true,
> .may_evict = false,
> .type_name = "DX streamoutput",
> - .backup_placement = &vmw_mob_placement,
> + .domain = vmw_bo_domain_mob,
> + .busy_domain = vmw_bo_domain_mob,
> .create = vmw_dx_streamoutput_create,
> .destroy = NULL, /* Command buffer managed resource. */
> .bind = vmw_dx_streamoutput_bind,
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
> index 296d903c5acb..458adedc8bad 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
> @@ -131,7 +131,8 @@ static const struct vmw_res_func vmw_legacy_surface_func = {
> .prio = 1,
> .dirty_prio = 1,
> .type_name = "legacy surfaces",
> - .backup_placement = &vmw_srf_placement,
> + .domain = vmw_bo_domain_gmr,
> + .busy_domain = vmw_bo_domain_gmr | vmw_bo_domain_vram,
> .create = &vmw_legacy_srf_create,
> .destroy = &vmw_legacy_srf_destroy,
> .bind = &vmw_legacy_srf_bind,
> @@ -145,7 +146,8 @@ static const struct vmw_res_func vmw_gb_surface_func = {
> .prio = 1,
> .dirty_prio = 2,
> .type_name = "guest backed surfaces",
> - .backup_placement = &vmw_mob_placement,
> + .domain = vmw_bo_domain_mob,
> + .busy_domain = vmw_bo_domain_mob,
> .create = vmw_gb_surface_create,
> .destroy = vmw_gb_surface_destroy,
> .bind = vmw_gb_surface_bind,
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
> index 41480af87255..c43df4109613 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
> @@ -79,20 +79,6 @@ static const struct ttm_place vram_gmr_placement_flags[] = {
> }
> };
>
> -static const struct ttm_place gmr_vram_placement_flags[] = {
> - {
> - .fpfn = 0,
> - .lpfn = 0,
> - .mem_type = VMW_PL_GMR,
> - .flags = 0
> - }, {
> - .fpfn = 0,
> - .lpfn = 0,
> - .mem_type = TTM_PL_VRAM,
> - .flags = 0
> - }
> -};
> -
> static const struct ttm_place vmw_sys_placement_flags = {
> .fpfn = 0,
> .lpfn = 0,
> @@ -128,32 +114,6 @@ struct ttm_placement vmw_pt_sys_placement = {
> .busy_placement = &vmw_sys_placement_flags
> };
>
> -static const struct ttm_place nonfixed_placement_flags[] = {
> - {
> - .fpfn = 0,
> - .lpfn = 0,
> - .mem_type = TTM_PL_SYSTEM,
> - .flags = 0
> - }, {
> - .fpfn = 0,
> - .lpfn = 0,
> - .mem_type = VMW_PL_GMR,
> - .flags = 0
> - }, {
> - .fpfn = 0,
> - .lpfn = 0,
> - .mem_type = VMW_PL_MOB,
> - .flags = 0
> - }
> -};
> -
> -struct ttm_placement vmw_srf_placement = {
> - .num_placement = 1,
> - .num_busy_placement = 2,
> - .placement = &gmr_placement_flags,
> - .busy_placement = gmr_vram_placement_flags
> -};
> -
> struct ttm_placement vmw_mob_placement = {
> .num_placement = 1,
> .num_busy_placement = 1,
> @@ -161,13 +121,6 @@ struct ttm_placement vmw_mob_placement = {
> .busy_placement = &mob_placement_flags
> };
>
> -struct ttm_placement vmw_nonfixed_placement = {
> - .num_placement = 3,
> - .placement = nonfixed_placement_flags,
> - .num_busy_placement = 1,
> - .busy_placement = &sys_placement_flags
> -};
> -
> const size_t vmw_tt_size = sizeof(struct vmw_ttm_tt);
>
> /**
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_va.c b/drivers/gpu/drm/vmwgfx/vmwgfx_va.c
> index 6ad744ae07f5..cfbe580e6795 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_va.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_va.c
> @@ -25,6 +25,7 @@
> *
> **************************************************************************/
>
> +#include "vmwgfx_bo.h"
> #include "vmwgfx_drv.h"
> #include "vmwgfx_resource_priv.h"
>
> @@ -83,7 +84,8 @@ static const struct vmw_simple_resource_func va_stream_func = {
> .needs_backup = false,
> .may_evict = false,
> .type_name = "overlay stream",
> - .backup_placement = NULL,
> + .domain = vmw_bo_domain_any,
> + .busy_domain = vmw_bo_domain_any,
> .create = NULL,
> .destroy = NULL,
> .bind = NULL,
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
> index 770b1b53bde7..05f0909ff1dd 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
> @@ -27,6 +27,7 @@
> **************************************************************************/
> #include "vmwgfx_bo.h"
> #include "vmwgfx_drv.h"
> +#include "vmwgfx_resource_priv.h"
> #include "vmwgfx_validation.h"
>
> #include <linux/slab.h>
> @@ -40,8 +41,6 @@
> * @hash: A hash entry used for the duplicate detection hash table.
> * @coherent_count: If switching backup buffers, number of new coherent
> * resources that will have this buffer as a backup buffer.
> - * @as_mob: Validate as mob.
> - * @cpu_blit: Validate for cpu blit access.
> *
> * Bit fields are used since these structures are allocated and freed in
> * large numbers and space conservation is desired.
> @@ -50,8 +49,6 @@ struct vmw_validation_bo_node {
> struct ttm_validate_buffer base;
> struct vmwgfx_hash_item hash;
> unsigned int coherent_count;
> - u32 as_mob : 1;
> - u32 cpu_blit : 1;
> };
> /**
> * struct vmw_validation_res_node - Resource validation metadata.
> @@ -260,26 +257,16 @@ vmw_validation_find_res_dup(struct vmw_validation_context *ctx,
> * vmw_validation_add_bo - Add a buffer object to the validation context.
> * @ctx: The validation context.
> * @vbo: The buffer object.
> - * @as_mob: Validate as mob, otherwise suitable for GMR operations.
> - * @cpu_blit: Validate in a page-mappable location.
> *
> * Return: Zero on success, negative error code otherwise.
> */
> int vmw_validation_add_bo(struct vmw_validation_context *ctx,
> - struct vmw_bo *vbo,
> - bool as_mob,
> - bool cpu_blit)
> + struct vmw_bo *vbo)
> {
> struct vmw_validation_bo_node *bo_node;
>
> bo_node = vmw_validation_find_bo_dup(ctx, vbo);
> - if (bo_node) {
> - if (bo_node->as_mob != as_mob ||
> - bo_node->cpu_blit != cpu_blit) {
> - DRM_ERROR("Inconsistent buffer usage.\n");
> - return -EINVAL;
> - }
> - } else {
> + if (!bo_node) {
> struct ttm_validate_buffer *val_buf;
>
> bo_node = vmw_validation_mem_alloc(ctx, sizeof(*bo_node));
> @@ -297,8 +284,6 @@ int vmw_validation_add_bo(struct vmw_validation_context *ctx,
> return -ESRCH;
> val_buf->num_shared = 0;
> list_add_tail(&val_buf->head, &ctx->bo_list);
> - bo_node->as_mob = as_mob;
> - bo_node->cpu_blit = cpu_blit;
> }
>
> return 0;
> @@ -455,9 +440,10 @@ int vmw_validation_res_reserve(struct vmw_validation_context *ctx,
> if (res->backup) {
> struct vmw_bo *vbo = res->backup;
>
> - ret = vmw_validation_add_bo
> - (ctx, vbo, vmw_resource_needs_backup(res),
> - false);
> + vmw_bo_placement_set(vbo,
> + res->func->domain,
> + res->func->busy_domain);
> + ret = vmw_validation_add_bo(ctx, vbo);
> if (ret)
> goto out_unreserve;
> }
> @@ -519,14 +505,12 @@ void vmw_validation_res_unreserve(struct vmw_validation_context *ctx,
> * vmw_validation_bo_validate_single - Validate a single buffer object.
> * @bo: The TTM buffer object base.
> * @interruptible: Whether to perform waits interruptible if possible.
> - * @validate_as_mob: Whether to validate in MOB memory.
> *
> * Return: Zero on success, -ERESTARTSYS if interrupted. Negative error
> * code on failure.
> */
> -int vmw_validation_bo_validate_single(struct ttm_buffer_object *bo,
> - bool interruptible,
> - bool validate_as_mob)
> +static int vmw_validation_bo_validate_single(struct ttm_buffer_object *bo,
> + bool interruptible)
> {
> struct vmw_bo *vbo =
> container_of(bo, struct vmw_bo, base);
> @@ -542,27 +526,17 @@ int vmw_validation_bo_validate_single(struct ttm_buffer_object *bo,
> if (vbo->base.pin_count > 0)
> return 0;
>
> - if (validate_as_mob)
> - return ttm_bo_validate(bo, &vmw_mob_placement, &ctx);
> -
> - /**
> - * Put BO in VRAM if there is space, otherwise as a GMR.
> - * If there is no space in VRAM and GMR ids are all used up,
> - * start evicting GMRs to make room. If the DMA buffer can't be
> - * used as a GMR, this will return -ENOMEM.
> - */
> -
> - ret = ttm_bo_validate(bo, &vmw_vram_gmr_placement, &ctx);
> + ret = ttm_bo_validate(bo, &vbo->placement, &ctx);
> if (ret == 0 || ret == -ERESTARTSYS)
> return ret;
>
> - /**
> - * If that failed, try VRAM again, this time evicting
> + /*
> + * If that failed, try again, this time evicting
> * previous contents.
> */
> + ctx.allow_res_evict = true;
>
> - ret = ttm_bo_validate(bo, &vmw_vram_placement, &ctx);
> - return ret;
> + return ttm_bo_validate(bo, &vbo->placement, &ctx);
> }
>
> /**
> @@ -583,18 +557,8 @@ int vmw_validation_bo_validate(struct vmw_validation_context *ctx, bool intr)
> struct vmw_bo *vbo =
> container_of(entry->base.bo, typeof(*vbo), base);
>
> - if (entry->cpu_blit) {
> - struct ttm_operation_ctx ttm_ctx = {
> - .interruptible = intr,
> - .no_wait_gpu = false
> - };
> -
> - ret = ttm_bo_validate(entry->base.bo,
> - &vmw_nonfixed_placement, &ttm_ctx);
> - } else {
> - ret = vmw_validation_bo_validate_single
> - (entry->base.bo, intr, entry->as_mob);
> - }
> + ret = vmw_validation_bo_validate_single(entry->base.bo, intr);
> +
> if (ret)
> return ret;
>
> @@ -655,9 +619,9 @@ int vmw_validation_res_validate(struct vmw_validation_context *ctx, bool intr)
> if (backup && res->backup && (backup != res->backup)) {
> struct vmw_bo *vbo = res->backup;
>
> - ret = vmw_validation_add_bo
> - (ctx, vbo, vmw_resource_needs_backup(res),
> - false);
> + vmw_bo_placement_set(vbo, res->func->domain,
> + res->func->busy_domain);
> + ret = vmw_validation_add_bo(ctx, vbo);
> if (ret)
> return ret;
> }
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
> index 4aa4f700c65e..240ee0c4ebfd 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
> @@ -159,11 +159,7 @@ static inline unsigned int vmw_validation_align(unsigned int val)
> }
>
> int vmw_validation_add_bo(struct vmw_validation_context *ctx,
> - struct vmw_bo *vbo,
> - bool as_mob, bool cpu_blit);
> -int vmw_validation_bo_validate_single(struct ttm_buffer_object *bo,
> - bool interruptible,
> - bool validate_as_mob);
> + struct vmw_bo *vbo);
> int vmw_validation_bo_validate(struct vmw_validation_context *ctx, bool intr);
> void vmw_validation_unref_lists(struct vmw_validation_context *ctx);
> int vmw_validation_add_resource(struct vmw_validation_context *ctx,
--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Ivo Totev
-------------- next part --------------
A non-text attachment was scrubbed...
Name: OpenPGP_signature
Type: application/pgp-signature
Size: 840 bytes
Desc: OpenPGP digital signature
URL: <https://lists.freedesktop.org/archives/dri-devel/attachments/20230127/5a9b4b08/attachment-0001.sig>
More information about the dri-devel
mailing list