[PATCH v2] drm/vc4: Add the DRM_IOCTL_VC4_GEM_MADVISE ioctl
Eric Anholt
eric at anholt.net
Thu Oct 12 18:45:20 UTC 2017
Boris Brezillon <boris.brezillon at free-electrons.com> writes:
> This ioctl will allow us to purge inactive userspace buffers when the
> system is running out of contiguous memory.
>
> For now, the purge logic is rather dumb in that it does not try to
> release only the amount of BO needed to meet the last CMA alloc request
> but instead purges all objects placed in the purgeable pool as soon as
> we experience a CMA allocation failure.
>
> Note that the in-kernel BO cache is always purged before the purgeable
> cache because those objects are known to be unused while objects marked
> as purgeable by a userspace application/library might have to be
> restored when they are marked back as unpurgeable, which can be
> expensive.
>
> Signed-off-by: Boris Brezillon <boris.brezillon at free-electrons.com>
> ---
> Hello,
>
> Updates to libdrm, mesa and igt making use of this kernel feature can
> be found on my github repos [1][2][3].
>
> There's currently no debugfs hook to manually force a purge, but this
> is being discussed and might be added later on.
>
> Regards,
>
> Boris
>
> [1]https://github.com/bbrezillon/drm/tree/vc4/purgeable
> [2]https://github.com/bbrezillon/mesa/tree/vc4/purgeable
> [3]https://github.com/bbrezillon/igt/tree/vc4/purgeable
>
> Changes in v2:
> - Do not re-allocate BO's memory after when WILLNEED is asked after a purge
> - Add purgeable/purged stats to debugfs and dumpstats
> - Pad the drm_vc4_gem_madvise to make it 64-bit aligned
> - Forbid madvise calls on internal BO objects (everything that is not
> meant to be exposed to userspace)
> - Do not increment/decrement usecnt on internal BOs (they cannot be marked
> purgeable, so doing that is useless and error prone)
> - Fix a few bugs related to usecnt refcounting
> ---
> drivers/gpu/drm/vc4/vc4_bo.c | 284 ++++++++++++++++++++++++++++++++++++++--
> drivers/gpu/drm/vc4/vc4_drv.c | 10 +-
> drivers/gpu/drm/vc4/vc4_drv.h | 44 +++++++
> drivers/gpu/drm/vc4/vc4_gem.c | 153 +++++++++++++++++++++-
> drivers/gpu/drm/vc4/vc4_plane.c | 20 +++
> include/uapi/drm/vc4_drm.h | 18 +++
> 6 files changed, 512 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
> index 3afdbf4bc10b..edb0062a58c7 100644
> --- a/drivers/gpu/drm/vc4/vc4_bo.c
> +++ b/drivers/gpu/drm/vc4/vc4_bo.c
> @@ -42,7 +42,8 @@ static bool is_user_label(int label)
>
> static void vc4_bo_stats_dump(struct vc4_dev *vc4)
> {
> - int i;
> + size_t purgeable_size, purged_size;
> + int i, npurgeable, npurged;
>
> for (i = 0; i < vc4->num_labels; i++) {
> if (!vc4->bo_labels[i].num_allocated)
> @@ -53,6 +54,21 @@ static void vc4_bo_stats_dump(struct vc4_dev *vc4)
> vc4->bo_labels[i].size_allocated / 1024,
> vc4->bo_labels[i].num_allocated);
> }
> +
> + mutex_lock(&vc4->purgeable.lock);
> + npurgeable = vc4->purgeable.num;
> + purgeable_size = vc4->purgeable.size;
> + purged_size = vc4->purgeable.purged_size;
> + npurged = vc4->purgeable.purged_num;
> + mutex_unlock(&vc4->purgeable.lock);
> +
> + if (npurgeable)
> + DRM_INFO("%30s: %6dkb BOs (%d)\n", "userspace BO cache",
> + purgeable_size / 1024, npurgeable);
> +
> + if (npurged)
> + DRM_INFO("%30s: %6dkb BOs (%d)\n", "total purged BO",
> + purged_size / 1024, npurged);
> }
We had a discussion about this on IRC -- I was trying to go for using
the debug labeling stuff instead of maintaining separate stats.
Apparently the "total purged ever" stats have been really useful, though
(since a BO is probably removed soon after being purged), so we're
sticking with it. Also, losing existing debug labels on
GL_APPLE_object_purgeable objects wouldn't be great.
> +static inline bool vc4_bo_supports_madv(const struct vc4_bo *bo)
> +{
> + switch (bo->label) {
> + case VC4_BO_TYPE_V3D:
> + case VC4_BO_TYPE_V3D_SHADER:
> + case VC4_BO_TYPE_DUMB:
> + return true;
> + default:
> + break;
> + }
> +
> + return false;
> +}
This function is a problem -- we're basing usecnt management elsewhere
on its return value, but these labels are supposed to only be debug
information. Notably, userspace could call the label ioctl with the
same string as one of the kernel names and get the BO to have one of the
kernel's permanent label numbers, mixing up the usecnts.
> struct vc4_fence {
> struct dma_fence base;
> struct drm_device *dev;
> @@ -503,6 +540,7 @@ int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file_priv);
> int vc4_label_bo_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file_priv);
> +int vc4_fault(struct vm_fault *vmf);
> int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
> struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj);
> int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
> @@ -513,6 +551,10 @@ void *vc4_prime_vmap(struct drm_gem_object *obj);
> int vc4_bo_cache_init(struct drm_device *dev);
> void vc4_bo_cache_destroy(struct drm_device *dev);
> int vc4_bo_stats_debugfs(struct seq_file *m, void *arg);
> +int vc4_bo_inc_usecnt(struct vc4_bo *bo);
> +void vc4_bo_dec_usecnt(struct vc4_bo *bo);
> +void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo);
> +void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo);
>
> /* vc4_crtc.c */
> extern struct platform_driver vc4_crtc_driver;
> @@ -557,6 +599,8 @@ void vc4_job_handle_completed(struct vc4_dev *vc4);
> int vc4_queue_seqno_cb(struct drm_device *dev,
> struct vc4_seqno_cb *cb, uint64_t seqno,
> void (*func)(struct vc4_seqno_cb *cb));
> +int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file_priv);
>
> /* vc4_hdmi.c */
> extern struct platform_driver vc4_hdmi_driver;
> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
> index d0c6bfb68c4e..8d38c1e9ccd0 100644
> --- a/drivers/gpu/drm/vc4/vc4_gem.c
> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
> @@ -188,11 +188,22 @@ vc4_save_hang_state(struct drm_device *dev)
> continue;
>
> for (j = 0; j < exec[i]->bo_count; j++) {
> + bo = to_vc4_bo(&exec[i]->bo[j]->base);
> +
> + /* Retain BOs just in case they were marked purgeable.
> + * This prevents the BO from being purged before
> + * someone had a chance to dump the hang state.
> + */
> + WARN_ON(!refcount_read(&bo->usecnt));
> + refcount_inc(&bo->usecnt);
> drm_gem_object_get(&exec[i]->bo[j]->base);
> kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base;
> }
>
> list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
> + /* No need to retain BOs coming from the ->unref_list
> + * because they are naturally unpurgeable.
> + */
> drm_gem_object_get(&bo->base.base);
> kernel_state->bo[j + prev_idx] = &bo->base.base;
> j++;
> @@ -233,6 +244,26 @@ vc4_save_hang_state(struct drm_device *dev)
> state->fdbgs = V3D_READ(V3D_FDBGS);
> state->errstat = V3D_READ(V3D_ERRSTAT);
>
> + /* We need to turn purgeable BOs into unpurgeable ones so that
> + * userspace has a chance to dump the hang state before the kernel
> + * decides to purge those BOs.
> + * Note that BO consistency at dump time cannot be guaranteed. For
> + * example, if the owner of these BOs decides to re-use them or mark
> + * them purgeable again there's nothing we can do to prevent it.
> + */
> + for (i = 0; i < kernel_state->user_state.bo_count; i++) {
> + struct vc4_bo *bo = to_vc4_bo(kernel_state->bo[i]);
> +
> + if (!vc4_bo_supports_madv(bo))
> + continue;
> +
> + mutex_lock(&bo->madv_lock);
> + if (!WARN_ON(bo->madv == __VC4_MADV_PURGED))
> + bo->madv = VC4_MADV_WILLNEED;
> + refcount_dec(&bo->usecnt);
> + mutex_unlock(&bo->madv_lock);
> + }
> +
> spin_lock_irqsave(&vc4->job_lock, irqflags);
> if (vc4->hang_state) {
> spin_unlock_irqrestore(&vc4->job_lock, irqflags);
> @@ -639,9 +670,6 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
> * The command validator needs to reference BOs by their index within
> * the submitted job's BO list. This does the validation of the job's
> * BO list and reference counting for the lifetime of the job.
> - *
> - * Note that this function doesn't need to unreference the BOs on
> - * failure, because that will happen at vc4_complete_exec() time.
> */
> static int
> vc4_cl_lookup_bos(struct drm_device *dev,
> @@ -693,16 +721,47 @@ vc4_cl_lookup_bos(struct drm_device *dev,
> DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
> i, handles[i]);
> ret = -EINVAL;
> - spin_unlock(&file_priv->table_lock);
> - goto fail;
> + break;
> }
> +
> drm_gem_object_get(bo);
> exec->bo[i] = (struct drm_gem_cma_object *)bo;
> }
> spin_unlock(&file_priv->table_lock);
>
> + if (ret)
> + goto fail_put_bo;
> +
> + for (i = 0; i < exec->bo_count; i++) {
> + ret = vc4_bo_inc_usecnt(to_vc4_bo(&exec->bo[i]->base));
> + if (ret)
> + goto fail_dec_usecnt;
> + }
> +
> + kvfree(handles);
> + return 0;
> +
> +fail_dec_usecnt:
> + /* Decrease usecnt on acquired objects.
> + * We cannot rely on vc4_complete_exec() to release resources here,
> + * because vc4_complete_exec() has no information about which BO has
> + * had its ->usecnt incremented.
> + * To make things easier we just free everything explicitly and set
> + * exec->bo to NULL so that vc4_complete_exec() skips the 'BO release'
> + * step.
> + */
> + for (i-- ; i >= 0; i--)
> + vc4_bo_dec_usecnt(to_vc4_bo(&exec->bo[i]->base));
> +
> +fail_put_bo:
> + /* Release any reference to acquired objects. */
> + for (i = 0; i < exec->bo_count && exec->bo[i]; i++)
> + drm_gem_object_put(&exec->bo[i]->base);
> +
> fail:
> kvfree(handles);
> + kvfree(exec->bo);
> + exec->bo = NULL;
> return ret;
> }
>
> @@ -833,8 +892,12 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
> dma_fence_signal(exec->fence);
>
> if (exec->bo) {
> - for (i = 0; i < exec->bo_count; i++)
> + for (i = 0; i < exec->bo_count; i++) {
> + struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);
> +
> + vc4_bo_dec_usecnt(bo);
> drm_gem_object_put_unlocked(&exec->bo[i]->base);
> + }
> kvfree(exec->bo);
> }
>
> @@ -1098,6 +1161,9 @@ vc4_gem_init(struct drm_device *dev)
> INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
>
> mutex_init(&vc4->power_lock);
> +
> + INIT_LIST_HEAD(&vc4->purgeable.list);
> + mutex_init(&vc4->purgeable.lock);
> }
>
> void
> @@ -1121,3 +1187,78 @@ vc4_gem_destroy(struct drm_device *dev)
> if (vc4->hang_state)
> vc4_free_hang_state(dev, vc4->hang_state);
> }
> +
> +int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file_priv)
> +{
> + struct drm_vc4_gem_madvise *args = data;
> + struct drm_gem_object *gem_obj;
> + struct vc4_bo *bo;
> + int ret;
> +
> + switch (args->madv) {
> + case VC4_MADV_DONTNEED:
> + case VC4_MADV_WILLNEED:
> + break;
> + default:
> + return -EINVAL;
> + }
> +
> + gem_obj = drm_gem_object_lookup(file_priv, args->handle);
> + if (!gem_obj) {
> + DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
> + return -EINVAL;
> + }
bad lookups generally return -ENOENT. Also, I've done my best to keep
DRM_ERROR calls from being reachable from userspace, so let's demote
these to DRM_DEBUG.
Also, with the introduction of a "pad" field in the ioctl, we should
make sure that the pad passed in is 0 (so that, if we need to, we could
put flags in that field in the future).
> diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
> index 2968b3ebb895..83e789dd7b98 100644
> --- a/drivers/gpu/drm/vc4/vc4_plane.c
> +++ b/drivers/gpu/drm/vc4/vc4_plane.c
> @@ -23,6 +23,7 @@
> #include <drm/drm_fb_cma_helper.h>
> #include <drm/drm_plane_helper.h>
>
> +#include "uapi/drm/vc4_drm.h"
> #include "vc4_drv.h"
> #include "vc4_regs.h"
>
> @@ -764,21 +765,40 @@ static int vc4_prepare_fb(struct drm_plane *plane,
> {
> struct vc4_bo *bo;
> struct dma_fence *fence;
> + int ret;
>
> if ((plane->state->fb == state->fb) || !state->fb)
> return 0;
>
> bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
> +
> + ret = vc4_bo_inc_usecnt(bo);
> + if (ret)
> + return ret;
> +
> fence = reservation_object_get_excl_rcu(bo->resv);
> drm_atomic_set_fence_for_plane(state, fence);
>
> return 0;
> }
>
> +static void vc4_cleanup_fb(struct drm_plane *plane,
> + struct drm_plane_state *state)
> +{
> + struct vc4_bo *bo;
> +
> + if (plane->state->fb == state->fb || !state->fb)
> + return;
> +
> + bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
> + vc4_bo_dec_usecnt(bo);
> +}
Huh, I thought we had converted things to drm_gem_fb_get_obj(), but
apparently not. Consistency with the prepare() seems fine, in that
case.
> diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
> index afae87004963..855722ffb5e7 100644
> --- a/include/uapi/drm/vc4_drm.h
> +++ b/include/uapi/drm/vc4_drm.h
> @@ -41,6 +41,7 @@ extern "C" {
> #define DRM_VC4_SET_TILING 0x08
> #define DRM_VC4_GET_TILING 0x09
> #define DRM_VC4_LABEL_BO 0x0a
> +#define DRM_VC4_GEM_MADVISE 0x0b
>
> #define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
> #define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
> @@ -53,6 +54,7 @@ extern "C" {
> #define DRM_IOCTL_VC4_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SET_TILING, struct drm_vc4_set_tiling)
> #define DRM_IOCTL_VC4_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_TILING, struct drm_vc4_get_tiling)
> #define DRM_IOCTL_VC4_LABEL_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_LABEL_BO, struct drm_vc4_label_bo)
> +#define DRM_IOCTL_VC4_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GEM_MADVISE, struct drm_vc4_gem_madvise)
>
> struct drm_vc4_submit_rcl_surface {
> __u32 hindex; /* Handle index, or ~0 if not present. */
> @@ -305,6 +307,7 @@ struct drm_vc4_get_hang_state {
> #define DRM_VC4_PARAM_SUPPORTS_ETC1 4
> #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS 5
> #define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER 6
> +#define DRM_VC4_PARAM_SUPPORTS_MADVISE 7
>
> struct drm_vc4_get_param {
> __u32 param;
> @@ -333,6 +336,21 @@ struct drm_vc4_label_bo {
> __u64 name;
> };
>
> +/*
> + * States prefixed with '__' are internal states and cannot be passed to the
> + * DRM_IOCTL_VC4_GEM_MADVISE ioctl.
> + */
> +#define VC4_MADV_WILLNEED 0
> +#define VC4_MADV_DONTNEED 1
> +#define __VC4_MADV_PURGED 2
> +
> +struct drm_vc4_gem_madvise {
> + __u32 handle;
> + __u32 madv;
> + __u32 retained;
> + __u32 pad;
> +};
> +
> #if defined(__cplusplus)
> }
> #endif
> --
> 2.11.0
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 832 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/dri-devel/attachments/20171012/8928f1d3/attachment-0001.sig>
More information about the dri-devel
mailing list