[RFC PATCH v2 4/5] drm, cgroup: Add total GEM buffer allocation limit

Kenny Ho y2kenny at gmail.com
Fri May 10 14:57:55 UTC 2019


On Fri, May 10, 2019 at 8:28 AM Christian König
<ckoenig.leichtzumerken at gmail.com> wrote:
>
> Am 09.05.19 um 23:04 schrieb Kenny Ho:
> > +     /* only allow bo from the same cgroup or its ancestor to be imported */
> > +     if (drmcgrp != NULL &&
> > +                     !drmcgrp_is_self_or_ancestor(drmcgrp, obj->drmcgrp)) {
> > +             ret = -EACCES;
> > +             goto out_unlock;
> > +     }
> > +
>
> This will most likely go up in flames.
>
> If I'm not completely mistaken we already use
> drm_gem_prime_fd_to_handle() to exchange handles between different
> cgroups in current container usages.
This is something that I am interested in getting more details from
the broader community because the details affect how likely this will
go up in flames ;).  Note that this check does not block sharing of
handles from cgroup parent to children in the hierarchy, nor does it
blocks sharing of handles within a cgroup.

I am interested to find out, when existing apps share handles between
containers, if there are any expectations on resource management.
Since there are no drm cgroup for current container usage, I expect
the answer to be no.  In this case, the drm cgroup controller can be
disabled on its own (in the context of cgroup-v2's unified hierarchy),
or the process can remain at the root for the drm cgroup hierarchy (in
the context of cgroup-v1.)  If I understand the cgroup api correctly,
that means all process would be part of the root cgroup as far as the
drm controller is concerned and this block will not come into effect.
I have verified that this is indeed the current default behaviour of a
container runtime (runc, which is used by docker, podman and others.)
The new drm cgroup controller is simply ignored and all processes
remain at the root of the hierarchy (since there are no other
cgroups.)  I plan to make contributions to runc (so folks can actually
use this features with docker/podman/k8s, etc.) once things stabilized
on the kernel side.

On the other hand, if there are expectations for resource management
between containers, I would like to know who is the expected manager
and how does it fit into the concept of container (which enforce some
level of isolation.)  One possible manager may be the display server.
But as long as the display server is in a parent cgroup of the apps'
cgroup, the apps can still import handles from the display server
under the current implementation.  My understanding is that this is
most likely the case, with the display server simply sitting at the
default/root cgroup.  But I certainly want to hear more about other
use cases (for example, is running multiple display servers on a
single host a realistic possibility?  Are there people running
multiple display servers inside peer containers?  If so, how do they
coordinate resources?)

I should probably summarize some of these into the commit message.

Regards,
Kenny



> Christian.
>
> >       if (obj->dma_buf) {
> >               WARN_ON(obj->dma_buf != dma_buf);
> >       } else {
> > diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
> > index ddb9eab64360..8711b7c5f7bf 100644
> > --- a/include/drm/drm_cgroup.h
> > +++ b/include/drm/drm_cgroup.h
> > @@ -4,12 +4,20 @@
> >   #ifndef __DRM_CGROUP_H__
> >   #define __DRM_CGROUP_H__
> >
> > +#include <linux/cgroup_drm.h>
> > +
> >   #ifdef CONFIG_CGROUP_DRM
> >
> >   int drmcgrp_register_device(struct drm_device *device);
> > -
> >   int drmcgrp_unregister_device(struct drm_device *device);
> > -
> > +bool drmcgrp_is_self_or_ancestor(struct drmcgrp *self,
> > +             struct drmcgrp *relative);
> > +void drmcgrp_chg_bo_alloc(struct drmcgrp *drmcgrp, struct drm_device *dev,
> > +             size_t size);
> > +void drmcgrp_unchg_bo_alloc(struct drmcgrp *drmcgrp, struct drm_device *dev,
> > +             size_t size);
> > +bool drmcgrp_bo_can_allocate(struct task_struct *task, struct drm_device *dev,
> > +             size_t size);
> >   #else
> >   static inline int drmcgrp_register_device(struct drm_device *device)
> >   {
> > @@ -20,5 +28,27 @@ static inline int drmcgrp_unregister_device(struct drm_device *device)
> >   {
> >       return 0;
> >   }
> > +
> > +static inline bool drmcgrp_is_self_or_ancestor(struct drmcgrp *self,
> > +             struct drmcgrp *relative)
> > +{
> > +     return false;
> > +}
> > +
> > +static inline void drmcgrp_chg_bo_alloc(struct drmcgrp *drmcgrp,
> > +             struct drm_device *dev, size_t size)
> > +{
> > +}
> > +
> > +static inline void drmcgrp_unchg_bo_alloc(struct drmcgrp *drmcgrp,
> > +             struct drm_device *dev, size_t size)
> > +{
> > +}
> > +
> > +static inline bool drmcgrp_bo_can_allocate(struct task_struct *task,
> > +             struct drm_device *dev, size_t size)
> > +{
> > +     return true;
> > +}
> >   #endif /* CONFIG_CGROUP_DRM */
> >   #endif /* __DRM_CGROUP_H__ */
> > diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
> > index c95727425284..02854c674b5c 100644
> > --- a/include/drm/drm_gem.h
> > +++ b/include/drm/drm_gem.h
> > @@ -272,6 +272,17 @@ struct drm_gem_object {
> >        *
> >        */
> >       const struct drm_gem_object_funcs *funcs;
> > +
> > +     /**
> > +      * @drmcgrp:
> > +      *
> > +      * DRM cgroup this GEM object belongs to.
> > +         *
> > +         * This is used to track and limit the amount of GEM objects a user
> > +         * can allocate.  Since GEM objects can be shared, this is also used
> > +         * to ensure GEM objects are only shared within the same cgroup.
> > +      */
> > +     struct drmcgrp *drmcgrp;
> >   };
> >
> >   /**
> > diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
> > index d7ccf434ca6b..fe14ba7bb1cf 100644
> > --- a/include/linux/cgroup_drm.h
> > +++ b/include/linux/cgroup_drm.h
> > @@ -15,6 +15,9 @@
> >
> >   struct drmcgrp_device_resource {
> >       /* for per device stats */
> > +     s64                     bo_stats_total_allocated;
> > +
> > +     s64                     bo_limits_total_allocated;
> >   };
> >
> >   struct drmcgrp {
> > diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
> > index f9ef4bf042d8..bc3abff09113 100644
> > --- a/kernel/cgroup/drm.c
> > +++ b/kernel/cgroup/drm.c
> > @@ -15,6 +15,22 @@ static DEFINE_MUTEX(drmcgrp_mutex);
> >   struct drmcgrp_device {
> >       struct drm_device       *dev;
> >       struct mutex            mutex;
> > +
> > +     s64                     bo_limits_total_allocated_default;
> > +};
> > +
> > +#define DRMCG_CTF_PRIV_SIZE 3
> > +#define DRMCG_CTF_PRIV_MASK GENMASK((DRMCG_CTF_PRIV_SIZE - 1), 0)
> > +
> > +enum drmcgrp_res_type {
> > +     DRMCGRP_TYPE_BO_TOTAL,
> > +};
> > +
> > +enum drmcgrp_file_type {
> > +     DRMCGRP_FTYPE_STATS,
> > +     DRMCGRP_FTYPE_MAX,
> > +     DRMCGRP_FTYPE_DEFAULT,
> > +     DRMCGRP_FTYPE_HELP,
> >   };
> >
> >   /* indexed by drm_minor for access speed */
> > @@ -53,6 +69,10 @@ static inline int init_drmcgrp_single(struct drmcgrp *drmcgrp, int i)
> >       }
> >
> >       /* set defaults here */
> > +     if (known_drmcgrp_devs[i] != NULL) {
> > +             ddr->bo_limits_total_allocated =
> > +               known_drmcgrp_devs[i]->bo_limits_total_allocated_default;
> > +     }
> >
> >       return 0;
> >   }
> > @@ -99,7 +119,187 @@ drmcgrp_css_alloc(struct cgroup_subsys_state *parent_css)
> >       return &drmcgrp->css;
> >   }
> >
> > +static inline void drmcgrp_print_stats(struct drmcgrp_device_resource *ddr,
> > +             struct seq_file *sf, enum drmcgrp_res_type type)
> > +{
> > +     if (ddr == NULL) {
> > +             seq_puts(sf, "\n");
> > +             return;
> > +     }
> > +
> > +     switch (type) {
> > +     case DRMCGRP_TYPE_BO_TOTAL:
> > +             seq_printf(sf, "%lld\n", ddr->bo_stats_total_allocated);
> > +             break;
> > +     default:
> > +             seq_puts(sf, "\n");
> > +             break;
> > +     }
> > +}
> > +
> > +static inline void drmcgrp_print_limits(struct drmcgrp_device_resource *ddr,
> > +             struct seq_file *sf, enum drmcgrp_res_type type)
> > +{
> > +     if (ddr == NULL) {
> > +             seq_puts(sf, "\n");
> > +             return;
> > +     }
> > +
> > +     switch (type) {
> > +     case DRMCGRP_TYPE_BO_TOTAL:
> > +             seq_printf(sf, "%lld\n", ddr->bo_limits_total_allocated);
> > +             break;
> > +     default:
> > +             seq_puts(sf, "\n");
> > +             break;
> > +     }
> > +}
> > +
> > +static inline void drmcgrp_print_default(struct drmcgrp_device *ddev,
> > +             struct seq_file *sf, enum drmcgrp_res_type type)
> > +{
> > +     if (ddev == NULL) {
> > +             seq_puts(sf, "\n");
> > +             return;
> > +     }
> > +
> > +     switch (type) {
> > +     case DRMCGRP_TYPE_BO_TOTAL:
> > +             seq_printf(sf, "%lld\n", ddev->bo_limits_total_allocated_default);
> > +             break;
> > +     default:
> > +             seq_puts(sf, "\n");
> > +             break;
> > +     }
> > +}
> > +
> > +static inline void drmcgrp_print_help(int cardNum, struct seq_file *sf,
> > +             enum drmcgrp_res_type type)
> > +{
> > +     switch (type) {
> > +     case DRMCGRP_TYPE_BO_TOTAL:
> > +             seq_printf(sf,
> > +             "Total amount of buffer allocation in bytes for card%d\n",
> > +             cardNum);
> > +             break;
> > +     default:
> > +             seq_puts(sf, "\n");
> > +             break;
> > +     }
> > +}
> > +
> > +int drmcgrp_bo_show(struct seq_file *sf, void *v)
> > +{
> > +     struct drmcgrp *drmcgrp = css_drmcgrp(seq_css(sf));
> > +     struct drmcgrp_device_resource *ddr = NULL;
> > +     enum drmcgrp_file_type f_type = seq_cft(sf)->
> > +             private & DRMCG_CTF_PRIV_MASK;
> > +     enum drmcgrp_res_type type = seq_cft(sf)->
> > +             private >> DRMCG_CTF_PRIV_SIZE;
> > +     struct drmcgrp_device *ddev;
> > +     int i;
> > +
> > +     for (i = 0; i <= max_minor; i++) {
> > +             ddr = drmcgrp->dev_resources[i];
> > +             ddev = known_drmcgrp_devs[i];
> > +
> > +             switch (f_type) {
> > +             case DRMCGRP_FTYPE_STATS:
> > +                     drmcgrp_print_stats(ddr, sf, type);
> > +                     break;
> > +             case DRMCGRP_FTYPE_MAX:
> > +                     drmcgrp_print_limits(ddr, sf, type);
> > +                     break;
> > +             case DRMCGRP_FTYPE_DEFAULT:
> > +                     drmcgrp_print_default(ddev, sf, type);
> > +                     break;
> > +             case DRMCGRP_FTYPE_HELP:
> > +                     drmcgrp_print_help(i, sf, type);
> > +                     break;
> > +             default:
> > +                     seq_puts(sf, "\n");
> > +                     break;
> > +             }
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> > +ssize_t drmcgrp_bo_limit_write(struct kernfs_open_file *of, char *buf,
> > +             size_t nbytes, loff_t off)
> > +{
> > +     struct drmcgrp *drmcgrp = css_drmcgrp(of_css(of));
> > +     enum drmcgrp_res_type type = of_cft(of)->private >> DRMCG_CTF_PRIV_SIZE;
> > +     char *cft_name = of_cft(of)->name;
> > +     char *limits = strstrip(buf);
> > +     struct drmcgrp_device_resource *ddr;
> > +     char *sval;
> > +     s64 val;
> > +     int i = 0;
> > +     int rc;
> > +
> > +     while (i <= max_minor && limits != NULL) {
> > +             sval =  strsep(&limits, "\n");
> > +             rc = kstrtoll(sval, 0, &val);
> > +
> > +             if (rc) {
> > +                     pr_err("drmcgrp: %s: minor %d, err %d. ",
> > +                             cft_name, i, rc);
> > +                     pr_cont_cgroup_name(drmcgrp->css.cgroup);
> > +                     pr_cont("\n");
> > +             } else {
> > +                     ddr = drmcgrp->dev_resources[i];
> > +                     switch (type) {
> > +                     case DRMCGRP_TYPE_BO_TOTAL:
> > +                                if (val < 0) continue;
> > +                             ddr->bo_limits_total_allocated = val;
> > +                             break;
> > +                     default:
> > +                             break;
> > +                     }
> > +             }
> > +
> > +             i++;
> > +     }
> > +
> > +     if (i <= max_minor) {
> > +             pr_err("drmcgrp: %s: less entries than # of drm devices. ",
> > +                             cft_name);
> > +             pr_cont_cgroup_name(drmcgrp->css.cgroup);
> > +             pr_cont("\n");
> > +     }
> > +
> > +     return nbytes;
> > +}
> > +
> >   struct cftype files[] = {
> > +     {
> > +             .name = "buffer.total.stats",
> > +             .seq_show = drmcgrp_bo_show,
> > +             .private = (DRMCGRP_TYPE_BO_TOTAL << DRMCG_CTF_PRIV_SIZE) |
> > +                     DRMCGRP_FTYPE_STATS,
> > +     },
> > +     {
> > +             .name = "buffer.total.default",
> > +             .seq_show = drmcgrp_bo_show,
> > +             .flags = CFTYPE_ONLY_ON_ROOT,
> > +             .private = (DRMCGRP_TYPE_BO_TOTAL << DRMCG_CTF_PRIV_SIZE) |
> > +                     DRMCGRP_FTYPE_DEFAULT,
> > +     },
> > +     {
> > +             .name = "buffer.total.help",
> > +             .seq_show = drmcgrp_bo_show,
> > +             .flags = CFTYPE_ONLY_ON_ROOT,
> > +             .private = (DRMCGRP_TYPE_BO_TOTAL << DRMCG_CTF_PRIV_SIZE) |
> > +                     DRMCGRP_FTYPE_HELP,
> > +     },
> > +     {
> > +             .name = "buffer.total.max",
> > +             .write = drmcgrp_bo_limit_write,
> > +             .seq_show = drmcgrp_bo_show,
> > +             .private = (DRMCGRP_TYPE_BO_TOTAL << DRMCG_CTF_PRIV_SIZE) |
> > +                     DRMCGRP_FTYPE_MAX,
> > +     },
> >       { }     /* terminate */
> >   };
> >
> > @@ -122,6 +322,8 @@ int drmcgrp_register_device(struct drm_device *dev)
> >               return -ENOMEM;
> >
> >       ddev->dev = dev;
> > +     ddev->bo_limits_total_allocated_default = S64_MAX;
> > +
> >       mutex_init(&ddev->mutex);
> >
> >       mutex_lock(&drmcgrp_mutex);
> > @@ -156,3 +358,81 @@ int drmcgrp_unregister_device(struct drm_device *dev)
> >       return 0;
> >   }
> >   EXPORT_SYMBOL(drmcgrp_unregister_device);
> > +
> > +bool drmcgrp_is_self_or_ancestor(struct drmcgrp *self, struct drmcgrp *relative)
> > +{
> > +     for (; self != NULL; self = parent_drmcgrp(self))
> > +             if (self == relative)
> > +                     return true;
> > +
> > +     return false;
> > +}
> > +EXPORT_SYMBOL(drmcgrp_is_self_or_ancestor);
> > +
> > +bool drmcgrp_bo_can_allocate(struct task_struct *task, struct drm_device *dev,
> > +             size_t size)
> > +{
> > +     struct drmcgrp *drmcgrp = get_drmcgrp(task);
> > +     struct drmcgrp_device_resource *ddr;
> > +     struct drmcgrp_device_resource *d;
> > +     int devIdx = dev->primary->index;
> > +     bool result = true;
> > +     s64 delta = 0;
> > +
> > +     if (drmcgrp == NULL || drmcgrp == root_drmcgrp)
> > +             return true;
> > +
> > +     ddr = drmcgrp->dev_resources[devIdx];
> > +     mutex_lock(&known_drmcgrp_devs[devIdx]->mutex);
> > +     for ( ; drmcgrp != root_drmcgrp; drmcgrp = parent_drmcgrp(drmcgrp)) {
> > +             d = drmcgrp->dev_resources[devIdx];
> > +             delta = d->bo_limits_total_allocated -
> > +                             d->bo_stats_total_allocated;
> > +
> > +             if (delta <= 0 || size > delta) {
> > +                     result = false;
> > +                     break;
> > +             }
> > +     }
> > +     mutex_unlock(&known_drmcgrp_devs[devIdx]->mutex);
> > +
> > +     return result;
> > +}
> > +EXPORT_SYMBOL(drmcgrp_bo_can_allocate);
> > +
> > +void drmcgrp_chg_bo_alloc(struct drmcgrp *drmcgrp, struct drm_device *dev,
> > +             size_t size)
> > +{
> > +     struct drmcgrp_device_resource *ddr;
> > +     int devIdx = dev->primary->index;
> > +
> > +     if (drmcgrp == NULL || known_drmcgrp_devs[devIdx] == NULL)
> > +             return;
> > +
> > +     mutex_lock(&known_drmcgrp_devs[devIdx]->mutex);
> > +     for ( ; drmcgrp != NULL; drmcgrp = parent_drmcgrp(drmcgrp)) {
> > +             ddr = drmcgrp->dev_resources[devIdx];
> > +
> > +             ddr->bo_stats_total_allocated += (s64)size;
> > +     }
> > +     mutex_unlock(&known_drmcgrp_devs[devIdx]->mutex);
> > +}
> > +EXPORT_SYMBOL(drmcgrp_chg_bo_alloc);
> > +
> > +void drmcgrp_unchg_bo_alloc(struct drmcgrp *drmcgrp, struct drm_device *dev,
> > +             size_t size)
> > +{
> > +     struct drmcgrp_device_resource *ddr;
> > +     int devIdx = dev->primary->index;
> > +
> > +     if (drmcgrp == NULL || known_drmcgrp_devs[devIdx] == NULL)
> > +             return;
> > +
> > +     ddr = drmcgrp->dev_resources[devIdx];
> > +     mutex_lock(&known_drmcgrp_devs[devIdx]->mutex);
> > +     for ( ; drmcgrp != NULL; drmcgrp = parent_drmcgrp(drmcgrp))
> > +             drmcgrp->dev_resources[devIdx]->bo_stats_total_allocated
> > +                     -= (s64)size;
> > +     mutex_unlock(&known_drmcgrp_devs[devIdx]->mutex);
> > +}
> > +EXPORT_SYMBOL(drmcgrp_unchg_bo_alloc);
>


More information about the amd-gfx mailing list