[RFC PATCH 7/9] drmcg: Add initial support for tracking gpu time usage

Joonas Lahtinen joonas.lahtinen at linux.intel.com
Wed Feb 3 13:25:55 UTC 2021


Quoting Brian Welty (2021-01-26 23:46:24)
> Single control below is added to DRM cgroup controller in order to track
> user execution time for GPU devices.  It is up to device drivers to
> charge execution time to the cgroup via drm_cgroup_try_charge().
> 
>   sched.runtime
>       Read-only value, displays current user execution time for each DRM
>       device. The expectation is that this is incremented by DRM device
>       driver's scheduler upon user context completion or context switch.
>       Units of time are in microseconds for consistency with cpu.stat.

Weren't we also planning for a percentage-style budgeting?

Capping the maximum runtime is definitely useful, but in order to
configure a system for peaceful co-existence of two or more workloads we
must also impose a limit on how big portion of the instantaneous
capacity can be used.

Regards, Joonas

> Signed-off-by: Brian Welty <brian.welty at intel.com>
> ---
>  Documentation/admin-guide/cgroup-v2.rst |  9 +++++++++
>  include/drm/drm_cgroup.h                |  2 ++
>  include/linux/cgroup_drm.h              |  2 ++
>  kernel/cgroup/drm.c                     | 20 ++++++++++++++++++++
>  4 files changed, 33 insertions(+)
> 
> diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
> index ccc25f03a898..f1d0f333a49e 100644
> --- a/Documentation/admin-guide/cgroup-v2.rst
> +++ b/Documentation/admin-guide/cgroup-v2.rst
> @@ -2205,6 +2205,15 @@ thresholds are hit, this would then allow the DRM device driver to invoke
>  some equivalent to OOM-killer or forced memory eviction for the device
>  backed memory in order to attempt to free additional space.
>  
> +The below set of control files are for time accounting of DRM devices. Units
> +of time are in microseconds.
> +
> +  sched.runtime
> +        Read-only value, displays current user execution time for each DRM
> +        device. The expectation is that this is incremented by DRM device
> +        driver's scheduler upon user context completion or context switch.
> +
> +
>  Misc
>  ----
>  
> diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
> index 9ba0e372eeee..315dab8a93b8 100644
> --- a/include/drm/drm_cgroup.h
> +++ b/include/drm/drm_cgroup.h
> @@ -22,6 +22,7 @@ enum drmcg_res_type {
>         DRMCG_TYPE_MEM_CURRENT,
>         DRMCG_TYPE_MEM_MAX,
>         DRMCG_TYPE_MEM_TOTAL,
> +       DRMCG_TYPE_SCHED_RUNTIME,
>         __DRMCG_TYPE_LAST,
>  };
>  
> @@ -79,5 +80,6 @@ void drm_cgroup_uncharge(struct drmcg *drmcg,struct drm_device *dev,
>                          enum drmcg_res_type type, u64 usage)
>  {
>  }
> +
>  #endif /* CONFIG_CGROUP_DRM */
>  #endif /* __DRM_CGROUP_H__ */
> diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
> index 3570636473cf..0fafa663321e 100644
> --- a/include/linux/cgroup_drm.h
> +++ b/include/linux/cgroup_drm.h
> @@ -19,6 +19,8 @@
>   */
>  struct drmcg_device_resource {
>         struct page_counter memory;
> +       seqlock_t sched_lock;
> +       u64 exec_runtime;
>  };
>  
>  /**
> diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
> index 08e75eb67593..64e9d0dbe8c8 100644
> --- a/kernel/cgroup/drm.c
> +++ b/kernel/cgroup/drm.c
> @@ -81,6 +81,7 @@ static inline int init_drmcg_single(struct drmcg *drmcg, struct drm_device *dev)
>         /* set defaults here */
>         page_counter_init(&ddr->memory,
>                           parent_ddr ? &parent_ddr->memory : NULL);
> +       seqlock_init(&ddr->sched_lock);
>         drmcg->dev_resources[minor] = ddr;
>  
>         return 0;
> @@ -287,6 +288,10 @@ static int drmcg_seq_show_fn(int id, void *ptr, void *data)
>                 seq_printf(sf, "%d:%d %llu\n", DRM_MAJOR, minor->index,
>                            minor->dev->drmcg_props.memory_total);
>                 break;
> +       case DRMCG_TYPE_SCHED_RUNTIME:
> +               seq_printf(sf, "%d:%d %llu\n", DRM_MAJOR, minor->index,
> +                          ktime_to_us(ddr->exec_runtime));
> +               break;
>         default:
>                 seq_printf(sf, "%d:%d\n", DRM_MAJOR, minor->index);
>                 break;
> @@ -384,6 +389,12 @@ struct cftype files[] = {
>                 .private = DRMCG_TYPE_MEM_TOTAL,
>                 .flags = CFTYPE_ONLY_ON_ROOT,
>         },
> +       {
> +               .name = "sched.runtime",
> +               .seq_show = drmcg_seq_show,
> +               .private = DRMCG_TYPE_SCHED_RUNTIME,
> +               .flags = CFTYPE_NOT_ON_ROOT,
> +       },
>         { }     /* terminate */
>  };
>  
> @@ -440,6 +451,10 @@ EXPORT_SYMBOL(drmcg_device_early_init);
>   * choose to enact some form of memory reclaim, but the exact behavior is left
>   * to the DRM device driver to define.
>   *
> + * For @res type of DRMCG_TYPE_SCHED_RUNTIME:
> + * For GPU time accounting, add @usage amount of GPU time to @drmcg for
> + * the given device.
> + *
>   * Returns 0 on success.  Otherwise, an error code is returned.
>   */
>  int drm_cgroup_try_charge(struct drmcg *drmcg, struct drm_device *dev,
> @@ -466,6 +481,11 @@ int drm_cgroup_try_charge(struct drmcg *drmcg, struct drm_device *dev,
>                         err = 0;
>                 }
>                 break;
> +       case DRMCG_TYPE_SCHED_RUNTIME:
> +               write_seqlock(&res->sched_lock);
> +               res->exec_runtime = ktime_add(res->exec_runtime, usage);
> +               write_sequnlock(&res->sched_lock);
> +               break;
>         default:
>                 err = -EINVAL;
>                 break;
> -- 
> 2.20.1
> 


More information about the amd-gfx mailing list