[Mesa-dev] [PATCH 1/3] drm-uapi: Pull i915_drm.h changes for context cloning
Jordan Justen
jordan.l.justen at intel.com
Sun Mar 31 09:53:06 UTC 2019
Where are these changes from (repo/commit)? It would be good to
reference the source in the commit message.
I suspect that the answer might mean that these patches should be
labeled RFC.
-Jordan
On 2019-03-25 03:58:58, Chris Wilson wrote:
> For use in GPU recovery and pipeline construction.
> ---
> include/drm-uapi/i915_drm.h | 389 +++++++++++++++++++++++++++++-------
> 1 file changed, 317 insertions(+), 72 deletions(-)
>
> diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
> index d2792ab3640..59baacd265d 100644
> --- a/include/drm-uapi/i915_drm.h
> +++ b/include/drm-uapi/i915_drm.h
> @@ -62,6 +62,28 @@ extern "C" {
> #define I915_ERROR_UEVENT "ERROR"
> #define I915_RESET_UEVENT "RESET"
>
> +/*
> + * i915_user_extension: Base class for defining a chain of extensions
> + *
> + * Many interfaces need to grow over time. In most cases we can simply
> + * extend the struct and have userspace pass in more data. Another option,
> + * as demonstrated by Vulkan's approach to providing extensions for forward
> + * and backward compatibility, is to use a list of optional structs to
> + * provide those extra details.
> + *
> + * The key advantage to using an extension chain is that it allows us to
> + * redefine the interface more easily than an ever growing struct of
> + * increasing complexity, and for large parts of that interface to be
> + * entirely optional. The downside is more pointer chasing; chasing across
> + * the boundary with pointers encapsulated inside u64.
> + */
> +struct i915_user_extension {
> + __u64 next_extension;
> + __u32 name;
> + __u32 flags; /* All undefined bits must be zero. */
> + __u32 rsvd[4]; /* Reserved for future use; must be zero. */
> +};
> +
> /*
> * MOCS indexes used for GPU surfaces, defining the cacheability of the
> * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
> @@ -99,9 +121,14 @@ enum drm_i915_gem_engine_class {
> I915_ENGINE_CLASS_VIDEO = 2,
> I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
>
> + /* should be kept compact */
> +
> I915_ENGINE_CLASS_INVALID = -1
> };
>
> +#define I915_ENGINE_CLASS_INVALID_NONE -1
> +#define I915_ENGINE_CLASS_INVALID_VIRTUAL 0
> +
> /**
> * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
> *
> @@ -319,6 +346,9 @@ typedef struct _drm_i915_sarea {
> #define DRM_I915_PERF_ADD_CONFIG 0x37
> #define DRM_I915_PERF_REMOVE_CONFIG 0x38
> #define DRM_I915_QUERY 0x39
> +#define DRM_I915_GEM_VM_CREATE 0x3a
> +#define DRM_I915_GEM_VM_DESTROY 0x3b
> +/* Must be kept compact -- no holes */
>
> #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
> #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -367,6 +397,7 @@ typedef struct _drm_i915_sarea {
> #define DRM_IOCTL_I915_GET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey)
> #define DRM_IOCTL_I915_GEM_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT, struct drm_i915_gem_wait)
> #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
> +#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_ext)
> #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
> #define DRM_IOCTL_I915_REG_READ DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
> #define DRM_IOCTL_I915_GET_RESET_STATS DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
> @@ -377,6 +408,8 @@ typedef struct _drm_i915_sarea {
> #define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
> #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
> #define DRM_IOCTL_I915_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
> +#define DRM_IOCTL_I915_GEM_VM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)
> +#define DRM_IOCTL_I915_GEM_VM_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)
>
> /* Allow drivers to submit batchbuffers directly to hardware, relying
> * on the security mechanisms provided by hardware.
> @@ -476,6 +509,7 @@ typedef struct drm_i915_irq_wait {
> #define I915_SCHEDULER_CAP_ENABLED (1ul << 0)
> #define I915_SCHEDULER_CAP_PRIORITY (1ul << 1)
> #define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2)
> +#define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3)
>
> #define I915_PARAM_HUC_STATUS 42
>
> @@ -559,6 +593,14 @@ typedef struct drm_i915_irq_wait {
> */
> #define I915_PARAM_MMAP_GTT_COHERENT 52
>
> +/*
> + * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel
> + * execution through use of explicit fence support.
> + * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
> + */
> +#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
> +/* Must be kept compact -- no holes and well documented */
> +
> typedef struct drm_i915_getparam {
> __s32 param;
> /*
> @@ -574,6 +616,7 @@ typedef struct drm_i915_getparam {
> #define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY 2
> #define I915_SETPARAM_ALLOW_BATCHBUFFER 3
> #define I915_SETPARAM_NUM_USED_FENCES 4
> +/* Must be kept compact -- no holes */
>
> typedef struct drm_i915_setparam {
> int param;
> @@ -972,7 +1015,7 @@ struct drm_i915_gem_execbuffer2 {
> * struct drm_i915_gem_exec_fence *fences.
> */
> __u64 cliprects_ptr;
> -#define I915_EXEC_RING_MASK (7<<0)
> +#define I915_EXEC_RING_MASK (0x3f)
> #define I915_EXEC_DEFAULT (0<<0)
> #define I915_EXEC_RENDER (1<<0)
> #define I915_EXEC_BSD (2<<0)
> @@ -1078,7 +1121,16 @@ struct drm_i915_gem_execbuffer2 {
> */
> #define I915_EXEC_FENCE_ARRAY (1<<19)
>
> -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
> +/*
> + * Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represent
> + * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
> + * the batch.
> + *
> + * Returns -EINVAL if the sync_file fd cannot be found.
> + */
> +#define I915_EXEC_FENCE_SUBMIT (1 << 20)
> +
> +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
>
> #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
> #define i915_execbuffer2_set_context_id(eb2, context) \
> @@ -1120,32 +1172,34 @@ struct drm_i915_gem_busy {
> * as busy may become idle before the ioctl is completed.
> *
> * Furthermore, if the object is busy, which engine is busy is only
> - * provided as a guide. There are race conditions which prevent the
> - * report of which engines are busy from being always accurate.
> - * However, the converse is not true. If the object is idle, the
> - * result of the ioctl, that all engines are idle, is accurate.
> + * provided as a guide and only indirectly by reporting its class
> + * (there may be more than one engine in each class). There are race
> + * conditions which prevent the report of which engines are busy from
> + * being always accurate. However, the converse is not true. If the
> + * object is idle, the result of the ioctl, that all engines are idle,
> + * is accurate.
> *
> * The returned dword is split into two fields to indicate both
> - * the engines on which the object is being read, and the
> - * engine on which it is currently being written (if any).
> + * the engine classes on which the object is being read, and the
> + * engine class on which it is currently being written (if any).
> *
> * The low word (bits 0:15) indicate if the object is being written
> * to by any engine (there can only be one, as the GEM implicit
> * synchronisation rules force writes to be serialised). Only the
> - * engine for the last write is reported.
> + * engine class (offset by 1, I915_ENGINE_CLASS_RENDER is reported as
> + * 1 not 0 etc) for the last write is reported.
> *
> - * The high word (bits 16:31) are a bitmask of which engines are
> - * currently reading from the object. Multiple engines may be
> + * The high word (bits 16:31) are a bitmask of which engine classes
> + * are currently reading from the object. Multiple engines may be
> * reading from the object simultaneously.
> *
> - * The value of each engine is the same as specified in the
> - * EXECBUFFER2 ioctl, i.e. I915_EXEC_RENDER, I915_EXEC_BSD etc.
> - * Note I915_EXEC_DEFAULT is a symbolic value and is mapped to
> - * the I915_EXEC_RENDER engine for execution, and so it is never
> + * The value of each engine class is the same as specified in the
> + * I915_CONTEXT_PARAM_ENGINES parameter and via perf, i.e.
> + * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc.
> * reported as active itself. Some hardware may have parallel
> * execution engines, e.g. multiple media engines, which are
> - * mapped to the same identifier in the EXECBUFFER2 ioctl and
> - * so are not separately reported for busyness.
> + * mapped to the same class identifier and so are not separately
> + * reported for busyness.
> *
> * Caveat emptor:
> * Only the boolean result of this query is reliable; that is whether
> @@ -1412,65 +1466,18 @@ struct drm_i915_gem_wait {
> };
>
> struct drm_i915_gem_context_create {
> - /* output: id of new context*/
> - __u32 ctx_id;
> - __u32 pad;
> -};
> -
> -struct drm_i915_gem_context_destroy {
> - __u32 ctx_id;
> + __u32 ctx_id; /* output: id of new context*/
> __u32 pad;
> };
>
> -struct drm_i915_reg_read {
> - /*
> - * Register offset.
> - * For 64bit wide registers where the upper 32bits don't immediately
> - * follow the lower 32bits, the offset of the lower 32bits must
> - * be specified
> - */
> - __u64 offset;
> -#define I915_REG_READ_8B_WA (1ul << 0)
> -
> - __u64 val; /* Return value */
> -};
> -/* Known registers:
> - *
> - * Render engine timestamp - 0x2358 + 64bit - gen7+
> - * - Note this register returns an invalid value if using the default
> - * single instruction 8byte read, in order to workaround that pass
> - * flag I915_REG_READ_8B_WA in offset field.
> - *
> - */
> -
> -struct drm_i915_reset_stats {
> - __u32 ctx_id;
> +struct drm_i915_gem_context_create_ext {
> + __u32 ctx_id; /* output: id of new context*/
> __u32 flags;
> -
> - /* All resets since boot/module reload, for all contexts */
> - __u32 reset_count;
> -
> - /* Number of batches lost when active in GPU, for this context */
> - __u32 batch_active;
> -
> - /* Number of batches lost pending for execution, for this context */
> - __u32 batch_pending;
> -
> - __u32 pad;
> -};
> -
> -struct drm_i915_gem_userptr {
> - __u64 user_ptr;
> - __u64 user_size;
> - __u32 flags;
> -#define I915_USERPTR_READ_ONLY 0x1
> -#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
> - /**
> - * Returned handle for the object.
> - *
> - * Object handles are nonzero.
> - */
> - __u32 handle;
> +#define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS (1u << 0)
> +#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1u << 1)
> +#define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \
> + (-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1))
> + __u64 extensions;
> };
>
> struct drm_i915_gem_context_param {
> @@ -1491,6 +1498,63 @@ struct drm_i915_gem_context_param {
> * drm_i915_gem_context_param_sseu.
> */
> #define I915_CONTEXT_PARAM_SSEU 0x7
> +
> +/*
> + * Not all clients may want to attempt automatic recovery of a context after
> + * a hang (for example, some clients may only submit very small incremental
> + * batches relying on known logical state of previous batches which will never
> + * recover correctly and each attempt will hang), and so would prefer that
> + * the context is forever banned instead.
> + *
> + * If set to false (0), after a reset, subsequent (and in flight) rendering
> + * from this context is discarded, and the client will need to create a new
> + * context to use instead.
> + *
> + * If set to true (1), the kernel will automatically attempt to recover the
> + * context by skipping the hanging batch and executing the next batch starting
> + * from the default context state (discarding the incomplete logical context
> + * state lost due to the reset).
> + *
> + * On creation, all new contexts are marked as recoverable.
> + */
> +#define I915_CONTEXT_PARAM_RECOVERABLE 0x8
> +
> + /*
> + * The id of the associated virtual memory address space (ppGTT) of
> + * this context. Can be retrieved and passed to another context
> + * (on the same fd) for both to use the same ppGTT and so share
> + * address layouts, and avoid reloading the page tables on context
> + * switches between themselves.
> + *
> + * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
> + */
> +#define I915_CONTEXT_PARAM_VM 0x9
> +
> +/*
> + * I915_CONTEXT_PARAM_ENGINES:
> + *
> + * Bind this context to operate on this subset of available engines. Henceforth,
> + * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
> + * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
> + * and upwards. Slots 0...N are filled in using the specified (class, instance).
> + * Use
> + * engine_class: I915_ENGINE_CLASS_INVALID,
> + * engine_instance: I915_ENGINE_CLASS_INVALID_NONE
> + * to specify a gap in the array that can be filled in later, e.g. by a
> + * virtual engine used for load balancing.
> + *
> + * Setting the number of engines bound to the context to 0, by passing a zero
> + * sized argument, will revert back to default settings.
> + *
> + * See struct i915_context_param_engines.
> + *
> + * Extensions:
> + * i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
> + * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
> + */
> +#define I915_CONTEXT_PARAM_ENGINES 0xa
> +/* Must be kept compact -- no holes and well documented */
> +
> __u64 value;
> };
>
> @@ -1553,6 +1617,186 @@ struct drm_i915_gem_context_param_sseu {
> __u32 rsvd;
> };
>
> +/*
> + * i915_context_engines_load_balance:
> + *
> + * Enable load balancing across this set of engines.
> + *
> + * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that when
> + * used will proxy the execbuffer request onto one of the set of engines
> + * in such a way as to distribute the load evenly across the set.
> + *
> + * The set of engines must be compatible (e.g. the same HW class) as they
> + * will share the same logical GPU context and ring.
> + *
> + * To intermix rendering with the virtual engine and direct rendering onto
> + * the backing engines (bypassing the load balancing proxy), the context must
> + * be defined to use a single timeline for all engines.
> + */
> +struct i915_context_engines_load_balance {
> + struct i915_user_extension base;
> +
> + __u16 engine_index;
> + __u16 mbz16; /* reserved for future use; must be zero */
> + __u32 flags; /* all undefined flags must be zero */
> +
> + __u64 engines_mask; /* selection mask of engines[] */
> +
> + __u64 mbz64[4]; /* reserved for future use; must be zero */
> +};
> +
> +/*
> + * i915_context_engines_bond:
> + *
> + * Construct bonded pairs for execution within a virtual engine.
> + *
> + * All engines are equal, but some are more equal than others. Given
> + * the distribution of resources in the HW, it may be preferable to run
> + * a request on a given subset of engines in parallel to a request on a
> + * specific engine. We enable this selection of engines within a virtual
> + * engine by specifying bonding pairs, for any given master engine we will
> + * only execute on one of the corresponding siblings within the virtual engine.
> + *
> + * To execute a request in parallel on the master engine and a sibling requires
> + * coordination with a I915_EXEC_FENCE_SUBMIT.
> + */
> +struct i915_context_engines_bond {
> + struct i915_user_extension base;
> +
> + __u16 virtual_index; /* index of virtual engine in ctx->engines[] */
> + __u16 mbz;
> +
> + __u16 master_class;
> + __u16 master_instance;
> +
> + __u64 sibling_mask; /* bitmask of BIT(sibling_index) wrt the v.engine */
> + __u64 flags; /* all undefined flags must be zero */
> +};
> +
> +struct i915_context_param_engines {
> + __u64 extensions; /* linked chain of extension blocks, 0 terminates */
> +#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0
> +#define I915_CONTEXT_ENGINES_EXT_BOND 1
> +
> + struct {
> + __u16 engine_class; /* see enum drm_i915_gem_engine_class */
> + __u16 engine_instance;
> + } class_instance[0];
> +} __attribute__((packed));
> +
> +#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \
> + __u64 extensions; \
> + struct { \
> + __u16 engine_class; \
> + __u16 engine_instance; \
> + } class_instance[N__]; \
> +} __attribute__((packed)) name__
> +
> +struct drm_i915_gem_context_create_ext_setparam {
> +#define I915_CONTEXT_CREATE_EXT_SETPARAM 0
> + struct i915_user_extension base;
> + struct drm_i915_gem_context_param param;
> +};
> +
> +struct drm_i915_gem_context_create_ext_clone {
> +#define I915_CONTEXT_CREATE_EXT_CLONE 1
> + struct i915_user_extension base;
> + __u32 clone_id;
> + __u32 flags;
> +#define I915_CONTEXT_CLONE_ENGINES (1u << 0)
> +#define I915_CONTEXT_CLONE_FLAGS (1u << 1)
> +#define I915_CONTEXT_CLONE_SCHEDATTR (1u << 2)
> +#define I915_CONTEXT_CLONE_SSEU (1u << 3)
> +#define I915_CONTEXT_CLONE_TIMELINE (1u << 4)
> +#define I915_CONTEXT_CLONE_VM (1u << 5)
> +#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
> + __u64 rsvd;
> +};
> +
> +struct drm_i915_gem_context_destroy {
> + __u32 ctx_id;
> + __u32 pad;
> +};
> +
> +/*
> + * DRM_I915_GEM_VM_CREATE -
> + *
> + * Create a new virtual memory address space (ppGTT) for use within a context
> + * on the same file. Extensions can be provided to configure exactly how the
> + * address space is setup upon creation.
> + *
> + * The id of the new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM is
> + * returned in the outparam @id.
> + *
> + * No flags are defined, with all bits reserved and must be zero.
> + *
> + * An extension chain may be provided, starting with @extensions, and terminated
> + * by the @next_extension being 0. Currently, no extensions are defined.
> + *
> + * DRM_I915_GEM_VM_DESTROY -
> + *
> + * Destroys a previously created VM id, specified in @id.
> + *
> + * No extensions or flags are allowed currently, and so must be zero.
> + */
> +struct drm_i915_gem_vm_control {
> + __u64 extensions;
> + __u32 flags;
> + __u32 vm_id;
> +};
> +
> +struct drm_i915_reg_read {
> + /*
> + * Register offset.
> + * For 64bit wide registers where the upper 32bits don't immediately
> + * follow the lower 32bits, the offset of the lower 32bits must
> + * be specified
> + */
> + __u64 offset;
> +#define I915_REG_READ_8B_WA (1ul << 0)
> +
> + __u64 val; /* Return value */
> +};
> +
> +/* Known registers:
> + *
> + * Render engine timestamp - 0x2358 + 64bit - gen7+
> + * - Note this register returns an invalid value if using the default
> + * single instruction 8byte read, in order to workaround that pass
> + * flag I915_REG_READ_8B_WA in offset field.
> + *
> + */
> +
> +struct drm_i915_reset_stats {
> + __u32 ctx_id;
> + __u32 flags;
> +
> + /* All resets since boot/module reload, for all contexts */
> + __u32 reset_count;
> +
> + /* Number of batches lost when active in GPU, for this context */
> + __u32 batch_active;
> +
> + /* Number of batches lost pending for execution, for this context */
> + __u32 batch_pending;
> +
> + __u32 pad;
> +};
> +
> +struct drm_i915_gem_userptr {
> + __u64 user_ptr;
> + __u64 user_size;
> + __u32 flags;
> +#define I915_USERPTR_READ_ONLY 0x1
> +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
> + /**
> + * Returned handle for the object.
> + *
> + * Object handles are nonzero.
> + */
> + __u32 handle;
> +};
> +
> enum drm_i915_oa_format {
> I915_OA_FORMAT_A13 = 1, /* HSW only */
> I915_OA_FORMAT_A29, /* HSW only */
> @@ -1714,6 +1958,7 @@ struct drm_i915_perf_oa_config {
> struct drm_i915_query_item {
> __u64 query_id;
> #define DRM_I915_QUERY_TOPOLOGY_INFO 1
> +/* Must be kept compact -- no holes and well documented */
>
> /*
> * When set to zero by userspace, this is filled with the size of the
> --
> 2.20.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list