[Intel-gfx] [PATCH v2 2/6] drm/i915: Cache last cmd descriptor when parsing
Ville Syrjälä
ville.syrjala at linux.intel.com
Fri Nov 20 07:08:06 PST 2015
On Fri, Nov 20, 2015 at 10:55:57AM +0000, Chris Wilson wrote:
> The cmd parser has the biggest impact on the BLT ring, because it is
> relatively verbose composed to the other engines as the vertex data is
> inline. It also typically has runs of repeating commands (again since
> the vertex data is inline, it typically has sequences of XY_SETUP_BLT,
> XY_SCANLINE_BLT..) We can easily reduce the impact of cmdparsing on
> benchmarks by then caching the last descriptor and comparing it against
> the next command header. To get maximum benefit, we also want to take
> advantage of skipping a few validations and length determinations if the
> header is unchanged between commands.
>
> ivb i7-3720QM:
> x11perf -dot: before 52.3M, after 124M (max 203M)
> glxgears: before 7310 fps, after 7550 fps (max 7860 fps)
>
> v2: Fix initial cached cmd descriptor to match MI_NOOP.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_cmd_parser.c | 133 +++++++++++++++------------------
> drivers/gpu/drm/i915/i915_drv.h | 10 +--
> 2 files changed, 64 insertions(+), 79 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
> index db3a04ea036a..c6f6d9f2b2ce 100644
> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
> @@ -794,6 +794,9 @@ void i915_cmd_parser_fini_ring(struct intel_engine_cs *ring)
> fini_hash_table(ring);
> }
>
> +/*
> + * Returns a pointer to a descriptor for the command specified by cmd_header.
> + */
> static const struct drm_i915_cmd_descriptor*
> find_cmd_in_table(struct intel_engine_cs *ring,
> u32 cmd_header)
> @@ -813,37 +816,6 @@ find_cmd_in_table(struct intel_engine_cs *ring,
> return NULL;
> }
>
> -/*
> - * Returns a pointer to a descriptor for the command specified by cmd_header.
> - *
> - * The caller must supply space for a default descriptor via the default_desc
> - * parameter. If no descriptor for the specified command exists in the ring's
> - * command parser tables, this function fills in default_desc based on the
> - * ring's default length encoding and returns default_desc.
> - */
> -static const struct drm_i915_cmd_descriptor*
> -find_cmd(struct intel_engine_cs *ring,
> - u32 cmd_header,
> - struct drm_i915_cmd_descriptor *default_desc)
> -{
> - const struct drm_i915_cmd_descriptor *desc;
> - u32 mask;
> -
> - desc = find_cmd_in_table(ring, cmd_header);
> - if (desc)
> - return desc;
> -
> - mask = ring->get_cmd_length_mask(cmd_header);
> - if (!mask)
> - return NULL;
> -
> - BUG_ON(!default_desc);
> - default_desc->flags = CMD_DESC_SKIP;
> - default_desc->length.mask = mask;
> -
> - return default_desc;
> -}
> -
> static const struct drm_i915_reg_descriptor *
> find_reg(const struct drm_i915_reg_descriptor *table,
> int count, u32 addr)
> @@ -886,17 +858,6 @@ static bool check_cmd(const struct intel_engine_cs *ring,
> const bool is_master,
> bool *oacontrol_set)
> {
> - if (desc->flags & CMD_DESC_REJECT) {
> - DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
> - return false;
> - }
> -
> - if ((desc->flags & CMD_DESC_MASTER) && !is_master) {
> - DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n",
> - *cmd);
> - return false;
> - }
> -
> if (desc->flags & CMD_DESC_REGISTER) {
> /*
> * Get the distance between individual register offset
> @@ -1027,14 +988,15 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
> u32 batch_len,
> bool is_master)
> {
> - const struct drm_i915_cmd_descriptor *desc;
> + struct drm_i915_cmd_descriptor default_desc = { CMD_DESC_SKIP };
> + const struct drm_i915_cmd_descriptor *desc = &default_desc;
> + u32 last_cmd_header = 0;
> unsigned dst_iter, src_iter;
> int needs_clflush = 0;
> struct get_page rewind;
> void *src, *dst, *tmp;
> - u32 partial, length;
> + u32 partial, length = 1;
> unsigned in, out;
> - struct drm_i915_cmd_descriptor default_desc = { 0 };
> bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
> int ret = 0;
>
> @@ -1100,40 +1062,63 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
> partial = 0;
>
> do {
> - if (*cmd == MI_BATCH_BUFFER_END) {
> - if (oacontrol_set) {
> - DRM_DEBUG_DRIVER("CMD: batch set OACONTROL but did not clear it\n");
> - goto unmap;
> + if (*cmd != last_cmd_header) {
> + if (*cmd == MI_BATCH_BUFFER_END) {
> + if (unlikely(oacontrol_set)) {
> + DRM_DEBUG_DRIVER("CMD: batch set OACONTROL but did not clear it\n");
> + goto unmap;
> + }
> +
> + cmd++; /* copy this cmd to dst */
> + batch_len = this; /* no more src */
> + ret = 0;
> + break;
> }
>
> - cmd++; /* copy this cmd to dst */
> - batch_len = this; /* no more src */
> - ret = 0;
> - break;
> - }
> -
> - desc = find_cmd(ring, *cmd, &default_desc);
> - if (!desc) {
> - DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
> - *cmd);
> - goto unmap;
> - }
> + desc = find_cmd_in_table(ring, *cmd);
> + if (desc) {
> + if (unlikely(desc->flags & CMD_DESC_REJECT)) {
> + DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
> + goto unmap;
> + }
Yikes. More indenttion. It's getting really deep here. Any sane way to
reduce it?
> +
> + if (unlikely((desc->flags & CMD_DESC_MASTER) && !is_master)) {
> + DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n", *cmd);
> + goto unmap;
> + }
> +
> + /*
> + * If the batch buffer contains a
> + * chained batch, return an error that
> + * tells the caller to abort and
> + * dispatch the workload as a
> + * non-secure batch.
> + */
> + if (unlikely(desc->cmd.value == MI_BATCH_BUFFER_START)) {
> + ret = -EACCES;
> + goto unmap;
> + }
> +
> + if (desc->flags & CMD_DESC_FIXED)
> + length = desc->length.fixed;
> + else
> + length = (*cmd & desc->length.mask) + LENGTH_BIAS;
> + } else {
> + u32 mask = ring->get_cmd_length_mask(*cmd);
> + if (unlikely(!mask)) {
> + DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", *cmd);
> + goto unmap;
> + }
> +
> + default_desc.length.mask = mask;
> + desc = &default_desc;
> +
> + length = (*cmd & mask) + LENGTH_BIAS;
> + }
>
> - /*
> - * If the batch buffer contains a chained batch, return an
> - * error that tells the caller to abort and dispatch the
> - * workload as a non-secure batch.
> - */
> - if (desc->cmd.value == MI_BATCH_BUFFER_START) {
> - ret = -EACCES;
> - goto unmap;
> + last_cmd_header = *cmd;
> }
>
> - if (desc->flags & CMD_DESC_FIXED)
> - length = desc->length.fixed;
> - else
> - length = ((*cmd & desc->length.mask) + LENGTH_BIAS);
> -
> if (unlikely(cmd + length > page_end)) {
> if (unlikely(cmd + length > batch_end)) {
> DRM_DEBUG_DRIVER("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n",
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 8d939649b919..28d5bfceae3b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2333,12 +2333,12 @@ struct drm_i915_cmd_descriptor {
> * is the DRM master
> */
> u32 flags;
> +#define CMD_DESC_SKIP (0)
> #define CMD_DESC_FIXED (1<<0)
> -#define CMD_DESC_SKIP (1<<1)
> -#define CMD_DESC_REJECT (1<<2)
> -#define CMD_DESC_REGISTER (1<<3)
> -#define CMD_DESC_BITMASK (1<<4)
> -#define CMD_DESC_MASTER (1<<5)
> +#define CMD_DESC_REJECT (1<<1)
> +#define CMD_DESC_REGISTER (1<<2)
> +#define CMD_DESC_BITMASK (1<<3)
> +#define CMD_DESC_MASTER (1<<4)
>
> /*
> * The command's unique identification bits and the bitmask to get them.
> --
> 2.6.2
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Ville Syrjälä
Intel OTC
More information about the Intel-gfx
mailing list