[PATCH v7 2/5] lib/gpgpu_shader: tooling for preparing and running gpgpu shaders
Zbigniew Kempczyński
zbigniew.kempczynski at intel.com
Mon Jun 17 11:41:20 UTC 2024
On Wed, Jun 12, 2024 at 11:39:00AM +0200, Andrzej Hajda wrote:
> Implement tooling for building shaders for specific generations.
> The library allows you to build and run shader from precompiled blocks
> and provides an abstraction layer over gpgpu pipeline.
>
> Signed-off-by: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
> Signed-off-by: Christoph Manszewski <christoph.manszewski at intel.com>
> Signed-off-by: Andrzej Hajda <andrzej.hajda at intel.com>
> ---
> lib/gpgpu_shader.c | 210 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> lib/gpgpu_shader.h | 38 ++++++++++
> lib/meson.build | 1 +
> 3 files changed, 249 insertions(+)
>
> diff --git a/lib/gpgpu_shader.c b/lib/gpgpu_shader.c
> new file mode 100644
> index 000000000000..080eef2445da
> --- /dev/null
> +++ b/lib/gpgpu_shader.c
> @@ -0,0 +1,210 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2024 Intel Corporation
> + *
> + * Author: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
> + */
> +
> +#include <i915_drm.h>
> +
> +#include "ioctl_wrappers.h"
> +#include "gpgpu_shader.h"
> +#include "gpu_cmds.h"
> +
> +#define SUPPORTED_GEN_VER 1200 /* Support TGL and up */
> +
> +#define PAGE_SIZE 4096
> +#define BATCH_STATE_SPLIT 2048
> +/* VFE STATE params */
> +#define THREADS (1 << 16) /* max value */
> +#define GEN8_GPGPU_URB_ENTRIES 1
> +#define GPGPU_URB_SIZE 0
> +#define GPGPU_CURBE_SIZE 0
> +#define GEN7_VFE_STATE_GPGPU_MODE 1
> +
> +static uint32_t fill_sip(struct intel_bb *ibb,
> + const uint32_t sip[][4],
> + const size_t size)
> +{
> + uint32_t *sip_dst;
> + uint32_t offset;
> +
> + intel_bb_ptr_align(ibb, 16);
> + sip_dst = intel_bb_ptr(ibb);
> + offset = intel_bb_offset(ibb);
> +
> + memcpy(sip_dst, sip, size);
> +
> + intel_bb_ptr_add(ibb, size);
> +
> + return offset;
> +}
> +
> +static void emit_sip(struct intel_bb *ibb, const uint64_t offset)
> +{
> + intel_bb_out(ibb, GEN4_STATE_SIP | (3 - 2));
> + intel_bb_out(ibb, lower_32_bits(offset));
> + intel_bb_out(ibb, upper_32_bits(offset));
> +}
> +
> +static void
> +__xelp_gpgpu_execfunc(struct intel_bb *ibb,
> + struct intel_buf *target,
> + unsigned int x_dim, unsigned int y_dim,
> + struct gpgpu_shader *shdr,
> + struct gpgpu_shader *sip,
> + uint64_t ring, bool explicit_engine)
> +{
> + uint32_t interface_descriptor, sip_offset;
> + uint64_t engine;
> +
> + intel_bb_add_intel_buf(ibb, target, true);
> +
> + intel_bb_ptr_set(ibb, BATCH_STATE_SPLIT);
> +
> + interface_descriptor = gen8_fill_interface_descriptor(ibb, target,
> + shdr->instr,
> + 4 * shdr->size);
> +
> + if (sip && sip->size)
> + sip_offset = fill_sip(ibb, sip->instr, 4 * sip->size);
> + else
> + sip_offset = 0;
> +
> + intel_bb_ptr_set(ibb, 0);
> +
> + /* GPGPU pipeline */
> + intel_bb_out(ibb, GEN7_PIPELINE_SELECT | GEN9_PIPELINE_SELECTION_MASK |
> + PIPELINE_SELECT_GPGPU);
> +
> + gen9_emit_state_base_address(ibb);
> +
> + xelp_emit_vfe_state(ibb, THREADS, GEN8_GPGPU_URB_ENTRIES,
> + GPGPU_URB_SIZE, GPGPU_CURBE_SIZE, true);
> +
> + gen7_emit_interface_descriptor_load(ibb, interface_descriptor);
> +
> + if (sip_offset)
> + emit_sip(ibb, sip_offset);
> +
> + gen8_emit_gpgpu_walk(ibb, 0, 0, x_dim * 16, y_dim);
> +
> + intel_bb_out(ibb, MI_BATCH_BUFFER_END);
> + intel_bb_ptr_align(ibb, 32);
> +
> + engine = explicit_engine ? ring : I915_EXEC_DEFAULT;
> + intel_bb_exec(ibb, intel_bb_offset(ibb),
> + engine | I915_EXEC_NO_RELOC, false);
> +}
> +
> +static void
> +__xehp_gpgpu_execfunc(struct intel_bb *ibb,
> + struct intel_buf *target,
> + unsigned int x_dim, unsigned int y_dim,
> + struct gpgpu_shader *shdr,
> + struct gpgpu_shader *sip,
> + uint64_t ring, bool explicit_engine)
> +{
> + struct xehp_interface_descriptor_data idd;
> + uint32_t sip_offset;
> + uint64_t engine;
> +
> + intel_bb_add_intel_buf(ibb, target, true);
> +
> + intel_bb_ptr_set(ibb, BATCH_STATE_SPLIT);
> +
> + xehp_fill_interface_descriptor(ibb, target, shdr->instr,
> + 4 * shdr->size, &idd);
> +
> + if (sip && sip->size)
> + sip_offset = fill_sip(ibb, sip->instr, 4 * sip->size);
> + else
> + sip_offset = 0;
> +
> + intel_bb_ptr_set(ibb, 0);
> +
> + /* GPGPU pipeline */
> + intel_bb_out(ibb, GEN7_PIPELINE_SELECT | GEN9_PIPELINE_SELECTION_MASK |
> + PIPELINE_SELECT_GPGPU);
> + xehp_emit_state_base_address(ibb);
> + xehp_emit_state_compute_mode(ibb);
> + xehp_emit_state_binding_table_pool_alloc(ibb);
> + xehp_emit_cfe_state(ibb, THREADS);
> +
> + if (sip_offset)
> + emit_sip(ibb, sip_offset);
> +
> + xehp_emit_compute_walk(ibb, 0, 0, x_dim * 16, y_dim, &idd, 0x0);
> +
> + intel_bb_out(ibb, MI_BATCH_BUFFER_END);
> + intel_bb_ptr_align(ibb, 32);
> +
> + engine = explicit_engine ? ring : I915_EXEC_DEFAULT;
> + intel_bb_exec(ibb, intel_bb_offset(ibb),
> + engine | I915_EXEC_NO_RELOC, false);
> +}
> +
> +/**
> + * gpgpu_shader_exec:
> + * @ibb: pointer to initialized intel_bb
> + * @target: pointer to initialized intel_buf to be written by shader/sip
> + * @x_dim: gpgpu/compute walker thread group width
> + * @y_dim: gpgpu/compute walker thread group height
> + * @shdr: shader to be executed
> + * @sip: sip to be executed, can be NULL
> + * @ring: engine index
> + * @explicit_engine: whether to use provided engine index
> + *
> + * Execute provided shader in asynchronous fashion. To wait for completion,
> + * caller has to use the provided ibb handle.
> + */
> +void gpgpu_shader_exec(struct intel_bb *ibb,
> + struct intel_buf *target,
> + unsigned int x_dim, unsigned int y_dim,
> + struct gpgpu_shader *shdr,
> + struct gpgpu_shader *sip,
> + uint64_t ring, bool explicit_engine)
> +{
> + igt_require(shdr->gen_ver >= SUPPORTED_GEN_VER);
> + igt_assert(ibb->size >= PAGE_SIZE);
> + igt_assert(ibb->ptr == ibb->batch);
> +
> + if (shdr->gen_ver >= 1250)
> + __xehp_gpgpu_execfunc(ibb, target, x_dim, y_dim, shdr, sip,
> + ring, explicit_engine);
> + else
> + __xelp_gpgpu_execfunc(ibb, target, x_dim, y_dim, shdr, sip,
> + ring, explicit_engine);
> +}
> +
> +/**
> + * gpgpu_shader_create:
> + * @fd: drm fd - i915 or xe
> + *
> + * Creates empty shader.
> + *
> + * Returns: pointer to empty shader struct.
> + */
> +struct gpgpu_shader *gpgpu_shader_create(int fd)
> +{
> + struct gpgpu_shader *shdr = calloc(1, sizeof(struct gpgpu_shader));
> + const struct intel_device_info *info;
> +
> + info = intel_get_device_info(intel_get_drm_devid(fd));
> + shdr->gen_ver = 100 * info->graphics_ver + info->graphics_rel;
> + shdr->max_size = 16 * 4;
> + shdr->code = malloc(4 * shdr->max_size);
> + return shdr;
This is an optimistic path without memory allocation error checking, both
for shdr and shdr->code. At least assert on allocation failure.
The rest looks correct imo, so with the above nit fixed:
Acked-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
--
Zbigniew
> +}
> +
> +/**
> + * gpgpu_shader_destroy:
> + * @shdr: pointer to shader struct created with 'gpgpu_shader_create'
> + *
> + * Frees resources of gpgpu_shader struct.
> + */
> +void gpgpu_shader_destroy(struct gpgpu_shader *shdr)
> +{
> + free(shdr->code);
> + free(shdr);
> +}
> diff --git a/lib/gpgpu_shader.h b/lib/gpgpu_shader.h
> new file mode 100644
> index 000000000000..02f6f1aad1e3
> --- /dev/null
> +++ b/lib/gpgpu_shader.h
> @@ -0,0 +1,38 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2024 Intel Corporation
> + */
> +
> +#ifndef GPGPU_SHADER_H
> +#define GPGPU_SHADER_H
> +
> +#include <stdbool.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +
> +struct intel_bb;
> +struct intel_buf;
> +
> +struct gpgpu_shader {
> + uint32_t gen_ver;
> + uint32_t size;
> + uint32_t max_size;
> + union {
> + uint32_t *code;
> + uint32_t (*instr)[4];
> + };
> +};
> +
> +struct gpgpu_shader *gpgpu_shader_create(int fd);
> +void gpgpu_shader_destroy(struct gpgpu_shader *shdr);
> +
> +void gpgpu_shader_dump(struct gpgpu_shader *shdr);
> +
> +void gpgpu_shader_exec(struct intel_bb *ibb,
> + struct intel_buf *target,
> + unsigned int x_dim, unsigned int y_dim,
> + struct gpgpu_shader *shdr,
> + struct gpgpu_shader *sip,
> + uint64_t ring, bool explicit_engine);
> +
> +#endif /* GPGPU_SHADER_H */
> diff --git a/lib/meson.build b/lib/meson.build
> index e2f740c116f8..0a3084f8aea2 100644
> --- a/lib/meson.build
> +++ b/lib/meson.build
> @@ -72,6 +72,7 @@ lib_sources = [
> 'media_spin.c',
> 'media_fill.c',
> 'gpgpu_fill.c',
> + 'gpgpu_shader.c',
> 'gpu_cmds.c',
> 'rendercopy_i915.c',
> 'rendercopy_i830.c',
>
> --
> 2.34.1
>
More information about the igt-dev
mailing list