[PATCH 2/5] drm/xe: Add MI_MATH and ALU instruction definitions
Matt Roper
matthew.d.roper at intel.com
Mon Mar 3 21:50:46 UTC 2025
On Mon, Mar 03, 2025 at 06:35:19PM +0100, Michal Wajdeczko wrote:
> The command streamer implements an Arithmetic Logic Unit (ALU)
> which supports basic arithmetic and logical operations on two
> 64-bit operands. Access to this ALU is thru the MI_MATH command
> and sixteen General Purpose Register (GPR) 64-bit registers,
> which are used as temporary storage.
>
> Bspec: 45737, 60236 # MI
> Bspec: 45525, 60132 # ALU
> Bspec: 45533, 60309 # GPR
> Signed-off-by: Michal Wajdeczko <michal.wajdeczko at intel.com>
> ---
> .../gpu/drm/xe/instructions/xe_alu_commands.h | 72 +++++++++++++++++++
> .../gpu/drm/xe/instructions/xe_mi_commands.h | 1 +
> drivers/gpu/drm/xe/regs/xe_engine_regs.h | 4 ++
> 3 files changed, 77 insertions(+)
> create mode 100644 drivers/gpu/drm/xe/instructions/xe_alu_commands.h
>
> diff --git a/drivers/gpu/drm/xe/instructions/xe_alu_commands.h b/drivers/gpu/drm/xe/instructions/xe_alu_commands.h
> new file mode 100644
> index 000000000000..c4321949f534
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/instructions/xe_alu_commands.h
> @@ -0,0 +1,72 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2025 Intel Corporation
> + */
> +
> +#ifndef _XE_ALU_COMMANDS_H_
> +#define _XE_ALU_COMMANDS_H_
> +
> +#include "instructions/xe_instr_defs.h"
> +
> +/* Instruction Opcodes */
It looks like we've only included the opcodes that have been available
since the Xe_LP platforms here. Since we seem to be defining all of those
(including the ones not used by subsequent patches in this series), is
there any reason not to also include the ones first introduced in Xe_HP
(and still supported on all the Xe2 and beyond platforms that the Xe driver
officially supports)? I.e., the ones listed in the table from bspec 60146.
> +#define CS_ALU_OPCODE_NOOP 0x000
> +#define CS_ALU_OPCODE_LOAD 0x080
> +#define CS_ALU_OPCODE_LOADINV 0x480
> +#define CS_ALU_OPCODE_LOAD0 0x081
> +#define CS_ALU_OPCODE_LOAD1 0x481
> +#define CS_ALU_OPCODE_ADD 0x100
> +#define CS_ALU_OPCODE_SUB 0x101
> +#define CS_ALU_OPCODE_AND 0x102
> +#define CS_ALU_OPCODE_OR 0x103
> +#define CS_ALU_OPCODE_XOR 0x104
> +#define CS_ALU_OPCODE_STORE 0x180
> +#define CS_ALU_OPCODE_STOREINV 0x580
> +
> +/* Instruction Operands */
> +#define CS_ALU_OPERAND_REG(n) REG_FIELD_PREP(GENMASK(3, 0), (n))
> +#define CS_ALU_OPERAND_REG0 0x0
> +#define CS_ALU_OPERAND_REG1 0x1
> +#define CS_ALU_OPERAND_REG2 0x2
> +#define CS_ALU_OPERAND_REG3 0x3
> +#define CS_ALU_OPERAND_REG4 0x4
> +#define CS_ALU_OPERAND_REG5 0x5
> +#define CS_ALU_OPERAND_REG6 0x6
> +#define CS_ALU_OPERAND_REG7 0x7
> +#define CS_ALU_OPERAND_REG8 0x8
> +#define CS_ALU_OPERAND_REG9 0x9
> +#define CS_ALU_OPERAND_REG10 0xa
> +#define CS_ALU_OPERAND_REG11 0xb
> +#define CS_ALU_OPERAND_REG12 0xc
> +#define CS_ALU_OPERAND_REG13 0xd
> +#define CS_ALU_OPERAND_REG14 0xe
> +#define CS_ALU_OPERAND_REG15 0xf
> +#define CS_ALU_OPERAND_SRCA 0x20
> +#define CS_ALU_OPERAND_SRCB 0x21
> +#define CS_ALU_OPERAND_ACCU 0x31
> +#define CS_ALU_OPERAND_ZF 0x32
> +#define CS_ALU_OPERAND_CF 0x33
> +#define CS_ALU_OPERAND_NA 0 /* N/A operand */
> +
> +/* Command Streamer ALU Instructions */
> +#define CS_ALU_INSTR(opcode, op1, op2) (REG_FIELD_PREP(GENMASK(31, 20), (opcode)) | \
> + REG_FIELD_PREP(GENMASK(19, 10), (op1)) | \
> + REG_FIELD_PREP(GENMASK(9, 0), (op2)))
> +
> +#define __CS_ALU_INSTR(opcode, op1, op2) CS_ALU_INSTR(CS_ALU_OPCODE_##opcode, \
> + CS_ALU_OPERAND_##op1, \
> + CS_ALU_OPERAND_##op2)
> +
> +#define CS_ALU_INSTR_NOOP __CS_ALU_INSTR(NOOP, NA, NA)
> +#define CS_ALU_INSTR_LOAD(op1, op2) __CS_ALU_INSTR(LOAD, op1, op2)
> +#define CS_ALU_INSTR_LOADINV(op1, op2) __CS_ALU_INSTR(LOADINV, op1, op2)
> +#define CS_ALU_INSTR_LOAD0(op1) __CS_ALU_INSTR(LOAD0, op1, NA)
> +#define CS_ALU_INSTR_LOAD1(op1) __CS_ALU_INSTR(LOAD1, op1, NA)
> +#define CS_ALU_INSTR_ADD __CS_ALU_INSTR(ADD, NA, NA)
> +#define CS_ALU_INSTR_SUB __CS_ALU_INSTR(SUB, NA, NA)
> +#define CS_ALU_INSTR_AND __CS_ALU_INSTR(AND, NA, NA)
It seems like the bspec forgot to document AND (on page 60143), but
this looks correct nevertheless.
It's up to you whether or not to add the other shift, store, and load
instructions above. Either way,
Reviewed-by: Matt Roper <matthew.d.roper at intel.com>
> +#define CS_ALU_INSTR_OR __CS_ALU_INSTR(OR, NA, NA)
> +#define CS_ALU_INSTR_XOR __CS_ALU_INSTR(XOR, NA, NA)
> +#define CS_ALU_INSTR_STORE(op1, op2) __CS_ALU_INSTR(STORE, op1, op2)
> +#define CS_ALU_INSTR_STOREINV(op1, op2) __CS_ALU_INSTR(STOREINV, op1, op2)
> +
> +#endif
> diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
> index 526bad9d4bac..eba582058d55 100644
> --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
> +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
> @@ -32,6 +32,7 @@
> #define MI_BATCH_BUFFER_END __MI_INSTR(0xA)
> #define MI_TOPOLOGY_FILTER __MI_INSTR(0xD)
> #define MI_FORCE_WAKEUP __MI_INSTR(0x1D)
> +#define MI_MATH(n) (__MI_INSTR(0x1A) | XE_INSTR_NUM_DW((n) + 1))
>
> #define MI_STORE_DATA_IMM __MI_INSTR(0x20)
> #define MI_SDI_GGTT REG_BIT(22)
> diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> index 4f372dc2cb89..659cf85fa3d6 100644
> --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> @@ -184,6 +184,10 @@
> #define PREEMPT_GPGPU_LEVEL_MASK PREEMPT_GPGPU_LEVEL(1, 1)
> #define PREEMPT_3D_OBJECT_LEVEL REG_BIT(0)
>
> +#define CS_GPR_DATA(base, n) XE_REG((base) + 0x600 + (n) * 4)
> +#define CS_GPR_REG(base, n) CS_GPR_DATA((base), (n) * 2)
> +#define CS_GPR_REG_UDW(base, n) CS_GPR_DATA((base), (n) * 2 + 1)
> +
> #define VDBOX_CGCTL3F08(base) XE_REG((base) + 0x3f08)
> #define CG3DDISHRS_CLKGATE_DIS REG_BIT(5)
>
> --
> 2.47.1
>
>
--
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation
More information about the Intel-xe
mailing list