[Mesa-dev] [PATCH v5 33/38] intel/compiler: validate region restrictions for half-float conversions
Francisco Jerez
currojerez at riseup.net
Thu Mar 28 01:48:28 UTC 2019
"Juan A. Suarez Romero" <jasuarez at igalia.com> writes:
> From: Iago Toral Quiroga <itoral at igalia.com>
>
> v2:
> - Consider implicit conversions in 2-src instructions too (Curro)
> - For restrictions that involve destination stride requirements
> only validate them for Align1, since Align16 always requires
> packed data.
> - Skip general rule for the dst/execution type size ratio for
> mixed float instructions on CHV and SKL+, these have their own
> set of rules that we'll be validated separately.
> ---
> src/intel/compiler/brw_eu_validate.c | 156 +++++++++++++++++++++++-
> src/intel/compiler/test_eu_validate.cpp | 117 ++++++++++++++++++
> 2 files changed, 272 insertions(+), 1 deletion(-)
>
> diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c
> index bd0e48a5e5c..cad50469c65 100644
> --- a/src/intel/compiler/brw_eu_validate.c
> +++ b/src/intel/compiler/brw_eu_validate.c
> @@ -469,6 +469,66 @@ is_packed(unsigned vstride, unsigned width, unsigned hstride)
> return false;
> }
>
> +/**
> + * Returns whether an instruction is an explicit or implicit conversion
> + * to/from half-float.
> + */
> +static bool
> +is_half_float_conversion(const struct gen_device_info *devinfo,
> + const brw_inst *inst)
> +{
> + enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
> +
> + unsigned num_sources = num_sources_from_inst(devinfo, inst);
> + enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
> +
> + if (dst_type != src0_type &&
> + (dst_type == BRW_REGISTER_TYPE_HF || src0_type == BRW_REGISTER_TYPE_HF)) {
> + return true;
> + } else if (num_sources > 1) {
> + enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
> + return dst_type != src1_type &&
> + (dst_type == BRW_REGISTER_TYPE_HF ||
> + src1_type == BRW_REGISTER_TYPE_HF);
> + }
> +
> + return false;
> +}
> +
> +/*
> + * Returns whether an instruction is using mixed float operation mode
> + */
> +static bool
> +is_mixed_float(const struct gen_device_info *devinfo, const brw_inst *inst)
> +{
> + if (devinfo->gen < 8)
> + return false;
> +
> + if (inst_is_send(devinfo, inst))
> + return false;
> +
> + unsigned opcode = brw_inst_opcode(devinfo, inst);
> + const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode);
> + if (desc->ndst == 0)
> + return false;
> +
> + /* FIXME: support 3-src instructions */
> + unsigned num_sources = num_sources_from_inst(devinfo, inst);
> + assert(num_sources < 3);
> +
> + enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
> + enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
> +
> + if (num_sources == 1)
> + return types_are_mixed_float(src0_type, dst_type);
> +
> + enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
> +
> + return types_are_mixed_float(src0_type, src1_type) ||
> + types_are_mixed_float(src0_type, dst_type) ||
> + types_are_mixed_float(src1_type, dst_type);
> +}
> +
> /**
> * Checks restrictions listed in "General Restrictions Based on Operand Types"
> * in the "Register Region Restrictions" section.
> @@ -539,7 +599,101 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf
> exec_type_size == 8 && dst_type_size == 4)
> dst_type_size = 8;
>
> - if (exec_type_size > dst_type_size) {
> + if (is_half_float_conversion(devinfo, inst)) {
> + /**
> + * A helper to validate used in the validation of the following restriction
> + * from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
> + *
> + * "There is no direct conversion from HF to DF or DF to HF.
> + * There is no direct conversion from HF to Q/UQ or Q/UQ to HF."
> + *
> + * Even if these restrictions are listed for the MOV instruction, we
> + * validate this more generally, since there is the possibility
> + * of implicit conversions from other instructions, such us implicit
> + * conversion from integer to HF with the ADD instruction in SKL+.
> + */
> + enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
> + enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
I think the evaluation of brw_inst_src1_type() above can blow up if the
instruction has a single source and the src1 type field happens to
overlap with something else (e.g. an immediate).
> + ERROR_IF(dst_type == BRW_REGISTER_TYPE_HF &&
> + (type_sz(src0_type) == 8 ||
> + (num_sources > 1 && type_sz(src1_type) == 8)),
> + "There are no direct conversions between 64-bit types and HF");
> +
> + ERROR_IF(type_sz(dst_type) == 8 &&
> + (src0_type == BRW_REGISTER_TYPE_HF ||
> + (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)),
> + "There are no direct conversions between 64-bit types and HF");
> +
> + /* From the BDW+ PRM:
> + *
> + * "Conversion between Integer and HF (Half Float) must be
> + * DWord-aligned and strided by a DWord on the destination."
> + *
> + * Since mixing floating point and integer types on sources is not
> + * allowed we can verify this by only looking at the type of the first
> + * source only ehen there is more than one source.
> + *
I doubt we are actually verifying this restriction, so checking src1 in
addition wouldn't hurt.
> + * Also, the above restrictions seems to be expanded on CHV and SKL+ by:
> + *
> + * "There is a relaxed alignment rule for word destinations. When
> + * the destination type is word (UW, W, HF), destination data types
> + * can be aligned to either the lowest word or the second lowest
> + * word of the execution channel. This means the destination data
> + * words can be either all in the even word locations or all in the
> + * odd word locations."
> + *
> + * We do not implement the second rule as is though, since empirical
> + * testing shows inconsistencies:
> + * - It suggests that packed 16-bit is not allowed, which is not true.
> + * - It suggests that conversions from Q/DF to W (which need to be
> + * 64-bit aligned on the destination) are not possible, which is
> + * not true.
> + *
> + * So from this rule we only validate the implication that conversions
> + * from F to HF need to be DWord strided (except in Align1 mixed
> + * float mode where packed fp16 destination is allowed so long as the
> + * destination is oword-aligned).
> + *
> + * Finally, we only validate this for Align1 because Align16 always
> + * requires packed destinations, so these restrictions can't possibly
> + * apply to Align16 mode.
> + */
> + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
> + if ((dst_type == BRW_REGISTER_TYPE_HF &&
> + brw_reg_type_is_integer(src0_type)) ||
> + (brw_reg_type_is_integer(dst_type) &&
> + src0_type == BRW_REGISTER_TYPE_HF)) {
> + ERROR_IF(dst_stride * dst_type_size != 4,
> + "Conversions between integer and half-float must be "
> + "strided by a DWord on the destination");
> +
> + unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
> + ERROR_IF(subreg % 4 != 0,
> + "Conversions between integer and half-float must be "
> + "aligned to a DWord on the destination");
> + } else if ((devinfo->is_cherryview || devinfo->gen >= 9) &&
> + dst_type == BRW_REGISTER_TYPE_HF) {
> + unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
> + ERROR_IF(dst_stride != 2 &&
> + !(is_mixed_float(devinfo, inst) &&
> + dst_stride == 1 && subreg % 16 == 0),
> + "Conversions to HF must have either all words in even "
> + "word locations or all words in odd word locations or "
> + "be mixed-float with Oword-aligned packed destination");
> + }
> + }
> + }
> +
> + /* There are special regioning rules for mixed-float mode in CHV and SKL that
> + * override the general rule for the ratio of sizes of the destination type
> + * and the execution type. We will add validation for those in a later patch.
> + */
> + bool validate_dst_size_and_exec_size_ratio =
> + !is_mixed_float(devinfo, inst) ||
> + !(devinfo->is_cherryview || devinfo->gen >= 9);
> +
> + if (validate_dst_size_and_exec_size_ratio &&
> + exec_type_size > dst_type_size) {
> if (!(dst_type_is_byte && inst_is_raw_move(devinfo, inst))) {
> ERROR_IF(dst_stride * dst_type_size != exec_type_size,
> "Destination stride must be equal to the ratio of the sizes "
> diff --git a/src/intel/compiler/test_eu_validate.cpp b/src/intel/compiler/test_eu_validate.cpp
> index 73300b23122..a1d77781c24 100644
> --- a/src/intel/compiler/test_eu_validate.cpp
> +++ b/src/intel/compiler/test_eu_validate.cpp
> @@ -848,6 +848,123 @@ TEST_P(validation_test, byte_destination_relaxed_alignment)
> }
> }
>
> +TEST_P(validation_test, half_float_conversion)
> +{
> + static const struct {
> + enum brw_reg_type dst_type;
> + enum brw_reg_type src_type;
> + unsigned dst_stride;
> + unsigned dst_subnr;
> + bool expected_result_bdw;
> + bool expected_result_chv_gen9;
> + } inst[] = {
> +#define INST_C(dst_type, src_type, dst_stride, dst_subnr, expected_result) \
> + { \
> + BRW_REGISTER_TYPE_##dst_type, \
> + BRW_REGISTER_TYPE_##src_type, \
> + BRW_HORIZONTAL_STRIDE_##dst_stride, \
> + dst_subnr, \
> + expected_result, \
> + expected_result, \
> + }
> +#define INST_S(dst_type, src_type, dst_stride, dst_subnr, \
> + expected_result_bdw, expected_result_chv_gen9) \
> + { \
> + BRW_REGISTER_TYPE_##dst_type, \
> + BRW_REGISTER_TYPE_##src_type, \
> + BRW_HORIZONTAL_STRIDE_##dst_stride, \
> + dst_subnr, \
> + expected_result_bdw, \
> + expected_result_chv_gen9, \
> + }
> +
> + /* MOV to half-float destination */
> + INST_C(HF, B, 1, 0, false),
> + INST_C(HF, W, 1, 0, false),
> + INST_C(HF, HF, 1, 0, true),
> + INST_C(HF, HF, 1, 2, true),
> + INST_C(HF, D, 1, 0, false),
> + INST_S(HF, F, 1, 0, false, true),
> + INST_C(HF, Q, 1, 0, false),
> + INST_C(HF, B, 2, 0, true),
> + INST_C(HF, B, 2, 2, false),
> + INST_C(HF, W, 2, 0, true),
> + INST_C(HF, W, 2, 2, false),
> + INST_C(HF, HF, 2, 0, true),
> + INST_C(HF, HF, 2, 2, true),
> + INST_C(HF, D, 2, 0, true),
> + INST_C(HF, D, 2, 2, false),
> + INST_C(HF, F, 2, 0, true),
> + INST_S(HF, F, 2, 2, false, true),
> + INST_C(HF, Q, 2, 0, false),
> + INST_C(HF, DF, 2, 0, false),
> + INST_C(HF, B, 4, 0, false),
> + INST_C(HF, W, 4, 0, false),
> + INST_C(HF, HF, 4, 0, true),
> + INST_C(HF, HF, 4, 2, true),
> + INST_C(HF, D, 4, 0, false),
> + INST_C(HF, F, 4, 0, false),
> + INST_C(HF, Q, 4, 0, false),
> + INST_C(HF, DF, 4, 0, false),
> +
> + /* MOV from half-float source */
> + INST_C( B, HF, 1, 0, false),
> + INST_C( W, HF, 1, 0, false),
> + INST_C( D, HF, 1, 0, true),
> + INST_C( D, HF, 1, 4, true),
> + INST_C( F, HF, 1, 0, true),
> + INST_C( F, HF, 1, 4, true),
> + INST_C( Q, HF, 1, 0, false),
> + INST_C(DF, HF, 1, 0, false),
> + INST_C( B, HF, 2, 0, false),
> + INST_C( W, HF, 2, 0, true),
> + INST_C( W, HF, 2, 2, false),
> + INST_C( D, HF, 2, 0, false),
> + INST_C( F, HF, 2, 0, true),
> + INST_C( F, HF, 2, 2, true),
I don't think this is valid, destination subregister offset is not
aligned to the component size.
> + INST_C( B, HF, 4, 0, true),
> + INST_C( B, HF, 4, 1, false),
> + INST_C( W, HF, 4, 0, false),
> +
> +#undef INST_C
> +#undef INST_S
> + };
> +
> + if (devinfo.gen < 8)
> + return;
> +
> + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> + if (!devinfo.has_64bit_types &&
> + (type_sz(inst[i].src_type) == 8 || type_sz(inst[i].dst_type) == 8)) {
> + continue;
> + }
> +
> + brw_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
> +
> + brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
> +
> + brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
> + brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);
> +
> + if (inst[i].src_type == BRW_REGISTER_TYPE_B) {
> + brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
> + brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2);
> + brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
> + } else {
> + brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
> + brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
> + brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
> + }
> +
> + if (devinfo.is_cherryview || devinfo.gen >= 9)
> + EXPECT_EQ(inst[i].expected_result_chv_gen9, validate(p));
> + else
> + EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
> +
> + clear_instructions(p);
> + }
> +}
> +
> TEST_P(validation_test, vector_immediate_destination_alignment)
> {
> static const struct {
> --
> 2.20.1
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 227 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190327/7498dce3/attachment-0001.sig>
More information about the mesa-dev
mailing list