[Mesa-dev] [PATCH v4 34/40] intel/compiler: validate region restrictions for half-float conversions
Francisco Jerez
currojerez at riseup.net
Tue Feb 26 22:54:27 UTC 2019
Iago Toral Quiroga <itoral at igalia.com> writes:
> ---
> src/intel/compiler/brw_eu_validate.c | 64 ++++++++++++-
> src/intel/compiler/test_eu_validate.cpp | 122 ++++++++++++++++++++++++
> 2 files changed, 185 insertions(+), 1 deletion(-)
>
> diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c
> index 000a05cb6ac..203641fecb9 100644
> --- a/src/intel/compiler/brw_eu_validate.c
> +++ b/src/intel/compiler/brw_eu_validate.c
> @@ -531,7 +531,69 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf
> exec_type_size == 8 && dst_type_size == 4)
> dst_type_size = 8;
>
> - if (exec_type_size > dst_type_size) {
> + /* From the BDW+ PRM:
> + *
> + * "There is no direct conversion from HF to DF or DF to HF.
> + * There is no direct conversion from HF to Q/UQ or Q/UQ to HF."
> + */
> + enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
> + ERROR_IF(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MOV &&
Why is only the MOV instruction handled here and below? Aren't other
instructions able to do implicit conversions? Probably means you need
to deal with two sources rather than one.
> + ((dst_type == BRW_REGISTER_TYPE_HF && type_sz(src0_type) == 8) ||
> + (dst_type_size == 8 && src0_type == BRW_REGISTER_TYPE_HF)),
> + "There are no direct conversion between 64-bit types and HF");
> +
> + /* From the BDW+ PRM:
> + *
> + * "Conversion between Integer and HF (Half Float) must be
> + * DWord-aligned and strided by a DWord on the destination."
> + *
> + * But this seems to be expanded on CHV and SKL+ by:
> + *
> + * "There is a relaxed alignment rule for word destinations. When
> + * the destination type is word (UW, W, HF), destination data types
> + * can be aligned to either the lowest word or the second lowest
> + * word of the execution channel. This means the destination data
> + * words can be either all in the even word locations or all in the
> + * odd word locations."
> + *
> + * We do not implement the second rule as is though, since empirical testing
> + * shows inconsistencies:
> + * - It suggests that packed 16-bit is not allowed, which is not true.
> + * - It suggests that conversions from Q/DF to W (which need to be 64-bit
> + * aligned on the destination) are not possible, which is not true.
> + * - It suggests that conversions from 16-bit executions types to W need
> + * to be 32-bit aligned, which doesn't seem to be necessary.
> + *
> + * So from this rule we only validate the implication that conversion from
> + * F to HF needs to be DWord aligned too (in BDW this is limited to
> + * conversions from integer types).
> + */
> + bool is_half_float_conversion =
> + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MOV &&
> + dst_type != src0_type &&
> + (dst_type == BRW_REGISTER_TYPE_HF || src0_type == BRW_REGISTER_TYPE_HF);
> +
> + if (is_half_float_conversion) {
> + assert(devinfo->gen >= 8);
> +
> + if ((dst_type == BRW_REGISTER_TYPE_HF && brw_reg_type_is_integer(src0_type)) ||
> + (brw_reg_type_is_integer(dst_type) && src0_type == BRW_REGISTER_TYPE_HF)) {
> + ERROR_IF(dst_stride * dst_type_size != 4,
> + "Conversions between integer and half-float must be strided "
> + "by a DWord on the destination");
> +
> + unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
> + ERROR_IF(subreg % 4 != 0,
> + "Conversions between integer and half-float must be aligned "
> + "to a DWord on the destination");
> + } else if ((devinfo->is_cherryview || devinfo->gen >= 9) &&
> + dst_type == BRW_REGISTER_TYPE_HF) {
> + ERROR_IF(dst_stride != 2,
> + "Conversions to HF must have either all words in even word "
> + "locations or all words in odd word locations");
> + }
> +
> + } else if (exec_type_size > dst_type_size) {
> if (!(dst_type_is_byte && inst_is_raw_move(devinfo, inst))) {
> ERROR_IF(dst_stride * dst_type_size != exec_type_size,
> "Destination stride must be equal to the ratio of the sizes "
> diff --git a/src/intel/compiler/test_eu_validate.cpp b/src/intel/compiler/test_eu_validate.cpp
> index 73300b23122..1557b6d2452 100644
> --- a/src/intel/compiler/test_eu_validate.cpp
> +++ b/src/intel/compiler/test_eu_validate.cpp
> @@ -848,6 +848,128 @@ TEST_P(validation_test, byte_destination_relaxed_alignment)
> }
> }
>
> +TEST_P(validation_test, half_float_conversion)
> +{
> + static const struct {
> + enum brw_reg_type dst_type;
> + enum brw_reg_type src_type;
> + unsigned dst_stride;
> + unsigned dst_subnr;
> + bool expected_result;
> + } inst[] = {
> +#define INST(dst_type, src_type, dst_stride, dst_subnr, expected_result) \
> + { \
> + BRW_REGISTER_TYPE_##dst_type, \
> + BRW_REGISTER_TYPE_##src_type, \
> + BRW_HORIZONTAL_STRIDE_##dst_stride, \
> + dst_subnr, \
> + expected_result, \
> + }
> +
> + /* MOV to half-float destination (F source handled later) */
> + INST(HF, B, 1, 0, false),
> + INST(HF, W, 1, 0, false),
> + INST(HF, HF, 1, 0, true),
> + INST(HF, HF, 1, 2, true),
> + INST(HF, D, 1, 0, false),
> + INST(HF, Q, 1, 0, false),
> + INST(HF, DF, 1, 0, false),
> + INST(HF, B, 2, 0, true),
> + INST(HF, B, 2, 2, false),
> + INST(HF, W, 2, 0, true),
> + INST(HF, W, 2, 2, false),
> + INST(HF, HF, 2, 0, true),
> + INST(HF, HF, 2, 2, true),
> + INST(HF, D, 2, 0, true),
> + INST(HF, D, 2, 2, false),
> + INST(HF, Q, 2, 0, false),
> + INST(HF, DF, 2, 0, false),
> + INST(HF, B, 4, 0, false),
> + INST(HF, W, 4, 0, false),
> + INST(HF, HF, 4, 0, true),
> + INST(HF, HF, 4, 2, true),
> + INST(HF, D, 4, 0, false),
> + INST(HF, Q, 4, 0, false),
> + INST(HF, DF, 4, 0, false),
> +
> + /* MOV from half-float source */
> + INST( B, HF, 1, 0, false),
> + INST( W, HF, 1, 0, false),
> + INST( D, HF, 1, 0, true),
> + INST( D, HF, 1, 4, true),
> + INST( F, HF, 1, 0, true),
> + INST( F, HF, 1, 4, true),
> + INST( Q, HF, 1, 0, false),
> + INST(DF, HF, 1, 0, false),
> + INST( B, HF, 2, 0, false),
> + INST( W, HF, 2, 0, true),
> + INST( W, HF, 2, 2, false),
> + INST( D, HF, 2, 0, false),
> + INST( F, HF, 2, 0, true),
> + INST( F, HF, 2, 2, true),
> + INST( B, HF, 4, 0, true),
> + INST( B, HF, 4, 1, false),
> + INST( W, HF, 4, 0, false),
> +
> +#undef INST
> + };
> +
> + if (devinfo.gen < 8)
> + return;
> +
> + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> + if (!devinfo.has_64bit_types &&
> + (type_sz(inst[i].src_type) == 8 || type_sz(inst[i].dst_type) == 8)) {
> + continue;
> + }
> +
> + brw_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
> +
> + brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
> +
> + brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
> + brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);
> +
> + if (inst[i].src_type == BRW_REGISTER_TYPE_B) {
> + brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
> + brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2);
> + brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
> + } else {
> + brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
> + brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
> + brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
> + }
> +
> + EXPECT_EQ(inst[i].expected_result, validate(p));
> +
> + clear_instructions(p);
> + }
> +
> + /* Conversion from F to HF has Dword destination alignment restriction
> + * on CHV and SKL+ only
> + */
> + brw_MOV(p, retype(g0, BRW_REGISTER_TYPE_HF),
> + retype(g0, BRW_REGISTER_TYPE_F));
> +
> + brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
> +
> + if (devinfo.gen >= 9 || devinfo.is_cherryview) {
> + EXPECT_FALSE(validate(p));
> + } else {
> + assert(devinfo.gen == 8);
> + EXPECT_TRUE(validate(p));
> + }
> + clear_instructions(p);
> +
> + brw_MOV(p, retype(g0, BRW_REGISTER_TYPE_HF),
> + retype(g0, BRW_REGISTER_TYPE_F));
> +
> + brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
> +
> + EXPECT_TRUE(validate(p));
> + clear_instructions(p);
> +}
> +
> TEST_P(validation_test, vector_immediate_destination_alignment)
> {
> static const struct {
> --
> 2.17.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 227 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190226/fb4fa3fd/attachment-0001.sig>
More information about the mesa-dev
mailing list