[Mesa-dev] [PATCH v2] i965/fs: Define new shader opcode to set rounding modes

Francisco Jerez currojerez at riseup.net
Tue Sep 5 21:41:54 UTC 2017


Alejandro Piñeiro <apinheiro at igalia.com> writes:

> Although it is possible to emit them directly as AND/OR on brw_fs_nir,
> having a specific opcode makes it easier to remove duplicate settings
> later.
>
> v2: (Curro)
>   - Set thread control to 'switch' when using the control register
>   - Use a single SHADER_OPCODE_RND_MODE opcode taking an immediate
>     with the rounding mode.
>   - Avoid magic numbers setting rounding mode field at control register.
>
> Signed-off-by:  Alejandro Piñeiro <apinheiro at igalia.com>
> Signed-off-by:  Jose Maria Casanova Crespo <jmcasanova at igalia.com>
> ---
>  src/intel/compiler/brw_eu.h             |  3 +++
>  src/intel/compiler/brw_eu_defines.h     | 17 +++++++++++++++++
>  src/intel/compiler/brw_eu_emit.c        | 34 +++++++++++++++++++++++++++++++++
>  src/intel/compiler/brw_fs_generator.cpp |  5 +++++
>  src/intel/compiler/brw_shader.cpp       |  4 ++++
>  5 files changed, 63 insertions(+)
>
> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
> index 8e597b212a6..106bf03530d 100644
> --- a/src/intel/compiler/brw_eu.h
> +++ b/src/intel/compiler/brw_eu.h
> @@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p,
>                struct brw_reg src,
>                struct brw_reg idx);
>  
> +void
> +brw_rounding_mode(struct brw_codegen *p,
> +                  enum brw_rnd_mode mode);

Missing blank line after the prototype.

>  /***********************************************************************
>   * brw_eu_util.c:
>   */
> diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
> index da482b73c58..91d88fe8952 100644
> --- a/src/intel/compiler/brw_eu_defines.h
> +++ b/src/intel/compiler/brw_eu_defines.h
> @@ -388,6 +388,9 @@ enum opcode {
>     SHADER_OPCODE_TYPED_SURFACE_WRITE,
>     SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
>  
> +

Redundant whitespace.

> +   SHADER_OPCODE_RND_MODE,
> +
>     SHADER_OPCODE_MEMORY_FENCE,
>  
>     SHADER_OPCODE_GEN4_SCRATCH_READ,
> @@ -1214,4 +1217,18 @@ enum brw_message_target {
>  /* R0 */
>  # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT		27
>  
> +/* CR0.0[5:4] Floating-Point Rounding Modes
> + *  Skylake PRM, Volume 7 Part 1, "Control Register", page 756
> + */
> +
> +#define BRW_CR0_RND_MODE_MASK     0x30
> +#define BRW_CR0_RND_MODE_SHIFT    4
> +
> +enum PACKED brw_rnd_mode {
> +   BRW_RND_MODE_RTNE = 0,  /* Round to Nearest or Even */
> +   BRW_RND_MODE_RU = 1,    /* Round Up, toward +inf */
> +   BRW_RND_MODE_RD = 2,    /* Round Down, toward -inf */
> +   BRW_RND_MODE_RTZ = 3    /* Round Toward Zero */
> +};
> +
>  #endif /* BRW_EU_DEFINES_H */
> diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
> index 8c952e7da26..12164653e47 100644
> --- a/src/intel/compiler/brw_eu_emit.c
> +++ b/src/intel/compiler/brw_eu_emit.c
> @@ -3530,3 +3530,37 @@ brw_WAIT(struct brw_codegen *p)
>     brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
>     brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
>  }
> +
> +/**
> + * Changes the floating point rounding mode by updating the control register
> + * field at bits cr0.0[5:4]. This function supports changing to RTNE (00),
> + * RU (01), RD (10) and RTZ (11) rounding using bitwise operations.
> + * Only RTNE and RTZ rounding are enabled at nir.
> + */
> +

Redundant whitespace.

> +void
> +brw_rounding_mode(struct brw_codegen *p,
> +                  enum brw_rnd_mode mode)
> +{
> +   const unsigned bits  = mode << BRW_CR0_RND_MODE_SHIFT;
> +
> +   if (bits != BRW_CR0_RND_MODE_MASK) {
> +      brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
> +                               brw_imm_ud(~BRW_CR0_RND_MODE_MASK));
> +
> +      /* From the Skylake PRM, Volume 7, page 760:
> +       *  "Implementation Restriction on Register Access: When the control
> +       *   register is used as an explicit source and/or destination, hardware
> +       *   does not ensure execution pipeline coherency. Software must set the
> +       *   thread control field to ‘switch’ for an instruction that uses
> +       *   control register as an explicit operand."
> +       */
> +      brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
> +    }
> +
> +   if (bits) {
> +      brw_inst *inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
> +                              brw_imm_ud(bits));
> +      brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
> +   }
> +}
> diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
> index afaec5c9497..ff9880ebfe8 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -2144,6 +2144,11 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
>           brw_DIM(p, dst, retype(src[0], BRW_REGISTER_TYPE_F));
>           break;
>  
> +      case SHADER_OPCODE_RND_MODE:
> +         assert(src[0].file == BRW_IMMEDIATE_VALUE);
> +         brw_rounding_mode(p, (brw_rnd_mode) src[0].d);
> +         break;
> +
>        default:
>           unreachable("Unsupported opcode");
>  
> diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp
> index c62b8ba6140..19dd960be3a 100644
> --- a/src/intel/compiler/brw_shader.cpp
> +++ b/src/intel/compiler/brw_shader.cpp
> @@ -486,6 +486,9 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
>        return "tes_add_indirect_urb_offset";
>     case TES_OPCODE_GET_PRIMITIVE_ID:
>        return "tes_get_primitive_id";
> +
> +   case SHADER_OPCODE_RND_MODE:
> +      return "set_round_mode";

It would make sense for the printed instruction name to match the IR
opcode name as closely as possible, e.g. "rnd_mode".  Other than these
minor nit-picks patch looks good:

Reviewed-by: Francisco Jerez <currojerez at riseup.net>

>     }
>  
>     unreachable("not reached");
> @@ -1004,6 +1007,7 @@ backend_instruction::has_side_effects() const
>     case SHADER_OPCODE_BARRIER:
>     case TCS_OPCODE_URB_WRITE:
>     case TCS_OPCODE_RELEASE_INPUT:
> +   case SHADER_OPCODE_RND_MODE:
>        return true;
>     default:
>        return false;
> -- 
> 2.11.0
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 212 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170905/4359e00f/attachment-0001.sig>


More information about the mesa-dev mailing list