[Mesa-dev] [PATCH 10/10] i965/fs/gen7: Emit code for GLSL 3.00 pack/unpack operations
Ian Romanick
idr at freedesktop.org
Thu Jan 10 10:45:38 PST 2013
On 01/10/2013 12:10 AM, Chad Versace wrote:
> Signed-off-by: Chad Versace <chad.versace at linux.intel.com>
> ---
> src/mesa/drivers/dri/i965/brw_defines.h | 1 +
> src/mesa/drivers/dri/i965/brw_fs.h | 7 ++
> .../dri/i965/brw_fs_channel_expressions.cpp | 29 +++++++-
> src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 39 ++++++++++-
> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 78 +++++++++++++++++++++-
> 5 files changed, 149 insertions(+), 5 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 22d3e98..1c43d68 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -713,6 +713,7 @@ enum opcode {
> FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
> FS_OPCODE_DISCARD_JUMP,
> FS_OPCODE_SET_GLOBAL_OFFSET,
> + FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
>
> VS_OPCODE_URB_WRITE,
> VS_OPCODE_SCRATCH_READ,
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index bcf38f3..59aa28d 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -355,6 +355,10 @@ public:
> fs_reg fix_math_operand(fs_reg src);
> fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
> fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
> + void emit_pack_half_2x16_split(fs_reg dst, fs_reg x, fs_reg y);
> + void emit_unpack_half_2x16_split_x(fs_reg dst, fs_reg src0);
> + void emit_unpack_half_2x16_split_y(fs_reg dst, fs_reg src0);
> +
> void emit_minmax(uint32_t conditionalmod, fs_reg dst,
> fs_reg src0, fs_reg src1);
> bool try_emit_saturate(ir_expression *ir);
> @@ -541,6 +545,9 @@ private:
> struct brw_reg src,
> struct brw_reg offset);
> void generate_discard_jump(fs_inst *inst);
> + void generate_unpack_half_2x16_split_y(fs_inst *inst,
> + struct brw_reg dst,
> + struct brw_reg src);
>
> void patch_discard_jumps_to_fb_writes();
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
> index 58521ee..7081511 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
> @@ -76,8 +76,21 @@ channel_expressions_predicate(ir_instruction *ir)
> return false;
>
> for (i = 0; i < expr->get_num_operands(); i++) {
> - if (expr->operands[i]->type->is_vector())
> - return true;
> + if (expr->operands[i]->type->is_vector()) {
> + switch (expr->operation) {
> + case ir_binop_pack_half_2x16_split:
> + case ir_unop_pack_half_2x16:
> + case ir_unop_unpack_half_2x16:
> + case ir_unop_unpack_half_2x16_split_x:
> + case ir_unop_unpack_half_2x16_split_y:
> + assert(!"WTF");
Classy. :) Maybe (like below)
assert(!"not reached: expression operates on scalars only");
> + break;
> + default:
> + break;
> + }
> +
> + return true;
> + }
> }
>
> return false;
> @@ -342,9 +355,21 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
> assert(!"not yet supported");
> break;
>
> + case ir_unop_pack_snorm_2x16:
> + case ir_unop_pack_unorm_2x16:
> + case ir_unop_pack_half_2x16:
> + case ir_unop_unpack_snorm_2x16:
> + case ir_unop_unpack_unorm_2x16:
> + case ir_unop_unpack_half_2x16:
> case ir_quadop_vector:
> assert(!"should have been lowered");
> break;
> +
> + case ir_unop_unpack_half_2x16_split_x:
> + case ir_unop_unpack_half_2x16_split_y:
> + case ir_binop_pack_half_2x16_split:
> + assert("!not reached: expression operates on scalars only");
> + break;
> }
>
> ir->remove();
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> index 63f09fe..46e2409 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> @@ -920,6 +920,34 @@ fs_generator::generate_set_global_offset(fs_inst *inst,
> }
>
> void
> +fs_generator::generate_unpack_half_2x16_split_y(fs_inst *inst,
> + struct brw_reg dst,
> + struct brw_reg src)
> +{
> + assert(intel->gen >= 7);
> +
> + /* src has the form of unpackHalf2x16's input:
> + *
> + * w z y x
> + * |undef|undef|undef|0xhhhhllll|
> + *
> + * We wish to access only the "hhhh" bits of the source register, and hence
> + * must access it with a 16 bit subregister offset. To do so, we must
> + * halve the size of the source data type from UD to UW and compensate by
> + * doubling the stride.
> + */
> + assert(src.type == BRW_REGISTER_TYPE_UD);
> + src.type = BRW_REGISTER_TYPE_UW;
> + if (src.vstride > 0)
> + ++src.vstride;
> + if (src.hstride > 0)
> + ++src.hstride;
> + src.subnr += 2;
> +
> + brw_F16TO32(p, dst, src);
> +}
> +
> +void
> fs_generator::generate_code(exec_list *instructions)
> {
> int last_native_insn_offset = p->next_insn_offset;
> @@ -1079,7 +1107,12 @@ fs_generator::generate_code(exec_list *instructions)
> case BRW_OPCODE_SHL:
> brw_SHL(p, dst, src[0], src[1]);
> break;
> -
> + case BRW_OPCODE_F32TO16:
> + brw_F32TO16(p, dst, src[0]);
> + break;
> + case BRW_OPCODE_F16TO32:
> + brw_F16TO32(p, dst, src[0]);
> + break;
> case BRW_OPCODE_CMP:
> brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
> break;
> @@ -1226,6 +1259,10 @@ fs_generator::generate_code(exec_list *instructions)
> generate_set_global_offset(inst, dst, src[0], src[1]);
> break;
>
> + case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
> + generate_unpack_half_2x16_split_y(inst, dst, src[0]);
> + break;
> +
> default:
> if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
> _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index e70d6bf..563d1d5 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -536,7 +536,20 @@ fs_visitor::visit(ir_expression *ir)
> BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE,
> this->result, op[0], op[1]);
> break;
> -
> + case ir_unop_pack_snorm_2x16:
> + case ir_unop_pack_unorm_2x16:
> + case ir_unop_unpack_snorm_2x16:
> + case ir_unop_unpack_unorm_2x16:
> + case ir_unop_unpack_half_2x16:
> + case ir_unop_pack_half_2x16:
> + assert(!"not reached: should be handled by lower_packing_builtins");
> + break;
> + case ir_unop_unpack_half_2x16_split_x:
> + emit_unpack_half_2x16_split_x(this->result, op[0]);
> + break;
> + case ir_unop_unpack_half_2x16_split_y:
> + emit_unpack_half_2x16_split_y(this->result, op[0]);
> + break;
> case ir_binop_pow:
> emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
> break;
> @@ -564,7 +577,9 @@ fs_visitor::visit(ir_expression *ir)
> else
> inst = emit(SHR(this->result, op[0], op[1]));
> break;
> -
> + case ir_binop_pack_half_2x16_split:
> + emit_pack_half_2x16_split(this->result, op[0], op[1]);
> + break;
> case ir_binop_ubo_load:
> /* This IR node takes a constant uniform block and a constant or
> * variable byte offset within the block and loads a vector from that.
> @@ -2259,6 +2274,65 @@ fs_visitor::emit_fb_writes()
> }
>
> void
> +fs_visitor::emit_pack_half_2x16_split(fs_reg dst, fs_reg x, fs_reg y)
> +{
> + if (intel->gen < 7)
> + assert(!"packHalf2x16 should be handled by lower_packing_builtins");
> +
> + /* uint dst; */
> + assert(dst.type == BRW_REGISTER_TYPE_UD);
> +
> + /* float x; */
> + assert(x.type == BRW_REGISTER_TYPE_F);
> +
> + /* float y; */
> + assert(y.type == BRW_REGISTER_TYPE_F);
> +
> + /* uint tmp; */
> + fs_reg tmp(this, glsl_type::uint_type);
> +
> + /* dst = f32to16(x); */
> + emit(BRW_OPCODE_F32TO16, dst, x);
> +
> + /* tmp = f32to16(y); */
> + emit(BRW_OPCODE_F32TO16, tmp, y);
> +
> + /* tmp <<= 16; */
> + emit(BRW_OPCODE_SHL, tmp, tmp, fs_reg(16u));
> +
> + /* dst |= tmp; */
> + emit(BRW_OPCODE_OR, dst, dst, tmp);
> +}
> +
> +void
> +fs_visitor::emit_unpack_half_2x16_split_x(fs_reg dst, fs_reg src0)
> +{
> + if (intel->gen < 7)
> + assert(!"unpackHalf2x16 should be lowered");
> +
> + /* float dst; */
> + assert(dst.type == BRW_REGISTER_TYPE_F);
> +
> + /* uint src0; */
> + assert(src0.type == BRW_REGISTER_TYPE_UD);
> +
> + /* dst = f16to32(src0); */
> + emit(BRW_OPCODE_F16TO32, dst, src0);
> +}
> +
> +void
> +fs_visitor::emit_unpack_half_2x16_split_y(fs_reg dst, fs_reg src0)
> +{
> + if (intel->gen < 7)
> + assert(!"unpackHalf2x16 should be lowered");
> +
> + assert(dst.type == BRW_REGISTER_TYPE_F);
> + assert(src0.type == BRW_REGISTER_TYPE_UD);
> +
> + emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, dst, src0);
> +}
> +
> +void
> fs_visitor::resolve_ud_negate(fs_reg *reg)
> {
> if (reg->type != BRW_REGISTER_TYPE_UD ||
>
More information about the mesa-dev mailing list