[Mesa-dev] [PATCH] r300/compiler: Avoid generating MOV instructions for invalid IMM swizzles v2
Tom Stellard
tom at stellard.net
Wed Oct 31 23:49:20 PDT 2012
From: Tom Stellard <thomas.stellard at amd.com>
If an instruction uses an invalid swizzle to read from a constant
register that contains immediates, we can avoid generating MOV
instructions to fix up the swizzle by loading the immediates into a
different constant register that can be read using a valid swizzle.
This only affects r300 and r400 cards.
For example:
CONST[1] = { -3.5000 3.5000 2.5000 1.5000 }
MAD temp[4].xy, const[0].xy__, const[1].xz__, input[0].xy__;
========== Before this change would be lowered to: =========
CONST[1] = { -3.5000 3.5000 2.5000 1.5000 }
MOV temp[0].x, const[1].x___;
MOV temp[0].y, const[1]._z__;
MAD temp[4].xy, const[0].xy__, temp[0].xy__, input[0].xy__;
========== After this change is lowered to: ===============
CONST[1] = { -3.5000 3.5000 2.5000 1.5000 }
CONST[2] = { 0.0000 -3.5000 2.5000 0.0000 }
MAD temp[4].xy, const[0].xy__, const[2].yz__, input[0].xy__;
============================================================
This change reduces one of the Lightsmark shaders from 133 to 91
instructions.
v2:
- Fix crash caused by swizzles with only inline constants.
---
This patch should fix the crash. Does it work for you?
.../r300/compiler/radeon_dataflow_swizzles.c | 353 +++++++++++++++++++-
1 files changed, 349 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c
index 133a9f7..7c74596 100644
--- a/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c
+++ b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2012 Advanced Micro Devices, Inc.
*
* All Rights Reserved.
*
@@ -23,11 +24,16 @@
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
+ * Authors:
+ * Nicolai Haehnle
+ * Tom Stellard <thomas.stellard at amd.com>
*/
#include "radeon_dataflow.h"
+#include "radeon_code.h"
#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
#include "radeon_swizzle.h"
@@ -87,17 +93,356 @@ static void rewrite_source(struct radeon_compiler * c,
}
}
+/**
+ * Try to make a non-native swizzle legal without emitting MOVs: the values
+ * the source reads are copied into a new immediate constant, arranged so that
+ * a native swizzle selects them, and \p reg is redirected to that constant.
+ *
+ * \param c   compiler whose constant table receives the new immediate
+ * \param reg source register to rewrite; updated in place on success
+ * \return 1 if \p reg was rewritten, 0 if it was left untouched
+ */
+static unsigned try_rewrite_constant(struct radeon_compiler *c,
+					struct rc_src_register *reg)
+{
+	unsigned new_swizzle, chan, swz0, swz1, swz2, swz3, found_swizzle, swz;
+	unsigned all_inline = 0;
+	float imms[4] = {0.0f, 0.0f, 0.0f, 0.0f};
+
+	if (!rc_src_reg_is_immediate(c, reg->File, reg->Index)) {
+		/* The register does not contain immediates, but if all
+		 * the swizzles are inline constants, we can still rewrite
+		 * it. */
+		new_swizzle = RC_SWIZZLE_XYZW;
+		for (chan = 0 ; chan < 4; chan++) {
+			unsigned chan_swz = GET_SWZ(reg->Swizzle, chan);
+			if (chan_swz <= RC_SWIZZLE_W) {
+				return 0;
+			}
+			if (chan_swz == RC_SWIZZLE_UNUSED) {
+				SET_SWZ(new_swizzle, chan, RC_SWIZZLE_UNUSED);
+			}
+		}
+		all_inline = 1;
+	} else {
+		new_swizzle = reg->Swizzle;
+	}
+
+	swz = RC_SWIZZLE_UNUSED;
+	found_swizzle = 1;
+	/* If the first three channels all share one swizzle, the search below
+	 * is skipped (any swizzle is legal in the fourth channel).
+	 * NOTE(review): negate bits are not compared, so channels that share
+	 * a slot but differ in sign would collide - confirm unreachable. */
+	for (chan = 0; chan < 3; chan++) {
+		unsigned chan_swz = GET_SWZ(reg->Swizzle, chan);
+		if (chan_swz == RC_SWIZZLE_UNUSED) {
+			continue;
+		}
+		if (swz == RC_SWIZZLE_UNUSED) {
+			swz = chan_swz;
+		} else if (swz != chan_swz) {
+			found_swizzle = 0;
+			break;
+		}
+	}
+
+	/* Find a legal swizzle: this loop attempts to find a native swizzle
+	 * where all the channels are different. */
+	while (!found_swizzle && !all_inline) {
+		swz0 = GET_SWZ(new_swizzle, 0);
+		swz1 = GET_SWZ(new_swizzle, 1);
+		swz2 = GET_SWZ(new_swizzle, 2);
+
+		/* Swizzle .W. is never legal. */
+		if (swz1 == RC_SWIZZLE_W ||
+			swz1 == RC_SWIZZLE_UNUSED ||
+			swz1 == RC_SWIZZLE_ZERO ||
+			swz1 == RC_SWIZZLE_HALF ||
+			swz1 == RC_SWIZZLE_ONE) {
+			/* We choose Z, because there are two non-repeating
+			 * swizzle combinations of the form .Z. There is
+			 * only one combination each for .X. and .Y. */
+			SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
+			continue;
+		}
+
+		if (swz2 == RC_SWIZZLE_UNUSED) {
+			/* We choose Y, because there are two non-repeating
+			 * swizzle combinations of the form ..Y */
+			SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
+			continue;
+		}
+
+		switch (swz0) {
+		/* X.. */
+		case RC_SWIZZLE_X:
+			/* Legal swizzles that start with X: XYZ, XXX */
+			switch (swz1) {
+			/* XX. */
+			case RC_SWIZZLE_X:
+				/*  The new swizzle will be:
+				 *  ZXY (XX. => ZX. => ZXY) */
+				SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
+				break;
+			/* XY. */
+			case RC_SWIZZLE_Y:
+				/* The new swizzle is XYZ */
+				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Z);
+				found_swizzle = 1;
+				break;
+			/* XZ. */
+			case RC_SWIZZLE_Z:
+				/* XZZ */
+				if (swz2 == RC_SWIZZLE_Z) {
+					/* The new swizzle is XYZ */
+					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Y);
+					found_swizzle = 1;
+				} else { /* XZ[^Z] */
+					/* The new swizzle will be:
+					 * YZX (XZ. => YZ. => YZX) */
+					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Y);
+				}
+				break;
+			/* XW. Should have already been handled. */
+			case RC_SWIZZLE_W:
+				assert(0);
+				break;
+			}
+			break;
+		/* Y.. */
+		case RC_SWIZZLE_Y:
+			/* Legal swizzles that start with Y: YYY, YZX */
+			switch (swz1) {
+			/* YY. */
+			case RC_SWIZZLE_Y:
+				/* The new swizzle will be:
+				 * XYZ (YY. => XY. => XYZ) */
+				SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
+				break;
+			/* YZ. */
+			case RC_SWIZZLE_Z:
+				/* The new swizzle is YZX */
+				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_X);
+				found_swizzle = 1;
+				break;
+			/* YX. */
+			case RC_SWIZZLE_X:
+				/* YXX */
+				if (swz2 == RC_SWIZZLE_X) {
+					/* The new swizzle is YZX */
+					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
+					found_swizzle = 1;
+				} else { /* YX[^X] */
+					/* The new swizzle will be:
+					 * ZXY (YX. => ZX. => ZXY) */
+					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
+				}
+				break;
+			/* YW. Should have already been handled. */
+			case RC_SWIZZLE_W:
+				assert(0);
+				break;
+			}
+			break;
+		/* Z.. */
+		case RC_SWIZZLE_Z:
+			/* Legal swizzles that start with Z: ZZZ, ZXY */
+			switch (swz1) {
+			/* ZZ. */
+			case RC_SWIZZLE_Z:
+				/* The new swizzle will be:
+				 * WZY (ZZ. => WZ. => WZY) */
+				SET_SWZ(new_swizzle, 0, RC_SWIZZLE_W);
+				break;
+			/* ZX. */
+			case RC_SWIZZLE_X:
+				/* The new swizzle is ZXY */
+				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
+				found_swizzle = 1;
+				break;
+			/* ZY. */
+			case RC_SWIZZLE_Y:
+				/* ZYY */
+				if (swz2 == RC_SWIZZLE_Y) {
+					/* The new swizzle is ZXY */
+					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_X);
+					found_swizzle = 1;
+				} else { /* ZY[^Y] */
+					/* The new swizzle will be:
+					 * XYZ (ZY. => XY. => XYZ) */
+					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
+				}
+				break;
+			/* ZW. Should have already been handled. */
+			case RC_SWIZZLE_W:
+				assert(0);
+				break;
+			}
+			break;
+		/* W.. */
+		case RC_SWIZZLE_W:
+			/* Legal swizzles that start with W: WWW, WZY */
+			switch (swz1) {
+			/* WW. Should have already been handled. */
+			case RC_SWIZZLE_W:
+				assert(0);
+				break;
+			/* WZ. */
+			case RC_SWIZZLE_Z:
+				/* The new swizzle will be WZY */
+				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
+				found_swizzle = 1;
+				break;
+			/* WX. */
+			case RC_SWIZZLE_X:
+			/* WY. */
+			case RC_SWIZZLE_Y:
+				/* W[XY]Y */
+				if (swz2 == RC_SWIZZLE_Y) {
+					/* The new swizzle will be WZY */
+					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
+					found_swizzle = 1;
+				} else { /* W[XY][^Y] */
+					/* The new swizzle will be:
+					 * ZXY (WX. => XX. => ZX. => ZXY) or
+					 * XYZ (WY. => XY. => XYZ)
+					 */
+					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
+				}
+				break;
+			}
+			break;
+		/* U.. 0.. 1.. H..*/
+		case RC_SWIZZLE_UNUSED:
+		case RC_SWIZZLE_ZERO:
+		case RC_SWIZZLE_ONE:
+		case RC_SWIZZLE_HALF:
+			SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
+			break;
+		}
+	}
+
+	/* Handle the swizzle in the w channel. */
+	swz3 = GET_SWZ(reg->Swizzle, 3);
+
+	/* We can skip this if the swizzle in channel w is an inline constant. */
+	if (swz3 <= RC_SWIZZLE_W) {
+		for (chan = 0; chan < 3; chan++) {
+			unsigned old_swz = GET_SWZ(reg->Swizzle, chan);
+			unsigned new_swz = GET_SWZ(new_swizzle, chan);
+			/* If the swizzle in the w channel is the same as the
+			 * swizzle in any other channels, we need to rewrite it.
+			 * For example:
+			 * reg->Swizzle == XWZW
+			 * new_swizzle  == XYZX
+			 * Since the swizzle in the y channel is being
+			 * rewritten from W -> Y we need to change the swizzle
+			 * in the w channel from W -> Y as well.
+			 */
+			if (old_swz == swz3) {
+				SET_SWZ(new_swizzle, 3,
+						GET_SWZ(new_swizzle, chan));
+				break;
+			}
+
+			/* The swizzle in channel w will be overwritten by one
+			 * of the new swizzles. */
+			if (new_swz == swz3) {
+				/* Find an unused swizzle */
+				unsigned i;
+				unsigned used = 0;
+				for (i = 0; i < 3; i++) {
+					used |= 1 << GET_SWZ(new_swizzle, i);
+				}
+				for (i = 0; i < 4; i++) {
+					if (used & (1 << i)) {
+						continue;
+					}
+					SET_SWZ(new_swizzle, 3, i);
+				}
+			}
+		}
+	}
+
+	for (chan = 0; chan < 4; chan++) {
+		unsigned old_swz = GET_SWZ(reg->Swizzle, chan);
+		unsigned new_swz = GET_SWZ(new_swizzle, chan);
+
+		if (old_swz == RC_SWIZZLE_UNUSED) {
+			continue;
+		}
+
+		/* An inline constant (any swizzle > RC_SWIZZLE_W) is always
+		 * legal in the w channel, so it is left in place there
+		 * together with its negate bit. */
+		if (chan == 3 && old_swz > RC_SWIZZLE_W) {
+			continue;
+		}
+
+		assert(new_swz <= RC_SWIZZLE_W);
+
+		switch (old_swz) {
+		case RC_SWIZZLE_ZERO:
+			imms[new_swz] = 0.0f;
+			break;
+		case RC_SWIZZLE_HALF:
+			if (reg->Negate & (1 << chan)) {
+				imms[new_swz] = -0.5f;
+			} else {
+				imms[new_swz] = 0.5f;
+			}
+			break;
+		case RC_SWIZZLE_ONE:
+			if (reg->Negate & (1 << chan)) {
+				imms[new_swz] = -1.0f;
+			} else {
+				imms[new_swz] = 1.0f;
+			}
+			break;
+		default:
+			imms[new_swz] = rc_get_constant_value(c, reg->Index,
+					reg->Swizzle, reg->Negate, chan);
+		}
+		SET_SWZ(reg->Swizzle, chan, new_swz);
+	}
+	reg->Index = rc_constants_add_immediate_vec4(&c->Program.Constants,
+							imms);
+	/* The register file must become CONSTANT in case a non-constant
+	 * register with constant swizzles (e.g. ONE, ZERO, HALF) was
+	 * converted. */
+	reg->File = RC_FILE_CONSTANT;
+	/* Rewritten channels carry their sign in imms, but an inline constant
+	 * left in the w channel still needs its own negate bit. */
+	reg->Negate &= (swz3 > RC_SWIZZLE_W && swz3 != RC_SWIZZLE_UNUSED) ?
+						(1 << 3) : 0;
+	return 1;
+}
+
void rc_dataflow_swizzles(struct radeon_compiler * c, void *user)
{
struct rc_instruction * inst;
- for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
- const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ for(inst = c->Program.Instructions.Next; /* visit every instruction in the program list */
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ const struct rc_opcode_info * opcode =
+ rc_get_opcode_info(inst->U.I.Opcode);
unsigned int src;
for(src = 0; src < opcode->NumSrcRegs; ++src) {
- if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
- rewrite_source(c, inst, src);
+ struct rc_src_register *reg = &inst->U.I.SrcReg[src];
+ if (c->SwizzleCaps->IsNative(inst->U.I.Opcode, *reg)) { /* swizzle is already native: nothing to fix */
+ continue;
+ }
+ if (!c->is_r500 && /* the constant rewrite is only attempted when is_r500 is unset */
+ c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS && /* and only while the constant count is below R300_PFS_NUM_CONST_REGS */
+ try_rewrite_constant(c, reg)) { /* nonzero return: reg now reads a rearranged immediate constant */
+ continue;
+ }
+ rewrite_source(c, inst, src); /* fallback; per the commit message this fixes the swizzle with MOV instructions */
}
}
+ if (c->Debug & RC_DBG_LOG) /* print the constant table when RC_DBG_LOG is set */
+ rc_constants_print(&c->Program.Constants);
}
--
1.7.8.6
More information about the mesa-dev
mailing list