[Mesa-dev] [PATCH 3/7] tgsi/exec: implement load/store/atomic on MEMORY.

Dave Airlie airlied at gmail.com
Tue Apr 26 04:42:21 UTC 2016


From: Dave Airlie <airlied at redhat.com>

This implements basic load/store/atomic ops on MEMORY types
for compute shaders.

TODO: finish off atomic ops.

Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 109 ++++++++++++++++++++++++++++++++-
 src/gallium/auxiliary/tgsi/tgsi_exec.h |   4 ++
 2 files changed, 110 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 9baac0f..98fcc3c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3842,13 +3842,42 @@ exec_load_buf(struct tgsi_exec_machine *mach,
 }
 
 static void
+exec_load_mem(struct tgsi_exec_machine *mach,
+              const struct tgsi_full_instruction *inst)
+{
+   union tgsi_exec_channel r[3];
+   uint chan;
+   char *ptr = mach->LocalMem;
+   uint32_t offset;
+   int j;
+
+   IFETCH(&r[0], 1, TGSI_CHAN_X);
+   if (r[0].u[0] >= mach->LocalMemSize)
+      return;
+
+   offset = r[0].u[0];
+   ptr += offset;
+
+   for (j = 0; j < TGSI_QUAD_SIZE; j++)
+      memcpy(&r[0].u[j], ptr, 4);
+
+   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+      }
+   }
+}
+
+static void
 exec_load(struct tgsi_exec_machine *mach,
           const struct tgsi_full_instruction *inst)
 {
    if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
       exec_load_img(mach, inst);
-   else
+   else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
       exec_load_buf(mach, inst);
+   else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
+      exec_load_mem(mach, inst);
 }
 
 static void
@@ -3932,13 +3961,40 @@ exec_store_buf(struct tgsi_exec_machine *mach,
 }
 
 static void
+exec_store_mem(struct tgsi_exec_machine *mach,
+               const struct tgsi_full_instruction *inst)
+{
+   union tgsi_exec_channel r[3];
+   union tgsi_exec_channel value[4];
+   int i;
+   char *ptr = mach->LocalMem;
+   int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+   int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
+
+   IFETCH(&r[0], 0, TGSI_CHAN_X);
+   for (i = 0; i < 4; i++) {
+      FETCH(&value[i], 1, TGSI_CHAN_X + i);
+   }
+
+   if (r[0].u[0] >= mach->LocalMemSize)
+      return;
+   ptr += r[0].u[0];
+
+   for (i = 0; i < TGSI_QUAD_SIZE; i++)
+      if (execmask & (1 << i))
+         memcpy(ptr, &value[0].u[0], 4);
+}
+
+static void
 exec_store(struct tgsi_exec_machine *mach,
            const struct tgsi_full_instruction *inst)
 {
    if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE)
       exec_store_img(mach, inst);
-   else
+   else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER)
       exec_store_buf(mach, inst);
+   else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY)
+      exec_store_mem(mach, inst);
 }
 
 static void
@@ -4069,13 +4125,60 @@ exec_atomop_buf(struct tgsi_exec_machine *mach,
 }
 
 static void
+exec_atomop_mem(struct tgsi_exec_machine *mach,
+                const struct tgsi_full_instruction *inst)
+{
+   union tgsi_exec_channel r[4];
+   union tgsi_exec_channel value[4], value2[4];
+   char *ptr = mach->LocalMem;
+   uint32_t val;
+   uint chan, i;
+   uint32_t offset;
+   int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+   int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
+   IFETCH(&r[0], 1, TGSI_CHAN_X);
+
+   if (r[0].u[0] >= mach->LocalMemSize)
+      return;
+
+   offset = r[0].u[0];
+   ptr += offset;
+   for (i = 0; i < 4; i++) {
+      FETCH(&value[i], 2, TGSI_CHAN_X + i);
+      if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
+         FETCH(&value2[i], 3, TGSI_CHAN_X + i);
+   }
+
+   memcpy(&r[0].u[0], ptr, 4);
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_ATOMUADD:
+      val = r[0].u[0];
+      val += value[0].u[0];
+      break;
+   default:
+      break;
+   }
+   for (i = 0; i < TGSI_QUAD_SIZE; i++)
+      if (execmask & (1 << i))
+         memcpy(ptr, &val, 4);
+
+   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+      }
+   }
+}
+
+static void
 exec_atomop(struct tgsi_exec_machine *mach,
             const struct tgsi_full_instruction *inst)
 {
    if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
       exec_atomop_img(mach, inst);
-   else
+   else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
       exec_atomop_buf(mach, inst);
+   else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
+      exec_atomop_mem(mach, inst);
 }
 
 static void
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 0cdc194..564b3d5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -385,6 +385,10 @@ struct tgsi_exec_machine
    float                         Face;    /**< +1 if front facing, -1 if back facing */
    bool                          flatshade_color;
 
+   /* Compute Only */
+   void                          *LocalMem;
+   unsigned                      LocalMemSize;
+
    /* See GLSL 4.50 specification for definition of helper invocations */
    uint NonHelperMask;  /**< non-helpers */
    /* Conditional execution masks */
-- 
2.5.5



More information about the mesa-dev mailing list