Mesa (master): radeon/llvm: Emit ISA for ALU instructions in the R600 code emitter

Tom Stellard tstellar at kemper.freedesktop.org
Wed Sep 19 18:47:21 UTC 2012


Module: Mesa
Branch: master
Commit: 0e0c21e00ee80bcff67e37ec86b97d6c25db066a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=0e0c21e00ee80bcff67e37ec86b97d6c25db066a

Author: Michal Sciubidlo <michal.sciubidlo at gmail.com>
Date:   Wed Sep 12 08:57:01 2012 +0200

radeon/llvm: Emit ISA for ALU instructions in the R600 code emitter

Signed-off-by: Tom Stellard <thomas.stellard at amd.com>

---

 src/gallium/drivers/r600/r600_asm.c                |   43 ++++
 src/gallium/drivers/r600/r600_asm.h                |    2 +
 src/gallium/drivers/r600/r600_llvm.c               |    1 +
 src/gallium/drivers/r600/r600_shader.c             |   56 +++---
 src/gallium/drivers/r600/r700_asm.c                |   43 ++++
 src/gallium/drivers/radeon/AMDGPUSubtarget.h       |    2 +
 src/gallium/drivers/radeon/AMDILBase.td            |    5 +
 .../radeon/MCTargetDesc/R600MCCodeEmitter.cpp      |  209 ++++++++++++--------
 src/gallium/drivers/radeon/Processors.td           |    1 +
 src/gallium/drivers/radeon/R600Instructions.td     |  160 ++++++++++-----
 10 files changed, 357 insertions(+), 165 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 03ded6c..648e8b6 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2874,3 +2874,46 @@ int r600_vertex_elements_build_fetch_shader(struct r600_context *rctx, struct r6
 
 	return 0;
 }
+
+void r600_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1)
+{
+	/* Unpack an encoded ALU instruction (word0, word1) into *alu.  WORD0: src0/src1 operands, index mode, predicate select, last flag. */
+	alu->src[0].sel = G_SQ_ALU_WORD0_SRC0_SEL(word0);
+	alu->src[0].rel = G_SQ_ALU_WORD0_SRC0_REL(word0);
+	alu->src[0].chan = G_SQ_ALU_WORD0_SRC0_CHAN(word0);
+	alu->src[0].neg = G_SQ_ALU_WORD0_SRC0_NEG(word0);
+	alu->src[1].sel = G_SQ_ALU_WORD0_SRC1_SEL(word0);
+	alu->src[1].rel = G_SQ_ALU_WORD0_SRC1_REL(word0);
+	alu->src[1].chan = G_SQ_ALU_WORD0_SRC1_CHAN(word0);
+	alu->src[1].neg = G_SQ_ALU_WORD0_SRC1_NEG(word0);
+	alu->index_mode = G_SQ_ALU_WORD0_INDEX_MODE(word0);
+	alu->pred_sel = G_SQ_ALU_WORD0_PRED_SEL(word0);
+	alu->last = G_SQ_ALU_WORD0_LAST(word0);
+
+	/* WORD1: fields read for both encodings (bank swizzle, destination, clamp). */
+	alu->bank_swizzle = G_SQ_ALU_WORD1_BANK_SWIZZLE(word1);
+	alu->dst.sel = G_SQ_ALU_WORD1_DST_GPR(word1);
+	alu->dst.rel = G_SQ_ALU_WORD1_DST_REL(word1);
+	alu->dst.chan = G_SQ_ALU_WORD1_DST_CHAN(word1);
+	alu->dst.clamp = G_SQ_ALU_WORD1_CLAMP(word1);
+	if (G_SQ_ALU_WORD1_ENCODING(word1)) /* non-zero ENCODING: ALU_DWORD1_OP3, carries a third source operand */
+	{
+		alu->is_op3 = 1; /* abs, omod, dst.write, update_pred and execute_mask are not assigned on this path */
+		alu->src[2].sel = G_SQ_ALU_WORD1_OP3_SRC2_SEL(word1);
+		alu->src[2].rel = G_SQ_ALU_WORD1_OP3_SRC2_REL(word1);
+		alu->src[2].chan = G_SQ_ALU_WORD1_OP3_SRC2_CHAN(word1);
+		alu->src[2].neg = G_SQ_ALU_WORD1_OP3_SRC2_NEG(word1);
+		alu->inst = G_SQ_ALU_WORD1_OP3_ALU_INST(word1);
+	}
+	else /* zero ENCODING: ALU_DWORD1_OP2; is_op3 and src[2] are left as the caller set them */
+	{
+		alu->src[0].abs = G_SQ_ALU_WORD1_OP2_SRC0_ABS(word1);
+		alu->src[1].abs = G_SQ_ALU_WORD1_OP2_SRC1_ABS(word1);
+		alu->inst = G_SQ_ALU_WORD1_OP2_ALU_INST(word1);
+		alu->omod = G_SQ_ALU_WORD1_OP2_OMOD(word1);
+		alu->dst.write = G_SQ_ALU_WORD1_OP2_WRITE_MASK(word1);
+		alu->update_pred = G_SQ_ALU_WORD1_OP2_UPDATE_PRED(word1);
+		alu->execute_mask =
+			G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1);
+	}
+}
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 87e751a..403365b 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -233,6 +233,7 @@ int r600_bytecode_add_cfinst(struct r600_bytecode *bc, int inst);
 int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, int type);
 void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg);
 void r600_bytecode_dump(struct r600_bytecode *bc);
+void r600_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
 
 int cm_bytecode_add_cf_end(struct r600_bytecode *bc);
 
@@ -241,5 +242,6 @@ int r600_vertex_elements_build_fetch_shader(struct r600_context *rctx, struct r6
 /* r700_asm.c */
 void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf);
 int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id);
+void r700_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
 
 #endif
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index e77758b..776f47b 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -259,6 +259,7 @@ const char * r600_llvm_gpu_string(enum radeon_family family)
 	case CHIP_RV630:
 	case CHIP_RV620:
 	case CHIP_RV635:
+		gpu_family = "r600";
 	case CHIP_RS780:
 	case CHIP_RS880:
 	case CHIP_RV710:
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 736165b..3e746e5 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -293,32 +293,37 @@ static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx,
 				unsigned char * bytes, unsigned bytes_read)
 {
 	unsigned src_idx;
-	unsigned inst0, inst1;
-	unsigned push_modifier;
 	struct r600_bytecode_alu alu;
+	unsigned src_const_reg[3];
+	uint32_t word0, word1;
+
 	memset(&alu, 0, sizeof(alu));
 	for(src_idx = 0; src_idx < 3; src_idx++) {
-		bytes_read = r600_src_from_byte_stream(bytes, bytes_read,
-								&alu, src_idx);
-	}
-
-	alu.dst.sel = bytes[bytes_read++];
-	alu.dst.chan = bytes[bytes_read++];
-	alu.dst.clamp = bytes[bytes_read++];
-	alu.dst.write = bytes[bytes_read++];
-	alu.dst.rel = bytes[bytes_read++];
-	inst0 = bytes[bytes_read++];
-	inst1 = bytes[bytes_read++];
-	alu.inst = inst0 | (inst1 << 8);
-	alu.last = bytes[bytes_read++];
-	alu.is_op3 = bytes[bytes_read++];
-	push_modifier = bytes[bytes_read++];
-	alu.pred_sel = bytes[bytes_read++];
-	alu.bank_swizzle = bytes[bytes_read++];
-	alu.bank_swizzle_force = bytes[bytes_read++];
-	alu.omod = bytes[bytes_read++];
-	alu.index_mode = bytes[bytes_read++];
+		unsigned i;
+		src_const_reg[src_idx] = bytes[bytes_read++];
+		for (i = 0; i < 4; i++) {
+			alu.src[src_idx].value |= bytes[bytes_read++] << (i * 8);
+		}
+	}
 
+	word0 = i32_from_byte_stream(bytes, &bytes_read);
+	word1 = i32_from_byte_stream(bytes, &bytes_read);
+
+	switch(ctx->bc->chip_class) {
+	case R600:
+		r600_bytecode_alu_read(&alu, word0, word1);
+		break;
+	case R700:
+	case EVERGREEN:
+	case CAYMAN:
+		r700_bytecode_alu_read(&alu, word0, word1);
+		break;
+	}
+
+	for(src_idx = 0; src_idx < 3; src_idx++) {
+		if (src_const_reg[src_idx])
+			alu.src[src_idx].sel += 512;
+	}
 
 	if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE) ||
 	    alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE) ||
@@ -329,15 +334,14 @@ static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx,
 		alu.src[1].sel = V_SQ_ALU_SRC_0;
 		alu.src[1].chan = 0;
 		alu.last = 1;
-    }
+	}
 
-    if (push_modifier) {
-        alu.pred_sel = 0;
-		alu.execute_mask = 1;
+	if (alu.execute_mask) {
+		alu.pred_sel = 0;
 		r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
-	} else
+	} else {
 		r600_bytecode_add_alu(ctx->bc, &alu);
-
+	}
 
 	/* XXX: Handle other KILL instructions */
 	if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT)) {
diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c
index ea37c63..818933a 100644
--- a/src/gallium/drivers/r600/r700_asm.c
+++ b/src/gallium/drivers/r600/r700_asm.c
@@ -74,3 +74,46 @@ int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *
 	}
 	return 0;
 }
+
+void r700_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1)
+{
+	/* Unpack an encoded ALU instruction (word0, word1) into *alu.  WORD0: src0/src1 operands, index mode, predicate select, last flag. */
+	alu->src[0].sel = G_SQ_ALU_WORD0_SRC0_SEL(word0);
+	alu->src[0].rel = G_SQ_ALU_WORD0_SRC0_REL(word0);
+	alu->src[0].chan = G_SQ_ALU_WORD0_SRC0_CHAN(word0);
+	alu->src[0].neg = G_SQ_ALU_WORD0_SRC0_NEG(word0);
+	alu->src[1].sel = G_SQ_ALU_WORD0_SRC1_SEL(word0);
+	alu->src[1].rel = G_SQ_ALU_WORD0_SRC1_REL(word0);
+	alu->src[1].chan = G_SQ_ALU_WORD0_SRC1_CHAN(word0);
+	alu->src[1].neg = G_SQ_ALU_WORD0_SRC1_NEG(word0);
+	alu->index_mode = G_SQ_ALU_WORD0_INDEX_MODE(word0);
+	alu->pred_sel = G_SQ_ALU_WORD0_PRED_SEL(word0);
+	alu->last = G_SQ_ALU_WORD0_LAST(word0);
+
+	/* WORD1: fields read for both encodings.  NOTE(review): presumably the G_SQ_* macros here resolve to R700-specific layouts - confirm. */
+	alu->bank_swizzle = G_SQ_ALU_WORD1_BANK_SWIZZLE(word1);
+	alu->dst.sel = G_SQ_ALU_WORD1_DST_GPR(word1);
+	alu->dst.rel = G_SQ_ALU_WORD1_DST_REL(word1);
+	alu->dst.chan = G_SQ_ALU_WORD1_DST_CHAN(word1);
+	alu->dst.clamp = G_SQ_ALU_WORD1_CLAMP(word1);
+	if (G_SQ_ALU_WORD1_ENCODING(word1)) /* non-zero ENCODING: ALU_DWORD1_OP3, carries a third source operand */
+	{
+		alu->is_op3 = 1; /* abs, omod, dst.write, update_pred and execute_mask are not assigned on this path */
+		alu->src[2].sel = G_SQ_ALU_WORD1_OP3_SRC2_SEL(word1);
+		alu->src[2].rel = G_SQ_ALU_WORD1_OP3_SRC2_REL(word1);
+		alu->src[2].chan = G_SQ_ALU_WORD1_OP3_SRC2_CHAN(word1);
+		alu->src[2].neg = G_SQ_ALU_WORD1_OP3_SRC2_NEG(word1);
+		alu->inst = G_SQ_ALU_WORD1_OP3_ALU_INST(word1);
+	}
+	else /* zero ENCODING: ALU_DWORD1_OP2; is_op3 and src[2] are left as the caller set them */
+	{
+		alu->src[0].abs = G_SQ_ALU_WORD1_OP2_SRC0_ABS(word1);
+		alu->src[1].abs = G_SQ_ALU_WORD1_OP2_SRC1_ABS(word1);
+		alu->inst = G_SQ_ALU_WORD1_OP2_ALU_INST(word1);
+		alu->omod = G_SQ_ALU_WORD1_OP2_OMOD(word1);
+		alu->dst.write = G_SQ_ALU_WORD1_OP2_WRITE_MASK(word1);
+		alu->update_pred = G_SQ_ALU_WORD1_OP2_UPDATE_PRED(word1);
+		alu->execute_mask =
+			G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1);
+	}
+}
diff --git a/src/gallium/drivers/radeon/AMDGPUSubtarget.h b/src/gallium/drivers/radeon/AMDGPUSubtarget.h
index 8613861..30bda83 100644
--- a/src/gallium/drivers/radeon/AMDGPUSubtarget.h
+++ b/src/gallium/drivers/radeon/AMDGPUSubtarget.h
@@ -36,6 +36,7 @@ private:
   bool mIs64bit;
   bool mIs32on64bit;
   bool mDumpCode;
+  bool mR600ALUInst;
 
   InstrItineraryData InstrItins;
 
@@ -56,6 +57,7 @@ public:
   std::string getDeviceName() const;
   virtual size_t getDefaultSize(uint32_t dim) const;
   bool dumpCode() const { return mDumpCode; }
+  bool r600ALUEncoding() const { return mR600ALUInst; }
 
 };
 
diff --git a/src/gallium/drivers/radeon/AMDILBase.td b/src/gallium/drivers/radeon/AMDILBase.td
index 7f72b49..ffe9ce2 100644
--- a/src/gallium/drivers/radeon/AMDILBase.td
+++ b/src/gallium/drivers/radeon/AMDILBase.td
@@ -69,6 +69,11 @@ def FeatureDumpCode : SubtargetFeature <"DumpCode",
         "true",
         "Dump MachineInstrs in the CodeEmitter">;
 
+def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
+        "mR600ALUInst",
+        "false",
+        "Older version of ALU instructions encoding.">;
+
 
 //===----------------------------------------------------------------------===//
 // Register File, Calling Conv, Instruction Descriptions
diff --git a/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp b/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp
index 34c4b39..dcf8338 100644
--- a/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -61,10 +61,9 @@ private:
   void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
                     raw_ostream &OS) const;
   void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
+  void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value,
+                  raw_ostream &OS) const;
   void EmitDst(const MCInst &MI, raw_ostream &OS) const;
-  void EmitALU(const MCInst &MI, unsigned numSrc,
-               SmallVectorImpl<MCFixup> &Fixups,
-               raw_ostream &OS) const;
   void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
                     raw_ostream &OS) const;
   void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;
@@ -210,7 +209,18 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
   }
 
   // Emit instruction type
-  EmitByte(0, OS);
+  EmitByte(INSTR_ALU, OS);
+
+  uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
+
+  // Older ALUs use a different encoding for instructions with one or two
+  // source parameters.
+  if (STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst &&
+      MI.getNumOperands() < 4) {
+    uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39);
+    InstWord01 &= ~(0x3FFULL << 39);
+    InstWord01 |= ISAOpCode << 1;
+  }
 
   unsigned int OpIndex;
   for (OpIndex = 1; OpIndex < NumOperands; OpIndex++) {
@@ -218,17 +228,64 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
     if (MI.getOperand(OpIndex).isImm() || MI.getOperand(OpIndex).isFPImm()) {
       break;
     }
-    EmitSrc(MI, OpIndex, OS);
+    EmitSrcISA(MI, OpIndex, InstWord01, OS);
   }
 
   // Emit zeros for unused sources
   for ( ; OpIndex < 4; OpIndex++) {
-    EmitNullBytes(SRC_BYTE_COUNT, OS);
+    EmitNullBytes(SRC_BYTE_COUNT - 6, OS);
   }
 
-  EmitDst(MI, OS);
+  // Emit destination register
+  const MCOperand &dstOp = MI.getOperand(0);
+  if (dstOp.isReg() && dstOp.getReg() != AMDGPU::PREDICATE_BIT) {
+    //element of destination register
+    InstWord01 |= uint64_t(getHWRegChan(dstOp.getReg())) << 61;
+
+    // isClamped
+    if (isFlagSet(MI, 0, MO_FLAG_CLAMP)) {
+      InstWord01 |= 1ULL << 63;
+    }
+
+    // write mask
+    if (!isFlagSet(MI, 0, MO_FLAG_MASK) && NumOperands < 4) {
+      InstWord01 |= 1ULL << 36;
+    }
+
+    // XXX: Emit relative addressing mode
+  }
 
-  EmitALU(MI, NumOperands - 1, Fixups, OS);
+  // Emit ALU
+
+  // Emit IsLast (for this instruction group) (1 byte)
+  if (!isFlagSet(MI, 0, MO_FLAG_NOT_LAST)) {
+    InstWord01 |= 1ULL << 31;
+  }
+
+  // XXX: Emit push modifier
+  if(isFlagSet(MI, 1,  MO_FLAG_PUSH)) {
+    InstWord01 |= 1ULL << 34;
+  }
+
+    // XXX: Emit predicate (1 byte)
+  int PredIdx = MCDesc.findFirstPredOperandIdx();
+  if (PredIdx != -1) {
+    switch(MI.getOperand(PredIdx).getReg()) {
+    case AMDGPU::PRED_SEL_ZERO:
+      InstWord01 |= 2ULL << 29;
+      break;
+    case AMDGPU::PRED_SEL_ONE:
+      InstWord01 |= 3ULL << 29;
+      break;
+    }
+  }
+
+  //XXX: predicate
+  //XXX: bank swizzle
+  //XXX: OMOD
+  //XXX: index mode
+
+  Emit(InstWord01, OS);
 }
 
 void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
@@ -295,99 +352,74 @@ void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
 
 }
 
-void R600MCCodeEmitter::EmitDst(const MCInst &MI, raw_ostream &OS) const {
-
-  const MCOperand &MO = MI.getOperand(0);
-  if (MO.isReg() && MO.getReg() != AMDGPU::PREDICATE_BIT) {
-    // Emit the destination register index (1 byte)
-    EmitByte(getHWReg(MO.getReg()), OS);
-
-    // Emit the element of the destination register (1 byte)
-    EmitByte(getHWRegChan(MO.getReg()), OS);
-
-    // Emit isClamped (1 byte)
-    if (isFlagSet(MI, 0, MO_FLAG_CLAMP)) {
+void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx,
+                                   uint64_t &Value, raw_ostream &OS) const {
+  const MCOperand &MO = MI.getOperand(OpIdx);
+  union {
+    float f;
+    uint32_t i;
+  } InlineConstant;
+  InlineConstant.i = 0;
+  // Emit the source select (2 bytes).  For GPRs, this is the register index.
+  // For other potential instruction operands, (e.g. constant registers) the
+  // value of the source select is defined in the r600isa docs.
+  if (MO.isReg()) {
+    unsigned Reg = MO.getReg();
+    if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) {
       EmitByte(1, OS);
     } else {
       EmitByte(0, OS);
     }
 
-    // Emit writemask (1 byte).
-    if (isFlagSet(MI, 0, MO_FLAG_MASK)) {
-      EmitByte(0, OS);
-    } else {
-      EmitByte(1, OS);
+    if (Reg == AMDGPU::ALU_LITERAL_X) {
+      unsigned ImmOpIndex = MI.getNumOperands() - 1;
+      MCOperand ImmOp = MI.getOperand(ImmOpIndex);
+      if (ImmOp.isFPImm()) {
+        InlineConstant.f = ImmOp.getFPImm();
+      } else {
+        assert(ImmOp.isImm());
+        InlineConstant.i = ImmOp.getImm();
+      }
     }
-
-    // XXX: Emit relative addressing mode
-    EmitByte(0, OS);
-  } else {
-    // XXX: Handle other operand types.  Are there any for destination regs?
-    EmitNullBytes(DST_BYTE_COUNT, OS);
-  }
-}
-
-void R600MCCodeEmitter::EmitALU(const MCInst &MI, unsigned numSrc,
-                                SmallVectorImpl<MCFixup> &Fixups,
-                                raw_ostream &OS) const {
-  const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
-
-  // Emit the instruction (2 bytes)
-  EmitTwoBytes(getBinaryCodeForInstr(MI, Fixups), OS);
-
-  // Emit IsLast (for this instruction group) (1 byte)
-  if (isFlagSet(MI, 0, MO_FLAG_NOT_LAST)) {
-    EmitByte(0, OS);
   } else {
-    EmitByte(1, OS);
+    // XXX: Handle other operand types.
+    EmitTwoBytes(0, OS);
   }
 
-  // Emit isOp3 (1 byte)
-  if (numSrc == 3) {
-    EmitByte(1, OS);
-  } else {
-    EmitByte(0, OS);
-  }
+  // source channel
+  uint64_t sourceChannelValue = getHWRegChan(MO.getReg());
+  if (OpIdx == 1)
+    Value |= sourceChannelValue << 10;
+  if (OpIdx == 2)
+    Value |= sourceChannelValue << 23;
+  if (OpIdx == 3)
+    Value |= sourceChannelValue << 42;
 
-  // XXX: Emit push modifier
-    if(isFlagSet(MI, 1,  MO_FLAG_PUSH)) {
-    EmitByte(1, OS);
-  } else {
-    EmitByte(0, OS);
+  // isNegated
+  if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
+      && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
+     (MO.isReg() &&
+      (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
+    if (OpIdx == 1)
+      Value |= 1ULL << 12;
+    else if (OpIdx == 2)
+      Value |= 1ULL << 25;
+    else if (OpIdx == 3)
+      Value |= 1ULL << 44;
   }
 
-    // XXX: Emit predicate (1 byte)
-  int PredIdx = MCDesc.findFirstPredOperandIdx();
-  if (PredIdx > -1)
-    switch(MI.getOperand(PredIdx).getReg()) {
-    case AMDGPU::PRED_SEL_ZERO:
-      EmitByte(2, OS);
-      break;
-    case AMDGPU::PRED_SEL_ONE:
-      EmitByte(3, OS);
-      break;
-    default:
-      EmitByte(0, OS);
-      break;
-    }
-  else {
-    EmitByte(0, OS);
+  // isAbsolute
+  if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
+    assert(OpIdx < 3);
+    Value |= 1ULL << (32+OpIdx-1);
   }
 
+  // XXX: relative addressing mode
+  // XXX: kc_bank
 
-  // XXX: Emit bank swizzle. (1 byte)  Do we need this?  It looks like
-  // r600_asm.c sets it.
-  EmitByte(0, OS);
-
-  // XXX: Emit bank_swizzle_force (1 byte) Not sure what this is for.
-  EmitByte(0, OS);
-
-  // XXX: Emit OMOD (1 byte) Not implemented.
-  EmitByte(0, OS);
+  // Emit the literal value, if applicable (4 bytes).
+  Emit(InlineConstant.i, OS);
 
-  // XXX: Emit index_mode.  I think this is for indirect addressing, so we
-  // don't need to worry about it.
-  EmitByte(0, OS);
 }
 
 void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI,
@@ -621,9 +653,12 @@ uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
                                               const MCOperand &MO,
                                         SmallVectorImpl<MCFixup> &Fixup) const {
   if (MO.isReg()) {
-    return getHWReg(MO.getReg());
-  } else {
+    return getHWRegIndex(MO.getReg());
+  } else if (MO.isImm()) {
     return MO.getImm();
+  } else {
+    assert(0);
+    return 0;
   }
 }
 
diff --git a/src/gallium/drivers/radeon/Processors.td b/src/gallium/drivers/radeon/Processors.td
index 92f4636..3469f82 100644
--- a/src/gallium/drivers/radeon/Processors.td
+++ b/src/gallium/drivers/radeon/Processors.td
@@ -13,6 +13,7 @@
 
 class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features> 
 : Processor<Name, itin, Features>;
+def : Proc<"r600",       R600_EG_Itin, [FeatureR600ALUInst]>;
 def : Proc<"rv710",      R600_EG_Itin, []>;
 def : Proc<"rv730",      R600_EG_Itin, []>;
 def : Proc<"rv770",      R600_EG_Itin, [FeatureFP64]>;
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
index e2f8d33..c9c9f61 100644
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@@ -13,17 +13,18 @@
 
 include "R600Intrinsics.td"
 
-class InstR600 <bits<32> inst, dag outs, dag ins, string asm, list<dag> pattern,
+class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
                 InstrItinClass itin>
     : AMDGPUInst <outs, ins, asm, pattern> {
 
-  field bits<32> Inst;
+  field bits<64> Inst;
   bit Trig = 0;
   bit Op3 = 0;
   bit isVector = 0;
   bits<2> FlagOperandIdx = 0;
 
-  let Inst = inst;
+  bits<11> op_code = inst;
+  //let Inst = inst;
   let Namespace = "AMDGPU";
   let OutOperandList = outs;
   let InOperandList = ins;
@@ -75,27 +76,39 @@ def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
                                      (ops PRED_SEL_OFF)>;
 
 
-class R600_1OP <bits<32> inst, string opName, list<dag> pattern,
+class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
                 InstrItinClass itin = AnyALU> :
   InstR600 <inst,
           (outs R600_Reg32:$dst),
           (ins R600_Reg32:$src, R600_Pred:$p, variable_ops),
           !strconcat(opName, " $dst, $src ($p)"),
           pattern,
-          itin
-  >;
+          itin>{
+    bits<7> dst;
+    bits<9> src;
+    let Inst{8-0}   = src;
+    let Inst{49-39} = inst;
+    let Inst{59-53} = dst;
+  }
 
-class R600_2OP <bits<32> inst, string opName, list<dag> pattern,
+class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
                 InstrItinClass itin = AnyALU> :
   InstR600 <inst,
           (outs R600_Reg32:$dst),
           (ins R600_Reg32:$src0, R600_Reg32:$src1,R600_Pred:$p, variable_ops),
           !strconcat(opName, " $dst, $src0, $src1"),
           pattern,
-          itin
-  >;
+          itin>{
+    bits<7> dst;
+    bits<9> src0;
+    bits<9> src1;
+    let Inst{8-0}   = src0;
+    let Inst{21-13} = src1;
+    let Inst{49-39} = inst;
+    let Inst{59-53} = dst;
+  }
 
-class R600_3OP <bits<32> inst, string opName, list<dag> pattern,
+class R600_3OP <bits<11> inst, string opName, list<dag> pattern,
                 InstrItinClass itin = AnyALU> :
   InstR600 <inst,
           (outs R600_Reg32:$dst),
@@ -103,7 +116,15 @@ class R600_3OP <bits<32> inst, string opName, list<dag> pattern,
           !strconcat(opName, " $dst, $src0, $src1, $src2"),
           pattern,
           itin>{
-
+    bits<7> dst;
+    bits<9> src0;
+    bits<9> src1;
+    bits<9> src2;
+    let Inst{8-0}   = src0;
+    let Inst{21-13} = src1;
+    let Inst{40-32} = src2;
+    let Inst{49-45} = inst{4-0};
+    let Inst{59-53} = dst;
     let Op3 = 1;
   }
 
@@ -114,11 +135,12 @@ def PRED_X : InstR600 <0, (outs R600_Predicate_Bit:$dst),
            "PRED $dst, $src0, $src1",
            [], NullALU>
 {
-  let DisableEncoding = "$src0";
-  field bits<32> Inst;
-  bits<32> src1;
-
-  let Inst = src1;
+  bits<7> dst;
+  bits<9> src0;
+  bits<11> src1;
+  let Inst{8-0}   = src0;
+  let Inst{49-39} = src1;
+  let Inst{59-53} = dst;
   let FlagOperandIdx = 3;
 }
 
@@ -131,26 +153,29 @@ def JUMP : InstR600 <0x10,
   >;
 }
 
-class R600_REDUCTION <bits<32> inst, dag ins, string asm, list<dag> pattern,
+class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
                       InstrItinClass itin = VecALU> :
   InstR600 <inst,
           (outs R600_Reg32:$dst),
           ins,
           asm,
           pattern,
-          itin
-
-  >;
+          itin>{
+    bits<7> dst;
+    let Inst{49-39} = inst;
+    let Inst{59-53} = dst;
+  }
 
-class R600_TEX <bits<32> inst, string opName, list<dag> pattern,
+class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
                 InstrItinClass itin = AnyALU> :
   InstR600 <inst,
           (outs R600_Reg128:$dst),
           (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2),
           !strconcat(opName, "$dst, $src0, $src1, $src2"),
           pattern,
-          itin
-  >;
+          itin>{
+    let Inst {10-0} = inst;
+  }
 
 def TEX_SHADOW : PatLeaf<
   (imm),
@@ -328,6 +353,11 @@ def MOV : InstR600 <0x19, (outs R600_Reg32:$dst),
                                R600_Pred:$p),
                           "MOV $dst, $src0", [], AnyALU> {
   let FlagOperandIdx = 2;
+  bits<7> dst;
+  bits<9> src0;
+  let Inst{8-0}   = src0;
+  let Inst{49-39} = op_code;
+  let Inst{59-53} = dst;
 }
 
 class MOV_IMM <ValueType vt, Operand immType> : InstR600 <0x19,
@@ -335,7 +365,15 @@ class MOV_IMM <ValueType vt, Operand immType> : InstR600 <0x19,
   (ins R600_Reg32:$alu_literal, R600_Pred:$p, immType:$imm),
   "MOV_IMM $dst, $imm",
   [], AnyALU
->;
+>{
+  bits<7> dst;
+  bits<9> alu_literal;
+  bits<9> p;
+  let Inst{8-0}   = alu_literal;
+  let Inst{21-13} = p;
+  let Inst{49-39} = op_code;
+  let Inst{59-53} = dst;
+}
 
 def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
 def : Pat <
@@ -357,6 +395,13 @@ def KILLGT : InstR600 <0x2D,
           [],
           NullALU>{
   let FlagOperandIdx = 3;
+  bits<7> dst;
+  bits<9> src0;
+  bits<9> src1;
+  let Inst{8-0}   = src0;
+  let Inst{21-13} = src1;
+  let Inst{49-39} = op_code;
+  let Inst{59-53} = dst;
 }
 
 def AND_INT : R600_2OP <
@@ -530,39 +575,43 @@ def TEX_SAMPLE_C_G : R600_TEX <
 // Helper classes for common instructions
 //===----------------------------------------------------------------------===//
 
-class MUL_LIT_Common <bits<32> inst> : R600_3OP <
+class MUL_LIT_Common <bits<11> inst> : R600_3OP <
   inst, "MUL_LIT",
   []
 >;
 
-class MULADD_Common <bits<32> inst> : R600_3OP <
+class MULADD_Common <bits<11> inst> : R600_3OP <
   inst, "MULADD",
   [(set (f32 R600_Reg32:$dst),
    (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))]
 >;
 
-class CNDE_Common <bits<32> inst> : R600_3OP <
+class CNDE_Common <bits<11> inst> : R600_3OP <
   inst, "CNDE",
   [(set (f32 R600_Reg32:$dst),
    (select (i32 (fp_to_sint (fneg R600_Reg32:$src0))), (f32 R600_Reg32:$src2), (f32 R600_Reg32:$src1)))]
 >;
 
-class CNDGT_Common <bits<32> inst> : R600_3OP <
+class CNDGT_Common <bits<11> inst> : R600_3OP <
   inst, "CNDGT",
   []
 >;
   
-class CNDGE_Common <bits<32> inst> : R600_3OP <
+class CNDGE_Common <bits<11> inst> : R600_3OP <
   inst, "CNDGE",
   [(set R600_Reg32:$dst, (int_AMDGPU_cndlt R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))]
 >;
 
-class DOT4_Common <bits<32> inst> : R600_REDUCTION <
+class DOT4_Common <bits<11> inst> : R600_REDUCTION <
   inst,
   (ins R600_Reg128:$src0, R600_Reg128:$src1, i32imm:$flags),
   "DOT4 $dst $src0, $src1",
   []
   > {
+  bits<9> src0;
+  bits<9> src1;
+  let Inst{8-0}   = src0;
+  let Inst{21-13} = src1;
   let FlagOperandIdx = 3;
 }
 
@@ -571,7 +620,7 @@ class DOT4_Pat <Instruction dot4> : Pat <
   (dot4 R600_Reg128:$src0, R600_Reg128:$src1, 0)
 >;
 
-multiclass CUBE_Common <bits<32> inst> {
+multiclass CUBE_Common <bits<11> inst> {
 
   def _pseudo : InstR600 <
     inst,
@@ -590,110 +639,117 @@ multiclass CUBE_Common <bits<32> inst> {
     [], VecALU
   >{
     let FlagOperandIdx = 3;
+    bits<7> dst;
+    bits<9> src0;
+    bits<9> src1;
+    let Inst{8-0}   = src0;
+    let Inst{21-13} = src1;
+    let Inst{49-39} = inst;
+    let Inst{59-53} = dst;
   }
 }
 
-class EXP_IEEE_Common <bits<32> inst> : R600_1OP <
+class EXP_IEEE_Common <bits<11> inst> : R600_1OP <
   inst, "EXP_IEEE",
   [(set R600_Reg32:$dst, (fexp2 R600_Reg32:$src))]
 >;
 
-class FLT_TO_INT_Common <bits<32> inst> : R600_1OP <
+class FLT_TO_INT_Common <bits<11> inst> : R600_1OP <
   inst, "FLT_TO_INT",
   [(set R600_Reg32:$dst, (fp_to_sint R600_Reg32:$src))]
 >;
 
-class INT_TO_FLT_Common <bits<32> inst> : R600_1OP <
+class INT_TO_FLT_Common <bits<11> inst> : R600_1OP <
   inst, "INT_TO_FLT",
   [(set R600_Reg32:$dst, (sint_to_fp R600_Reg32:$src))]
 >;
 
-class FLT_TO_UINT_Common <bits<32> inst> : R600_1OP <
+class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP <
   inst, "FLT_TO_UINT",
   [(set R600_Reg32:$dst, (fp_to_uint R600_Reg32:$src))]
 >;
 
-class UINT_TO_FLT_Common <bits<32> inst> : R600_1OP <
+class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP <
   inst, "UINT_TO_FLT",
   [(set R600_Reg32:$dst, (uint_to_fp R600_Reg32:$src))]
 >;
 
-class LOG_CLAMPED_Common <bits<32> inst> : R600_1OP <
+class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
   inst, "LOG_CLAMPED",
   []
 >;
 
-class LOG_IEEE_Common <bits<32> inst> : R600_1OP <
+class LOG_IEEE_Common <bits<11> inst> : R600_1OP <
   inst, "LOG_IEEE",
   [(set R600_Reg32:$dst, (int_AMDIL_log R600_Reg32:$src))]
 >;
 
-class LSHL_Common <bits<32> inst> : R600_2OP <
+class LSHL_Common <bits<11> inst> : R600_2OP <
   inst, "LSHL $dst, $src0, $src1",
   [(set R600_Reg32:$dst, (shl R600_Reg32:$src0, R600_Reg32:$src1))]
 >;
 
-class LSHR_Common <bits<32> inst> : R600_2OP <
+class LSHR_Common <bits<11> inst> : R600_2OP <
   inst, "LSHR $dst, $src0, $src1",
   [(set R600_Reg32:$dst, (srl R600_Reg32:$src0, R600_Reg32:$src1))]
 >;
 
-class ASHR_Common <bits<32> inst> : R600_2OP <
+class ASHR_Common <bits<11> inst> : R600_2OP <
   inst, "ASHR $dst, $src0, $src1",
   [(set R600_Reg32:$dst, (sra R600_Reg32:$src0, R600_Reg32:$src1))]
 >;
 
-class MULHI_INT_Common <bits<32> inst> : R600_2OP <
+class MULHI_INT_Common <bits<11> inst> : R600_2OP <
   inst, "MULHI_INT $dst, $src0, $src1",
   [(set R600_Reg32:$dst, (mulhs R600_Reg32:$src0, R600_Reg32:$src1))]
 >;
 
-class MULHI_UINT_Common <bits<32> inst> : R600_2OP <
+class MULHI_UINT_Common <bits<11> inst> : R600_2OP <
   inst, "MULHI $dst, $src0, $src1",
   [(set R600_Reg32:$dst, (mulhu R600_Reg32:$src0, R600_Reg32:$src1))]
 >;
 
-class MULLO_INT_Common <bits<32> inst> : R600_2OP <
+class MULLO_INT_Common <bits<11> inst> : R600_2OP <
   inst, "MULLO_INT $dst, $src0, $src1",
   [(set R600_Reg32:$dst, (mul R600_Reg32:$src0, R600_Reg32:$src1))]
 >;
 
-class MULLO_UINT_Common <bits<32> inst> : R600_2OP <
+class MULLO_UINT_Common <bits<11> inst> : R600_2OP <
   inst, "MULLO_UINT $dst, $src0, $src1",
   []
 >;
 
-class RECIP_CLAMPED_Common <bits<32> inst> : R600_1OP <
+class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
   inst, "RECIP_CLAMPED",
   []
 >;
 
-class RECIP_IEEE_Common <bits<32> inst> : R600_1OP <
+class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
   inst, "RECIP_IEEE",
   [(set R600_Reg32:$dst, (int_AMDGPU_rcp R600_Reg32:$src))]
 >;
 
-class RECIP_UINT_Common <bits<32> inst> : R600_1OP <
+class RECIP_UINT_Common <bits<11> inst> : R600_1OP <
   inst, "RECIP_INT $dst, $src",
   [(set R600_Reg32:$dst, (AMDGPUurecip R600_Reg32:$src))]
 >;
 
-class RECIPSQRT_CLAMPED_Common <bits<32> inst> : R600_1OP <
+class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP <
   inst, "RECIPSQRT_CLAMPED",
   [(set R600_Reg32:$dst, (int_AMDGPU_rsq R600_Reg32:$src))]
 >;
 
-class RECIPSQRT_IEEE_Common <bits<32> inst> : R600_1OP <
+class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
   inst, "RECIPSQRT_IEEE",
   []
 >;
 
-class SIN_Common <bits<32> inst> : R600_1OP <
+class SIN_Common <bits<11> inst> : R600_1OP <
   inst, "SIN", []>{
   let Trig = 1;
 }
 
-class COS_Common <bits<32> inst> : R600_1OP <
+class COS_Common <bits<11> inst> : R600_1OP <
   inst, "COS", []> {
   let Trig = 1;
 }




More information about the mesa-commit mailing list