[Liboil-commit] 2 commits - orc/orcprogram-x86.c orc/orcprogram.h

David Schleef ds at kemper.freedesktop.org
Mon May 26 18:08:49 PDT 2008


 orc/orcprogram-x86.c |  363 +++++++++++++++++++++++++++++++++++++++++++++++----
 orc/orcprogram.h     |   10 +
 2 files changed, 344 insertions(+), 29 deletions(-)

New commits:
commit 97fbf6b517bc7c39c404e780d110328093423175
Author: David Schleef <ds at ginger.bigkitten.com>
Date:   Mon May 26 18:08:23 2008 -0700

    [orc] add sse rules (doesn't work yet)

diff --git a/orc/orcprogram-x86.c b/orc/orcprogram-x86.c
index 36aab28..0778ad2 100644
--- a/orc/orcprogram-x86.c
+++ b/orc/orcprogram-x86.c
@@ -50,6 +50,7 @@ void x86_test (OrcProgram *program);
 
 void orc_program_x86_register_rules (void);
 void orc_program_mmx_register_rules (void);
+void orc_program_sse_register_rules (void);
 
 enum {
   X86_EAX = ORC_GP_REG_BASE,
@@ -77,7 +78,21 @@ enum {
   X86_MM6,
   X86_MM7,
   X86_XMM0 = ORC_VEC2_REG_BASE,
-  X86_XMM1
+  X86_XMM1,
+  X86_XMM2,
+  X86_XMM3,
+  X86_XMM4,
+  X86_XMM5,
+  X86_XMM6,
+  X86_XMM7,
+  X86_XMM8,
+  X86_XMM9,
+  X86_XMM10,
+  X86_XMM11,
+  X86_XMM12,
+  X86_XMM13,
+  X86_XMM14,
+  X86_XMM15
 };
 
 #ifdef HAVE_AMD64
@@ -178,6 +193,23 @@ x86_get_regname_mmx(int i)
   }
 }
 
+static const char *
+x86_get_regname_sse(int i)
+{
+  static const char *x86_regs[] = { "xmm0", "xmm1", "xmm2", "xmm3",
+    "xmm4", "xmm5", "xmm6", "xmm7" };
+
+  if (i>=X86_XMM0 && i<X86_XMM0 + 8) return x86_regs[i - X86_XMM0];
+  switch (i) {
+    case 0:
+      return "UNALLOCATED";
+    case 1:
+      return "direct";
+    default:
+      return "ERROR";
+  }
+}
+
 int
 orc_program_x86_allocate_register (OrcProgram *program, int data_reg)
 {
@@ -285,6 +317,7 @@ orc_x86_init (void)
 {
   orc_program_x86_register_rules ();
   orc_program_mmx_register_rules ();
+  orc_program_sse_register_rules ();
 }
 
 void
@@ -431,13 +464,13 @@ orc_program_assemble_x86 (OrcProgram *program)
 
   x86_emit_mov_memoffset_reg (program, 4, (int)ORC_STRUCT_OFFSET(OrcExecutor,n),
       x86_exec_ptr, X86_ECX);
-  x86_emit_sar_imm_reg (program, 4, 2, X86_ECX);
+  x86_emit_sar_imm_reg (program, 4, program->loop_shift, X86_ECX);
   x86_emit_mov_reg_memoffset (program, 4, X86_ECX,
       (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), x86_exec_ptr);
 
   x86_emit_mov_memoffset_reg (program, 4,
       (int)ORC_STRUCT_OFFSET(OrcExecutor,n), x86_exec_ptr, X86_ECX);
-  x86_emit_and_imm_reg (program, 4, 3, X86_ECX);
+  x86_emit_and_imm_reg (program, 4, (1<<program->loop_shift)-1, X86_ECX);
   x86_emit_mov_reg_memoffset (program, 4, X86_ECX,
       (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), x86_exec_ptr);
 
@@ -839,6 +872,153 @@ orc_program_mmx_register_rules (void)
   }
 }
 
+/* sse rules */
+
+static void
+sse_emit_loadi_s16 (OrcProgram *p, int reg, int value)
+{
+  if (value == 0) {
+    printf("  pxor %%%s, %%%s\n", x86_get_regname_sse(reg),
+        x86_get_regname_sse(reg));
+    *p->codeptr++ = 0x0f;
+    *p->codeptr++ = 0xef;
+    x86_emit_modrm_reg (p, reg, reg);
+  } else {
+    x86_emit_mov_imm_reg (p, 4, value, X86_ECX);
+
+    printf("  movd %%ecx, %%%s\n", x86_get_regname_sse(reg));
+    *p->codeptr++ = 0x0f;
+    *p->codeptr++ = 0x6e;
+    x86_emit_modrm_reg (p, X86_ECX, reg);
+
+    printf("  pshufw $0, %%%s, %%%s\n", x86_get_regname_sse(reg),
+        x86_get_regname_sse(reg));
+
+    *p->codeptr++ = 0x0f;
+    *p->codeptr++ = 0x70;
+    x86_emit_modrm_reg (p, reg, reg);
+    *p->codeptr++ = 0x00;
+  }
+}
+
+static void
+sse_rule_loadi_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
+{
+  sse_emit_loadi_s16 (p, p->vars[insn->args[0]].alloc,
+      p->vars[insn->args[2]].s16);
+}
+
+static void
+sse_rule_add_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
+{
+  printf("  paddw %%%s, %%%s\n",
+      x86_get_regname_sse(p->vars[insn->args[2]].alloc),
+      x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+
+  *p->codeptr++ = 0x0f;
+  *p->codeptr++ = 0xfd;
+  x86_emit_modrm_reg (p, p->vars[insn->args[2]].alloc,
+      p->vars[insn->args[0]].alloc);
+}
+
+static void
+sse_rule_sub_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
+{
+  printf("  psubw %%%s, %%%s\n",
+      x86_get_regname_sse(p->vars[insn->args[2]].alloc),
+      x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+
+  *p->codeptr++ = 0x0f;
+  *p->codeptr++ = 0xf9;
+  x86_emit_modrm_reg (p, p->vars[insn->args[2]].alloc,
+      p->vars[insn->args[0]].alloc);
+}
+
+static void
+sse_rule_mul_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
+{
+  printf("  pmullw %%%s, %%%s\n",
+      x86_get_regname_sse(p->vars[insn->args[2]].alloc),
+      x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+
+  *p->codeptr++ = 0x0f;
+  *p->codeptr++ = 0xd5;
+  x86_emit_modrm_reg (p, p->vars[insn->args[2]].alloc,
+      p->vars[insn->args[0]].alloc);
+}
+
+static void
+sse_rule_lshift_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
+{
+  if (p->vars[insn->args[2]].vartype == ORC_VAR_TYPE_CONST) {
+    printf("  psllw $%d, %%%s\n",
+        p->vars[insn->args[2]].s16,
+        x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+
+    *p->codeptr++ = 0x0f;
+    *p->codeptr++ = 0x71;
+    x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, 6);
+    *p->codeptr++ = p->vars[insn->args[2]].s16;
+  } else {
+    /* FIXME this doesn't work quite right */
+    printf("  psllw %%%s, %%%s\n",
+        x86_get_regname_sse(p->vars[insn->args[2]].alloc),
+        x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+
+    *p->codeptr++ = 0x0f;
+    *p->codeptr++ = 0xf1;
+    x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc,
+        p->vars[insn->args[2]].alloc);
+  }
+}
+
+static void
+sse_rule_rshift_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
+{
+  if (p->vars[insn->args[2]].vartype == ORC_VAR_TYPE_CONST) {
+    printf("  psraw $%d, %%%s\n",
+        p->vars[insn->args[2]].s16,
+        x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+
+    *p->codeptr++ = 0x0f;
+    *p->codeptr++ = 0x71;
+    x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, 4);
+    *p->codeptr++ = p->vars[insn->args[2]].s16;
+  } else {
+    /* FIXME this doesn't work quite right */
+    printf("  psraw %%%s, %%%s\n",
+        x86_get_regname_sse(p->vars[insn->args[2]].alloc),
+        x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+
+    *p->codeptr++ = 0x0f;
+    *p->codeptr++ = 0xe1;
+    x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc,
+        p->vars[insn->args[2]].alloc);
+  }
+}
+
+void
+orc_program_sse_register_rules (void)
+{
+  int i;
+
+  orc_rule_register ("_loadi_s16", ORC_RULE_SSE_4, sse_rule_loadi_s16, NULL,
+      ORC_RULE_REG_IMM);
+
+  for(i=ORC_RULE_SSE_1; i <= ORC_RULE_SSE_8; i++) {
+    orc_rule_register ("add_s16", i, sse_rule_add_s16, NULL,
+        ORC_RULE_REG_REG);
+    orc_rule_register ("sub_s16", i, sse_rule_sub_s16, NULL,
+        ORC_RULE_REG_REG);
+    orc_rule_register ("mul_s16", i, sse_rule_mul_s16, NULL,
+        ORC_RULE_REG_REG);
+    orc_rule_register ("lshift_s16", i, sse_rule_lshift_s16, NULL,
+        ORC_RULE_REG_REG);
+    orc_rule_register ("rshift_s16", i, sse_rule_rshift_s16, NULL,
+        ORC_RULE_REG_REG);
+  }
+}
+
 /* code generation */
 
 void
diff --git a/orc/orcprogram.h b/orc/orcprogram.h
index 666af63..3ae3e00 100644
--- a/orc/orcprogram.h
+++ b/orc/orcprogram.h
@@ -32,7 +32,7 @@ typedef void (*OrcRuleEmitFunc)(OrcProgram *p, void *user, OrcInstruction *insn)
 #define ORC_REGCLASS_GP 1
 
 #define ORC_OPCODE_N_ARGS 4
-#define ORC_OPCODE_N_RULES 8
+#define ORC_OPCODE_N_RULES 12
 
 #define ORC_STRUCT_OFFSET(struct_type, member)    \
       ((long) ((unsigned int *) &((struct_type*) 0)->member))
@@ -52,6 +52,10 @@ enum {
   ORC_RULE_MMX_2,
   ORC_RULE_MMX_4,
   ORC_RULE_MMX_8,
+  ORC_RULE_SSE_1,
+  ORC_RULE_SSE_2,
+  ORC_RULE_SSE_4,
+  ORC_RULE_SSE_8,
   ORC_RULE_ALTIVEC_1
 };
 
commit 4df85f912cca4b14150d3883042120bd881149ab
Author: David Schleef <ds at ginger.bigkitten.com>
Date:   Mon May 26 15:06:39 2008 -0700

    [orc] handle end pieces in MMX_4

diff --git a/orc/orcprogram-x86.c b/orc/orcprogram-x86.c
index 67ddd49..36aab28 100644
--- a/orc/orcprogram-x86.c
+++ b/orc/orcprogram-x86.c
@@ -29,13 +29,19 @@ void x86_emit_test_reg_reg (OrcProgram *program, int size, int reg1, int reg2);
 void x86_emit_sar_imm_reg (OrcProgram *program, int size, int value, int reg);
 void x86_emit_dec_memoffset (OrcProgram *program, int size, int offset, int reg);
 void x86_emit_add_imm_memoffset (OrcProgram *program, int size, int value, int offset, int reg);
+void x86_emit_and_imm_memoffset (OrcProgram *program, int size, int value, int offset, int reg);
 void x86_emit_add_imm_reg (OrcProgram *program, int size, int value, int reg);
+void x86_emit_and_imm_reg (OrcProgram *program, int size, int value, int reg);
+void x86_emit_cmp_imm_memoffset (OrcProgram *program, int size, int value,
+    int offset, int reg);
 void x86_emit_emms (OrcProgram *program);
 void x86_emit_ret (OrcProgram *program);
 void x86_emit_je (OrcProgram *program, int label);
 void x86_emit_jne (OrcProgram *program, int label);
 void x86_emit_label (OrcProgram *program, int label);
 
+void x86_emit_loop (OrcProgram *program);
+
 static void mmx_emit_loadi_s16 (OrcProgram *p, int reg, int value);
 
 void x86_emit_modrm_memoffset (OrcProgram *program, int reg1, int offset, int reg2);
@@ -421,29 +427,69 @@ x86_emit_store_dest (OrcProgram *program, OrcVariable *var)
 void
 orc_program_assemble_x86 (OrcProgram *program)
 {
-  int j;
-  int k;
-  OrcInstruction *insn;
-  OrcOpcode *opcode;
-  OrcVariable *args[10];
-  OrcRule *rule;
-
   x86_emit_prologue (program);
 
   x86_emit_mov_memoffset_reg (program, 4, (int)ORC_STRUCT_OFFSET(OrcExecutor,n),
       x86_exec_ptr, X86_ECX);
+  x86_emit_sar_imm_reg (program, 4, 2, X86_ECX);
+  x86_emit_mov_reg_memoffset (program, 4, X86_ECX,
+      (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), x86_exec_ptr);
 
-  x86_emit_sar_imm_reg (program, 4, program->loop_shift, X86_ECX);
+  x86_emit_mov_memoffset_reg (program, 4,
+      (int)ORC_STRUCT_OFFSET(OrcExecutor,n), x86_exec_ptr, X86_ECX);
+  x86_emit_and_imm_reg (program, 4, 3, X86_ECX);
   x86_emit_mov_reg_memoffset (program, 4, X86_ECX,
-      (int)ORC_STRUCT_OFFSET(OrcExecutor,counter), x86_exec_ptr);
+      (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), x86_exec_ptr);
 
-  x86_emit_test_reg_reg (program, 4, X86_ECX, X86_ECX);
+  x86_load_constants (program);
 
+  x86_emit_cmp_imm_memoffset (program, 4, 0,
+      (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), x86_exec_ptr);
   x86_emit_je (program, 1);
 
-  x86_load_constants (program);
-
+  program->rule_set = ORC_RULE_MMX_1;
+  program->n_per_loop = 1;
+  program->loop_shift = 0;
   x86_emit_label (program, 0);
+  x86_emit_loop (program);
+  x86_emit_dec_memoffset (program, 4,
+      (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1),
+      x86_exec_ptr);
+  x86_emit_cmp_imm_memoffset (program, 4, 0,
+      (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1),
+      x86_exec_ptr);
+  x86_emit_jne (program, 0);
+  x86_emit_label (program, 1);
+
+  x86_emit_cmp_imm_memoffset (program, 4, 0,
+      (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), x86_exec_ptr);
+  x86_emit_je (program, 3);
+
+  program->rule_set = ORC_RULE_MMX_4;
+  program->n_per_loop = 4;
+  program->loop_shift = 2;
+  x86_emit_label (program, 2);
+  x86_emit_loop (program);
+  x86_emit_dec_memoffset (program, 4,
+      (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2),
+      x86_exec_ptr);
+  x86_emit_jne (program, 2);
+  x86_emit_label (program, 3);
+
+  x86_emit_epilogue (program);
+
+  x86_do_fixups (program);
+}
+
+void
+x86_emit_loop (OrcProgram *program)
+{
+  int j;
+  int k;
+  OrcInstruction *insn;
+  OrcOpcode *opcode;
+  OrcVariable *args[10];
+  OrcRule *rule;
 
   for(j=0;j<program->n_insns;j++){
     insn = program->insns + j;
@@ -514,22 +560,8 @@ orc_program_assemble_x86 (OrcProgram *program)
       }
     }
   }
-
-  x86_emit_dec_memoffset (program, 4,
-      (int)ORC_STRUCT_OFFSET(OrcExecutor,counter),
-      x86_exec_ptr);
-  x86_emit_jne (program, 0);
-  x86_emit_label (program, 1);
-
-  x86_emit_epilogue (program);
-
-  //x86_test (program);
-
-
-  x86_do_fixups (program);
 }
 
-
 /* rules */
 
 static void
@@ -1085,6 +1117,70 @@ x86_emit_sar_imm_reg (OrcProgram *program, int size, int value, int reg)
 }
 
 void
+x86_emit_and_imm_memoffset (OrcProgram *program, int size, int value,
+    int offset, int reg)
+{
+  if (size == 2) {
+    printf("  andw $%d, %d(%%%s)\n", value, offset,
+        x86_get_regname_ptr(reg));
+    *program->codeptr++ = 0x66;
+  } else if (size == 4) {
+    printf("  andl $%d, %d(%%%s)\n", value, offset,
+        x86_get_regname_ptr(reg));
+  } else {
+    printf("  and $%d, %d(%%%s)\n", value, offset,
+        x86_get_regname_ptr(reg));
+  }
+
+  x86_emit_rex(program, size, 0, 0, reg);
+  if (value >= -128 && value < 128) {
+    *program->codeptr++ = 0x83;
+    /* FIXME */
+    x86_emit_modrm_memoffset (program, 0, offset, reg);
+    *program->codeptr++ = (value & 0xff);
+  } else {
+    *program->codeptr++ = 0x81;
+    /* FIXME */
+    x86_emit_modrm_memoffset (program, 0, offset, reg);
+    *program->codeptr++ = (value & 0xff);
+    *program->codeptr++ = ((value>>8) & 0xff);
+    if (size == 4) {
+      *program->codeptr++ = ((value>>16) & 0xff);
+      *program->codeptr++ = ((value>>24) & 0xff);
+    }
+  }
+}
+
+void
+x86_emit_and_imm_reg (OrcProgram *program, int size, int value, int reg)
+{
+  if (size == 2) {
+    printf("  andw $%d, %%%s\n", value, x86_get_regname_16(reg));
+    *program->codeptr++ = 0x66;
+  } else if (size == 4) {
+    printf("  andl $%d, %%%s\n", value, x86_get_regname(reg));
+  } else {
+    printf("  and $%d, %%%s\n", value, x86_get_regname_64(reg));
+  }
+
+  x86_emit_rex(program, size, 0, 0, reg);
+  if (value >= -128 && value < 128) {
+    *program->codeptr++ = 0x83;
+    x86_emit_modrm_reg (program, reg, 4);
+    *program->codeptr++ = (value & 0xff);
+  } else {
+    *program->codeptr++ = 0x81;
+    x86_emit_modrm_reg (program, reg, 4);
+    *program->codeptr++ = (value & 0xff);
+    *program->codeptr++ = ((value>>8) & 0xff);
+    if (size == 4) {
+      *program->codeptr++ = ((value>>16) & 0xff);
+      *program->codeptr++ = ((value>>24) & 0xff);
+    }
+  }
+}
+
+void
 x86_emit_add_imm_memoffset (OrcProgram *program, int size, int value,
     int offset, int reg)
 {
@@ -1147,6 +1243,39 @@ x86_emit_add_imm_reg (OrcProgram *program, int size, int value, int reg)
 }
 
 void
+x86_emit_cmp_imm_memoffset (OrcProgram *program, int size, int value,
+    int offset, int reg)
+{
+  if (size == 2) {
+    printf("  cmpw $%d, %d(%%%s)\n", value, offset,
+        x86_get_regname_ptr(reg));
+    *program->codeptr++ = 0x66;
+  } else if (size == 4) {
+    printf("  cmpl $%d, %d(%%%s)\n", value, offset,
+        x86_get_regname_ptr(reg));
+  } else {
+    printf("  cmp $%d, %d(%%%s)\n", value, offset,
+        x86_get_regname_ptr(reg));
+  }
+
+  x86_emit_rex(program, size, 0, 0, reg);
+  if (value >= -128 && value < 128) {
+    *program->codeptr++ = 0x83;
+    x86_emit_modrm_memoffset (program, 7, offset, reg);
+    *program->codeptr++ = (value & 0xff);
+  } else {
+    *program->codeptr++ = 0x81;
+    x86_emit_modrm_memoffset (program, 7, offset, reg);
+    *program->codeptr++ = (value & 0xff);
+    *program->codeptr++ = ((value>>8) & 0xff);
+    if (size == 4) {
+      *program->codeptr++ = ((value>>16) & 0xff);
+      *program->codeptr++ = ((value>>24) & 0xff);
+    }
+  }
+}
+
+void
 x86_emit_dec_memoffset (OrcProgram *program, int size,
     int offset, int reg)
 {
diff --git a/orc/orcprogram.h b/orc/orcprogram.h
index 5a02b83..666af63 100644
--- a/orc/orcprogram.h
+++ b/orc/orcprogram.h
@@ -181,7 +181,9 @@ struct _OrcProgram {
 struct _OrcExecutor {
   OrcProgram *program;
   int n;
-  int counter;
+  int counter1;
+  int counter2;
+  int counter3;
 
   void *arrays[ORC_N_VARIABLES];
 


More information about the Liboil-commit mailing list