[Liboil-commit] orc/orcprogram-x86.c orc/orcprogram.c orc/orcrules-sse.c orc/x86.c orc/x86.h

David Schleef ds at kemper.freedesktop.org
Tue May 27 15:06:56 PDT 2008


 orc/orcprogram-x86.c |   40 +++++++++++++++++++---
 orc/orcprogram.c     |    6 +--
 orc/orcrules-sse.c   |    9 +++++
 orc/x86.c            |   90 +++++++++++++++++++++++++++++++++++++++++++++++++--
 orc/x86.h            |    6 +++
 5 files changed, 140 insertions(+), 11 deletions(-)

New commits:
commit 8c46abcb0bf8effe7776894a47c1952864174c71
Author: David Schleef <ds at ginger.bigkitten.com>
Date:   Tue May 27 15:06:30 2008 -0700

    [orc] Fixes to get SSE rules working.

diff --git a/orc/orcprogram-x86.c b/orc/orcprogram-x86.c
index cb811c3..ca70bb2 100644
--- a/orc/orcprogram-x86.c
+++ b/orc/orcprogram-x86.c
@@ -185,7 +185,7 @@ orc_program_x86_init (OrcProgram *program)
     program->used_regs[i] = 0;
   }
 
-  program->data_register_class = 2;
+  program->data_register_class = 3;
 }
 
 void
@@ -240,6 +240,19 @@ x86_emit_load_src (OrcProgram *program, OrcVariable *var)
     case ORC_RULE_MMX_4:
       x86_emit_mov_memoffset_mmx (program, 8, 0, ptr_reg, var->alloc);
       break;
+    case ORC_RULE_SSE_1:
+      x86_emit_mov_memoffset_reg (program, 2, 0, ptr_reg, X86_ECX);
+      x86_emit_mov_reg_sse (program, X86_ECX, var->alloc);
+      break;
+    case ORC_RULE_SSE_2:
+      x86_emit_mov_memoffset_sse (program, 4, 0, ptr_reg, var->alloc);
+      break;
+    case ORC_RULE_SSE_4:
+      x86_emit_mov_memoffset_sse (program, 8, 0, ptr_reg, var->alloc);
+      break;
+    case ORC_RULE_SSE_8:
+      x86_emit_mov_memoffset_sse (program, 16, 0, ptr_reg, var->alloc);
+      break;
     default:
       printf("ERROR\n");
   }
@@ -274,6 +287,23 @@ x86_emit_store_dest (OrcProgram *program, OrcVariable *var)
     case ORC_RULE_MMX_4:
       x86_emit_mov_mmx_memoffset (program, 8, var->alloc, 0, ptr_reg);
       break;
+    case ORC_RULE_SSE_1:
+      /* FIXME we might be using ecx twice here */
+      if (ptr_reg == X86_ECX) {
+        printf("ERROR\n");
+      }
+      x86_emit_mov_sse_reg (program, var->alloc, X86_ECX);
+      x86_emit_mov_reg_memoffset (program, 2, X86_ECX, 0, ptr_reg);
+      break;
+    case ORC_RULE_SSE_2:
+      x86_emit_mov_sse_memoffset (program, 4, var->alloc, 0, ptr_reg);
+      break;
+    case ORC_RULE_SSE_4:
+      x86_emit_mov_sse_memoffset (program, 8, var->alloc, 0, ptr_reg);
+      break;
+    case ORC_RULE_SSE_8:
+      x86_emit_mov_sse_memoffset (program, 16, var->alloc, 0, ptr_reg);
+      break;
     default:
       printf("ERROR\n");
   }
@@ -302,7 +332,7 @@ orc_program_assemble_x86 (OrcProgram *program)
       (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), x86_exec_ptr);
   x86_emit_je (program, 1);
 
-  program->rule_set = ORC_RULE_MMX_1;
+  program->rule_set = ORC_RULE_SSE_1;
   program->n_per_loop = 1;
   program->loop_shift = 0;
   x86_emit_label (program, 0);
@@ -320,9 +350,9 @@ orc_program_assemble_x86 (OrcProgram *program)
       (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), x86_exec_ptr);
   x86_emit_je (program, 3);
 
-  program->rule_set = ORC_RULE_MMX_4;
-  program->n_per_loop = 4;
-  program->loop_shift = 2;
+  program->rule_set = ORC_RULE_SSE_8;
+  program->n_per_loop = 8;
+  program->loop_shift = 3;
   x86_emit_label (program, 2);
   x86_emit_loop (program);
   x86_emit_dec_memoffset (program, 4,
diff --git a/orc/orcprogram.c b/orc/orcprogram.c
index b64d41a..52bff06 100644
--- a/orc/orcprogram.c
+++ b/orc/orcprogram.c
@@ -24,10 +24,10 @@ orc_program_new (void)
 #if defined(HAVE_POWERPC)
   p->rule_set = ORC_RULE_ALTIVEC_1;
 #else
-  p->rule_set = ORC_RULE_MMX_1;
+  p->rule_set = ORC_RULE_SSE_8;
 #endif
-  p->n_per_loop = 1;
-  p->loop_shift = 0;
+  p->n_per_loop = 8;
+  p->loop_shift = 3;
 
   return p;
 }
diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c
index 2c56449..9f559e0 100644
--- a/orc/orcrules-sse.c
+++ b/orc/orcrules-sse.c
@@ -29,6 +29,7 @@ sse_emit_loadi_s16 (OrcProgram *p, int reg, int value)
     x86_emit_mov_imm_reg (p, 4, value, X86_ECX);
 
     printf("  movd %%ecx, %%%s\n", x86_get_regname_sse(reg));
+    *p->codeptr++ = 0x66;
     *p->codeptr++ = 0x0f;
     *p->codeptr++ = 0x6e;
     x86_emit_modrm_reg (p, X86_ECX, reg);
@@ -36,6 +37,7 @@ sse_emit_loadi_s16 (OrcProgram *p, int reg, int value)
     printf("  pshufw $0, %%%s, %%%s\n", x86_get_regname_sse(reg),
         x86_get_regname_sse(reg));
 
+    *p->codeptr++ = 0x66;
     *p->codeptr++ = 0x0f;
     *p->codeptr++ = 0x70;
     x86_emit_modrm_reg (p, reg, reg);
@@ -57,6 +59,7 @@ sse_rule_add_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
       x86_get_regname_sse(p->vars[insn->args[2]].alloc),
       x86_get_regname_sse(p->vars[insn->args[0]].alloc));
 
+  *p->codeptr++ = 0x66;
   *p->codeptr++ = 0x0f;
   *p->codeptr++ = 0xfd;
   x86_emit_modrm_reg (p, p->vars[insn->args[2]].alloc,
@@ -70,6 +73,7 @@ sse_rule_sub_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
       x86_get_regname_sse(p->vars[insn->args[2]].alloc),
       x86_get_regname_sse(p->vars[insn->args[0]].alloc));
 
+  *p->codeptr++ = 0x66;
   *p->codeptr++ = 0x0f;
   *p->codeptr++ = 0xf9;
   x86_emit_modrm_reg (p, p->vars[insn->args[2]].alloc,
@@ -83,6 +87,7 @@ sse_rule_mul_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
       x86_get_regname_sse(p->vars[insn->args[2]].alloc),
       x86_get_regname_sse(p->vars[insn->args[0]].alloc));
 
+  *p->codeptr++ = 0x66;
   *p->codeptr++ = 0x0f;
   *p->codeptr++ = 0xd5;
   x86_emit_modrm_reg (p, p->vars[insn->args[2]].alloc,
@@ -97,6 +102,7 @@ sse_rule_lshift_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
         p->vars[insn->args[2]].s16,
         x86_get_regname_sse(p->vars[insn->args[0]].alloc));
 
+    *p->codeptr++ = 0x66;
     *p->codeptr++ = 0x0f;
     *p->codeptr++ = 0x71;
     x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, 6);
@@ -107,6 +113,7 @@ sse_rule_lshift_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
         x86_get_regname_sse(p->vars[insn->args[2]].alloc),
         x86_get_regname_sse(p->vars[insn->args[0]].alloc));
 
+    *p->codeptr++ = 0x66;
     *p->codeptr++ = 0x0f;
     *p->codeptr++ = 0xf1;
     x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc,
@@ -122,6 +129,7 @@ sse_rule_rshift_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
         p->vars[insn->args[2]].s16,
         x86_get_regname_sse(p->vars[insn->args[0]].alloc));
 
+    *p->codeptr++ = 0x66;
     *p->codeptr++ = 0x0f;
     *p->codeptr++ = 0x71;
     x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, 4);
@@ -132,6 +140,7 @@ sse_rule_rshift_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
         x86_get_regname_sse(p->vars[insn->args[2]].alloc),
         x86_get_regname_sse(p->vars[insn->args[0]].alloc));
 
+    *p->codeptr++ = 0x66;
     *p->codeptr++ = 0x0f;
     *p->codeptr++ = 0xe1;
     x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc,
diff --git a/orc/x86.c b/orc/x86.c
index 7ef3878..55de4b3 100644
--- a/orc/x86.c
+++ b/orc/x86.c
@@ -102,10 +102,12 @@ x86_get_regname_mmx(int i)
 const char *
 x86_get_regname_sse(int i)
 {
-  static const char *x86_regs[] = { "xmm0", "xmm1", "xmm2", "xmm3",
-    "xmm4", "xmm5", "xmm6", "xmm7" };
+  static const char *x86_regs[] = {
+    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
+    "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
+  };
 
-  if (i>=X86_XMM0 && i<X86_XMM0 + 8) return x86_regs[i - X86_XMM0];
+  if (i>=X86_XMM0 && i<X86_XMM0 + 16) return x86_regs[i - X86_XMM0];
   switch (i) {
     case 0:
       return "UNALLOCATED";
@@ -244,6 +246,35 @@ x86_emit_mov_memoffset_mmx (OrcProgram *program, int size, int offset,
 }
 
 void
+x86_emit_mov_memoffset_sse (OrcProgram *program, int size, int offset,
+    int reg1, int reg2)
+{ /* Load from offset(reg1) into SSE register reg2: size 4 -> movd, 8 -> movq, otherwise movdqu. */
+  if (size == 4) {
+    printf("  movd %d(%%%s), %%%s\n", offset, x86_get_regname_ptr(reg1),
+        x86_get_regname_sse(reg2));
+    *program->codeptr++ = 0x66;
+    x86_emit_rex(program, 0, reg2, 0, reg1);
+    *program->codeptr++ = 0x0f;
+    *program->codeptr++ = 0x6e;
+  } else if (size == 8) {
+    printf("  movq %d(%%%s), %%%s\n", offset, x86_get_regname_ptr(reg1),
+        x86_get_regname_sse(reg2));
+    *program->codeptr++ = 0xf3; /* f3 0f 7e is movq xmm, m64; 66 0f 6f would be a 16-byte movdqa */
+    x86_emit_rex(program, 0, reg2, 0, reg1);
+    *program->codeptr++ = 0x0f;
+    *program->codeptr++ = 0x7e;
+  } else {
+    printf("  movdqu %d(%%%s), %%%s\n", offset, x86_get_regname_ptr(reg1),
+        x86_get_regname_sse(reg2));
+    *program->codeptr++ = 0xf3; /* mandatory prefix comes first; REX must directly precede 0x0f */
+    x86_emit_rex(program, 0, reg2, 0, reg1);
+    *program->codeptr++ = 0x0f;
+    *program->codeptr++ = 0x6f;
+  }
+  x86_emit_modrm_memoffset (program, reg2, offset, reg1);
+}
+
+void
 x86_emit_mov_reg_memoffset (OrcProgram *program, int size, int reg1, int offset,
     int reg2)
 {
@@ -285,6 +316,35 @@ x86_emit_mov_mmx_memoffset (OrcProgram *program, int size, int reg1, int offset,
 }
 
 void
+x86_emit_mov_sse_memoffset (OrcProgram *program, int size, int reg1, int offset,
+    int reg2)
+{ /* Store SSE register reg1 to offset(reg2): size 4 -> movd, 8 -> movq, otherwise movdqu. */
+  if (size == 4) {
+    printf("  movd %%%s, %d(%%%s)\n", x86_get_regname_sse(reg1), offset,
+        x86_get_regname_ptr(reg2));
+    *program->codeptr++ = 0x66;
+    x86_emit_rex(program, 0, reg1, 0, reg2);
+    *program->codeptr++ = 0x0f;
+    *program->codeptr++ = 0x7e;
+  } else if (size == 8) {
+    printf("  movq %%%s, %d(%%%s)\n", x86_get_regname_sse(reg1), offset,
+        x86_get_regname_ptr(reg2));
+    *program->codeptr++ = 0x66;
+    x86_emit_rex(program, 0, reg1, 0, reg2);
+    *program->codeptr++ = 0x0f;
+    *program->codeptr++ = 0xd6; /* 66 0f d6 is movq m64, xmm; 66 0f 7f would be a 16-byte movdqa store */
+  } else {
+    printf("  movdqu %%%s, %d(%%%s)\n", x86_get_regname_sse(reg1), offset,
+        x86_get_regname_ptr(reg2));
+    *program->codeptr++ = 0xf3;
+    x86_emit_rex(program, 0, reg1, 0, reg2); /* same REX call as the other branches; sits between f3 and 0x0f */
+    *program->codeptr++ = 0x0f;
+    *program->codeptr++ = 0x7f;
+  }
+  x86_emit_modrm_memoffset (program, reg1, offset, reg2);
+}
+
+void
 x86_emit_mov_imm_reg (OrcProgram *program, int size, int value, int reg1)
 {
   if (size == 2) {
@@ -349,6 +409,30 @@ void x86_emit_mov_mmx_reg (OrcProgram *program, int reg1, int reg2)
   x86_emit_modrm_reg (program, reg2, reg1);
 }
 
+void x86_emit_mov_reg_sse (OrcProgram *program, int reg1, int reg2)
+{
+  /* Emits movd from general register reg1 to SSE register reg2. FIXME (reason not recorded). NOTE(review): the general register is passed first to x86_emit_rex here, while the memoffset emitters pass the SSE register first - confirm which REX bit each argument sets. */
+  printf("  movd %%%s, %%%s\n", x86_get_regname(reg1),
+      x86_get_regname_sse(reg2));
+  *program->codeptr++ = 0x66;
+  x86_emit_rex(program, 0, reg1, 0, reg2);
+  *program->codeptr++ = 0x0f;
+  *program->codeptr++ = 0x6e;
+  x86_emit_modrm_reg (program, reg1, reg2);
+}
+
+void x86_emit_mov_sse_reg (OrcProgram *program, int reg1, int reg2)
+{
+  /* Emits movd from SSE register reg1 to general register reg2. FIXME (reason not recorded). NOTE(review): the general register is passed first to x86_emit_rex here, while the memoffset emitters pass the SSE register first - confirm which REX bit each argument sets. */
+  printf("  movd %%%s, %%%s\n", x86_get_regname_sse(reg1),
+      x86_get_regname(reg2));
+  *program->codeptr++ = 0x66;
+  x86_emit_rex(program, 0, reg2, 0, reg1);
+  *program->codeptr++ = 0x0f;
+  *program->codeptr++ = 0x7e;
+  x86_emit_modrm_reg (program, reg2, reg1);
+}
+
 void
 x86_emit_test_reg_reg (OrcProgram *program, int size, int reg1, int reg2)
 {
diff --git a/orc/x86.h b/orc/x86.h
index a844a32..dc47b11 100644
--- a/orc/x86.h
+++ b/orc/x86.h
@@ -12,13 +12,19 @@ void x86_emit_pop (OrcProgram *program, int size, int reg);
 void x86_emit_mov_memoffset_reg (OrcProgram *program, int size, int offset, int reg1, int reg2);
 void x86_emit_mov_memoffset_mmx (OrcProgram *program, int size, int offset,
     int reg1, int reg2);
+void x86_emit_mov_memoffset_sse (OrcProgram *program, int size, int offset,
+    int reg1, int reg2);
 void x86_emit_mov_reg_memoffset (OrcProgram *program, int size, int reg1, int offset, int reg2);
 void x86_emit_mov_mmx_memoffset (OrcProgram *program, int size, int reg1, int offset,
     int reg2);
+void x86_emit_mov_sse_memoffset (OrcProgram *program, int size, int reg1, int offset,
+    int reg2);
 void x86_emit_mov_imm_reg (OrcProgram *program, int size, int value, int reg1);
 void x86_emit_mov_reg_reg (OrcProgram *program, int size, int reg1, int reg2);
 void x86_emit_mov_reg_mmx (OrcProgram *program, int reg1, int reg2);
 void x86_emit_mov_mmx_reg (OrcProgram *program, int reg1, int reg2);
+void x86_emit_mov_reg_sse (OrcProgram *program, int reg1, int reg2);
+void x86_emit_mov_sse_reg (OrcProgram *program, int reg1, int reg2);
 void x86_emit_test_reg_reg (OrcProgram *program, int size, int reg1, int reg2);
 void x86_emit_sar_imm_reg (OrcProgram *program, int size, int value, int reg);
 void x86_emit_dec_memoffset (OrcProgram *program, int size, int offset, int reg);


More information about the Liboil-commit mailing list