[Liboil-commit] examples/orc orc/orcprogram-x86.c orc/orcprogram.c orc/orcprogram.h

David Schleef ds at kemper.freedesktop.org
Tue May 20 17:50:42 PDT 2008


 examples/orc/jit.c    |   16 +-
 examples/orc/simple.c |    2 
 orc/orcprogram-x86.c  |  358 ++++++++++++++++++++++++++++++++++++++++++--------
 orc/orcprogram.c      |   87 ++++++++++--
 orc/orcprogram.h      |    8 +
 5 files changed, 402 insertions(+), 69 deletions(-)

New commits:
commit 96ab77890292b54c861a9c4b208c827e177b919d
Author: David Schleef <ds at ginger.bigkitten.com>
Date:   Tue May 20 17:49:51 2008 -0700

    [orc] changes to register allocation and additional emit functions to make
    mmx target work

diff --git a/examples/orc/jit.c b/examples/orc/jit.c
index 35df92c..868d9a4 100644
--- a/examples/orc/jit.c
+++ b/examples/orc/jit.c
@@ -39,6 +39,13 @@ main (int argc, char *argv[])
   orc_program_append (p, "add_s16", t1, t1, offset);
   orc_program_append (p, "rshift_s16", d1, t1, shift);
 
+#if 0
+  orc_program_append (p, "lshift_s16", t1, t1, shift);
+  orc_program_append (p, "sub_s16", t1, t1, shift);
+  orc_program_append (p, "mul_s16", t1, t1, shift);
+  //orc_program_append (p, "_loadi_s16", t1, t1, shift);
+#endif
+
   ex = orc_executor_new (p);
 
   orc_executor_set_n (ex, N);
@@ -46,9 +53,9 @@ main (int argc, char *argv[])
   orc_executor_set_array (ex, s2, src2);
   orc_executor_set_array (ex, d1, dest);
 
-  orc_program_compile_x86 (p);
+  orc_program_compile (p);
 
-  if (0) {
+  if (1) {
     int i;
 
     for(i=0;i<N;i++){
@@ -77,8 +84,11 @@ void
 test1 (int16_t *dest, int16_t *src1, int16_t *src2, int n)
 {
   int i;
+  int16_t t1, t2;
   for(i=0;i<n;i++){
-    dest[i] = (src1[i] + src2[i] + 1)>>1;
+    t1 = src1[i] + src2[i];
+    t2 = t1 + 1;
+    dest[i] = t2>>1;
   }
 }
 
diff --git a/examples/orc/simple.c b/examples/orc/simple.c
index dac5864..4d80282 100644
--- a/examples/orc/simple.c
+++ b/examples/orc/simple.c
@@ -42,7 +42,7 @@ test1(void)
 
   orc_program_append (p, "add_s16", d1, s1, s2);
 
-  orc_program_compile_x86 (p);
+  orc_program_compile (p);
 
   ex = orc_executor_new (p);
   orc_executor_set_n (ex, N);
diff --git a/orc/orcprogram-x86.c b/orc/orcprogram-x86.c
index 6bb6353..a669bb3 100644
--- a/orc/orcprogram-x86.c
+++ b/orc/orcprogram-x86.c
@@ -17,17 +17,28 @@
 void x86_emit_push (OrcProgram *program, int size, int reg);
 void x86_emit_pop (OrcProgram *program, int size, int reg);
 void x86_emit_mov_memoffset_reg (OrcProgram *program, int size, int offset, int reg1, int reg2);
+void x86_emit_mov_memoffset_mmx (OrcProgram *program, int size, int offset,
+    int reg1, int reg2);
 void x86_emit_mov_reg_memoffset (OrcProgram *program, int size, int reg1, int offset, int reg2);
+void x86_emit_mov_mmx_memoffset (OrcProgram *program, int size, int reg1, int offset,
+    int reg2);
 void x86_emit_mov_imm_reg (OrcProgram *program, int size, int value, int reg1);
 void x86_emit_mov_reg_reg (OrcProgram *program, int size, int reg1, int reg2);
+void x86_emit_mov_reg_mmx (OrcProgram *program, int reg1, int reg2);
+void x86_emit_mov_mmx_reg (OrcProgram *program, int reg1, int reg2);
 void x86_emit_test_reg_reg (OrcProgram *program, int size, int reg1, int reg2);
+void x86_emit_sar_imm_reg (OrcProgram *program, int size, int value, int reg);
 void x86_emit_dec_memoffset (OrcProgram *program, int size, int offset, int reg);
 void x86_emit_add_imm_memoffset (OrcProgram *program, int size, int value, int offset, int reg);
+void x86_emit_add_imm_reg (OrcProgram *program, int size, int value, int reg);
+void x86_emit_emms (OrcProgram *program);
 void x86_emit_ret (OrcProgram *program);
 void x86_emit_je (OrcProgram *program, int label);
 void x86_emit_jne (OrcProgram *program, int label);
 void x86_emit_label (OrcProgram *program, int label);
 
+static void mmx_emit_loadi_s16 (OrcProgram *p, int reg, int value);
+
 void x86_emit_modrm_memoffset (OrcProgram *program, int reg1, int offset, int reg2);
 void x86_emit_modrm_reg (OrcProgram *program, int reg1, int reg2);
 void x86_test (OrcProgram *program);
@@ -43,9 +54,18 @@ enum {
   X86_ESP,
   X86_EBP,
   X86_ESI,
-  X86_EDI
+  X86_EDI,
+  X86_MM0,
+  X86_MM1,
+  X86_MM2,
+  X86_MM3,
+  X86_MM4,
+  X86_MM5,
+  X86_MM6,
+  X86_MM7
 };
 
+
 static const char *
 x86_get_regname(int i)
 {
@@ -92,7 +112,7 @@ x86_get_regname_mmx(int i)
   static const char *x86_regs[] = { "mm0", "mm1", "mm2", "mm3",
     "mm4", "mm5", "mm6", "mm7" };
 
-  if (i>=ORC_GP_REG_BASE && i<ORC_GP_REG_BASE + 8) return x86_regs[i - ORC_GP_REG_BASE];
+  if (i>=X86_MM0 && i<X86_MM0 + 8) return x86_regs[i - X86_MM0];
   switch (i) {
     case 0:
       return "UNALLOCATED";
@@ -129,6 +149,12 @@ x86_emit_prologue (OrcProgram *program)
 void
 x86_emit_epilogue (OrcProgram *program)
 {
+  if (program->rule_set == ORC_RULE_MMX_1 ||
+      program->rule_set == ORC_RULE_MMX_2 ||
+      program->rule_set == ORC_RULE_MMX_4 ||
+      program->rule_set == ORC_RULE_MMX_8) {
+    x86_emit_emms (program);
+  }
   if (program->used_regs[X86_EBX]) {
     x86_emit_pop (program, 4, X86_EBX);
   }
@@ -168,7 +194,7 @@ orc_program_reset_alloc (OrcProgram *program)
 {
   int i;
 
-  for(i=ORC_GP_REG_BASE;i<ORC_GP_REG_BASE+8;i++){
+  for(i=ORC_GP_REG_BASE;i<ORC_GP_REG_BASE+16;i++){
     program->alloc_regs[i] = 0;
   }
   program->alloc_regs[X86_ECX] = 1;
@@ -177,6 +203,92 @@ orc_program_reset_alloc (OrcProgram *program)
 }
 
 void
+x86_load_constants (OrcProgram *program)
+{
+  int i;
+  for(i=0;i<program->n_vars;i++){
+    switch (program->vars[i].vartype) {
+      case ORC_VAR_TYPE_CONST:
+        mmx_emit_loadi_s16 (program, program->vars[i].alloc,
+            program->vars[i].s16);
+        break;
+      case ORC_VAR_TYPE_SRC:
+      case ORC_VAR_TYPE_DEST:
+        if (program->vars[i].ptr_register) {
+          x86_emit_mov_memoffset_reg (program, 4,
+              (int)G_STRUCT_OFFSET(OrcExecutor, arrays[i]), X86_EBP,
+              program->vars[i].ptr_register);
+        } else {
+          /* FIXME */
+          printf("ERROR");
+        }
+        break;
+      default:
+        break;
+    }
+  }
+}
+
+void
+x86_emit_load_src (OrcProgram *program, OrcVariable *var)
+{
+  int ptr_reg;
+  if (var->ptr_register == 0) {
+    x86_emit_mov_memoffset_reg (program, 4, var->ptr_offset, X86_EBP, X86_ECX);
+    ptr_reg = X86_ECX;
+  } else {
+    ptr_reg = var->ptr_register;
+  }
+  switch (program->rule_set) {
+    case ORC_RULE_SCALAR_1:
+      x86_emit_mov_memoffset_reg (program, 2, 0, ptr_reg, var->alloc);
+      break;
+    case ORC_RULE_MMX_1:
+      x86_emit_mov_memoffset_reg (program, 2, 0, ptr_reg, X86_ECX);
+      x86_emit_mov_reg_mmx (program, X86_ECX, var->alloc);
+      break;
+    case ORC_RULE_MMX_2:
+      x86_emit_mov_memoffset_mmx (program, 4, 0, ptr_reg, var->alloc);
+      break;
+    case ORC_RULE_MMX_4:
+      x86_emit_mov_memoffset_mmx (program, 8, 0, ptr_reg, var->alloc);
+      break;
+    default:
+      printf("ERROR\n");
+  }
+}
+
+void
+x86_emit_store_dest (OrcProgram *program, OrcVariable *var)
+{
+  int ptr_reg;
+  if (var->ptr_register == 0) {
+    x86_emit_mov_memoffset_reg (program, 4, var->ptr_offset, X86_EBP, X86_ECX);
+    ptr_reg = X86_ECX;
+  } else {
+    ptr_reg = var->ptr_register;
+  }
+  switch (program->rule_set) {
+    case ORC_RULE_SCALAR_1:
+      x86_emit_mov_reg_memoffset (program, 2, var->alloc, 0, ptr_reg);
+      break;
+    case ORC_RULE_MMX_1:
+      /* FIXME we might be using ecx twice here */
+      x86_emit_mov_mmx_reg (program, var->alloc, X86_ECX);
+      x86_emit_mov_reg_memoffset (program, 2, X86_ECX, 0, ptr_reg);
+      break;
+    case ORC_RULE_MMX_2:
+      x86_emit_mov_mmx_memoffset (program, 4, var->alloc, 0, ptr_reg);
+      break;
+    case ORC_RULE_MMX_4:
+      x86_emit_mov_mmx_memoffset (program, 8, var->alloc, 0, ptr_reg);
+      break;
+    default:
+      printf("ERROR\n");
+  }
+}
+
+void
 orc_program_assemble_x86 (OrcProgram *program)
 {
   int j;
@@ -191,12 +303,16 @@ orc_program_assemble_x86 (OrcProgram *program)
   x86_emit_mov_memoffset_reg (program, 4, (int)G_STRUCT_OFFSET(OrcExecutor,n),
       X86_EBP, X86_ECX);
 
+  x86_emit_sar_imm_reg (program, 4, 2, X86_ECX);
   x86_emit_mov_reg_memoffset (program, 4, X86_ECX,
       (int)G_STRUCT_OFFSET(OrcExecutor,counter), X86_EBP);
 
   x86_emit_test_reg_reg (program, 4, X86_ECX, X86_ECX);
 
   x86_emit_je (program, 1);
+
+  x86_load_constants (program);
+
   x86_emit_label (program, 0);
 
   for(j=0;j<program->n_insns;j++){
@@ -219,21 +335,11 @@ orc_program_assemble_x86 (OrcProgram *program)
     for(k=opcode->n_dest;k<opcode->n_src + opcode->n_dest;k++){
       switch (args[k]->vartype) {
         case ORC_VAR_TYPE_SRC:
-          x86_emit_mov_memoffset_reg (program, 4,
-              (int)G_STRUCT_OFFSET(OrcExecutor, arrays[k]),
-              X86_EBP, X86_ECX);
-          x86_emit_mov_memoffset_reg (program, 2, 0, X86_ECX, args[k]->alloc);
+          x86_emit_load_src (program, args[k]);
           break;
         case ORC_VAR_TYPE_CONST:
-          if (insn->rule_flag != ORC_RULE_REG_IMM) {
-            x86_emit_mov_imm_reg (program, 2, args[k]->s16, args[k]->alloc);
-          }
           break;
         case ORC_VAR_TYPE_TEMP:
-#if 0
-          g_print("  movw temp, %%%s\n",
-              x86_get_regname(args[k]->alloc));
-#endif
           break;
         default:
           break;
@@ -253,16 +359,9 @@ orc_program_assemble_x86 (OrcProgram *program)
     for(k=0;k<opcode->n_dest;k++){
       switch (args[k]->vartype) {
         case ORC_VAR_TYPE_DEST:
-          x86_emit_mov_memoffset_reg (program, 4,
-              (int)G_STRUCT_OFFSET(OrcExecutor, arrays[k]),
-              X86_EBP, X86_ECX);
-          x86_emit_mov_reg_memoffset (program, 2, args[k]->alloc, 0, X86_ECX);
+          x86_emit_store_dest (program, args[k]);
           break;
         case ORC_VAR_TYPE_TEMP:
-#if 0
-          g_print("  movw %%%s, temp\n",
-              x86_get_regname(args[k]->alloc));
-#endif
           break;
         default:
           break;
@@ -273,13 +372,21 @@ orc_program_assemble_x86 (OrcProgram *program)
   for(k=0;k<program->n_vars;k++){
     if (program->vars[k].vartype == ORC_VAR_TYPE_SRC ||
         program->vars[k].vartype == ORC_VAR_TYPE_DEST) {
-      x86_emit_add_imm_memoffset (program, 4, 2, 
-          (int)G_STRUCT_OFFSET(OrcExecutor, arrays[k]),
-          X86_EBP);
+      if (program->vars[k].ptr_register) {
+        x86_emit_add_imm_reg (program, 4,
+            orc_variable_get_size(program->vars + k) * program->n_per_loop,
+            program->vars[k].ptr_register);
+      } else {
+        x86_emit_add_imm_memoffset (program, 4,
+            orc_variable_get_size(program->vars + k) * program->n_per_loop,
+            (int)G_STRUCT_OFFSET(OrcExecutor, arrays[k]),
+            X86_EBP);
+      }
     }
   }
 
-  x86_emit_dec_memoffset (program, 4, (int)G_STRUCT_OFFSET(OrcExecutor,counter),
+  x86_emit_dec_memoffset (program, 4,
+      (int)G_STRUCT_OFFSET(OrcExecutor,counter),
       X86_EBP);
   x86_emit_jne (program, 0);
   x86_emit_label (program, 1);
@@ -426,25 +533,37 @@ orc_program_x86_register_rules (void)
 /* mmx rules */
 
 static void
-mmx_rule_loadi_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
+mmx_emit_loadi_s16 (OrcProgram *p, int reg, int value)
 {
-  x86_emit_mov_imm_reg (p, 4,  p->vars[insn->args[2]].s16, X86_ECX);
+  if (value == 0) {
+    g_print("  pxor %%%s, %%%s\n", x86_get_regname_mmx(reg),
+        x86_get_regname_mmx(reg));
+    *p->codeptr++ = 0x0f;
+    *p->codeptr++ = 0xef;
+    x86_emit_modrm_reg (p, reg, reg);
+  } else {
+    x86_emit_mov_imm_reg (p, 4, value, X86_ECX);
 
-  g_print("  movd %%ecx, %%%s\n",
-      x86_get_regname_mmx(p->vars[insn->args[0]].alloc));
-  *p->codeptr++ = 0x0f;
-  *p->codeptr++ = 0x6e;
-  x86_emit_modrm_reg (p, X86_ECX, p->vars[insn->args[0]].alloc);
+    g_print("  movd %%ecx, %%%s\n", x86_get_regname_mmx(reg));
+    *p->codeptr++ = 0x0f;
+    *p->codeptr++ = 0x6e;
+    x86_emit_modrm_reg (p, X86_ECX, reg);
 
-  g_print("  pshufw $0, %%%s, %%%s\n",
-      x86_get_regname_mmx(p->vars[insn->args[0]].alloc),
-      x86_get_regname_mmx(p->vars[insn->args[0]].alloc));
+    g_print("  pshufw $0, %%%s, %%%s\n", x86_get_regname_mmx(reg),
+        x86_get_regname_mmx(reg));
 
-  *p->codeptr++ = 0x0f;
-  *p->codeptr++ = 0x70;
-  x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc,
-      p->vars[insn->args[0]].alloc);
-  *p->codeptr++ = 0x00;
+    *p->codeptr++ = 0x0f;
+    *p->codeptr++ = 0x70;
+    x86_emit_modrm_reg (p, reg, reg);
+    *p->codeptr++ = 0x00;
+  }
+}
+
+static void
+mmx_rule_loadi_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
+{
+  mmx_emit_loadi_s16 (p, p->vars[insn->args[0]].alloc,
+      p->vars[insn->args[2]].s16);
 }
 
 static void
@@ -489,27 +608,51 @@ mmx_rule_mul_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
 static void
 mmx_rule_lshift_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
 {
-  g_print("  psllw %%%s, %%%s\n",
-      x86_get_regname_mmx(p->vars[insn->args[2]].alloc),
-      x86_get_regname_mmx(p->vars[insn->args[0]].alloc));
+  if (p->vars[insn->args[2]].vartype == ORC_VAR_TYPE_CONST) {
+    g_print("  psllw $%d, %%%s\n",
+        p->vars[insn->args[2]].s16,
+        x86_get_regname_mmx(p->vars[insn->args[0]].alloc));
 
-  *p->codeptr++ = 0x0f;
-  *p->codeptr++ = 0xf1;
-  x86_emit_modrm_reg (p, p->vars[insn->args[2]].alloc,
-      p->vars[insn->args[0]].alloc);
+    *p->codeptr++ = 0x0f;
+    *p->codeptr++ = 0x71;
+    x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, 6);
+    *p->codeptr++ = p->vars[insn->args[2]].s16;
+  } else {
+    /* FIXME this doesn't work quite right */
+    g_print("  psllw %%%s, %%%s\n",
+        x86_get_regname_mmx(p->vars[insn->args[2]].alloc),
+        x86_get_regname_mmx(p->vars[insn->args[0]].alloc));
+
+    *p->codeptr++ = 0x0f;
+    *p->codeptr++ = 0xf1;
+    x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc,
+        p->vars[insn->args[2]].alloc);
+  }
 }
 
 static void
 mmx_rule_rshift_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
 {
-  g_print("  psraw %%%s, %%%s\n",
-      x86_get_regname_mmx(p->vars[insn->args[2]].alloc),
-      x86_get_regname_mmx(p->vars[insn->args[0]].alloc));
+  if (p->vars[insn->args[2]].vartype == ORC_VAR_TYPE_CONST) {
+    g_print("  psraw $%d, %%%s\n",
+        p->vars[insn->args[2]].s16,
+        x86_get_regname_mmx(p->vars[insn->args[0]].alloc));
 
-  *p->codeptr++ = 0x0f;
-  *p->codeptr++ = 0xe1;
-  x86_emit_modrm_reg (p, p->vars[insn->args[2]].alloc,
-      p->vars[insn->args[0]].alloc);
+    *p->codeptr++ = 0x0f;
+    *p->codeptr++ = 0x71;
+    x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, 4);
+    *p->codeptr++ = p->vars[insn->args[2]].s16;
+  } else {
+    /* FIXME this doesn't work quite right */
+    g_print("  psraw %%%s, %%%s\n",
+        x86_get_regname_mmx(p->vars[insn->args[2]].alloc),
+        x86_get_regname_mmx(p->vars[insn->args[0]].alloc));
+
+    *p->codeptr++ = 0x0f;
+    *p->codeptr++ = 0xe1;
+    x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc,
+        p->vars[insn->args[2]].alloc);
+  }
 }
 
 void
@@ -619,6 +762,25 @@ x86_emit_mov_memoffset_reg (OrcProgram *program, int size, int offset,
 }
 
 void
+x86_emit_mov_memoffset_mmx (OrcProgram *program, int size, int offset,
+    int reg1, int reg2)
+{
+  /* FIXME */
+  if (size == 4) {
+    g_print("  movd %d(%%%s), %%%s\n", offset, x86_get_regname(reg1),
+        x86_get_regname_mmx(reg2));
+    *program->codeptr++ = 0x66;
+  } else {
+    g_print("  movq %d(%%%s), %%%s\n", offset, x86_get_regname(reg1),
+        x86_get_regname_mmx(reg2));
+  }
+
+  *program->codeptr++ = 0x0f;
+  *program->codeptr++ = 0x6f;
+  x86_emit_modrm_memoffset (program, reg2, offset, reg1);
+}
+
+void
 x86_emit_mov_reg_memoffset (OrcProgram *program, int size, int reg1, int offset,
     int reg2)
 {
@@ -636,6 +798,25 @@ x86_emit_mov_reg_memoffset (OrcProgram *program, int size, int reg1, int offset,
 }
 
 void
+x86_emit_mov_mmx_memoffset (OrcProgram *program, int size, int reg1, int offset,
+    int reg2)
+{
+  /* FIXME */
+  if (size == 4) {
+    g_print("  movd %%%s, %d(%%%s)\n", x86_get_regname_mmx(reg1), offset,
+        x86_get_regname(reg2));
+    *program->codeptr++ = 0x66;
+  } else {
+    g_print("  movq %%%s, %d(%%%s)\n", x86_get_regname_mmx(reg1), offset,
+        x86_get_regname(reg2));
+  }
+
+  *program->codeptr++ = 0x0f;
+  *program->codeptr++ = 0x7f;
+  x86_emit_modrm_memoffset (program, reg1, offset, reg2);
+}
+
+void
 x86_emit_mov_imm_reg (OrcProgram *program, int size, int value, int reg1)
 {
   if (size == 2) {
@@ -670,6 +851,25 @@ void x86_emit_mov_reg_reg (OrcProgram *program, int size, int reg1, int reg2)
   x86_emit_modrm_reg (program, reg2, reg1);
 }
 
+void x86_emit_mov_reg_mmx (OrcProgram *program, int reg1, int reg2)
+{
+  /* FIXME */
+  g_print("  movd %%%s, %%%s\n", x86_get_regname(reg1),
+      x86_get_regname_mmx(reg2));
+  *program->codeptr++ = 0x66;
+  *program->codeptr++ = 0x89;
+  x86_emit_modrm_reg (program, reg2, reg1);
+}
+
+void x86_emit_mov_mmx_reg (OrcProgram *program, int reg1, int reg2)
+{
+  /* FIXME */
+  g_print("  movd %%%s, %%%s\n", x86_get_regname_mmx(reg1),
+      x86_get_regname(reg2));
+  *program->codeptr++ = 0x66;
+  *program->codeptr++ = 0x89;
+  x86_emit_modrm_reg (program, reg2, reg1);
+}
 
 void
 x86_emit_test_reg_reg (OrcProgram *program, int size, int reg1, int reg2)
@@ -688,6 +888,21 @@ x86_emit_test_reg_reg (OrcProgram *program, int size, int reg1, int reg2)
 }
 
 void
+x86_emit_sar_imm_reg (OrcProgram *program, int size, int value, int reg)
+{
+  g_print("  sarl $%d, %%%s\n", value, x86_get_regname(reg));
+
+  if (value == 1) {
+    *program->codeptr++ = 0xd1;
+    x86_emit_modrm_reg (program, reg, 7);
+  } else {
+    *program->codeptr++ = 0xc1;
+    x86_emit_modrm_reg (program, reg, 7);
+    *program->codeptr++ = value;
+  }
+}
+
+void
 x86_emit_add_imm_memoffset (OrcProgram *program, int size, int value,
     int offset, int reg)
 {
@@ -717,6 +932,32 @@ x86_emit_add_imm_memoffset (OrcProgram *program, int size, int value,
 }
 
 void
+x86_emit_add_imm_reg (OrcProgram *program, int size, int value, int reg)
+{
+  if (size == 2) {
+    g_print("  addw $%d, %%%s\n", value, x86_get_regname_16(reg));
+    *program->codeptr++ = 0x66;
+  } else {
+    g_print("  addl $%d, %%%s\n", value, x86_get_regname(reg));
+  }
+
+  if (value >= -128 && value < 128) {
+    *program->codeptr++ = 0x83;
+    x86_emit_modrm_reg (program, reg, 0);
+    *program->codeptr++ = (value & 0xff);
+  } else {
+    *program->codeptr++ = 0x81;
+    x86_emit_modrm_reg (program, reg, 0);
+    *program->codeptr++ = (value & 0xff);
+    *program->codeptr++ = ((value>>8) & 0xff);
+    if (size == 4) {
+      *program->codeptr++ = ((value>>16) & 0xff);
+      *program->codeptr++ = ((value>>24) & 0xff);
+    }
+  }
+}
+
+void
 x86_emit_dec_memoffset (OrcProgram *program, int size,
     int offset, int reg)
 {
@@ -737,6 +978,13 @@ void x86_emit_ret (OrcProgram *program)
   *program->codeptr++ = 0xc3;
 }
 
+void x86_emit_emms (OrcProgram *program)
+{
+  g_print("  emms\n");
+  *program->codeptr++ = 0x0f;
+  *program->codeptr++ = 0x77;
+}
+
 void
 x86_add_fixup (OrcProgram *program, unsigned char *ptr, int label)
 {
diff --git a/orc/orcprogram.c b/orc/orcprogram.c
index 753a237..37402dc 100644
--- a/orc/orcprogram.c
+++ b/orc/orcprogram.c
@@ -9,6 +9,7 @@
 #include <orc/orcprogram.h>
 
 void orc_program_assign_rules (OrcProgram *program);
+void orc_program_global_reg_alloc (OrcProgram *program);
 void orc_program_rewrite_vars (OrcProgram *program);
 void orc_program_rewrite_vars2 (OrcProgram *program);
 void orc_program_do_regs (OrcProgram *program);
@@ -21,7 +22,8 @@ orc_program_new (void)
   p = malloc(sizeof(OrcProgram));
   memset (p, 0, sizeof(OrcProgram));
 
-  p->rule_set = ORC_RULE_SCALAR_1;
+  p->rule_set = ORC_RULE_MMX_4;
+  p->n_per_loop = 4;
 
   return p;
 }
@@ -134,10 +136,25 @@ orc_program_allocate_register (OrcProgram *program, int data_reg)
 {
   int i;
 
-  for(i=ORC_GP_REG_BASE;i<ORC_GP_REG_BASE+8;i++){
-    if (program->alloc_regs[i] == 0) {
-      program->alloc_regs[i]++;
-      return i;
+  if (program->rule_set == ORC_RULE_SCALAR_1) {
+    data_reg = FALSE;
+  }
+
+  if (!data_reg) {
+    for(i=ORC_GP_REG_BASE;i<ORC_GP_REG_BASE+8;i++){
+      if (program->alloc_regs[i] == 0) {
+        program->alloc_regs[i]++;
+        program->used_regs[i] = 1;
+        return i;
+      }
+    }
+  } else {
+    for(i=ORC_GP_REG_BASE+8;i<ORC_GP_REG_BASE+16;i++){
+      if (program->alloc_regs[i] == 0) {
+        program->alloc_regs[i]++;
+        program->used_regs[i] = 1;
+        return i;
+      }
     }
   }
   g_print("register overflow\n");
@@ -150,9 +167,11 @@ orc_program_compile (OrcProgram *program)
   orc_program_assign_rules (program);
   orc_program_rewrite_vars (program);
 
+  orc_program_reset_alloc (program);
+  orc_program_global_reg_alloc (program);
+
   orc_program_do_regs (program);
 
-  orc_program_reset_alloc (program);
   orc_program_rewrite_vars2 (program);
 
   orc_program_allocate_codemem (program);
@@ -260,9 +279,53 @@ orc_program_rewrite_vars (OrcProgram *program)
 }
 
 void
+orc_program_global_reg_alloc (OrcProgram *program)
+{
+  int i;
+  OrcVariable *var;
+
+
+  for(i=0;i<program->n_vars;i++){
+    var = program->vars + i;
+    switch (var->vartype) {
+      case ORC_VAR_TYPE_CONST:
+        var->first_use = -1;
+        var->last_use = -1;
+        var->alloc = orc_program_allocate_register (program, TRUE);
+        break;
+      case ORC_VAR_TYPE_SRC:
+      case ORC_VAR_TYPE_DEST:
+        var->ptr_register = orc_program_allocate_register (program, FALSE);
+        break;
+      default:
+        break;
+    }
+  }
+}
+
+void
 orc_program_do_regs (OrcProgram *program)
 {
+  int i;
+  int k;
+  int var;
+  OrcInstruction *insn;
+  OrcOpcode *opcode;
+
+  for(i=0;i<program->n_insns;i++){
+    insn = program->insns + i;
+    opcode = insn->opcode;
+
+    for(k=opcode->n_dest;k<opcode->n_src + opcode->n_dest;k++){
+      var = insn->args[k];
+
 
+    }
+
+    for(k=0;k<opcode->n_dest;k++){
+      var = insn->args[k];
+    }
+  }
 }
 
 void
@@ -283,9 +346,8 @@ orc_program_rewrite_vars2 (OrcProgram *program)
       int dest = program->insns[j].args[0];
       if (program->vars[src1].last_use == j) {
         if (program->vars[src1].first_use == j) {
-          k = orc_program_allocate_register (program, FALSE);
+          k = orc_program_allocate_register (program, TRUE);
           program->vars[src1].alloc = k;
-          program->used_regs[k] = 1;
         }
         program->alloc_regs[program->vars[src1].alloc]++;
         program->vars[dest].alloc = program->vars[src1].alloc;
@@ -307,9 +369,8 @@ orc_program_rewrite_vars2 (OrcProgram *program)
     for(i=0;i<program->n_vars;i++){
       if (program->vars[i].first_use == j) {
         if (program->vars[i].alloc) continue;
-        k = orc_program_allocate_register (program, FALSE);
+        k = orc_program_allocate_register (program, TRUE);
         program->vars[i].alloc = k;
-        program->used_regs[k] = 1;
       }
     }
     for(i=0;i<program->n_vars;i++){
@@ -379,3 +440,9 @@ orc_program_dump (OrcProgram *program)
 
 }
 
+int
+orc_variable_get_size (OrcVariable *var)
+{
+  return 2;
+}
+
diff --git a/orc/orcprogram.h b/orc/orcprogram.h
index 3db95a5..e5cb83c 100644
--- a/orc/orcprogram.h
+++ b/orc/orcprogram.h
@@ -61,6 +61,9 @@ struct _OrcVariable {
   int is_chained;
 
   int16_t s16;
+
+  int ptr_register;
+  int ptr_offset;
 };
 
 struct _OrcRule {
@@ -109,6 +112,7 @@ struct _OrcFixup {
 struct _OrcRegister {
   int var;
 
+  int is_data;
   int is_chained;
   int chained_reg;
 
@@ -141,6 +145,8 @@ struct _OrcProgram {
 
   int used_regs[ORC_N_REGS];
   int alloc_regs[ORC_N_REGS];
+
+  int n_per_loop;
 };
 
 struct _OrcExecutor {
@@ -208,6 +214,8 @@ void orc_rule_register (const char *opcode_name, unsigned int mode,
 void orc_program_x86_register_rules (void);
 void orc_program_allocate_codemem (OrcProgram *program);
 void orc_program_dump_code (OrcProgram *program);
+
+int orc_variable_get_size (OrcVariable *var);
  
 #endif
 


More information about the Liboil-commit mailing list