Mesa (master): r600 : add pre-compile mesa shader calling interface, in order to handle

Richard Li richardradeon at kemper.freedesktop.org
Wed Dec 9 12:47:52 PST 2009


Module: Mesa
Branch: master
Commit: 8927b72118f9433aafd0e811cfc1981215eb3c5f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=8927b72118f9433aafd0e811cfc1981215eb3c5f

Author: Richard Li <richardradeon at gmail.com>
Date:   Wed Dec  9 15:39:16 2009 -0500

r600 : add pre-compile mesa shader calling interface, in order to handle
complex built-in shader instructions.

---

 src/mesa/drivers/dri/r600/r700_assembler.c |  407 ++++++++++++++++++++++++++--
 src/mesa/drivers/dri/r600/r700_assembler.h |   65 ++++-
 src/mesa/drivers/dri/r600/r700_fragprog.c  |   25 ++-
 src/mesa/drivers/dri/r600/r700_vertprog.c  |   25 ++-
 4 files changed, 498 insertions(+), 24 deletions(-)

diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index aed84fc..e84f524 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -32,6 +32,7 @@
 
 #include "main/mtypes.h"
 #include "main/imports.h"
+#include "shader/prog_parameter.h"
 
 #include "radeon_debug.h"
 #include "r600_context.h"
@@ -41,6 +42,39 @@
 #define USE_CF_FOR_CONTINUE_BREAK 1
 #define USE_CF_FOR_POP_AFTER      1
 
+struct prog_instruction noise1_insts[12] = { /* Pre-built Mesa IL for the noise1 sub, OPCODE_BGNSUB through OPCODE_ENDSUB; used as noise1_presub.Instructions. Fields are positional, so this depends on the struct prog_instruction layout. */
+    {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
+    {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
+    {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
+    {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
+    {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
+    {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0}, 
+    {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
+    {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
+    {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
+    {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
+    {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
+    {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}
+};
+float noise1_const[2][4] = { /* Constant rows for the noise1 sub; used as noise1_presub.ParameterValues. Only row 0 is given here (row 1 is implicitly zero) and noise1_presub declares NumParameters = 1. */
+    {0.300000f, 0.900000f, 0.500000f, 0.300000f}
+};
+
+COMPILED_SUB noise1_presub = { /* Description of the pre-compiled noise1 sub; values are positional, in COMPILED_SUB field order. */
+    &(noise1_insts[0]), /* Instructions */
+    12, /* NumInstructions: entries in noise1_insts */
+    2, /* NumTemporaries */
+    1, /* NumParameters: constant rows taken from ParameterValues */
+    0, /* MinRegIndex */
+    &(noise1_const[0]), /* ParameterValues */
+    SWIZZLE_X, /* outputSwizzleX */
+    SWIZZLE_X, /* outputSwizzleY */
+    SWIZZLE_X, /* outputSwizzleZ */
+    SWIZZLE_X, /* outputSwizzleW: all four channels read the result's X component */
+    {0,0,0}, /* srcRegIndex */
+    0 /* dstRegIndex */
+};
+
 BITS addrmode_PVSDST(PVSDST * pPVSDST)
 {
 	return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
@@ -330,14 +364,14 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
     return(format);
 }
 
-unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
+unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3) 
 {
-    if(pAsm->D.dst.op3)
+    if(nIsOp3 > 0)
     {
         return 3;
     }
 
-    switch (pAsm->D.dst.opcode)
+    switch (opcode)
     {
     case SQ_OP2_INST_ADD:
     case SQ_OP2_INST_KILLE:
@@ -378,7 +412,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
         return 1;
         
     default: radeon_error(
-		    "Need instruction operand number for %x.\n", pAsm->D.dst.opcode);
+		    "Need instruction operand number for %x.\n", opcode); 
     };
 
     return 3;
@@ -500,6 +534,11 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700
 
     pAsm->unCFflags = 0;
 
+    pAsm->presubs           = NULL;
+    pAsm->unPresubArraySize = 0;
+    pAsm->unNumPresub       = 0;
+    pAsm->unCurNumILInsts   = 0;
+
     return 0;
 }
 
@@ -2010,7 +2049,7 @@ GLboolean check_scalar(r700_AssemblerBase* pAsm,
 
     GLuint swizzle_key;
 
-    GLuint number_of_operands = r700GetNumOperands(pAsm);
+    GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
 
     for (src=0; src<number_of_operands; src++) 
     {
@@ -2099,7 +2138,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
 
     GLuint swizzle_key;
 
-    GLuint number_of_operands = r700GetNumOperands(pAsm);
+    GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
 
     for (src=0; src<number_of_operands; src++) 
     {
@@ -2180,7 +2219,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
     int    current_source_index;
     GLuint contiguous_slots_needed;
 
-    GLuint    uNumSrc = r700GetNumOperands(pAsm);
+    GLuint    uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
     //GLuint    channel_swizzle, j;
     //GLuint    chan_counter[4] = {0, 0, 0, 0};
     //PVSSRC *  pSource[3];
@@ -4968,7 +5007,7 @@ void add_return_inst(r700_AssemblerBase *pAsm)
     pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
 }
 
-GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex)
+GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift)
 {
     /* Put in sub */
     if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize )
@@ -4983,7 +5022,7 @@ GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex)
         pAsm->unSubArraySize += 10;
     }
 
-    pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex;
+    pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex + uiIL_Shift;
     pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL;  
     pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL;  
     pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0;
@@ -5074,9 +5113,13 @@ GLboolean assemble_RET(r700_AssemblerBase *pAsm)
 
 GLboolean assemble_CAL(r700_AssemblerBase *pAsm, 
                        GLint nILindex,
+                       GLuint uiIL_Shift,
                        GLuint uiNumberInsts,
-                       struct prog_instruction *pILInst)
+                       struct prog_instruction *pILInst,
+                       PRESUB_DESC * pPresubDesc)
 {
+    GLint uiIL_Offset;
+
     pAsm->alu_x_opcode = SQ_CF_INST_ALU;
 
     if(GL_FALSE == add_cf_instruction(pAsm) )
@@ -5109,8 +5152,12 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
         pAsm->unCallerArraySize += 10;
     }
     
-    pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = nILindex;
-    pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr       = pAsm->cf_current_cf_clause_ptr; 
+    uiIL_Offset = nILindex + uiIL_Shift;
+    pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset; 
+    pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr       = pAsm->cf_current_cf_clause_ptr;
+    
+    pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr  = NULL; 
+    pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL; 
 
     pAsm->unCallerArrayPointer++;
 
@@ -5120,7 +5167,7 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
     GLboolean bRet;
     for(j=0; j<pAsm->unSubArrayPointer; j++)
     {
-        if(nILindex == pAsm->subs[j].subIL_Offset)
+        if(uiIL_Offset == pAsm->subs[j].subIL_Offset)
         {   /* compiled before */
 
             max = pAsm->subs[j].unStackDepthMax 
@@ -5138,7 +5185,7 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
     pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer;
     unSubID = pAsm->unSubArrayPointer;
 
-    bRet = AssembleInstr(nILindex, uiNumberInsts, pILInst, pAsm);
+    bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm);
 
     if(GL_TRUE == bRet)
     {
@@ -5148,6 +5195,8 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
         {
             pAsm->CALLSTACK[pAsm->CALLSP].max = max;
         }
+
+        pAsm->subs[unSubID].pPresubDesc = pPresubDesc;
     }
 
     return bRet;
@@ -5313,6 +5362,7 @@ GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP)
 }
 
 GLboolean AssembleInstr(GLuint uiFirstInst,
+                        GLuint uiIL_Shift,
                         GLuint uiNumberInsts,
                         struct prog_instruction *pILInst, 
 						r700_AssemblerBase *pR700AsmCode)
@@ -5468,6 +5518,26 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
         case OPCODE_MUL: 
             if ( GL_FALSE == assemble_MUL(pR700AsmCode) ) 
                 return GL_FALSE;
+            break;
+            
+        case OPCODE_NOISE1:
+            {                                               
+                callPreSub(pR700AsmCode, 
+                           GLSL_NOISE1,                         
+                           &noise1_presub,                                                  
+                           pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number, 
+                           1); 
+                radeon_error("noise1: not yet supported shader instruction\n");
+            };
+            break; 
+        case OPCODE_NOISE2: 
+            radeon_error("noise2: not yet supported shader instruction\n");
+            break; 
+        case OPCODE_NOISE3: 
+            radeon_error("noise3: not yet supported shader instruction\n");
+            break; 
+        case OPCODE_NOISE4: 
+            radeon_error("noise4: not yet supported shader instruction\n");
             break; 
 
         case OPCODE_POW: 
@@ -5653,7 +5723,7 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
             break;
 
         case OPCODE_BGNSUB:
-            if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i) )
+            if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) )
             {
                 return GL_FALSE;
             }
@@ -5668,9 +5738,11 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
         
         case OPCODE_CAL:
             if( GL_FALSE == assemble_CAL(pR700AsmCode, 
-                                         pILInst[i].BranchTarget,                                         
+                                         pILInst[i].BranchTarget,
+                                         uiIL_Shift,
                                          uiNumberInsts,
-                                         pILInst) )
+                                         pILInst,
+                                         NULL) )
             {
                 return GL_FALSE;
             }
@@ -5707,7 +5779,7 @@ GLboolean InitShaderProgram(r700_AssemblerBase * pAsm)
     return GL_TRUE;
 }
 
-GLboolean RelocProgram(r700_AssemblerBase * pAsm)
+GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg)
 {
     GLuint i;
     GLuint unCFoffset;
@@ -5717,6 +5789,12 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm)
     R700ShaderInstruction *        pInst;
     R700ControlFlowGenericClause * pCFInst;
 
+    R700ControlFlowALUClause * pCF_ALU;
+    R700ALUInstruction       * pALU;
+    GLuint                     unConstOffset = 0;
+    GLuint                     unRegOffset;
+    GLuint                     unMinRegIndex;
+
     plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local;
 
     /* remove flags init if they are not used */
@@ -5762,6 +5840,11 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm)
 
     unCFoffset = plstCFmain->uNumOfNode;
 
+    if(NULL != pILProg->Parameters)
+    {        
+        unConstOffset = pILProg->Parameters->NumParameters;
+    }
+
     /* Reloc subs */
     for(i=0; i<pAsm->unSubArrayPointer; i++)
     {
@@ -5799,6 +5882,84 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm)
             pInst = pInst->pNextInst;
         };
 
+        if(NULL != pAsm->subs[i].pPresubDesc)
+        {
+            GLuint                     uNumSrc;            
+            
+            unMinRegIndex  = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex;
+            unRegOffset    = pAsm->subs[i].pPresubDesc->maxStartReg;            
+            unConstOffset += pAsm->subs[i].pPresubDesc->unConstantsStart;
+
+            pInst = plstCFsub->pHead;
+            while(pInst)
+            {
+                if(SIT_CF_ALU == pInst->m_ShaderInstType)
+                {
+                    pCF_ALU = (R700ControlFlowALUClause *)pInst;
+
+                    pALU = pCF_ALU->m_pLinkedALUInstruction;
+                    for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
+                    {
+                        pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
+
+                        if(pALU->m_Word0.f.src0_sel < SQ_ALU_SRC_GPR_SIZE)
+                        {   
+                            pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
+                        }
+                        else if(pALU->m_Word0.f.src0_sel >= SQ_ALU_SRC_CFILE_BASE)
+                        {   
+                            pALU->m_Word0.f.src0_sel += unConstOffset;
+                        }
+
+                        if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F) 
+                            >= SQ_OP3_INST_MUL_LIT )
+                        {   /* op3 : 3 srcs */
+                            if(pALU->m_Word1_OP3.f.src2_sel < SQ_ALU_SRC_GPR_SIZE)
+                            {   
+                                pALU->m_Word1_OP3.f.src2_sel = pALU->m_Word1_OP3.f.src2_sel + unRegOffset - unMinRegIndex;
+                            }
+                            else if(pALU->m_Word1_OP3.f.src2_sel >= SQ_ALU_SRC_CFILE_BASE)
+                            {   
+                                pALU->m_Word1_OP3.f.src2_sel += unConstOffset;
+                            }    
+                            if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
+                            {   
+                                pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
+                            }
+                            else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
+                            {   
+                                pALU->m_Word0.f.src1_sel += unConstOffset;
+                            }                                 
+                        }
+                        else
+                        {
+                            if(pAsm->bR6xx)
+                            {
+                                uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0);
+                            }
+                            else
+                            {
+                                uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0);
+                            }
+                            if(2 == uNumSrc)
+                            {   /* 2 srcs */
+                                if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
+                                {   
+                                    pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
+                                }
+                                else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
+                                {   
+                                    pALU->m_Word0.f.src1_sel += unConstOffset;
+                                }                                  
+                            }                            
+                        }
+                        pALU = (R700ALUInstruction*)(pALU->pNextInst);
+                    }                    
+                }             
+                pInst = pInst->pNextInst;
+            };
+        }
+
         /* Put sub into main */
         plstCFmain->pTail->pNextInst = plstCFsub->pHead;
         plstCFmain->pTail            = plstCFsub->pTail;
@@ -5812,11 +5973,216 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm)
     {
         pAsm->callers[i].cf_ptr->m_Word0.f.addr
             = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset; 
+
+        if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc)
+        {                 
+            unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex;
+            unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg;
+
+            if(NULL != pAsm->callers[i].prelude_cf_ptr)
+            {                
+                pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr);
+                pALU = pCF_ALU->m_pLinkedALUInstruction;
+                for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
+                {
+                    pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
+                    pALU = (R700ALUInstruction*)(pALU->pNextInst);
+                }
+            }
+            if(NULL != pAsm->callers[i].finale_cf_ptr)
+            {
+                pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr);
+                pALU = pCF_ALU->m_pLinkedALUInstruction;
+                for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
+                {
+                    pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
+                    pALU = (R700ALUInstruction*)(pALU->pNextInst);
+                }
+            }
+        }
     }
 
     return GL_TRUE;
 }
 
+GLboolean callPreSub(r700_AssemblerBase* pAsm, /* Emits a call to a pre-compiled sub: MOVs the sources in, assembles a CAL, MOVs the result out. */
+                         LOADABLE_SCRIPT_SIGNITURE scriptSigniture, /* key used to find an already loaded presub in pAsm->presubs */
+                         COMPILED_SUB * pCompiledSub, /* static description of the sub (IL, constants, in/out registers) */
+                         GLshort uOutReg, /* not referenced anywhere in this function body */
+                         GLshort uNumValidSrc) /* number of source operands copied into pCompiledSub->srcRegIndex[] */
+{
+    /* storage for the assembler context that is saved around the first assembly of a presub */
+    GLuint starting_temp_register_number_save;
+    GLuint number_used_registers_save;
+    GLuint uFirstHelpReg_save;
+    GLuint uHelpReg_save;
+    GLuint uiCurInst_save;
+    struct prog_instruction *pILInst_save;
+    PRESUB_DESC * pPresubDesc; /* entry of pAsm->presubs describing this sub; set in both branches below */
+    GLboolean     bRet; /* result of assemble_CAL; this is the value returned at the end */
+    int i;
+
+    R700ControlFlowGenericClause* prelude_cf_ptr = NULL; /* stays NULL when there are no sources to copy */
+
+    /* copy srcs to presub inputs */  
+    pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+    for(i=0; i<uNumValidSrc; i++)
+    {
+        pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+        pAsm->D.dst.reg   = pCompiledSub->srcRegIndex[i]; /* srcRegIndex has 3 entries; uNumValidSrc is not range-checked here */
+        pAsm->D.dst.writex = 1;
+        pAsm->D.dst.writey = 1;
+        pAsm->D.dst.writez = 1;
+        pAsm->D.dst.writew = 1;
+
+        if( GL_FALSE == assemble_src(pAsm, i, 0) )
+        {
+            return GL_FALSE;
+        }
+
+        next_ins(pAsm);
+    }
+    if(uNumValidSrc > 0)
+    {
+        prelude_cf_ptr     = pAsm->cf_current_alu_clause_ptr; /* remember the ALU clause holding the input MOVs */
+        pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+    }
+
+    /* look through the presubs loaded so far for one with this signature */
+    for(i=0; i<pAsm->unNumPresub; i++)
+    {
+        if(pAsm->presubs[i].sptSigniture == scriptSigniture)
+        {
+            break;
+        }
+    }
+
+    if(i == pAsm->unNumPresub)
+    {   /* not loaded yet */
+        /* save assemble context */
+        number_used_registers_save         = pAsm->number_used_registers;
+        uFirstHelpReg_save                 = pAsm->uFirstHelpReg;
+        uHelpReg_save                      = pAsm->uHelpReg;
+        starting_temp_register_number_save = pAsm->starting_temp_register_number;
+        pILInst_save                       = pAsm->pILInst;
+        uiCurInst_save                     = pAsm->uiCurInst;
+
+        /* grow the presub array by 4 entries when it is full */
+        if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize )
+        {
+            pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs, /* NOTE(review): SUB_OFFSET.pPresubDesc holds pointers into this array; confirm they stay valid if the array is moved */
+                                      sizeof(PRESUB_DESC) * pAsm->unPresubArraySize,
+                                      sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) );
+            if(NULL == pAsm->presubs)
+            {
+                radeon_error("No memeory to allocate built in shader function description structures. \n");
+                return GL_FALSE;
+            }
+            pAsm->unPresubArraySize += 4;
+        }
+        
+        pPresubDesc = &(pAsm->presubs[i]); /* i == unNumPresub here, i.e. the first free slot */
+        pPresubDesc->sptSigniture = scriptSigniture;
+
+        /* constants offsets need to be final resolved at reloc. */
+        if(0 == pAsm->unNumPresub)
+        {
+            pPresubDesc->unConstantsStart = 0; 
+        }
+        else
+        {
+            pPresubDesc->unConstantsStart =  pAsm->presubs[i-1].unConstantsStart /* start right after the previous presub's constants */
+                                           + pAsm->presubs[i-1].pCompiledSub->NumParameters;
+        }
+
+        pPresubDesc->pCompiledSub = pCompiledSub;
+
+        pPresubDesc->subIL_Shift = pAsm->unCurNumILInsts; /* the sub's IL indices are shifted by the IL instruction count so far */
+        pPresubDesc->maxStartReg  = uFirstHelpReg_save;
+        pAsm->unCurNumILInsts    += pCompiledSub->NumInstructions;
+
+        pAsm->unNumPresub++;
+
+        /* setup new assemble context */
+        pAsm->starting_temp_register_number = 0;
+        pAsm->number_used_registers = pCompiledSub->NumTemporaries;
+        pAsm->uFirstHelpReg         = pAsm->number_used_registers;
+        pAsm->uHelpReg              = pAsm->uFirstHelpReg;
+
+        bRet = assemble_CAL(pAsm, 
+                            0, 
+                            pPresubDesc->subIL_Shift, 
+                            pCompiledSub->NumInstructions,
+                            pCompiledSub->Instructions,
+                            pPresubDesc);
+
+        
+        pPresubDesc->number_used_registers = pAsm->number_used_registers; /* register count after assembling the sub in its own context */
+
+        /* restore assemble context */
+        pAsm->number_used_registers         = number_used_registers_save; 
+        pAsm->uFirstHelpReg                 = uFirstHelpReg_save;
+        pAsm->uHelpReg                      = uHelpReg_save;
+        pAsm->starting_temp_register_number = starting_temp_register_number_save;
+        pAsm->pILInst                       = pILInst_save; 
+        pAsm->uiCurInst                     = uiCurInst_save;
+    }
+    else
+    {   /* was loaded */
+        pPresubDesc = &(pAsm->presubs[i]);  
+        
+        bRet = assemble_CAL(pAsm, /* same call as above, but without switching the assembler context */
+                            0, 
+                            pPresubDesc->subIL_Shift, 
+                            pCompiledSub->NumInstructions,
+                            pCompiledSub->Instructions,
+                            pPresubDesc);
+    }
+
+    if(GL_FALSE == bRet)
+    {
+        radeon_error("Shader presub assemble failed. \n"); /* no early return: the register bookkeeping below still runs and bRet is returned */
+    }
+    else
+    {
+        /* copy presub output to real dst */ 
+        pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+        pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+        if( GL_FALSE == assemble_dst(pAsm) )
+        {
+            return GL_FALSE;
+        }
+
+        setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+        pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+        pAsm->S[0].src.reg   = pCompiledSub->dstRegIndex;
+        pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX;
+        pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY;
+        pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ;
+        pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW;
+
+        next_ins(pAsm);        
+
+        pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr  = pAsm->cf_current_alu_clause_ptr; /* RelocProgram offsets src0_sel of the ALU instructions in this clause */
+        pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr; /* RelocProgram offsets dst_gpr of the ALU instructions in this clause */
+        pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+    }
+
+    if( (pPresubDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers ) /* raise the caller's register count to cover the presub's registers on top of uFirstHelpReg */
+    {
+        pAsm->number_used_registers = pPresubDesc->number_used_registers + pAsm->uFirstHelpReg;
+    }
+    if(pAsm->uFirstHelpReg > pPresubDesc->maxStartReg) /* keep the largest uFirstHelpReg seen over all calls of this presub */
+    {
+        pPresubDesc->maxStartReg = pAsm->uFirstHelpReg;
+    }
+
+    return bRet;
+}
+
 GLboolean Process_Export(r700_AssemblerBase* pAsm,
                          GLuint type,
                          GLuint export_starting_index,
@@ -6174,6 +6540,11 @@ GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
         FREE(pR700AsmCode->callers);
     }
 
+    if(NULL != pR700AsmCode->presubs)
+    {
+        FREE(pR700AsmCode->presubs);
+    }
+
     return GL_TRUE;
 }
 
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h
index 6dc4401..6ef945d 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.h
+++ b/src/mesa/drivers/dri/r600/r700_assembler.h
@@ -34,6 +34,45 @@
 #include "r700_shaderinst.h"
 #include "r700_shader.h"
 
+typedef enum LOADABLE_SCRIPT_SIGNITURE /* Identifies a pre-compiled sub; callPreSub compares it with PRESUB_DESC.sptSigniture to detect an already loaded sub. */
+{
+    GLSL_NOISE1 = 0x10000001, /* passed to callPreSub for OPCODE_NOISE1 */
+    GLSL_NOISE2 = 0x10000002,
+    GLSL_NOISE3 = 0x10000003,
+    GLSL_NOISE4 = 0x10000004
+}LOADABLE_SCRIPT_SIGNITURE;
+
+typedef struct COMPILED_SUB /* Static description of one pre-compiled sub (see noise1_presub for an instance). */
+{
+    struct  prog_instruction *Instructions; /* IL instruction array handed to assemble_CAL */
+    GLuint  NumInstructions; /* length of Instructions; also added to unCurNumILInsts when the sub is loaded */
+    GLuint  NumTemporaries; /* becomes number_used_registers while the sub itself is assembled */
+    GLuint  NumParameters; /* number of 4-float rows in ParameterValues */
+    GLuint  MinRegIndex; /* subtracted from GPR indices when RelocProgram relocates the sub */
+    GLfloat (*ParameterValues)[4]; /* constant rows copied after the program's own constants at setup time */
+    GLbyte  outputSwizzleX; /* swizzles applied when the result is MOVed to the real destination */
+    GLbyte  outputSwizzleY;
+    GLbyte  outputSwizzleZ;
+    GLbyte  outputSwizzleW;
+    GLshort srcRegIndex[3]; /* temporaries that callPreSub MOVs the caller's sources into */
+    GLushort dstRegIndex; /* temporary that callPreSub reads the result from */
+}COMPILED_SUB;
+
+typedef struct PRESUB_DESCtag /* Per-program record of one loaded pre-compiled sub; entries live in r700_AssemblerBase.presubs. */
+{
+    LOADABLE_SCRIPT_SIGNITURE sptSigniture; /* which sub this entry describes */
+    GLint  subIL_Shift; /* value of unCurNumILInsts when the sub was loaded; passed to assemble_CAL as the IL shift */
+    struct prog_src_register InReg[3]; /* NOTE(review): not referenced in the code visible in this patch */
+    struct prog_dst_register OutReg; /* NOTE(review): not referenced in the code visible in this patch */
+
+    GLushort maxStartReg; /* largest uFirstHelpReg seen at any call; RelocProgram uses it as the register offset */
+    GLushort number_used_registers; /* register count recorded after the sub was assembled */
+
+    GLuint   unConstantsStart; /* offset of this sub's constants among all presub constants */
+
+    COMPILED_SUB * pCompiledSub; /* static description this entry was loaded from */
+} PRESUB_DESC;
+
 typedef enum SHADER_PIPE_TYPE 
 {
     SPT_VP = 0,
@@ -296,6 +335,7 @@ typedef struct SUB_OFFSET
     GLint  subIL_Offset;
     GLuint unCFoffset;
     GLuint unStackDepthMax;
+    PRESUB_DESC *   pPresubDesc;
     TypedShaderList lstCFInstructions_local;
 } SUB_OFFSET;
 
@@ -304,6 +344,9 @@ typedef struct CALLER_POINTER
     GLint  subIL_Offset;
     GLint  subDescIndex;
     R700ControlFlowGenericClause* cf_ptr;
+
+    R700ControlFlowGenericClause* prelude_cf_ptr;
+    R700ControlFlowGenericClause* finale_cf_ptr;
 } CALLER_POINTER;
 
 #define SQ_MAX_CALL_DEPTH 0x00000020
@@ -437,6 +480,11 @@ typedef struct r700_AssemblerBase
 
     GLuint unCFflags;
 
+    PRESUB_DESC * presubs;
+    GLuint        unPresubArraySize;
+    GLuint        unNumPresub;
+    GLuint        unCurNumILInsts;
+
 } r700_AssemblerBase;
 
 //Internal use
@@ -458,7 +506,7 @@ BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) ;
 GLboolean is_reduction_opcode(PVSDWORD * dest);
 GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size);
 
-unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm);
+unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3);
 
 GLboolean IsTex(gl_inst_opcode Opcode);
 GLboolean IsAlu(gl_inst_opcode Opcode);
@@ -585,13 +633,15 @@ GLboolean assemble_BRK(r700_AssemblerBase *pAsm);
 GLboolean assemble_COND(r700_AssemblerBase *pAsm);
 GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm);
 
-GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex);
+GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift);
 GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm);
 GLboolean assemble_RET(r700_AssemblerBase *pAsm);
 GLboolean assemble_CAL(r700_AssemblerBase *pAsm, 
                        GLint nILindex,
+                       GLuint uiIL_Offest,
                        GLuint uiNumberInsts,
-                       struct prog_instruction *pILInst);
+                       struct prog_instruction *pILInst,
+                       PRESUB_DESC * pPresubDesc);
 
 GLboolean Process_Export(r700_AssemblerBase* pAsm,
                          GLuint type, 
@@ -602,16 +652,23 @@ GLboolean Process_Export(r700_AssemblerBase* pAsm,
 GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, 
                                                  BITS depth_channel_select);
 
+GLboolean callPreSub(r700_AssemblerBase* pAsm, 
+                     LOADABLE_SCRIPT_SIGNITURE scriptSigniture,
+                     /* struct prog_instruction ** pILInstParent, */
+                     COMPILED_SUB * pCompiledSub,                                            
+                     GLshort uOutReg,
+                     GLshort uNumValidSrc);
 
 //Interface
 GLboolean AssembleInstr(GLuint uiFirstInst,
+                        GLuint uiIL_Shift,
                         GLuint uiNumberInsts,
                         struct prog_instruction *pILInst, 
 						r700_AssemblerBase *pR700AsmCode);
 GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);  
 GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);
 
-GLboolean RelocProgram(r700_AssemblerBase * pAsm);
+GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg);
 GLboolean InitShaderProgram(r700_AssemblerBase * pAsm);
 
 int       Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader);
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index 8eb439a..d15f013 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -325,7 +325,11 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
     {
          fp->r700AsmCode.SamplerUnits[i] = fp->mesa_program.Base.SamplerUnits[i];
     }
+
+    fp->r700AsmCode.unCurNumILInsts = mesa_fp->Base.NumInstructions;
+
 	if( GL_FALSE == AssembleInstr(0,
+                                  0,
                                   mesa_fp->Base.NumInstructions,
                                   &(mesa_fp->Base.Instructions[0]), 
                                   &(fp->r700AsmCode)) )
@@ -338,7 +342,7 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
         return GL_FALSE;
     }
 
-    if( GL_FALSE == RelocProgram(&(fp->r700AsmCode)) )
+    if( GL_FALSE == RelocProgram(&(fp->r700AsmCode), &(mesa_fp->Base)) )
     {
         return GL_FALSE;
     }
@@ -620,6 +624,25 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
     } else
 	    r700->ps.num_consts = 0;
 
+    COMPILED_SUB * pCompiledSub;
+    GLuint uj;
+    GLuint unConstOffset = r700->ps.num_consts;
+    for(ui=0; ui<pAsm->unNumPresub; ui++)
+    {
+        pCompiledSub = pAsm->presubs[ui].pCompiledSub;
+
+        r700->ps.num_consts += pCompiledSub->NumParameters;
+
+        for(uj=0; uj<pCompiledSub->NumParameters; uj++)
+        {
+            r700->ps.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0];
+		    r700->ps.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1];
+		    r700->ps.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2];
+		    r700->ps.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3];
+        }
+        unConstOffset += pCompiledSub->NumParameters;
+    }
+
     return GL_TRUE;
 }
 
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index 759b74d..90fac07 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -341,7 +341,11 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
     {
         vp->r700AsmCode.SamplerUnits[i] = vp->mesa_program->Base.SamplerUnits[i];
     }
+
+    vp->r700AsmCode.unCurNumILInsts = vp->mesa_program->Base.NumInstructions;
+
 	if(GL_FALSE == AssembleInstr(0,
+                                 0,
                                  vp->mesa_program->Base.NumInstructions,
                                  &(vp->mesa_program->Base.Instructions[0]),
                                  &(vp->r700AsmCode)) )
@@ -354,7 +358,7 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
         return NULL;
     }
 
-    if( GL_FALSE == RelocProgram(&(vp->r700AsmCode)) )
+    if( GL_FALSE == RelocProgram(&(vp->r700AsmCode), &(vp->mesa_program->Base)) )
     {
         return GL_FALSE;
     }
@@ -671,5 +675,24 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx)
     } else
 	    r700->vs.num_consts = 0;
 
+    COMPILED_SUB * pCompiledSub;
+    GLuint uj;
+    GLuint unConstOffset = r700->vs.num_consts;
+    for(ui=0; ui<vp->r700AsmCode.unNumPresub; ui++)
+    {
+        pCompiledSub = vp->r700AsmCode.presubs[ui].pCompiledSub;
+
+        r700->vs.num_consts += pCompiledSub->NumParameters;
+
+        for(uj=0; uj<pCompiledSub->NumParameters; uj++)
+        {
+            r700->vs.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0];
+		    r700->vs.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1];
+		    r700->vs.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2];
+		    r700->vs.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3];
+        }
+        unConstOffset += pCompiledSub->NumParameters;
+    }
+
     return GL_TRUE;
 }



More information about the mesa-commit mailing list