xf86-video-intel: Branch 'exa-i965' - 11 commits - src/exa_sf.g4a src/exa_sf_mask.g4a src/exa_sf_mask_prog.h src/exa_sf_prog.h src/exa_wm_masknoca.g4a src/exa_wm_masknoca_prog.h src/exa_wm_nomask.g4a src/exa_wm_nomask_prog.h src/i830_exa.c src/i830.h src/i830_memory.c src/i965_composite_wm_nomask.g4a src/i965_composite_wm_nomask.h src/i965_exa_render.c src/Makefile.am
Zhenyu Wang
zhen at kemper.freedesktop.org
Wed Nov 29 11:20:43 EET 2006
src/Makefile.am | 4
src/exa_sf.g4a | 17 +
src/exa_sf_mask.g4a | 53 +++++
src/exa_sf_mask_prog.h | 25 ++
src/exa_sf_prog.h | 17 +
src/exa_wm_masknoca.g4a | 202 +++++++++++++++++++++
src/exa_wm_masknoca_prog.h | 95 +++++++++
src/exa_wm_nomask.g4a | 8
src/exa_wm_nomask_prog.h | 4
src/i830.h | 2
src/i830_exa.c | 7
src/i830_memory.c | 36 +++
src/i965_exa_render.c | 430 +++++++++++++++++++++++----------------------
13 files changed, 678 insertions(+), 222 deletions(-)
New commits:
diff-tree db391e8e4c4d87bfe3ccad0de14dd5b47b69b8fe (from 290f15cd4cda97727ebcaadacbbbf7650278934b)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 17:16:46 2006 +0800
shut up warning
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 6f2bc84..2d1ce5f 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -1011,10 +1011,8 @@ I965EXAComposite(PixmapPtr pDst, int src
srcXend = srcX + w;
srcYend = srcY + h;
- if (pMask) {
- maskXend = maskX + w;
- maskYend = maskY + h;
- }
+ maskXend = maskX + w;
+ maskYend = maskY + h;
if (is_transform[0]) {
v.vector[0] = IntToxFixed(srcX);
v.vector[1] = IntToxFixed(srcY);
diff-tree 290f15cd4cda97727ebcaadacbbbf7650278934b (from 3d4edd325f3859c749ee42df102bb4239eac5287)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 17:14:55 2006 +0800
fix alpha blending state
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index c4a3f97..6f2bc84 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -562,21 +562,26 @@ ErrorF("i965 prepareComposite\n");
cc_state->cc0.stencil_enable = 0; /* disable stencil */
cc_state->cc2.depth_test = 0; /* disable depth test */
cc_state->cc2.logicop_enable = 0; /* disable logic op */
- cc_state->cc3.ia_blend_enable = 0; /* blend alpha just like colors */
+ cc_state->cc3.ia_blend_enable = 1; /* blend alpha just like colors */
cc_state->cc3.blend_enable = 1; /* enable color blend */
cc_state->cc3.alpha_test = 0; /* disable alpha test */
cc_state->cc4.cc_viewport_state_offset = (state_base_offset + cc_viewport_offset) >> 5;
cc_state->cc5.dither_enable = 0; /* disable dither */
-// cc_state->cc5.logicop_func = 0xc; /* COPY */
-// cc_state->cc5.statistics_enable = 1;
-// cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;
-// cc_state->cc5.ia_src_blend_factor = BRW_BLENDFACTOR_ONE;
-// cc_state->cc5.ia_dest_blend_factor = BRW_BLENDFACTOR_ONE;
- cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD;
+ cc_state->cc5.logicop_func = 0xc; /* COPY */
+ cc_state->cc5.statistics_enable = 1;
+ cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;
I965GetBlendCntl(op, pMaskPicture, pDstPicture->format,
&src_blend, &dst_blend);
+ /* XXX: alpha blend factor should be same as color, but check
+ for CA case in future */
+ cc_state->cc5.ia_src_blend_factor = src_blend;
+ cc_state->cc5.ia_dest_blend_factor = dst_blend;
+ cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD;
cc_state->cc6.src_blend_factor = src_blend;
cc_state->cc6.dest_blend_factor = dst_blend;
+ cc_state->cc6.clamp_post_alpha_blend = 1;
+ cc_state->cc6.clamp_pre_alpha_blend = 1;
+ cc_state->cc6.clamp_range = 0; /* clamp range [0,1] */
/* Upload system kernel */
memcpy (sip_kernel, sip_kernel_static, sizeof (sip_kernel_static));
diff-tree 3d4edd325f3859c749ee42df102bb4239eac5287 (from a704120b15efae47344a90d972e7f3da64a202a6)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 17:05:32 2006 +0800
Add in sf/wm program for mask picture without CA
diff --git a/src/exa_sf_mask.g4a b/src/exa_sf_mask.g4a
new file mode 100644
index 0000000..ab519ce
--- /dev/null
+++ b/src/exa_sf_mask.g4a
@@ -0,0 +1,53 @@
+
+/* FIXME: how to set up the second coefficient for the mask tex coord */
+
+/*
+ g3 (v0) { u0, v0, 1.0, 1.0 } ==> {u0, v0, 1.0, 1.0, mu0, mv0, 1.0, 1.0} Co[0](u0) Co[1](v0) Co[2](mu0) Co[3](mv0)
+ g4 (v1) { u1, v1, 1.0, 1.0 } ==> {u1, v1, 1.0, 1.0, mu1, mv1, 1.0, 1.0}
+ g5 (v2) { u2, v2 } ==> {u2, v2, mu2, mv2}
+ g6 { 1/(x1-x0), 1/(y1-y0) }
+ g7 { u1-u0, v1-v0, 0, 0} ==>{u1-u0, v1-v0,0, 0, mu1-mu0, mv1-mv0, 0, 0}
+ -> { (u1-u0)/(x1-x0), (v1-v0)/(y1-y0) } ==>{(u1-u0)/(x1-x0), (v1-v0)/(y1-y0),(mu1-mu0)/(x1-x0), (mv1-mv0)/(y1-y0)}
+ Cx, Cy Cx[0], Cy[0], Cx[1], Cy[1]
+ */
+
+/* assign Cx[0], Cx[1] to src, same to Cy, Co
+ Cx[2], Cx[3] to mask, same to Cy, Co */
+
+send (1) 0 g6<1>F g1.12<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
+send (1) 0 g6.4<1>F g1.20<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
+add (8) g7<1>F g4<8,8,1>F -g3<8,8,1>F { align1 };
+/* Cx[0] */
+mul (1) g7<1>F g7<0,1,0>F g6<0,1,0>F { align1 };
+/* Cy[0] */
+mul (1) g7.4<1>F g7.4<0,1,0>F g6.4<0,1,0>F { align1 };
+/* Cx[2] */
+mul (1) g7.16<1>F g7.16<0,1,0>F g6<0,1,0>F { align1 };
+/* Cy[2] */
+mul (1) g7.20<1>F g7.20<0,1,0>F g6.4<0,1,0>F { align1 };
+
+/* src Cx[0], Cx[1] */
+mov (8) m1<1>F g7<0,1,0>F { align1 };
+/* mask Cx[2], Cx[3] */
+mov (1) m1.8<1>F g7.16<0,1,0>F { align1 };
+mov (1) m1.12<1>F g7.16<0,1,0>F { align1 };
+/* src Cy[0], Cy[1] */
+mov (8) m2<1>F g7.4<0,1,0>F { align1 };
+/* mask Cy[2], Cy[3] */
+mov (1) m2.8<1>F g7.20<0,1,0>F { align1 };
+mov (1) m2.12<1>F g7.20<0,1,0>F { align1 };
+/* src Co[0], Co[1] */
+mov (8) m3<1>F g3<8,8,1>F { align1 };
+/* mask Co[2], Co[3] */
+mov (1) m3.8<1>F g3.16<0,1,0>F { align1 };
+mov (1) m3.12<1>F g3.20<0,1,0>F { align1 };
+
+send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT };
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
diff --git a/src/exa_sf_mask_prog.h b/src/exa_sf_mask_prog.h
new file mode 100644
index 0000000..cd7f460
--- /dev/null
+++ b/src/exa_sf_mask_prog.h
@@ -0,0 +1,25 @@
+ { 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 },
+ { 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 },
+ { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d4060 },
+ { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c0 },
+ { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c4 },
+ { 0x00000041, 0x20f077bd, 0x000000f0, 0x000000c0 },
+ { 0x00000041, 0x20f477bd, 0x000000f4, 0x000000c4 },
+ { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 },
+ { 0x00000001, 0x202803be, 0x000000f0, 0x00000000 },
+ { 0x00000001, 0x202c03be, 0x000000f0, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 },
+ { 0x00000001, 0x204803be, 0x000000f4, 0x00000000 },
+ { 0x00000001, 0x204c03be, 0x000000f4, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
+ { 0x00000001, 0x206803be, 0x00000070, 0x00000000 },
+ { 0x00000001, 0x206c03be, 0x00000074, 0x00000000 },
+ { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm_masknoca.g4a b/src/exa_wm_masknoca.g4a
new file mode 100644
index 0000000..195203c
--- /dev/null
+++ b/src/exa_wm_masknoca.g4a
@@ -0,0 +1,202 @@
+/*
+ * This is for the exa composite operation in the mask picture without component alpha case.
+ * The src picture is multiplied by the mask's alpha, and the result is sent to the dst picture.
+ * XXX: This is still experimental, and should be fixed to support multiple texture
+ * map, and conditional mul actions.
+ */
+
+/* I think this should be same as in g4a program for texture video,
+ as we also use 16-pixel dispatch. and SF scale in g3 is useful for us. */
+
+/* The initial payload of the thread is always g0.
+ * WM_URB (incoming URB entries) is g3
+ As the mask texture coefficient needs an extra setup urb starting from g4, we should
+ shift this location.
+
+ * X0_R is g4->g6
+ * X1_R is g5->g7
+ * Y0_R is g6->g8
+ * Y1_R is g7->g9
+
+ * X0: {ss0.x, ss0.x+1, ss0.x, ss0.x+1, ss1.x, ss1.x+1, ss1.x, ss1.x+1}
+ * Y0: {ss0.y, ss0.y, ss0.y+1, ss0.y+1, ss1.y, ss1.y, ss1.y+1, ss1.y+1}
+ * X1: {ss2.x, ss2.x+1, ss2.x, ss2.x+1, ss3.x, ss3.x+1, ss3.x, ss3.x+1}
+ * Y1: {ss2.y, ss2.y, ss2.y+1, ss2.y+1, ss3.y, ss3.y, ss3.y+1, ss3.y+1}
+ */
+
+/* multitexture program with src and mask texture */
+/* - load src texture */
+/* - load mask texture */
+/* - mul src.X with mask's alpha */
+/* - write out src.X */
+
+ /* Set up ss0.x coordinates*/
+mov (1) g6<1>F g1.8<0,1,0>UW { align1 };
+add (1) g6.4<1>F g1.8<0,1,0>UW 1UB { align1 };
+mov (1) g6.8<1>F g1.8<0,1,0>UW { align1 };
+add (1) g6.12<1>F g1.8<0,1,0>UW 1UB { align1 };
+ /* Set up ss0.y coordinates */
+mov (1) g8<1>F g1.10<0,1,0>UW { align1 };
+mov (1) g8.4<1>F g1.10<0,1,0>UW { align1 };
+add (1) g8.8<1>F g1.10<0,1,0>UW 1UB { align1 };
+add (1) g8.12<1>F g1.10<0,1,0>UW 1UB { align1 };
+ /* set up ss1.x coordinates */
+mov (1) g6.16<1>F g1.12<0,1,0>UW { align1 };
+add (1) g6.20<1>F g1.12<0,1,0>UW 1UB { align1 };
+mov (1) g6.24<1>F g1.12<0,1,0>UW { align1 };
+add (1) g6.28<1>F g1.12<0,1,0>UW 1UB { align1 };
+ /* set up ss1.y coordinates */
+mov (1) g8.16<1>F g1.14<0,1,0>UW { align1 };
+mov (1) g8.20<1>F g1.14<0,1,0>UW { align1 };
+add (1) g8.24<1>F g1.14<0,1,0>UW 1UB { align1 };
+add (1) g8.28<1>F g1.14<0,1,0>UW 1UB { align1 };
+ /* Set up ss2.x coordinates */
+mov (1) g9<1>F g1.16<0,1,0>UW { align1 };
+add (1) g9.4<1>F g1.16<0,1,0>UW 1UB { align1 };
+mov (1) g9.8<1>F g1.16<0,1,0>UW { align1 };
+add (1) g9.12<1>F g1.16<0,1,0>UW 1UB { align1 };
+ /* Set up ss2.y coordinates */
+mov (1) g9<1>F g1.18<0,1,0>UW { align1 };
+mov (1) g9.4<1>F g1.18<0,1,0>UW { align1 };
+add (1) g9.8<1>F g1.18<0,1,0>UW 1UB { align1 };
+add (1) g9.12<1>F g1.18<0,1,0>UW 1UB { align1 };
+ /* Set up ss3.x coordinates */
+mov (1) g7.16<1>F g1.20<0,1,0>UW { align1 };
+add (1) g7.20<1>F g1.20<0,1,0>UW 1UB { align1 };
+mov (1) g7.24<1>F g1.20<0,1,0>UW { align1 };
+add (1) g7.28<1>F g1.20<0,1,0>UW 1UB { align1 };
+ /* Set up ss3.y coordinates */
+mov (1) g9.16<1>F g1.22<0,1,0>UW { align1 };
+mov (1) g9.20<1>F g1.22<0,1,0>UW { align1 };
+add (1) g9.24<1>F g1.22<0,1,0>UW 1UB { align1 };
+add (1) g9.28<1>F g1.22<0,1,0>UW 1UB { align1 };
+
+ /* Now, map these screen space coordinates into texture coordinates. */
+/* This is for src texture */
+/* I don't want to change origin ssX coords, as it will be used later in mask */
+/* so store tex coords in g10, g11, g12, g13 */
+
+ /* subtract screen-space X origin of vertex 0. */
+add (8) g10<1>F g6<8,8,1>F -g1<0,1,0>F { align1 };
+add (8) g11<1>F g7<8,8,1>F -g1<0,1,0>F { align1 };
+ /* scale by texture X increment */
+/* Cx[0] */
+mul (8) g10<1>F g10<8,8,1>F g3<0,1,0>F { align1 };
+mul (8) g11<1>F g11<8,8,1>F g3<0,1,0>F { align1 };
+ /* add in texture X offset */
+/* Co[0] */
+add (8) g10<1>F g10<8,8,1>F g3.12<0,1,0>F { align1 };
+add (8) g11<1>F g11<8,8,1>F g3.12<0,1,0>F { align1 };
+ /* subtract screen-space Y origin of vertex 0. */
+add (8) g12<1>F g8<8,8,1>F -g1.4<0,1,0>F { align1 };
+add (8) g13<1>F g9<8,8,1>F -g1.4<0,1,0>F { align1 };
+ /* scale by texture Y increment */
+/* Cy[0] */
+mul (8) g12<1>F g12<8,8,1>F g3.4<0,1,0>F { align1 };
+mul (8) g13<1>F g13<8,8,1>F g3.4<0,1,0>F { align1 };
+ /* add in texture Y offset */
+/* Co[1] */
+add (8) g12<1>F g12<8,8,1>F g3.28<0,1,0>F { align1 };
+add (8) g13<1>F g13<8,8,1>F g3.28<0,1,0>F { align1 };
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+mov (8) m1<1>F g10<8,8,1>F { align1 };
+mov (8) m2<1>F g11<8,8,1>F { align1 }; /* param 0 u in m1, m2 */
+mov (8) m3<1>F g12<8,8,1>F { align1 };
+mov (8) m4<1>F g13<8,8,1>F { align1 }; /* param 1 v in m3, m4 */
+
+/* m0 will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+
+/* src texture readback: g14-g21 */
+send (16) 0 /* msg reg index */
+ g14<1>UW /* readback */
+ g0<8,8,1>UW /* copy to msg start reg*/
+ sampler (1,0,F) /* sampler message description,
+ (binding_table,sampler_index,datatype).
+ here(src->dst) we should use src_sampler and
+ src_surface */
+ mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */
+
+mov (8) g21<1>UD g21<8,8,1>UD { align1 }; /* wait sampler return */
+
+/* sampler mask texture, use g10, g11, g12, g13 */
+ /* subtract screen-space X origin of vertex 0. */
+add (8) g10<1>F g6<8,8,1>F -g1<0,1,0>F { align1 };
+add (8) g11<1>F g7<8,8,1>F -g1<0,1,0>F { align1 };
+ /* scale by texture X increment */
+/* Cx[2] */
+mul (8) g10<1>F g10<8,8,1>F g4<0,1,0>F { align1 };
+mul (8) g11<1>F g11<8,8,1>F g4<0,1,0>F { align1 };
+ /* add in texture X offset */
+/* Co[2] */
+add (8) g10<1>F g10<8,8,1>F g4.12<0,1,0>F { align1 };
+add (8) g11<1>F g11<8,8,1>F g4.12<0,1,0>F { align1 };
+ /* subtract screen-space Y origin of vertex 0. */
+add (8) g12<1>F g8<8,8,1>F -g1.4<0,1,0>F { align1 };
+add (8) g13<1>F g9<8,8,1>F -g1.4<0,1,0>F { align1 };
+ /* scale by texture Y increment */
+/* Cy[2] */
+mul (8) g12<1>F g12<8,8,1>F g4.4<0,1,0>F { align1 };
+mul (8) g13<1>F g13<8,8,1>F g4.4<0,1,0>F { align1 };
+ /* add in texture Y offset */
+/* Co[3] */
+add (8) g12<1>F g12<8,8,1>F g4.28<0,1,0>F { align1 };
+add (8) g13<1>F g13<8,8,1>F g4.28<0,1,0>F { align1 };
+
+mov (8) m1<1>F g10<8,8,1>F { align1 };
+mov (8) m2<1>F g11<8,8,1>F { align1 };
+mov (8) m3<1>F g12<8,8,1>F { align1 };
+mov (8) m4<1>F g13<8,8,1>F { align1 };
+
+/* mask sampler g22-g29 */
+/* binding_table (2), sampler (1) */
+send (16) 0 g22<1>UW g0<8,8,1>UW sampler (2,1,F) mlen 5 rlen 8 { align1 };
+mov (8) g29<1>UD g29<8,8,1>UD { align1 }; /* wait sampler return */
+
+/* mul mask's alpha channel g28,g29 to src (g14-g21), then write out src */
+mul (8) g14<1>F g14<8,8,1>F g28<8,8,1>F { align1 };
+mul (8) g15<1>F g15<8,8,1>F g29<8,8,1>F { align1 };
+mul (8) g16<1>F g16<8,8,1>F g28<8,8,1>F { align1 };
+mul (8) g17<1>F g17<8,8,1>F g29<8,8,1>F { align1 };
+mul (8) g18<1>F g18<8,8,1>F g28<8,8,1>F { align1 };
+mul (8) g19<1>F g19<8,8,1>F g29<8,8,1>F { align1 };
+mul (8) g20<1>F g20<8,8,1>F g28<8,8,1>F { align1 };
+mul (8) g21<1>F g21<8,8,1>F g29<8,8,1>F { align1 };
+
+/* prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), then it's ready to write */
+mov (8) m2<1>F g14<8,8,1>F { align1 };
+mov (8) m3<1>F g16<8,8,1>F { align1 };
+mov (8) m4<1>F g18<8,8,1>F { align1 };
+mov (8) m5<1>F g20<8,8,1>F { align1 };
+mov (8) m6<1>F g15<8,8,1>F { align1 };
+mov (8) m7<1>F g17<8,8,1>F { align1 };
+mov (8) m8<1>F g19<8,8,1>F { align1 };
+mov (8) m9<1>F g21<8,8,1>F { align1 };
+
+/* m0, m1 are all direct passed by PS thread payload */
+mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable };
+
+/* write */
+send (16) 0 acc0<1>UW g0<8,8,1>UW write (
+ 0, /* binding_table */
+ 8, /* pixel scoreboard clear, msg type simd16 single source */
+ 4, /* render target write */
+ 0 /* no write commit message */
+ )
+ mlen 10
+ rlen 0
+ { align1 EOT };
+
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
diff --git a/src/exa_wm_masknoca_prog.h b/src/exa_wm_masknoca_prog.h
new file mode 100644
index 0000000..66eb960
--- /dev/null
+++ b/src/exa_wm_masknoca_prog.h
@@ -0,0 +1,95 @@
+ { 0x00000001, 0x20c0013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x20c40d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x20c8013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x20cc0d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x2100013d, 0x0000002a, 0x00000000 },
+ { 0x00000001, 0x2104013d, 0x0000002a, 0x00000000 },
+ { 0x00000040, 0x21080d3d, 0x0000002a, 0x00000001 },
+ { 0x00000040, 0x210c0d3d, 0x0000002a, 0x00000001 },
+ { 0x00000001, 0x20d0013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x20d40d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x20d8013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x20dc0d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x2110013d, 0x0000002e, 0x00000000 },
+ { 0x00000001, 0x2114013d, 0x0000002e, 0x00000000 },
+ { 0x00000040, 0x21180d3d, 0x0000002e, 0x00000001 },
+ { 0x00000040, 0x211c0d3d, 0x0000002e, 0x00000001 },
+ { 0x00000001, 0x2120013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x21240d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x2128013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x212c0d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x2120013d, 0x00000032, 0x00000000 },
+ { 0x00000001, 0x2124013d, 0x00000032, 0x00000000 },
+ { 0x00000040, 0x21280d3d, 0x00000032, 0x00000001 },
+ { 0x00000040, 0x212c0d3d, 0x00000032, 0x00000001 },
+ { 0x00000001, 0x20f0013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20f40d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x20f8013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20fc0d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x2130013d, 0x00000036, 0x00000000 },
+ { 0x00000001, 0x2134013d, 0x00000036, 0x00000000 },
+ { 0x00000040, 0x21380d3d, 0x00000036, 0x00000001 },
+ { 0x00000040, 0x213c0d3d, 0x00000036, 0x00000001 },
+ { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
+ { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
+ { 0x00600041, 0x214077bd, 0x008d0140, 0x00000060 },
+ { 0x00600041, 0x216077bd, 0x008d0160, 0x00000060 },
+ { 0x00600040, 0x214077bd, 0x008d0140, 0x0000006c },
+ { 0x00600040, 0x216077bd, 0x008d0160, 0x0000006c },
+ { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
+ { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
+ { 0x00600041, 0x218077bd, 0x008d0180, 0x00000064 },
+ { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000064 },
+ { 0x00600040, 0x218077bd, 0x008d0180, 0x0000007c },
+ { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000007c },
+ { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
+ { 0x00800031, 0x21c01d29, 0x008d0000, 0x02580001 },
+ { 0x00600001, 0x22a00021, 0x008d02a0, 0x00000000 },
+ { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
+ { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
+ { 0x00600041, 0x214077bd, 0x008d0140, 0x00000080 },
+ { 0x00600041, 0x216077bd, 0x008d0160, 0x00000080 },
+ { 0x00600040, 0x214077bd, 0x008d0140, 0x0000008c },
+ { 0x00600040, 0x216077bd, 0x008d0160, 0x0000008c },
+ { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
+ { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
+ { 0x00600041, 0x218077bd, 0x008d0180, 0x00000084 },
+ { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000084 },
+ { 0x00600040, 0x218077bd, 0x008d0180, 0x0000009c },
+ { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000009c },
+ { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
+ { 0x00800031, 0x22c01d29, 0x008d0000, 0x02580102 },
+ { 0x00600001, 0x23a00021, 0x008d03a0, 0x00000000 },
+ { 0x00600041, 0x21c077bd, 0x008d01c0, 0x008d0380 },
+ { 0x00600041, 0x21e077bd, 0x008d01e0, 0x008d03a0 },
+ { 0x00600041, 0x220077bd, 0x008d0200, 0x008d0380 },
+ { 0x00600041, 0x222077bd, 0x008d0220, 0x008d03a0 },
+ { 0x00600041, 0x224077bd, 0x008d0240, 0x008d0380 },
+ { 0x00600041, 0x226077bd, 0x008d0260, 0x008d03a0 },
+ { 0x00600041, 0x228077bd, 0x008d0280, 0x008d0380 },
+ { 0x00600041, 0x22a077bd, 0x008d02a0, 0x008d03a0 },
+ { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0200, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 },
+ { 0x00600001, 0x20c003be, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x20e003be, 0x008d0220, 0x00000000 },
+ { 0x00600001, 0x210003be, 0x008d0260, 0x00000000 },
+ { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
+ { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
+ { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 4bc90c1..c4a3f97 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -344,12 +344,16 @@ static const CARD32 sf_kernel_static[][4
#include "exa_sf_prog.h"
};
+static const CARD32 sf_kernel_static_mask[][4] = {
+#include "exa_sf_mask_prog.h"
+};
+
/* ps kernels */
#define PS_KERNEL_NUM_GRF 32
#define PS_MAX_THREADS 32
/* 1: no mask */
static const CARD32 ps_kernel_static_nomask [][4] = {
- #include "exa_wm_nomask_prog.h"
+#include "exa_wm_nomask_prog.h"
};
/* 2: mask with componentAlpha, src * mask color, XXX: later */
@@ -359,7 +363,7 @@ static const CARD32 ps_kernel_static_mas
/* 3: mask without componentAlpha, src * mask alpha */
static const CARD32 ps_kernel_static_masknoca [][4] = {
-/*#include "i965_composite_wm_masknoca.h" */
+#include "exa_wm_masknoca_prog.h"
};
Bool
@@ -375,11 +379,6 @@ I965EXAPrepareComposite(int op, PictureP
ErrorF("i965 prepareComposite\n");
- /* FIXME: fallback in pMask for now, would be enable after finish
- wm kernel program */
- if (pMask)
- I830FALLBACK("No mask support yet.\n");
-
I965GetDestFormat(pDstPicture, &dst_format);
src_offset = exaGetPixmapOffset(pSrc);
src_pitch = exaGetPixmapPitch(pSrc);
@@ -436,7 +435,10 @@ ErrorF("i965 prepareComposite\n");
/* keep current sf_kernel, which will send one setup urb entry to
PS kernel */
sf_kernel_offset = ALIGN(next_offset, 64);
- next_offset = sf_kernel_offset + sizeof (sf_kernel_static);
+ if (pMask)
+ next_offset = sf_kernel_offset + sizeof (sf_kernel_static_mask);
+ else
+ next_offset = sf_kernel_offset + sizeof (sf_kernel_static);
//XXX: ps_kernel may be separated, fix with offset
ps_kernel_offset = ALIGN(next_offset, 64);
@@ -746,7 +748,10 @@ ErrorF("i965 prepareComposite\n");
* calculate dA/dx and dA/dy. Hand these interpolation coefficients
* back to SF which then hands pixels off to WM.
*/
- memcpy (sf_kernel, sf_kernel_static, sizeof (sf_kernel_static));
+ if (pMask)
+ memcpy (sf_kernel, sf_kernel_static_mask, sizeof (sf_kernel_static));
+ else
+ memcpy (sf_kernel, sf_kernel_static, sizeof (sf_kernel_static));
memset(sf_state, 0, sizeof(*sf_state));
sf_state->thread0.kernel_start_pointer =
@@ -780,7 +785,6 @@ ErrorF("i965 prepareComposite\n");
/* Set up the PS kernel (dispatched by WM)
*/
- // XXX: replace to texture blend shader, and different cases
if (pMask) {
if (pMaskPicture->componentAlpha)
memcpy (ps_kernel, ps_kernel_static_maskca, sizeof (ps_kernel_static_maskca));
diff-tree a704120b15efae47344a90d972e7f3da64a202a6 (from e3c70c68e39183226e498271c44e98ef1b96a681)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 16:52:44 2006 +0800
misc cleanup for G965 vs/sf/wm states
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 51b2c60..4bc90c1 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -266,6 +266,7 @@ I965EXACheckComposite(int op, PicturePtr
#define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1))
#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
int urb_vs_start, urb_vs_size;
int urb_gs_start, urb_gs_size;
@@ -336,9 +337,8 @@ static const CARD32 sip_kernel_static[][
* with the base texture coordinate. It was extracted from the Mesa driver
*/
-#define SF_KERNEL_NUM_GRF 10
-#define SF_KERNEL_NUM_URB 8
-#define SF_MAX_THREADS 4
+#define SF_KERNEL_NUM_GRF 16
+#define SF_MAX_THREADS 1
static const CARD32 sf_kernel_static[][4] = {
#include "exa_sf_prog.h"
@@ -468,7 +468,6 @@ ErrorF("i965 prepareComposite\n");
next_offset = vb_offset + vb_size;
/* And then the general state: */
- //XXX: fix for texture map and target surface
dest_surf_offset = ALIGN(next_offset, 32);
next_offset = dest_surf_offset + sizeof(*dest_surf_state);
@@ -534,8 +533,8 @@ ErrorF("i965 prepareComposite\n");
#define URB_CLIP_ENTRY_SIZE 0
#define URB_CLIP_ENTRIES 0
-#define URB_SF_ENTRY_SIZE 4
-#define URB_SF_ENTRIES 8
+#define URB_SF_ENTRY_SIZE 2
+#define URB_SF_ENTRIES 1
urb_vs_start = 0;
urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
@@ -564,7 +563,6 @@ ErrorF("i965 prepareComposite\n");
cc_state->cc3.ia_blend_enable = 0; /* blend alpha just like colors */
cc_state->cc3.blend_enable = 1; /* enable color blend */
cc_state->cc3.alpha_test = 0; /* disable alpha test */
- // XXX:cc_viewport needed?
cc_state->cc4.cc_viewport_state_offset = (state_base_offset + cc_viewport_offset) >> 5;
cc_state->cc5.dither_enable = 0; /* disable dither */
// cc_state->cc5.logicop_func = 0xc; /* COPY */
@@ -585,7 +583,6 @@ ErrorF("i965 prepareComposite\n");
memset(dest_surf_state, 0, sizeof(*dest_surf_state));
dest_surf_state->ss0.surface_type = BRW_SURFACE_2D;
dest_surf_state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32;
- // XXX: should compare with picture's cpp?...8 bit surf?
if (pDst->drawable.bitsPerPixel == 16) {
dest_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
} else {
@@ -601,14 +598,12 @@ ErrorF("i965 prepareComposite\n");
dest_surf_state->ss0.mipmap_layout_mode = 0;
dest_surf_state->ss0.render_cache_read_mode = 0;
- // XXX: fix to picture address & size
dest_surf_state->ss1.base_addr = dst_offset;
dest_surf_state->ss2.height = pDst->drawable.height - 1;
dest_surf_state->ss2.width = pDst->drawable.width - 1;
dest_surf_state->ss2.mip_count = 0;
dest_surf_state->ss2.render_target_rotation = 0;
dest_surf_state->ss3.pitch = dst_pitch - 1;
- // tiled surface?
/* Set up the source surface state buffer */
memset(src_surf_state, 0, sizeof(*src_surf_state));
@@ -741,8 +736,10 @@ ErrorF("i965 prepareComposite\n");
/* Set up the vertex shader to be disabled (passthrough) */
memset(vs_state, 0, sizeof(*vs_state));
- // XXX: vs URB should be defined for VF vertex URB store. done already?
+ vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
+ vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
vs_state->vs6.vs_enable = 0;
+ vs_state->vs6.vert_cache_disable = 1;
// XXX: sf_kernel? keep it as now
/* Set up the SF kernel to do coord interp: for each attribute,
@@ -754,7 +751,7 @@ ErrorF("i965 prepareComposite\n");
memset(sf_state, 0, sizeof(*sf_state));
sf_state->thread0.kernel_start_pointer =
(state_base_offset + sf_kernel_offset) >> 6;
- sf_state->thread0.grf_reg_count = ((SF_KERNEL_NUM_GRF & ~15) / 16);
+ sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
sf_state->sf1.single_program_flow = 1;
sf_state->sf1.binding_table_entry_count = 0;
sf_state->sf1.thread_priority = 0;
@@ -795,7 +792,7 @@ ErrorF("i965 prepareComposite\n");
memset (wm_state, 0, sizeof (*wm_state));
wm_state->thread0.kernel_start_pointer =
(state_base_offset + ps_kernel_offset) >> 6;
- wm_state->thread0.grf_reg_count = ((PS_KERNEL_NUM_GRF & ~15) / 16);
+ wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
wm_state->thread1.single_program_flow = 1;
if (!pMask)
wm_state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */
@@ -808,7 +805,10 @@ ErrorF("i965 prepareComposite\n");
// XXX: urb allocation
wm_state->thread3.const_urb_entry_read_length = 0;
wm_state->thread3.const_urb_entry_read_offset = 0;
- wm_state->thread3.urb_entry_read_length = 1; /* one per pair of attrib */
+ if (pMask)
+ wm_state->thread3.urb_entry_read_length = 2; /* two per pair of attrib */
+ else
+ wm_state->thread3.urb_entry_read_length = 1; /* one per pair of attrib */
wm_state->thread3.urb_entry_read_offset = 0;
// wm kernel use urb from 3, see wm_program in compiler module
wm_state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */
diff-tree e3c70c68e39183226e498271c44e98ef1b96a681 (from aa515c54f0cfd9025fc38dc4b7938ff17a8a13fb)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 16:40:15 2006 +0800
WM kernel needs scratch space
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 51c2006..51b2c60 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -297,6 +297,7 @@ int dest_surf_offset, src_surf_offset, m
int src_sampler_offset, mask_sampler_offset,vs_offset;
int sf_offset, wm_offset, cc_offset, vb_offset, cc_viewport_offset;
int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset;
+int wm_scratch_offset;
int binding_table_offset;
int default_color_offset;
int next_offset, total_state_size;
@@ -426,6 +427,9 @@ ErrorF("i965 prepareComposite\n");
wm_offset = ALIGN(next_offset, 32);
next_offset = wm_offset + sizeof(*wm_state);
+ wm_scratch_offset = ALIGN(next_offset, 1024);
+ next_offset = wm_scratch_offset + 1024 * PS_MAX_THREADS;
+
cc_offset = ALIGN(next_offset, 32);
next_offset = cc_offset + sizeof(*cc_state);
@@ -798,7 +802,8 @@ ErrorF("i965 prepareComposite\n");
else
wm_state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */
- wm_state->thread2.scratch_space_base_pointer = 0;
+ wm_state->thread2.scratch_space_base_pointer = (state_base_offset +
+ wm_scratch_offset)>>10;
wm_state->thread2.per_thread_scratch_space = 0;
// XXX: urb allocation
wm_state->thread3.const_urb_entry_read_length = 0;
diff-tree aa515c54f0cfd9025fc38dc4b7938ff17a8a13fb (from b6eba96584bcd2c024f6443d9f3728eb65b234fb)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 16:37:06 2006 +0800
Setup default border color for our samplers
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 94eabfb..51c2006 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -278,6 +278,7 @@ struct brw_surface_state *src_surf_state
struct brw_surface_state *mask_surf_state;
struct brw_sampler_state *src_sampler_state;
struct brw_sampler_state *mask_sampler_state;
+struct brw_sampler_default_color *default_color_state;
struct brw_vs_unit_state *vs_state;
struct brw_sf_unit_state *sf_state;
@@ -297,6 +298,7 @@ int src_sampler_offset, mask_sampler_off
int sf_offset, wm_offset, cc_offset, vb_offset, cc_viewport_offset;
int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset;
int binding_table_offset;
+int default_color_offset;
int next_offset, total_state_size;
char *state_base;
int state_base_offset;
@@ -478,6 +480,9 @@ ErrorF("i965 prepareComposite\n");
binding_table_offset = ALIGN(next_offset, 32);
next_offset = binding_table_offset + (binding_table_entries * 4);
+ default_color_offset = ALIGN(next_offset, 32);
+ next_offset = default_color_offset + sizeof(*default_color_state);
+
total_state_size = next_offset;
assert(total_state_size < EXA_LINEAR_EXTRA);
@@ -508,6 +513,8 @@ ErrorF("i965 prepareComposite\n");
vb = (void *)(state_base + vb_offset);
+ default_color_state = (void*)(state_base + default_color_offset);
+
/* Set up a default static partitioning of the URB, which is supposed to
* allow anything we would want to do, at potentially lower performance.
*/
@@ -541,7 +548,6 @@ ErrorF("i965 prepareComposite\n");
* here, but we should have synced the 3D engine already in I830PutImage.
*/
-// needed?
memset (cc_viewport, 0, sizeof (*cc_viewport));
cc_viewport->min_depth = -1.e35;
cc_viewport->max_depth = 1.e35;
@@ -678,18 +684,25 @@ ErrorF("i965 prepareComposite\n");
I830FALLBACK("Bad filter 0x%x\n", pSrcPicture->filter);
}
+ memset(default_color_state, 0, sizeof(*default_color_state));
+ default_color_state->color[0] = 1.0; /* RGBA format */
+ default_color_state->color[1] = 0.0;
+ default_color_state->color[2] = 0.0;
+ default_color_state->color[3] = 0.0;
+
+ src_sampler_state->ss0.default_color_mode = 0; /* GL mode */
+
if (!pSrcPicture->repeat) {
- /* XXX: clamp_border and set border to 0 */
- src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
- src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
- src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+ src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+ src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+ src_sampler_state->ss2.default_color_pointer =
+ (state_base_offset + default_color_offset) >> 5;
} else {
src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
}
- /* XXX: ss2 has border color pointer, which should be in general state address,
- and just a single texel tex map, with R32G32B32A32_FLOAT */
src_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */
if (pMask) {
@@ -709,17 +722,16 @@ ErrorF("i965 prepareComposite\n");
}
if (!pMaskPicture->repeat) {
- /* XXX: clamp_border and set border to 0 */
- mask_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
- mask_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
- mask_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ mask_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+ mask_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+ mask_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+ mask_sampler_state->ss2.default_color_pointer =
+ (state_base_offset + default_color_offset)>>5;
} else {
mask_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
mask_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
mask_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
}
- /* XXX: ss2 has border color pointer, which should be in general state address,
- and just a single texel tex map, with R32G32B32A32_FLOAT */
mask_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */
}
diff-tree b6eba96584bcd2c024f6443d9f3728eb65b234fb (from 453842c9ff733af45fa665d9db6a35164f45c60a)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 16:30:53 2006 +0800
fix vertex buffer size
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 9127d65..94eabfb 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -233,16 +233,12 @@ Bool
I965EXACheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
PicturePtr pDstPicture)
{
- /* check op*/
- /* check op with mask's componentAlpha*/
- /* check textures */
- /* check dst buffer format */
CARD32 tmp1;
/* Check for unsupported compositing operations. */
if (op >= sizeof(I965BlendOp) / sizeof(I965BlendOp[0]))
I830FALLBACK("Unsupported Composite op 0x%x\n", op);
-
+
if (pMaskPicture != NULL && pMaskPicture->componentAlpha) {
/* Check if it's component alpha that relies on a source alpha and on
* the source value. We can only get one of those into the single
@@ -305,7 +301,7 @@ int next_offset, total_state_size;
char *state_base;
int state_base_offset;
float *vb;
-int vb_size = 4 * 4 ; /* 4 DWORDS per vertex, 4 vertices for TRIFAN*/
+int vb_size = (4 * 4) * 4 ; /* 4 DWORDS per vertex*/
CARD32 src_blend, dst_blend;
diff-tree 453842c9ff733af45fa665d9db6a35164f45c60a (from 18ad7d5cf04081d89a9f978ccc7794116f7c498b)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 16:24:24 2006 +0800
Clean up issuing commands to the ring buffer
Make it easier to track the different parts of the ring state, and
use the rectlist primitive instead of a trifan.
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 2c3e43b..9127d65 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -376,11 +376,6 @@ I965EXAPrepareComposite(int op, PictureP
ErrorF("i965 prepareComposite\n");
-// i965_3d_pipeline_setup(pScrn);
-// i965_surf_setup(pScrn, pSrcPicture, pMaskPicture, pDstPicture,
-// pSrc, pMask, pDst);
- // then setup blend, and shader program
-
/* FIXME: fallback in pMask for now, would be enable after finish
wm kernel program */
if (pMask)
@@ -819,62 +814,65 @@ ErrorF("i965 prepareComposite\n");
* rendering pipe
*/
{
-
- BEGIN_LP_RING((pMask?48:46));
- // MI_FLUSH prior to PIPELINE_SELECT
- OUT_RING(MI_FLUSH |
+ BEGIN_LP_RING(2);
+ OUT_RING(MI_FLUSH |
MI_STATE_INSTRUCTION_CACHE_FLUSH |
BRW_MI_GLOBAL_SNAPSHOT_RESET);
+ OUT_RING(MI_NOOP);
+ ADVANCE_LP_RING();
+ }
+ {
+ BEGIN_LP_RING(12);
- /* Match Mesa driver setup */
- OUT_RING(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+ /* Match Mesa driver setup */
+ OUT_RING(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+ OUT_RING(BRW_CS_URB_STATE | 0);
+ OUT_RING((0 << 4) | /* URB Entry Allocation Size */
+ (0 << 0)); /* Number of URB Entries */
+
/* Zero out the two base address registers so all offsets are absolute */
- // XXX: zero out...
- OUT_RING(BRW_STATE_BASE_ADDRESS | 4);
- // why this's not state_base_offset? -> because later we'll always add on
- // state_base_offset to offset params. see SIP
- OUT_RING(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */
- OUT_RING(0 | BASE_ADDRESS_MODIFY); /* Surface state base address */
- OUT_RING(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */
- OUT_RING(0x10000000 | BASE_ADDRESS_MODIFY); /* general state max addr, disabled */
- OUT_RING(0x10000000 | BASE_ADDRESS_MODIFY); /* media object state max addr, disabled */
+ OUT_RING(BRW_STATE_BASE_ADDRESS | 4);
+ OUT_RING(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */
+ OUT_RING(0 | BASE_ADDRESS_MODIFY); /* Surface state base address */
+ OUT_RING(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */
+ OUT_RING(0x10000000 | BASE_ADDRESS_MODIFY); /* general state max addr, disabled */
+ OUT_RING(0x10000000 | BASE_ADDRESS_MODIFY); /* media object state max addr, disabled */
/* Set system instruction pointer */
- OUT_RING(BRW_STATE_SIP | 0);
- OUT_RING(state_base_offset + sip_kernel_offset); /* system instruction pointer */
-
+ OUT_RING(BRW_STATE_SIP | 0);
+ OUT_RING(state_base_offset + sip_kernel_offset); /* system instruction pointer */
+ OUT_RING(MI_NOOP);
+ ADVANCE_LP_RING();
+ }
+ {
+ BEGIN_LP_RING(26);
/* Pipe control */
- // XXX: pipe control write cache before enabling color blending
- // vol2, geometry pipeline 1.8.4
- OUT_RING(BRW_PIPE_CONTROL |
+ OUT_RING(BRW_PIPE_CONTROL |
BRW_PIPE_CONTROL_NOWRITE |
BRW_PIPE_CONTROL_IS_FLUSH |
2);
- OUT_RING(0); /* Destination address */
- OUT_RING(0); /* Immediate data low DW */
- OUT_RING(0); /* Immediate data high DW */
+ OUT_RING(0); /* Destination address */
+ OUT_RING(0); /* Immediate data low DW */
+ OUT_RING(0); /* Immediate data high DW */
/* Binding table pointers */
- OUT_RING(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4);
- OUT_RING(0); /* vs */
- OUT_RING(0); /* gs */
- OUT_RING(0); /* clip */
- OUT_RING(0); /* sf */
+ OUT_RING(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4);
+ OUT_RING(0); /* vs */
+ OUT_RING(0); /* gs */
+ OUT_RING(0); /* clip */
+ OUT_RING(0); /* sf */
/* Only the PS uses the binding table */
- OUT_RING(state_base_offset + binding_table_offset); /* ps */
-
- //ring 20
+ OUT_RING(state_base_offset + binding_table_offset); /* ps */
/* The drawing rectangle clipping is always on. Set it to values that
* shouldn't do any clipping.
*/
- //XXX: fix for picture size
- OUT_RING(BRW_3DSTATE_DRAWING_RECTANGLE | 2); /* XXX 3 for BLC or CTG */
- OUT_RING(0x00000000); /* ymin, xmin */
- OUT_RING((pScrn->virtualX - 1) |
- (pScrn->virtualY - 1) << 16); /* ymax, xmax */
- OUT_RING(0x00000000); /* yorigin, xorigin */
+ OUT_RING(BRW_3DSTATE_DRAWING_RECTANGLE | 2); /* XXX 3 for BLC or CTG */
+ OUT_RING(0x00000000); /* ymin, xmin */
+ OUT_RING((pScrn->virtualX - 1) |
+ (pScrn->virtualY - 1) << 16); /* ymax, xmax */
+ OUT_RING(0x00000000); /* yorigin, xorigin */
/* skip the depth buffer */
/* skip the polygon stipple */
@@ -882,90 +880,82 @@ ErrorF("i965 prepareComposite\n");
/* skip the line stipple */
/* Set the pointers to the 3d pipeline state */
- OUT_RING(BRW_3DSTATE_PIPELINED_POINTERS | 5);
- OUT_RING(state_base_offset + vs_offset); /* 32 byte aligned */
- OUT_RING(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */
- OUT_RING(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */
- OUT_RING(state_base_offset + sf_offset); /* 32 byte aligned */
- OUT_RING(state_base_offset + wm_offset); /* 32 byte aligned */
- OUT_RING(state_base_offset + cc_offset); /* 64 byte aligned */
+ OUT_RING(BRW_3DSTATE_PIPELINED_POINTERS | 5);
+ OUT_RING(state_base_offset + vs_offset); /* 32 byte aligned */
+ OUT_RING(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */
+ OUT_RING(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */
+ OUT_RING(state_base_offset + sf_offset); /* 32 byte aligned */
+ OUT_RING(state_base_offset + wm_offset); /* 32 byte aligned */
+ OUT_RING(state_base_offset + cc_offset); /* 64 byte aligned */
/* URB fence */
- // XXX: CS for const URB needed? if not, cs_fence should be equal to sf_fence
- OUT_RING(BRW_URB_FENCE |
- UF0_CS_REALLOC |
- UF0_SF_REALLOC |
- UF0_CLIP_REALLOC |
- UF0_GS_REALLOC |
- UF0_VS_REALLOC |
- 1);
- OUT_RING(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
- ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
- ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
- OUT_RING(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
- ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
+ OUT_RING(BRW_URB_FENCE |
+ UF0_CS_REALLOC |
+ UF0_SF_REALLOC |
+ UF0_CLIP_REALLOC |
+ UF0_GS_REALLOC |
+ UF0_VS_REALLOC |
+ 1);
+ OUT_RING(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
+ ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
+ ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
+ OUT_RING(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
+ ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
/* Constant buffer state */
- // XXX: needed? seems no usage, as we don't have CONSTANT_BUFFER definition
- OUT_RING(BRW_CS_URB_STATE | 0);
- OUT_RING(((URB_CS_ENTRY_SIZE - 1) << 4) | /* URB Entry Allocation Size */
- (URB_CS_ENTRIES << 0)); /* Number of URB Entries */
-
+ OUT_RING(BRW_CS_URB_STATE | 0);
+ OUT_RING(((URB_CS_ENTRY_SIZE - 1) << 4) | /* URB Entry Allocation Size */
+ (URB_CS_ENTRIES << 0)); /* Number of URB Entries */
+ ADVANCE_LP_RING();
+ }
+ {
+ int nelem = pMask ? 3: 2;
+ BEGIN_LP_RING(pMask?12:10);
/* Set up the pointer to our vertex buffer */
- // XXX: double check
- // int vb_pitch = 4 * 4; // XXX: pitch should include mask's coords? possible
- // all three coords on one row?
- int nelem = pMask ? 3: 2;
- OUT_RING(BRW_3DSTATE_VERTEX_BUFFERS | 3); //XXX: should be 4n-1 -> 3
- OUT_RING((0 << VB0_BUFFER_INDEX_SHIFT) |
- VB0_VERTEXDATA |
- ((4 * 2 * nelem) << VB0_BUFFER_PITCH_SHIFT));
- // pitch includes all vertex data, 4bytes for 1 dword, each
- // element has 2 coords (x,y)(s0,t0), nelem to reflect possible
- // mask
- OUT_RING(state_base_offset + vb_offset);
- OUT_RING(4 * nelem); // max index, prim has 4 coords
- OUT_RING(0); // ignore for VERTEXDATA, but still there
+ OUT_RING(BRW_3DSTATE_VERTEX_BUFFERS | 3);
+ OUT_RING((0 << VB0_BUFFER_INDEX_SHIFT) |
+ VB0_VERTEXDATA |
+ ((4 * 2 * nelem) << VB0_BUFFER_PITCH_SHIFT));
+ OUT_RING(state_base_offset + vb_offset);
+ OUT_RING(2); // max index, prim has 4 coords
+ OUT_RING(0); // ignore for VERTEXDATA, but still there
/* Set up our vertex elements, sourced from the single vertex buffer. */
- OUT_RING(BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * nelem) - 1)); // XXX: 2n-1, (x,y) + (s0,t0) +
- // possible (s1, t1)
+ OUT_RING(BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * nelem) - 1));
/* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
- OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
- VE0_VALID |
- (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
- (0 << VE0_OFFSET_SHIFT));
- OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
- (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
+ OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+ VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+ (0 << VE0_OFFSET_SHIFT));
+ OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+ (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
/* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
- OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
- VE0_VALID |
- (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
- (8 << VE0_OFFSET_SHIFT));
- OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
- (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
-
- if (pMask) {
OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
- VE0_VALID |
- (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
- (16 << VE0_OFFSET_SHIFT));
- OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
- (8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
- //XXX: is this has alignment issue? and thread access problem?
- }
+ VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+ (8 << VE0_OFFSET_SHIFT));
+ OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+ (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
+
+ if (pMask) {
+ OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+ VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+ (16 << VE0_OFFSET_SHIFT));
+ OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+ (8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
+ }
- ADVANCE_LP_RING();
-
+ ADVANCE_LP_RING();
}
#ifdef I830DEBUG
@@ -983,7 +973,7 @@ I965EXAComposite(PixmapPtr pDst, int src
I830Ptr pI830 = I830PTR(pScrn);
int srcXend, srcYend, maskXend, maskYend;
PictVector v;
- int pMask = 1, i = 0;
+ int pMask = 1, i;
DPRINTF(PFX, "Composite: srcX %d, srcY %d\n\t maskX %d, maskY %d\n\t"
"dstX %d, dstY %d\n\twidth %d, height %d\n\t"
@@ -999,8 +989,10 @@ I965EXAComposite(PixmapPtr pDst, int src
srcXend = srcX + w;
srcYend = srcY + h;
- maskXend = maskX + w;
- maskYend = maskY + h;
+ if (pMask) {
+ maskXend = maskX + w;
+ maskYend = maskY + h;
+ }
if (is_transform[0]) {
v.vector[0] = IntToxFixed(srcX);
v.vector[1] = IntToxFixed(srcY);
@@ -1035,51 +1027,45 @@ I965EXAComposite(PixmapPtr pDst, int src
"dstX %d, dstY %d\n", srcX, srcY, srcXend, srcYend,
maskX, maskY, maskXend, maskYend, dstX, dstY);
-
- vb[i++] = (float)dstX;
- vb[i++] = (float)dstY;
- vb[i++] = (float)srcX / scale_units[0][0];
- vb[i++] = (float)srcY / scale_units[0][1];
- if (pMask) {
- vb[i++] = (float)maskX / scale_units[1][0];
- vb[i++] = (float)maskY / scale_units[1][1];
- }
-
- vb[i++] = (float)dstX;
- vb[i++] = (float)(dstY + h);
- vb[i++] = (float)srcX / scale_units[0][0];
- vb[i++] = (float)srcYend / scale_units[0][1];
+ i = 0;
+ /* rect (x2,y2) */
+ vb[i++] = (float)(srcXend) / scale_units[0][0];
+ vb[i++] = (float)(srcYend) / scale_units[0][1];
if (pMask) {
- vb[i++] = (float)maskX / scale_units[1][0];
+ vb[i++] = (float)maskXend / scale_units[1][0];
vb[i++] = (float)maskYend / scale_units[1][1];
}
-
vb[i++] = (float)(dstX + w);
vb[i++] = (float)(dstY + h);
- vb[i++] = (float)srcXend / scale_units[0][0];
- vb[i++] = (float)srcYend / scale_units[0][1];
+
+ /* rect (x1,y2) */
+ vb[i++] = (float)(srcX)/ scale_units[0][0];
+ vb[i++] = (float)(srcYend)/ scale_units[0][1];
if (pMask) {
- vb[i++] = (float)maskXend / scale_units[1][0];
+ vb[i++] = (float)maskX / scale_units[1][0];
vb[i++] = (float)maskYend / scale_units[1][1];
}
+ vb[i++] = (float)dstX;
+ vb[i++] = (float)(dstY + h);
- vb[i++] = (float)(dstX + w);
- vb[i++] = (float)dstY;
- vb[i++] = (float)srcXend / scale_units[0][0];
- vb[i++] = (float)srcY / scale_units[0][1];
+ /* rect (x1,y1) */
+ vb[i++] = (float)(srcX) / scale_units[0][0];
+ vb[i++] = (float)(srcY) / scale_units[0][1];
if (pMask) {
- vb[i++] = (float)maskXend / scale_units[1][0];
+ vb[i++] = (float)maskX / scale_units[1][0];
vb[i++] = (float)maskY / scale_units[1][1];
}
-
+ vb[i++] = (float)dstX;
+ vb[i++] = (float)dstY;
+
{
BEGIN_LP_RING(6);
OUT_RING(BRW_3DPRIMITIVE |
BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
- (_3DPRIM_TRIFAN << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
+ (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
(0 << 9) | /* CTG - indirect vertex count */
4);
- OUT_RING(4); /* vertex count per instance */
+ OUT_RING(3); /* vertex count per instance */
OUT_RING(0); /* start vertex offset */
OUT_RING(1); /* single instance */
OUT_RING(0); /* start instance location */
@@ -1090,4 +1076,19 @@ I965EXAComposite(PixmapPtr pDst, int src
ErrorF("sync after 3dprimitive");
I830Sync(pScrn);
#endif
+ /* we must be sure that the pipeline is flushed before next exa draw,
+ because that will be new state, binding state and instructions*/
+ {
+ BEGIN_LP_RING(4);
+ OUT_RING(BRW_PIPE_CONTROL |
+ BRW_PIPE_CONTROL_NOWRITE |
+ BRW_PIPE_CONTROL_WC_FLUSH |
+ BRW_PIPE_CONTROL_IS_FLUSH |
+ (1 << 10) | /* XXX texture cache flush for BLC/CTG */
+ 2);
+ OUT_RING(0); /* Destination address */
+ OUT_RING(0); /* Immediate data low DW */
+ OUT_RING(0); /* Immediate data high DW */
+ ADVANCE_LP_RING();
+ }
}
diff-tree 18ad7d5cf04081d89a9f978ccc7794116f7c498b (from 3d5bd0c14eea7951540f7a12eee257428f78e2d1)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 16:04:16 2006 +0800
remove wrong scale_units
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 2751778..2c3e43b 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -397,8 +397,6 @@ ErrorF("i965 prepareComposite\n");
}
scale_units[0][0] = pSrc->drawable.width;
scale_units[0][1] = pSrc->drawable.height;
- scale_units[2][0] = pDst->drawable.width;
- scale_units[2][1] = pDst->drawable.height;
if (pSrcPicture->transform) {
is_transform[0] = TRUE;
diff-tree 3d5bd0c14eea7951540f7a12eee257428f78e2d1 (from a06beb5f80f097fac3b718e742742bb32f1c1194)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 16:02:16 2006 +0800
Rename exa sf/wm program files
Also fix some minor issues in the wm program.
diff --git a/src/Makefile.am b/src/Makefile.am
index 494a921..890e90f 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -78,8 +78,8 @@ sf_prog.h: packed_yuv_sf.g4a
intel-gen4asm -o sf_prog.h packed_yuv_sf.g4a
wm_prog.h: packed_yuv_wm.g4a
intel-gen4asm -o wm_prog.h packed_yuv_wm.g4a
-i965_composite_wm_nomask.h: i965_composite_wm_nomask.g4a
- intel-gen4asm -o i965_composite_wm_nomask.h i965_composite_wm_nomask.g4a
+exa_wm_nomask_prog.h: exa_wm_nomask.g4a
+ intel-gen4asm -o exa_wm_nomask_prog.h exa_wm_nomask.g4a
endif
if DRI
diff --git a/src/exa_sf.g4a b/src/exa_sf.g4a
new file mode 100644
index 0000000..8c1398f
--- /dev/null
+++ b/src/exa_sf.g4a
@@ -0,0 +1,17 @@
+send (1) 0 g6<1>F g1.12<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
+send (1) 0 g6.4<1>F g1.20<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
+add (8) g7<1>F g4<8,8,1>F -g3<8,8,1>F { align1 };
+mul (1) g7<1>F g7<0,1,0>F g6<0,1,0>F { align1 };
+mul (1) g7.4<1>F g7.4<0,1,0>F g6.4<0,1,0>F { align1 };
+mov (8) m1<1>F g7<0,1,0>F { align1 };
+mov (8) m2<1>F g7.4<0,1,0>F { align1 };
+mov (8) m3<1>F g3<8,8,1>F { align1 };
+send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT };
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
diff --git a/src/exa_sf_prog.h b/src/exa_sf_prog.h
new file mode 100644
index 0000000..830d176
--- /dev/null
+++ b/src/exa_sf_prog.h
@@ -0,0 +1,17 @@
+ { 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 },
+ { 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 },
+ { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d4060 },
+ { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c0 },
+ { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c4 },
+ { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
+ { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm_nomask.g4a b/src/exa_wm_nomask.g4a
new file mode 100644
index 0000000..8e851a3
--- /dev/null
+++ b/src/exa_wm_nomask.g4a
@@ -0,0 +1,143 @@
+/*
+ * This's for exa composite operation in no mask picture case.
+ * The simplest case is just sending what src picture has to dst picture.
+ */
+
+/* I think this should be same as in g4a program for texture video,
+ as we also use 16-pixel dispatch. and SF scale in g3 is useful for us. */
+
+/* The initial payload of the thread is always g0.
+ * WM_URB (incoming URB entries) is g3
+ * X0_R is g4
+ * X1_R is g5
+ * Y0_R is g6
+ * Y1_R is g7
+ */
+
+ /* Set up ss0.x coordinates*/
+mov (1) g4<1>F g1.8<0,1,0>UW { align1 };
+add (1) g4.4<1>F g1.8<0,1,0>UW 1UB { align1 };
+mov (1) g4.8<1>F g1.8<0,1,0>UW { align1 };
+add (1) g4.12<1>F g1.8<0,1,0>UW 1UB { align1 };
+ /* Set up ss0.y coordinates */
+mov (1) g6<1>F g1.10<0,1,0>UW { align1 };
+mov (1) g6.4<1>F g1.10<0,1,0>UW { align1 };
+add (1) g6.8<1>F g1.10<0,1,0>UW 1UB { align1 };
+add (1) g6.12<1>F g1.10<0,1,0>UW 1UB { align1 };
+ /* set up ss1.x coordinates */
+mov (1) g4.16<1>F g1.12<0,1,0>UW { align1 };
+add (1) g4.20<1>F g1.12<0,1,0>UW 1UB { align1 };
+mov (1) g4.24<1>F g1.12<0,1,0>UW { align1 };
+add (1) g4.28<1>F g1.12<0,1,0>UW 1UB { align1 };
+ /* set up ss1.y coordinates */
+mov (1) g6.16<1>F g1.14<0,1,0>UW { align1 };
+mov (1) g6.20<1>F g1.14<0,1,0>UW { align1 };
+add (1) g6.24<1>F g1.14<0,1,0>UW 1UB { align1 };
+add (1) g6.28<1>F g1.14<0,1,0>UW 1UB { align1 };
+ /* Set up ss2.x coordinates */
+mov (1) g5<1>F g1.16<0,1,0>UW { align1 };
+add (1) g5.4<1>F g1.16<0,1,0>UW 1UB { align1 };
+mov (1) g5.8<1>F g1.16<0,1,0>UW { align1 };
+add (1) g5.12<1>F g1.16<0,1,0>UW 1UB { align1 };
+ /* Set up ss2.y coordinates */
+mov (1) g7<1>F g1.18<0,1,0>UW { align1 };
+mov (1) g7.4<1>F g1.18<0,1,0>UW { align1 };
+add (1) g7.8<1>F g1.18<0,1,0>UW 1UB { align1 };
+add (1) g7.12<1>F g1.18<0,1,0>UW 1UB { align1 };
+ /* Set up ss3.x coordinates */
+mov (1) g5.16<1>F g1.20<0,1,0>UW { align1 };
+add (1) g5.20<1>F g1.20<0,1,0>UW 1UB { align1 };
+mov (1) g5.24<1>F g1.20<0,1,0>UW { align1 };
+add (1) g5.28<1>F g1.20<0,1,0>UW 1UB { align1 };
+ /* Set up ss3.y coordinates */
+mov (1) g7.16<1>F g1.22<0,1,0>UW { align1 };
+mov (1) g7.20<1>F g1.22<0,1,0>UW { align1 };
+add (1) g7.24<1>F g1.22<0,1,0>UW 1UB { align1 };
+add (1) g7.28<1>F g1.22<0,1,0>UW 1UB { align1 };
+
+ /* Now, map these screen space coordinates into texture coordinates. */
+ /* subtract screen-space X origin of vertex 0. */
+add (8) g4<1>F g4<8,8,1>F -g1<0,1,0>F { align1 };
+add (8) g5<1>F g5<8,8,1>F -g1<0,1,0>F { align1 };
+ /* scale by texture X increment */
+mul (8) g4<1>F g4<8,8,1>F g3<0,1,0>F { align1 };
+mul (8) g5<1>F g5<8,8,1>F g3<0,1,0>F { align1 };
+ /* add in texture X offset */
+add (8) g4<1>F g4<8,8,1>F g3.12<0,1,0>F { align1 };
+add (8) g5<1>F g5<8,8,1>F g3.12<0,1,0>F { align1 };
+ /* subtract screen-space Y origin of vertex 0. */
+add (8) g6<1>F g6<8,8,1>F -g1.4<0,1,0>F { align1 };
+add (8) g7<1>F g7<8,8,1>F -g1.4<0,1,0>F { align1 };
+ /* scale by texture Y increment */
+mul (8) g6<1>F g6<8,8,1>F g3.20<0,1,0>F { align1 };
+mul (8) g7<1>F g7<8,8,1>F g3.20<0,1,0>F { align1 };
+ /* add in texture Y offset */
+add (8) g6<1>F g6<8,8,1>F g3.28<0,1,0>F { align1 };
+add (8) g7<1>F g7<8,8,1>F g3.28<0,1,0>F { align1 };
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+mov (8) m1<1>F g4<8,8,1>F { align1 };
+mov (8) m2<1>F g5<8,8,1>F { align1 }; /* param 0 u in m1, m2 */
+mov (8) m3<1>F g6<8,8,1>F { align1 };
+mov (8) m4<1>F g7<8,8,1>F { align1 }; /* param 1 v in m3, m4 */
+
+/* m0 will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+send (16) 0 /* msg reg index */
+ g12<1>UW /* readback */
+ g0<8,8,1>UW /* copy to msg start reg*/
+ sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype)
+ /* here(src->dst) we should use src_sampler and src_surface */
+ mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */
+
+mov (8) g19<1>UD g19<8,8,1>UD { align1 }; /* wait sampler return */
+/* if we set up read-back reg correctly, emit dataport write 'send' cmd with EOT */
+
+/* m0, m1 are all direct passed by PS thread payload */
+mov (8) m1<1>F g1<8,8,1>F { align1 };
+
+/* prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), then it's ready to write */
+/* g12 -> m2
+ g13 -> m6
+ g14 -> m3
+ g15 -> m7
+ g16 -> m4
+ g17 -> m8
+ g18 -> m5
+ g19 -> m9
+*/
+mov (8) m2<1>F g12<8,8,1>F { align1 };
+mov (8) m3<1>F g14<8,8,1>F { align1 };
+mov (8) m4<1>F g16<8,8,1>F { align1 };
+mov (8) m5<1>F g18<8,8,1>F { align1 };
+mov (8) m6<1>F g13<8,8,1>F { align1 };
+mov (8) m7<1>F g15<8,8,1>F { align1 };
+mov (8) m8<1>F g17<8,8,1>F { align1 };
+mov (8) m9<1>F g19<8,8,1>F { align1 };
+
+/* m0, m1 are all direct passed by PS thread payload */
+mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable };
+
+/* write */
+send (16) 0 acc0<1>UW g0<8,8,1>UW write (
+ 0, /* binding_table */
+ 8, /* pixel scordboard clear, msg type simd16 single source */
+ 4, /* render target write */
+ 0 /* no write commit message */
+ )
+ mlen 10
+ rlen 0
+ { align1 EOT };
+
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
diff --git a/src/exa_wm_nomask_prog.h b/src/exa_wm_nomask_prog.h
new file mode 100644
index 0000000..7870b3b
--- /dev/null
+++ b/src/exa_wm_nomask_prog.h
@@ -0,0 +1,70 @@
+ { 0x00000001, 0x2080013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x20840d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x2088013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x208c0d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x20c0013d, 0x0000002a, 0x00000000 },
+ { 0x00000001, 0x20c4013d, 0x0000002a, 0x00000000 },
+ { 0x00000040, 0x20c80d3d, 0x0000002a, 0x00000001 },
+ { 0x00000040, 0x20cc0d3d, 0x0000002a, 0x00000001 },
+ { 0x00000001, 0x2090013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x20940d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x2098013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x209c0d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x20d0013d, 0x0000002e, 0x00000000 },
+ { 0x00000001, 0x20d4013d, 0x0000002e, 0x00000000 },
+ { 0x00000040, 0x20d80d3d, 0x0000002e, 0x00000001 },
+ { 0x00000040, 0x20dc0d3d, 0x0000002e, 0x00000001 },
+ { 0x00000001, 0x20a0013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20a40d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x20a8013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20ac0d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x20e0013d, 0x00000032, 0x00000000 },
+ { 0x00000001, 0x20e4013d, 0x00000032, 0x00000000 },
+ { 0x00000040, 0x20e80d3d, 0x00000032, 0x00000001 },
+ { 0x00000040, 0x20ec0d3d, 0x00000032, 0x00000001 },
+ { 0x00000001, 0x20b0013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20b40d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x20b8013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20bc0d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x20f0013d, 0x00000036, 0x00000000 },
+ { 0x00000001, 0x20f4013d, 0x00000036, 0x00000000 },
+ { 0x00000040, 0x20f80d3d, 0x00000036, 0x00000001 },
+ { 0x00000040, 0x20fc0d3d, 0x00000036, 0x00000001 },
+ { 0x00600040, 0x208077bd, 0x008d0080, 0x00004020 },
+ { 0x00600040, 0x20a077bd, 0x008d00a0, 0x00004020 },
+ { 0x00600041, 0x208077bd, 0x008d0080, 0x00000060 },
+ { 0x00600041, 0x20a077bd, 0x008d00a0, 0x00000060 },
+ { 0x00600040, 0x208077bd, 0x008d0080, 0x0000006c },
+ { 0x00600040, 0x20a077bd, 0x008d00a0, 0x0000006c },
+ { 0x00600040, 0x20c077bd, 0x008d00c0, 0x00004024 },
+ { 0x00600040, 0x20e077bd, 0x008d00e0, 0x00004024 },
+ { 0x00600041, 0x20c077bd, 0x008d00c0, 0x00000074 },
+ { 0x00600041, 0x20e077bd, 0x008d00e0, 0x00000074 },
+ { 0x00600040, 0x20c077bd, 0x008d00c0, 0x0000007c },
+ { 0x00600040, 0x20e077bd, 0x008d00e0, 0x0000007c },
+ { 0x00600001, 0x202003be, 0x008d0080, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d00a0, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d00c0, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d00e0, 0x00000000 },
+ { 0x00800031, 0x21801d29, 0x008d0000, 0x02580001 },
+ { 0x00600001, 0x22600021, 0x008d0260, 0x00000000 },
+ { 0x00600001, 0x202003be, 0x008d0020, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d0180, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d0200, 0x00000000 },
+ { 0x00600001, 0x20a003be, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x20c003be, 0x008d01a0, 0x00000000 },
+ { 0x00600001, 0x20e003be, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x210003be, 0x008d0220, 0x00000000 },
+ { 0x00600001, 0x212003be, 0x008d0260, 0x00000000 },
+ { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
+ { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/i965_composite_wm_nomask.g4a b/src/i965_composite_wm_nomask.g4a
deleted file mode 100644
index 927d86a..0000000
--- a/src/i965_composite_wm_nomask.g4a
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * This's for exa composite operation in no mask picture case.
- * The simplest case is just sending what src picture has to dst picture.
- * XXX: This is still experimental, and should be fixed to support multiple texture
- * map, and conditional mul actions.
- */
-
-/* I think this should be same as in g4a program for texture video,
- as we also use 16-pixel dispatch. and SF scale in g3 is useful for us. */
-
-/* The initial payload of the thread is always g0.
- * WM_URB (incoming URB entries) is g3
- * X0_R is g4
- * X1_R is g5
- * Y0_R is g6
- * Y1_R is g7
- */
-
- /* Set up ss0.x coordinates*/
-mov (1) g4<1>F g1.8<0,1,0>UW { align1 };
-add (1) g4.4<1>F g1.8<0,1,0>UW 1UB { align1 };
-mov (1) g4.8<1>F g1.8<0,1,0>UW { align1 };
-add (1) g4.12<1>F g1.8<0,1,0>UW 1UB { align1 };
- /* Set up ss0.y coordinates */
-mov (1) g6<1>F g1.10<0,1,0>UW { align1 };
-mov (1) g6.4<1>F g1.10<0,1,0>UW { align1 };
-add (1) g6.8<1>F g1.10<0,1,0>UW 1UB { align1 };
-add (1) g6.12<1>F g1.10<0,1,0>UW 1UB { align1 };
- /* set up ss1.x coordinates */
-mov (1) g4.16<1>F g1.12<0,1,0>UW { align1 };
-add (1) g4.20<1>F g1.12<0,1,0>UW 1UB { align1 };
-mov (1) g4.24<1>F g1.12<0,1,0>UW { align1 };
-add (1) g4.28<1>F g1.12<0,1,0>UW 1UB { align1 };
- /* set up ss1.y coordinates */
-mov (1) g6.16<1>F g1.14<0,1,0>UW { align1 };
-mov (1) g6.20<1>F g1.14<0,1,0>UW { align1 };
-add (1) g6.24<1>F g1.14<0,1,0>UW 1UB { align1 };
-add (1) g6.28<1>F g1.14<0,1,0>UW 1UB { align1 };
- /* Set up ss2.x coordinates */
-mov (1) g5<1>F g1.16<0,1,0>UW { align1 };
-add (1) g5.4<1>F g1.16<0,1,0>UW 1UB { align1 };
-mov (1) g5.8<1>F g1.16<0,1,0>UW { align1 };
-add (1) g5.12<1>F g1.16<0,1,0>UW 1UB { align1 };
- /* Set up ss2.y coordinates */
-mov (1) g7<1>F g1.18<0,1,0>UW { align1 };
-mov (1) g7.4<1>F g1.18<0,1,0>UW { align1 };
-add (1) g7.8<1>F g1.18<0,1,0>UW 1UB { align1 };
-add (1) g7.12<1>F g1.18<0,1,0>UW 1UB { align1 };
- /* Set up ss3.x coordinates */
-mov (1) g5.16<1>F g1.20<0,1,0>UW { align1 };
-add (1) g5.20<1>F g1.20<0,1,0>UW 1UB { align1 };
-mov (1) g5.24<1>F g1.20<0,1,0>UW { align1 };
-add (1) g5.28<1>F g1.20<0,1,0>UW 1UB { align1 };
- /* Set up ss3.y coordinates */
-mov (1) g7.16<1>F g1.22<0,1,0>UW { align1 };
-mov (1) g7.20<1>F g1.22<0,1,0>UW { align1 };
-add (1) g7.24<1>F g1.22<0,1,0>UW 1UB { align1 };
-add (1) g7.28<1>F g1.22<0,1,0>UW 1UB { align1 };
-
- /* Now, map these screen space coordinates into texture coordinates. */
- /* subtract screen-space X origin of vertex 0. */
-add (8) g4<1>F g4<8,8,1>F -g1<0,1,0>F { align1 };
-add (8) g5<1>F g5<8,8,1>F -g1<0,1,0>F { align1 };
- /* scale by texture X increment */
-mul (8) g4<1>F g4<8,8,1>F g3<0,1,0>F { align1 };
-mul (8) g5<1>F g5<8,8,1>F g3<0,1,0>F { align1 };
- /* add in texture X offset */
-add (8) g4<1>F g4<8,8,1>F g3.12<0,1,0>F { align1 };
-add (8) g5<1>F g5<8,8,1>F g3.12<0,1,0>F { align1 };
- /* subtract screen-space Y origin of vertex 0. */
-add (8) g6<1>F g6<8,8,1>F -g1.4<0,1,0>F { align1 };
-add (8) g7<1>F g7<8,8,1>F -g1.4<0,1,0>F { align1 };
- /* scale by texture Y increment */
-mul (8) g6<1>F g6<8,8,1>F g3.20<0,1,0>F { align1 };
-mul (8) g7<1>F g7<8,8,1>F g3.20<0,1,0>F { align1 };
- /* add in texture Y offset */
-add (8) g6<1>F g6<8,8,1>F g3.28<0,1,0>F { align1 };
-add (8) g7<1>F g7<8,8,1>F g3.28<0,1,0>F { align1 };
-
-/* prepare sampler read back gX register, which would be written back to output */
-
-/* use simd16 sampler, param 0 is u, param 1 is v. */
-/* 'payload' loading, assuming tex coord start from g4 */
-mov (8) m1<1>F g4<8,8,1>F { align1 };
-mov (8) m2<1>F g5<8,8,1>F { align1 }; /* param 0 u in m1, m2 */
-mov (8) m3<1>F g6<8,8,1>F { align1 };
-mov (8) m4<1>F g7<8,8,1>F { align1 }; /* param 1 v in m3, m4 */
-
-/* m0 will be copied with g0, as it contains send desc */
-/* emit sampler 'send' cmd */
-send (16) 0 /* msg reg index */
- g12<1>UW /* readback */
- g0<8,8,1>UW /* copy to msg start reg*/
- sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype)
- /* here(src->dst) we should use src_sampler and src_surface */
- mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */
-
-/* if we set up read-back reg correctly, emit dataport write 'send' cmd with EOT */
-
-/* m0, m1 are all direct passed by PS thread payload */
-mov (8) m1<1>F g1<8,8,1>F { align1 };
-
-/* prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), then it's ready to write */
-/* g12 -> m2
- g13 -> m6
- g14 -> m3
- g15 -> m7
- g16 -> m4
- g17 -> m8
- g18 -> m5
- g19 -> m9
-*/
-mov (8) m2<1>F g12<8,8,1>F { align1 };
-mov (8) m3<1>F g14<8,8,1>F { align1 };
-mov (8) m4<1>F g16<8,8,1>F { align1 };
-mov (8) m5<1>F g18<8,8,1>F { align1 };
-mov (8) m6<1>F g13<8,8,1>F { align1 };
-mov (8) m7<1>F g15<8,8,1>F { align1 };
-mov (8) m8<1>F g17<8,8,1>F { align1 };
-mov (8) m9<1>F g19<8,8,1>F { align1 };
-
-/* write */
-send (16) 0 null g0<8,8,1>UW write (
- 0, /* binding_table */
- 8, /* pixel scordboard clear, msg type simd16 single source */
- 4, /* render target write */
- 0 /* no write commit message */
- )
- mlen 10
- rlen 0
- { align1 EOT };
-
-nop;
-nop;
-nop;
-nop;
-nop;
-nop;
-nop;
-nop;
-nop;
diff --git a/src/i965_composite_wm_nomask.h b/src/i965_composite_wm_nomask.h
deleted file mode 100644
index bd99dd9..0000000
--- a/src/i965_composite_wm_nomask.h
+++ /dev/null
@@ -1,68 +0,0 @@
- { 0x00000001, 0x2080013d, 0x00000028, 0x00000000 },
- { 0x00000040, 0x20840d3d, 0x00000028, 0x00000001 },
- { 0x00000001, 0x2088013d, 0x00000028, 0x00000000 },
- { 0x00000040, 0x208c0d3d, 0x00000028, 0x00000001 },
- { 0x00000001, 0x20c0013d, 0x0000002a, 0x00000000 },
- { 0x00000001, 0x20c4013d, 0x0000002a, 0x00000000 },
- { 0x00000040, 0x20c80d3d, 0x0000002a, 0x00000001 },
- { 0x00000040, 0x20cc0d3d, 0x0000002a, 0x00000001 },
- { 0x00000001, 0x2090013d, 0x0000002c, 0x00000000 },
- { 0x00000040, 0x20940d3d, 0x0000002c, 0x00000001 },
- { 0x00000001, 0x2098013d, 0x0000002c, 0x00000000 },
- { 0x00000040, 0x209c0d3d, 0x0000002c, 0x00000001 },
- { 0x00000001, 0x20d0013d, 0x0000002e, 0x00000000 },
- { 0x00000001, 0x20d4013d, 0x0000002e, 0x00000000 },
- { 0x00000040, 0x20d80d3d, 0x0000002e, 0x00000001 },
- { 0x00000040, 0x20dc0d3d, 0x0000002e, 0x00000001 },
- { 0x00000001, 0x20a0013d, 0x00000030, 0x00000000 },
- { 0x00000040, 0x20a40d3d, 0x00000030, 0x00000001 },
- { 0x00000001, 0x20a8013d, 0x00000030, 0x00000000 },
- { 0x00000040, 0x20ac0d3d, 0x00000030, 0x00000001 },
- { 0x00000001, 0x20e0013d, 0x00000032, 0x00000000 },
- { 0x00000001, 0x20e4013d, 0x00000032, 0x00000000 },
- { 0x00000040, 0x20e80d3d, 0x00000032, 0x00000001 },
- { 0x00000040, 0x20ec0d3d, 0x00000032, 0x00000001 },
- { 0x00000001, 0x20b0013d, 0x00000034, 0x00000000 },
- { 0x00000040, 0x20b40d3d, 0x00000034, 0x00000001 },
- { 0x00000001, 0x20b8013d, 0x00000034, 0x00000000 },
- { 0x00000040, 0x20bc0d3d, 0x00000034, 0x00000001 },
- { 0x00000001, 0x20f0013d, 0x00000036, 0x00000000 },
- { 0x00000001, 0x20f4013d, 0x00000036, 0x00000000 },
- { 0x00000040, 0x20f80d3d, 0x00000036, 0x00000001 },
- { 0x00000040, 0x20fc0d3d, 0x00000036, 0x00000001 },
- { 0x00600040, 0x208077bd, 0x008d0080, 0x00004020 },
- { 0x00600040, 0x20a077bd, 0x008d00a0, 0x00004020 },
- { 0x00600041, 0x208077bd, 0x008d0080, 0x00000060 },
- { 0x00600041, 0x20a077bd, 0x008d00a0, 0x00000060 },
- { 0x00600040, 0x208077bd, 0x008d0080, 0x0000006c },
- { 0x00600040, 0x20a077bd, 0x008d00a0, 0x0000006c },
- { 0x00600040, 0x20c077bd, 0x008d00c0, 0x00004024 },
- { 0x00600040, 0x20e077bd, 0x008d00e0, 0x00004024 },
- { 0x00600041, 0x20c077bd, 0x008d00c0, 0x00000074 },
- { 0x00600041, 0x20e077bd, 0x008d00e0, 0x00000074 },
- { 0x00600040, 0x20c077bd, 0x008d00c0, 0x0000007c },
- { 0x00600040, 0x20e077bd, 0x008d00e0, 0x0000007c },
- { 0x00600001, 0x202003be, 0x008d0080, 0x00000000 },
- { 0x00600001, 0x204003be, 0x008d00a0, 0x00000000 },
- { 0x00600001, 0x206003be, 0x008d00c0, 0x00000000 },
- { 0x00600001, 0x208003be, 0x008d00e0, 0x00000000 },
- { 0x00800031, 0x21801d29, 0x008d0000, 0x02580001 },
- { 0x00600001, 0x202003be, 0x008d0020, 0x00000000 },
- { 0x00600001, 0x204003be, 0x008d0180, 0x00000000 },
- { 0x00600001, 0x206003be, 0x008d01c0, 0x00000000 },
- { 0x00600001, 0x208003be, 0x008d0200, 0x00000000 },
- { 0x00600001, 0x20a003be, 0x008d0240, 0x00000000 },
- { 0x00600001, 0x20c003be, 0x008d01a0, 0x00000000 },
- { 0x00600001, 0x20e003be, 0x008d01e0, 0x00000000 },
- { 0x00600001, 0x210003be, 0x008d0220, 0x00000000 },
- { 0x00600001, 0x212003be, 0x008d0260, 0x00000000 },
- { 0x00800031, 0x20001d3c, 0x008d0000, 0x85a04800 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index fe3007b..2751778 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -342,7 +342,7 @@ static const CARD32 sip_kernel_static[][
#define SF_MAX_THREADS 4
static const CARD32 sf_kernel_static[][4] = {
-#include "sf_prog.h"
+#include "exa_sf_prog.h"
};
/* ps kernels */
@@ -350,7 +350,7 @@ static const CARD32 sf_kernel_static[][4
#define PS_MAX_THREADS 32
/* 1: no mask */
static const CARD32 ps_kernel_static_nomask [][4] = {
- #include "i965_composite_wm_nomask.h"
+ #include "exa_wm_nomask_prog.h"
};
/* 2: mask with componentAlpha, src * mask color, XXX: later */
diff-tree a06beb5f80f097fac3b718e742742bb32f1c1194 (from 4198f1216eb13b30d1e92d4395e98861f4324c38)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 15:47:19 2006 +0800
EXA state mem for G965
Do not carve the G965 state buffer out of the EXA offscreen memory;
allocate a dedicated memory region for the G965 state instead.
diff --git a/src/i830.h b/src/i830.h
index df1c171..2a68499 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -73,6 +73,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN
#ifdef I830_USE_EXA
#include "exa.h"
Bool I830EXAInit(ScreenPtr pScreen);
+#define EXA_LINEAR_EXTRA (64*1024)
#endif
#ifdef I830_USE_XAA
@@ -243,6 +244,7 @@ typedef struct _I830Rec {
I830MemRange Scratch2;
#ifdef I830_USE_EXA
I830MemRange Offscreen;
+ I830MemRange EXAStateMem; /* specific exa state for G965 */
#endif
/* Regions allocated either from the above pools, or from agpgart. */
I830MemRange *CursorMem;
diff --git a/src/i830_exa.c b/src/i830_exa.c
index 9b2e6b2..8b07ecb 100644
--- a/src/i830_exa.c
+++ b/src/i830_exa.c
@@ -424,7 +424,6 @@ IntelEXADoneComposite(PixmapPtr pDst)
#endif
}
-#define BRW_LINEAR_EXTRA (32*1024)
/*
* TODO:
* - Dual head?
@@ -447,11 +446,7 @@ I830EXAInit(ScreenPtr pScreen)
pI830->EXADriverPtr->exa_minor = 0;
pI830->EXADriverPtr->memoryBase = pI830->FbBase;
pI830->EXADriverPtr->offScreenBase = pI830->Offscreen.Start;
- if (IS_I965G(pI830))
- pI830->EXADriverPtr->memorySize = pI830->Offscreen.End -
- BRW_LINEAR_EXTRA; /* BRW needs state buffer*/
- else
- pI830->EXADriverPtr->memorySize = pI830->Offscreen.End;
+ pI830->EXADriverPtr->memorySize = pI830->Offscreen.End;
DPRINTF(PFX, "EXA Mem: memoryBase 0x%x, end 0x%x, offscreen base 0x%x, memorySize 0x%x\n",
pI830->EXADriverPtr->memoryBase,
diff --git a/src/i830_memory.c b/src/i830_memory.c
index e3307d6..4a8d480 100644
--- a/src/i830_memory.c
+++ b/src/i830_memory.c
@@ -905,6 +905,25 @@ I830Allocate2DMemory(ScrnInfoPtr pScrn,
"offscreen memory at 0x%lx, size %ld KB\n",
pI830->Offscreen.Start, pI830->Offscreen.Size/1024);
}
+ if (IS_I965G(pI830)) {
+ memset(&(pI830->EXAStateMem), 0, sizeof(I830MemRange));
+ pI830->EXAStateMem.Key = -1;
+ size = ROUND_TO_PAGE(EXA_LINEAR_EXTRA);
+ align = GTT_PAGE_SIZE;
+ alloced = I830AllocVidMem(pScrn, &(pI830->EXAStateMem),
+ &(pI830->StolenPool), size, align,
+ flags | FROM_ANYWHERE | ALLOCATE_AT_TOP);
+ if (alloced < size) {
+ if (!dryrun) {
+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+ "G965: Failed to allocate exa state buffer space.\n");
+ }
+ return FALSE;
+ }
+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, verbosity,
+ "%sAllocated %ld kB for the G965 exa state buffer at 0x%lx - 0x%lx.\n", s,
+ alloced / 1024, pI830->EXAStateMem.Start, pI830->EXAStateMem.End);
+ }
#endif
} else {
long lineSize;
@@ -1545,6 +1564,11 @@ I830FixupOffsets(ScrnInfoPtr pScrn)
I830FixOffset(pScrn, &(pI830->TexMem));
}
#endif
+#ifdef I830_USE_EXA
+ I830FixOffset(pScrn, &(pI830->Offscreen));
+ if (IS_I965G(pI830))
+ I830FixOffset(pScrn, &(pI830->EXAStateMem));
+#endif
return TRUE;
}
@@ -1945,6 +1969,12 @@ I830BindAGPMemory(ScrnInfoPtr pScrn)
return FALSE;
}
#endif
+#ifdef I830_USE_EXA
+ if (!BindMemRange(pScrn, &(pI830->Offscreen)))
+ return FALSE;
+ if (IS_I965G(pI830) && !BindMemRange(pScrn, &(pI830->EXAStateMem)))
+ return FALSE;
+#endif
pI830->GttBound = 1;
}
@@ -2029,6 +2059,12 @@ I830UnbindAGPMemory(ScrnInfoPtr pScrn)
return FALSE;
}
#endif
+#ifdef I830_USE_EXA
+ if (!UnbindMemRange(pScrn, &(pI830->Offscreen)))
+ return FALSE;
+ if (IS_I965G(pI830) && !UnbindMemRange(pScrn, &(pI830->EXAStateMem)))
+ return FALSE;
+#endif
if (!xf86ReleaseGART(pScrn->scrnIndex))
return FALSE;
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 7fbf99c..fe3007b 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -490,21 +490,12 @@ ErrorF("i965 prepareComposite\n");
next_offset = binding_table_offset + (binding_table_entries * 4);
total_state_size = next_offset;
+ assert(total_state_size < EXA_LINEAR_EXTRA);
- /*
- * XXX: Use the extra space allocated at the end of the exa offscreen buffer?
- */
-#define BRW_LINEAR_EXTRA (32*1024)
-
- state_base_offset = (pI830->Offscreen.End -
- BRW_LINEAR_EXTRA);
-
+ state_base_offset = pI830->EXAStateMem.Start;
state_base_offset = ALIGN(state_base_offset, 64);
state_base = (char *)(pI830->FbBase + state_base_offset);
- /* Set up our pointers to state structures in framebuffer. It would probably
- * be a good idea to fill these structures out in system memory and then dump
- * them there, instead.
- */
+
vs_state = (void *)(state_base + vs_offset);
sf_state = (void *)(state_base + sf_offset);
wm_state = (void *)(state_base + wm_offset);
More information about the xorg-commit
mailing list