xf86-video-intel: 3 commits - src/i830_accel.c src/i830_driver.c src/i830.h src/i830_memory.c src/i965_render.c

Eric Anholt anholt at kemper.freedesktop.org
Thu Apr 10 14:10:11 PDT 2008


 src/i830.h        |   10 +
 src/i830_accel.c  |    3 
 src/i830_driver.c |    6 +
 src/i830_memory.c |   13 +-
 src/i965_render.c |  319 ++++++++++++++++++++++++++++++++++--------------------
 5 files changed, 230 insertions(+), 121 deletions(-)

New commits:
commit 2871ac8eefd0192080bb0569140c3f5d0e1d9b44
Author: Eric Anholt <eric at anholt.net>
Date:   Thu Apr 10 13:34:13 2008 -0700

    Statically allocate the sampler default color, which we never change.
    
    The performance change is in the noise.  This is also from Carl Worth.

diff --git a/src/i965_render.c b/src/i965_render.c
index 4b42db9..1b4afcc 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -268,7 +268,6 @@ static struct brw_surface_state *src_surf_state, src_surf_state_local;
 static struct brw_surface_state *mask_surf_state, mask_surf_state_local;
 static struct brw_sampler_state *src_sampler_state, src_sampler_state_local;
 static struct brw_sampler_state *mask_sampler_state, mask_sampler_state_local;
-static struct brw_sampler_default_color *default_color_state;
 
 static struct brw_vs_unit_state *vs_state, vs_state_local;
 static struct brw_sf_unit_state *sf_state, sf_state_local;
@@ -284,7 +283,6 @@ static int src_sampler_offset, mask_sampler_offset,vs_offset;
 static int sf_offset, wm_offset, cc_offset, vb_offset, cc_viewport_offset;
 static int wm_scratch_offset;
 static int binding_table_offset;
-static int default_color_offset;
 static int next_offset, total_state_size;
 static char *state_base;
 static int state_base_offset;
@@ -418,6 +416,13 @@ static const uint32_t ps_kernel_masknoca_projective_static [][4] = {
 #define KERNEL_DECL(template) \
     uint32_t template [((sizeof (template ## _static) + 63) & ~63) / 16][4];
 
+/* Many of the fields in the state structure must be aligned to a
+ * 64-byte boundary (or to a 32-byte boundary, but 64 is good enough
+ * for those too).
+ */
+#define PAD64_MULTI(previous, idx, factor) char previous ## _pad ## idx [(64 - (sizeof(struct previous) * (factor)) % 64) % 64]
+#define PAD64(previous, idx) PAD64_MULTI(previous, idx, 1)
+
 /**
  * Gen4 rendering state buffer structure.
  *
@@ -441,6 +446,9 @@ typedef struct _gen4_state {
     KERNEL_DECL (ps_kernel_masknoca_affine);
     KERNEL_DECL (ps_kernel_masknoca_projective);
 
+    struct brw_sampler_default_color sampler_default_color;
+    PAD64 (brw_sampler_default_color, 0);
+
     uint8_t other_state[65536];
 } gen4_state_t;
 
@@ -465,6 +473,13 @@ gen4_state_init (gen4_state_t *state)
     KERNEL_COPY (ps_kernel_masknoca_affine);
     KERNEL_COPY (ps_kernel_masknoca_projective);
 
+    memset(&state->sampler_default_color, 0,
+	   sizeof(state->sampler_default_color));
+    state->sampler_default_color.color[0] = 0.0; /* R */
+    state->sampler_default_color.color[1] = 0.0; /* G */
+    state->sampler_default_color.color[2] = 0.0; /* B */
+    state->sampler_default_color.color[3] = 0.0; /* A */
+
 #undef KERNEL_COPY
 }
 
@@ -592,9 +607,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     binding_table_offset = ALIGN(next_offset, 32);
     next_offset = binding_table_offset + (binding_table_entries * 4);
 
-    default_color_offset = ALIGN(next_offset, 32);
-    next_offset = default_color_offset + sizeof(*default_color_state);
-
     total_state_size = next_offset;
     assert(total_state_size < sizeof(gen4_state_t));
 
@@ -608,8 +620,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
 
     vb = (void *)(state_base + vb_offset);
 
-    default_color_state = (void*)(state_base + default_color_offset);
-
     /* Set up a default static partitioning of the URB, which is supposed to
      * allow anything we would want to do, at potentially lower performance.
      */
@@ -793,12 +803,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
 	I830FALLBACK("Bad filter 0x%x\n", pSrcPicture->filter);
     }
 
-    memset(default_color_state, 0, sizeof(*default_color_state));
-    default_color_state->color[0] = 0.0; /* R */
-    default_color_state->color[1] = 0.0; /* G */
-    default_color_state->color[2] = 0.0; /* B */
-    default_color_state->color[3] = 0.0; /* A */
-
     src_sampler_state->ss0.default_color_mode = 0; /* GL mode */
 
     if (!pSrcPicture->repeat) {
@@ -806,7 +810,8 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
    	src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
    	src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
 	src_sampler_state->ss2.default_color_pointer =
-	    (state_base_offset + default_color_offset) >> 5;
+	    (state_base_offset +
+	     offsetof(gen4_state_t, sampler_default_color)) >> 5;
     } else {
    	src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
    	src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
@@ -842,8 +847,9 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
 		BRW_TEXCOORDMODE_CLAMP_BORDER;
    	    mask_sampler_state->ss1.t_wrap_mode =
 		BRW_TEXCOORDMODE_CLAMP_BORDER;
-            mask_sampler_state->ss2.default_color_pointer =
-		(state_base_offset + default_color_offset)>>5;
+	    mask_sampler_state->ss2.default_color_pointer =
+		(state_base_offset +
+		 offsetof(gen4_state_t, sampler_default_color)) >> 5;
    	} else {
    	    mask_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
    	    mask_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
commit 80dd784e33847e431403d4659a7b8d8425b2676f
Author: Eric Anholt <eric at anholt.net>
Date:   Thu Apr 10 13:24:51 2008 -0700

    Add copyright information for recent editors of this file.

diff --git a/src/i965_render.c b/src/i965_render.c
index cd07a02..4b42db9 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -1,5 +1,6 @@
 /*
- * Copyright © 2006 Intel Corporation
+ * Copyright © 2006,2008 Intel Corporation
+ * Copyright © 2007 Red Hat, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -23,6 +24,8 @@
  * Authors:
  *    Wang Zhenyu <zhenyu.z.wang at intel.com>
  *    Eric Anholt <eric at anholt.net>
+ *    Carl Worth <cworth at redhat.com>
+ *    Keith Packard <keithp at keithp.com>
  *
  */
 
commit b606278db83ec84b1db562a2d65697c50561b169
Author: Eric Anholt <eric at anholt.net>
Date:   Thu Apr 10 13:17:58 2008 -0700

    Keep static copies of the 965 render programs in video memory.
    
    This avoids memcpying them in on every prepare_composite call, reducing
    CPU overhead for a speedup in aa24text of around 30%.  This is based on
    work by Carl Worth, which is in the intel-batchbuffer branch.

diff --git a/src/i830.h b/src/i830.h
index 318b188..6465bd6 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -85,7 +85,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #ifdef I830_USE_EXA
 #include "exa.h"
 Bool I830EXAInit(ScreenPtr pScreen);
-#define EXA_LINEAR_EXTRA	(64*1024)
 unsigned long long I830TexOffsetStart(PixmapPtr pPix);
 #endif
 
@@ -398,7 +397,7 @@ typedef struct _I830Rec {
    i830_memory *xaa_scratch_2;
 #ifdef I830_USE_EXA
    i830_memory *exa_offscreen;
-   i830_memory *exa_965_state;
+   i830_memory *gen4_render_state_mem;
 #endif
    /* Regions allocated either from the above pools, or from agpgart. */
    I830RingBuffer *LpRing;
@@ -531,6 +530,9 @@ typedef struct _I830Rec {
    uint32_t mapstate[6];
    uint32_t samplerstate[6];
 
+   /* 965 render acceleration state */
+   struct gen4_render_state *gen4_render_state;
+
    Bool directRenderingDisabled;	/* DRI disabled in PreInit. */
    Bool directRenderingEnabled;		/* DRI enabled this generation. */
 
@@ -824,6 +826,10 @@ Bool i915_prepare_composite(int op, PicturePtr pSrc, PicturePtr pMask,
 			    PicturePtr pDst, PixmapPtr pSrcPixmap,
 			    PixmapPtr pMaskPixmap, PixmapPtr pDstPixmap);
 /* i965_render.c */
+unsigned int gen4_render_state_size(ScrnInfoPtr pScrn);
+void gen4_render_state_init(ScrnInfoPtr pScrn);
+void gen4_render_state_cleanup(ScrnInfoPtr pScrn);
+void gen4_render_state_reset(ScrnInfoPtr pScrn);
 Bool i965_check_composite(int op, PicturePtr pSrc, PicturePtr pMask,
 			  PicturePtr pDst);
 Bool i965_prepare_composite(int op, PicturePtr pSrc, PicturePtr pMask,
diff --git a/src/i830_accel.c b/src/i830_accel.c
index 953a73b..0194f00 100644
--- a/src/i830_accel.c
+++ b/src/i830_accel.c
@@ -205,6 +205,9 @@ I830Sync(ScrnInfoPtr pScrn)
 
    pI830->LpRing->space = pI830->LpRing->mem->size - 8;
    pI830->nextColorExpandBuf = 0;
+
+   if (IS_I965G(pI830))
+       gen4_render_state_reset(pScrn);
 }
 
 void
diff --git a/src/i830_driver.c b/src/i830_driver.c
index 66153b7..ea37e6d 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -3190,6 +3190,9 @@ I830LeaveVT(int scrnIndex, int flags)
    }
 #endif /* XF86DRI_MM */
 
+   if (IS_I965G(pI830))
+      gen4_render_state_cleanup(pScrn);
+
    if (pI830->AccelInfoRec)
       pI830->AccelInfoRec->NeedToSync = FALSE;
 }
@@ -3236,6 +3239,9 @@ I830EnterVT(int scrnIndex, int flags)
    /* Update the screen pixmap in case the buffer moved */
    i830_update_front_offset(pScrn);
 
+   if (IS_I965G(pI830))
+      gen4_render_state_init(pScrn);
+
    if (i830_check_error_state(pScrn)) {
       xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
 		 "Existing errors found in hardware state.\n");
diff --git a/src/i830_memory.c b/src/i830_memory.c
index 6835a6f..84db0ef 100644
--- a/src/i830_memory.c
+++ b/src/i830_memory.c
@@ -334,7 +334,7 @@ i830_reset_allocations(ScrnInfoPtr pScrn)
     pI830->xaa_scratch = NULL;
     pI830->xaa_scratch_2 = NULL;
     pI830->exa_offscreen = NULL;
-    pI830->exa_965_state = NULL;
+    pI830->gen4_render_state_mem = NULL;
     pI830->overlay_regs = NULL;
     pI830->logical_context = NULL;
 #ifdef XF86DRI
@@ -1370,11 +1370,14 @@ i830_allocate_2d_memory(ScrnInfoPtr pScrn)
     }
 
     /* even in XAA, 965G needs state mem buffer for rendering */
-    if (IS_I965G(pI830) && !pI830->noAccel && pI830->exa_965_state == NULL) {
-	pI830->exa_965_state =
+    if (IS_I965G(pI830) && !pI830->noAccel &&
+	pI830->gen4_render_state_mem == NULL)
+    {
+	pI830->gen4_render_state_mem =
 	    i830_allocate_memory(pScrn, "exa G965 state buffer",
-		    EXA_LINEAR_EXTRA, GTT_PAGE_SIZE, 0);
-	if (pI830->exa_965_state == NULL) {
+				 gen4_render_state_size(pScrn),
+				 GTT_PAGE_SIZE, 0);
+	if (pI830->gen4_render_state_mem == NULL) {
 	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
 		    "Failed to allocate exa state buffer for 965.\n");
 	    return FALSE;
diff --git a/src/i965_render.c b/src/i965_render.c
index 96082bb..cd07a02 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -113,6 +113,12 @@ static struct formatinfo i965_tex_formats[] = {
     {PICT_a8,       BRW_SURFACEFORMAT_A8_UNORM	 },
 };
 
+/** Private data for gen4 render accel implementation. */
+struct gen4_render_state {
+    unsigned char *state_addr;
+    unsigned int state_offset;
+};
+
 static void i965_get_blend_cntl(int op, PicturePtr pMask, uint32_t dst_format,
 				uint32_t *sblend, uint32_t *dblend)
 {
@@ -267,17 +273,12 @@ static struct brw_wm_unit_state *wm_state, wm_state_local;
 static struct brw_cc_unit_state *cc_state, cc_state_local;
 static struct brw_cc_viewport *cc_viewport;
 
-static struct brw_instruction *sf_kernel;
-static struct brw_instruction *ps_kernel;
-static struct brw_instruction *sip_kernel;
-
 static uint32_t *binding_table;
 static int binding_table_entries;
 
 static int dest_surf_offset, src_surf_offset, mask_surf_offset;
 static int src_sampler_offset, mask_sampler_offset,vs_offset;
 static int sf_offset, wm_offset, cc_offset, vb_offset, cc_viewport_offset;
-static int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset;
 static int wm_scratch_offset;
 static int binding_table_offset;
 static int default_color_offset;
@@ -324,7 +325,7 @@ static const uint32_t sf_kernel_static[][4] = {
 #include "exa_sf.g4b"
 };
 
-static const uint32_t sf_kernel_static_mask[][4] = {
+static const uint32_t sf_kernel_mask_static[][4] = {
 #include "exa_sf_mask.g4b"
 };
 
@@ -334,21 +335,21 @@ static const uint32_t sf_kernel_static_mask[][4] = {
 #define PS_SCRATCH_SPACE    1024
 #define PS_SCRATCH_SPACE_LOG	0   /* log2 (PS_SCRATCH_SPACE) - 10  (1024 is 0, 2048 is 1) */
 
-static const uint32_t ps_kernel_static_nomask_affine [][4] = {
+static const uint32_t ps_kernel_nomask_affine_static [][4] = {
 #include "exa_wm_xy.g4b"
 #include "exa_wm_src_affine.g4b"
 #include "exa_wm_src_sample_argb.g4b"
 #include "exa_wm_write.g4b"
 };
 
-static const uint32_t ps_kernel_static_nomask_projective [][4] = {
+static const uint32_t ps_kernel_nomask_projective_static [][4] = {
 #include "exa_wm_xy.g4b"
 #include "exa_wm_src_projective.g4b"
 #include "exa_wm_src_sample_argb.g4b"
 #include "exa_wm_write.g4b"
 };
 
-static const uint32_t ps_kernel_static_maskca_affine [][4] = {
+static const uint32_t ps_kernel_maskca_affine_static [][4] = {
 #include "exa_wm_xy.g4b"
 #include "exa_wm_src_affine.g4b"
 #include "exa_wm_src_sample_argb.g4b"
@@ -358,7 +359,7 @@ static const uint32_t ps_kernel_static_maskca_affine [][4] = {
 #include "exa_wm_write.g4b"
 };
 
-static const uint32_t ps_kernel_static_maskca_projective [][4] = {
+static const uint32_t ps_kernel_maskca_projective_static [][4] = {
 #include "exa_wm_xy.g4b"
 #include "exa_wm_src_projective.g4b"
 #include "exa_wm_src_sample_argb.g4b"
@@ -368,7 +369,7 @@ static const uint32_t ps_kernel_static_maskca_projective [][4] = {
 #include "exa_wm_write.g4b"
 };
 
-static const uint32_t ps_kernel_static_maskca_srcalpha_affine [][4] = {
+static const uint32_t ps_kernel_maskca_srcalpha_affine_static [][4] = {
 #include "exa_wm_xy.g4b"
 #include "exa_wm_src_affine.g4b"
 #include "exa_wm_src_sample_a.g4b"
@@ -378,7 +379,7 @@ static const uint32_t ps_kernel_static_maskca_srcalpha_affine [][4] = {
 #include "exa_wm_write.g4b"
 };
 
-static const uint32_t ps_kernel_static_maskca_srcalpha_projective [][4] = {
+static const uint32_t ps_kernel_maskca_srcalpha_projective_static [][4] = {
 #include "exa_wm_xy.g4b"
 #include "exa_wm_src_projective.g4b"
 #include "exa_wm_src_sample_a.g4b"
@@ -388,7 +389,7 @@ static const uint32_t ps_kernel_static_maskca_srcalpha_projective [][4] = {
 #include "exa_wm_write.g4b"
 };
 
-static const uint32_t ps_kernel_static_masknoca_affine [][4] = {
+static const uint32_t ps_kernel_masknoca_affine_static [][4] = {
 #include "exa_wm_xy.g4b"
 #include "exa_wm_src_affine.g4b"
 #include "exa_wm_src_sample_argb.g4b"
@@ -398,7 +399,7 @@ static const uint32_t ps_kernel_static_masknoca_affine [][4] = {
 #include "exa_wm_write.g4b"
 };
 
-static const uint32_t ps_kernel_static_masknoca_projective [][4] = {
+static const uint32_t ps_kernel_masknoca_projective_static [][4] = {
 #include "exa_wm_xy.g4b"
 #include "exa_wm_src_projective.g4b"
 #include "exa_wm_src_sample_argb.g4b"
@@ -408,6 +409,62 @@ static const uint32_t ps_kernel_static_masknoca_projective [][4] = {
 #include "exa_wm_write.g4b"
 };
 
+/**
+ * Storage for the static kernel data with template name, rounded to 64 bytes.
+ */
+#define KERNEL_DECL(template) \
+    uint32_t template [((sizeof (template ## _static) + 63) & ~63) / 16][4];
+
+/**
+ * Gen4 rendering state buffer structure.
+ *
+ * Ideally this structure would contain static data for all of the
+ * combinations of state that we use for Render acceleration, and another
+ * buffer would be the use-and-throw-away surface and vertex data.  See the
+ * intel-batchbuffer branch for an implementation of that.  For now, it
+ * has the static program data, and then a changing buffer containing all
+ * the rest.
+ */
+typedef struct _gen4_state {
+    KERNEL_DECL (sip_kernel);
+    KERNEL_DECL (sf_kernel);
+    KERNEL_DECL (sf_kernel_mask);
+    KERNEL_DECL (ps_kernel_nomask_affine);
+    KERNEL_DECL (ps_kernel_nomask_projective);
+    KERNEL_DECL (ps_kernel_maskca_affine);
+    KERNEL_DECL (ps_kernel_maskca_projective);
+    KERNEL_DECL (ps_kernel_maskca_srcalpha_affine);
+    KERNEL_DECL (ps_kernel_maskca_srcalpha_projective);
+    KERNEL_DECL (ps_kernel_masknoca_affine);
+    KERNEL_DECL (ps_kernel_masknoca_projective);
+
+    uint8_t other_state[65536];
+} gen4_state_t;
+
+/**
+ * Called at EnterVT to fill in our state buffer with any static information.
+ */
+static void
+gen4_state_init (gen4_state_t *state)
+{
+#define KERNEL_COPY(kernel) \
+    memcpy(state->kernel, kernel ## _static, sizeof(kernel ## _static))
+
+    KERNEL_COPY (sip_kernel);
+    KERNEL_COPY (sf_kernel);
+    KERNEL_COPY (sf_kernel_mask);
+    KERNEL_COPY (ps_kernel_nomask_affine);
+    KERNEL_COPY (ps_kernel_nomask_projective);
+    KERNEL_COPY (ps_kernel_maskca_affine);
+    KERNEL_COPY (ps_kernel_maskca_projective);
+    KERNEL_COPY (ps_kernel_maskca_srcalpha_affine);
+    KERNEL_COPY (ps_kernel_maskca_srcalpha_projective);
+    KERNEL_COPY (ps_kernel_masknoca_affine);
+    KERNEL_COPY (ps_kernel_masknoca_projective);
+
+#undef KERNEL_COPY
+}
+
 static uint32_t 
 i965_get_card_format(PicturePtr pPict)
 {
@@ -484,7 +541,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     binding_table_entries = 2; /* default no mask */
 
     /* Set up our layout of state in framebuffer.  First the general state: */
-    next_offset = 0;
+    next_offset = offsetof(gen4_state_t, other_state);
     vs_offset = ALIGN(next_offset, 64);
     next_offset = vs_offset + sizeof(*vs_state);
 
@@ -500,46 +557,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     cc_offset = ALIGN(next_offset, 32);
     next_offset = cc_offset + sizeof(*cc_state);
 
-    /* keep current sf_kernel, which will send one setup urb entry to
-     * PS kernel
-     */
-    sf_kernel_offset = ALIGN(next_offset, 64);
-    if (pMask)
-	next_offset = sf_kernel_offset + sizeof (sf_kernel_static_mask);
-    else 
-	next_offset = sf_kernel_offset + sizeof (sf_kernel_static);
-
-    ps_kernel_offset = ALIGN(next_offset, 64);
-    if (pMask) {
-	if (pMaskPicture->componentAlpha && 
-                PICT_FORMAT_RGB(pMaskPicture->format)) {
-            if (i965_blend_op[op].src_alpha) {
-		if (is_affine)
-		    next_offset = ps_kernel_offset + sizeof(ps_kernel_static_maskca_srcalpha_affine);
-		else
-		    next_offset = ps_kernel_offset + sizeof(ps_kernel_static_maskca_srcalpha_projective);
-            } else {
-		if (is_affine)
-		    next_offset = ps_kernel_offset + sizeof(ps_kernel_static_maskca_affine);
-		else
-		    next_offset = ps_kernel_offset + sizeof(ps_kernel_static_maskca_projective);
-            }
-        } else {
-	    if (is_affine)
-		next_offset = ps_kernel_offset + sizeof(ps_kernel_static_masknoca_affine);
-	    else
-		next_offset = ps_kernel_offset + sizeof(ps_kernel_static_masknoca_projective);
-	}
-    } else {
-	if (is_affine)
-	    next_offset = ps_kernel_offset + sizeof (ps_kernel_static_nomask_affine);
-	else
-	    next_offset = ps_kernel_offset + sizeof (ps_kernel_static_nomask_projective);
-    }
-
-    sip_kernel_offset = ALIGN(next_offset, 64);
-    next_offset = sip_kernel_offset + sizeof (sip_kernel_static);
-
     /* needed? */
     cc_viewport_offset = ALIGN(next_offset, 32);
     next_offset = cc_viewport_offset + sizeof(*cc_viewport);
@@ -576,16 +593,12 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     next_offset = default_color_offset + sizeof(*default_color_state);
 
     total_state_size = next_offset;
-    assert(total_state_size < pI830->exa_965_state->size);
+    assert(total_state_size < sizeof(gen4_state_t));
 
-    state_base_offset = pI830->exa_965_state->offset;
-    state_base_offset = ALIGN(state_base_offset, 64);
+    state_base_offset = pI830->gen4_render_state_mem->offset;
+    assert((state_base_offset & 63) == 0);
     state_base = (char *)(pI830->FbBase + state_base_offset);
 
-    sf_kernel = (void *)(state_base + sf_kernel_offset);
-    ps_kernel = (void *)(state_base + ps_kernel_offset);
-    sip_kernel = (void *)(state_base + sip_kernel_offset);
-
     cc_viewport = (void *)(state_base + cc_viewport_offset);
 
     binding_table = (void *)(state_base + binding_table_offset);
@@ -664,9 +677,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     cc_state = (void *)(state_base + cc_offset);
     memcpy (cc_state, &cc_state_local, sizeof (cc_state_local));
 
-    /* Upload system kernel */
-    memcpy (sip_kernel, sip_kernel_static, sizeof (sip_kernel_static));
-
     /* Set up the state buffer for the destination surface */
     dest_surf_state = &dest_surf_state_local;
     memset(dest_surf_state, 0, sizeof(*dest_surf_state));
@@ -857,16 +867,15 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
      * calculate dA/dx and dA/dy.  Hand these interpolation coefficients
      * back to SF which then hands pixels off to WM.
      */
-    if (pMask)
-	memcpy(sf_kernel, sf_kernel_static_mask,
-		sizeof (sf_kernel_static_mask));
-    else
-	memcpy(sf_kernel, sf_kernel_static, sizeof (sf_kernel_static));
-
     sf_state = &sf_state_local;
     memset(sf_state, 0, sizeof(*sf_state));
-    sf_state->thread0.kernel_start_pointer =
-	(state_base_offset + sf_kernel_offset) >> 6;
+    if (pMask) {
+	sf_state->thread0.kernel_start_pointer = (state_base_offset +
+		       offsetof(gen4_state_t, sf_kernel_mask)) >> 6;
+    } else {
+	sf_state->thread0.kernel_start_pointer = (state_base_offset +
+		       offsetof(gen4_state_t, sf_kernel)) >> 6;
+    }
     sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
     sf_state->sf1.single_program_flow = 1;
     sf_state->sf1.binding_table_entry_count = 0;
@@ -899,37 +908,64 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     memcpy (sf_state, &sf_state_local, sizeof (sf_state_local));
 
    /* Set up the PS kernel (dispatched by WM) */
+    wm_state = &wm_state_local;
+    memset(wm_state, 0, sizeof (*wm_state));
     if (pMask) {
-	if (pMaskPicture->componentAlpha && 
-                PICT_FORMAT_RGB(pMaskPicture->format)) {
+	if (pMaskPicture->componentAlpha &&
+	    PICT_FORMAT_RGB(pMaskPicture->format))
+	{
             if (i965_blend_op[op].src_alpha) {
-		if (is_affine)
-		    memcpy(ps_kernel, ps_kernel_static_maskca_srcalpha_affine, sizeof (ps_kernel_static_maskca_srcalpha_affine));
-		else
-                    memcpy(ps_kernel, ps_kernel_static_maskca_srcalpha_projective, sizeof (ps_kernel_static_maskca_srcalpha_projective));
+		if (is_affine) {
+		    wm_state->thread0.kernel_start_pointer =
+			(state_base_offset +
+			 offsetof(gen4_state_t,
+				  ps_kernel_maskca_srcalpha_affine)) >> 6;
+		} else {
+		    wm_state->thread0.kernel_start_pointer =
+			(state_base_offset +
+			 offsetof(gen4_state_t,
+				  ps_kernel_maskca_srcalpha_projective)) >> 6;
+		}
             } else {
-		if (is_affine)
-		    memcpy(ps_kernel, ps_kernel_static_maskca_affine, sizeof (ps_kernel_static_maskca_affine));
-		else
-		    memcpy(ps_kernel, ps_kernel_static_maskca_projective, sizeof (ps_kernel_static_maskca_projective));
-	    }
+		if (is_affine) {
+		    wm_state->thread0.kernel_start_pointer =
+			(state_base_offset +
+			 offsetof(gen4_state_t,
+				  ps_kernel_maskca_affine)) >> 6;
+		} else {
+		    wm_state->thread0.kernel_start_pointer =
+			(state_base_offset +
+			 offsetof(gen4_state_t,
+				  ps_kernel_maskca_projective)) >> 6;
+		}
+            }
         } else {
-	    if (is_affine)
-		memcpy(ps_kernel, ps_kernel_static_masknoca_affine, sizeof (ps_kernel_static_masknoca_affine));
-	    else
-		memcpy(ps_kernel, ps_kernel_static_masknoca_projective, sizeof (ps_kernel_static_masknoca_projective));
+	    if (is_affine) {
+		wm_state->thread0.kernel_start_pointer =
+		    (state_base_offset +
+		     offsetof(gen4_state_t,
+			      ps_kernel_masknoca_affine)) >> 6;
+	    } else {
+		wm_state->thread0.kernel_start_pointer =
+		    (state_base_offset +
+		     offsetof(gen4_state_t,
+			      ps_kernel_masknoca_projective)) >> 6;
+	    }
 	}
     } else {
-	if (is_affine)
-	    memcpy(ps_kernel, ps_kernel_static_nomask_affine, sizeof (ps_kernel_static_nomask_affine));
-	else
-	    memcpy(ps_kernel, ps_kernel_static_nomask_projective, sizeof (ps_kernel_static_nomask_projective));
+	if (is_affine) {
+	    wm_state->thread0.kernel_start_pointer =
+		(state_base_offset +
+		 offsetof(gen4_state_t,
+			  ps_kernel_nomask_affine)) >> 6;
+	} else {
+	    wm_state->thread0.kernel_start_pointer =
+		(state_base_offset +
+		 offsetof(gen4_state_t,
+			  ps_kernel_nomask_projective)) >> 6;
+	}
     }
 
-    wm_state = &wm_state_local;
-    memset(wm_state, 0, sizeof (*wm_state));
-    wm_state->thread0.kernel_start_pointer =
-	(state_base_offset + ps_kernel_offset) >> 6;
     wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
     wm_state->thread1.single_program_flow = 0;
     if (!pMask)
@@ -1006,7 +1042,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
 
 	/* Set system instruction pointer */
 	OUT_BATCH(BRW_STATE_SIP | 0);
-	OUT_BATCH(state_base_offset + sip_kernel_offset);
+	OUT_BATCH(state_base_offset + offsetof(gen4_state_t, sip_kernel));
 	OUT_BATCH(MI_NOOP);
 	ADVANCE_BATCH();
     }
@@ -1328,3 +1364,49 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
      */
     i830MarkSync(pScrn);
 }
+
+/**
+ * Called at EnterVT so we can set up our offsets into the state buffer.
+ */
+void
+gen4_render_state_init(ScrnInfoPtr pScrn)
+{
+    I830Ptr pI830 = I830PTR(pScrn);
+    struct gen4_render_state *state;
+
+    if (pI830->gen4_render_state == NULL)
+	pI830->gen4_render_state = calloc(sizeof(*state), 1);
+
+    state = pI830->gen4_render_state;
+
+    state->state_offset = pI830->gen4_render_state_mem->offset;
+    state->state_addr = pI830->FbBase + pI830->gen4_render_state_mem->offset;
+
+    gen4_state_init((gen4_state_t *)state->state_addr);
+}
+
+/**
+ * Called at LeaveVT.
+ */
+void
+gen4_render_state_cleanup(ScrnInfoPtr pScrn)
+{
+    I830Ptr pI830 = I830PTR(pScrn);
+
+    pI830->gen4_render_state->state_addr = NULL;
+}
+
+/**
+ * Called when the hardware is idled and flushed, so we know we can
+ * reuse the buffer contents.
+ */
+void
+gen4_render_state_reset(ScrnInfoPtr pScrn)
+{
+}
+
+unsigned int
+gen4_render_state_size(ScrnInfoPtr pScrn)
+{
+    return sizeof(gen4_state_t);
+}


More information about the xorg-commit mailing list