xf86-video-intel: 3 commits - src/i830_accel.c src/i830_driver.c src/i830.h src/i830_memory.c src/i965_render.c
Eric Anholt
anholt at kemper.freedesktop.org
Thu Apr 10 14:10:11 PDT 2008
src/i830.h | 10 +
src/i830_accel.c | 3
src/i830_driver.c | 6 +
src/i830_memory.c | 13 +-
src/i965_render.c | 319 ++++++++++++++++++++++++++++++++++--------------------
5 files changed, 230 insertions(+), 121 deletions(-)
New commits:
commit 2871ac8eefd0192080bb0569140c3f5d0e1d9b44
Author: Eric Anholt <eric at anholt.net>
Date: Thu Apr 10 13:34:13 2008 -0700
Statically allocate the sampler default color, which we never change.
Performance change is in the noise. Also from Carl Worth.
diff --git a/src/i965_render.c b/src/i965_render.c
index 4b42db9..1b4afcc 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -268,7 +268,6 @@ static struct brw_surface_state *src_surf_state, src_surf_state_local;
static struct brw_surface_state *mask_surf_state, mask_surf_state_local;
static struct brw_sampler_state *src_sampler_state, src_sampler_state_local;
static struct brw_sampler_state *mask_sampler_state, mask_sampler_state_local;
-static struct brw_sampler_default_color *default_color_state;
static struct brw_vs_unit_state *vs_state, vs_state_local;
static struct brw_sf_unit_state *sf_state, sf_state_local;
@@ -284,7 +283,6 @@ static int src_sampler_offset, mask_sampler_offset,vs_offset;
static int sf_offset, wm_offset, cc_offset, vb_offset, cc_viewport_offset;
static int wm_scratch_offset;
static int binding_table_offset;
-static int default_color_offset;
static int next_offset, total_state_size;
static char *state_base;
static int state_base_offset;
@@ -418,6 +416,13 @@ static const uint32_t ps_kernel_masknoca_projective_static [][4] = {
#define KERNEL_DECL(template) \
uint32_t template [((sizeof (template ## _static) + 63) & ~63) / 16][4];
+/* Many of the fields in the state structure must be aligned to a
+ * 64-byte boundary (or a 32-byte boundary, but 64 is good enough for
+ * those too).
+ */
+#define PAD64_MULTI(previous, idx, factor) char previous ## _pad ## idx [(64 - (sizeof(struct previous) * (factor)) % 64) % 64]
+#define PAD64(previous, idx) PAD64_MULTI(previous, idx, 1)
+
/**
* Gen4 rendering state buffer structure.
*
@@ -441,6 +446,9 @@ typedef struct _gen4_state {
KERNEL_DECL (ps_kernel_masknoca_affine);
KERNEL_DECL (ps_kernel_masknoca_projective);
+ struct brw_sampler_default_color sampler_default_color;
+ PAD64 (brw_sampler_default_color, 0);
+
uint8_t other_state[65536];
} gen4_state_t;
@@ -465,6 +473,13 @@ gen4_state_init (gen4_state_t *state)
KERNEL_COPY (ps_kernel_masknoca_affine);
KERNEL_COPY (ps_kernel_masknoca_projective);
+ memset(&state->sampler_default_color, 0,
+ sizeof(state->sampler_default_color));
+ state->sampler_default_color.color[0] = 0.0; /* R */
+ state->sampler_default_color.color[1] = 0.0; /* G */
+ state->sampler_default_color.color[2] = 0.0; /* B */
+ state->sampler_default_color.color[3] = 0.0; /* A */
+
#undef KERNEL_COPY
}
@@ -592,9 +607,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
binding_table_offset = ALIGN(next_offset, 32);
next_offset = binding_table_offset + (binding_table_entries * 4);
- default_color_offset = ALIGN(next_offset, 32);
- next_offset = default_color_offset + sizeof(*default_color_state);
-
total_state_size = next_offset;
assert(total_state_size < sizeof(gen4_state_t));
@@ -608,8 +620,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
vb = (void *)(state_base + vb_offset);
- default_color_state = (void*)(state_base + default_color_offset);
-
/* Set up a default static partitioning of the URB, which is supposed to
* allow anything we would want to do, at potentially lower performance.
*/
@@ -793,12 +803,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
I830FALLBACK("Bad filter 0x%x\n", pSrcPicture->filter);
}
- memset(default_color_state, 0, sizeof(*default_color_state));
- default_color_state->color[0] = 0.0; /* R */
- default_color_state->color[1] = 0.0; /* G */
- default_color_state->color[2] = 0.0; /* B */
- default_color_state->color[3] = 0.0; /* A */
-
src_sampler_state->ss0.default_color_mode = 0; /* GL mode */
if (!pSrcPicture->repeat) {
@@ -806,7 +810,8 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
src_sampler_state->ss2.default_color_pointer =
- (state_base_offset + default_color_offset) >> 5;
+ (state_base_offset +
+ offsetof(gen4_state_t, sampler_default_color)) >> 5;
} else {
src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
@@ -842,8 +847,9 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
BRW_TEXCOORDMODE_CLAMP_BORDER;
mask_sampler_state->ss1.t_wrap_mode =
BRW_TEXCOORDMODE_CLAMP_BORDER;
- mask_sampler_state->ss2.default_color_pointer =
- (state_base_offset + default_color_offset)>>5;
+ mask_sampler_state->ss2.default_color_pointer =
+ (state_base_offset +
+ offsetof(gen4_state_t, sampler_default_color)) >> 5;
} else {
mask_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
mask_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
commit 80dd784e33847e431403d4659a7b8d8425b2676f
Author: Eric Anholt <eric at anholt.net>
Date: Thu Apr 10 13:24:51 2008 -0700
Add copyright information for recent editors of this file.
diff --git a/src/i965_render.c b/src/i965_render.c
index cd07a02..4b42db9 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -1,5 +1,6 @@
/*
- * Copyright © 2006 Intel Corporation
+ * Copyright © 2006,2008 Intel Corporation
+ * Copyright © 2007 Red Hat, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -23,6 +24,8 @@
* Authors:
* Wang Zhenyu <zhenyu.z.wang at intel.com>
* Eric Anholt <eric at anholt.net>
+ * Carl Worth <cworth at redhat.com>
+ * Keith Packard <keithp at keithp.com>
*
*/
commit b606278db83ec84b1db562a2d65697c50561b169
Author: Eric Anholt <eric at anholt.net>
Date: Thu Apr 10 13:17:58 2008 -0700
Keep static copies of the 965 render programs in video memory.
This reduces the CPU overhead of memcpying them in every time, for a speedup
in aa24text of around 30%. This is based on work by Carl Worth, which is
in the intel-batchbuffer branch.
diff --git a/src/i830.h b/src/i830.h
index 318b188..6465bd6 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -85,7 +85,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#ifdef I830_USE_EXA
#include "exa.h"
Bool I830EXAInit(ScreenPtr pScreen);
-#define EXA_LINEAR_EXTRA (64*1024)
unsigned long long I830TexOffsetStart(PixmapPtr pPix);
#endif
@@ -398,7 +397,7 @@ typedef struct _I830Rec {
i830_memory *xaa_scratch_2;
#ifdef I830_USE_EXA
i830_memory *exa_offscreen;
- i830_memory *exa_965_state;
+ i830_memory *gen4_render_state_mem;
#endif
/* Regions allocated either from the above pools, or from agpgart. */
I830RingBuffer *LpRing;
@@ -531,6 +530,9 @@ typedef struct _I830Rec {
uint32_t mapstate[6];
uint32_t samplerstate[6];
+ /* 965 render acceleration state */
+ struct gen4_render_state *gen4_render_state;
+
Bool directRenderingDisabled; /* DRI disabled in PreInit. */
Bool directRenderingEnabled; /* DRI enabled this generation. */
@@ -824,6 +826,10 @@ Bool i915_prepare_composite(int op, PicturePtr pSrc, PicturePtr pMask,
PicturePtr pDst, PixmapPtr pSrcPixmap,
PixmapPtr pMaskPixmap, PixmapPtr pDstPixmap);
/* i965_render.c */
+unsigned int gen4_render_state_size(ScrnInfoPtr pScrn);
+void gen4_render_state_init(ScrnInfoPtr pScrn);
+void gen4_render_state_cleanup(ScrnInfoPtr pScrn);
+void gen4_render_state_reset(ScrnInfoPtr pScrn);
Bool i965_check_composite(int op, PicturePtr pSrc, PicturePtr pMask,
PicturePtr pDst);
Bool i965_prepare_composite(int op, PicturePtr pSrc, PicturePtr pMask,
diff --git a/src/i830_accel.c b/src/i830_accel.c
index 953a73b..0194f00 100644
--- a/src/i830_accel.c
+++ b/src/i830_accel.c
@@ -205,6 +205,9 @@ I830Sync(ScrnInfoPtr pScrn)
pI830->LpRing->space = pI830->LpRing->mem->size - 8;
pI830->nextColorExpandBuf = 0;
+
+ if (IS_I965G(pI830))
+ gen4_render_state_reset(pScrn);
}
void
diff --git a/src/i830_driver.c b/src/i830_driver.c
index 66153b7..ea37e6d 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -3190,6 +3190,9 @@ I830LeaveVT(int scrnIndex, int flags)
}
#endif /* XF86DRI_MM */
+ if (IS_I965G(pI830))
+ gen4_render_state_cleanup(pScrn);
+
if (pI830->AccelInfoRec)
pI830->AccelInfoRec->NeedToSync = FALSE;
}
@@ -3236,6 +3239,9 @@ I830EnterVT(int scrnIndex, int flags)
/* Update the screen pixmap in case the buffer moved */
i830_update_front_offset(pScrn);
+ if (IS_I965G(pI830))
+ gen4_render_state_init(pScrn);
+
if (i830_check_error_state(pScrn)) {
xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
"Existing errors found in hardware state.\n");
diff --git a/src/i830_memory.c b/src/i830_memory.c
index 6835a6f..84db0ef 100644
--- a/src/i830_memory.c
+++ b/src/i830_memory.c
@@ -334,7 +334,7 @@ i830_reset_allocations(ScrnInfoPtr pScrn)
pI830->xaa_scratch = NULL;
pI830->xaa_scratch_2 = NULL;
pI830->exa_offscreen = NULL;
- pI830->exa_965_state = NULL;
+ pI830->gen4_render_state_mem = NULL;
pI830->overlay_regs = NULL;
pI830->logical_context = NULL;
#ifdef XF86DRI
@@ -1370,11 +1370,14 @@ i830_allocate_2d_memory(ScrnInfoPtr pScrn)
}
/* even in XAA, 965G needs state mem buffer for rendering */
- if (IS_I965G(pI830) && !pI830->noAccel && pI830->exa_965_state == NULL) {
- pI830->exa_965_state =
+ if (IS_I965G(pI830) && !pI830->noAccel &&
+ pI830->gen4_render_state_mem == NULL)
+ {
+ pI830->gen4_render_state_mem =
i830_allocate_memory(pScrn, "exa G965 state buffer",
- EXA_LINEAR_EXTRA, GTT_PAGE_SIZE, 0);
- if (pI830->exa_965_state == NULL) {
+ gen4_render_state_size(pScrn),
+ GTT_PAGE_SIZE, 0);
+ if (pI830->gen4_render_state_mem == NULL) {
xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
"Failed to allocate exa state buffer for 965.\n");
return FALSE;
diff --git a/src/i965_render.c b/src/i965_render.c
index 96082bb..cd07a02 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -113,6 +113,12 @@ static struct formatinfo i965_tex_formats[] = {
{PICT_a8, BRW_SURFACEFORMAT_A8_UNORM },
};
+/** Private data for gen4 render accel implementation. */
+struct gen4_render_state {
+ unsigned char *state_addr;
+ unsigned int state_offset;
+};
+
static void i965_get_blend_cntl(int op, PicturePtr pMask, uint32_t dst_format,
uint32_t *sblend, uint32_t *dblend)
{
@@ -267,17 +273,12 @@ static struct brw_wm_unit_state *wm_state, wm_state_local;
static struct brw_cc_unit_state *cc_state, cc_state_local;
static struct brw_cc_viewport *cc_viewport;
-static struct brw_instruction *sf_kernel;
-static struct brw_instruction *ps_kernel;
-static struct brw_instruction *sip_kernel;
-
static uint32_t *binding_table;
static int binding_table_entries;
static int dest_surf_offset, src_surf_offset, mask_surf_offset;
static int src_sampler_offset, mask_sampler_offset,vs_offset;
static int sf_offset, wm_offset, cc_offset, vb_offset, cc_viewport_offset;
-static int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset;
static int wm_scratch_offset;
static int binding_table_offset;
static int default_color_offset;
@@ -324,7 +325,7 @@ static const uint32_t sf_kernel_static[][4] = {
#include "exa_sf.g4b"
};
-static const uint32_t sf_kernel_static_mask[][4] = {
+static const uint32_t sf_kernel_mask_static[][4] = {
#include "exa_sf_mask.g4b"
};
@@ -334,21 +335,21 @@ static const uint32_t sf_kernel_static_mask[][4] = {
#define PS_SCRATCH_SPACE 1024
#define PS_SCRATCH_SPACE_LOG 0 /* log2 (PS_SCRATCH_SPACE) - 10 (1024 is 0, 2048 is 1) */
-static const uint32_t ps_kernel_static_nomask_affine [][4] = {
+static const uint32_t ps_kernel_nomask_affine_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_write.g4b"
};
-static const uint32_t ps_kernel_static_nomask_projective [][4] = {
+static const uint32_t ps_kernel_nomask_projective_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_write.g4b"
};
-static const uint32_t ps_kernel_static_maskca_affine [][4] = {
+static const uint32_t ps_kernel_maskca_affine_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
@@ -358,7 +359,7 @@ static const uint32_t ps_kernel_static_maskca_affine [][4] = {
#include "exa_wm_write.g4b"
};
-static const uint32_t ps_kernel_static_maskca_projective [][4] = {
+static const uint32_t ps_kernel_maskca_projective_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
@@ -368,7 +369,7 @@ static const uint32_t ps_kernel_static_maskca_projective [][4] = {
#include "exa_wm_write.g4b"
};
-static const uint32_t ps_kernel_static_maskca_srcalpha_affine [][4] = {
+static const uint32_t ps_kernel_maskca_srcalpha_affine_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_a.g4b"
@@ -378,7 +379,7 @@ static const uint32_t ps_kernel_static_maskca_srcalpha_affine [][4] = {
#include "exa_wm_write.g4b"
};
-static const uint32_t ps_kernel_static_maskca_srcalpha_projective [][4] = {
+static const uint32_t ps_kernel_maskca_srcalpha_projective_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_a.g4b"
@@ -388,7 +389,7 @@ static const uint32_t ps_kernel_static_maskca_srcalpha_projective [][4] = {
#include "exa_wm_write.g4b"
};
-static const uint32_t ps_kernel_static_masknoca_affine [][4] = {
+static const uint32_t ps_kernel_masknoca_affine_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
@@ -398,7 +399,7 @@ static const uint32_t ps_kernel_static_masknoca_affine [][4] = {
#include "exa_wm_write.g4b"
};
-static const uint32_t ps_kernel_static_masknoca_projective [][4] = {
+static const uint32_t ps_kernel_masknoca_projective_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
@@ -408,6 +409,62 @@ static const uint32_t ps_kernel_static_masknoca_projective [][4] = {
#include "exa_wm_write.g4b"
};
+/**
+ * Storage for the static kernel data with template name, rounded to 64 bytes.
+ */
+#define KERNEL_DECL(template) \
+ uint32_t template [((sizeof (template ## _static) + 63) & ~63) / 16][4];
+
+/**
+ * Gen4 rendering state buffer structure.
+ *
+ * Ideally this structure would contain static data for all of the
+ * combinations of state that we use for Render acceleration, and another
+ * buffer would be the use-and-throw-away surface and vertex data. See the
+ * intel-batchbuffer branch for an implementation of that. For now, it
+ * has the static program data, and then a changing buffer containing all
+ * the rest.
+ */
+typedef struct _gen4_state {
+ KERNEL_DECL (sip_kernel);
+ KERNEL_DECL (sf_kernel);
+ KERNEL_DECL (sf_kernel_mask);
+ KERNEL_DECL (ps_kernel_nomask_affine);
+ KERNEL_DECL (ps_kernel_nomask_projective);
+ KERNEL_DECL (ps_kernel_maskca_affine);
+ KERNEL_DECL (ps_kernel_maskca_projective);
+ KERNEL_DECL (ps_kernel_maskca_srcalpha_affine);
+ KERNEL_DECL (ps_kernel_maskca_srcalpha_projective);
+ KERNEL_DECL (ps_kernel_masknoca_affine);
+ KERNEL_DECL (ps_kernel_masknoca_projective);
+
+ uint8_t other_state[65536];
+} gen4_state_t;
+
+/**
+ * Called at EnterVT to fill in our state buffer with any static information.
+ */
+static void
+gen4_state_init (gen4_state_t *state)
+{
+#define KERNEL_COPY(kernel) \
+ memcpy(state->kernel, kernel ## _static, sizeof(kernel ## _static))
+
+ KERNEL_COPY (sip_kernel);
+ KERNEL_COPY (sf_kernel);
+ KERNEL_COPY (sf_kernel_mask);
+ KERNEL_COPY (ps_kernel_nomask_affine);
+ KERNEL_COPY (ps_kernel_nomask_projective);
+ KERNEL_COPY (ps_kernel_maskca_affine);
+ KERNEL_COPY (ps_kernel_maskca_projective);
+ KERNEL_COPY (ps_kernel_maskca_srcalpha_affine);
+ KERNEL_COPY (ps_kernel_maskca_srcalpha_projective);
+ KERNEL_COPY (ps_kernel_masknoca_affine);
+ KERNEL_COPY (ps_kernel_masknoca_projective);
+
+#undef KERNEL_COPY
+}
+
static uint32_t
i965_get_card_format(PicturePtr pPict)
{
@@ -484,7 +541,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
binding_table_entries = 2; /* default no mask */
/* Set up our layout of state in framebuffer. First the general state: */
- next_offset = 0;
+ next_offset = offsetof(gen4_state_t, other_state);
vs_offset = ALIGN(next_offset, 64);
next_offset = vs_offset + sizeof(*vs_state);
@@ -500,46 +557,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
cc_offset = ALIGN(next_offset, 32);
next_offset = cc_offset + sizeof(*cc_state);
- /* keep current sf_kernel, which will send one setup urb entry to
- * PS kernel
- */
- sf_kernel_offset = ALIGN(next_offset, 64);
- if (pMask)
- next_offset = sf_kernel_offset + sizeof (sf_kernel_static_mask);
- else
- next_offset = sf_kernel_offset + sizeof (sf_kernel_static);
-
- ps_kernel_offset = ALIGN(next_offset, 64);
- if (pMask) {
- if (pMaskPicture->componentAlpha &&
- PICT_FORMAT_RGB(pMaskPicture->format)) {
- if (i965_blend_op[op].src_alpha) {
- if (is_affine)
- next_offset = ps_kernel_offset + sizeof(ps_kernel_static_maskca_srcalpha_affine);
- else
- next_offset = ps_kernel_offset + sizeof(ps_kernel_static_maskca_srcalpha_projective);
- } else {
- if (is_affine)
- next_offset = ps_kernel_offset + sizeof(ps_kernel_static_maskca_affine);
- else
- next_offset = ps_kernel_offset + sizeof(ps_kernel_static_maskca_projective);
- }
- } else {
- if (is_affine)
- next_offset = ps_kernel_offset + sizeof(ps_kernel_static_masknoca_affine);
- else
- next_offset = ps_kernel_offset + sizeof(ps_kernel_static_masknoca_projective);
- }
- } else {
- if (is_affine)
- next_offset = ps_kernel_offset + sizeof (ps_kernel_static_nomask_affine);
- else
- next_offset = ps_kernel_offset + sizeof (ps_kernel_static_nomask_projective);
- }
-
- sip_kernel_offset = ALIGN(next_offset, 64);
- next_offset = sip_kernel_offset + sizeof (sip_kernel_static);
-
/* needed? */
cc_viewport_offset = ALIGN(next_offset, 32);
next_offset = cc_viewport_offset + sizeof(*cc_viewport);
@@ -576,16 +593,12 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
next_offset = default_color_offset + sizeof(*default_color_state);
total_state_size = next_offset;
- assert(total_state_size < pI830->exa_965_state->size);
+ assert(total_state_size < sizeof(gen4_state_t));
- state_base_offset = pI830->exa_965_state->offset;
- state_base_offset = ALIGN(state_base_offset, 64);
+ state_base_offset = pI830->gen4_render_state_mem->offset;
+ assert((state_base_offset & 63) == 0);
state_base = (char *)(pI830->FbBase + state_base_offset);
- sf_kernel = (void *)(state_base + sf_kernel_offset);
- ps_kernel = (void *)(state_base + ps_kernel_offset);
- sip_kernel = (void *)(state_base + sip_kernel_offset);
-
cc_viewport = (void *)(state_base + cc_viewport_offset);
binding_table = (void *)(state_base + binding_table_offset);
@@ -664,9 +677,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
cc_state = (void *)(state_base + cc_offset);
memcpy (cc_state, &cc_state_local, sizeof (cc_state_local));
- /* Upload system kernel */
- memcpy (sip_kernel, sip_kernel_static, sizeof (sip_kernel_static));
-
/* Set up the state buffer for the destination surface */
dest_surf_state = &dest_surf_state_local;
memset(dest_surf_state, 0, sizeof(*dest_surf_state));
@@ -857,16 +867,15 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
* calculate dA/dx and dA/dy. Hand these interpolation coefficients
* back to SF which then hands pixels off to WM.
*/
- if (pMask)
- memcpy(sf_kernel, sf_kernel_static_mask,
- sizeof (sf_kernel_static_mask));
- else
- memcpy(sf_kernel, sf_kernel_static, sizeof (sf_kernel_static));
-
sf_state = &sf_state_local;
memset(sf_state, 0, sizeof(*sf_state));
- sf_state->thread0.kernel_start_pointer =
- (state_base_offset + sf_kernel_offset) >> 6;
+ if (pMask) {
+ sf_state->thread0.kernel_start_pointer = (state_base_offset +
+ offsetof(gen4_state_t, sf_kernel_mask)) >> 6;
+ } else {
+ sf_state->thread0.kernel_start_pointer = (state_base_offset +
+ offsetof(gen4_state_t, sf_kernel)) >> 6;
+ }
sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
sf_state->sf1.single_program_flow = 1;
sf_state->sf1.binding_table_entry_count = 0;
@@ -899,37 +908,64 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
memcpy (sf_state, &sf_state_local, sizeof (sf_state_local));
/* Set up the PS kernel (dispatched by WM) */
+ wm_state = &wm_state_local;
+ memset(wm_state, 0, sizeof (*wm_state));
if (pMask) {
- if (pMaskPicture->componentAlpha &&
- PICT_FORMAT_RGB(pMaskPicture->format)) {
+ if (pMaskPicture->componentAlpha &&
+ PICT_FORMAT_RGB(pMaskPicture->format))
+ {
if (i965_blend_op[op].src_alpha) {
- if (is_affine)
- memcpy(ps_kernel, ps_kernel_static_maskca_srcalpha_affine, sizeof (ps_kernel_static_maskca_srcalpha_affine));
- else
- memcpy(ps_kernel, ps_kernel_static_maskca_srcalpha_projective, sizeof (ps_kernel_static_maskca_srcalpha_projective));
+ if (is_affine) {
+ wm_state->thread0.kernel_start_pointer =
+ (state_base_offset +
+ offsetof(gen4_state_t,
+ ps_kernel_maskca_srcalpha_affine)) >> 6;
+ } else {
+ wm_state->thread0.kernel_start_pointer =
+ (state_base_offset +
+ offsetof(gen4_state_t,
+ ps_kernel_maskca_srcalpha_projective)) >> 6;
+ }
} else {
- if (is_affine)
- memcpy(ps_kernel, ps_kernel_static_maskca_affine, sizeof (ps_kernel_static_maskca_affine));
- else
- memcpy(ps_kernel, ps_kernel_static_maskca_projective, sizeof (ps_kernel_static_maskca_projective));
- }
+ if (is_affine) {
+ wm_state->thread0.kernel_start_pointer =
+ (state_base_offset +
+ offsetof(gen4_state_t,
+ ps_kernel_maskca_affine)) >> 6;
+ } else {
+ wm_state->thread0.kernel_start_pointer =
+ (state_base_offset +
+ offsetof(gen4_state_t,
+ ps_kernel_maskca_projective)) >> 6;
+ }
+ }
} else {
- if (is_affine)
- memcpy(ps_kernel, ps_kernel_static_masknoca_affine, sizeof (ps_kernel_static_masknoca_affine));
- else
- memcpy(ps_kernel, ps_kernel_static_masknoca_projective, sizeof (ps_kernel_static_masknoca_projective));
+ if (is_affine) {
+ wm_state->thread0.kernel_start_pointer =
+ (state_base_offset +
+ offsetof(gen4_state_t,
+ ps_kernel_masknoca_affine)) >> 6;
+ } else {
+ wm_state->thread0.kernel_start_pointer =
+ (state_base_offset +
+ offsetof(gen4_state_t,
+ ps_kernel_masknoca_projective)) >> 6;
+ }
}
} else {
- if (is_affine)
- memcpy(ps_kernel, ps_kernel_static_nomask_affine, sizeof (ps_kernel_static_nomask_affine));
- else
- memcpy(ps_kernel, ps_kernel_static_nomask_projective, sizeof (ps_kernel_static_nomask_projective));
+ if (is_affine) {
+ wm_state->thread0.kernel_start_pointer =
+ (state_base_offset +
+ offsetof(gen4_state_t,
+ ps_kernel_nomask_affine)) >> 6;
+ } else {
+ wm_state->thread0.kernel_start_pointer =
+ (state_base_offset +
+ offsetof(gen4_state_t,
+ ps_kernel_nomask_projective)) >> 6;
+ }
}
- wm_state = &wm_state_local;
- memset(wm_state, 0, sizeof (*wm_state));
- wm_state->thread0.kernel_start_pointer =
- (state_base_offset + ps_kernel_offset) >> 6;
wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
wm_state->thread1.single_program_flow = 0;
if (!pMask)
@@ -1006,7 +1042,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
/* Set system instruction pointer */
OUT_BATCH(BRW_STATE_SIP | 0);
- OUT_BATCH(state_base_offset + sip_kernel_offset);
+ OUT_BATCH(state_base_offset + offsetof(gen4_state_t, sip_kernel));
OUT_BATCH(MI_NOOP);
ADVANCE_BATCH();
}
@@ -1328,3 +1364,49 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
*/
i830MarkSync(pScrn);
}
+
+/**
+ * Called at EnterVT so we can set up our offsets into the state buffer.
+ */
+void
+gen4_render_state_init(ScrnInfoPtr pScrn)
+{
+ I830Ptr pI830 = I830PTR(pScrn);
+ struct gen4_render_state *state;
+
+ if (pI830->gen4_render_state == NULL)
+ pI830->gen4_render_state = calloc(sizeof(*state), 1);
+
+ state = pI830->gen4_render_state;
+
+ state->state_offset = pI830->gen4_render_state_mem->offset;
+ state->state_addr = pI830->FbBase + pI830->gen4_render_state_mem->offset;
+
+ gen4_state_init((gen4_state_t *)state->state_addr);
+}
+
+/**
+ * Called at LeaveVT.
+ */
+void
+gen4_render_state_cleanup(ScrnInfoPtr pScrn)
+{
+ I830Ptr pI830 = I830PTR(pScrn);
+
+ pI830->gen4_render_state->state_addr = NULL;
+}
+
+/**
+ * Called when the hardware is idled and flushed, so we know we can
+ * reuse the buffer contents.
+ */
+void
+gen4_render_state_reset(ScrnInfoPtr pScrn)
+{
+}
+
+unsigned int
+gen4_render_state_size(ScrnInfoPtr pScrn)
+{
+ return sizeof(gen4_state_t);
+}
More information about the xorg-commit
mailing list