[Intel-gfx] [PATCH 2/2] use BLT command to accelerate uxa on gen6.
Zou Nan hai
nanhai.zou at intel.com
Tue Oct 26 09:33:16 CEST 2010
uxa: enable acceleration for uxa_copy and uxa_solid
on gen6.
Signed-off-by: Zou Nan hai <nanhai.zou at intel.com>
---
src/i830_reg.h | 2 +
src/intel_batchbuffer.c | 35 ++++++--
src/intel_batchbuffer.h | 31 ++++++-
src/intel_driver.c | 3 +-
src/intel_uxa.c | 230 +++++++++++++++++++++++++++++++++++++++--------
5 files changed, 248 insertions(+), 53 deletions(-)
diff --git a/src/i830_reg.h b/src/i830_reg.h
index 4080896..93d03cf 100644
--- a/src/i830_reg.h
+++ b/src/i830_reg.h
@@ -32,6 +32,8 @@
/* Flush */
#define MI_FLUSH (0x04<<23)
+#define MI_FLUSH_DW (0x26<<23)
+
#define MI_WRITE_DIRTY_STATE (1<<4)
#define MI_END_SCENE (1<<3)
#define MI_GLOBAL_SNAPSHOT_COUNT_RESET (1<<3)
diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index 3b72ba1..cde086b 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -171,6 +171,12 @@ void intel_batch_emit_flush(ScrnInfoPtr scrn, int batch_idx)
intel_batch_do_flush(scrn, batch_idx);
} else {
+ BEGIN_BATCH_BLT(4);
+ OUT_BATCH_BLT(MI_FLUSH_DW | 2);
+ OUT_BATCH_BLT(0);
+ OUT_BATCH_BLT(0);
+ OUT_BATCH_BLT(0);
+ ADVANCE_BATCH_BLT();
}
}
@@ -193,13 +199,22 @@ void intel_batch_submit(ScrnInfoPtr scrn, int flush, int batch_idx)
if (batch->batch_used == 0)
return;
+
+ if (batch_idx == RENDER_BATCH) {
+ /* Mark the end of the batchbuffer. */
+ OUT_BATCH(MI_BATCH_BUFFER_END);
+ /* Emit a padding dword if we aren't going to be quad-word aligned. */
+ if (batch->batch_used & 1)
+ OUT_BATCH(MI_NOOP);
+ } else {
+ /* Mark the end of the batchbuffer. */
+ OUT_BATCH_BLT(MI_BATCH_BUFFER_END);
+ /* Emit a padding dword if we aren't going to be quad-word aligned. */
+ if (batch->batch_used & 1)
+ OUT_BATCH_BLT(MI_NOOP);
+ }
- /* Mark the end of the batchbuffer. */
- OUT_BATCH(MI_BATCH_BUFFER_END);
- /* Emit a padding dword if we aren't going to be quad-word aligned. */
- if (batch->batch_used & 1)
- OUT_BATCH(MI_NOOP);
-
+
if (DUMP_BATCHBUFFERS) {
FILE *file = fopen(DUMP_BATCHBUFFERS, "a");
if (file) {
@@ -211,9 +226,13 @@ void intel_batch_submit(ScrnInfoPtr scrn, int flush, int batch_idx)
ret = dri_bo_subdata(batch->batch_bo, 0, batch->batch_used*4, batch->batch_ptr);
if (ret == 0) {
if (batch_idx == RENDER_BATCH) {
- ret = dri_bo_exec(batch->batch_bo, batch->batch_used*4,
- NULL, 0, 0xffffffff);
+ ret = drm_intel_bo_mrb_exec(batch->batch_bo,
+ batch->batch_used*4,
+ NULL, 0, 0xffffffff, I915_EXEC_RENDER);
} else {
+ ret = drm_intel_bo_mrb_exec(batch->batch_bo,
+ batch->batch_used*4,
+ NULL, 0, 0xffffffff, I915_EXEC_BLIT);
}
}
if (ret != 0) {
diff --git a/src/intel_batchbuffer.h b/src/intel_batchbuffer.h
index 1ed3ad8..6d1ee15 100644
--- a/src/intel_batchbuffer.h
+++ b/src/intel_batchbuffer.h
@@ -156,20 +156,35 @@ intel_batch_emit_reloc_pixmap(intel_screen_private *intel, PixmapPtr pixmap,
}
#define ALIGN_BATCH(align) intel_batch_align(intel, align, RENDER_BATCH);
+#define ALIGN_BATCH_BLT(align) intel_batch_align(intel, align, BLT_BATCH);
+
#define OUT_BATCH(dword) intel_batch_emit_dword(intel, dword, RENDER_BATCH)
+#define OUT_BATCH_BLT(dword) intel_batch_emit_dword(intel, dword, BLT_BATCH)
#define OUT_RELOC(bo, read_domains, write_domains, delta) \
intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 0,RENDER_BATCH)
+#define OUT_RELOC_BLT(bo, read_domains, write_domains, delta) \
+ intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 0,BLT_BATCH)
+
#define OUT_RELOC_FENCED(bo, read_domains, write_domains, delta) \
intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 1,RENDER_BATCH)
+#define OUT_RELOC_FENCED_BLT(bo, read_domains, write_domains, delta) \
+ intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 1,BLT_BATCH)
+
#define OUT_RELOC_PIXMAP(pixmap, reads, write, delta) \
intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 0, RENDER_BATCH)
+#define OUT_RELOC_PIXMAP_BLT(pixmap, reads, write, delta) \
+ intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 0, BLT_BATCH)
+
#define OUT_RELOC_PIXMAP_FENCED(pixmap, reads, write, delta) \
intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 1, RENDER_BATCH)
+#define OUT_RELOC_PIXMAP_FENCED_BLT(pixmap, reads, write, delta) \
+ intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 1, BLT_BATCH)
+
union intfloat {
float f;
unsigned int ui;
@@ -181,20 +196,23 @@ union intfloat {
OUT_BATCH(tmp.ui); \
} while(0)
-#define BEGIN_BATCH(n) \
+#define __BEGIN_BATCH(n,batch_idx) \
do { \
- struct batch *batch = &intel->batch[RENDER_BATCH]; \
+ struct batch *batch = &intel->batch[batch_idx]; \
if (batch->batch_emitting != 0) \
FatalError("%s: BEGIN_BATCH called without closing " \
"ADVANCE_BATCH\n", __FUNCTION__); \
assert(!batch->in_batch_atomic); \
- intel_batch_require_space(scrn, intel, (n) * 4, RENDER_BATCH); \
+ intel_batch_require_space(scrn, intel, (n) * 4, batch_idx); \
batch->batch_emitting = (n); \
batch->batch_emit_start = batch->batch_used; \
} while (0)
-#define ADVANCE_BATCH() do { \
- struct batch *batch = &intel->batch[RENDER_BATCH]; \
+#define BEGIN_BATCH(n) __BEGIN_BATCH(n,RENDER_BATCH)
+#define BEGIN_BATCH_BLT(n) __BEGIN_BATCH(n,BLT_BATCH)
+
+#define __ADVANCE_BATCH(batch_idx) do { \
+ struct batch *batch = &intel->batch[batch_idx]; \
if (batch->batch_emitting == 0) \
FatalError("%s: ADVANCE_BATCH called with no matching " \
"BEGIN_BATCH\n", __FUNCTION__); \
@@ -213,6 +231,9 @@ do { \
batch->batch_emitting = 0; \
} while (0)
+/* Close a section opened with BEGIN_BATCH() / BEGIN_BATCH_BLT() on the
+ * matching batch. These wrappers take no argument, like the pre-patch
+ * ADVANCE_BATCH() and the ADVANCE_BATCH_BLT() calls in this patch. */
+#define ADVANCE_BATCH() __ADVANCE_BATCH(RENDER_BATCH)
+#define ADVANCE_BATCH_BLT() __ADVANCE_BATCH(BLT_BATCH)
+
void intel_next_vertex(intel_screen_private *intel);
static inline void intel_vertex_emit(intel_screen_private *intel, float v)
{
diff --git a/src/intel_driver.c b/src/intel_driver.c
index b9fb69d..051497d 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -581,8 +581,6 @@ static Bool I830PreInit(ScrnInfoPtr scrn, int flags)
}
intel->use_shadow = FALSE;
- if (IS_GEN6(intel))
- intel->use_shadow = TRUE;
if (xf86IsOptionSet(intel->Options, OPTION_SHADOW)) {
intel->use_shadow =
@@ -809,6 +807,7 @@ intel_flush_callback(CallbackListPtr *list,
intel_batch_submit(scrn,
intel->batch[RENDER_BATCH].need_mi_flush
||!list_is_empty(&intel->batch[RENDER_BATCH].flush_pixmaps), RENDER_BATCH);
+
}
}
diff --git a/src/intel_uxa.c b/src/intel_uxa.c
index 05ac3d2..cbd87ca 100644
--- a/src/intel_uxa.c
+++ b/src/intel_uxa.c
@@ -208,17 +208,9 @@ intel_uxa_pixmap_compute_size(PixmapPtr pixmap,
}
static Bool
-i830_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask)
+generic_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask)
{
ScrnInfoPtr scrn = xf86Screens[drawable->pScreen->myNum];
- intel_screen_private *intel = intel_get_screen_private(scrn);
-
- if (IS_GEN6(intel)) {
- intel_debug_fallback(scrn,
- "Sandybridge BLT engine not supported\n");
- return FALSE;
- }
-
if (!UXA_PM_IS_SOLID(drawable, planemask)) {
intel_debug_fallback(scrn, "planemask is not solid\n");
return FALSE;
@@ -232,7 +224,6 @@ i830_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask)
default:
return FALSE;
}
-
return TRUE;
}
@@ -240,7 +231,7 @@ i830_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask)
* Sets up hardware state for a series of solid fills.
*/
static Bool
-i830_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg)
+generic_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg)
{
ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
intel_screen_private *intel = intel_get_screen_private(scrn);
@@ -252,10 +243,17 @@ i830_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg)
if (!intel_check_pitch_2d(pixmap))
return FALSE;
- if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table),
- RENDER_BATCH))
- return FALSE;
-
+ if (IS_GEN6(intel)) {
+ if (!intel_get_aperture_space(scrn, bo_table,
+ ARRAY_SIZE(bo_table),
+ BLT_BATCH))
+ return FALSE;
+ } else {
+ if (!intel_get_aperture_space(scrn, bo_table,
+ ARRAY_SIZE(bo_table),
+ RENDER_BATCH))
+ return FALSE;
+ }
intel->BR[13] = (I830PatternROP[alu] & 0xff) << 16;
switch (pixmap->drawable.bitsPerPixel) {
case 8:
@@ -274,6 +272,52 @@ i830_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg)
return TRUE;
}
+/*
+ * Gen6 solid fill: clamp the rectangle (x1,y1)-(x2,y2) to the pixmap
+ * bounds and emit a single 6-dword XY_COLOR_BLT into the BLT batch.
+ * intel->BR[13] is set up by generic_uxa_prepare_solid; BR[16] is
+ * presumably the fill colour stored there as well (confirm).
+ */
+static void gen6_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2)
+{
+ ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+ unsigned long pitch;
+ uint32_t cmd;
+ /* Clamp the fill rectangle to the pixmap. */
+ if (x1 < 0)
+ x1 = 0;
+ if (y1 < 0)
+ y1 = 0;
+ if (x2 > pixmap->drawable.width)
+ x2 = pixmap->drawable.width;
+ if (y2 > pixmap->drawable.height)
+ y2 = pixmap->drawable.height;
+
+ /* Nothing left to fill after clamping. */
+ if (x2 <= x1 || y2 <= y1)
+ return;
+
+ pitch = intel_pixmap_pitch(pixmap);
+ {
+ BEGIN_BATCH_BLT(6);
+
+ cmd = XY_COLOR_BLT_CMD;
+
+ if (pixmap->drawable.bitsPerPixel == 32)
+ cmd |=
+ XY_COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
+
+ /* Tiled destinations program the pitch divided by four. */
+ if (intel_pixmap_tiled(pixmap)) {
+ assert((pitch % 512) == 0);
+ pitch >>= 2;
+ cmd |= XY_COLOR_BLT_TILED;
+ }
+
+ OUT_BATCH_BLT(cmd);
+
+ OUT_BATCH_BLT(intel->BR[13] | pitch);
+ OUT_BATCH_BLT((y1 << 16) | (x1 & 0xffff));
+ OUT_BATCH_BLT((y2 << 16) | (x2 & 0xffff));
+ /* The blitter writes this pixmap, so it must be listed in the
+ * write domain too, as gen6_uxa_copy does for its destination. */
+ OUT_RELOC_PIXMAP_FENCED_BLT(pixmap, I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH_BLT(intel->BR[16]);
+ ADVANCE_BATCH_BLT();
+ }
+}
+
+
static void i830_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2)
{
ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
@@ -324,10 +368,15 @@ static void i830_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2)
ironlake_blt_workaround(scrn);
}
-static void i830_uxa_done_solid(PixmapPtr pixmap)
+static void gen6_uxa_done_solid(PixmapPtr pixmap)
{
ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
+ intel_batch_submit(scrn, FALSE, BLT_BATCH);
+}
+static void i830_uxa_done_solid(PixmapPtr pixmap)
+{
+ ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
intel_debug_flush(scrn, RENDER_BATCH);
}
@@ -336,17 +385,10 @@ static void i830_uxa_done_solid(PixmapPtr pixmap)
* - support planemask using FULL_BLT_CMD?
*/
static Bool
-i830_uxa_check_copy(PixmapPtr source, PixmapPtr dest,
+generic_uxa_check_copy(PixmapPtr source, PixmapPtr dest,
int alu, Pixel planemask)
{
ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
- intel_screen_private *intel = intel_get_screen_private(scrn);
-
- if (IS_GEN6(intel)) {
- intel_debug_fallback(scrn,
- "Sandybridge BLT engine not supported\n");
- return FALSE;
- }
if (!UXA_PM_IS_SOLID(&source->drawable, planemask)) {
intel_debug_fallback(scrn, "planemask is not solid");
@@ -375,7 +417,7 @@ i830_uxa_check_copy(PixmapPtr source, PixmapPtr dest,
}
static Bool
-i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir,
+generic_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir,
int ydir, int alu, Pixel planemask)
{
ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
@@ -386,9 +428,18 @@ i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir,
intel_get_pixmap_bo(dest),
};
- if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table),
- RENDER_BATCH))
- return FALSE;
+
+ if (IS_GEN6(intel)) {
+ if (!intel_get_aperture_space(scrn, bo_table,
+ ARRAY_SIZE(bo_table),
+ BLT_BATCH))
+ return FALSE;
+ } else {
+ if (!intel_get_aperture_space(scrn, bo_table,
+ ARRAY_SIZE(bo_table),
+ RENDER_BATCH))
+ return FALSE;
+ }
intel->render_source = source;
@@ -408,6 +459,90 @@ i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir,
}
static void
+gen6_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1,
+ int dst_y1, int w, int h)
+{
+ ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+ uint32_t cmd;
+ int dst_x2, dst_y2, src_x2, src_y2;
+ unsigned int dst_pitch, src_pitch;
+
+ dst_x2 = dst_x1 + w;
+ dst_y2 = dst_y1 + h;
+
+ /* XXX Fixup extents as a lamentable workaround for missing
+ * source clipping in the upper layers.
+ */
+ if (dst_x1 < 0)
+ src_x1 -= dst_x1, dst_x1 = 0;
+ if (dst_y1 < 0)
+ src_y1 -= dst_y1, dst_y1 = 0;
+ if (dst_x2 > dest->drawable.width)
+ dst_x2 = dest->drawable.width;
+ if (dst_y2 > dest->drawable.height)
+ dst_y2 = dest->drawable.height;
+
+ src_x2 = src_x1 + (dst_x2 - dst_x1);
+ src_y2 = src_y1 + (dst_y2 - dst_y1);
+
+ if (src_x1 < 0)
+ dst_x1 -= src_x1, src_x1 = 0;
+ if (src_y1 < 0)
+ dst_y1 -= src_y1, src_y1 = 0;
+ if (src_x2 > intel->render_source->drawable.width)
+ dst_x2 -= src_x2 - intel->render_source->drawable.width;
+ if (src_y2 > intel->render_source->drawable.height)
+ dst_y2 -= src_y2 - intel->render_source->drawable.height;
+
+ if (dst_x2 <= dst_x1 || dst_y2 <= dst_y1)
+ return;
+
+ dst_pitch = intel_pixmap_pitch(dest);
+ src_pitch = intel_pixmap_pitch(intel->render_source);
+ {
+ BEGIN_BATCH_BLT(8);
+
+ cmd = XY_SRC_COPY_BLT_CMD;
+
+ if (dest->drawable.bitsPerPixel == 32)
+ cmd |=
+ XY_SRC_COPY_BLT_WRITE_ALPHA |
+ XY_SRC_COPY_BLT_WRITE_RGB;
+
+ if (INTEL_INFO(intel)->gen >= 40) {
+ if (intel_pixmap_tiled(dest)) {
+ assert((dst_pitch % 512) == 0);
+ dst_pitch >>= 2;
+ cmd |= XY_SRC_COPY_BLT_DST_TILED;
+ }
+
+ if (intel_pixmap_tiled(intel->render_source)) {
+ assert((src_pitch % 512) == 0);
+ src_pitch >>= 2;
+ cmd |= XY_SRC_COPY_BLT_SRC_TILED;
+ }
+ }
+
+ OUT_BATCH_BLT(cmd);
+
+ OUT_BATCH_BLT(intel->BR[13] | dst_pitch);
+ OUT_BATCH_BLT((dst_y1 << 16) | (dst_x1 & 0xffff));
+ OUT_BATCH_BLT((dst_y2 << 16) | (dst_x2 & 0xffff));
+ OUT_RELOC_PIXMAP_FENCED_BLT(dest,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER,
+ 0);
+ OUT_BATCH_BLT((src_y1 << 16) | (src_x1 & 0xffff));
+ OUT_BATCH_BLT(src_pitch);
+ OUT_RELOC_PIXMAP_FENCED_BLT(intel->render_source,
+ I915_GEM_DOMAIN_RENDER, 0,
+ 0);
+ ADVANCE_BATCH_BLT();
+ }
+}
+
+static void
i830_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1,
int dst_y1, int w, int h)
{
@@ -497,10 +632,16 @@ i830_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1,
static void i830_uxa_done_copy(PixmapPtr dest)
{
ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
intel_debug_flush(scrn, RENDER_BATCH);
}
+
+/*
+ * Gen6 counterpart of i830_uxa_done_copy: submit the BLT batch (without
+ * requesting a flush) once a series of gen6_uxa_copy calls is finished.
+ * The pre-gen6 path above keeps flushing RENDER_BATCH, which is the batch
+ * generic_uxa_prepare_copy reserves space on for non-gen6 hardware.
+ */
+static void gen6_uxa_done_copy(PixmapPtr dest)
+{
+ ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
+ intel_batch_submit(scrn, FALSE, BLT_BATCH);
+}
+
/**
* Do any cleanup from the Composite operation.
*
@@ -1191,17 +1332,30 @@ Bool intel_uxa_init(ScreenPtr screen)
intel->vertex_bo = NULL;
/* Solid fill */
- intel->uxa_driver->check_solid = i830_uxa_check_solid;
- intel->uxa_driver->prepare_solid = i830_uxa_prepare_solid;
- intel->uxa_driver->solid = i830_uxa_solid;
- intel->uxa_driver->done_solid = i830_uxa_done_solid;
+ if (IS_GEN6(intel)) {
+ intel->uxa_driver->check_solid = generic_uxa_check_solid;
+ intel->uxa_driver->prepare_solid = generic_uxa_prepare_solid;
+ intel->uxa_driver->solid = gen6_uxa_solid;
+ intel->uxa_driver->done_solid = gen6_uxa_done_solid;
+ } else {
+ intel->uxa_driver->check_solid = generic_uxa_check_solid;
+ intel->uxa_driver->prepare_solid = generic_uxa_prepare_solid;
+ intel->uxa_driver->solid = i830_uxa_solid;
+ intel->uxa_driver->done_solid = i830_uxa_done_solid;
+ }
/* Copy */
- intel->uxa_driver->check_copy = i830_uxa_check_copy;
- intel->uxa_driver->prepare_copy = i830_uxa_prepare_copy;
- intel->uxa_driver->copy = i830_uxa_copy;
- intel->uxa_driver->done_copy = i830_uxa_done_copy;
-
+ if (IS_GEN6(intel)) {
+ intel->uxa_driver->check_copy = generic_uxa_check_copy;
+ intel->uxa_driver->prepare_copy = generic_uxa_prepare_copy;
+ intel->uxa_driver->copy = gen6_uxa_copy;
+ intel->uxa_driver->done_copy = gen6_uxa_done_copy;
+ } else {
+ intel->uxa_driver->check_copy = generic_uxa_check_copy;
+ intel->uxa_driver->prepare_copy = generic_uxa_prepare_copy;
+ intel->uxa_driver->copy = i830_uxa_copy;
+ intel->uxa_driver->done_copy = i830_uxa_done_copy;
+ }
/* Composite */
if (IS_GEN2(intel)) {
intel->uxa_driver->check_composite = i830_check_composite;
--
1.7.1
More information about the Intel-gfx
mailing list