[Mesa-dev] [PATCH 2/2] i965/hsw: Set MOCS for surfaces
Chad Versace
chad.versace at linux.intel.com
Thu May 9 16:48:31 PDT 2013
The driver was setting MOCS (Memory Object Control State) to 0 for all
objects. This patch sets it as follows:
renderbuffer, depthbuffer => LLC uncacheable, L3 cacheable
texture, stencil, hiz => LLC cacheable, L3 cacheable
The goal here is to avoid blowing out the LLC with too-large buffers.
Performance gains:
Haswell Harris Beach GT3
Android 4.2.2
kernel based on 3.8-4fc7c97
GLBenchmark 2.5.1 Egypt HD C24Z16 Offscreen DXT1
+32.0309% +/- 0.775397%, n = 5, 95% confidence
GLBenchmark 2.7 T-Rex HD C24Z16 Offscreen Fixed timestep ETC1
+20.2435% +/- 0.821163%, n = 5, 95% confidence
Tested-by: Matt Turner <mattst88 at gmail.com>
Signed-off-by: Chad Versace <chad.versace at linux.intel.com>
---
This patch lives on my 'mocs' branch.
src/mesa/drivers/dri/i965/brw_context.c | 31 +++++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_context.h | 13 ++++++++++
src/mesa/drivers/dri/i965/gen7_blorp.cpp | 14 +++++++---
src/mesa/drivers/dri/i965/gen7_misc_state.c | 16 ++++++++++--
src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 7 +++--
5 files changed, 74 insertions(+), 7 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 4650553..edcf59d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -418,3 +418,34 @@ brwCreateContext(int api,
return true;
}
+/**
+ * Return MOCS (Memory Object Control State) bits for \c usage; 0 if not Haswell.
+ */
+uint32_t
+brw_get_mocs(struct brw_context *brw,
+ struct intel_region *region,
+ enum brw_mocs_usage usage)
+{
+ struct intel_context *intel = &brw->intel;
+ uint32_t mocs = 0;
+
+ if (intel->is_haswell) {
+ /* This heuristic is crude: it looks only at the buffer's usage and ignores
+ * \c region, so not its size. A smarter heuristic may perform better.
+ */
+ switch (usage) {
+ case BRW_MOCS_USAGE_RB_SURFACE:
+ case BRW_MOCS_USAGE_DEPTH:
+ /* These surfaces are usually so large they blow out the LLC. */
+ mocs = HSW_MOCS_LCC_UNCACHEABLE | HSW_MOCS_L3_CACHEABLE;
+ break;
+ case BRW_MOCS_USAGE_TEX_SURFACE:
+ case BRW_MOCS_USAGE_HIZ:
+ case BRW_MOCS_USAGE_STENCIL:
+ mocs = HSW_MOCS_LCC_WB_TO_ALL | HSW_MOCS_L3_CACHEABLE;
+ break;
+ }
+ }
+
+ return mocs;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index c682501..172d36d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1371,6 +1371,19 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw,
uint32_t width, uint32_t height,
uint32_t tile_x, uint32_t tile_y);
+enum brw_mocs_usage {
+ BRW_MOCS_USAGE_RB_SURFACE, /* render-target surface state */
+ BRW_MOCS_USAGE_TEX_SURFACE, /* texture surface state */
+ BRW_MOCS_USAGE_DEPTH, /* depth buffer */
+ BRW_MOCS_USAGE_STENCIL, /* stencil buffer (3DSTATE_STENCIL_BUFFER) */
+ BRW_MOCS_USAGE_HIZ, /* HiZ buffer (3DSTATE_HIER_DEPTH_BUFFER) */
+};
+
+uint32_t
+brw_get_mocs(struct brw_context *brw,
+ struct intel_region *region,
+ enum brw_mocs_usage usage);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
index 1c23866..48e98dc 100644
--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -151,6 +151,9 @@ gen7_blorp_emit_surface_state(struct brw_context *brw,
*/
struct intel_region *region = surface->mt->region;
uint32_t tile_x, tile_y;
+ uint32_t mocs = brw_get_mocs(brw, region, is_render_target
+ ? BRW_MOCS_USAGE_RB_SURFACE
+ : BRW_MOCS_USAGE_TEX_SURFACE);
uint32_t tiling = surface->map_stencil_as_y_tiled
? I915_TILING_Y : region->tiling;
@@ -183,7 +186,8 @@ gen7_blorp_emit_surface_state(struct brw_context *brw,
assert(tile_x % 4 == 0);
assert(tile_y % 2 == 0);
surf[5] = SET_FIELD(tile_x / 4, BRW_SURFACE_X_OFFSET) |
- SET_FIELD(tile_y / 2, BRW_SURFACE_Y_OFFSET);
+ SET_FIELD(tile_y / 2, BRW_SURFACE_Y_OFFSET) |
+ SET_FIELD(mocs, GEN7_SURFACE_MOCS);
surf[2] = SET_FIELD(width - 1, GEN7_SURFACE_WIDTH) |
SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT);
@@ -598,6 +602,8 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw,
intel_region_get_aligned_offset(params->depth.mt->region,
draw_x & ~tile_mask_x,
draw_y & ~tile_mask_y, false);
+ uint32_t depth_mocs = brw_get_mocs(brw, params->depth.mt->region,
+ BRW_MOCS_USAGE_DEPTH);
/* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
* (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
@@ -634,7 +640,7 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw,
offset);
OUT_BATCH((params->depth.width + tile_x - 1) << 4 |
(params->depth.height + tile_y - 1) << 18);
- OUT_BATCH(0);
+ OUT_BATCH(depth_mocs);
OUT_BATCH(tile_x |
tile_y << 16);
OUT_BATCH(0);
@@ -648,10 +654,12 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw,
intel_region_get_aligned_offset(hiz_region,
draw_x & ~tile_mask_x,
(draw_y & ~tile_mask_y) / 2, false);
+ uint32_t hiz_mocs = brw_get_mocs(brw, hiz_region, BRW_MOCS_USAGE_HIZ);
BEGIN_BATCH(3);
OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
- OUT_BATCH(hiz_region->pitch - 1);
+ OUT_BATCH((hiz_mocs << 25) |
+ (hiz_region->pitch - 1));
OUT_RELOC(hiz_region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
hiz_offset);
diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c
index 12b752c..7e358ae 100644
--- a/src/mesa/drivers/dri/i965/gen7_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c
@@ -41,6 +41,11 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw,
{
struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &intel->ctx;
+ uint32_t depth_mocs = 0;
+
+ if (depth_mt) {
+ depth_mocs = brw_get_mocs(brw, depth_mt->region, BRW_MOCS_USAGE_DEPTH);
+ }
intel_emit_depth_stall_flushes(intel);
@@ -64,7 +69,7 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw,
OUT_BATCH(((width + tile_x - 1) << 4) |
((height + tile_y - 1) << 18));
- OUT_BATCH(0);
+ OUT_BATCH(depth_mocs);
OUT_BATCH(tile_x | (tile_y << 16));
OUT_BATCH(0);
ADVANCE_BATCH();
@@ -77,9 +82,13 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw,
ADVANCE_BATCH();
} else {
struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_mt;
+ uint32_t hiz_mocs = brw_get_mocs(brw, hiz_mt->region,
+ BRW_MOCS_USAGE_HIZ);
+
BEGIN_BATCH(3);
OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
- OUT_BATCH(hiz_mt->region->pitch - 1);
+ OUT_BATCH((hiz_mocs << 25) |
+ (hiz_mt->region->pitch - 1));
OUT_RELOC(hiz_mt->region->bo,
I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER,
@@ -95,6 +104,8 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw,
ADVANCE_BATCH();
} else {
const int enabled = intel->is_haswell ? HSW_STENCIL_ENABLED : 0;
+ uint32_t stencil_mocs = brw_get_mocs(brw, stencil_mt->region,
+ BRW_MOCS_USAGE_STENCIL);
BEGIN_BATCH(3);
OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
@@ -113,6 +124,7 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw,
* indicate that it does.
*/
OUT_BATCH(enabled |
+ (stencil_mocs << 25) |
(2 * stencil_mt->region->pitch - 1));
OUT_RELOC(stencil_mt->region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index 435f9dc..34d5e68 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -294,6 +294,7 @@ gen7_update_texture_surface(struct gl_context *ctx,
struct intel_mipmap_tree *mt = intelObj->mt;
struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
+ uint32_t mocs = brw_get_mocs(brw, mt->region, BRW_MOCS_USAGE_TEX_SURFACE);
int width, height, depth;
uint32_t tile_x, tile_y;
@@ -347,6 +348,7 @@ gen7_update_texture_surface(struct gl_context *ctx,
*/
surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
(tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
+ SET_FIELD(mocs, GEN7_SURFACE_MOCS) |
/* mip count */
(intelObj->_MaxLevel - tObj->BaseLevel));
@@ -532,7 +534,7 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
uint32_t format;
/* _NEW_BUFFERS */
gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
-
+ uint32_t mocs = brw_get_mocs(brw, region, BRW_MOCS_USAGE_RB_SURFACE);
uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
8 * 4, 32, &brw->wm.surf_offset[unit]);
memset(surf, 0, 8 * 4);
@@ -569,7 +571,8 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
assert(tile_x % 4 == 0);
assert(tile_y % 2 == 0);
surf[5] = SET_FIELD(tile_x / 4, BRW_SURFACE_X_OFFSET) |
- SET_FIELD(tile_y / 2, BRW_SURFACE_Y_OFFSET);
+ SET_FIELD(tile_y / 2, BRW_SURFACE_Y_OFFSET) |
+ SET_FIELD(mocs, GEN7_SURFACE_MOCS);
surf[2] = SET_FIELD(rb->Width - 1, GEN7_SURFACE_WIDTH) |
SET_FIELD(rb->Height - 1, GEN7_SURFACE_HEIGHT);
--
1.8.1.4
More information about the mesa-dev
mailing list