[Mesa-dev] [PATCH 2/2] r600g: add htile support v16
j.glisse at gmail.com
j.glisse at gmail.com
Mon Dec 17 12:33:31 PST 2012
From: Jerome Glisse <jglisse at redhat.com>
htile is used for HiZ and HiS support and fast Z/S clears.
This commit just adds the htile setup and Fast Z clear.
We don't take full advantage of HiS with that patch.
v2 really use fast clear, still random issue with some tiles
need to try more flush combination, fix depth/stencil
texture decompression
v3 fix random issue on r6xx/r7xx
v4 rebase on top of lastest mesa, disable CB export when clearing
htile surface to avoid wasting bandwidth
v5 resummarize htile surface when uploading z value. Fix z/stencil
decompression, the custom blitter with custom dsa is no longer
needed.
v6 Reorganize render control/override update mecanism, fixing more
issues in the process.
v7 Add nop after depth surface base update to work around some htile
flushing issue. For htile to 8x8 on r6xx/r7xx as other combination
have issue. Do not enable hyperz when flushing/uncompressing
depth buffer.
v8 Fix htile surface, preload and prefetch setup. Only set preload
and prefetch on htile surface clear like fglrx. Record depth
clear value per level. Support several level for the htile
surface. First depth clear can't be a fast clear.
v9 Fix comments, properly account new register in emit function,
disable fast zclear if clearing different layer of texture
array to different value
v10 Disable hyperz for texture array making test simpler. Force
db_misc_state update when no depth buffer is bound. Remove
unused variable, rename depth_clearstencil to depth_clear.
Don't allocate htile surface for flushed depth. Something
broken the cliprect change, this need to be investigated.
v11 Rebase on top of newer mesa
v12 Rebase on top of newer mesa
v13 Rebase on top of newer mesa, htile surface need to be initialized
to zero, somehow special casing first clear to not use fast clear
and thus initialize the htile surface with proper value does not
work in all case.
v14 Use resource not texture for htile buffer make the htile buffer
size computation easier and simpler. Disable preload on evergreen
as its still troublesome in some case
v15 Cleanup some comment and remove some left over
v16 Define name for bit 20 of CP_COHER_CNTL
Signed-off-by: Pierre-Eric Pelloux-Prayer <pelloux at gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
Signed-off-by: Jerome Glisse <jglisse at redhat.com>
---
src/gallium/drivers/r600/evergreen_state.c | 65 ++++++++++++++++++++++++++----
src/gallium/drivers/r600/evergreend.h | 2 +
src/gallium/drivers/r600/r600_blit.c | 28 +++++++++++++
src/gallium/drivers/r600/r600_hw_context.c | 7 ++--
src/gallium/drivers/r600/r600_pipe.c | 8 ++++
src/gallium/drivers/r600/r600_pipe.h | 26 +++++++-----
src/gallium/drivers/r600/r600_resource.h | 9 +++++
src/gallium/drivers/r600/r600_state.c | 57 ++++++++++++++++++++++++--
src/gallium/drivers/r600/r600_texture.c | 38 +++++++++++++++++
src/gallium/drivers/r600/r600d.h | 5 +++
10 files changed, 221 insertions(+), 24 deletions(-)
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 58964c4..032af78 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1545,6 +1545,18 @@ static void evergreen_init_depth_surface(struct r600_context *rctx,
S_028044_FORMAT(V_028044_STENCIL_8);
}
+ surf->htile_enabled = 0;
+ /* use htile only for first level */
+ if (rtex->htile && !level) {
+ surf->htile_enabled = 1;
+ surf->db_htile_data_base = 0;
+ surf->db_htile_surface = S_028ABC_HTILE_WIDTH(1) |
+ S_028ABC_HTILE_HEIGHT(1) |
+ S_028ABC_LINEAR(1);
+ surf->db_depth_info |= S_028040_TILE_SURFACE_ENABLE(1);
+ surf->db_preload_control = 0;
+ }
+
surf->depth_initialized = true;
}
@@ -1625,6 +1637,16 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
rctx->poly_offset_state.zs_format = state->zsbuf->format;
rctx->poly_offset_state.atom.dirty = true;
}
+
+ if (rctx->db_state.rsurf != surf) {
+ rctx->db_state.rsurf = surf;
+ rctx->db_state.atom.dirty = true;
+ rctx->db_misc_state.atom.dirty = true;
+ }
+ } else if (rctx->db_state.rsurf) {
+ rctx->db_state.rsurf = NULL;
+ rctx->db_state.atom.dirty = true;
+ rctx->db_misc_state.atom.dirty = true;
}
if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs) {
@@ -2081,6 +2103,28 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_
r600_write_value(cs, 0xf | (a->dual_src_blend ? ps_colormask : 0) | fb_colormask); /* R_02823C_CB_SHADER_MASK */
}
+static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
+{
+ struct radeon_winsys_cs *cs = rctx->cs;
+ struct r600_db_state *a = (struct r600_db_state*)atom;
+
+ if (a->rsurf && a->rsurf->htile_enabled) {
+ struct r600_texture *rtex = (struct r600_texture *)a->rsurf->base.texture;
+ unsigned reloc_idx;
+
+ r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear));
+ r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
+ r600_write_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
+ r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
+ reloc_idx = r600_context_bo_reloc(rctx, rtex->htile, RADEON_USAGE_READWRITE);
+ cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+ cs->buf[cs->cdw++] = reloc_idx;
+ } else {
+ r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, 0);
+ r600_write_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0);
+ }
+}
+
static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = rctx->cs;
@@ -2088,7 +2132,6 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
unsigned db_render_control = 0;
unsigned db_count_control = 0;
unsigned db_render_override =
- S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) |
S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
@@ -2099,7 +2142,12 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
}
db_render_override |= S_02800C_NOOP_CULL_DISABLE(1);
}
-
+ if (rctx->db_state.rsurf && rctx->db_state.rsurf->htile_enabled) {
+ /* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */
+ db_render_override |= S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_OFF);
+ } else {
+ db_render_override |= S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE);
+ }
if (a->flush_depthstencil_through_cb) {
assert(a->copy_depth || a->copy_stencil);
@@ -2112,6 +2160,10 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
S_028000_STENCIL_COMPRESS_DISABLE(1);
db_render_override |= S_02800C_DISABLE_PIXEL_RATE_TILES(1);
}
+ if (a->htile_clear) {
+ /* FIXME we might want to disable cliprect here */
+ db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(1);
+ }
r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
r600_write_value(cs, db_render_control); /* R_028000_DB_RENDER_CONTROL */
@@ -2424,6 +2476,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
r600_init_atom(rctx, &rctx->clip_misc_state.atom, id++, r600_emit_clip_misc_state, 6);
r600_init_atom(rctx, &rctx->clip_state.atom, id++, evergreen_emit_clip_state, 26);
r600_init_atom(rctx, &rctx->db_misc_state.atom, id++, evergreen_emit_db_misc_state, 10);
+ r600_init_atom(rctx, &rctx->db_state.atom, id++, evergreen_emit_db_state, 14);
r600_init_atom(rctx, &rctx->dsa_state.atom, id++, r600_emit_cso_state, 0);
r600_init_atom(rctx, &rctx->poly_offset_state.atom, id++, evergreen_emit_polygon_offset, 6);
r600_init_atom(rctx, &rctx->rasterizer_state.atom, id++, r600_emit_cso_state, 0);
@@ -2544,9 +2597,7 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
- r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
- r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
- r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */
+ r600_store_context_reg(cb, R_028028_DB_STENCIL_CLEAR, 0);
r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0);
@@ -2992,9 +3043,7 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
- r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
- r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
- r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */
+ r600_store_context_reg(cb, R_028028_DB_STENCIL_CLEAR, 0);
r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0);
r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index c91b2d8..d9dba95 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -1888,6 +1888,8 @@
#define R_028AC0_DB_SRESULTS_COMPARE_STATE0 0x00028AC0
#define R_028AC4_DB_SRESULTS_COMPARE_STATE1 0x00028AC4
#define R_028AC8_DB_PRELOAD_CONTROL 0x00028AC8
+#define S_028AC8_MAX_X(x) (((x) & 0xff) << 16)
+#define S_028AC8_MAX_Y(x) (((x) & 0xff) << 24)
#define R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 0x028AD0
#define R_028AD4_VGT_STRMOUT_VTX_STRIDE_0 0x028AD4
#define R_028AD8_VGT_STRMOUT_BUFFER_BASE_0 0x028AD8
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index f702f7b..6a83053 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -449,11 +449,39 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
struct r600_context *rctx = (struct r600_context *)ctx;
struct pipe_framebuffer_state *fb = &rctx->framebuffer.state;
+ /* if hyperz enabled just clear hyperz */
+ if (fb->zsbuf && (buffers & PIPE_CLEAR_DEPTH)) {
+ struct r600_texture *rtex;
+ unsigned level = fb->zsbuf->u.tex.level;
+
+ rtex = (struct r600_texture*)fb->zsbuf->texture;
+
+ /* We can't use hyperz fast clear if each slice of a texture
+ * array are clear to different value. To simplify code just
+ * disable fast clear for texture array.
+ */
+ /* Only use htile for first level */
+ if (rtex->htile && !level && rtex->surface.array_size == 1) {
+ if (rtex->depth_clear != depth) {
+ rtex->depth_clear = depth;
+ rctx->db_state.atom.dirty = true;
+ }
+ rctx->db_misc_state.htile_clear = true;
+ rctx->db_misc_state.atom.dirty = true;
+ }
+ }
+
r600_blitter_begin(ctx, R600_CLEAR);
util_blitter_clear(rctx->blitter, fb->width, fb->height,
fb->nr_cbufs, buffers, fb->nr_cbufs ? fb->cbufs[0]->format : PIPE_FORMAT_NONE,
color, depth, stencil);
r600_blitter_end(ctx);
+
+ /* disable fast clear */
+ if (rctx->db_misc_state.htile_clear) {
+ rctx->db_misc_state.htile_clear = false;
+ rctx->db_misc_state.atom.dirty = true;
+ }
}
static void r600_clear_render_target(struct pipe_context *ctx,
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index bce7cc8..e81c61e 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -630,13 +630,13 @@ void r600_flush_emit(struct r600_context *rctx)
S_0085F0_DB_ACTION_ENA(1) |
S_0085F0_SH_ACTION_ENA(1) |
S_0085F0_SMX_ACTION_ENA(1) |
- (1 << 20); /* unknown bit */
+ S_0085F0_FULL_CACHE_ENA(1);
} else {
cp_coher_cntl = S_0085F0_SMX_ACTION_ENA(1) |
S_0085F0_SH_ACTION_ENA(1) |
S_0085F0_VC_ACTION_ENA(1) |
S_0085F0_TC_ACTION_ENA(1) |
- (1 << 20); /* unknown bit */
+ S_0085F0_FULL_CACHE_ENA(1);
}
}
@@ -644,7 +644,7 @@ void r600_flush_emit(struct r600_context *rctx)
cp_coher_cntl |= 0x01900000;
cp_coher_cntl |= S_0085F0_VC_ACTION_ENA(1) |
S_0085F0_TC_ACTION_ENA(1) |
- (1 << 20); /* unknown bit */
+ S_0085F0_FULL_CACHE_ENA(1);
emit_flush = 1;
}
@@ -741,6 +741,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
ctx->clip_misc_state.atom.dirty = true;
ctx->clip_state.atom.dirty = true;
ctx->db_misc_state.atom.dirty = true;
+ ctx->db_state.atom.dirty = true;
ctx->framebuffer.atom.dirty = true;
ctx->poly_offset_state.atom.dirty = true;
ctx->vgt_state.atom.dirty = true;
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 19147d9..e497744 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -1032,6 +1032,14 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
LIST_INITHEAD(&rscreen->fences.blocks);
pipe_mutex_init(rscreen->fences.mutex);
+ /* Hyperz is very lockup prone any code that touch related part should be
+ * carefully tested especialy on r6xx/r7xx Development show that some piglit
+ * case were triggering lockup quickly such as :
+ * piglit/bin/depthstencil-render-miplevels 1024 d=s=z24_s8
+ */
+ rscreen->use_hyperz = debug_get_bool_option("R600_HYPERZ", TRUE);
+ rscreen->use_hyperz = rscreen->info.drm_minor >= 26 ? rscreen->use_hyperz : FALSE;
+
rscreen->global_pool = compute_memory_pool_new(rscreen);
return &rscreen->screen;
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index c348c76..c52e0e1 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -35,7 +35,7 @@
#include "r600_resource.h"
#include "evergreen_compute.h"
-#define R600_NUM_ATOMS 36
+#define R600_NUM_ATOMS 37
#define R600_MAX_USER_CONST_BUFFERS 1
#define R600_MAX_DRIVER_CONST_BUFFERS 2
@@ -77,15 +77,21 @@ struct r600_command_buffer {
unsigned pkt_flags;
};
+struct r600_db_state {
+ struct r600_atom atom;
+ struct r600_surface *rsurf;
+};
+
struct r600_db_misc_state {
- struct r600_atom atom;
- bool occlusion_query_enabled;
- bool flush_depthstencil_through_cb;
- bool flush_depthstencil_in_place;
- bool copy_depth, copy_stencil;
- unsigned copy_sample;
- unsigned log_samples;
- unsigned db_shader_control;
+ struct r600_atom atom;
+ bool occlusion_query_enabled;
+ bool flush_depthstencil_through_cb;
+ bool flush_depthstencil_in_place;
+ bool copy_depth, copy_stencil;
+ unsigned copy_sample;
+ unsigned log_samples;
+ unsigned db_shader_control;
+ bool htile_clear;
};
struct r600_cb_misc_state {
@@ -220,6 +226,7 @@ struct r600_screen {
bool has_streamout;
bool has_msaa;
enum r600_msaa_texture_mode msaa_texture_support;
+ bool use_hyperz;
struct r600_tiling_info tiling_info;
struct r600_pipe_fences fences;
@@ -439,6 +446,7 @@ struct r600_context {
struct r600_clip_misc_state clip_misc_state;
struct r600_clip_state clip_state;
struct r600_db_misc_state db_misc_state;
+ struct r600_db_state db_state;
struct r600_cso_state dsa_state;
struct r600_framebuffer framebuffer;
struct r600_poly_offset_state poly_offset_state;
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index 007d5e0..dd0b613 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -60,6 +60,10 @@ struct r600_texture {
* MSAA textures cannot have mipmaps. */
unsigned fmask_offset, fmask_size, fmask_bank_height;
unsigned cmask_offset, cmask_size, cmask_slice_tile_max;
+
+ struct r600_resource *htile;
+ /* use htile only for first level */
+ float depth_clear;
};
#define R600_TEX_IS_TILED(tex, level) ((tex)->array_mode[level] != V_038000_ARRAY_LINEAR_GENERAL && (tex)->array_mode[level] != V_038000_ARRAY_LINEAR_ALIGNED)
@@ -113,6 +117,11 @@ struct r600_surface {
unsigned db_stencil_info; /* EG only */
unsigned db_prefetch_limit; /* R600 only */
unsigned pa_su_poly_offset_db_fmt_cntl;
+
+ unsigned htile_enabled;
+ unsigned db_htile_surface;
+ unsigned db_htile_data_base;
+ unsigned db_preload_control;
};
/* Return if the depth format can be read without the DB->CB copy on r6xx-r7xx. */
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index f969808..ef4edca 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1440,6 +1440,18 @@ static void r600_init_depth_surface(struct r600_context *rctx,
default:;
}
+ surf->htile_enabled = 0;
+ /* use htile only for first level */
+ if (rtex->htile && !level) {
+ surf->htile_enabled = 1;
+ surf->db_htile_data_base = 0;
+ surf->db_htile_surface = S_028D24_HTILE_WIDTH(1) |
+ S_028D24_HTILE_HEIGHT(1) |
+ S_028D24_LINEAR(1);
+ /* preload is not working properly on r6xx/r7xx */
+ surf->db_depth_info |= S_028010_TILE_SURFACE_ENABLE(1);
+ }
+
surf->depth_initialized = true;
}
@@ -1530,6 +1542,16 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
rctx->poly_offset_state.zs_format = state->zsbuf->format;
rctx->poly_offset_state.atom.dirty = true;
}
+
+ if (rctx->db_state.rsurf != surf) {
+ rctx->db_state.rsurf = surf;
+ rctx->db_state.atom.dirty = true;
+ rctx->db_misc_state.atom.dirty = true;
+ }
+ } else if (rctx->db_state.rsurf) {
+ rctx->db_state.rsurf = NULL;
+ rctx->db_state.atom.dirty = true;
+ rctx->db_misc_state.atom.dirty = true;
}
if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs) {
@@ -1831,13 +1853,32 @@ static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom
}
}
+static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
+{
+ struct radeon_winsys_cs *cs = rctx->cs;
+ struct r600_db_state *a = (struct r600_db_state*)atom;
+
+ if (a->rsurf && a->rsurf->htile_enabled) {
+ struct r600_texture *rtex = (struct r600_texture *)a->rsurf->base.texture;
+ unsigned reloc_idx;
+
+ r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear));
+ r600_write_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
+ r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
+ reloc_idx = r600_context_bo_reloc(rctx, rtex->htile, RADEON_USAGE_READWRITE);
+ cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+ cs->buf[cs->cdw++] = reloc_idx;
+ } else {
+ r600_write_context_reg(cs, R_028D24_DB_HTILE_SURFACE, 0);
+ }
+}
+
static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = rctx->cs;
struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom;
unsigned db_render_control = 0;
unsigned db_render_override =
- S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) |
S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) |
S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE);
@@ -1847,6 +1888,12 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
}
db_render_override |= S_028D10_NOOP_CULL_DISABLE(1);
}
+ if (rctx->db_state.rsurf && rctx->db_state.rsurf->htile_enabled) {
+ /* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */
+ db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_OFF);
+ } else {
+ db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE);
+ }
if (a->flush_depthstencil_through_cb) {
assert(a->copy_depth || a->copy_stencil);
@@ -1859,6 +1906,9 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
S_028D0C_STENCIL_COMPRESS_DISABLE(1);
db_render_override |= S_028D10_NOOP_CULL_DISABLE(1);
}
+ if (a->htile_clear) {
+ db_render_control |= S_028D0C_DEPTH_CLEAR_ENABLE(1);
+ }
r600_write_context_reg_seq(cs, R_028D0C_DB_RENDER_CONTROL, 2);
r600_write_value(cs, db_render_control); /* R_028D0C_DB_RENDER_CONTROL */
@@ -2175,6 +2225,7 @@ void r600_init_state_functions(struct r600_context *rctx)
r600_init_atom(rctx, &rctx->clip_misc_state.atom, id++, r600_emit_clip_misc_state, 6);
r600_init_atom(rctx, &rctx->clip_state.atom, id++, r600_emit_clip_state, 26);
r600_init_atom(rctx, &rctx->db_misc_state.atom, id++, r600_emit_db_misc_state, 7);
+ r600_init_atom(rctx, &rctx->db_state.atom, id++, r600_emit_db_state, 11);
r600_init_atom(rctx, &rctx->dsa_state.atom, id++, r600_emit_cso_state, 0);
r600_init_atom(rctx, &rctx->poly_offset_state.atom, id++, r600_emit_polygon_offset, 6);
r600_init_atom(rctx, &rctx->rasterizer_state.atom, id++, r600_emit_cso_state, 0);
@@ -2530,9 +2581,7 @@ void r600_init_atom_start_cs(struct r600_context *rctx)
r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
- r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
- r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
- r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */
+ r600_store_context_reg(cb, R_028028_DB_STENCIL_CLEAR, 0);
r600_store_context_reg_seq(cb, R_0286DC_SPI_FOG_CNTL, 3);
r600_store_value(cb, 0); /* R_0286DC_SPI_FOG_CNTL */
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 56e9b64..c4c5d70 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -438,6 +438,44 @@ r600_texture_create_object(struct pipe_screen *screen,
/* Tiled depth textures utilize the non-displayable tile order. */
rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;
+ /* only enable hyperz for PIPE_TEXTURE_2D not for PIPE_TEXTURE_2D_ARRAY
+ * Thought it might still be interessting to use hyperz for texture
+ * array without using fast clear features
+ */
+ rtex->htile = NULL;
+ if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER | R600_RESOURCE_FLAG_FLUSHED_DEPTH)) &&
+ util_format_is_depth_or_stencil(base->format) &&
+ rscreen->use_hyperz &&
+ base->target == PIPE_TEXTURE_2D &&
+ rtex->surface.level[0].nblk_x >= 32 &&
+ rtex->surface.level[0].nblk_y >= 32) {
+ unsigned sw = rtex->surface.level[0].nblk_x * rtex->surface.blk_w;
+ unsigned sh = rtex->surface.level[0].nblk_y * rtex->surface.blk_h;
+ unsigned htile_size;
+ unsigned npipes = rscreen->info.r600_num_tile_pipes;
+
+ /* this alignment and htile size only apply to linear htile buffer */
+ sw = align(sw, 16 << 3);
+ sh = align(sh, npipes << 3);
+ htile_size = (sw >> 3) * (sh >> 3) * 4;
+ /* must be aligned with 2K * npipes */
+ htile_size = align(htile_size, (2 << 10) * npipes);
+
+ rtex->htile = (struct r600_resource*)pipe_buffer_create(&rscreen->screen, PIPE_BIND_CUSTOM,
+ PIPE_USAGE_STATIC, htile_size);
+ if (rtex->htile == NULL) {
+ /* this is not a fatal error as we can still keep rendering
+ * without htile buffer
+ */
+ R600_ERR("r600: failed to create bo for htile buffers\n");
+ } else {
+ void *ptr;
+ ptr = rscreen->ws->buffer_map(rtex->htile->cs_buf, NULL, PIPE_TRANSFER_WRITE);
+ memset(ptr, 0x0, htile_size);
+ rscreen->ws->buffer_unmap(rtex->htile->cs_buf);
+ }
+ }
+
/* Now create the backing buffer. */
if (!buf && alloc_bo) {
unsigned base_align = rtex->surface.bo_alignment;
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 78fa6b6..69bfd7a 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -530,6 +530,7 @@
#define S_028010_ZRANGE_PRECISION(x) (((x) & 0x1) << 31)
#define G_028010_ZRANGE_PRECISION(x) (((x) >> 31) & 0x1)
#define C_028010_ZRANGE_PRECISION 0x7FFFFFFF
+#define R_028014_DB_HTILE_DATA_BASE 0x00028014
#define R_028414_CB_BLEND_RED 0x028414
#define S_028414_BLEND_RED(x) (((x) & 0xFFFFFFFF) << 0)
#define G_028414_BLEND_RED(x) (((x) >> 0) & 0xFFFFFFFF)
@@ -3360,6 +3361,10 @@
#define S_0085F0_CB11_DEST_BASE_ENA(x) (((x) & 0x1) << 18)
#define G_0085F0_CB11_DEST_BASE_ENA(x) (((x) >> 18) & 0x1)
/* evergreen only end */
+/* evergreen and r7xx only */
+#define S_0085F0_FULL_CACHE_ENA(x) (((x) & 0x1) << 20)
+#define G_0085F0_FULL_CACHE_ENA(x) (((x) >> 20) & 0x1)
+/* evergreen and r7xx only end */
#define S_0085F0_TC_ACTION_ENA(x) (((x) & 0x1) << 23)
#define G_0085F0_TC_ACTION_ENA(x) (((x) >> 23) & 0x1)
#define C_0085F0_TC_ACTION_ENA 0xFF7FFFFF
--
1.7.11.7
More information about the mesa-dev
mailing list