Mesa (master): r300g: fix HiZ memory size computation and deciding when to use HiZ

Marek Olšák mareko at kemper.freedesktop.org
Tue Mar 1 00:09:45 UTC 2011


Module: Mesa
Branch: master
Commit: d99ec708afbb785ce05031661222b38c9447059f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=d99ec708afbb785ce05031661222b38c9447059f

Author: Marek Olšák <maraeo at gmail.com>
Date:   Mon Feb 28 13:38:01 2011 +0100

r300g: fix HiZ memory size computation and deciding when to use HiZ

I removed the HiZ memory management because the HiZ RAM is too small,
and also in the hope that HiZ will be enabled more often.

This also programs HIZ_PITCH and ZMASK_PITCH with aligned strides.

---

 src/gallium/drivers/r300/r300_blit.c          |   63 +++++++++--------
 src/gallium/drivers/r300/r300_context.c       |   17 ++----
 src/gallium/drivers/r300/r300_context.h       |   26 ++++---
 src/gallium/drivers/r300/r300_emit.c          |   66 ++++---------------
 src/gallium/drivers/r300/r300_hyperz.c        |   69 +------------------
 src/gallium/drivers/r300/r300_hyperz.h        |   35 ----------
 src/gallium/drivers/r300/r300_state.c         |   30 ++------
 src/gallium/drivers/r300/r300_state_derived.c |    3 +-
 src/gallium/drivers/r300/r300_texture.c       |    8 +--
 src/gallium/drivers/r300/r300_texture_desc.c  |   89 +++++++++++++++++++------
 10 files changed, 148 insertions(+), 258 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index 4f86db3..fd8ef44 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -22,7 +22,6 @@
 
 #include "r300_context.h"
 #include "r300_emit.h"
-#include "r300_hyperz.h"
 #include "r300_texture.h"
 #include "r300_winsys.h"
 
@@ -117,6 +116,14 @@ static boolean r300_fast_zclear_allowed(struct r300_context *r300)
     return r300_resource(fb->zsbuf->texture)->tex.zmask_dwords[fb->zsbuf->u.tex.level];
 }
 
+static boolean r300_hiz_clear_allowed(struct r300_context *r300)
+{
+    struct pipe_framebuffer_state *fb =
+        (struct pipe_framebuffer_state*)r300->fb_state.state;
+
+    return r300_resource(fb->zsbuf->texture)->tex.hiz_dwords[fb->zsbuf->u.tex.level];
+}
+
 static uint32_t r300_depth_clear_value(enum pipe_format format,
                                        double depth, unsigned stencil)
 {
@@ -190,8 +197,6 @@ static void r300_clear(struct pipe_context* pipe,
         (struct pipe_framebuffer_state*)r300->fb_state.state;
     struct r300_hyperz_state *hyperz =
         (struct r300_hyperz_state*)r300->hyperz_state.state;
-    struct r300_resource *zstex =
-            fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL;
     uint32_t width = fb->width;
     uint32_t height = fb->height;
     boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
@@ -200,20 +205,17 @@ static void r300_clear(struct pipe_context* pipe,
     /* Enable fast Z clear.
      * The zbuffer must be in micro-tiled mode, otherwise it locks up. */
     if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) {
-        hyperz_dcv = hyperz->zb_depthclearvalue =
-            r300_depth_clear_value(fb->zsbuf->format, depth, stencil);
-
         if (r300_fast_zclear_allowed(r300)) {
+            hyperz_dcv = hyperz->zb_depthclearvalue =
+                r300_depth_clear_value(fb->zsbuf->format, depth, stencil);
+
             r300_mark_atom_dirty(r300, &r300->zmask_clear);
             buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
         }
 
-        if (zstex->hiz_mem[fb->zsbuf->u.tex.level])
+        if (r300_hiz_clear_allowed(r300)) {
             r300_mark_atom_dirty(r300, &r300->hiz_clear);
-
-        /* XXX Change this to r300_mark_atom_dirty(r300, &r300->hyperz_state);
-         * once hiz offset is constant. */
-        r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
+        }
     }
 
     /* Enable CBZB clear. */
@@ -240,14 +242,14 @@ static void r300_clear(struct pipe_context* pipe,
                            fb->nr_cbufs,
                            buffers, rgba, depth, stencil);
         r300_blitter_end(r300);
-    } else if (r300->zmask_clear.dirty) {
+    } else if (r300->zmask_clear.dirty || r300->hiz_clear.dirty) {
         /* Just clear zmask and hiz now, this does not use the standard draw
          * procedure. */
         unsigned dwords;
 
         /* Calculate zmask_clear and hiz_clear atom sizes. */
         r300_update_hyperz_state(r300);
-        dwords = r300->zmask_clear.size +
+        dwords = (r300->zmask_clear.dirty ? r300->zmask_clear.size : 0) +
                  (r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) +
                  r300_get_num_cs_end_dwords(r300);
 
@@ -257,9 +259,11 @@ static void r300_clear(struct pipe_context* pipe,
         }
 
         /* Emit clear packets. */
-        r300_emit_zmask_clear(r300, r300->zmask_clear.size,
-                              r300->zmask_clear.state);
-        r300->zmask_clear.dirty = FALSE;
+        if (r300->zmask_clear.dirty) {
+            r300_emit_zmask_clear(r300, r300->zmask_clear.size,
+                                  r300->zmask_clear.state);
+            r300->zmask_clear.dirty = FALSE;
+        }
         if (r300->hiz_clear.dirty) {
             r300_emit_hiz_clear(r300, r300->hiz_clear.size,
                                 r300->hiz_clear.state);
@@ -279,9 +283,8 @@ static void r300_clear(struct pipe_context* pipe,
     /* Enable fastfill and/or hiz.
      *
      * If we cleared zmask/hiz, it's in use now. The Hyper-Z state update
-     * looks if zmask/hiz is in use and enables fastfill accordingly. */
-    if (r300->zmask_in_use ||
-        (zstex && zstex->hiz_in_use[fb->zsbuf->u.tex.level])) {
+     * looks if zmask/hiz is in use and programs hardware accordingly. */
+    if (r300->zmask_in_use || r300->hiz_in_use) {
         r300_mark_atom_dirty(r300, &r300->hyperz_state);
     }
 }
@@ -295,7 +298,7 @@ static void r300_clear_render_target(struct pipe_context *pipe,
 {
     struct r300_context *r300 = r300_context(pipe);
 
-    r300->zmask_locked = TRUE;
+    r300->hyperz_locked = TRUE;
     r300_mark_atom_dirty(r300, &r300->hyperz_state);
 
     r300_blitter_begin(r300, R300_CLEAR_SURFACE);
@@ -303,7 +306,7 @@ static void r300_clear_render_target(struct pipe_context *pipe,
                                      dstx, dsty, width, height);
     r300_blitter_end(r300);
 
-    r300->zmask_locked = FALSE;
+    r300->hyperz_locked = FALSE;
     r300_mark_atom_dirty(r300, &r300->hyperz_state);
 }
 
@@ -320,11 +323,11 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe,
     struct pipe_framebuffer_state *fb =
         (struct pipe_framebuffer_state*)r300->fb_state.state;
 
-    if (r300->zmask_in_use && !r300->zmask_locked) {
+    if (r300->zmask_in_use && !r300->hyperz_locked) {
         if (fb->zsbuf->texture == dst->texture) {
             r300_decompress_zmask(r300);
         } else {
-            r300->zmask_locked = TRUE;
+            r300->hyperz_locked = TRUE;
             r300_mark_atom_dirty(r300, &r300->hyperz_state);
         }
     }
@@ -334,8 +337,8 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe,
                                      dstx, dsty, width, height);
     r300_blitter_end(r300);
 
-    if (r300->zmask_locked) {
-        r300->zmask_locked = FALSE;
+    if (r300->hyperz_locked) {
+        r300->hyperz_locked = FALSE;
         r300_mark_atom_dirty(r300, &r300->hyperz_state);
     }
 }
@@ -345,7 +348,7 @@ void r300_decompress_zmask(struct r300_context *r300)
     struct pipe_framebuffer_state *fb =
         (struct pipe_framebuffer_state*)r300->fb_state.state;
 
-    if (!r300->zmask_in_use || r300->zmask_locked)
+    if (!r300->zmask_in_use || r300->hyperz_locked)
         return;
 
     r300->zmask_decompress = TRUE;
@@ -420,12 +423,12 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
             util_format_description(dst->format);
     struct pipe_box box;
 
-    if (r300->zmask_in_use && !r300->zmask_locked) {
+    if (r300->zmask_in_use && !r300->hyperz_locked) {
         if (fb->zsbuf->texture == src ||
             fb->zsbuf->texture == dst) {
             r300_decompress_zmask(r300);
         } else {
-            r300->zmask_locked = TRUE;
+            r300->hyperz_locked = TRUE;
             r300_mark_atom_dirty(r300, &r300->hyperz_state);
         }
     }
@@ -502,8 +505,8 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
     if (old_dst.format != new_dst.format)
         r300_resource_set_properties(pipe->screen, dst, 0, &old_dst);
 
-    if (r300->zmask_locked) {
-        r300->zmask_locked = FALSE;
+    if (r300->hyperz_locked) {
+        r300->hyperz_locked = FALSE;
         r300_mark_atom_dirty(r300, &r300->hyperz_state);
     }
 }
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index d422ffe..61041bf 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -30,7 +30,6 @@
 #include "r300_cb.h"
 #include "r300_context.h"
 #include "r300_emit.h"
-#include "r300_hyperz.h"
 #include "r300_screen.h"
 #include "r300_screen_buffer.h"
 #include "r300_winsys.h"
@@ -227,7 +226,7 @@ static boolean r300_setup_atoms(struct r300_context* r300)
     if (can_hyperz) {
         /* HiZ Clear */
         if (has_hiz_ram)
-            R300_INIT_ATOM(hiz_clear, 0);
+            R300_INIT_ATOM(hiz_clear, 4);
         /* zmask clear */
         R300_INIT_ATOM(zmask_clear, 4);
     }
@@ -447,16 +446,10 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
 
     /* Render functions must be initialized after blitter. */
     r300_init_render_functions(r300);
+    r300_init_states(&r300->context);
 
     rws->cs_set_flush(r300->cs, r300_flush_cb, r300);
 
-    /* setup hyper-z mm */
-    if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
-        if (!r300_hyperz_init_mm(r300))
-            goto fail;
-
-    r300_init_states(&r300->context);
-
     /* The KIL opcode needs the first texture unit to be enabled
      * on r3xx-r4xx. In order to calm down the CS checker, we bind this
      * dummy texture there. */
@@ -507,10 +500,10 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     }
 
     /* Print driver info. */
-#ifdef NDEBUG
-    if (DBG_ON(r300, DBG_INFO)) {
-#else
+#ifdef DEBUG
     {
+#else
+    if (DBG_ON(r300, DBG_INFO)) {
 #endif
         fprintf(stderr,
                 "r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n"
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index e9c7d7b..e18f876 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -295,6 +295,8 @@ struct r300_surface {
 
     uint32_t offset;    /* COLOROFFSET or DEPTHOFFSET. */
     uint32_t pitch;     /* COLORPITCH or DEPTHPITCH. */
+    uint32_t pitch_zmask; /* ZMASK_PITCH */
+    uint32_t pitch_hiz;   /* HIZ_PITCH */
     uint32_t format;    /* US_OUT_FMT or ZB_FORMAT. */
 
     /* Parameters dedicated to the CBZB clear. */
@@ -363,8 +365,12 @@ struct r300_texture_desc {
 
     /* Zbuffer compression info for each miplevel. */
     boolean zcomp8x8[R300_MAX_TEXTURE_LEVELS];
-    /* If zero, then disable compression. */
+    /* If zero, then disable Z compression/HiZ. */
     unsigned zmask_dwords[R300_MAX_TEXTURE_LEVELS];
+    unsigned hiz_dwords[R300_MAX_TEXTURE_LEVELS];
+    /* Zmask/HiZ strides for each miplevel. */
+    unsigned zmask_stride_in_pixels[R300_MAX_TEXTURE_LEVELS];
+    unsigned hiz_stride_in_pixels[R300_MAX_TEXTURE_LEVELS];
 };
 
 struct r300_resource
@@ -390,10 +396,6 @@ struct r300_resource
     /* Where the texture starts in the buffer. */
     unsigned tex_offset;
 
-    /* HiZ memory allocations. */
-    struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS];
-    boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS];
-
     /* This is the level tiling flags were last time set for.
      * It's used to prevent redundant tiling-flags changes from happening.*/
     unsigned surface_level;
@@ -545,22 +547,21 @@ struct r300_context {
     int sprite_coord_enable;
     /* Whether two-sided color selection is enabled (AKA light_twoside). */
     boolean two_sided_color;
-
+    /* Whether fast color clear is enabled. */
     boolean cbzb_clear;
     /* Whether ZMASK is enabled. */
     boolean zmask_in_use;
     /* Whether ZMASK is being decompressed. */
     boolean zmask_decompress;
-    /* Whether ZMASK is locked, i.e. should be disabled and cannot be taken over. */
-    boolean zmask_locked;
+    /* Whether ZMASK/HIZ is locked, i.e. should be disabled and cannot be taken over. */
+    boolean hyperz_locked;
     /* The zbuffer the ZMASK of which is locked. */
     struct pipe_surface *locked_zbuffer;
+    /* Whether HIZ is enabled. */
+    boolean hiz_in_use;
 
     void *dsa_decompress_zmask;
 
-    /* two mem block managers for hiz/zmask ram space */
-    struct mem_block *hiz_mm;
-
     struct u_vbuf_mgr *vbuf_mgr;
 
     struct util_slab_mempool pool_transfers;
@@ -644,6 +645,9 @@ void r300_decompress_zmask(struct r300_context *r300);
 void r300_decompress_zmask_locked_unsafe(struct r300_context *r300);
 void r300_decompress_zmask_locked(struct r300_context *r300);
 
+/* r300_hyperz.c */
+void r300_update_hyperz_state(struct r300_context* r300);
+
 /* r300_query.c */
 void r300_resume_query(struct r300_context *r300,
                        struct r300_query *query);
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index e2e4719..1adac34 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -425,27 +425,12 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
         OUT_CS_RELOC(surf);
 
         if (can_hyperz) {
-            uint32_t surf_pitch;
-            struct r300_resource *tex;
-            int level = surf->base.u.tex.level;
-            tex = r300_resource(surf->base.texture);
-
-            surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK;
-
             /* HiZ RAM. */
-            if (r300->screen->caps.hiz_ram) {
-                if (tex->hiz_mem[level]) {
-                    OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs << 2);
-                    OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch);
-                } else {
-                    OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0);
-                    OUT_CS_REG(R300_ZB_HIZ_PITCH, 0);
-                }
-            }
-
+            OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0);
+            OUT_CS_REG(R300_ZB_HIZ_PITCH, surf->pitch_hiz);
             /* Z Mask RAM. (compressed zbuffer) */
             OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0);
-            OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch);
+            OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf->pitch_zmask);
         }
     }
 
@@ -1039,56 +1024,29 @@ void r300_emit_viewport_state(struct r300_context* r300,
     END_CS;
 }
 
-static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val)
-{
-    CS_LOCALS(r300);
-    BEGIN_CS(4);
-    OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2);
-    OUT_CS(start);
-    OUT_CS(count);
-    OUT_CS(val);
-    END_CS;
-}
-
-#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
-
 void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state)
 {
     struct pipe_framebuffer_state *fb =
         (struct pipe_framebuffer_state*)r300->fb_state.state;
     struct r300_hyperz_state *z =
         (struct r300_hyperz_state*)r300->hyperz_state.state;
-    struct r300_screen* r300screen = r300->screen;
-    uint32_t stride, offset = 0, height, offset_shift;
     struct r300_resource* tex;
-    int i;
+    CS_LOCALS(r300);
 
     tex = r300_resource(fb->zsbuf->texture);
 
-    offset = tex->hiz_mem[fb->zsbuf->u.tex.level]->ofs;
-    stride = tex->tex.stride_in_pixels[fb->zsbuf->u.tex.level];
-
-    /* convert from pixels to 4x4 blocks */
-    stride = ALIGN_DIVUP(stride, 4);
-
-    stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes);    
-    /* there are 4 blocks per dwords */
-    stride = ALIGN_DIVUP(stride, 4);
-
-    height = ALIGN_DIVUP(fb->zsbuf->height, 4);
-
-    offset_shift = 2;
-    offset_shift += (r300screen->caps.num_frag_pipes / 2);
+    BEGIN_CS(size);
+    OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2);
+    OUT_CS(0);
+    OUT_CS(tex->tex.hiz_dwords[fb->zsbuf->u.tex.level]);
+    OUT_CS(0xffffffff);
+    END_CS;
 
-    for (i = 0; i < height; i++) {
-        offset = i * stride;
-        offset <<= offset_shift;
-        r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff);
-    }
     z->current_func = -1;
 
     /* Mark the current zbuffer's hiz ram as in use. */
-    tex->hiz_in_use[fb->zsbuf->u.tex.level] = TRUE;
+    r300->hiz_in_use = TRUE;
+    r300_mark_atom_dirty(r300, &r300->hyperz_state);
 }
 
 void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state)
diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c
index 873e020..7ff643f 100644
--- a/src/gallium/drivers/r300/r300_hyperz.c
+++ b/src/gallium/drivers/r300/r300_hyperz.c
@@ -22,7 +22,6 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
 #include "r300_context.h"
-#include "r300_hyperz.h"
 #include "r300_reg.h"
 #include "r300_fs.h"
 #include "r300_winsys.h"
@@ -100,6 +99,7 @@ static boolean r300_can_hiz(struct r300_context *r300)
 
     if (r300->query_current)
         return FALSE;
+
     /* if stencil fail/zfail op is not KEEP */
     if (r300_dsa_stencil_op_not_keep(&dsa->stencil[0]) ||
         r300_dsa_stencil_op_not_keep(&dsa->stencil[1]))
@@ -139,7 +139,6 @@ static void r300_update_hyperz(struct r300_context* r300)
         (struct pipe_framebuffer_state*)r300->fb_state.state;
     struct r300_resource *zstex =
             fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL;
-    boolean hiz_in_use = FALSE;
 
     z->gb_z_peq_config = 0;
     z->zb_bw_cntl = 0;
@@ -157,10 +156,8 @@ static void r300_update_hyperz(struct r300_context* r300)
     if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
         return;
 
-    hiz_in_use = zstex->hiz_in_use[fb->zsbuf->u.tex.level];
-
     /* Zbuffer compression. */
-    if (r300->zmask_in_use && !r300->zmask_locked) {
+    if (r300->zmask_in_use && !r300->hyperz_locked) {
         z->zb_bw_cntl |= R300_FAST_FILL_ENABLE |
                          /*R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE |*/
                          R300_RD_COMP_ENABLE;
@@ -174,7 +171,8 @@ static void r300_update_hyperz(struct r300_context* r300)
         z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8;
     }
 
-    if (hiz_in_use && r300_can_hiz(r300)) {
+    /* XXX Use can_hiz to disable hyperz for good, instead of turning it off/on. */
+    if (r300->hiz_in_use && !r300->hyperz_locked && r300_can_hiz(r300)) {
         z->zb_bw_cntl |= R300_HIZ_ENABLE |
                          r300_get_hiz_min(r300);
 
@@ -282,18 +280,6 @@ static void r300_update_ztop(struct r300_context* r300)
         r300_mark_atom_dirty(r300, &r300->ztop_state);
 }
 
-#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
-
-static void r300_update_hiz_clear(struct r300_context *r300)
-{
-    struct pipe_framebuffer_state *fb =
-        (struct pipe_framebuffer_state*)r300->fb_state.state;
-    uint32_t height;
-
-    height = ALIGN_DIVUP(fb->zsbuf->height, 4);
-    r300->hiz_clear.size = height * 4;
-}
-
 void r300_update_hyperz_state(struct r300_context* r300)
 {
     r300_update_ztop(r300);
@@ -301,51 +287,4 @@ void r300_update_hyperz_state(struct r300_context* r300)
     if (r300->hyperz_state.dirty) {
         r300_update_hyperz(r300);
     }
-
-    if (r300->hiz_clear.dirty) {
-       r300_update_hiz_clear(r300);
-    }
-}
-
-void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf)
-{
-    struct r300_resource *tex;
-    uint32_t zsize, ndw;
-    int level = surf->base.u.tex.level;
-
-    tex = r300_resource(surf->base.texture);
-
-    if (tex->hiz_mem[level])
-        return;
-
-    zsize = tex->tex.layer_size_in_bytes[level];
-    zsize /= util_format_get_blocksize(tex->b.b.b.format);
-    ndw = ALIGN_DIVUP(zsize, 64);
-
-    tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0);
-}
-
-boolean r300_hyperz_init_mm(struct r300_context *r300)
-{
-    struct r300_screen* r300screen = r300->screen;
-    int frag_pipes = r300screen->caps.num_frag_pipes;
-
-    if (r300screen->caps.hiz_ram) {
-      r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes);
-      if (!r300->hiz_mm) {
-        return FALSE;
-      }
-    }
-
-    return TRUE;
-}
-
-void r300_hyperz_destroy_mm(struct r300_context *r300)
-{
-    struct r300_screen* r300screen = r300->screen;
-
-    if (r300screen->caps.hiz_ram) {
-      u_mmDestroy(r300->hiz_mm);
-      r300->hiz_mm = NULL;
-    }
 }
diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h
deleted file mode 100644
index d4c8e7c..0000000
--- a/src/gallium/drivers/r300/r300_hyperz.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright 2010 Marek Olšák <maraeo at gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_HYPERZ_H
-#define R300_HYPERZ_H
-
-struct r300_context;
-
-void r300_update_hyperz_state(struct r300_context* r300);
-
-void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf);
-
-boolean r300_hyperz_init_mm(struct r300_context *r300);
-void r300_hyperz_destroy_mm(struct r300_context *r300);
-
-#endif
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 09f18b3..be22215 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -45,7 +45,6 @@
 #include "r300_texture.h"
 #include "r300_vs.h"
 #include "r300_winsys.h"
-#include "r300_hyperz.h"
 
 /* r300_state: Functions used to intialize state context by translating
  * Gallium state objects into semi-native r300 state objects. */
@@ -707,7 +706,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
     else if (state->zsbuf) {
         r300->fb_state.size += 10;
         if (can_hyperz)
-            r300->fb_state.size += r300->screen->caps.hiz_ram ? 8 : 4;
+            r300->fb_state.size += 8;
     }
 
     /* The size of the rest of atoms stays the same. */
@@ -720,7 +719,6 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
     struct r300_context* r300 = r300_context(pipe);
     struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
     struct pipe_framebuffer_state *old_state = r300->fb_state.state;
-    boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
     unsigned max_width, max_height, i;
     uint32_t zbuffer_bpp = 0;
 
@@ -738,28 +736,30 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
         return;
     }
 
-    if (old_state->zsbuf && r300->zmask_in_use && !r300->zmask_locked) {
+    if (old_state->zsbuf && r300->zmask_in_use && !r300->hyperz_locked) {
         /* There is a zmask in use, what are we gonna do? */
         if (state->zsbuf) {
             if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) {
                 /* Decompress the currently bound zbuffer before we bind another one. */
                 r300_decompress_zmask(r300);
+                r300->hiz_in_use = FALSE;
             }
         } else {
             /* We don't bind another zbuffer, so lock the current one. */
-            r300->zmask_locked = TRUE;
+            r300->hyperz_locked = TRUE;
             pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf);
         }
-    } else if (r300->zmask_locked && r300->locked_zbuffer) {
+    } else if (r300->hyperz_locked && r300->locked_zbuffer) {
         /* We have a locked zbuffer now, what are we gonna do? */
         if (state->zsbuf) {
             if (!pipe_surface_equal(r300->locked_zbuffer, state->zsbuf)) {
                 /* We are binding some other zbuffer, so decompress the locked one,
                  * it gets unlocked automatically. */
                 r300_decompress_zmask_locked_unsafe(r300);
+                r300->hiz_in_use = FALSE;
             } else {
                 /* We are binding the locked zbuffer again, so unlock it. */
-                r300->zmask_locked = FALSE;
+                r300->hyperz_locked = FALSE;
             }
         }
     }
@@ -778,7 +778,7 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
 
     util_copy_framebuffer_state(r300->fb_state.state, state);
 
-    if (!r300->zmask_locked) {
+    if (!r300->hyperz_locked) {
         pipe_surface_reference(&r300->locked_zbuffer, NULL);
     }
 
@@ -794,20 +794,6 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
             break;
         }
 
-        /* Setup Hyper-Z. */
-        if (can_hyperz) {
-            struct r300_surface *zs_surf = r300_surface(state->zsbuf);
-            struct r300_resource *tex = r300_resource(zs_surf->base.texture);
-            int level = zs_surf->base.u.tex.level;
-
-            /* work out whether we can support hiz on this buffer */
-            r300_hiz_alloc_block(r300, zs_surf);
-
-            DBG(r300, DBG_HYPERZ,
-                "hyper-z features: hiz: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0,
-                tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef);
-        }
-
         /* Polygon offset depends on the zbuffer bit depth. */
         if (r300->zbuffer_bpp != zbuffer_bpp) {
             r300->zbuffer_bpp = zbuffer_bpp;
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 003fe9a..7776ab5 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -29,7 +29,6 @@
 
 #include "r300_context.h"
 #include "r300_fs.h"
-#include "r300_hyperz.h"
 #include "r300_screen.h"
 #include "r300_shader_semantics.h"
 #include "r300_state_inlines.h"
@@ -937,7 +936,7 @@ static void r300_decompress_depth_textures(struct r300_context *r300)
                           state->sampler_state_count);
     unsigned i;
 
-    if (!r300->zmask_locked || !r300->locked_zbuffer) {
+    if (!r300->hyperz_locked || !r300->locked_zbuffer) {
         return;
     }
 
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index b97c45a..bbd3f97 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -676,6 +676,8 @@ static void r300_texture_setup_fb_state(struct r300_surface *surf)
                 R300_DEPTHMACROTILE(tex->tex.macrotile[level]) |
                 R300_DEPTHMICROTILE(tex->tex.microtile);
         surf->format = r300_translate_zsformat(surf->base.format);
+        surf->pitch_zmask = tex->tex.zmask_stride_in_pixels[level];
+        surf->pitch_hiz = tex->tex.hiz_stride_in_pixels[level];
     } else {
         surf->pitch =
                 tex->tex.stride_in_pixels[level] |
@@ -713,14 +715,8 @@ static void r300_texture_destroy(struct pipe_screen *screen,
                                  struct pipe_resource* texture)
 {
     struct r300_resource* tex = (struct r300_resource*)texture;
-    int i;
 
     r300_winsys_bo_reference(&tex->buf, NULL);
-    for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) {
-        if (tex->hiz_mem[i])
-            u_mmFreeMem(tex->hiz_mem[i]);
-    }
-
     FREE(tex);
 }
 
diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c
index 2cfeec7..9dcdf15 100644
--- a/src/gallium/drivers/r300/r300_texture_desc.c
+++ b/src/gallium/drivers/r300/r300_texture_desc.c
@@ -334,12 +334,17 @@ static void r300_setup_cbzb_flags(struct r300_screen *rscreen,
         tex->tex.cbzb_allowed[i] = first_level_valid && tex->tex.macrotile[i];
 }
 
-#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
+static unsigned r300_pixels_to_dwords(unsigned stride,
+                                      unsigned height,
+                                      unsigned xblock, unsigned yblock)
+{
+    return (align(stride, xblock) * align(height, yblock)) / (xblock * yblock);
+}
 
-static void r300_setup_zmask_flags(struct r300_screen *screen,
-                                   struct r300_resource *tex)
+static void r300_setup_hyperz_properties(struct r300_screen *screen,
+                                         struct r300_resource *tex)
 {
-    /* The tile size of 1 DWORD is:
+    /* The tile size of 1 DWORD in ZMASK RAM is:
      *
      * GPU    Pipes    4x4 mode   8x8 mode
      * ------------------------------------------
@@ -348,8 +353,31 @@ static void r300_setup_zmask_flags(struct r300_screen *screen,
      * RV530  1P/2Z    32x16      64x32
      *        1P/1Z    16x16      32x32
      */
-    static unsigned num_blocks_x_per_dw[4] = {4, 8, 12, 8};
-    static unsigned num_blocks_y_per_dw[4] = {4, 4,  4, 8};
+    static unsigned zmask_blocks_x_per_dw[4] = {4, 8, 12, 8};
+    static unsigned zmask_blocks_y_per_dw[4] = {4, 4,  4, 8};
+
+    /* In HIZ RAM, one dword is always 8x8 pixels (each byte is 4x4 pixels),
+     * but the blocks have very weird ordering.
+     *
+     * With 2 pipes and an image of size 8xY, where Y >= 1,
+     * clearing 4 dwords clears blocks like this:
+     *
+     *    01012323
+     *
+     * where numbers correspond to dword indices. The blocks are interleaved
+     * in the X direction, so the alignment must be 4x1 blocks (32x8 pixels).
+     *
+     * With 4 pipes and an image of size 8xY, where Y >= 4,
+     * clearing 8 dwords clears blocks like this:
+     *    01012323
+     *    45456767
+     *    01012323
+     *    45456767
+     * where numbers correspond to dword indices. The blocks are interleaved
+     * in both directions, so the alignment must be 4x4 blocks (32x32 pixels)
+     */
+    static unsigned hiz_align_x[4] = {8, 32, 48, 32};
+    static unsigned hiz_align_y[4] = {8, 8, 8, 32};
 
     if (util_format_is_depth_or_stencil(tex->b.b.b.format) &&
         util_format_get_blocksizebits(tex->b.b.b.format) == 32 &&
@@ -363,30 +391,49 @@ static void r300_setup_zmask_flags(struct r300_screen *screen,
         }
 
         for (i = 0; i <= tex->b.b.b.last_level; i++) {
-            unsigned numdw, compsize;
+            unsigned zcomp_numdw, zcompsize, hiz_numdw, stride, height;
+
+            stride = align(tex->tex.stride_in_pixels[i], 16);
+            height = u_minify(tex->b.b.b.height0, i);
 
             /* The 8x8 compression mode needs macrotiling. */
-            compsize = screen->caps.z_compress == R300_ZCOMP_8X8 &&
+            zcompsize = screen->caps.z_compress == R300_ZCOMP_8X8 &&
                        tex->tex.macrotile[i] &&
                        tex->b.b.b.nr_samples <= 1 ? 8 : 4;
 
-            /* Get the zbuffer size (with the aligned width and height). */
-            numdw = align(tex->tex.stride_in_pixels[i],
-                          num_blocks_x_per_dw[pipes-1] * compsize) *
-                    align(u_minify(tex->b.b.b.height0, i),
-                          num_blocks_y_per_dw[pipes-1] * compsize);
+            /* Get the ZMASK buffer size in dwords. */
+            zcomp_numdw = r300_pixels_to_dwords(stride, height,
+                                zmask_blocks_x_per_dw[pipes-1] * zcompsize,
+                                zmask_blocks_y_per_dw[pipes-1] * zcompsize);
 
-            /* Convert pixels -> dwords. */
-            numdw = ALIGN_DIVUP(numdw, num_blocks_x_per_dw[pipes-1] * compsize *
-                                       num_blocks_y_per_dw[pipes-1] * compsize);
+            /* Check whether we have enough ZMASK memory. */
+            if (util_format_get_blocksizebits(tex->b.b.b.format) == 32 &&
+                zcomp_numdw <= screen->caps.zmask_ram * pipes) {
+                tex->tex.zmask_dwords[i] = zcomp_numdw;
+                tex->tex.zcomp8x8[i] = zcompsize == 8;
 
-            /* Check that we have enough ZMASK memory. */
-            if (numdw <= screen->caps.zmask_ram * pipes) {
-                tex->tex.zmask_dwords[i] = numdw;
-                tex->tex.zcomp8x8[i] = compsize == 8;
+                tex->tex.zmask_stride_in_pixels[i] =
+                    align(stride, zmask_blocks_x_per_dw[pipes-1] * zcompsize);
             } else {
                 tex->tex.zmask_dwords[i] = 0;
                 tex->tex.zcomp8x8[i] = FALSE;
+                tex->tex.zmask_stride_in_pixels[i] = 0;
+            }
+
+            /* Now setup HIZ. */
+            stride = align(stride, hiz_align_x[pipes-1]);
+            height = align(height, hiz_align_y[pipes-1]);
+
+            /* Get the HIZ buffer size in dwords. */
+            hiz_numdw = (stride * height) / (8*8 * pipes);
+
+            /* Check whether we have enough HIZ memory. */
+            if (hiz_numdw <= screen->caps.hiz_ram * pipes) {
+                tex->tex.hiz_dwords[i] = hiz_numdw;
+                tex->tex.hiz_stride_in_pixels[i] = stride;
+            } else {
+                tex->tex.hiz_dwords[i] = 0;
+                tex->tex.hiz_stride_in_pixels[i] = 0;
             }
         }
     }
@@ -495,7 +542,7 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen,
     }
 
     r300_texture_3d_fix_mipmapping(rscreen, tex);
-    r300_setup_zmask_flags(rscreen, tex);
+    r300_setup_hyperz_properties(rscreen, tex);
 
     if (tex->buf_size) {
         /* Make sure the buffer we got is large enough. */




More information about the mesa-commit mailing list