Mesa (master): vc4: Start using the pack header.

Eric Anholt anholt at kemper.freedesktop.org
Fri Jun 30 19:27:39 UTC 2017


Module: Mesa
Branch: master
Commit: 4cef255872e8467aabce52938038a9d2bf27d9b2
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=4cef255872e8467aabce52938038a9d2bf27d9b2

Author: Eric Anholt <eric at anholt.net>
Date:   Wed Oct 26 12:46:58 2016 -0700

vc4: Start using the pack header.

This slightly inflates the size of the generated code, in exchange for
getting us some convenient tools.

before:
   4389	      0	      0	   4389	   1125	src/gallium/drivers/vc4/.libs/vc4_draw.o
    808	      0	      0	    808	    328	src/gallium/drivers/vc4/.libs/vc4_emit.o

after:
   4449	      0	      0	   4449	   1161	src/gallium/drivers/vc4/.libs/vc4_draw.o
    988	      0	      0	    988	    3dc	src/gallium/drivers/vc4/.libs/vc4_emit.o

---

 src/gallium/drivers/vc4/vc4_cl.h   | 63 ++++++++++++++++++++++++++++++++++++
 src/gallium/drivers/vc4/vc4_draw.c | 49 ++++++++++++++--------------
 src/gallium/drivers/vc4/vc4_emit.c | 65 ++++++++++++++++++++++++--------------
 src/gallium/drivers/vc4/vc4_job.c  |  4 +--
 4 files changed, 130 insertions(+), 51 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_cl.h b/src/gallium/drivers/vc4/vc4_cl.h
index 74bf8cfcaa..bec177cd03 100644
--- a/src/gallium/drivers/vc4/vc4_cl.h
+++ b/src/gallium/drivers/vc4/vc4_cl.h
@@ -40,6 +40,27 @@ struct vc4_job;
  */
 struct vc4_cl_out;
 
+/** A reference to a BO used in the CL packing functions */
+struct vc4_cl_reloc {
+        struct vc4_bo *bo;
+        uint32_t offset;
+};
+
+/* We don't call anything that packs a reloc yet, so don't implement it. */
+static inline void cl_pack_emit_reloc(void *cl, const struct vc4_cl_reloc *reloc)
+{
+        abort();
+}
+
+/* We don't use the data arg yet */
+#define __gen_user_data void
+#define __gen_address_type struct vc4_cl_reloc
+#define __gen_address_offset(reloc) ((reloc)->offset)
+#define __gen_emit_reloc cl_pack_emit_reloc
+
+#include "kernel/vc4_packet.h"
+#include "broadcom/cle/v3d_packet_v21_pack.h"
+
 struct vc4_cl {
         void *base;
         struct vc4_cl_out *next;
@@ -205,4 +226,46 @@ cl_aligned_reloc(struct vc4_job *job, struct vc4_cl *cl,
 
 void cl_ensure_space(struct vc4_cl *cl, uint32_t size);
 
+#define cl_packet_header(packet) V3D21_ ## packet ## _header
+#define cl_packet_length(packet) V3D21_ ## packet ## _length
+#define cl_packet_pack(packet)   V3D21_ ## packet ## _pack
+#define cl_packet_struct(packet)   V3D21_ ## packet
+
+static inline void *
+cl_get_emit_space(struct vc4_cl_out **cl, size_t size)
+{
+        void *addr = *cl;
+        cl_advance(cl, size);
+        return addr;
+}
+
+/* Macro for setting up an emit of a CL struct.  A temporary unpacked struct
+ * is created, whose fields you get to set in a block of the form:
+ *
+ * cl_emit(&bcl, FLAT_SHADE_FLAGS, flags) {
+ *     flags.flat_shading_flags = 1 << 2;
+ * }
+ *
+ * or a packet with only its default values can be emitted with just:
+ *
+ * cl_emit(&bcl, FLAT_SHADE_FLAGS, flags);
+ *
+ * The trick here is that we make a for loop that will execute the body
+ * (either the block or the ';' after the macro invocation) exactly once.
+ * Also, *_dst is actually of the wrong type: it points at the
+ * uint8_t[cl_packet_length()] in the CL, not a cl_packet_struct(packet).
+ */
+#define cl_emit(cl_out, packet, name)                            \
+        for (struct cl_packet_struct(packet) name = {            \
+                cl_packet_header(packet)                         \
+        },                                                       \
+        *_dst = cl_get_emit_space(cl_out, cl_packet_length(packet)); \
+        __builtin_expect(_dst != NULL, 1);                       \
+        ({                                                       \
+                cl_packet_pack(packet)(NULL, (uint8_t *)_dst, &name);  \
+                VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst,           \
+                                                 cl_packet_length(packet))); \
+                _dst = NULL;                                     \
+        }))                                                      \
+
 #endif /* VC4_CL_H */
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index 0aee73ed10..4b3fa8ab8f 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -82,32 +82,28 @@ vc4_start_draw(struct vc4_context *vc4)
         vc4_get_draw_cl_space(job, 0);
 
         struct vc4_cl_out *bcl = cl_start(&job->bcl);
-        //   Tile state data is 48 bytes per tile, I think it can be thrown away
-        //   as soon as binning is finished.
-        cl_u8(&bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
-        cl_u32(&bcl, 0); /* tile alloc addr, filled by kernel */
-        cl_u32(&bcl, 0); /* tile alloc size, filled by kernel */
-        cl_u32(&bcl, 0); /* tile state addr, filled by kernel */
-        cl_u8(&bcl, job->draw_tiles_x);
-        cl_u8(&bcl, job->draw_tiles_y);
-        /* Other flags are filled by kernel. */
-        cl_u8(&bcl, job->msaa ? VC4_BIN_CONFIG_MS_MODE_4X : 0);
+        cl_emit(&bcl, TILE_BINNING_MODE_CONFIGURATION, bin) {
+                bin.width_in_tiles = job->draw_tiles_x;
+                bin.height_in_tiles = job->draw_tiles_y;
+                bin.multisample_mode_4x = job->msaa;
+        }
 
         /* START_TILE_BINNING resets the statechange counters in the hardware,
          * which are what is used when a primitive is binned to a tile to
          * figure out what new state packets need to be written to that tile's
          * command list.
          */
-        cl_u8(&bcl, VC4_PACKET_START_TILE_BINNING);
+        cl_emit(&bcl, START_TILE_BINNING, start);
 
         /* Reset the current compressed primitives format.  This gets modified
          * by VC4_PACKET_GL_INDEXED_PRIMITIVE and
          * VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
          * of every tile.
          */
-        cl_u8(&bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
-        cl_u8(&bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
-                     VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
+        cl_emit(&bcl, PRIMITIVE_LIST_FORMAT, list) {
+                list.data_type = _16_BIT_INDEX;
+                list.primitive_type = TRIANGLES_LIST;
+        }
 
         job->needs_flush = true;
         job->draw_width = vc4->framebuffer.width;
@@ -221,13 +217,15 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4,
         cl_end(&job->shader_rec, shader_rec);
 
         struct vc4_cl_out *bcl = cl_start(&job->bcl);
-        /* the actual draw call. */
-        cl_u8(&bcl, VC4_PACKET_GL_SHADER_STATE);
-        assert(vtx->num_elements <= 8);
-        /* Note that number of attributes == 0 in the packet means 8
-         * attributes.  This field also contains the offset into shader_rec.
-         */
-        cl_u32(&bcl, num_elements_emit & 0x7);
+        cl_emit(&bcl, GL_SHADER_STATE, shader_state) {
+                /* Note that number of attributes == 0 in the packet means 8
+                 * attributes.  This field also contains the offset into
+                 * shader_rec.
+                 */
+                assert(vtx->num_elements <= 8);
+                shader_state.number_of_attribute_arrays =
+                        num_elements_emit & 0x7;
+        }
         cl_end(&job->bcl, bcl);
 
         vc4_write_uniforms(vc4, vc4->prog.fs,
@@ -436,10 +434,11 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                                 }
                         }
 
-                        cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
-                        cl_u8(&bcl, info->mode);
-                        cl_u32(&bcl, this_count);
-                        cl_u32(&bcl, start);
+                        cl_emit(&bcl, VERTEX_ARRAY_PRIMITIVES, array) {
+                                array.primitive_mode = info->mode;
+                                array.length = this_count;
+                                array.index_of_first_vertex = start;
+                        }
                         job->draw_calls_queued++;
 
                         count -= step;
diff --git a/src/gallium/drivers/vc4/vc4_emit.c b/src/gallium/drivers/vc4/vc4_emit.c
index b48d89a060..9fc266e5ba 100644
--- a/src/gallium/drivers/vc4/vc4_emit.c
+++ b/src/gallium/drivers/vc4/vc4_emit.c
@@ -60,11 +60,12 @@ vc4_emit_state(struct pipe_context *pctx)
                         maxy = MIN2(vp_maxy, vc4->scissor.maxy);
                 }
 
-                cl_u8(&bcl, VC4_PACKET_CLIP_WINDOW);
-                cl_u16(&bcl, minx);
-                cl_u16(&bcl, miny);
-                cl_u16(&bcl, maxx - minx);
-                cl_u16(&bcl, maxy - miny);
+                cl_emit(&bcl, CLIP_WINDOW, clip) {
+                        clip.clip_window_left_pixel_coordinate = minx;
+                        clip.clip_window_bottom_pixel_coordinate = miny;
+                        clip.clip_window_height_in_pixels = maxy - miny;
+                        clip.clip_window_width_in_pixels = maxx - minx;
+                }
 
                 job->draw_min_x = MIN2(job->draw_min_x, minx);
                 job->draw_min_y = MIN2(job->draw_min_y, miny);
@@ -113,35 +114,51 @@ vc4_emit_state(struct pipe_context *pctx)
         }
 
         if (vc4->dirty & VC4_DIRTY_RASTERIZER) {
-                cl_u8(&bcl, VC4_PACKET_DEPTH_OFFSET);
-                cl_u16(&bcl, vc4->rasterizer->offset_factor);
-                cl_u16(&bcl, vc4->rasterizer->offset_units);
+                cl_emit(&bcl, DEPTH_OFFSET, depth) {
+                        depth.depth_offset_units =
+                                vc4->rasterizer->offset_units;
+                        depth.depth_offset_factor =
+                                vc4->rasterizer->offset_factor;
+                }
 
-                cl_u8(&bcl, VC4_PACKET_POINT_SIZE);
-                cl_f(&bcl, vc4->rasterizer->point_size);
+                cl_emit(&bcl, POINT_SIZE, points) {
+                        points.point_size = vc4->rasterizer->point_size;
+                }
 
-                cl_u8(&bcl, VC4_PACKET_LINE_WIDTH);
-                cl_f(&bcl, vc4->rasterizer->base.line_width);
+                cl_emit(&bcl, LINE_WIDTH, points) {
+                        points.line_width = vc4->rasterizer->base.line_width;
+                }
         }
 
         if (vc4->dirty & VC4_DIRTY_VIEWPORT) {
-                cl_u8(&bcl, VC4_PACKET_CLIPPER_XY_SCALING);
-                cl_f(&bcl, vc4->viewport.scale[0] * 16.0f);
-                cl_f(&bcl, vc4->viewport.scale[1] * 16.0f);
+                cl_emit(&bcl, CLIPPER_XY_SCALING, clip) {
+                        clip.viewport_half_width_in_1_16th_of_pixel =
+                                vc4->viewport.scale[0] * 16.0f;
+                        clip.viewport_half_height_in_1_16th_of_pixel =
+                                vc4->viewport.scale[1] * 16.0f;
+                }
 
-                cl_u8(&bcl, VC4_PACKET_CLIPPER_Z_SCALING);
-                cl_f(&bcl, vc4->viewport.translate[2]);
-                cl_f(&bcl, vc4->viewport.scale[2]);
+                cl_emit(&bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
+                        clip.viewport_z_offset_zc_to_zs =
+                                vc4->viewport.translate[2];
+                        clip.viewport_z_scale_zc_to_zs =
+                                vc4->viewport.scale[2];
+                }
 
-                cl_u8(&bcl, VC4_PACKET_VIEWPORT_OFFSET);
-                cl_u16(&bcl, 16 * vc4->viewport.translate[0]);
-                cl_u16(&bcl, 16 * vc4->viewport.translate[1]);
+                cl_emit(&bcl, VIEWPORT_OFFSET, vp) {
+                        vp.viewport_centre_x_coordinate =
+                                16 * vc4->viewport.translate[0];
+                        vp.viewport_centre_y_coordinate =
+                                16 * vc4->viewport.translate[1];
+                }
         }
 
         if (vc4->dirty & VC4_DIRTY_FLAT_SHADE_FLAGS) {
-                cl_u8(&bcl, VC4_PACKET_FLAT_SHADE_FLAGS);
-                cl_u32(&bcl, vc4->rasterizer->base.flatshade ?
-                       vc4->prog.fs->color_inputs : 0);
+                cl_emit(&bcl, FLAT_SHADE_FLAGS, flags) {
+                        if (vc4->rasterizer->base.flatshade)
+                                flags.flat_shading_flags =
+                                        vc4->prog.fs->color_inputs;
+                }
         }
 
         cl_end(&job->bcl, bcl);
diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c
index d39472ef13..afdac8c991 100644
--- a/src/gallium/drivers/vc4/vc4_job.c
+++ b/src/gallium/drivers/vc4/vc4_job.c
@@ -378,11 +378,11 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
                  */
                 cl_ensure_space(&job->bcl, 8);
                 struct vc4_cl_out *bcl = cl_start(&job->bcl);
-                cl_u8(&bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
+                cl_emit(&bcl, INCREMENT_SEMAPHORE, incr);
                 /* The FLUSH caps all of our bin lists with a
                  * VC4_PACKET_RETURN.
                  */
-                cl_u8(&bcl, VC4_PACKET_FLUSH);
+                cl_emit(&bcl, FLUSH, flush);
                 cl_end(&job->bcl, bcl);
         }
         struct drm_vc4_submit_cl submit = {




More information about the mesa-commit mailing list