[Mesa-dev] [PATCH v3 01/13] gallium: Basic compute interface.

Francisco Jerez currojerez at riseup.net
Tue May 1 08:27:39 PDT 2012


Define an interface that exposes the minimal functionality required to
implement some of the popular compute APIs.  This commit adds entry
points to set the grid layout and other state required to keep track
of the usual address spaces employed in compute APIs, to bind a
compute program, and execute it on the device.

Reviewed-by: Marek Olšák <maraeo at gmail.com>
---
v2: Add "start slot" argument to the resource binding driver hooks.
v3: Split sampler views from shader resources.

 src/gallium/docs/source/context.rst        |   39 +++++++++++++++
 src/gallium/docs/source/screen.rst         |   28 ++++++++++-
 src/gallium/include/pipe/p_context.h       |   73 ++++++++++++++++++++++++++++
 src/gallium/include/pipe/p_defines.h       |   19 +++++++-
 src/gallium/include/pipe/p_screen.h        |   12 +++++
 src/gallium/include/pipe/p_shader_tokens.h |    9 ++++
 src/gallium/include/pipe/p_state.h         |    7 +++
 7 files changed, 185 insertions(+), 2 deletions(-)

diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst
index b2872cd..cb9b8de 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -542,3 +542,42 @@ These flags control the behavior of a transfer object.
 ``PIPE_TRANSFER_FLUSH_EXPLICIT``
   Written ranges will be notified later with :ref:`transfer_flush_region`.
   Cannot be used with ``PIPE_TRANSFER_READ``.
+
+
+Compute kernel execution
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+A compute program can be defined, bound or destroyed using
+``create_compute_state``, ``bind_compute_state`` or
+``delete_compute_state`` respectively.
+
+Any of the subroutines contained within the compute program can be
+executed on the device using the ``launch_grid`` method.  This method
+will execute as many instances of the program as there are elements in
+the specified N-dimensional grid, hopefully in parallel.
+
+The compute program has access to four special resources:
+
+* ``GLOBAL`` represents a memory space shared among all the threads
+  running on the device.  An arbitrary buffer created with the
+  ``PIPE_BIND_GLOBAL`` flag can be mapped into it using the
+  ``set_global_binding`` method.
+
+* ``LOCAL`` represents a memory space shared among all the threads
+  running in the same working group.  The initial contents of this
+  resource are undefined.
+
+* ``PRIVATE`` represents a memory space local to a single thread.
+  The initial contents of this resource are undefined.
+
+* ``INPUT`` represents a read-only memory space that can be
+  initialized at ``launch_grid`` time.
+
+These resources use a byte-based addressing scheme, and they can be
+accessed from the compute program by means of the LOAD/STORE TGSI
+opcodes.
+
+In addition, normal texture sampling is allowed from the compute
+program: ``bind_compute_sampler_states`` may be used to set up texture
+samplers for the compute stage and ``set_compute_sampler_views`` may
+be used to bind a number of sampler views to it.
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
index 05f7e8f..5d8280a 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -110,7 +110,8 @@ The integer capabilities:
 * ``PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY``: This CAP describes
   a hw limitation.  If true, pipe_vertex_element::src_offset must always be
   aligned to 4.  If false, there are no restrictions on src_offset.
-
+* ``PIPE_CAP_COMPUTE``: Whether the implementation supports the
+  compute entry points defined in pipe_context and pipe_screen.
 
 
 .. _pipe_capf:
@@ -186,6 +187,29 @@ to be 0.
   samplers.
 
 
+.. _pipe_compute_cap:
+
+PIPE_COMPUTE_CAP_*
+^^^^^^^^^^^^^^^^^^
+
+Compute-specific capabilities. They can be queried using
+pipe_screen::get_compute_param.
+
+* ``PIPE_COMPUTE_CAP_GRID_DIMENSION``: Number of supported dimensions
+  for grid and block coordinates.  Value type: ``uint64_t``.
+* ``PIPE_COMPUTE_CAP_MAX_GRID_SIZE``: Maximum grid size in block
+  units.  Value type: ``uint64_t []``.
+* ``PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE``: Maximum block size in thread
+  units.  Value type: ``uint64_t []``.
+* ``PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE``: Maximum size of the GLOBAL
+  resource.  Value type: ``uint64_t``.
+* ``PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE``: Maximum size of the LOCAL
+  resource.  Value type: ``uint64_t``.
+* ``PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE``: Maximum size of the PRIVATE
+  resource.  Value type: ``uint64_t``.
+* ``PIPE_COMPUTE_CAP_MAX_INPUT_SIZE``: Maximum size of the INPUT
+  resource.  Value type: ``uint64_t``.
+
 .. _pipe_bind:
 
 PIPE_BIND_*
@@ -223,6 +247,8 @@ resources might be created and handled quite differently.
 * ``PIPE_BIND_SCANOUT``: A front color buffer or scanout buffer.
 * ``PIPE_BIND_SHARED``: A sharable buffer that can be given to another
   process.
+* ``PIPE_BIND_GLOBAL``: A buffer that can be mapped into the global
+  address space of a compute program.
 
 .. _pipe_usage:
 
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index 8b4a158..3c0b89e 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -63,6 +63,7 @@ struct pipe_vertex_element;
 struct pipe_video_buffer;
 struct pipe_video_decoder;
 struct pipe_viewport_state;
+struct pipe_compute_state;
 union pipe_color_union;
 union pipe_query_result;
 
@@ -141,6 +142,10 @@ struct pipe_context {
    void   (*bind_geometry_sampler_states)(struct pipe_context *,
                                           unsigned num_samplers,
                                           void **samplers);
+   void   (*bind_compute_sampler_states)(struct pipe_context *,
+                                         unsigned start_slot,
+                                         unsigned num_samplers,
+                                         void **samplers);
    void   (*delete_sampler_state)(struct pipe_context *, void *);
 
    void * (*create_rasterizer_state)(struct pipe_context *,
@@ -220,6 +225,10 @@ struct pipe_context {
                                       unsigned num_views,
                                       struct pipe_sampler_view **);
 
+   void (*set_compute_sampler_views)(struct pipe_context *,
+                                     unsigned start_slot, unsigned num_views,
+                                     struct pipe_sampler_view **);
+
    void (*set_vertex_buffers)( struct pipe_context *,
                                unsigned num_buffers,
                                const struct pipe_vertex_buffer * );
@@ -418,6 +427,70 @@ struct pipe_context {
     */
    struct pipe_video_buffer *(*create_video_buffer)( struct pipe_context *context,
                                                      const struct pipe_video_buffer *templat );
+
+   /**
+    * Compute kernel execution
+    */
+   /*@{*/
+   /**
+    * Define the compute program and parameters to be used by
+    * pipe_context::launch_grid.
+    */
+   void *(*create_compute_state)(struct pipe_context *context,
+				 const struct pipe_compute_state *);
+   void (*bind_compute_state)(struct pipe_context *, void *);
+   void (*delete_compute_state)(struct pipe_context *, void *);
+
+   /**
+    * Bind an array of buffers to be mapped into the address space of
+    * the GLOBAL resource.  Any buffers that were previously bound in
+    * the range [first, first + count - 1] are unbound after this call.
+    *
+    * \param first      first buffer to map.
+    * \param count      number of consecutive buffers to map.
+    * \param resources  array of pointers to the buffers to map, it
+    *                   should contain at least \a count elements
+    *                   unless it's NULL, in which case no new
+    *                   resources will be bound.
+    * \param handles    array of pointers to the memory locations that
+    *                   will be filled with the respective base
+    *                   addresses each buffer will be mapped to.  It
+    *                   should contain at least \a count elements,
+    *                   unless \a resources is NULL in which case \a
+    *                   handles should be NULL as well.
+    *
+    * Note that the driver isn't required to make any guarantees about
+    * the contents of the \a handles array being valid anytime except
+    * during the subsequent calls to pipe_context::launch_grid.  This
+    * means that the only sensible location handles[i] may point to is
+    * somewhere within the INPUT buffer itself.  This is so to
+    * accommodate implementations that lack virtual memory but
+    * nevertheless migrate buffers on the fly, leading to resource
+    * base addresses that change on each kernel invocation or are
+    * unknown to the pipe driver.
+    */
+   void (*set_global_binding)(struct pipe_context *context,
+                              unsigned first, unsigned count,
+                              struct pipe_resource **resources,
+                              uint32_t **handles);
+
+   /**
+    * Launch the compute kernel starting from instruction \a pc of the
+    * currently bound compute program.
+    *
+    * \a grid_layout and \a block_layout are arrays of size \a
+    * PIPE_COMPUTE_CAP_GRID_DIMENSION that determine the layout of the
+    * grid (in block units) and working block (in thread units) to be
+    * used, respectively.
+    *
+    * \a input will be used to initialize the INPUT resource, and it
+    * should point to a buffer of at least
+    * pipe_compute_state::req_input_mem bytes.
+    */
+   void (*launch_grid)(struct pipe_context *context,
+                       const uint *block_layout, const uint *grid_layout,
+                       uint32_t pc, const void *input);
+   /*@}*/
 };
 
 
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 8b6d00d..c4c217b 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -304,6 +304,7 @@ enum pipe_transfer_usage {
 #define PIPE_BIND_STREAM_OUTPUT        (1 << 11) /* set_stream_output_buffers */
 #define PIPE_BIND_CURSOR               (1 << 16) /* mouse cursor */
 #define PIPE_BIND_CUSTOM               (1 << 17) /* state-tracker/winsys usages */
+#define PIPE_BIND_GLOBAL               (1 << 18) /* set_global_binding */
 
 /* The first two flags above were previously part of the amorphous
  * TEXTURE_USAGE, most of which are now descriptions of the ways a
@@ -346,7 +347,8 @@ enum pipe_transfer_usage {
 #define PIPE_SHADER_VERTEX   0
 #define PIPE_SHADER_FRAGMENT 1
 #define PIPE_SHADER_GEOMETRY 2
-#define PIPE_SHADER_TYPES    3
+#define PIPE_SHADER_COMPUTE  3
+#define PIPE_SHADER_TYPES    4
 
 
 /**
@@ -477,6 +479,7 @@ enum pipe_cap {
    PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY = 65,
    PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY = 66,
    PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY = 67,
+   PIPE_CAP_COMPUTE = 68
 };
 
 /**
@@ -522,6 +525,20 @@ enum pipe_shader_cap
    PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS = 18
 };
 
+/**
+ * Compute-specific implementation capabilities.  They can be queried
+ * using pipe_screen::get_compute_param.
+ */
+enum pipe_compute_cap
+{
+   PIPE_COMPUTE_CAP_GRID_DIMENSION,
+   PIPE_COMPUTE_CAP_MAX_GRID_SIZE,
+   PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE,
+   PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE,
+   PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE,
+   PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE,
+   PIPE_COMPUTE_CAP_MAX_INPUT_SIZE
+};
 
 /**
  * Composite query types
diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
index 45c441b..7ae7c9a 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -98,6 +98,18 @@ struct pipe_screen {
 			   enum pipe_video_profile profile,
 			   enum pipe_video_cap param );
 
+   /**
+    * Query a compute-specific capability/parameter/limit.
+    * \param param  one of PIPE_COMPUTE_CAP_x
+    * \param ret    pointer to a preallocated buffer that will be
+    *               initialized to the parameter value, or NULL.
+    * \return       size in bytes of the parameter value that would be
+    *               returned.
+    */
+   int (*get_compute_param)(struct pipe_screen *,
+			    enum pipe_compute_cap param,
+			    void *ret);
+
    struct pipe_context * (*context_create)( struct pipe_screen *,
 					    void *priv );
 
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index df2dd5e..9d08fde 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -166,6 +166,15 @@ struct tgsi_declaration_resource {
    unsigned ReturnTypeW : 6; /**< one of enum pipe_type */
 };
 
+/*
+ * Special resources that don't need to be declared.  They map to the
+ * GLOBAL/LOCAL/PRIVATE/INPUT compute memory spaces.
+ */
+#define TGSI_RESOURCE_GLOBAL	0x7fff
+#define TGSI_RESOURCE_LOCAL	0x7ffe
+#define TGSI_RESOURCE_PRIVATE	0x7ffd
+#define TGSI_RESOURCE_INPUT	0x7ffc
+
 #define TGSI_IMM_FLOAT32   0
 #define TGSI_IMM_UINT32    1
 #define TGSI_IMM_INT32     2
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index a459a56..74f4ebd 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -580,6 +580,13 @@ struct pipe_resolve_info
    unsigned mask; /**< PIPE_MASK_RGBA, Z, S or ZS */
 };
 
+struct pipe_compute_state
+{
+   const struct tgsi_token *tokens; /**< Compute program to be executed. */
+   unsigned req_local_mem; /**< Required size of the LOCAL resource. */
+   unsigned req_private_mem; /**< Required size of the PRIVATE resource. */
+   unsigned req_input_mem; /**< Required size of the INPUT resource. */
+};
 
 #ifdef __cplusplus
 }
-- 
1.7.10



More information about the mesa-dev mailing list