[Mesa-dev] [PATCH 6/8] radeonsi: use WRITE_DATA for small glBufferSubData sizes

Marek Olšák maraeo at gmail.com
Fri Jan 18 16:43:57 UTC 2019


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_buffer.c | 27 ++++++++++++++++++++++++
 src/gallium/drivers/radeonsi/si_pipe.h   |  1 +
 2 files changed, 28 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c
index 4766cf4bdfa..a1e421b8b0d 100644
--- a/src/gallium/drivers/radeonsi/si_buffer.c
+++ b/src/gallium/drivers/radeonsi/si_buffer.c
@@ -16,20 +16,22 @@
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include "radeonsi/si_pipe.h"
+#include "sid.h"
+
 #include "util/u_memory.h"
 #include "util/u_upload_mgr.h"
 #include "util/u_transfer.h"
 #include <inttypes.h>
 #include <stdio.h>
 
 bool si_rings_is_buffer_referenced(struct si_context *sctx,
 				   struct pb_buffer *buf,
 				   enum radeon_bo_usage usage)
 {
@@ -506,20 +508,38 @@ static void *si_buffer_transfer_map(struct pipe_context *ctx,
 	data = si_buffer_map_sync_with_rings(sctx, rbuffer, usage);
 	if (!data) {
 		return NULL;
 	}
 	data += box->x;
 
 	return si_buffer_get_transfer(ctx, resource, usage, box,
 					ptransfer, data, NULL, 0);
 }
 
+static void si_buffer_write_data(struct si_context *sctx, struct r600_resource *buf,
+				 unsigned offset, unsigned size, const void *data)
+{
+	struct radeon_cmdbuf *cs = sctx->gfx_cs;
+
+	si_need_gfx_cs_space(sctx);
+
+	sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+		       SI_CONTEXT_CS_PARTIAL_FLUSH |
+		       si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_LRU);
+	si_emit_cache_flush(sctx);
+
+	si_cp_write_data(sctx, buf, offset, size, V_370_TC_L2, V_370_ME, data);
+
+	radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+	radeon_emit(cs, 0);
+}
+
 static void si_buffer_do_flush_region(struct pipe_context *ctx,
 				      struct pipe_transfer *transfer,
 				      const struct pipe_box *box)
 {
 	struct si_transfer *stransfer = (struct si_transfer*)transfer;
 	struct r600_resource *rbuffer = r600_resource(transfer->resource);
 
 	if (stransfer->u.staging) {
 		/* Copy the staging buffer into the original one. */
 		si_copy_buffer((struct si_context*)ctx, transfer->resource,
@@ -568,20 +588,27 @@ static void si_buffer_transfer_unmap(struct pipe_context *ctx,
 
 static void si_buffer_subdata(struct pipe_context *ctx,
 			      struct pipe_resource *buffer,
 			      unsigned usage, unsigned offset,
 			      unsigned size, const void *data)
 {
 	struct pipe_transfer *transfer = NULL;
 	struct pipe_box box;
 	uint8_t *map = NULL;
 
+	if (size <= SI_TRANSFER_WRITE_DATA_THRESHOLD &&
+	    offset % 4 == 0 && size % 4 == 0 && (uintptr_t)data % 4 == 0) {
+		si_buffer_write_data((struct si_context*)ctx,
+				     r600_resource(buffer), offset, size, data);
+		return;
+	}
+
 	u_box_1d(offset, size, &box);
 	map = si_buffer_transfer_map(ctx, buffer, 0,
 				       PIPE_TRANSFER_WRITE |
 				       PIPE_TRANSFER_DISCARD_RANGE |
 				       usage,
 				       &box, &transfer);
 	if (!map)
 		return;
 
 	memcpy(map, data, size);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 5bd3d9641d2..f79828f3438 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -94,20 +94,21 @@
 #define SI_PREFETCH_ES			(1 << 3)
 #define SI_PREFETCH_GS			(1 << 4)
 #define SI_PREFETCH_VS			(1 << 5)
 #define SI_PREFETCH_PS			(1 << 6)
 
 #define SI_MAX_BORDER_COLORS		4096
 #define SI_MAX_VIEWPORTS		16
 #define SIX_BITS			0x3F
 #define SI_MAP_BUFFER_ALIGNMENT		64
 #define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024
+#define SI_TRANSFER_WRITE_DATA_THRESHOLD 64
 
 #define SI_RESOURCE_FLAG_TRANSFER	(PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
 #define SI_RESOURCE_FLAG_FLUSHED_DEPTH	(PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
 #define SI_RESOURCE_FLAG_FORCE_MSAA_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
 #define SI_RESOURCE_FLAG_DISABLE_DCC	(PIPE_RESOURCE_FLAG_DRV_PRIV << 3)
 #define SI_RESOURCE_FLAG_UNMAPPABLE	(PIPE_RESOURCE_FLAG_DRV_PRIV << 4)
 #define SI_RESOURCE_FLAG_READ_ONLY	(PIPE_RESOURCE_FLAG_DRV_PRIV << 5)
 #define SI_RESOURCE_FLAG_32BIT		(PIPE_RESOURCE_FLAG_DRV_PRIV << 6)
 #define SI_RESOURCE_FLAG_SO_FILLED_SIZE	(PIPE_RESOURCE_FLAG_DRV_PRIV << 7)
 
-- 
2.17.1



More information about the mesa-dev mailing list