[Mesa-dev] [PATCH 2/4] r600g: add multi ring support with dma as first second ring v4

j.glisse at gmail.com j.glisse at gmail.com
Fri Jan 25 09:50:48 PST 2013


From: Jerome Glisse <jglisse at redhat.com>

We keep track of ring emission order in a stack; whenever we need to
flush, we empty the stack in FIFO order. There are a few helper functions
for bo mapping and other ring activities that make sure that
the ring stack is properly flushed and submitted.

v2: fix the st flush path, and other flush paths, to properly flush all
    rings if necessary
v3: - improve names of ring helpers
    - make sure that each time a cs is going to be written it ends up at
      the top of the stack, to avoid issues such as:
      STACK[0] = dma (with bo A,B)
      STACK[1] = gfx (with bo C,D)
      Now if the code tries to emit a dma command relative to bo C or D,
      it will start writing the cmd stream into the cs, and once it
      reaches the point where it adds a relocation it will flush.
      At that point the cs will have cmds that don't have proper
      relocations in the relocation buffer, and the kernel will just
      refuse to run them.
v4: - Drop the stack idea, as it turns out there is no way to use it
      or benefit from it. Any time the driver starts a command on another
      ring, it always needs to flush the previous ring. So make the code
      simpler by not using a stack.

Signed-off-by: Jerome Glisse <jglisse at redhat.com>
---
 src/gallium/drivers/r600/evergreen_compute.c       |  30 ++---
 .../drivers/r600/evergreen_compute_internal.c      |  42 +++---
 src/gallium/drivers/r600/evergreen_hw_context.c    |   4 +-
 src/gallium/drivers/r600/evergreen_state.c         |  56 ++++----
 src/gallium/drivers/r600/r600_asm.c                |   3 +-
 src/gallium/drivers/r600/r600_buffer.c             |  17 +--
 src/gallium/drivers/r600/r600_hw_context.c         |  51 ++++----
 src/gallium/drivers/r600/r600_pipe.c               | 142 ++++++++++++++++++---
 src/gallium/drivers/r600/r600_pipe.h               |  40 +++++-
 src/gallium/drivers/r600/r600_query.c              |  24 ++--
 src/gallium/drivers/r600/r600_shader.c             |   2 +-
 src/gallium/drivers/r600/r600_state.c              |  48 +++----
 src/gallium/drivers/r600/r600_state_common.c       |  29 +++--
 src/gallium/drivers/r600/r600_texture.c            |  15 ++-
 14 files changed, 327 insertions(+), 176 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index ed5055b..f4a7905 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -210,8 +210,7 @@ void evergreen_compute_upload_input(
 						ctx->screen, buffer_size);
 	}
 
-	num_work_groups_start = ctx->ws->buffer_map(
-		shader->kernel_param->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE);
+	num_work_groups_start = r600_buffer_mmap_sync_with_rings(ctx, shader->kernel_param, PIPE_TRANSFER_WRITE);
 	global_size_start = num_work_groups_start + (3 * (sizeof(uint) /4));
 	local_size_start = global_size_start + (3 * (sizeof(uint)) / 4);
 	kernel_parameters_start = local_size_start + (3 * (sizeof(uint)) / 4);
@@ -251,7 +250,7 @@ static void evergreen_emit_direct_dispatch(
 		const uint *block_layout, const uint *grid_layout)
 {
 	int i;
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	unsigned num_waves;
 	unsigned num_pipes = rctx->screen->info.r600_max_pipes;
 	unsigned wave_divisor = (16 * num_pipes);
@@ -314,20 +313,22 @@ static void evergreen_emit_direct_dispatch(
 static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
 		const uint *grid_layout)
 {
-	struct radeon_winsys_cs *cs = ctx->cs;
+	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
 	unsigned flush_flags = 0;
 	int i;
-
 	struct r600_resource *onebo = NULL;
 	struct evergreen_compute_resource *resources =
 					ctx->cs_shader_state.shader->resources;
 
+	/* make sure that the gfx ring is only one active */
+	ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC);
+
 	/* Initialize all the compute-related registers.
 	 *
 	 * See evergreen_init_atom_start_compute_cs() in this file for the list
 	 * of registers initialized by the start_compute_cs_cmd atom.
 	 */
-	r600_emit_command_buffer(ctx->cs, &ctx->start_compute_cs_cmd);
+	r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd);
 
 	ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
 	r600_flush_emit(ctx);
@@ -335,7 +336,8 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
 	/* Emit colorbuffers. */
 	for (i = 0; i < ctx->framebuffer.state.nr_cbufs; i++) {
 		struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
-		unsigned reloc = r600_context_bo_reloc(ctx, (struct r600_resource*)cb->base.texture,
+		unsigned reloc = r600_context_bo_reloc(ctx, &ctx->rings.gfx,
+						       (struct r600_resource*)cb->base.texture,
 						       RADEON_USAGE_READWRITE);
 
 		r600_write_compute_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
@@ -424,7 +426,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
 		flush_flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
 	}
 
-	ctx->ws->cs_flush(ctx->cs, flush_flags);
+	ctx->ws->cs_flush(ctx->rings.gfx.cs, flush_flags);
 
 	ctx->pm4_dirty_cdwords = 0;
 	ctx->flags = 0;
@@ -452,7 +454,7 @@ void evergreen_emit_cs_shader(
 					(struct r600_cs_shader_state*)atom;
 	struct r600_pipe_compute *shader = state->shader;
 	struct r600_kernel *kernel = &shader->kernels[state->kernel_index];
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	uint64_t va;
 
 	va = r600_resource_va(&rctx->screen->screen, &kernel->code_bo->b.b);
@@ -465,8 +467,8 @@ void evergreen_emit_cs_shader(
 	r600_write_value(cs, 0);	/* R_0288D8_SQ_PGM_RESOURCES_LS_2 */
 
 	r600_write_value(cs, PKT3C(PKT3_NOP, 0, 0));
-	r600_write_value(cs, r600_context_bo_reloc(rctx, kernel->code_bo,
-							RADEON_USAGE_READ));
+	r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx,
+							kernel->code_bo, RADEON_USAGE_READ));
 
 	rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES;
 }
@@ -488,8 +490,7 @@ static void evergreen_launch_grid(
 		r600_compute_shader_create(ctx_, kernel->llvm_module, &kernel->bc);
 		kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
 							kernel->bc.ndw * 4);
-		p = ctx->ws->buffer_map(kernel->code_bo->cs_buf, ctx->cs,
-							PIPE_TRANSFER_WRITE);
+		p = r600_buffer_mmap_sync_with_rings(ctx, kernel->code_bo, PIPE_TRANSFER_WRITE);
 		memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4);
 		ctx->ws->buffer_unmap(kernel->code_bo->cs_buf);
 	}
@@ -901,8 +902,7 @@ void *r600_compute_global_transfer_map(
 
 	COMPUTE_DBG("* r600_compute_global_transfer_map()\n");
 
-	if (!(map = rctx->ws->buffer_map(buffer->chunk->pool->bo->cs_buf,
-						rctx->cs, transfer->usage))) {
+	if (!(map = r600_buffer_mmap_sync_with_rings(rctx, buffer->chunk->pool->bo, transfer->usage))) {
 		util_slab_free(&rctx->pool_transfers, transfer);
 		return NULL;
 	}
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c
index 1654ab0..2e8e9da 100644
--- a/src/gallium/drivers/r600/evergreen_compute_internal.c
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.c
@@ -63,7 +63,9 @@ void evergreen_emit_raw_value(
 
 void evergreen_emit_ctx_value(struct r600_context *ctx, unsigned value)
 {
-	ctx->cs->buf[ctx->cs->cdw++] = value;
+	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
+
+	cs->buf[cs->cdw++] = value;
 }
 
 void evergreen_mult_reg_set_(
@@ -178,37 +180,38 @@ void evergreen_emit_ctx_reg_set(
 	unsigned index,
 	int num)
 {
+	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
 
 	if (index >= EVERGREEN_CONFIG_REG_OFFSET
 			&& index < EVERGREEN_CONFIG_REG_END) {
-		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CONFIG_REG, num, 0);
-		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CONFIG_REG_OFFSET) >> 2;
+		cs->buf[cs->cdw++] = PKT3C(PKT3_SET_CONFIG_REG, num, 0);
+		cs->buf[cs->cdw++] = (index - EVERGREEN_CONFIG_REG_OFFSET) >> 2;
 	} else if (index >= EVERGREEN_CONTEXT_REG_OFFSET
 			&& index < EVERGREEN_CONTEXT_REG_END) {
-		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CONTEXT_REG, num, 0);
-		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
+		cs->buf[cs->cdw++] = PKT3C(PKT3_SET_CONTEXT_REG, num, 0);
+		cs->buf[cs->cdw++] = (index - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
 	} else if (index >= EVERGREEN_RESOURCE_OFFSET
 			&& index < EVERGREEN_RESOURCE_END) {
-		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_RESOURCE, num, 0);
-		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_RESOURCE_OFFSET) >> 2;
+		cs->buf[cs->cdw++] = PKT3C(PKT3_SET_RESOURCE, num, 0);
+		cs->buf[cs->cdw++] = (index - EVERGREEN_RESOURCE_OFFSET) >> 2;
 	} else if (index >= EVERGREEN_SAMPLER_OFFSET
 			&& index < EVERGREEN_SAMPLER_END) {
-		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_SAMPLER, num, 0);
-		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_SAMPLER_OFFSET) >> 2;
+		cs->buf[cs->cdw++] = PKT3C(PKT3_SET_SAMPLER, num, 0);
+		cs->buf[cs->cdw++] = (index - EVERGREEN_SAMPLER_OFFSET) >> 2;
 	} else if (index >= EVERGREEN_CTL_CONST_OFFSET
 			&& index < EVERGREEN_CTL_CONST_END) {
-		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CTL_CONST, num, 0);
-		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CTL_CONST_OFFSET) >> 2;
+		cs->buf[cs->cdw++] = PKT3C(PKT3_SET_CTL_CONST, num, 0);
+		cs->buf[cs->cdw++] = (index - EVERGREEN_CTL_CONST_OFFSET) >> 2;
 	} else if (index >= EVERGREEN_LOOP_CONST_OFFSET
 			&& index < EVERGREEN_LOOP_CONST_END) {
-		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_LOOP_CONST, num, 0);
-		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_LOOP_CONST_OFFSET) >> 2;
+		cs->buf[cs->cdw++] = PKT3C(PKT3_SET_LOOP_CONST, num, 0);
+		cs->buf[cs->cdw++] = (index - EVERGREEN_LOOP_CONST_OFFSET) >> 2;
 	} else if (index >= EVERGREEN_BOOL_CONST_OFFSET
 			&& index < EVERGREEN_BOOL_CONST_END) {
-		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_BOOL_CONST, num, 0);
-		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_BOOL_CONST_OFFSET) >> 2;
+		cs->buf[cs->cdw++] = PKT3C(PKT3_SET_BOOL_CONST, num, 0);
+		cs->buf[cs->cdw++] = (index - EVERGREEN_BOOL_CONST_OFFSET) >> 2;
 	} else {
-		ctx->cs->buf[ctx->cs->cdw++] = PKT0(index, num-1);
+		cs->buf[cs->cdw++] = PKT0(index, num-1);
 	}
 }
 
@@ -217,13 +220,14 @@ void evergreen_emit_ctx_reloc(
 	struct r600_resource *bo,
 	enum radeon_bo_usage usage)
 {
+	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
 	u32 rr = 0;
 
 	assert(bo);
 
-	ctx->cs->buf[ctx->cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-	rr = r600_context_bo_reloc(ctx, bo, usage);
-	ctx->cs->buf[ctx->cs->cdw++] = rr;
+	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+	rr = r600_context_bo_reloc(ctx, &ctx->rings.gfx, bo, usage);
+	cs->buf[cs->cdw++] = rr;
 }
 
 int evergreen_compute_get_gpu_format(
diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index 0ca7f9e..fa90c9a 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -210,7 +210,7 @@ out_err:
 
 void evergreen_flush_vgt_streamout(struct r600_context *ctx)
 {
-	struct radeon_winsys_cs *cs = ctx->cs;
+	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
 
 	r600_write_config_reg(cs, R_0084FC_CP_STRMOUT_CNTL, 0);
 
@@ -228,7 +228,7 @@ void evergreen_flush_vgt_streamout(struct r600_context *ctx)
 
 void evergreen_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit)
 {
-	struct radeon_winsys_cs *cs = ctx->cs;
+	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
 
 	if (buffer_enable_bit) {
 		r600_write_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2);
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 1d49153..86e2c81 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1202,7 +1202,7 @@ evergreen_create_sampler_view(struct pipe_context *ctx,
 
 static void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct pipe_clip_state *state = &rctx->clip_state.state;
 
 	r600_write_context_reg_seq(cs, R_0285BC_PA_CL_UCP0_X, 6*4);
@@ -1245,7 +1245,7 @@ static void evergreen_set_scissor_state(struct pipe_context *ctx,
 
 static void evergreen_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct pipe_scissor_state *state = &rctx->scissor.scissor;
 	uint32_t tl, br;
 
@@ -1811,7 +1811,7 @@ static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples)
 	};
 	static unsigned max_dist_8x = 7;
 
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	unsigned max_dist = 0;
 
 	switch (nr_samples) {
@@ -1901,7 +1901,7 @@ static void cayman_emit_msaa_state(struct r600_context *rctx, int nr_samples)
 	};
 	static unsigned max_dist_16x = 8;
 
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	unsigned max_dist = 0;
 
 	switch (nr_samples) {
@@ -1992,7 +1992,7 @@ static void cayman_emit_msaa_state(struct r600_context *rctx, int nr_samples)
 
 static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct pipe_framebuffer_state *state = &rctx->framebuffer.state;
 	unsigned nr_cbufs = state->nr_cbufs;
 	unsigned i, tl, br;
@@ -2005,7 +2005,9 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 	/* Colorbuffers. */
 	for (i = 0; i < nr_cbufs; i++) {
 		struct r600_surface *cb = (struct r600_surface*)state->cbufs[i];
-		unsigned reloc = r600_context_bo_reloc(rctx, (struct r600_resource*)cb->base.texture,
+		unsigned reloc = r600_context_bo_reloc(rctx,
+						       &rctx->rings.gfx,
+						       (struct r600_resource*)cb->base.texture,
 						       RADEON_USAGE_READWRITE);
 
 		r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 11);
@@ -2044,7 +2046,9 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 				       ((struct r600_surface*)state->cbufs[0])->cb_color_info);
 
 		if (!rctx->keep_tiling_flags) {
-			unsigned reloc = r600_context_bo_reloc(rctx, (struct r600_resource*)state->cbufs[0]->texture,
+			unsigned reloc = r600_context_bo_reloc(rctx,
+							       &rctx->rings.gfx,
+							       (struct r600_resource*)state->cbufs[0]->texture,
 							       RADEON_USAGE_READWRITE);
 
 			r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C70_CB_COLOR0_INFO */
@@ -2064,7 +2068,9 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 	/* ZS buffer. */
 	if (state->zsbuf) {
 		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
-		unsigned reloc = r600_context_bo_reloc(rctx, (struct r600_resource*)state->zsbuf->texture,
+		unsigned reloc = r600_context_bo_reloc(rctx,
+						       &rctx->rings.gfx,
+						       (struct r600_resource*)state->zsbuf->texture,
 						       RADEON_USAGE_READWRITE);
 
 		r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
@@ -2121,7 +2127,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 
 static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a;
 	float offset_units = state->offset_units;
 	float offset_scale = state->offset_scale;
@@ -2146,7 +2152,7 @@ static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600
 
 static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
 	unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1;
 	unsigned ps_colormask = (1ULL << ((unsigned)a->nr_ps_color_outputs * 4)) - 1;
@@ -2161,7 +2167,7 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_
 
 static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct r600_db_state *a = (struct r600_db_state*)atom;
 
 	if (a->rsurf && a->rsurf->htile_enabled) {
@@ -2172,7 +2178,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom
 		r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
 		r600_write_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
 		r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
-		reloc_idx = r600_context_bo_reloc(rctx, rtex->htile, RADEON_USAGE_READWRITE);
+		reloc_idx = r600_context_bo_reloc(rctx, &rctx->rings.gfx, rtex->htile, RADEON_USAGE_READWRITE);
 		cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
 		cs->buf[cs->cdw++] = reloc_idx;
 	} else {
@@ -2183,7 +2189,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom
 
 static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom;
 	unsigned db_render_control = 0;
 	unsigned db_count_control = 0;
@@ -2233,7 +2239,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
 					  unsigned resource_offset,
 					  unsigned pkt_flags)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	uint32_t dirty_mask = state->dirty_mask;
 
 	while (dirty_mask) {
@@ -2269,7 +2275,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
 		r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD7 */
 
 		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
-		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
+		r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ));
 	}
 	state->dirty_mask = 0;
 }
@@ -2291,7 +2297,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
 					    unsigned reg_alu_constbuf_size,
 					    unsigned reg_alu_const_cache)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	uint32_t dirty_mask = state->dirty_mask;
 
 	while (dirty_mask) {
@@ -2312,7 +2318,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
 		r600_write_context_reg(cs, reg_alu_const_cache + buffer_index * 4, va >> 8);
 
 		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
-		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
+		r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ));
 
 		r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 8, 0));
 		r600_write_value(cs, (buffer_id_base + buffer_index) * 8);
@@ -2333,7 +2339,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
 		r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD7 */
 
 		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
-		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
+		r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ));
 
 		dirty_mask &= ~(1 << buffer_index);
 	}
@@ -2365,7 +2371,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
 					 struct r600_samplerview_state *state,
 					 unsigned resource_id_base)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	uint32_t dirty_mask = state->dirty_mask;
 
 	while (dirty_mask) {
@@ -2380,7 +2386,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
 		r600_write_value(cs, (resource_id_base + resource_index) * 8);
 		r600_write_array(cs, 8, rview->tex_resource_words);
 
-		reloc = r600_context_bo_reloc(rctx, rview->tex_resource,
+		reloc = r600_context_bo_reloc(rctx, &rctx->rings.gfx, rview->tex_resource,
 					      RADEON_USAGE_READ);
 		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
 		r600_write_value(cs, reloc);
@@ -2413,7 +2419,7 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx,
 				unsigned resource_id_base,
 				unsigned border_index_reg)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	uint32_t dirty_mask = texinfo->states.dirty_mask;
 
 	while (dirty_mask) {
@@ -2456,14 +2462,14 @@ static void evergreen_emit_sample_mask(struct r600_context *rctx, struct r600_at
 	struct r600_sample_mask *s = (struct r600_sample_mask*)a;
 	uint8_t mask = s->sample_mask;
 
-	r600_write_context_reg(rctx->cs, R_028C3C_PA_SC_AA_MASK,
+	r600_write_context_reg(rctx->rings.gfx.cs, R_028C3C_PA_SC_AA_MASK,
 			       mask | (mask << 8) | (mask << 16) | (mask << 24));
 }
 
 static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a)
 {
 	struct r600_sample_mask *s = (struct r600_sample_mask*)a;
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	uint16_t mask = s->sample_mask;
 
 	r600_write_context_reg_seq(cs, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
@@ -2473,14 +2479,14 @@ static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom
 
 static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct r600_cso_state *state = (struct r600_cso_state*)a;
 	struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
 
 	r600_write_context_reg(cs, R_0288A4_SQ_PGM_START_FS,
 			       (r600_resource_va(rctx->context.screen, &shader->buffer->b.b) + shader->offset) >> 8);
 	r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
-	r600_write_value(cs, r600_context_bo_reloc(rctx, shader->buffer, RADEON_USAGE_READ));
+	r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, shader->buffer, RADEON_USAGE_READ));
 }
 
 void evergreen_init_state_functions(struct r600_context *rctx)
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 0a6f63f..5e426c0 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2890,8 +2890,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
 		return NULL;
 	}
 
-	bytecode = rctx->ws->buffer_map(shader->buffer->cs_buf, rctx->cs,
-					PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED);
+	bytecode = r600_buffer_mmap_sync_with_rings(rctx, shader->buffer, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED);
 	bytecode += shader->offset / 4;
 
 	if (R600_BIG_ENDIAN) {
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index e674e13..be171f8 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -85,11 +85,11 @@ static void *r600_buffer_get_transfer(struct pipe_context *ctx,
 }
 
 static void *r600_buffer_transfer_map(struct pipe_context *ctx,
-                                      struct pipe_resource *resource,
-                                      unsigned level,
-                                      unsigned usage,
-                                      const struct pipe_box *box,
-				      struct pipe_transfer **ptransfer)
+					struct pipe_resource *resource,
+					unsigned level,
+					unsigned usage,
+					const struct pipe_box *box,
+					struct pipe_transfer **ptransfer)
 {
 	struct r600_context *rctx = (struct r600_context*)ctx;
 	struct r600_resource *rbuffer = r600_resource(resource);
@@ -102,7 +102,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 		assert(usage & PIPE_TRANSFER_WRITE);
 
 		/* Check if mapping this buffer would cause waiting for the GPU. */
-		if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
+		if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
 		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
 			unsigned i, mask;
 
@@ -144,7 +144,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 		assert(usage & PIPE_TRANSFER_WRITE);
 
 		/* Check if mapping this buffer would cause waiting for the GPU. */
-		if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
+		if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
 		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
 			/* Do a wait-free write-only transfer using a temporary buffer. */
 			unsigned offset;
@@ -161,7 +161,8 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 		}
 	}
 
-	data = rctx->ws->buffer_map(rbuffer->cs_buf, rctx->cs, usage);
+	/* mmap and synchronize with rings */
+	data = r600_buffer_mmap_sync_with_rings(rctx, rbuffer, usage);
 	if (!data) {
 		return NULL;
 	}
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index caebf5c..9cef87f 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -32,7 +32,7 @@
 /* Get backends mask */
 void r600_get_backend_mask(struct r600_context *ctx)
 {
-	struct radeon_winsys_cs *cs = ctx->cs;
+	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
 	struct r600_resource *buffer;
 	uint32_t *results;
 	unsigned num_backends = ctx->screen->info.r600_num_backends;
@@ -72,11 +72,10 @@ void r600_get_backend_mask(struct r600_context *ctx)
 				   PIPE_USAGE_STAGING, ctx->max_db*16);
 	if (!buffer)
 		goto err;
-
 	va = r600_resource_va(&ctx->screen->screen, (void*)buffer);
 
 	/* initialize buffer with zeroes */
-	results = ctx->ws->buffer_map(buffer->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE);
+	results = r600_buffer_mmap_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
 	if (results) {
 		memset(results, 0, ctx->max_db * 4 * 4);
 		ctx->ws->buffer_unmap(buffer->cs_buf);
@@ -88,10 +87,10 @@ void r600_get_backend_mask(struct r600_context *ctx)
 		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
 
 		cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-		cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, buffer, RADEON_USAGE_WRITE);
+		cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, buffer, RADEON_USAGE_WRITE);
 
 		/* analyze results */
-		results = ctx->ws->buffer_map(buffer->cs_buf, ctx->cs, PIPE_TRANSFER_READ);
+		results = r600_buffer_mmap_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
 		if (results) {
 			for(i = 0; i < ctx->max_db; i++) {
 				/* at least highest bit will be set if backend is used */
@@ -361,7 +360,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 			boolean count_draw_in)
 {
 	/* The number of dwords we already used in the CS so far. */
-	num_dw += ctx->cs->cdw;
+	num_dw += ctx->rings.gfx.cs->cdw;
 
 	if (count_draw_in) {
 		unsigned i;
@@ -413,7 +412,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 
 	/* Flush if there's not enough space. */
 	if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
-		r600_flush(&ctx->context, NULL, RADEON_FLUSH_ASYNC);
+		ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC);
 	}
 }
 
@@ -543,7 +542,7 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat
 void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block,
 	unsigned pkt_flags)
 {
-	struct radeon_winsys_cs *cs = ctx->cs;
+	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
 	int optional = block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS);
 	int cp_dwords = block->pm4_ndwords, start_dword = 0;
 	int new_dwords = 0;
@@ -560,7 +559,7 @@ void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *
 				struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]];
 				if (reloc->bo) {
 					block->pm4[reloc->bo_pm4_index] =
-							r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage);
+							r600_context_bo_reloc(ctx, &ctx->rings.gfx, reloc->bo, reloc->bo_usage);
 				} else {
 					block->pm4[reloc->bo_pm4_index] = 0;
 				}
@@ -604,7 +603,7 @@ out:
 
 void r600_flush_emit(struct r600_context *rctx)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	unsigned cp_coher_cntl = 0;
 	unsigned wait_until = 0;
 	unsigned emit_flush = 0;
@@ -692,7 +691,7 @@ void r600_flush_emit(struct r600_context *rctx)
 
 void r600_context_flush(struct r600_context *ctx, unsigned flags)
 {
-	struct radeon_winsys_cs *cs = ctx->cs;
+	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
 
 	if (cs->cdw == ctx->start_cs_cmd.num_dw)
 		return;
@@ -743,7 +742,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
 		rscreen->cs_count++;
 	}
 #endif
-	ctx->ws->cs_flush(ctx->cs, flags);
+	ctx->ws->cs_flush(ctx->rings.gfx.cs, flags);
 #if R600_TRACE_CS
 	if (ctx->screen->trace_bo) {
 		struct r600_screen *rscreen = ctx->screen;
@@ -776,7 +775,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
 	ctx->flags = 0;
 
 	/* Begin a new CS. */
-	r600_emit_command_buffer(ctx->cs, &ctx->start_cs_cmd);
+	r600_emit_command_buffer(ctx->rings.gfx.cs, &ctx->start_cs_cmd);
 
 	/* Re-emit states. */
 	ctx->alphatest_state.atom.dirty = true;
@@ -854,7 +853,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
 
 void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence_bo, unsigned offset, unsigned value)
 {
-	struct radeon_winsys_cs *cs = ctx->cs;
+	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
 	uint64_t va;
 
 	r600_need_cs_space(ctx, 10, FALSE);
@@ -872,12 +871,12 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fen
 	cs->buf[cs->cdw++] = value;                   /* DATA_LO */
 	cs->buf[cs->cdw++] = 0;                       /* DATA_HI */
 	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, fence_bo, RADEON_USAGE_WRITE);
+	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, fence_bo, RADEON_USAGE_WRITE);
 }
 
 static void r600_flush_vgt_streamout(struct r600_context *ctx)
 {
-	struct radeon_winsys_cs *cs = ctx->cs;
+	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
 
 	r600_write_config_reg(cs, R_008490_CP_STRMOUT_CNTL, 0);
 
@@ -895,7 +894,7 @@ static void r600_flush_vgt_streamout(struct r600_context *ctx)
 
 static void r600_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit)
 {
-	struct radeon_winsys_cs *cs = ctx->cs;
+	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
 
 	if (buffer_enable_bit) {
 		r600_write_context_reg(cs, R_028AB0_VGT_STRMOUT_EN, S_028AB0_STREAMOUT(1));
@@ -907,7 +906,7 @@ static void r600_set_streamout_enable(struct r600_context *ctx, unsigned buffer_
 
 void r600_context_streamout_begin(struct r600_context *ctx)
 {
-	struct radeon_winsys_cs *cs = ctx->cs;
+	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
 	struct r600_so_target **t = ctx->so_targets;
 	unsigned *stride_in_dw = ctx->vs_shader->so.stride;
 	unsigned buffer_en, i, update_flags = 0;
@@ -963,7 +962,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
 
 			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
 			cs->buf[cs->cdw++] =
-				r600_context_bo_reloc(ctx, r600_resource(t[i]->b.buffer),
+				r600_context_bo_reloc(ctx, &ctx->rings.gfx, r600_resource(t[i]->b.buffer),
 						      RADEON_USAGE_WRITE);
 
 			/* R7xx requires this packet after updating BUFFER_BASE.
@@ -975,7 +974,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
 
 				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
 				cs->buf[cs->cdw++] =
-					r600_context_bo_reloc(ctx, r600_resource(t[i]->b.buffer),
+					r600_context_bo_reloc(ctx, &ctx->rings.gfx, r600_resource(t[i]->b.buffer),
 							      RADEON_USAGE_WRITE);
 			}
 
@@ -993,7 +992,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
 
 				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
 				cs->buf[cs->cdw++] =
-					r600_context_bo_reloc(ctx,  t[i]->buf_filled_size,
+					r600_context_bo_reloc(ctx,  &ctx->rings.gfx, t[i]->buf_filled_size,
 							      RADEON_USAGE_READ);
 			} else {
 				/* Start from the beginning. */
@@ -1016,7 +1015,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
 
 void r600_context_streamout_end(struct r600_context *ctx)
 {
-	struct radeon_winsys_cs *cs = ctx->cs;
+	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
 	struct r600_so_target **t = ctx->so_targets;
 	unsigned i;
 	uint64_t va;
@@ -1042,7 +1041,7 @@ void r600_context_streamout_end(struct r600_context *ctx)
 
 			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
 			cs->buf[cs->cdw++] =
-				r600_context_bo_reloc(ctx,  t[i]->buf_filled_size,
+				r600_context_bo_reloc(ctx,  &ctx->rings.gfx, t[i]->buf_filled_size,
 						      RADEON_USAGE_WRITE);
 
 		}
@@ -1069,7 +1068,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
 			     struct pipe_resource *src, unsigned src_offset,
 			     unsigned size)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 
 	assert(size);
 	assert(rctx->chip_class != R600);
@@ -1107,8 +1106,8 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
 		}
 
 		/* This must be done after r600_need_cs_space. */
-		src_reloc = r600_context_bo_reloc(rctx, (struct r600_resource*)src, RADEON_USAGE_READ);
-		dst_reloc = r600_context_bo_reloc(rctx, (struct r600_resource*)dst, RADEON_USAGE_WRITE);
+		src_reloc = r600_context_bo_reloc(rctx, &rctx->rings.gfx, (struct r600_resource*)src, RADEON_USAGE_READ);
+		dst_reloc = r600_context_bo_reloc(rctx, &rctx->rings.gfx, (struct r600_resource*)dst, RADEON_USAGE_WRITE);
 
 		r600_write_value(cs, PKT3(PKT3_CP_DMA, 4, 0));
 		r600_write_value(cs, src_offset);	/* SRC_ADDR_LO [31:0] */
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index e4a35cf..c72ee8f 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -53,9 +53,7 @@ static struct r600_fence *r600_create_fence(struct r600_context *rctx)
 			R600_ERR("r600: failed to create bo for fence objects\n");
 			goto out;
 		}
-		rscreen->fences.data = rctx->ws->buffer_map(rscreen->fences.bo->cs_buf,
-							   rctx->cs,
-							   PIPE_TRANSFER_READ_WRITE);
+		rscreen->fences.data = r600_buffer_mmap_sync_with_rings(rctx, rscreen->fences.bo, PIPE_TRANSFER_READ_WRITE);
 	}
 
 	if (!LIST_IS_EMPTY(&rscreen->fences.pool)) {
@@ -108,25 +106,20 @@ static struct r600_fence *r600_create_fence(struct r600_context *rctx)
 			pipe_buffer_create(&rctx->screen->screen, PIPE_BIND_CUSTOM,
 					   PIPE_USAGE_STAGING, 1);
 	/* Add the fence as a dummy relocation. */
-	r600_context_bo_reloc(rctx, fence->sleep_bo, RADEON_USAGE_READWRITE);
+	r600_context_bo_reloc(rctx, &rctx->rings.gfx, fence->sleep_bo, RADEON_USAGE_READWRITE);
 
 out:
 	pipe_mutex_unlock(rscreen->fences.mutex);
 	return fence;
 }
 
-
-void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence,
-		unsigned flags)
+static void r600_flush(struct pipe_context *ctx, unsigned flags)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
-	struct r600_fence **rfence = (struct r600_fence**)fence;
 	struct pipe_query *render_cond = NULL;
 	unsigned render_cond_mode = 0;
 
-	if (rfence)
-		*rfence = r600_create_fence(rctx);
-
+	rctx->rings.gfx.flushing = true;
 	/* Disable render condition. */
 	if (rctx->current_render_cond) {
 		render_cond = rctx->current_render_cond;
@@ -140,19 +133,119 @@ void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence,
 	if (render_cond) {
 		ctx->render_condition(ctx, render_cond, render_cond_mode);
 	}
+	rctx->rings.gfx.flushing = false;
 }
 
 static void r600_flush_from_st(struct pipe_context *ctx,
 			       struct pipe_fence_handle **fence,
 			       enum pipe_flush_flags flags)
 {
-	r600_flush(ctx, fence,
-		   flags & PIPE_FLUSH_END_OF_FRAME ? RADEON_FLUSH_END_OF_FRAME : 0);
+	struct r600_context *rctx = (struct r600_context *)ctx;
+	unsigned fflags = flags & PIPE_FLUSH_END_OF_FRAME ? RADEON_FLUSH_END_OF_FRAME : 0;
+
+	if (fence) {
+		*(struct r600_fence **)fence = r600_create_fence(rctx);
+	}
+	/* flush dma (when present) & gfx ring, order does not matter as only one can be live */
+	if (rctx->rings.dma.cs) {
+		rctx->rings.dma.flush(rctx, fflags);
+	}
+	rctx->rings.gfx.flush(rctx, fflags);
+}
+
+static void r600_flush_gfx_ring(void *ctx, unsigned flags)
+{
+	r600_flush((struct pipe_context*)ctx, flags); /* rings.gfx.flush hook: forwards to r600_flush() */
+}
+
+/* rings.dma.flush hook: flush the dma cs through the winsys unless its cdw is 0. */
+static void r600_flush_dma_ring(void *ctx, unsigned flags)
+{
+	struct r600_context *rctx = (struct r600_context *)ctx;
+
+	if (rctx->rings.dma.cs->cdw) {
+		rctx->rings.dma.flushing = true;
+		rctx->ws->cs_flush(rctx->rings.dma.cs, flags);
+		rctx->rings.dma.flushing = false;
+	}
+}
+
+/* Return TRUE if buf is referenced with the given usage by the gfx cs or by
+ * the dma cs; the dma cs is NULL when the hw has no DMA ring and is skipped. */
+boolean r600_rings_is_buffer_referenced(struct r600_context *ctx,
+					struct radeon_winsys_cs_handle *buf,
+					enum radeon_bo_usage usage)
+{
+	if (ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs, buf, usage)) {
+		return TRUE;
+	}
+	return ctx->rings.dma.cs &&
+	       ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs, buf, usage);
+}
+
+/* Map a buffer for CPU access after flushing any ring whose cs references it
+ * with a conflicting usage and waiting for the buffer.  Returns NULL for
+ * PIPE_TRANSFER_DONTBLOCK when a ring had to be flushed or the buffer is busy. */
+void *r600_buffer_mmap_sync_with_rings(struct r600_context *ctx,
+					struct r600_resource *resource,
+					unsigned usage)
+{
+	enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
+	unsigned flags = 0;
+
+	if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
+		return ctx->ws->buffer_map(resource->cs_buf, NULL, usage);
+	}
+
+	if (!(usage & PIPE_TRANSFER_WRITE)) {
+		/* not mapping for write: only the last write has to be waited for */
+		rusage = RADEON_USAGE_WRITE;
+	}
+	if (usage & PIPE_TRANSFER_DONTBLOCK) {
+		flags |= RADEON_FLUSH_ASYNC;
+	}
+
+	if (ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs, resource->cs_buf, rusage) && ctx->rings.gfx.cs->cdw) {
+		ctx->rings.gfx.flush(ctx, flags);
+		if (usage & PIPE_TRANSFER_DONTBLOCK) {
+			return NULL;
+		}
+	}
+	/* the dma cs is NULL when the hw has no DMA ring */
+	if (ctx->rings.dma.cs &&
+	    ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs, resource->cs_buf, rusage) && ctx->rings.dma.cs->cdw) {
+		ctx->rings.dma.flush(ctx, flags);
+		if (usage & PIPE_TRANSFER_DONTBLOCK) {
+			return NULL;
+		}
+	}
+
+	if ((usage & PIPE_TRANSFER_DONTBLOCK) && ctx->ws->buffer_is_busy(resource->buf, rusage)) {
+		return NULL;
+	}
+	/* Try to avoid busy-waiting in radeon_bo_wait. */
+	ctx->ws->cs_sync_flush(ctx->rings.gfx.cs);
+	if (ctx->rings.dma.cs) {
+		ctx->ws->cs_sync_flush(ctx->rings.dma.cs);
+	}
+	ctx->ws->buffer_wait(resource->buf, rusage);
+
+	/* at this point everything is synchronized */
+	return ctx->ws->buffer_map(resource->cs_buf, NULL, usage | PIPE_TRANSFER_UNSYNCHRONIZED);
+}
 
 static void r600_flush_from_winsys(void *ctx, unsigned flags)
 {
-	r600_flush((struct pipe_context*)ctx, NULL, flags);
+	/* Flush callback installed on the gfx cs; it just forwards to the
+	 * gfx ring's own flush hook. */
+	((struct r600_context *)ctx)->rings.gfx.flush(ctx, flags);
+}
+
+/* Flush callback installed on the dma cs (see cs_set_flush_callback);
+ * it just forwards to the dma ring's own flush hook. */
+static void r600_flush_dma_from_winsys(void *ctx, unsigned flags)
+{
+	((struct r600_context *)ctx)->rings.dma.flush(ctx, flags);
 }
 
 static void r600_destroy_context(struct pipe_context *context)
@@ -197,8 +290,11 @@ static void r600_destroy_context(struct pipe_context *context)
 
 	r600_release_command_buffer(&rctx->start_cs_cmd);
 
-	if (rctx->cs) {
-		rctx->ws->cs_destroy(rctx->cs);
+	if (rctx->rings.gfx.cs) {
+		rctx->ws->cs_destroy(rctx->rings.gfx.cs);
+	}
+	if (rctx->rings.dma.cs) {
+		rctx->ws->cs_destroy(rctx->rings.dma.cs);
 	}
 
 	FREE(rctx->range);
@@ -289,8 +385,18 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
 		goto fail;
 	}
 
-	rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX);
-	rctx->ws->cs_set_flush_callback(rctx->cs, r600_flush_from_winsys, rctx);
+	rctx->rings.gfx.cs = rctx->ws->cs_create(rctx->ws, RING_GFX);
+	rctx->rings.gfx.flush = r600_flush_gfx_ring;
+	rctx->ws->cs_set_flush_callback(rctx->rings.gfx.cs, r600_flush_from_winsys, rctx);
+	rctx->rings.gfx.flushing = false;
+
+	rctx->rings.dma.cs = NULL;
+	if (rscreen->info.r600_has_dma) {
+		rctx->rings.dma.cs = rctx->ws->cs_create(rctx->ws, RING_DMA);
+		rctx->rings.dma.flush = r600_flush_dma_ring;
+		rctx->ws->cs_set_flush_callback(rctx->rings.dma.cs, r600_flush_dma_from_winsys, rctx);
+		rctx->rings.dma.flushing = false;
+	}
 
 	rctx->uploader = u_upload_create(&rctx->context, 1024 * 1024, 256,
 					PIPE_BIND_INDEX_BUFFER |
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index d983718..5cb0805 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -406,11 +406,22 @@ struct r600_fetch_shader {
 	unsigned			offset;
 };
 
+struct r600_ring {
+	struct radeon_winsys_cs		*cs;		/* command stream of this ring; NULL if the ring was not created */
+	bool				flushing;	/* set around the actual flush of this ring's cs */
+	void (*flush)(void *ctx, unsigned flags);	/* flush hook for this ring; ctx is the r600_context */
+};
+
+struct r600_rings {
+	struct r600_ring		gfx;	/* RING_GFX cs, created for every context */
+	struct r600_ring		dma;	/* RING_DMA cs, only set up when info.r600_has_dma */
+};
+
 struct r600_context {
 	struct pipe_context		context;
 	struct r600_screen		*screen;
 	struct radeon_winsys		*ws;
-	struct radeon_winsys_cs		*cs;
+	struct r600_rings		rings;
 	struct blitter_context		*blitter;
 	struct u_upload_mgr		*uploader;
 	struct u_suballocator		*allocator_so_filled_size;
@@ -626,8 +637,12 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
 					 unsigned alignment);
 
 /* r600_pipe.c */
-void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence,
-		unsigned flags);
+boolean r600_rings_is_buffer_referenced(struct r600_context *ctx,
+					struct radeon_winsys_cs_handle *buf,
+					enum radeon_bo_usage usage);
+void *r600_buffer_mmap_sync_with_rings(struct r600_context *ctx,
+					struct r600_resource *resource,
+					unsigned usage);
 
 /* r600_query.c */
 void r600_init_query_functions(struct r600_context *rctx);
@@ -835,12 +850,25 @@ void r600_release_command_buffer(struct r600_command_buffer *cb);
 /*
  * Helpers for emitting state into a command stream directly.
  */
-
-static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r600_resource *rbo,
+static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx,
+					     struct r600_ring *ring,
+					     struct r600_resource *rbo,
 					     enum radeon_bo_usage usage)
 {
 	assert(usage);
-	return ctx->ws->cs_add_reloc(ctx->cs, rbo->cs_buf, usage, rbo->domains) * 4;
+	/* Make sure the other ring is flushed before this one is used, so that
+	 * everything looks serialized from the driver's point of view.
+	 */
+	if (!ring->flushing) {
+		if (ring != &ctx->rings.gfx) {
+			/* flush gfx ring */
+			ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC);
+		} else if (ctx->rings.dma.cs) {
+			/* flush dma ring, which only exists when the hw has DMA */
+			ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC);
+		}
+	}
+	return ctx->ws->cs_add_reloc(ring->cs, rbo->cs_buf, usage, rbo->domains) * 4;
 }
 
 static INLINE void r600_write_value(struct radeon_winsys_cs *cs, unsigned value)
diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c
index 083d510..0335189 100644
--- a/src/gallium/drivers/r600/r600_query.c
+++ b/src/gallium/drivers/r600/r600_query.c
@@ -52,7 +52,7 @@ static struct r600_resource *r600_new_query_buffer(struct r600_context *ctx, uns
 	switch (type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
 	case PIPE_QUERY_OCCLUSION_PREDICATE:
-		results = ctx->ws->buffer_map(buf->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE);
+		results = r600_buffer_mmap_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
 		memset(results, 0, buf_size);
 
 		/* Set top bits for unused backends. */
@@ -75,7 +75,7 @@ static struct r600_resource *r600_new_query_buffer(struct r600_context *ctx, uns
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
 	case PIPE_QUERY_SO_STATISTICS:
 	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-		results = ctx->ws->buffer_map(buf->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE);
+		results = r600_buffer_mmap_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
 		memset(results, 0, buf_size);
 		ctx->ws->buffer_unmap(buf->cs_buf);
 		break;
@@ -106,7 +106,7 @@ static void r600_update_occlusion_query_state(struct r600_context *rctx,
 
 static void r600_emit_query_begin(struct r600_context *ctx, struct r600_query *query)
 {
-	struct radeon_winsys_cs *cs = ctx->cs;
+	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
 	uint64_t va;
 
 	r600_update_occlusion_query_state(ctx, query->type, 1);
@@ -154,7 +154,7 @@ static void r600_emit_query_begin(struct r600_context *ctx, struct r600_query *q
 		assert(0);
 	}
 	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer.buf, RADEON_USAGE_WRITE);
+	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE);
 
 	if (!r600_is_timer_query(query->type)) {
 		ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
@@ -163,7 +163,7 @@ static void r600_emit_query_begin(struct r600_context *ctx, struct r600_query *q
 
 static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *query)
 {
-	struct radeon_winsys_cs *cs = ctx->cs;
+	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
 	uint64_t va;
 
 	/* The queries which need begin already called this in begin_query. */
@@ -206,7 +206,7 @@ static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *que
 		assert(0);
 	}
 	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer.buf, RADEON_USAGE_WRITE);
+	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE);
 
 	query->buffer.results_end += query->result_size;
 
@@ -222,7 +222,7 @@ static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *que
 static void r600_emit_query_predication(struct r600_context *ctx, struct r600_query *query,
 					int operation, bool flag_wait)
 {
-	struct radeon_winsys_cs *cs = ctx->cs;
+	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
 
 	if (operation == PREDICATION_OP_CLEAR) {
 		r600_need_cs_space(ctx, 3, FALSE);
@@ -256,7 +256,7 @@ static void r600_emit_query_predication(struct r600_context *ctx, struct r600_qu
 				cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL;
 				cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF);
 				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-				cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, qbuf->buf, RADEON_USAGE_READ);
+				cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ);
 				results_base += query->result_size;
 
 				/* set CONTINUE bit for all packets except the first */
@@ -351,7 +351,7 @@ static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query)
 	}
 
 	/* Obtain a new buffer if the current one can't be mapped without a stall. */
-	if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
+	if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
 	    rctx->ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) {
 		pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
 		rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
@@ -406,9 +406,9 @@ static boolean r600_get_query_buffer_result(struct r600_context *ctx,
 	unsigned results_base = 0;
 	char *map;
 
-	map = ctx->ws->buffer_map(qbuf->buf->cs_buf, ctx->cs,
-				  PIPE_TRANSFER_READ |
-				  (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
+	map = r600_buffer_mmap_sync_with_rings(ctx, qbuf->buf,
+						PIPE_TRANSFER_READ |
+						(wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
 	if (!map)
 		return FALSE;
 
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 8307750..1722bab 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -72,7 +72,7 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s
 		if (shader->bo == NULL) {
 			return -ENOMEM;
 		}
-		ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
+		ptr = r600_buffer_mmap_sync_with_rings(rctx, shader->bo, PIPE_TRANSFER_WRITE);
 		if (R600_BIG_ENDIAN) {
 			for (i = 0; i < rshader->bc.ndw; ++i) {
 				ptr[i] = bswap_32(rshader->bc.bytecode[i]);
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 3ec21d2..e9fffaa 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -646,7 +646,7 @@ boolean r600_is_format_supported(struct pipe_screen *screen,
 
 static void r600_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a;
 	float offset_units = state->offset_units;
 	float offset_scale = state->offset_scale;
@@ -1144,7 +1144,7 @@ r600_create_sampler_view(struct pipe_context *ctx,
 
 static void r600_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct pipe_clip_state *state = &rctx->clip_state.state;
 
 	r600_write_context_reg_seq(cs, R_028E20_PA_CL_UCP0_X, 6*4);
@@ -1158,7 +1158,7 @@ static void r600_set_polygon_stipple(struct pipe_context *ctx,
 
 static void r600_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct pipe_scissor_state *state = &rctx->scissor.scissor;
 
 	if (rctx->chip_class != R600 || rctx->scissor.enable) {
@@ -1655,7 +1655,7 @@ static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples)
 	};
 	static unsigned max_dist_8x = 7;
 
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	unsigned max_dist = 0;
 
 	if (rctx->family == CHIP_R600) {
@@ -1722,7 +1722,7 @@ static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples)
 
 static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct pipe_framebuffer_state *state = &rctx->framebuffer.state;
 	unsigned nr_cbufs = state->nr_cbufs;
 	struct r600_surface **cb = (struct r600_surface**)&state->cbufs[0];
@@ -1752,6 +1752,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 		/* relocations */
 		for (i = 0; i < nr_cbufs; i++) {
 			unsigned reloc = r600_context_bo_reloc(rctx,
+							       &rctx->rings.gfx,
 							       (struct r600_resource*)cb[i]->base.texture,
 							       RADEON_USAGE_READWRITE);
 			r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
@@ -1781,6 +1782,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 		/* relocations */
 		for (i = 0; i < nr_cbufs; i++) {
 			unsigned reloc = r600_context_bo_reloc(rctx,
+							       &rctx->rings.gfx,
 							       cb[i]->cb_buffer_fmask,
 							       RADEON_USAGE_READWRITE);
 			r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
@@ -1795,6 +1797,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 		/* relocations */
 		for (i = 0; i < nr_cbufs; i++) {
 			unsigned reloc = r600_context_bo_reloc(rctx,
+							       &rctx->rings.gfx,
 							       cb[i]->cb_buffer_cmask,
 							       RADEON_USAGE_READWRITE);
 			r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
@@ -1815,6 +1818,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 	if (state->zsbuf) {
 		struct r600_surface *surf = (struct r600_surface*)state->zsbuf;
 		unsigned reloc = r600_context_bo_reloc(rctx,
+						       &rctx->rings.gfx,
 						       (struct r600_resource*)state->zsbuf->texture,
 						       RADEON_USAGE_READWRITE);
 
@@ -1869,7 +1873,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 
 static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
 
 	if (G_028808_SPECIAL_OP(a->cb_color_control) == V_028808_SPECIAL_RESOLVE_BOX) {
@@ -1899,7 +1903,7 @@ static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom
 
 static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct r600_db_state *a = (struct r600_db_state*)atom;
 
 	if (a->rsurf && a->rsurf->htile_enabled) {
@@ -1909,7 +1913,7 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom
 		r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear));
 		r600_write_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
 		r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
-		reloc_idx = r600_context_bo_reloc(rctx, rtex->htile, RADEON_USAGE_READWRITE);
+		reloc_idx = r600_context_bo_reloc(rctx, &rctx->rings.gfx, rtex->htile, RADEON_USAGE_READWRITE);
 		cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
 		cs->buf[cs->cdw++] = reloc_idx;
 	} else {
@@ -1919,7 +1923,7 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom
 
 static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom;
 	unsigned db_render_control = 0;
 	unsigned db_render_override =
@@ -1962,7 +1966,7 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
 
 static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct r600_config_state *a = (struct r600_config_state*)atom;
 
 	r600_write_config_reg(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, a->sq_gpr_resource_mgmt_1);
@@ -1970,7 +1974,7 @@ static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *
 
 static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	uint32_t dirty_mask = rctx->vertex_buffer_state.dirty_mask;
 
 	while (dirty_mask) {
@@ -1999,7 +2003,7 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom
 		r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD6 */
 
 		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
-		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
+		r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ));
 	}
 }
 
@@ -2009,7 +2013,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
 				       unsigned reg_alu_constbuf_size,
 				       unsigned reg_alu_const_cache)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	uint32_t dirty_mask = state->dirty_mask;
 
 	while (dirty_mask) {
@@ -2029,7 +2033,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
 		r600_write_context_reg(cs, reg_alu_const_cache + buffer_index * 4, offset >> 8);
 
 		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
-		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
+		r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ));
 
 		r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
 		r600_write_value(cs, (buffer_id_base + buffer_index) * 7);
@@ -2044,7 +2048,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
 		r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD6 */
 
 		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
-		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
+		r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ));
 
 		dirty_mask &= ~(1 << buffer_index);
 	}
@@ -2076,7 +2080,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx,
 				    struct r600_samplerview_state *state,
 				    unsigned resource_id_base)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	uint32_t dirty_mask = state->dirty_mask;
 
 	while (dirty_mask) {
@@ -2091,7 +2095,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx,
 		r600_write_value(cs, (resource_id_base + resource_index) * 7);
 		r600_write_array(cs, 7, rview->tex_resource_words);
 
-		reloc = r600_context_bo_reloc(rctx, rview->tex_resource,
+		reloc = r600_context_bo_reloc(rctx, &rctx->rings.gfx, rview->tex_resource,
 					      RADEON_USAGE_READ);
 		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
 		r600_write_value(cs, reloc);
@@ -2128,7 +2132,7 @@ static void r600_emit_sampler_states(struct r600_context *rctx,
 				unsigned resource_id_base,
 				unsigned border_color_reg)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	uint32_t dirty_mask = texinfo->states.dirty_mask;
 
 	while (dirty_mask) {
@@ -2189,7 +2193,7 @@ static void r600_emit_ps_sampler_states(struct r600_context *rctx, struct r600_a
 
 static void r600_emit_seamless_cube_map(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	unsigned tmp;
 
 	tmp = S_009508_DISABLE_CUBE_ANISO(1) |
@@ -2207,19 +2211,19 @@ static void r600_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a
 	struct r600_sample_mask *s = (struct r600_sample_mask*)a;
 	uint8_t mask = s->sample_mask;
 
-	r600_write_context_reg(rctx->cs, R_028C48_PA_SC_AA_MASK,
+	r600_write_context_reg(rctx->rings.gfx.cs, R_028C48_PA_SC_AA_MASK,
 			       mask | (mask << 8) | (mask << 16) | (mask << 24));
 }
 
 static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct r600_cso_state *state = (struct r600_cso_state*)a;
 	struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
 
 	r600_write_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8);
 	r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
-	r600_write_value(cs, r600_context_bo_reloc(rctx, shader->buffer, RADEON_USAGE_READ));
+	r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, shader->buffer, RADEON_USAGE_READ));
 }
 
 void r600_init_state_functions(struct r600_context *rctx)
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 3497737..c7f672f 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -65,12 +65,12 @@ void r600_init_atom(struct r600_context *rctx,
 
 void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	r600_emit_command_buffer(rctx->cs, ((struct r600_cso_state*)atom)->cb);
+	r600_emit_command_buffer(rctx->rings.gfx.cs, ((struct r600_cso_state*)atom)->cb);
 }
 
 void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct r600_alphatest_state *a = (struct r600_alphatest_state*)atom;
 	unsigned alpha_ref = a->sx_alpha_ref;
 
@@ -176,7 +176,7 @@ static void r600_set_blend_color(struct pipe_context *ctx,
 
 void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct pipe_blend_color *state = &rctx->blend_color.state;
 
 	r600_write_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
@@ -188,7 +188,7 @@ void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom)
 
 void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct r600_vgt_state *a = (struct r600_vgt_state *)atom;
 
 	r600_write_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, a->vgt_multi_prim_ib_reset_en);
@@ -197,7 +197,7 @@ void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom)
 
 void r600_emit_vgt2_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct r600_vgt2_state *a = (struct r600_vgt2_state *)atom;
 
 	r600_write_context_reg(cs, R_028408_VGT_INDX_OFFSET, a->vgt_indx_offset);
@@ -231,7 +231,7 @@ static void r600_set_stencil_ref(struct pipe_context *ctx,
 
 void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct r600_stencil_ref_state *a = (struct r600_stencil_ref_state*)atom;
 
 	r600_write_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
@@ -658,7 +658,7 @@ static void r600_set_viewport_state(struct pipe_context *ctx,
 
 void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct pipe_viewport_state *state = &rctx->viewport.state;
 
 	r600_write_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE_0, 6);
@@ -1243,7 +1243,7 @@ static unsigned r600_conv_prim_to_gs_out(unsigned mode)
 
 void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	struct r600_clip_misc_state *state = &rctx->clip_misc_state;
 
 	r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
@@ -1261,7 +1261,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 	struct pipe_index_buffer ib = {};
 	unsigned i;
 	struct r600_block *dirty_block = NULL, *next_block = NULL;
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 
 	if (!info.count && (info.indexed || !info.count_from_stream_output)) {
 		assert(0);
@@ -1280,6 +1280,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 		return;
 	}
 
+	/* make sure that the gfx ring is the only one active */
+	rctx->rings.dma.flush(rctx, RADEON_FLUSH_ASYNC);
+
 	if (info.indexed) {
 		/* Initialize the index buffer struct. */
 		pipe_resource_reference(&ib.buffer, rctx->index_buffer.buffer);
@@ -1405,7 +1408,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 			cs->buf[cs->cdw++] = info.count;
 			cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;
 			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->predicate_drawing);
-			cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, (struct r600_resource*)ib.buffer, RADEON_USAGE_READ);
+			cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, &rctx->rings.gfx, (struct r600_resource*)ib.buffer, RADEON_USAGE_READ);
 		}
 	} else {
 		if (info.count_from_stream_output) {
@@ -1422,7 +1425,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 			cs->buf[cs->cdw++] = 0; /* unused */
 
 			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-			cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, t->buf_filled_size, RADEON_USAGE_READ);
+			cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, &rctx->rings.gfx, t->buf_filled_size, RADEON_USAGE_READ);
 		}
 
 		cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->predicate_drawing);
@@ -1734,12 +1737,12 @@ void r600_init_common_state_functions(struct r600_context *rctx)
 void r600_trace_emit(struct r600_context *rctx)
 {
 	struct r600_screen *rscreen = rctx->screen;
-	struct radeon_winsys_cs *cs = rctx->cs;
+	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
 	uint64_t va;
 	uint32_t reloc;
 
 	va = r600_resource_va(&rscreen->screen, (void*)rscreen->trace_bo);
-	reloc = r600_context_bo_reloc(rctx, rscreen->trace_bo, RADEON_USAGE_READWRITE);
+	reloc = r600_context_bo_reloc(rctx, &rctx->rings.gfx, rscreen->trace_bo, RADEON_USAGE_READWRITE);
 	r600_write_value(cs, PKT3(PKT3_MEM_WRITE, 3, 0));
 	r600_write_value(cs, va & 0xFFFFFFFFUL);
 	r600_write_value(cs, (va >> 32UL) & 0xFFUL);
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 45a30f8..5b7873d 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -725,7 +725,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
 	struct r600_transfer *trans;
 	boolean use_staging_texture = FALSE;
 	enum pipe_format format = texture->format;
-	struct radeon_winsys_cs_handle *buf;
+	struct r600_resource *buf;
 	unsigned offset = 0;
 	char *map;
 
@@ -746,7 +746,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
 
 	/* Use a staging texture for uploads if the underlying BO is busy. */
 	if (!(usage & PIPE_TRANSFER_READ) &&
-	    (rctx->ws->cs_is_buffer_referenced(rctx->cs, rtex->resource.cs_buf, RADEON_USAGE_READWRITE) ||
+	    (r600_rings_is_buffer_referenced(rctx, rtex->resource.cs_buf, RADEON_USAGE_READWRITE) ||
 	     rctx->ws->buffer_is_busy(rtex->resource.buf, RADEON_USAGE_READWRITE))) {
 		use_staging_texture = TRUE;
 	}
@@ -839,8 +839,9 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
 		trans->transfer.layer_stride = staging->surface.level[0].slice_size;
 		if (usage & PIPE_TRANSFER_READ) {
 			r600_copy_to_staging_texture(ctx, trans);
-			/* Always referenced in the blit. */
-			r600_flush(ctx, NULL, 0);
+			/* flush gfx & dma ring, order does not matter as only one can be live */
+			rctx->rings.dma.flush(rctx, 0);
+			rctx->rings.gfx.flush(rctx, 0);
 		}
 	} else {
 		trans->transfer.stride = rtex->surface.level[level].pitch_bytes;
@@ -849,9 +850,9 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
 	}
 
 	if (trans->staging) {
-		buf = trans->staging->cs_buf;
+		buf = trans->staging;
 	} else {
-		buf = rtex->resource.cs_buf;
+		buf = &rtex->resource;
 	}
 
 	if (rtex->is_depth || !trans->staging)
@@ -859,7 +860,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
 			box->y / util_format_get_blockheight(format) * trans->transfer.stride +
 			box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
 
-	if (!(map = rctx->ws->buffer_map(buf, rctx->cs, usage))) {
+	if (!(map = r600_buffer_mmap_sync_with_rings(rctx, buf, usage))) {
 		pipe_resource_reference((struct pipe_resource**)&trans->staging, NULL);
 		FREE(trans);
 		return NULL;
-- 
1.7.11.7



More information about the mesa-dev mailing list