[Mesa-dev] [PATCH 2/2] r600g: add multi ring support with dma as first second ring

Christian König deathsimple at vodafone.de
Tue Jan 8 04:13:59 PST 2013


On 07.01.2013 21:30, j.glisse at gmail.com wrote:
> From: Jerome Glisse <jglisse at redhat.com>
>
> We keep track of ring emission order in a stack; whenever we need to
> flush, we empty the stack in FIFO order. There are a few helper functions
> for bo mapping and other ring activities that make sure that
> the ring stack is properly flushed and submitted.
>
> Signed-off-by: Jerome Glisse <jglisse at redhat.com>

Yeah, that's better.

Reviewed-by: Christian König <christian.koenig at amd.com>

> ---
>   src/gallium/drivers/r600/evergreen_compute.c       |  26 ++--
>   .../drivers/r600/evergreen_compute_internal.c      |  42 ++---
>   src/gallium/drivers/r600/evergreen_hw_context.c    |   4 +-
>   src/gallium/drivers/r600/evergreen_state.c         |  56 ++++---
>   src/gallium/drivers/r600/r600_asm.c                |   3 +-
>   src/gallium/drivers/r600/r600_buffer.c             |  17 ++-
>   src/gallium/drivers/r600/r600_hw_context.c         |  43 +++---
>   src/gallium/drivers/r600/r600_pipe.c               | 170 +++++++++++++++++++--
>   src/gallium/drivers/r600/r600_pipe.h               |  29 +++-
>   src/gallium/drivers/r600/r600_query.c              |  24 +--
>   src/gallium/drivers/r600/r600_shader.c             |   2 +-
>   src/gallium/drivers/r600/r600_state.c              |  48 +++---
>   src/gallium/drivers/r600/r600_state_common.c       |  26 ++--
>   src/gallium/drivers/r600/r600_texture.c            |  10 +-
>   14 files changed, 342 insertions(+), 158 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
> index 8d3050b..85cbf94 100644
> --- a/src/gallium/drivers/r600/evergreen_compute.c
> +++ b/src/gallium/drivers/r600/evergreen_compute.c
> @@ -210,8 +210,7 @@ void evergreen_compute_upload_input(
>   						ctx->screen, buffer_size);
>   	}
>   
> -	num_work_groups_start = ctx->ws->buffer_map(
> -		shader->kernel_param->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE);
> +	num_work_groups_start = r600_buffer_mmap_sync_with_rings(ctx, shader->kernel_param, PIPE_TRANSFER_WRITE);
>   	global_size_start = num_work_groups_start + (3 * (sizeof(uint) /4));
>   	local_size_start = global_size_start + (3 * (sizeof(uint)) / 4);
>   	kernel_parameters_start = local_size_start + (3 * (sizeof(uint)) / 4);
> @@ -251,7 +250,7 @@ static void evergreen_emit_direct_dispatch(
>   		const uint *block_layout, const uint *grid_layout)
>   {
>   	int i;
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	unsigned num_waves;
>   	unsigned num_pipes = rctx->screen->info.r600_max_pipes;
>   	unsigned wave_divisor = (16 * num_pipes);
> @@ -314,7 +313,7 @@ static void evergreen_emit_direct_dispatch(
>   static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
>   		const uint *grid_layout)
>   {
> -	struct radeon_winsys_cs *cs = ctx->cs;
> +	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
>   	unsigned flush_flags = 0;
>   	int i;
>   
> @@ -327,7 +326,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
>   	 * See evergreen_init_atom_start_compute_cs() in this file for the list
>   	 * of registers initialized by the start_compute_cs_cmd atom.
>   	 */
> -	r600_emit_command_buffer(ctx->cs, &ctx->start_compute_cs_cmd);
> +	r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd);
>   
>   	ctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
>   	r600_flush_emit(ctx);
> @@ -335,7 +334,8 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
>   	/* Emit colorbuffers. */
>   	for (i = 0; i < ctx->framebuffer.state.nr_cbufs; i++) {
>   		struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
> -		unsigned reloc = r600_context_bo_reloc(ctx, (struct r600_resource*)cb->base.texture,
> +		unsigned reloc = r600_context_bo_reloc(ctx, &ctx->rings.gfx,
> +						       (struct r600_resource*)cb->base.texture,
>   						       RADEON_USAGE_READWRITE);
>   
>   		r600_write_compute_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
> @@ -424,7 +424,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
>   		flush_flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
>   	}
>   
> -	ctx->ws->cs_flush(ctx->cs, flush_flags);
> +	ctx->ws->cs_flush(ctx->rings.gfx.cs, flush_flags);
>   
>   	ctx->pm4_dirty_cdwords = 0;
>   	ctx->flags = 0;
> @@ -452,7 +452,7 @@ void evergreen_emit_cs_shader(
>   					(struct r600_cs_shader_state*)atom;
>   	struct r600_pipe_compute *shader = state->shader;
>   	struct r600_kernel *kernel = &shader->kernels[state->kernel_index];
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	uint64_t va;
>   
>   	va = r600_resource_va(&rctx->screen->screen, &kernel->code_bo->b.b);
> @@ -465,8 +465,8 @@ void evergreen_emit_cs_shader(
>   	r600_write_value(cs, 0);	/* R_0288D8_SQ_PGM_RESOURCES_LS_2 */
>   
>   	r600_write_value(cs, PKT3C(PKT3_NOP, 0, 0));
> -	r600_write_value(cs, r600_context_bo_reloc(rctx, kernel->code_bo,
> -							RADEON_USAGE_READ));
> +	r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx,
> +							kernel->code_bo, RADEON_USAGE_READ));
>   
>   	rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES;
>   }
> @@ -488,8 +488,7 @@ static void evergreen_launch_grid(
>   		r600_compute_shader_create(ctx_, kernel->llvm_module, &kernel->bc);
>   		kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
>   							kernel->bc.ndw * 4);
> -		p = ctx->ws->buffer_map(kernel->code_bo->cs_buf, ctx->cs,
> -							PIPE_TRANSFER_WRITE);
> +		p = r600_buffer_mmap_sync_with_rings(ctx, kernel->code_bo, PIPE_TRANSFER_WRITE);
>   		memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4);
>   		ctx->ws->buffer_unmap(kernel->code_bo->cs_buf);
>   	}
> @@ -901,8 +900,7 @@ void *r600_compute_global_transfer_map(
>   
>   	COMPUTE_DBG("* r600_compute_global_transfer_map()\n");
>   
> -	if (!(map = rctx->ws->buffer_map(buffer->chunk->pool->bo->cs_buf,
> -						rctx->cs, transfer->usage))) {
> +	if (!(map = r600_buffer_mmap_sync_with_rings(rctx, buffer->chunk->pool->bo, transfer->usage))) {
>   		util_slab_free(&rctx->pool_transfers, transfer);
>   		return NULL;
>   	}
> diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c
> index 1654ab0..2e8e9da 100644
> --- a/src/gallium/drivers/r600/evergreen_compute_internal.c
> +++ b/src/gallium/drivers/r600/evergreen_compute_internal.c
> @@ -63,7 +63,9 @@ void evergreen_emit_raw_value(
>   
>   void evergreen_emit_ctx_value(struct r600_context *ctx, unsigned value)
>   {
> -	ctx->cs->buf[ctx->cs->cdw++] = value;
> +	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
> +
> +	cs->buf[cs->cdw++] = value;
>   }
>   
>   void evergreen_mult_reg_set_(
> @@ -178,37 +180,38 @@ void evergreen_emit_ctx_reg_set(
>   	unsigned index,
>   	int num)
>   {
> +	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
>   
>   	if (index >= EVERGREEN_CONFIG_REG_OFFSET
>   			&& index < EVERGREEN_CONFIG_REG_END) {
> -		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CONFIG_REG, num, 0);
> -		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CONFIG_REG_OFFSET) >> 2;
> +		cs->buf[cs->cdw++] = PKT3C(PKT3_SET_CONFIG_REG, num, 0);
> +		cs->buf[cs->cdw++] = (index - EVERGREEN_CONFIG_REG_OFFSET) >> 2;
>   	} else if (index >= EVERGREEN_CONTEXT_REG_OFFSET
>   			&& index < EVERGREEN_CONTEXT_REG_END) {
> -		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CONTEXT_REG, num, 0);
> -		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
> +		cs->buf[cs->cdw++] = PKT3C(PKT3_SET_CONTEXT_REG, num, 0);
> +		cs->buf[cs->cdw++] = (index - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
>   	} else if (index >= EVERGREEN_RESOURCE_OFFSET
>   			&& index < EVERGREEN_RESOURCE_END) {
> -		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_RESOURCE, num, 0);
> -		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_RESOURCE_OFFSET) >> 2;
> +		cs->buf[cs->cdw++] = PKT3C(PKT3_SET_RESOURCE, num, 0);
> +		cs->buf[cs->cdw++] = (index - EVERGREEN_RESOURCE_OFFSET) >> 2;
>   	} else if (index >= EVERGREEN_SAMPLER_OFFSET
>   			&& index < EVERGREEN_SAMPLER_END) {
> -		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_SAMPLER, num, 0);
> -		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_SAMPLER_OFFSET) >> 2;
> +		cs->buf[cs->cdw++] = PKT3C(PKT3_SET_SAMPLER, num, 0);
> +		cs->buf[cs->cdw++] = (index - EVERGREEN_SAMPLER_OFFSET) >> 2;
>   	} else if (index >= EVERGREEN_CTL_CONST_OFFSET
>   			&& index < EVERGREEN_CTL_CONST_END) {
> -		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CTL_CONST, num, 0);
> -		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CTL_CONST_OFFSET) >> 2;
> +		cs->buf[cs->cdw++] = PKT3C(PKT3_SET_CTL_CONST, num, 0);
> +		cs->buf[cs->cdw++] = (index - EVERGREEN_CTL_CONST_OFFSET) >> 2;
>   	} else if (index >= EVERGREEN_LOOP_CONST_OFFSET
>   			&& index < EVERGREEN_LOOP_CONST_END) {
> -		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_LOOP_CONST, num, 0);
> -		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_LOOP_CONST_OFFSET) >> 2;
> +		cs->buf[cs->cdw++] = PKT3C(PKT3_SET_LOOP_CONST, num, 0);
> +		cs->buf[cs->cdw++] = (index - EVERGREEN_LOOP_CONST_OFFSET) >> 2;
>   	} else if (index >= EVERGREEN_BOOL_CONST_OFFSET
>   			&& index < EVERGREEN_BOOL_CONST_END) {
> -		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_BOOL_CONST, num, 0);
> -		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_BOOL_CONST_OFFSET) >> 2;
> +		cs->buf[cs->cdw++] = PKT3C(PKT3_SET_BOOL_CONST, num, 0);
> +		cs->buf[cs->cdw++] = (index - EVERGREEN_BOOL_CONST_OFFSET) >> 2;
>   	} else {
> -		ctx->cs->buf[ctx->cs->cdw++] = PKT0(index, num-1);
> +		cs->buf[cs->cdw++] = PKT0(index, num-1);
>   	}
>   }
>   
> @@ -217,13 +220,14 @@ void evergreen_emit_ctx_reloc(
>   	struct r600_resource *bo,
>   	enum radeon_bo_usage usage)
>   {
> +	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
>   	u32 rr = 0;
>   
>   	assert(bo);
>   
> -	ctx->cs->buf[ctx->cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
> -	rr = r600_context_bo_reloc(ctx, bo, usage);
> -	ctx->cs->buf[ctx->cs->cdw++] = rr;
> +	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
> +	rr = r600_context_bo_reloc(ctx, &ctx->rings.gfx, bo, usage);
> +	cs->buf[cs->cdw++] = rr;
>   }
>   
>   int evergreen_compute_get_gpu_format(
> diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
> index 0ca7f9e..fa90c9a 100644
> --- a/src/gallium/drivers/r600/evergreen_hw_context.c
> +++ b/src/gallium/drivers/r600/evergreen_hw_context.c
> @@ -210,7 +210,7 @@ out_err:
>   
>   void evergreen_flush_vgt_streamout(struct r600_context *ctx)
>   {
> -	struct radeon_winsys_cs *cs = ctx->cs;
> +	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
>   
>   	r600_write_config_reg(cs, R_0084FC_CP_STRMOUT_CNTL, 0);
>   
> @@ -228,7 +228,7 @@ void evergreen_flush_vgt_streamout(struct r600_context *ctx)
>   
>   void evergreen_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit)
>   {
> -	struct radeon_winsys_cs *cs = ctx->cs;
> +	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
>   
>   	if (buffer_enable_bit) {
>   		r600_write_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2);
> diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
> index 032af78..f23d6ea 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -1147,7 +1147,7 @@ evergreen_create_sampler_view(struct pipe_context *ctx,
>   
>   static void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct pipe_clip_state *state = &rctx->clip_state.state;
>   
>   	r600_write_context_reg_seq(cs, R_0285BC_PA_CL_UCP0_X, 6*4);
> @@ -1190,7 +1190,7 @@ static void evergreen_set_scissor_state(struct pipe_context *ctx,
>   
>   static void evergreen_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct pipe_scissor_state *state = &rctx->scissor.scissor;
>   	uint32_t tl, br;
>   
> @@ -1755,7 +1755,7 @@ static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples)
>   	};
>   	static unsigned max_dist_8x = 7;
>   
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	unsigned max_dist = 0;
>   
>   	switch (nr_samples) {
> @@ -1845,7 +1845,7 @@ static void cayman_emit_msaa_state(struct r600_context *rctx, int nr_samples)
>   	};
>   	static unsigned max_dist_16x = 8;
>   
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	unsigned max_dist = 0;
>   
>   	switch (nr_samples) {
> @@ -1936,7 +1936,7 @@ static void cayman_emit_msaa_state(struct r600_context *rctx, int nr_samples)
>   
>   static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct pipe_framebuffer_state *state = &rctx->framebuffer.state;
>   	unsigned nr_cbufs = state->nr_cbufs;
>   	unsigned i, tl, br;
> @@ -1949,7 +1949,9 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
>   	/* Colorbuffers. */
>   	for (i = 0; i < nr_cbufs; i++) {
>   		struct r600_surface *cb = (struct r600_surface*)state->cbufs[i];
> -		unsigned reloc = r600_context_bo_reloc(rctx, (struct r600_resource*)cb->base.texture,
> +		unsigned reloc = r600_context_bo_reloc(rctx,
> +						       &rctx->rings.gfx,
> +						       (struct r600_resource*)cb->base.texture,
>   						       RADEON_USAGE_READWRITE);
>   
>   		r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 11);
> @@ -1988,7 +1990,9 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
>   				       ((struct r600_surface*)state->cbufs[0])->cb_color_info);
>   
>   		if (!rctx->keep_tiling_flags) {
> -			unsigned reloc = r600_context_bo_reloc(rctx, (struct r600_resource*)state->cbufs[0]->texture,
> +			unsigned reloc = r600_context_bo_reloc(rctx,
> +							       &rctx->rings.gfx,
> +							       (struct r600_resource*)state->cbufs[0]->texture,
>   							       RADEON_USAGE_READWRITE);
>   
>   			r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C70_CB_COLOR0_INFO */
> @@ -2008,7 +2012,9 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
>   	/* ZS buffer. */
>   	if (state->zsbuf) {
>   		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
> -		unsigned reloc = r600_context_bo_reloc(rctx, (struct r600_resource*)state->zsbuf->texture,
> +		unsigned reloc = r600_context_bo_reloc(rctx,
> +						       &rctx->rings.gfx,
> +						       (struct r600_resource*)state->zsbuf->texture,
>   						       RADEON_USAGE_READWRITE);
>   
>   		r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
> @@ -2065,7 +2071,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
>   
>   static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a;
>   	float offset_units = state->offset_units;
>   	float offset_scale = state->offset_scale;
> @@ -2090,7 +2096,7 @@ static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600
>   
>   static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
>   	unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1;
>   	unsigned ps_colormask = (1ULL << ((unsigned)a->nr_ps_color_outputs * 4)) - 1;
> @@ -2105,7 +2111,7 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_
>   
>   static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct r600_db_state *a = (struct r600_db_state*)atom;
>   
>   	if (a->rsurf && a->rsurf->htile_enabled) {
> @@ -2116,7 +2122,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom
>   		r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
>   		r600_write_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
>   		r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
> -		reloc_idx = r600_context_bo_reloc(rctx, rtex->htile, RADEON_USAGE_READWRITE);
> +		reloc_idx = r600_context_bo_reloc(rctx, &rctx->rings.gfx, rtex->htile, RADEON_USAGE_READWRITE);
>   		cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
>   		cs->buf[cs->cdw++] = reloc_idx;
>   	} else {
> @@ -2127,7 +2133,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom
>   
>   static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom;
>   	unsigned db_render_control = 0;
>   	unsigned db_count_control = 0;
> @@ -2177,7 +2183,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
>   					  unsigned resource_offset,
>   					  unsigned pkt_flags)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	uint32_t dirty_mask = state->dirty_mask;
>   
>   	while (dirty_mask) {
> @@ -2213,7 +2219,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
>   		r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD7 */
>   
>   		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
> -		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
> +		r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ));
>   	}
>   	state->dirty_mask = 0;
>   }
> @@ -2235,7 +2241,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
>   					    unsigned reg_alu_constbuf_size,
>   					    unsigned reg_alu_const_cache)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	uint32_t dirty_mask = state->dirty_mask;
>   
>   	while (dirty_mask) {
> @@ -2256,7 +2262,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
>   		r600_write_context_reg(cs, reg_alu_const_cache + buffer_index * 4, va >> 8);
>   
>   		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
> -		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
> +		r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ));
>   
>   		r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 8, 0));
>   		r600_write_value(cs, (buffer_id_base + buffer_index) * 8);
> @@ -2277,7 +2283,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
>   		r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD7 */
>   
>   		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
> -		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
> +		r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ));
>   
>   		dirty_mask &= ~(1 << buffer_index);
>   	}
> @@ -2309,7 +2315,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
>   					 struct r600_samplerview_state *state,
>   					 unsigned resource_id_base)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	uint32_t dirty_mask = state->dirty_mask;
>   
>   	while (dirty_mask) {
> @@ -2324,7 +2330,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
>   		r600_write_value(cs, (resource_id_base + resource_index) * 8);
>   		r600_write_array(cs, 8, rview->tex_resource_words);
>   
> -		reloc = r600_context_bo_reloc(rctx, rview->tex_resource,
> +		reloc = r600_context_bo_reloc(rctx, &rctx->rings.gfx, rview->tex_resource,
>   					      RADEON_USAGE_READ);
>   		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
>   		r600_write_value(cs, reloc);
> @@ -2357,7 +2363,7 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx,
>   				unsigned resource_id_base,
>   				unsigned border_index_reg)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	uint32_t dirty_mask = texinfo->states.dirty_mask;
>   
>   	while (dirty_mask) {
> @@ -2400,14 +2406,14 @@ static void evergreen_emit_sample_mask(struct r600_context *rctx, struct r600_at
>   	struct r600_sample_mask *s = (struct r600_sample_mask*)a;
>   	uint8_t mask = s->sample_mask;
>   
> -	r600_write_context_reg(rctx->cs, R_028C3C_PA_SC_AA_MASK,
> +	r600_write_context_reg(rctx->rings.gfx.cs, R_028C3C_PA_SC_AA_MASK,
>   			       mask | (mask << 8) | (mask << 16) | (mask << 24));
>   }
>   
>   static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a)
>   {
>   	struct r600_sample_mask *s = (struct r600_sample_mask*)a;
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	uint16_t mask = s->sample_mask;
>   
>   	r600_write_context_reg_seq(cs, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
> @@ -2417,14 +2423,14 @@ static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom
>   
>   static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct r600_cso_state *state = (struct r600_cso_state*)a;
>   	struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
>   
>   	r600_write_context_reg(cs, R_0288A4_SQ_PGM_START_FS,
>   			       (r600_resource_va(rctx->context.screen, &shader->buffer->b.b) + shader->offset) >> 8);
>   	r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
> -	r600_write_value(cs, r600_context_bo_reloc(rctx, shader->buffer, RADEON_USAGE_READ));
> +	r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, shader->buffer, RADEON_USAGE_READ));
>   }
>   
>   void evergreen_init_state_functions(struct r600_context *rctx)
> diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
> index ef67717..fc3aacc 100644
> --- a/src/gallium/drivers/r600/r600_asm.c
> +++ b/src/gallium/drivers/r600/r600_asm.c
> @@ -2889,8 +2889,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
>   		return NULL;
>   	}
>   
> -	bytecode = rctx->ws->buffer_map(shader->buffer->cs_buf, rctx->cs,
> -					PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED);
> +	bytecode = r600_buffer_mmap_sync_with_rings(rctx, shader->buffer, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED);
>   	bytecode += shader->offset / 4;
>   
>   	if (R600_BIG_ENDIAN) {
> diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
> index e674e13..be171f8 100644
> --- a/src/gallium/drivers/r600/r600_buffer.c
> +++ b/src/gallium/drivers/r600/r600_buffer.c
> @@ -85,11 +85,11 @@ static void *r600_buffer_get_transfer(struct pipe_context *ctx,
>   }
>   
>   static void *r600_buffer_transfer_map(struct pipe_context *ctx,
> -                                      struct pipe_resource *resource,
> -                                      unsigned level,
> -                                      unsigned usage,
> -                                      const struct pipe_box *box,
> -				      struct pipe_transfer **ptransfer)
> +					struct pipe_resource *resource,
> +					unsigned level,
> +					unsigned usage,
> +					const struct pipe_box *box,
> +					struct pipe_transfer **ptransfer)
>   {
>   	struct r600_context *rctx = (struct r600_context*)ctx;
>   	struct r600_resource *rbuffer = r600_resource(resource);
> @@ -102,7 +102,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
>   		assert(usage & PIPE_TRANSFER_WRITE);
>   
>   		/* Check if mapping this buffer would cause waiting for the GPU. */
> -		if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
> +		if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
>   		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
>   			unsigned i, mask;
>   
> @@ -144,7 +144,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
>   		assert(usage & PIPE_TRANSFER_WRITE);
>   
>   		/* Check if mapping this buffer would cause waiting for the GPU. */
> -		if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
> +		if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
>   		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
>   			/* Do a wait-free write-only transfer using a temporary buffer. */
>   			unsigned offset;
> @@ -161,7 +161,8 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
>   		}
>   	}
>   
> -	data = rctx->ws->buffer_map(rbuffer->cs_buf, rctx->cs, usage);
> +	/* mmap and synchronize with rings */
> +	data = r600_buffer_mmap_sync_with_rings(rctx, rbuffer, usage);
>   	if (!data) {
>   		return NULL;
>   	}
> diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
> index 1506b39..dbaa741 100644
> --- a/src/gallium/drivers/r600/r600_hw_context.c
> +++ b/src/gallium/drivers/r600/r600_hw_context.c
> @@ -32,7 +32,7 @@
>   /* Get backends mask */
>   void r600_get_backend_mask(struct r600_context *ctx)
>   {
> -	struct radeon_winsys_cs *cs = ctx->cs;
> +	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
>   	struct r600_resource *buffer;
>   	uint32_t *results;
>   	unsigned num_backends = ctx->screen->info.r600_num_backends;
> @@ -72,11 +72,10 @@ void r600_get_backend_mask(struct r600_context *ctx)
>   				   PIPE_USAGE_STAGING, ctx->max_db*16);
>   	if (!buffer)
>   		goto err;
> -
>   	va = r600_resource_va(&ctx->screen->screen, (void*)buffer);
>   
>   	/* initialize buffer with zeroes */
> -	results = ctx->ws->buffer_map(buffer->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE);
> +	results = r600_buffer_mmap_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
>   	if (results) {
>   		memset(results, 0, ctx->max_db * 4 * 4);
>   		ctx->ws->buffer_unmap(buffer->cs_buf);
> @@ -88,10 +87,10 @@ void r600_get_backend_mask(struct r600_context *ctx)
>   		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
>   
>   		cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
> -		cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, buffer, RADEON_USAGE_WRITE);
> +		cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, buffer, RADEON_USAGE_WRITE);
>   
>   		/* analyze results */
> -		results = ctx->ws->buffer_map(buffer->cs_buf, ctx->cs, PIPE_TRANSFER_READ);
> +		results = r600_buffer_mmap_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
>   		if (results) {
>   			for(i = 0; i < ctx->max_db; i++) {
>   				/* at least highest bit will be set if backend is used */
> @@ -361,7 +360,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
>   			boolean count_draw_in)
>   {
>   	/* The number of dwords we already used in the CS so far. */
> -	num_dw += ctx->cs->cdw;
> +	num_dw += ctx->rings.gfx.cs->cdw;
>   
>   	if (count_draw_in) {
>   		unsigned i;
> @@ -543,7 +542,7 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat
>   void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block,
>   	unsigned pkt_flags)
>   {
> -	struct radeon_winsys_cs *cs = ctx->cs;
> +	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
>   	int optional = block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS);
>   	int cp_dwords = block->pm4_ndwords, start_dword = 0;
>   	int new_dwords = 0;
> @@ -560,7 +559,7 @@ void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *
>   				struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]];
>   				if (reloc->bo) {
>   					block->pm4[reloc->bo_pm4_index] =
> -							r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage);
> +							r600_context_bo_reloc(ctx, &ctx->rings.gfx, reloc->bo, reloc->bo_usage);
>   				} else {
>   					block->pm4[reloc->bo_pm4_index] = 0;
>   				}
> @@ -604,7 +603,7 @@ out:
>   
>   void r600_flush_emit(struct r600_context *rctx)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	unsigned cp_coher_cntl = 0;
>   	unsigned emit_flush = 0;
>   
> @@ -685,7 +684,7 @@ void r600_flush_emit(struct r600_context *rctx)
>   
>   void r600_context_flush(struct r600_context *ctx, unsigned flags)
>   {
> -	struct radeon_winsys_cs *cs = ctx->cs;
> +	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
>   
>   	if (cs->cdw == ctx->start_cs_cmd.num_dw)
>   		return;
> @@ -735,7 +734,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
>   		rscreen->cs_count++;
>   	}
>   #endif
> -	ctx->ws->cs_flush(ctx->cs, flags);
> +	ctx->ws->cs_flush(ctx->rings.gfx.cs, flags);
>   #if R600_TRACE_CS
>   	if (ctx->screen->trace_bo) {
>   		struct r600_screen *rscreen = ctx->screen;
> @@ -768,7 +767,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
>   	ctx->flags = 0;
>   
>   	/* Begin a new CS. */
> -	r600_emit_command_buffer(ctx->cs, &ctx->start_cs_cmd);
> +	r600_emit_command_buffer(ctx->rings.gfx.cs, &ctx->start_cs_cmd);
>   
>   	/* Re-emit states. */
>   	ctx->alphatest_state.atom.dirty = true;
> @@ -846,7 +845,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
>   
>   void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence_bo, unsigned offset, unsigned value)
>   {
> -	struct radeon_winsys_cs *cs = ctx->cs;
> +	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
>   	uint64_t va;
>   
>   	r600_need_cs_space(ctx, 10, FALSE);
> @@ -864,12 +863,12 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fen
>   	cs->buf[cs->cdw++] = value;                   /* DATA_LO */
>   	cs->buf[cs->cdw++] = 0;                       /* DATA_HI */
>   	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
> -	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, fence_bo, RADEON_USAGE_WRITE);
> +	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, fence_bo, RADEON_USAGE_WRITE);
>   }
>   
>   static void r600_flush_vgt_streamout(struct r600_context *ctx)
>   {
> -	struct radeon_winsys_cs *cs = ctx->cs;
> +	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
>   
>   	r600_write_config_reg(cs, R_008490_CP_STRMOUT_CNTL, 0);
>   
> @@ -887,7 +886,7 @@ static void r600_flush_vgt_streamout(struct r600_context *ctx)
>   
>   static void r600_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit)
>   {
> -	struct radeon_winsys_cs *cs = ctx->cs;
> +	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
>   
>   	if (buffer_enable_bit) {
>   		r600_write_context_reg(cs, R_028AB0_VGT_STRMOUT_EN, S_028AB0_STREAMOUT(1));
> @@ -899,7 +898,7 @@ static void r600_set_streamout_enable(struct r600_context *ctx, unsigned buffer_
>   
>   void r600_context_streamout_begin(struct r600_context *ctx)
>   {
> -	struct radeon_winsys_cs *cs = ctx->cs;
> +	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
>   	struct r600_so_target **t = ctx->so_targets;
>   	unsigned *stride_in_dw = ctx->vs_shader->so.stride;
>   	unsigned buffer_en, i, update_flags = 0;
> @@ -955,7 +954,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
>   
>   			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
>   			cs->buf[cs->cdw++] =
> -				r600_context_bo_reloc(ctx, r600_resource(t[i]->b.buffer),
> +				r600_context_bo_reloc(ctx, &ctx->rings.gfx, r600_resource(t[i]->b.buffer),
>   						      RADEON_USAGE_WRITE);
>   
>   			/* R7xx requires this packet after updating BUFFER_BASE.
> @@ -967,7 +966,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
>   
>   				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
>   				cs->buf[cs->cdw++] =
> -					r600_context_bo_reloc(ctx, r600_resource(t[i]->b.buffer),
> +					r600_context_bo_reloc(ctx, &ctx->rings.gfx, r600_resource(t[i]->b.buffer),
>   							      RADEON_USAGE_WRITE);
>   			}
>   
> @@ -985,7 +984,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
>   
>   				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
>   				cs->buf[cs->cdw++] =
> -					r600_context_bo_reloc(ctx,  t[i]->buf_filled_size,
> +					r600_context_bo_reloc(ctx,  &ctx->rings.gfx, t[i]->buf_filled_size,
>   							      RADEON_USAGE_READ);
>   			} else {
>   				/* Start from the beginning. */
> @@ -1008,7 +1007,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
>   
>   void r600_context_streamout_end(struct r600_context *ctx)
>   {
> -	struct radeon_winsys_cs *cs = ctx->cs;
> +	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
>   	struct r600_so_target **t = ctx->so_targets;
>   	unsigned i;
>   	uint64_t va;
> @@ -1034,7 +1033,7 @@ void r600_context_streamout_end(struct r600_context *ctx)
>   
>   			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
>   			cs->buf[cs->cdw++] =
> -				r600_context_bo_reloc(ctx,  t[i]->buf_filled_size,
> +				r600_context_bo_reloc(ctx,  &ctx->rings.gfx, t[i]->buf_filled_size,
>   						      RADEON_USAGE_WRITE);
>   
>   		}
> diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
> index 7c4ec44..87a4399 100644
> --- a/src/gallium/drivers/r600/r600_pipe.c
> +++ b/src/gallium/drivers/r600/r600_pipe.c
> @@ -53,9 +53,7 @@ static struct r600_fence *r600_create_fence(struct r600_context *rctx)
>   			R600_ERR("r600: failed to create bo for fence objects\n");
>   			goto out;
>   		}
> -		rscreen->fences.data = rctx->ws->buffer_map(rscreen->fences.bo->cs_buf,
> -							   rctx->cs,
> -							   PIPE_TRANSFER_READ_WRITE);
> +		rscreen->fences.data = r600_buffer_mmap_sync_with_rings(rctx, rscreen->fences.bo, PIPE_TRANSFER_READ_WRITE);
>   	}
>   
>   	if (!LIST_IS_EMPTY(&rscreen->fences.pool)) {
> @@ -108,7 +106,7 @@ static struct r600_fence *r600_create_fence(struct r600_context *rctx)
>   			pipe_buffer_create(&rctx->screen->screen, PIPE_BIND_CUSTOM,
>   					   PIPE_USAGE_STAGING, 1);
>   	/* Add the fence as a dummy relocation. */
> -	r600_context_bo_reloc(rctx, fence->sleep_bo, RADEON_USAGE_READWRITE);
> +	r600_context_bo_reloc(rctx, &rctx->rings.gfx, fence->sleep_bo, RADEON_USAGE_READWRITE);
>   
>   out:
>   	pipe_mutex_unlock(rscreen->fences.mutex);
> @@ -150,11 +148,153 @@ static void r600_flush_from_st(struct pipe_context *ctx,
>   		   flags & PIPE_FLUSH_END_OF_FRAME ? RADEON_FLUSH_END_OF_FRAME : 0);
>   }
>   
> -static void r600_flush_from_winsys(void *ctx, unsigned flags)
> +static void r600_flush_gfx_ring(void *ctx, unsigned flags)
>   {
>   	r600_flush((struct pipe_context*)ctx, NULL, flags);
>   }
>   
> +static void r600_flush_dma_ring(void *ctx, unsigned flags)
> +{
> +	struct r600_context *rctx = (struct r600_context *)ctx;
> +
> +	rctx->ws->cs_flush(rctx->rings.dma.cs, flags);
> +}
> +
> +static void r600_flush_pop_ring(struct r600_context *ctx, unsigned flags)
> +{
> +	unsigned i;
> +
> +	if (!ctx->rings.nflush) {
> +		return;
> +	}
> +	ctx->rings.ring_flush_stack[0]->flush(ctx, flags);
> +	for (i = 1; i < ctx->rings.nflush; i++) {
> +		ctx->rings.ring_flush_stack[i - 1] = ctx->rings.ring_flush_stack[i];
> +	}
> +	ctx->rings.nflush--;
> +	ctx->rings.ring_flush_stack[ctx->rings.nflush] = NULL;
> +}
> +
> +static void r600_flush_pop_ring_up_to(struct r600_context *ctx, struct r600_ring *ring, unsigned flags)
> +{
> +	unsigned i, j;
> +
> +	for (i = 0; i < ctx->rings.nflush; i++) {
> +		if (ctx->rings.ring_flush_stack[i] == ring) {
> +			for (j = 0; j <= i; j++) {
> +				/* we need to pop until ring is no longer in the stack */
> +				r600_flush_pop_ring(ctx, flags);
> +			}
> +			break;
> +		}
> +	}
> +}
> +
> +boolean r600_rings_is_buffer_referenced(struct r600_context *ctx,
> +					struct radeon_winsys_cs_handle *buf,
> +					enum radeon_bo_usage usage)
> +{
> +	unsigned i;
> +
> +	for (i = 0; i < ctx->rings.nflush; i++) {
> +		if (ctx->ws->cs_is_buffer_referenced(ctx->rings.ring_flush_stack[i]->cs, buf, usage)) {
> +			return TRUE;
> +		}
> +	}
> +	return FALSE;
> +}
> +
> +void *r600_buffer_mmap_sync_with_rings(struct r600_context *ctx,
> +					struct r600_resource *resource,
> +					unsigned usage)
> +{
> +	enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
> +	unsigned flags = 0;
> +	bool sync_flush = TRUE;
> +	int i, j;
> +
> +	if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
> +		return ctx->ws->buffer_map(resource->cs_buf, NULL, usage);
> +	}
> +
> +	if (!(usage & PIPE_TRANSFER_WRITE)) {
> +		/* read-only mapping: only pending writes have to be waited for */
> +		rusage = RADEON_USAGE_WRITE;
> +	}
> +	if (usage & PIPE_TRANSFER_DONTBLOCK) {
> +		flags |= RADEON_FLUSH_ASYNC;
> +	}
> +
> +	if (ctx->rings.nflush) {
> +		for (i = ctx->rings.nflush - 1; i >= 0; i--) {
> +			if (ctx->ws->cs_is_buffer_referenced(ctx->rings.ring_flush_stack[i]->cs, resource->cs_buf, rusage)) {
> +				sync_flush = FALSE;
> +				for (j = 0; j <= i; j++) {
> +					/* we need to pop until ring is no longer in the stack */
> +					r600_flush_pop_ring(ctx, flags);
> +				}
> +				if (usage & PIPE_TRANSFER_DONTBLOCK) {
> +					return NULL;
> +				}
> +				break;
> +			}
> +		}
> +	}
> +
> +	if (usage & PIPE_TRANSFER_DONTBLOCK) {
> +		if (ctx->ws->buffer_is_busy(resource->buf, rusage)) {
> +			return NULL;
> +		}
> +	}
> +	if (sync_flush) {
> +		/* Try to avoid busy-waiting in radeon_bo_wait. */
> +		ctx->ws->cs_sync_flush(ctx->rings.gfx.cs);
> +		if (ctx->rings.dma.cs) {
> +			ctx->ws->cs_sync_flush(ctx->rings.dma.cs);
> +		}
> +	}
> +	ctx->ws->buffer_wait(resource->buf, rusage);
> +
> +	/* at this point everything is synchronized */
> +	return ctx->ws->buffer_map(resource->cs_buf, NULL, usage | PIPE_TRANSFER_UNSYNCHRONIZED);
> +}
> +
> +void r600_flush_push_ring(struct r600_context *ctx, struct r600_ring *ring, unsigned flags)
> +{
> +	unsigned i, j;
> +
> +	if (ctx->rings.nflush > 1) {
> +		for (i = 0; i < (ctx->rings.nflush - 1); i++) {
> +			if (ctx->rings.ring_flush_stack[i] == ring) {
> +				for (j = 0; j <= i; j++) {
> +					/* we need to pop until ring is no longer in the stack or is last */
> +					r600_flush_pop_ring(ctx, flags);
> +				}
> +				break;
> +			}
> +		}
> +	}
> +	/* ring will be last in the stack */
> +	if (ctx->rings.nflush && ctx->rings.ring_flush_stack[ctx->rings.nflush - 1] == ring) {
> +		return;
> +	}
> +	ctx->rings.ring_flush_stack[ctx->rings.nflush++] = ring;
> +}
> +
> +static void r600_flush_from_winsys(void *ctx, unsigned flags)
> +{
> +	struct r600_context *rctx = (struct r600_context *)ctx;
> +
> +	r600_flush_pop_ring_up_to(rctx, &rctx->rings.gfx, flags);
> +}
> +
> +static void r600_flush_dma_from_winsys(void *ctx, unsigned flags)
> +{
> +	struct r600_context *rctx = (struct r600_context *)ctx;
> +
> +	r600_flush_pop_ring_up_to(rctx, &rctx->rings.dma, flags);
> +}
> +
>   static void r600_destroy_context(struct pipe_context *context)
>   {
>   	struct r600_context *rctx = (struct r600_context *)context;
> @@ -197,8 +337,11 @@ static void r600_destroy_context(struct pipe_context *context)
>   
>   	r600_release_command_buffer(&rctx->start_cs_cmd);
>   
> -	if (rctx->cs) {
> -		rctx->ws->cs_destroy(rctx->cs);
> +	if (rctx->rings.gfx.cs) {
> +		rctx->ws->cs_destroy(rctx->rings.gfx.cs);
> +	}
> +	if (rctx->rings.dma.cs) {
> +		rctx->ws->cs_destroy(rctx->rings.dma.cs);
>   	}
>   
>   	FREE(rctx->range);
> @@ -289,8 +432,17 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
>   		goto fail;
>   	}
>   
> -	rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX);
> -	rctx->ws->cs_set_flush_callback(rctx->cs, r600_flush_from_winsys, rctx);
> +	rctx->rings.nflush = 0;
> +	rctx->rings.gfx.cs = rctx->ws->cs_create(rctx->ws, RING_GFX);
> +	rctx->rings.gfx.flush = r600_flush_gfx_ring;
> +	rctx->ws->cs_set_flush_callback(rctx->rings.gfx.cs, r600_flush_from_winsys, rctx);
> +
> +	rctx->rings.dma.cs = NULL;
> +	if (rscreen->info.r600_has_dma) {
> +		rctx->rings.dma.cs = rctx->ws->cs_create(rctx->ws, RING_DMA);
> +		rctx->rings.dma.flush = r600_flush_dma_ring;
> +		rctx->ws->cs_set_flush_callback(rctx->rings.dma.cs, r600_flush_dma_from_winsys, rctx);
> +	}
>   
>   	rctx->uploader = u_upload_create(&rctx->context, 1024 * 1024, 256,
>   					PIPE_BIND_INDEX_BUFFER |
> diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
> index 934a6f5..0008fbf 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -402,11 +402,23 @@ struct r600_fetch_shader {
>   	unsigned			offset;
>   };
>   
> +struct r600_ring {
> +	struct radeon_winsys_cs		*cs;
> +	void (*flush)(void *ctx, unsigned flags);
> +};
> +
> +struct r600_rings {
> +	struct r600_ring		gfx;
> +	struct r600_ring		dma;
> +	unsigned			nflush;
> +	struct r600_ring		*ring_flush_stack[RING_LAST];
> +};
> +
>   struct r600_context {
>   	struct pipe_context		context;
>   	struct r600_screen		*screen;
>   	struct radeon_winsys		*ws;
> -	struct radeon_winsys_cs		*cs;
> +	struct r600_rings		rings;
>   	struct blitter_context		*blitter;
>   	struct u_upload_mgr		*uploader;
>   	struct u_suballocator		*allocator_so_filled_size;
> @@ -625,6 +637,13 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
>   /* r600_pipe.c */
>   void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence,
>   		unsigned flags);
> +void r600_flush_push_ring(struct r600_context *ctx, struct r600_ring *ring, unsigned flags);
> +boolean r600_rings_is_buffer_referenced(struct r600_context *ctx,
> +					struct radeon_winsys_cs_handle *buf,
> +					enum radeon_bo_usage usage);
> +void *r600_buffer_mmap_sync_with_rings(struct r600_context *ctx,
> +					struct r600_resource *resource,
> +					unsigned usage);
>   
>   /* r600_query.c */
>   void r600_init_query_functions(struct r600_context *rctx);
> @@ -828,12 +847,14 @@ void r600_release_command_buffer(struct r600_command_buffer *cb);
>   /*
>    * Helpers for emitting state into a command stream directly.
>    */
> -
> -static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r600_resource *rbo,
> +static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx,
> +					     struct r600_ring *ring,
> +					     struct r600_resource *rbo,
>   					     enum radeon_bo_usage usage)
>   {
>   	assert(usage);
> -	return ctx->ws->cs_add_reloc(ctx->cs, rbo->cs_buf, usage, rbo->domains) * 4;
> +	r600_flush_push_ring(ctx, ring, RADEON_FLUSH_ASYNC);
> +	return ctx->ws->cs_add_reloc(ring->cs, rbo->cs_buf, usage, rbo->domains) * 4;
>   }
>   
>   static INLINE void r600_write_value(struct radeon_winsys_cs *cs, unsigned value)
> diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c
> index 083d510..0335189 100644
> --- a/src/gallium/drivers/r600/r600_query.c
> +++ b/src/gallium/drivers/r600/r600_query.c
> @@ -52,7 +52,7 @@ static struct r600_resource *r600_new_query_buffer(struct r600_context *ctx, uns
>   	switch (type) {
>   	case PIPE_QUERY_OCCLUSION_COUNTER:
>   	case PIPE_QUERY_OCCLUSION_PREDICATE:
> -		results = ctx->ws->buffer_map(buf->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE);
> +		results = r600_buffer_mmap_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
>   		memset(results, 0, buf_size);
>   
>   		/* Set top bits for unused backends. */
> @@ -75,7 +75,7 @@ static struct r600_resource *r600_new_query_buffer(struct r600_context *ctx, uns
>   	case PIPE_QUERY_PRIMITIVES_GENERATED:
>   	case PIPE_QUERY_SO_STATISTICS:
>   	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
> -		results = ctx->ws->buffer_map(buf->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE);
> +		results = r600_buffer_mmap_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
>   		memset(results, 0, buf_size);
>   		ctx->ws->buffer_unmap(buf->cs_buf);
>   		break;
> @@ -106,7 +106,7 @@ static void r600_update_occlusion_query_state(struct r600_context *rctx,
>   
>   static void r600_emit_query_begin(struct r600_context *ctx, struct r600_query *query)
>   {
> -	struct radeon_winsys_cs *cs = ctx->cs;
> +	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
>   	uint64_t va;
>   
>   	r600_update_occlusion_query_state(ctx, query->type, 1);
> @@ -154,7 +154,7 @@ static void r600_emit_query_begin(struct r600_context *ctx, struct r600_query *q
>   		assert(0);
>   	}
>   	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
> -	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer.buf, RADEON_USAGE_WRITE);
> +	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE);
>   
>   	if (!r600_is_timer_query(query->type)) {
>   		ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
> @@ -163,7 +163,7 @@ static void r600_emit_query_begin(struct r600_context *ctx, struct r600_query *q
>   
>   static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *query)
>   {
> -	struct radeon_winsys_cs *cs = ctx->cs;
> +	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
>   	uint64_t va;
>   
>   	/* The queries which need begin already called this in begin_query. */
> @@ -206,7 +206,7 @@ static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *que
>   		assert(0);
>   	}
>   	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
> -	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer.buf, RADEON_USAGE_WRITE);
> +	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE);
>   
>   	query->buffer.results_end += query->result_size;
>   
> @@ -222,7 +222,7 @@ static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *que
>   static void r600_emit_query_predication(struct r600_context *ctx, struct r600_query *query,
>   					int operation, bool flag_wait)
>   {
> -	struct radeon_winsys_cs *cs = ctx->cs;
> +	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
>   
>   	if (operation == PREDICATION_OP_CLEAR) {
>   		r600_need_cs_space(ctx, 3, FALSE);
> @@ -256,7 +256,7 @@ static void r600_emit_query_predication(struct r600_context *ctx, struct r600_qu
>   				cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL;
>   				cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF);
>   				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
> -				cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, qbuf->buf, RADEON_USAGE_READ);
> +				cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ);
>   				results_base += query->result_size;
>   
>   				/* set CONTINUE bit for all packets except the first */
> @@ -351,7 +351,7 @@ static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query)
>   	}
>   
>   	/* Obtain a new buffer if the current one can't be mapped without a stall. */
> -	if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
> +	if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
>   	    rctx->ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) {
>   		pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
>   		rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
> @@ -406,9 +406,9 @@ static boolean r600_get_query_buffer_result(struct r600_context *ctx,
>   	unsigned results_base = 0;
>   	char *map;
>   
> -	map = ctx->ws->buffer_map(qbuf->buf->cs_buf, ctx->cs,
> -				  PIPE_TRANSFER_READ |
> -				  (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
> +	map = r600_buffer_mmap_sync_with_rings(ctx, qbuf->buf,
> +						PIPE_TRANSFER_READ |
> +						(wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
>   	if (!map)
>   		return FALSE;
>   
> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
> index db45dfd..8302270 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -72,7 +72,7 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s
>   		if (shader->bo == NULL) {
>   			return -ENOMEM;
>   		}
> -		ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
> +		ptr = r600_buffer_mmap_sync_with_rings(rctx, shader->bo, PIPE_TRANSFER_WRITE);
>   		if (R600_BIG_ENDIAN) {
>   			for (i = 0; i < rshader->bc.ndw; ++i) {
>   				ptr[i] = bswap_32(rshader->bc.bytecode[i]);
> diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
> index ef4edca..4c4ad41 100644
> --- a/src/gallium/drivers/r600/r600_state.c
> +++ b/src/gallium/drivers/r600/r600_state.c
> @@ -646,7 +646,7 @@ boolean r600_is_format_supported(struct pipe_screen *screen,
>   
>   static void r600_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a;
>   	float offset_units = state->offset_units;
>   	float offset_scale = state->offset_scale;
> @@ -1101,7 +1101,7 @@ r600_create_sampler_view(struct pipe_context *ctx,
>   
>   static void r600_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct pipe_clip_state *state = &rctx->clip_state.state;
>   
>   	r600_write_context_reg_seq(cs, R_028E20_PA_CL_UCP0_X, 6*4);
> @@ -1115,7 +1115,7 @@ static void r600_set_polygon_stipple(struct pipe_context *ctx,
>   
>   static void r600_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct pipe_scissor_state *state = &rctx->scissor.scissor;
>   
>   	if (rctx->chip_class != R600 || rctx->scissor.enable) {
> @@ -1611,7 +1611,7 @@ static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples)
>   	};
>   	static unsigned max_dist_8x = 7;
>   
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	unsigned max_dist = 0;
>   
>   	if (rctx->family == CHIP_R600) {
> @@ -1678,7 +1678,7 @@ static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples)
>   
>   static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct pipe_framebuffer_state *state = &rctx->framebuffer.state;
>   	unsigned nr_cbufs = state->nr_cbufs;
>   	struct r600_surface **cb = (struct r600_surface**)&state->cbufs[0];
> @@ -1708,6 +1708,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
>   		/* relocations */
>   		for (i = 0; i < nr_cbufs; i++) {
>   			unsigned reloc = r600_context_bo_reloc(rctx,
> +							       &rctx->rings.gfx,
>   							       (struct r600_resource*)cb[i]->base.texture,
>   							       RADEON_USAGE_READWRITE);
>   			r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
> @@ -1737,6 +1738,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
>   		/* relocations */
>   		for (i = 0; i < nr_cbufs; i++) {
>   			unsigned reloc = r600_context_bo_reloc(rctx,
> +							       &rctx->rings.gfx,
>   							       cb[i]->cb_buffer_fmask,
>   							       RADEON_USAGE_READWRITE);
>   			r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
> @@ -1751,6 +1753,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
>   		/* relocations */
>   		for (i = 0; i < nr_cbufs; i++) {
>   			unsigned reloc = r600_context_bo_reloc(rctx,
> +							       &rctx->rings.gfx,
>   							       cb[i]->cb_buffer_cmask,
>   							       RADEON_USAGE_READWRITE);
>   			r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
> @@ -1771,6 +1774,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
>   	if (state->zsbuf) {
>   		struct r600_surface *surf = (struct r600_surface*)state->zsbuf;
>   		unsigned reloc = r600_context_bo_reloc(rctx,
> +						       &rctx->rings.gfx,
>   						       (struct r600_resource*)state->zsbuf->texture,
>   						       RADEON_USAGE_READWRITE);
>   
> @@ -1825,7 +1829,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
>   
>   static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
>   
>   	if (G_028808_SPECIAL_OP(a->cb_color_control) == V_028808_SPECIAL_RESOLVE_BOX) {
> @@ -1855,7 +1859,7 @@ static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom
>   
>   static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct r600_db_state *a = (struct r600_db_state*)atom;
>   
>   	if (a->rsurf && a->rsurf->htile_enabled) {
> @@ -1865,7 +1869,7 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom
>   		r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear));
>   		r600_write_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
>   		r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
> -		reloc_idx = r600_context_bo_reloc(rctx, rtex->htile, RADEON_USAGE_READWRITE);
> +		reloc_idx = r600_context_bo_reloc(rctx, &rctx->rings.gfx, rtex->htile, RADEON_USAGE_READWRITE);
>   		cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
>   		cs->buf[cs->cdw++] = reloc_idx;
>   	} else {
> @@ -1875,7 +1879,7 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom
>   
>   static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom;
>   	unsigned db_render_control = 0;
>   	unsigned db_render_override =
> @@ -1918,7 +1922,7 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
>   
>   static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct r600_config_state *a = (struct r600_config_state*)atom;
>   
>   	r600_write_config_reg(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, a->sq_gpr_resource_mgmt_1);
> @@ -1926,7 +1930,7 @@ static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *
>   
>   static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	uint32_t dirty_mask = rctx->vertex_buffer_state.dirty_mask;
>   
>   	while (dirty_mask) {
> @@ -1955,7 +1959,7 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom
>   		r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD6 */
>   
>   		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
> -		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
> +		r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ));
>   	}
>   }
>   
> @@ -1965,7 +1969,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
>   				       unsigned reg_alu_constbuf_size,
>   				       unsigned reg_alu_const_cache)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	uint32_t dirty_mask = state->dirty_mask;
>   
>   	while (dirty_mask) {
> @@ -1985,7 +1989,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
>   		r600_write_context_reg(cs, reg_alu_const_cache + buffer_index * 4, offset >> 8);
>   
>   		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
> -		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
> +		r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ));
>   
>   		r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
>   		r600_write_value(cs, (buffer_id_base + buffer_index) * 7);
> @@ -2000,7 +2004,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
>   		r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD6 */
>   
>   		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
> -		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
> +		r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ));
>   
>   		dirty_mask &= ~(1 << buffer_index);
>   	}
> @@ -2032,7 +2036,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx,
>   				    struct r600_samplerview_state *state,
>   				    unsigned resource_id_base)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	uint32_t dirty_mask = state->dirty_mask;
>   
>   	while (dirty_mask) {
> @@ -2047,7 +2051,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx,
>   		r600_write_value(cs, (resource_id_base + resource_index) * 7);
>   		r600_write_array(cs, 7, rview->tex_resource_words);
>   
> -		reloc = r600_context_bo_reloc(rctx, rview->tex_resource,
> +		reloc = r600_context_bo_reloc(rctx, &rctx->rings.gfx, rview->tex_resource,
>   					      RADEON_USAGE_READ);
>   		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
>   		r600_write_value(cs, reloc);
> @@ -2084,7 +2088,7 @@ static void r600_emit_sampler_states(struct r600_context *rctx,
>   				unsigned resource_id_base,
>   				unsigned border_color_reg)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	uint32_t dirty_mask = texinfo->states.dirty_mask;
>   
>   	while (dirty_mask) {
> @@ -2145,7 +2149,7 @@ static void r600_emit_ps_sampler_states(struct r600_context *rctx, struct r600_a
>   
>   static void r600_emit_seamless_cube_map(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	unsigned tmp;
>   
>   	tmp = S_009508_DISABLE_CUBE_ANISO(1) |
> @@ -2163,19 +2167,19 @@ static void r600_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a
>   	struct r600_sample_mask *s = (struct r600_sample_mask*)a;
>   	uint8_t mask = s->sample_mask;
>   
> -	r600_write_context_reg(rctx->cs, R_028C48_PA_SC_AA_MASK,
> +	r600_write_context_reg(rctx->rings.gfx.cs, R_028C48_PA_SC_AA_MASK,
>   			       mask | (mask << 8) | (mask << 16) | (mask << 24));
>   }
>   
>   static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct r600_cso_state *state = (struct r600_cso_state*)a;
>   	struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
>   
>   	r600_write_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8);
>   	r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
> -	r600_write_value(cs, r600_context_bo_reloc(rctx, shader->buffer, RADEON_USAGE_READ));
> +	r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, shader->buffer, RADEON_USAGE_READ));
>   }
>   
>   void r600_init_state_functions(struct r600_context *rctx)
> diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
> index bbcfc4f..0a8502c 100644
> --- a/src/gallium/drivers/r600/r600_state_common.c
> +++ b/src/gallium/drivers/r600/r600_state_common.c
> @@ -65,12 +65,12 @@ void r600_init_atom(struct r600_context *rctx,
>   
>   void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	r600_emit_command_buffer(rctx->cs, ((struct r600_cso_state*)atom)->cb);
> +	r600_emit_command_buffer(rctx->rings.gfx.cs, ((struct r600_cso_state*)atom)->cb);
>   }
>   
>   void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct r600_alphatest_state *a = (struct r600_alphatest_state*)atom;
>   	unsigned alpha_ref = a->sx_alpha_ref;
>   
> @@ -176,7 +176,7 @@ static void r600_set_blend_color(struct pipe_context *ctx,
>   
>   void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct pipe_blend_color *state = &rctx->blend_color.state;
>   
>   	r600_write_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
> @@ -188,7 +188,7 @@ void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom)
>   
>   void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct r600_vgt_state *a = (struct r600_vgt_state *)atom;
>   
>   	r600_write_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, a->vgt_multi_prim_ib_reset_en);
> @@ -197,7 +197,7 @@ void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom)
>   
>   void r600_emit_vgt2_state(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct r600_vgt2_state *a = (struct r600_vgt2_state *)atom;
>   
>   	r600_write_context_reg(cs, R_028408_VGT_INDX_OFFSET, a->vgt_indx_offset);
> @@ -231,7 +231,7 @@ static void r600_set_stencil_ref(struct pipe_context *ctx,
>   
>   void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct r600_stencil_ref_state *a = (struct r600_stencil_ref_state*)atom;
>   
>   	r600_write_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
> @@ -656,7 +656,7 @@ static void r600_set_viewport_state(struct pipe_context *ctx,
>   
>   void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct pipe_viewport_state *state = &rctx->viewport.state;
>   
>   	r600_write_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE_0, 6);
> @@ -1140,7 +1140,7 @@ static unsigned r600_conv_prim_to_gs_out(unsigned mode)
>   
>   void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom)
>   {
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	struct r600_clip_misc_state *state = &rctx->clip_misc_state;
>   
>   	r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
> @@ -1158,7 +1158,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
>   	struct pipe_index_buffer ib = {};
>   	unsigned i;
>   	struct r600_block *dirty_block = NULL, *next_block = NULL;
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   
>   	if (!info.count && (info.indexed || !info.count_from_stream_output)) {
>   		assert(0);
> @@ -1302,7 +1302,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
>   			cs->buf[cs->cdw++] = info.count;
>   			cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;
>   			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->predicate_drawing);
> -			cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, (struct r600_resource*)ib.buffer, RADEON_USAGE_READ);
> +			cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, &rctx->rings.gfx, (struct r600_resource*)ib.buffer, RADEON_USAGE_READ);
>   		}
>   	} else {
>   		if (info.count_from_stream_output) {
> @@ -1319,7 +1319,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
>   			cs->buf[cs->cdw++] = 0; /* unused */
>   
>   			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
> -			cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, t->buf_filled_size, RADEON_USAGE_READ);
> +			cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, &rctx->rings.gfx, t->buf_filled_size, RADEON_USAGE_READ);
>   		}
>   
>   		cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->predicate_drawing);
> @@ -1631,12 +1631,12 @@ void r600_init_common_state_functions(struct r600_context *rctx)
>   void r600_trace_emit(struct r600_context *rctx)
>   {
>   	struct r600_screen *rscreen = rctx->screen;
> -	struct radeon_winsys_cs *cs = rctx->cs;
> +	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
>   	uint64_t va;
>   	uint32_t reloc;
>   
>   	va = r600_resource_va(&rscreen->screen, (void*)rscreen->trace_bo);
> -	reloc = r600_context_bo_reloc(rctx, rscreen->trace_bo, RADEON_USAGE_READWRITE);
> +	reloc = r600_context_bo_reloc(rctx, &rctx->rings.gfx, rscreen->trace_bo, RADEON_USAGE_READWRITE);
>   	r600_write_value(cs, PKT3(PKT3_MEM_WRITE, 3, 0));
>   	r600_write_value(cs, va & 0xFFFFFFFFUL);
>   	r600_write_value(cs, (va >> 32UL) & 0xFFUL);
> diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
> index 85da093..84701eb 100644
> --- a/src/gallium/drivers/r600/r600_texture.c
> +++ b/src/gallium/drivers/r600/r600_texture.c
> @@ -725,7 +725,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
>   	struct r600_transfer *trans;
>   	boolean use_staging_texture = FALSE;
>   	enum pipe_format format = texture->format;
> -	struct radeon_winsys_cs_handle *buf;
> +	struct r600_resource *buf;
>   	unsigned offset = 0;
>   	char *map;
>   
> @@ -746,7 +746,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
>   
>   	/* Use a staging texture for uploads if the underlying BO is busy. */
>   	if (!(usage & PIPE_TRANSFER_READ) &&
> -	    (rctx->ws->cs_is_buffer_referenced(rctx->cs, rtex->resource.cs_buf, RADEON_USAGE_READWRITE) ||
> +	    (r600_rings_is_buffer_referenced(rctx, rtex->resource.cs_buf, RADEON_USAGE_READWRITE) ||
>   	     rctx->ws->buffer_is_busy(rtex->resource.buf, RADEON_USAGE_READWRITE))) {
>   		use_staging_texture = TRUE;
>   	}
> @@ -849,9 +849,9 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
>   	}
>   
>   	if (trans->staging) {
> -		buf = trans->staging->cs_buf;
> +		buf = trans->staging;
>   	} else {
> -		buf = rtex->resource.cs_buf;
> +		buf = &rtex->resource;
>   	}
>   
>   	if (rtex->is_depth || !trans->staging)
> @@ -859,7 +859,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
>   			box->y / util_format_get_blockheight(format) * trans->transfer.stride +
>   			box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
>   
> -	if (!(map = rctx->ws->buffer_map(buf, rctx->cs, usage))) {
> +	if (!(map = r600_buffer_mmap_sync_with_rings(rctx, buf, usage))) {
>   		pipe_resource_reference((struct pipe_resource**)&trans->staging, NULL);
>   		FREE(trans);
>   		return NULL;



More information about the mesa-dev mailing list