[Mesa-dev] [PATCH] r600g: add support for virtual address space on cayman v9

j.glisse at gmail.com j.glisse at gmail.com
Mon Jan 9 12:38:09 PST 2012


From: Jerome Glisse <jglisse at redhat.com>

Virtual address space puts userspace in charge of its GPU
address space. It's up to userspace to bind BOs into the virtual
address space. Command streams can then be executed using the
IB_VM chunk.

This patch adds support for this configuration. It doesn't remove
the 64K IB size limit, though this limit can be extended up to
1M for the IB_VM chunk.

v2: fix rendering
v3: fix rendering when using index buffer
v4: make vm conditional on kernel support, add basic va management
v5: catch the case when we already have va for a bo
v6: agd5f: update on top of ioctl changes
v7: agd5f: further ioctl updates
v8: indentation cleanup + fix non cayman
v9: rebase against latest mesa + improvements from Marek & Michel

Signed-off-by: Jerome Glisse <jglisse at redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
 src/gallium/drivers/r600/evergreen_hw_context.c   |    9 +-
 src/gallium/drivers/r600/evergreen_state.c        |   49 ++++--
 src/gallium/drivers/r600/r600_hw_context.c        |   47 ++++--
 src/gallium/drivers/r600/r600_pipe.h              |    3 +-
 src/gallium/drivers/r600/r600_resource.c          |   11 ++
 src/gallium/drivers/r600/r600_resource.h          |    2 +
 src/gallium/drivers/r600/r600_state_common.c      |   14 +-
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c     |  177 +++++++++++++++++++++
 src/gallium/winsys/radeon/drm/radeon_drm_bo.h     |    2 +
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c     |   21 ++-
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h     |    4 +-
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |   10 ++
 src/gallium/winsys/radeon/drm/radeon_winsys.h     |   11 ++
 13 files changed, 312 insertions(+), 48 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index bd1d969..e75eaf2 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -1135,6 +1135,7 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
 	struct r600_block *dirty_block = NULL;
 	struct r600_block *next_block;
 	uint32_t *pm4;
+	uint64_t va;
 
 	if (draw->indices) {
 		ndwords = 11;
@@ -1174,9 +1175,11 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
 	pm4[2] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing);
 	pm4[3] = draw->vgt_num_instances;
 	if (draw->indices) {
-	        pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing);
-		pm4[5] = draw->indices_bo_offset;
-		pm4[6] = 0;
+		va = r600_resource_va(&ctx->screen->screen, (void*)draw->indices);
+		va += draw->indices_bo_offset;
+		pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing);
+		pm4[5] = va;
+		pm4[6] = (va >> 32UL) & 0xFF;
 		pm4[7] = draw->vgt_num_indices;
 		pm4[8] = draw->vgt_draw_initiator;
 		pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 7ded03d..aca6136 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1101,8 +1101,8 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
 	rstate->val[1] = (S_030004_TEX_HEIGHT(height - 1) |
 			  S_030004_TEX_DEPTH(depth - 1) |
 			  S_030004_ARRAY_MODE(array_mode));
-	rstate->val[2] = tmp->offset[0] >> 8;
-	rstate->val[3] = tmp->offset[1] >> 8;
+	rstate->val[2] = (tmp->offset[0] + r600_resource_va(ctx->screen, texture)) >> 8;
+	rstate->val[3] = (tmp->offset[1] + r600_resource_va(ctx->screen, texture)) >> 8;
 	rstate->val[4] = (word4 |
 			  S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
 			  S_030010_ENDIAN_SWAP(endian) |
@@ -1343,7 +1343,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
 	unsigned pitch, slice;
 	unsigned color_info;
 	unsigned format, swap, ntype, endian;
-	unsigned offset;
+	uint64_t offset;
 	unsigned tile_type;
 	const struct util_format_description *desc;
 	int i;
@@ -1443,10 +1443,13 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
 	} else /* workaround for linear buffers */
 		tile_type = 1;
 
+	offset += r600_resource_va(rctx->context.screen, state->cbufs[cb]->texture);
+	offset >>= 8;
+
 	/* FIXME handle enabling of CB beyond BASE8 which has different offset */
 	r600_pipe_state_add_reg(rstate,
 				R_028C60_CB_COLOR0_BASE + cb * 0x3C,
-				offset >> 8, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
+				offset, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate,
 				R_028C78_CB_COLOR0_DIM + cb * 0x3C,
 				0x0, 0xFFFFFFFF, NULL, 0);
@@ -1475,7 +1478,8 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
 {
 	struct r600_resource_texture *rtex;
 	struct r600_surface *surf;
-	unsigned level, first_layer, pitch, slice, format, offset, array_mode;
+	unsigned level, first_layer, pitch, slice, format, array_mode;
+	uint64_t offset;
 
 	if (state->zsbuf == NULL)
 		return;
@@ -1494,20 +1498,26 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
 	slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
 	format = r600_translate_dbformat(rtex->real_format);
 
+	offset += r600_resource_va(rctx->context.screen, surf->base.texture);
+	offset >>= 8;
+
 	r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE,
-				offset >> 8, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
+				offset, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE,
-				offset >> 8, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
+				offset, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL, 0);
 
 	if (rtex->stencil) {
-		uint32_t stencil_offset =
+		uint64_t stencil_offset =
 			r600_texture_get_offset(rtex->stencil, level, first_layer);
 
+		stencil_offset += r600_resource_va(rctx->context.screen, (void*)rtex->stencil);
+		stencil_offset >>= 8;
+
 		r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE,
-					stencil_offset >> 8, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
+					stencil_offset, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
 		r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE,
-					stencil_offset >> 8, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
+					stencil_offset, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
 		r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO,
 					1, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
 	} else {
@@ -2383,7 +2393,8 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
 
 	r600_pipe_state_add_reg(rstate,
 				R_028840_SQ_PGM_START_PS,
-				0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
+				r600_resource_va(ctx->screen, (void *)shader->bo) >> 8,
+				0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
 	r600_pipe_state_add_reg(rstate,
 				R_028844_SQ_PGM_RESOURCES_PS,
 				S_028844_NUM_GPRS(rshader->bc.ngpr) |
@@ -2457,7 +2468,8 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
 				0x0, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 			R_02885C_SQ_PGM_START_VS,
-			0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
+			r600_resource_va(ctx->screen, (void *)shader->bo) >> 8,
+			0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
 
 	r600_pipe_state_add_reg(rstate,
 				R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
@@ -2474,7 +2486,7 @@ void evergreen_fetch_shader(struct pipe_context *ctx,
 	r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS,
 				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS,
-				0,
+				r600_resource_va(ctx->screen, (void *)ve->fetch_shader) >> 8,
 				0xFFFFFFFF, ve->fetch_shader, RADEON_USAGE_READ);
 }
 
@@ -2521,15 +2533,20 @@ void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx,
 }
 
 
-void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
+void evergreen_pipe_mod_buffer_resource(struct pipe_context *ctx,
+					struct r600_pipe_resource_state *rstate,
 					struct r600_resource *rbuffer,
 					unsigned offset, unsigned stride,
 					enum radeon_bo_usage usage)
 {
+	uint64_t va;
+
+	va = r600_resource_va(ctx->screen, (void *)rbuffer);
 	rstate->bo[0] = rbuffer;
 	rstate->bo_usage[0] = usage;
-	rstate->val[0] = offset;
+	rstate->val[0] = (offset + va) & 0xFFFFFFFFUL;
 	rstate->val[1] = rbuffer->buf->size - offset - 1;
 	rstate->val[2] = S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
-	                 S_030008_STRIDE(stride);
+			 S_030008_STRIDE(stride) |
+			 (((va + offset) >> 32UL) & 0xFF);
 }
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 1dba966..b0a28d9 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -978,6 +978,8 @@ void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags)
 void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
 				unsigned flush_mask, struct r600_resource *bo)
 {
+	uint64_t va = 0;
+
 	/* if bo has already been flushed */
 	if (!(~bo->cs_buf->last_flush & flush_flags)) {
 		bo->cs_buf->last_flush &= flush_mask;
@@ -1007,10 +1009,11 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
 			ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH;
 		}
 	} else {
+		va = r600_resource_va(&ctx->screen->screen, (void *)bo);
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = flush_flags;
 		ctx->pm4[ctx->pm4_cdwords++] = (bo->buf->size + 255) >> 8;
-		ctx->pm4[ctx->pm4_cdwords++] = 0x00000000;
+		ctx->pm4[ctx->pm4_cdwords++] = va >> 8;
 		ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A;
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, bo, RADEON_USAGE_WRITE);
@@ -1590,14 +1593,20 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
 
 void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence_bo, unsigned offset, unsigned value)
 {
+	uint64_t va;
+
 	r600_need_cs_space(ctx, 10, FALSE);
 
+	va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo);
+	va = va + (offset << 2);
+
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
 	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
 	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
-	ctx->pm4[ctx->pm4_cdwords++] = offset << 2;             /* ADDRESS_LO */
-	ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24);   /* DATA_SEL | INT_EN | ADDRESS_HI */
+	ctx->pm4[ctx->pm4_cdwords++] = va & 0xFFFFFFFFUL;       /* ADDRESS_LO */
+	/* DATA_SEL | INT_EN | ADDRESS_HI */
+	ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24) | ((va >> 32UL) & 0xFF);
 	ctx->pm4[ctx->pm4_cdwords++] = value;                   /* DATA_LO */
 	ctx->pm4[ctx->pm4_cdwords++] = 0;                       /* DATA_HI */
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
@@ -1707,6 +1716,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 {
 	unsigned new_results_end, i;
 	u32 *results;
+	uint64_t va;
 
 	r600_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);
 
@@ -1751,13 +1761,16 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 	}
 
 	/* emit begin query */
+	va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
+	va += query->results_end;
+
 	switch (query->type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
 	case PIPE_QUERY_OCCLUSION_PREDICATE:
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
-		ctx->pm4[ctx->pm4_cdwords++] = query->results_end;
-		ctx->pm4[ctx->pm4_cdwords++] = 0;
+		ctx->pm4[ctx->pm4_cdwords++] = va;
+		ctx->pm4[ctx->pm4_cdwords++] = (va >> 32UL) & 0xFF;
 		break;
 	case PIPE_QUERY_PRIMITIVES_EMITTED:
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
@@ -1771,8 +1784,8 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 	case PIPE_QUERY_TIME_ELAPSED:
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
-		ctx->pm4[ctx->pm4_cdwords++] = query->results_end;
-		ctx->pm4[ctx->pm4_cdwords++] = (3 << 29);
+		ctx->pm4[ctx->pm4_cdwords++] = va;
+		ctx->pm4[ctx->pm4_cdwords++] = (3 << 29) | ((va >> 32UL) & 0xFF);
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 		break;
@@ -1787,14 +1800,18 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 
 void r600_query_end(struct r600_context *ctx, struct r600_query *query)
 {
+	uint64_t va;
+
+	va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
 	/* emit end query */
 	switch (query->type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
 	case PIPE_QUERY_OCCLUSION_PREDICATE:
+		va += query->results_end + 8;
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
-		ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8;
-		ctx->pm4[ctx->pm4_cdwords++] = 0;
+		ctx->pm4[ctx->pm4_cdwords++] = va;
+		ctx->pm4[ctx->pm4_cdwords++] = (va >> 32UL) & 0xFF;
 		break;
 	case PIPE_QUERY_PRIMITIVES_EMITTED:
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
@@ -1806,10 +1823,11 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query)
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
+		va += query->results_end + query->result_size/2;
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
-		ctx->pm4[ctx->pm4_cdwords++] = query->results_end + query->result_size/2;
-		ctx->pm4[ctx->pm4_cdwords++] = (3 << 29);
+		ctx->pm4[ctx->pm4_cdwords++] = va;
+		ctx->pm4[ctx->pm4_cdwords++] = (3 << 29) | ((va >> 32UL) & 0xFF);
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 		break;
@@ -1826,6 +1844,8 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query)
 void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation,
 			    int flag_wait)
 {
+	uint64_t va;
+
 	if (operation == PREDICATION_OP_CLEAR) {
 		r600_need_cs_space(ctx, 3, FALSE);
 
@@ -1845,12 +1865,13 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
 
 		op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
 				(flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
+		va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
 
 		/* emit predicate packets for all data blocks */
 		while (results_base != query->results_end) {
 			ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
-			ctx->pm4[ctx->pm4_cdwords++] = results_base;
-			ctx->pm4[ctx->pm4_cdwords++] = op;
+			ctx->pm4[ctx->pm4_cdwords++] = (va + results_base) & 0xFFFFFFFFUL;
+			ctx->pm4[ctx->pm4_cdwords++] = op | (((va + results_base) >> 32UL) & 0xFF);
 			ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
 			ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer,
 									     RADEON_USAGE_READ);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 447b9dc..bd78243 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -243,7 +243,8 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx);
 void evergreen_polygon_offset_update(struct r600_pipe_context *rctx);
 void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx,
 					 struct r600_pipe_resource_state *rstate);
-void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
+void evergreen_pipe_mod_buffer_resource(struct pipe_context *ctx,
+					struct r600_pipe_resource_state *rstate,
 					struct r600_resource *rbuffer,
 					unsigned offset, unsigned stride,
 					enum radeon_bo_usage usage);
diff --git a/src/gallium/drivers/r600/r600_resource.c b/src/gallium/drivers/r600/r600_resource.c
index f3ab361..01db97a 100644
--- a/src/gallium/drivers/r600/r600_resource.c
+++ b/src/gallium/drivers/r600/r600_resource.c
@@ -62,3 +62,14 @@ void r600_init_context_resource_functions(struct r600_pipe_context *r600)
 	r600->context.transfer_destroy = u_transfer_destroy_vtbl;
 	r600->context.transfer_inline_write = u_transfer_inline_write_vtbl;
 }
+
+/* Return the virtual address the winsys reports for a resource's buffer, or 0 if it has no buffer. */
+uint64_t r600_resource_va(struct pipe_screen *screen, struct pipe_resource *resource)
+{
+	struct r600_resource *res = (struct r600_resource*)resource;
+	struct r600_screen *r600scr = (struct r600_screen*)screen;
+
+	if (!res->buf)
+		return 0;
+	return r600scr->ws->buffer_get_virtual_address(res->buf);
+}
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index 1ca6729..f39ac55 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -102,4 +102,6 @@ struct r600_pipe_context;
 
 void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource **rbuffer, uint32_t *offset);
 
+uint64_t r600_resource_va(struct pipe_screen *screen, struct pipe_resource *resource);
+
 #endif
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 054ab90..034a560 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -337,6 +337,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
 	struct r600_resource *rbuffer = r600_resource(buffer);
 	struct r600_pipe_resource_state *rstate;
+	uint64_t va_offset;
 	uint32_t offset;
 
 	/* Note that the state tracker can unbind constant buffers by
@@ -347,6 +348,9 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 	}
 
 	r600_upload_const_buffer(rctx, &rbuffer, &offset);
+	va_offset = r600_resource_va(ctx->screen, (void*)rbuffer);
+	va_offset += offset;
+	va_offset >>= 8;
 
 	switch (shader) {
 	case PIPE_SHADER_VERTEX:
@@ -357,7 +361,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 					0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&rctx->vs_const_buffer,
 					R_028980_ALU_CONST_CACHE_VS_0,
-					offset >> 8, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
+					va_offset, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
 		r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
 
 		rstate = &rctx->vs_const_buffer_resource[index];
@@ -370,7 +374,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 		}
 
 		if (rctx->chip_class >= EVERGREEN) {
-			evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
+			evergreen_pipe_mod_buffer_resource(ctx, rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
 			evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
 		} else {
 			r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
@@ -385,7 +389,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 					0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&rctx->ps_const_buffer,
 					R_028940_ALU_CONST_CACHE_PS_0,
-					offset >> 8, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
+					va_offset, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
 		r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
 
 		rstate = &rctx->ps_const_buffer_resource[index];
@@ -397,7 +401,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 			}
 		}
 		if (rctx->chip_class >= EVERGREEN) {
-			evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
+			evergreen_pipe_mod_buffer_resource(ctx, rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
 			evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
 		} else {
 			r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
@@ -522,7 +526,7 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
 		}
 
 		if (rctx->chip_class >= EVERGREEN) {
-			evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ);
+			evergreen_pipe_mod_buffer_resource(&rctx->context, rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ);
 			evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
 		} else {
 			r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index d4746ff..7b153b0 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -30,6 +30,7 @@
 #include "util/u_hash_table.h"
 #include "util/u_memory.h"
 #include "util/u_simple_list.h"
+#include "util/u_double_list.h"
 #include "os/os_thread.h"
 #include "os/os_mman.h"
 
@@ -67,6 +68,12 @@ static INLINE struct radeon_bo *radeon_bo(struct pb_buffer *bo)
     return (struct radeon_bo *)bo;
 }
 
+struct radeon_bo_va_hole { /* one free range in the manager's virtual address space */
+    struct list_head list;   /* node linked into radeon_bomgr::va_holes */
+    uint64_t         offset; /* virtual address where the free range starts */
+    uint64_t         size;   /* length of the free range */
+};
+
 struct radeon_bomgr {
     /* Base class. */
     struct pb_manager base;
@@ -77,6 +84,12 @@ struct radeon_bomgr {
     /* List of buffer handles and its mutex. */
     struct util_hash_table *bo_handles;
     pipe_mutex bo_handles_mutex;
+    pipe_mutex bo_va_mutex;
+
+    /* is virtual address supported */
+    bool va;
+    unsigned va_offset;
+    struct list_head va_holes;
 };
 
 static INLINE struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr)
@@ -151,9 +164,94 @@ static boolean radeon_bo_is_busy(struct pb_buffer *_buf,
     }
 }
 
+/* Pick a virtual address range of `size`: first hole that fits, else the range at va_offset. */
+static uint64_t radeon_bomgr_find_va(struct radeon_bomgr *mgr, uint64_t size)
+{
+    struct radeon_bo_va_hole *cur, *next;
+    uint64_t result = 0;
+
+    pipe_mutex_lock(mgr->bo_va_mutex);
+    LIST_FOR_EACH_ENTRY_SAFE(cur, next, &mgr->va_holes, list) {
+        if (cur->size < size)
+            continue; /* too small, keep scanning */
+        result = cur->offset;
+        if (cur->size == size) {
+            /* exact fit: the hole is consumed entirely */
+            list_del(&cur->list);
+            FREE(cur);
+        } else {
+            /* larger hole: carve the request off its front */
+            cur->size -= size;
+            cur->offset += size;
+        }
+        goto out;
+    }
+    /* no hole fits: hand out the range starting at va_offset */
+    result = mgr->va_offset;
+    mgr->va_offset += size;
+out:
+    pipe_mutex_unlock(mgr->bo_va_mutex);
+    return result;
+}
+
+/* Mark [va, va + size) as used at a caller-imposed address, fixing up va_offset and holes. */
+static void radeon_bomgr_force_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t size)
+{
+    pipe_mutex_lock(mgr->bo_va_mutex);
+    if (va >= mgr->va_offset) {
+        if (va > mgr->va_offset) {
+            /* the skipped range [va_offset, va) stays free: record it as a hole */
+            struct radeon_bo_va_hole *hole = CALLOC_STRUCT(radeon_bo_va_hole);
+            if (hole) {
+                hole->size = va - mgr->va_offset;
+                hole->offset = mgr->va_offset;
+                list_add(&hole->list, &mgr->va_holes);
+            }
+        }
+        mgr->va_offset = va + size;
+    } else {
+        struct radeon_bo_va_hole *hole, *n;
+
+        /* free all holes that fall into the range
+         * NOTE that we might lose virtual address space
+         */
+        LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) {
+            uint64_t stmp = hole->offset;
+            uint64_t etmp = stmp + hole->size;
+            if (va >= stmp && va < etmp) {
+                list_del(&hole->list);
+                FREE(hole);
+            }
+        }
+    }
+    pipe_mutex_unlock(mgr->bo_va_mutex);
+}
+
+/* Give [va, va + size) back: lower va_offset if the range ends there, else record a hole. */
+static void radeon_bomgr_free_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t size)
+{
+    struct radeon_bo_va_hole *freed;
+    pipe_mutex_lock(mgr->bo_va_mutex);
+    if (mgr->va_offset != (va + size)) {
+        /* FIXME on allocation failure we just lose virtual address space
+         * maybe print a warning
+         */
+        freed = CALLOC_STRUCT(radeon_bo_va_hole);
+        if (freed) {
+            freed->offset = va;
+            freed->size = size;
+            list_add(&freed->list, &mgr->va_holes);
+        }
+    } else {
+        mgr->va_offset = va;
+    }
+    pipe_mutex_unlock(mgr->bo_va_mutex);
+}
+
 static void radeon_bo_destroy(struct pb_buffer *_buf)
 {
     struct radeon_bo *bo = radeon_bo(_buf);
+    struct radeon_bomgr *mgr = bo->mgr;
     struct drm_gem_close args;
 
     memset(&args, 0, sizeof(args));
@@ -168,6 +266,10 @@ static void radeon_bo_destroy(struct pb_buffer *_buf)
     if (bo->ptr)
         os_munmap(bo->ptr, bo->base.size);
 
+    if (mgr->va) {
+        radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
+    }
+
     /* Close object. */
     args.handle = bo->handle;
     drmIoctl(bo->rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
@@ -343,6 +445,7 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
     struct radeon_bo *bo;
     struct drm_radeon_gem_create args;
     struct radeon_bo_desc *rdesc = (struct radeon_bo_desc*)desc;
+    int r;
 
     memset(&args, 0, sizeof(args));
 
@@ -375,8 +478,38 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
     bo->mgr = mgr;
     bo->rws = mgr->rws;
     bo->handle = args.handle;
+    bo->va = 0;
     pipe_mutex_init(bo->map_mutex);
 
+    if (mgr->va) {
+        struct drm_radeon_gem_va va;
+
+        bo->va_size = align(size,  4096);
+        bo->va = radeon_bomgr_find_va(mgr, bo->va_size);
+
+        va.handle = bo->handle;
+        va.vm_id = 0;
+        va.operation = RADEON_VA_MAP;
+        va.flags = RADEON_VM_PAGE_READABLE |
+                   RADEON_VM_PAGE_WRITEABLE |
+                   RADEON_VM_PAGE_SNOOPED;
+        va.offset = bo->va;
+        r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
+        if (r && va.operation == RADEON_VA_RESULT_ERROR) {
+            fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
+            fprintf(stderr, "radeon:    size      : %d bytes\n", size);
+            fprintf(stderr, "radeon:    alignment : %d bytes\n", desc->alignment);
+            fprintf(stderr, "radeon:    domains   : %d\n", args.initial_domain);
+            radeon_bo_destroy(&bo->base);
+            return NULL;
+        }
+        if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
+            radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
+            bo->va = va.offset;
+            radeon_bomgr_force_va(mgr, bo->va, bo->va_size);
+        }
+    }
+
     return &bo->base;
 }
 
@@ -407,6 +540,7 @@ static void radeon_bomgr_destroy(struct pb_manager *_mgr)
     struct radeon_bomgr *mgr = radeon_bomgr(_mgr);
     util_hash_table_destroy(mgr->bo_handles);
     pipe_mutex_destroy(mgr->bo_handles_mutex);
+    pipe_mutex_destroy(mgr->bo_va_mutex);
     FREE(mgr);
 }
 
@@ -438,6 +572,12 @@ struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws)
     mgr->rws = rws;
     mgr->bo_handles = util_hash_table_create(handle_hash, handle_compare);
     pipe_mutex_init(mgr->bo_handles_mutex);
+    pipe_mutex_init(mgr->bo_va_mutex);
+
+    mgr->va = rws->info.r600_virtual_address;
+    mgr->va_offset = rws->info.r600_va_start;
+    list_inithead(&mgr->va_holes);
+
     return &mgr->base;
 }
 
@@ -560,6 +700,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
     struct radeon_bo *bo;
     struct radeon_bomgr *mgr = radeon_bomgr(ws->kman);
     struct drm_gem_open open_arg = {};
+    int r;
 
     memset(&open_arg, 0, sizeof(open_arg));
 
@@ -603,6 +744,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
     bo->base.vtbl = &radeon_bo_vtbl;
     bo->mgr = mgr;
     bo->rws = mgr->rws;
+    bo->va = 0;
     pipe_mutex_init(bo->map_mutex);
 
     util_hash_table_set(mgr->bo_handles, (void*)(uintptr_t)whandle->handle, bo);
@@ -613,6 +755,33 @@ done:
     if (stride)
         *stride = whandle->stride;
 
+    if (mgr->va) {
+        struct drm_radeon_gem_va va;
+
+        bo->va_size = ((bo->base.size + 4095) & ~4095);
+        bo->va = radeon_bomgr_find_va(mgr, bo->va_size);
+
+        va.handle = bo->handle;
+        va.operation = RADEON_VA_MAP;
+        va.vm_id = 0;
+        va.offset = bo->va;
+        va.flags = RADEON_VM_PAGE_READABLE |
+                   RADEON_VM_PAGE_WRITEABLE |
+                   RADEON_VM_PAGE_SNOOPED;
+        va.offset = bo->va;
+        r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
+        if (r && va.operation == RADEON_VA_RESULT_ERROR) {
+            fprintf(stderr, "radeon: Failed to assign virtual address space\n");
+            radeon_bo_destroy(&bo->base);
+            return NULL;
+        }
+        if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
+            radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
+            bo->va = va.offset;
+            radeon_bomgr_force_va(mgr, bo->va, bo->va_size);
+        }
+    }
+
     return (struct pb_buffer*)bo;
 
 fail:
@@ -649,6 +818,13 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
     return TRUE;
 }
 
+/* Return the virtual address stored in the radeon_bo behind `buffer`. */
+static uint64_t radeon_winsys_bo_va(struct pb_buffer *buffer)
+{
+    /* resolve the pb_buffer to its radeon_bo, then read the address */
+    return get_radeon_bo(buffer)->va;
+}
+
 void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
 {
     ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle;
@@ -661,4 +837,5 @@ void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
     ws->base.buffer_create = radeon_winsys_bo_create;
     ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
     ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
+    ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
 }
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
index 35d25e8..21cfe99 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
@@ -59,6 +59,8 @@ struct radeon_bo {
 
     uint32_t handle;
     uint32_t name;
+    uint64_t va;
+    uint64_t va_size;
 
     /* how many command streams is this bo referenced in? */
     int num_cs_references;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index e6109af..5d8d10f 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -72,7 +72,7 @@
 #include <xf86drm.h>
 
 #ifndef RADEON_CHUNK_ID_FLAGS
-#define RADEON_CHUNK_ID_FLAGS	0x03
+#define RADEON_CHUNK_ID_FLAGS       0x03
 
 /* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */
 #define RADEON_CS_KEEP_TILING_FLAGS 0x01
@@ -80,9 +80,10 @@
 
 #define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))
 
-static boolean radeon_init_cs_context(struct radeon_cs_context *csc, int fd)
+static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
+                                      struct radeon_drm_winsys *ws)
 {
-    csc->fd = fd;
+    csc->fd = ws->fd;
     csc->nrelocs = 512;
     csc->relocs_bo = (struct radeon_bo**)
                      CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
@@ -157,11 +158,11 @@ static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws)
 
     cs->ws = ws;
 
-    if (!radeon_init_cs_context(&cs->csc1, cs->ws->fd)) {
+    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
         FREE(cs);
         return NULL;
     }
-    if (!radeon_init_cs_context(&cs->csc2, cs->ws->fd)) {
+    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
         radeon_destroy_cs_context(&cs->csc1);
         FREE(cs);
         return NULL;
@@ -440,11 +441,15 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
             p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
         }
 
+        cs->cst->flags = 0;
+        cs->cst->cs.num_chunks = 2;
         if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
+            cs->cst->flags |= RADEON_CS_KEEP_TILING_FLAGS;
             cs->cst->cs.num_chunks = 3;
-            cs->cst->flags = RADEON_CS_KEEP_TILING_FLAGS;
-        } else {
-            cs->cst->cs.num_chunks = 2;
+        }
+        if (cs->ws->info.r600_virtual_address) {
+            cs->cst->cs.num_chunks = 3;
+            cs->cst->flags |= RADEON_CS_USE_VM;
         }
 
         if (cs->thread &&
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
index 904000d..05b9a48 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -51,8 +51,8 @@ struct radeon_cs_context {
     struct drm_radeon_cs_reloc  *relocs_hashlist[256];
     unsigned                    reloc_indices_hashlist[256];
 
-    unsigned used_vram;
-    unsigned used_gart;
+    unsigned                    used_vram;
+    unsigned                    used_gart;
 };
 
 struct radeon_drm_cs {
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index f337411..5bc25d6 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -265,6 +265,16 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
                                       &ws->info.r600_backend_map))
                 ws->info.r600_backend_map_valid = TRUE;
         }
+        ws->info.r600_virtual_address = FALSE;
+        if (ws->info.drm_minor >= 13) {
+            ws->info.r600_virtual_address = TRUE;
+            if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL,
+                                      &ws->info.r600_va_start))
+                ws->info.r600_virtual_address = FALSE;
+            if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
+                                      &ws->info.r600_ib_vm_max_size))
+                ws->info.r600_virtual_address = FALSE;
+        }
     }
 
     return TRUE;
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 59c1aad..d33eaa7 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -103,6 +103,9 @@ struct radeon_info {
     uint32_t r600_num_tile_pipes;
     uint32_t r600_backend_map;
     boolean r600_backend_map_valid;
+    boolean r600_virtual_address;
+    uint32_t r600_va_start;
+    uint32_t r600_ib_vm_max_size;
 };
 
 enum radeon_feature_id {
@@ -250,6 +253,14 @@ struct radeon_winsys {
                                  unsigned stride,
                                  struct winsys_handle *whandle);
 
+    /**
+     * Return the GPU virtual address of a buffer.
+     *
+     * \param buf       A winsys buffer object
+     * \return          64-bit address of the buffer in the GPU address space
+     */
+    uint64_t (*buffer_get_virtual_address)(struct pb_buffer *buf);
+
     /**************************************************************************
      * Command submission.
      *
-- 
1.7.7.1



More information about the mesa-dev mailing list