[Mesa-dev] [PATCH] r600g: add support for virtual address space on cayman v8

j.glisse at gmail.com j.glisse at gmail.com
Fri Jan 6 07:42:16 PST 2012


From: Jerome Glisse <jglisse at redhat.com>

Virtual address space puts userspace in charge of its GPU
address space. It's up to userspace to bind bos into the virtual
address space. Command streams can then be executed using the
IB_VM chunk.

This patch adds support for this configuration. It doesn't remove
the 64K ib size limit, though this limit can be extended up to
1M for the IB_VM chunk.

v2: fix rendering
v3: fix rendering when using index buffer
v4: make vm conditional on kernel support, add basic va management
v5: catch the case when we already have va for a bo
v6: agd5f: update on top of ioctl changes
v7: agd5f: further ioctl updates
v8: indentation cleanup + fix non cayman

Signed-off-by: Jerome Glisse <jglisse at redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
 src/gallium/drivers/r600/evergreen_hw_context.c   |    7 +-
 src/gallium/drivers/r600/evergreen_state.c        |   49 ++++--
 src/gallium/drivers/r600/r600_hw_context.c        |   47 +++++--
 src/gallium/drivers/r600/r600_pipe.h              |    3 +-
 src/gallium/drivers/r600/r600_resource.c          |   11 ++
 src/gallium/drivers/r600/r600_resource.h          |    2 +
 src/gallium/drivers/r600/r600_state_common.c      |   14 +-
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c     |  165 +++++++++++++++++++++
 src/gallium/winsys/radeon/drm/radeon_drm_bo.h     |    2 +
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c     |   29 +++-
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h     |    5 +-
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |   18 ++-
 src/gallium/winsys/radeon/drm/radeon_winsys.h     |   11 ++
 13 files changed, 316 insertions(+), 47 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index 96e8d18..01764ed 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -1135,6 +1135,7 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
 	struct r600_block *dirty_block = NULL;
 	struct r600_block *next_block;
 	uint32_t *pm4;
+	uint64_t va;
 
 	if (draw->indices) {
 		ndwords = 11;
@@ -1174,8 +1175,10 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
 	pm4[2] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing);
 	pm4[3] = draw->vgt_num_instances;
 	if (draw->indices) {
-	        pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing);
-		pm4[5] = draw->indices_bo_offset;
+		va = r600_resource_va(&ctx->screen->screen, (void*)draw->indices);
+		va += draw->indices_bo_offset;
+		pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing);
+		pm4[5] = va;
 		pm4[6] = 0;
 		pm4[7] = draw->vgt_num_indices;
 		pm4[8] = draw->vgt_draw_initiator;
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index d0c02d5..678d0db 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1099,8 +1099,8 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
 	rstate->val[1] = (S_030004_TEX_HEIGHT(height - 1) |
 			  S_030004_TEX_DEPTH(depth - 1) |
 			  S_030004_ARRAY_MODE(array_mode));
-	rstate->val[2] = tmp->offset[0] >> 8;
-	rstate->val[3] = tmp->offset[1] >> 8;
+	rstate->val[2] = (tmp->offset[0] + r600_resource_va(ctx->screen, texture)) >> 8;
+	rstate->val[3] = (tmp->offset[1] + r600_resource_va(ctx->screen, texture)) >> 8;
 	rstate->val[4] = (word4 |
 			  S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
 			  S_030010_ENDIAN_SWAP(endian) |
@@ -1341,7 +1341,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
 	unsigned pitch, slice;
 	unsigned color_info;
 	unsigned format, swap, ntype, endian;
-	unsigned offset;
+	uint64_t offset;
 	unsigned tile_type;
 	const struct util_format_description *desc;
 	int i;
@@ -1441,10 +1441,13 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
 	} else /* workaround for linear buffers */
 		tile_type = 1;
 
+	offset += r600_resource_va(rctx->context.screen, state->cbufs[cb]->texture);
+	offset >>= 8;
+
 	/* FIXME handle enabling of CB beyond BASE8 which has different offset */
 	r600_pipe_state_add_reg(rstate,
 				R_028C60_CB_COLOR0_BASE + cb * 0x3C,
-				offset >> 8, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
+				offset, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate,
 				R_028C78_CB_COLOR0_DIM + cb * 0x3C,
 				0x0, 0xFFFFFFFF, NULL, 0);
@@ -1473,7 +1476,8 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
 {
 	struct r600_resource_texture *rtex;
 	struct r600_surface *surf;
-	unsigned level, first_layer, pitch, slice, format, offset, array_mode;
+	unsigned level, first_layer, pitch, slice, format, array_mode;
+	uint64_t offset;
 
 	if (state->zsbuf == NULL)
 		return;
@@ -1492,20 +1496,26 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
 	slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
 	format = r600_translate_dbformat(rtex->real_format);
 
+	offset += r600_resource_va(rctx->context.screen, surf->base.texture);
+	offset >>= 8;
+
 	r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE,
-				offset >> 8, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
+				offset, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE,
-				offset >> 8, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
+				offset, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL, 0);
 
 	if (rtex->stencil) {
-		uint32_t stencil_offset =
+		uint64_t stencil_offset =
 			r600_texture_get_offset(rtex->stencil, level, first_layer);
 
+		stencil_offset += r600_resource_va(rctx->context.screen, (void*)rtex->stencil);
+		stencil_offset >>= 8;
+
 		r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE,
-					stencil_offset >> 8, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
+					stencil_offset, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
 		r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE,
-					stencil_offset >> 8, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
+					stencil_offset, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
 		r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO,
 					1, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
 	} else {
@@ -2378,7 +2388,8 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
 
 	r600_pipe_state_add_reg(rstate,
 				R_028840_SQ_PGM_START_PS,
-				0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
+				r600_resource_va(ctx->screen, (void *)shader->bo) >> 8,
+				0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
 	r600_pipe_state_add_reg(rstate,
 				R_028844_SQ_PGM_RESOURCES_PS,
 				S_028844_NUM_GPRS(rshader->bc.ngpr) |
@@ -2453,7 +2464,8 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
 				0x0, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 			R_02885C_SQ_PGM_START_VS,
-			0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
+			r600_resource_va(ctx->screen, (void *)shader->bo) >> 8,
+			0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
 
 	r600_pipe_state_add_reg(rstate,
 				R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
@@ -2470,7 +2482,7 @@ void evergreen_fetch_shader(struct pipe_context *ctx,
 	r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS,
 				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS,
-				0,
+				r600_resource_va(ctx->screen, (void *)ve->fetch_shader) >> 8,
 				0xFFFFFFFF, ve->fetch_shader, RADEON_USAGE_READ);
 }
 
@@ -2517,15 +2529,20 @@ void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx,
 }
 
 
-void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
+void evergreen_pipe_mod_buffer_resource(struct pipe_context *ctx,
+					struct r600_pipe_resource_state *rstate,
 					struct r600_resource *rbuffer,
 					unsigned offset, unsigned stride,
 					enum radeon_bo_usage usage)
 {
+	uint64_t va;
+
+	va = r600_resource_va(ctx->screen, (void *)rbuffer);
 	rstate->bo[0] = rbuffer;
 	rstate->bo_usage[0] = usage;
-	rstate->val[0] = offset;
+	rstate->val[0] = (offset + va) & 0xFFFFFFFFUL;
 	rstate->val[1] = rbuffer->buf->size - offset - 1;
 	rstate->val[2] = S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
-	                 S_030008_STRIDE(stride);
+			 S_030008_STRIDE(stride) |
+			 ((va >> 32UL) & 0xFF);
 }
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 52e0be7..93cb025 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -975,6 +975,8 @@ void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags)
 void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
 				unsigned flush_mask, struct r600_resource *bo)
 {
+	uint64_t va = 0;
+
 	/* if bo has already been flushed */
 	if (!(~bo->cs_buf->last_flush & flush_flags)) {
 		bo->cs_buf->last_flush &= flush_mask;
@@ -1004,10 +1006,11 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
 			ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH;
 		}
 	} else {
+		va = r600_resource_va(&ctx->screen->screen, (void *)bo);
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = flush_flags;
 		ctx->pm4[ctx->pm4_cdwords++] = (bo->buf->size + 255) >> 8;
-		ctx->pm4[ctx->pm4_cdwords++] = 0x00000000;
+		ctx->pm4[ctx->pm4_cdwords++] = va >> 8;
 		ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A;
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, bo, RADEON_USAGE_WRITE);
@@ -1570,14 +1573,20 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
 
 void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence_bo, unsigned offset, unsigned value)
 {
+	uint64_t va;
+
 	r600_need_cs_space(ctx, 10, FALSE);
 
+	va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo);
+	va = va + (offset << 2);
+
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
 	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
 	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
-	ctx->pm4[ctx->pm4_cdwords++] = offset << 2;             /* ADDRESS_LO */
-	ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24);   /* DATA_SEL | INT_EN | ADDRESS_HI */
+	ctx->pm4[ctx->pm4_cdwords++] = va & 0xFFFFFFFFUL;       /* ADDRESS_LO */
+	/* DATA_SEL | INT_EN | ADDRESS_HI */
+	ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24) | ((va >> 32UL) & 0xFFFFFFFFUL);
 	ctx->pm4[ctx->pm4_cdwords++] = value;                   /* DATA_LO */
 	ctx->pm4[ctx->pm4_cdwords++] = 0;                       /* DATA_HI */
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
@@ -1649,6 +1658,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 {
 	unsigned new_results_end, i;
 	u32 *results;
+	uint64_t va;
 
 	r600_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);
 
@@ -1684,19 +1694,22 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 	}
 
 	/* emit begin query */
+	va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
+	va += query->results_end;
+
 	switch (query->type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
 	case PIPE_QUERY_OCCLUSION_PREDICATE:
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
-		ctx->pm4[ctx->pm4_cdwords++] = query->results_end;
-		ctx->pm4[ctx->pm4_cdwords++] = 0;
+		ctx->pm4[ctx->pm4_cdwords++] = va & 0xFFFFFFFFUL;
+		ctx->pm4[ctx->pm4_cdwords++] = (va >> 32UL) & 0xFFFFFFFFUL;
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
-		ctx->pm4[ctx->pm4_cdwords++] = query->results_end;
-		ctx->pm4[ctx->pm4_cdwords++] = (3 << 29);
+		ctx->pm4[ctx->pm4_cdwords++] = va & 0xFFFFFFFFUL;
+		ctx->pm4[ctx->pm4_cdwords++] = (3 << 29) | ((va >> 32UL) & 0xFFFFFFFFUL);
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 		break;
@@ -1711,20 +1724,25 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 
 void r600_query_end(struct r600_context *ctx, struct r600_query *query)
 {
+	uint64_t va;
+
+	va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
 	/* emit end query */
 	switch (query->type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
 	case PIPE_QUERY_OCCLUSION_PREDICATE:
+		va += query->results_end + 8;
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
-		ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8;
-		ctx->pm4[ctx->pm4_cdwords++] = 0;
+		ctx->pm4[ctx->pm4_cdwords++] = va & 0xFFFFFFFFUL;
+		ctx->pm4[ctx->pm4_cdwords++] = (va >> 32UL) & 0xFFFFFFFFUL;
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
+		va += query->results_end + query->result_size/2;
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
-		ctx->pm4[ctx->pm4_cdwords++] = query->results_end + query->result_size/2;
-		ctx->pm4[ctx->pm4_cdwords++] = (3 << 29);
+		ctx->pm4[ctx->pm4_cdwords++] = va & 0xFFFFFFFFUL;
+		ctx->pm4[ctx->pm4_cdwords++] = (3 << 29) | ((va >> 32UL) & 0xFFFFFFFFUL);
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 		break;
@@ -1741,6 +1759,8 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query)
 void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation,
 			    int flag_wait)
 {
+	uint64_t va;
+
 	if (operation == PREDICATION_OP_CLEAR) {
 		r600_need_cs_space(ctx, 3, FALSE);
 
@@ -1760,12 +1780,13 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
 
 		op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
 				(flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
+		va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
 
 		/* emit predicate packets for all data blocks */
 		while (results_base != query->results_end) {
 			ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
-			ctx->pm4[ctx->pm4_cdwords++] = results_base;
-			ctx->pm4[ctx->pm4_cdwords++] = op;
+			ctx->pm4[ctx->pm4_cdwords++] = (va + results_base) & 0xFFFFFFFFUL;
+			ctx->pm4[ctx->pm4_cdwords++] = op | (((va + results_base) >> 32UL) & 0xFFFFFFFFUL);
 			ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
 			ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer,
 									     RADEON_USAGE_READ);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index a127eed..84304a3 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -241,7 +241,8 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx);
 void evergreen_polygon_offset_update(struct r600_pipe_context *rctx);
 void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx,
 					 struct r600_pipe_resource_state *rstate);
-void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
+void evergreen_pipe_mod_buffer_resource(struct pipe_context *ctx,
+					struct r600_pipe_resource_state *rstate,
 					struct r600_resource *rbuffer,
 					unsigned offset, unsigned stride,
 					enum radeon_bo_usage usage);
diff --git a/src/gallium/drivers/r600/r600_resource.c b/src/gallium/drivers/r600/r600_resource.c
index f3ab361..b72c8ce 100644
--- a/src/gallium/drivers/r600/r600_resource.c
+++ b/src/gallium/drivers/r600/r600_resource.c
@@ -62,3 +62,14 @@ void r600_init_context_resource_functions(struct r600_pipe_context *r600)
 	r600->context.transfer_destroy = u_transfer_destroy_vtbl;
 	r600->context.transfer_inline_write = u_transfer_inline_write_vtbl;
 }
+
+uint64_t r600_resource_va(struct pipe_screen *screen, struct pipe_resource *resource)
+{	/* Return the virtual address of the resource's buffer, as reported by the winsys buffer_va hook. */
+	struct r600_screen *rscreen = (struct r600_screen*)screen;	/* screen is treated as an r600_screen */
+	struct r600_resource *rresource = (struct r600_resource*)resource;	/* resource is treated as an r600_resource */
+
+	if (rresource->buf) {
+		return rscreen->ws->buffer_va(rresource->buf);
+	}
+	return 0;	/* resource has no buffer attached: report address 0 */
+}
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index 1ca6729..f39ac55 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -102,4 +102,6 @@ struct r600_pipe_context;
 
 void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource **rbuffer, uint32_t *offset);
 
+uint64_t r600_resource_va(struct pipe_screen *screen, struct pipe_resource *resource);
+
 #endif
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 9ecbc53..f7fd6d1 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -336,6 +336,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
 	struct r600_resource *rbuffer = r600_resource(buffer);
 	struct r600_pipe_resource_state *rstate;
+	uint64_t va_offset;
 	uint32_t offset;
 
 	/* Note that the state tracker can unbind constant buffers by
@@ -346,6 +347,9 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 	}
 
 	r600_upload_const_buffer(rctx, &rbuffer, &offset);
+	va_offset = r600_resource_va(ctx->screen, (void*)rbuffer);
+	va_offset += offset;
+	va_offset >>= 8;
 
 	switch (shader) {
 	case PIPE_SHADER_VERTEX:
@@ -356,7 +360,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 					0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&rctx->vs_const_buffer,
 					R_028980_ALU_CONST_CACHE_VS_0,
-					offset >> 8, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
+					va_offset, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
 		r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
 
 		rstate = &rctx->vs_const_buffer_resource[index];
@@ -369,7 +373,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 		}
 
 		if (rctx->chip_class >= EVERGREEN) {
-			evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
+			evergreen_pipe_mod_buffer_resource(ctx, rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
 			evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
 		} else {
 			r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
@@ -384,7 +388,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 					0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&rctx->ps_const_buffer,
 					R_028940_ALU_CONST_CACHE_PS_0,
-					offset >> 8, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
+					va_offset, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
 		r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
 
 		rstate = &rctx->ps_const_buffer_resource[index];
@@ -396,7 +400,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 			}
 		}
 		if (rctx->chip_class >= EVERGREEN) {
-			evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
+			evergreen_pipe_mod_buffer_resource(ctx, rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
 			evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
 		} else {
 			r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
@@ -456,7 +460,7 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
 		}
 
 		if (rctx->chip_class >= EVERGREEN) {
-			evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ);
+			evergreen_pipe_mod_buffer_resource(&rctx->context, rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ);
 			evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
 		} else {
 			r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index ccf9c4f..8ef0c18 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -30,6 +30,7 @@
 #include "util/u_hash_table.h"
 #include "util/u_memory.h"
 #include "util/u_simple_list.h"
+#include "util/u_double_list.h"
 #include "os/os_thread.h"
 #include "os/os_mman.h"
 
@@ -67,6 +68,12 @@ static INLINE struct radeon_bo *radeon_bo(struct pb_buffer *bo)
     return (struct radeon_bo *)bo;
 }
 
+struct radeon_bo_va_hole {           /* one free range of virtual address space */
+    struct list_head list;           /* link in radeon_bomgr.va_holes */
+    uint64_t         offset;         /* start address of the free range */
+    uint64_t         size;           /* length of the free range */
+};
+
 struct radeon_bomgr {
     /* Base class. */
     struct pb_manager base;
@@ -77,6 +84,11 @@ struct radeon_bomgr {
     /* List of buffer handles and its mutex. */
     struct util_hash_table *bo_handles;
     pipe_mutex bo_handles_mutex;
+
+    /* is virtual address supported */
+    bool va;
+    unsigned va_offset;
+    struct list_head va_holes;
 };
 
 static INLINE struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr)
@@ -151,9 +163,85 @@ static boolean radeon_bo_is_busy(struct pb_buffer *_buf,
     }
 }
 
+static uint64_t radeon_bomgr_find_va(struct radeon_bomgr *mgr, uint64_t size)
+{   /* Reserve `size` of virtual address space and return the start of the reserved range. */
+    struct radeon_bo_va_hole *hole, *n;
+    uint64_t offset = 0;
+
+    pipe_mutex_lock(mgr->bo_handles_mutex); /* va_holes and va_offset are accessed under this lock */
+    /* first look for a hole: the first one in list order that is big enough is used */
+    LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) {
+        if (hole->size == size) { /* exact fit: the hole is consumed entirely */
+            offset = hole->offset;
+            list_del(&hole->list);
+            FREE(hole);
+            pipe_mutex_unlock(mgr->bo_handles_mutex);
+            return offset;
+        }
+        if (hole->size > size) { /* bigger hole: take the front, keep the remainder as a hole */
+            offset = hole->offset;
+            hole->size -= size;
+            hole->offset += size;
+            pipe_mutex_unlock(mgr->bo_handles_mutex);
+            return offset;
+        }
+    }
+
+    offset = mgr->va_offset; /* no hole fits: hand out the range starting at va_offset */
+    mgr->va_offset += size; /* NOTE(review): va_offset is declared `unsigned` in struct radeon_bomgr while offsets are uint64_t - confirm no truncation */
+    pipe_mutex_unlock(mgr->bo_handles_mutex);
+    return offset;
+}
+
+static void radeon_bomgr_force_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t size)
+{   /* Account for a range at an address not chosen by radeon_bomgr_find_va (callers use it after RADEON_VA_RESULT_VA_EXIST). */
+    pipe_mutex_lock(mgr->bo_handles_mutex);
+    if (va >= mgr->va_offset) {
+        mgr->va_offset = va + size; /* the gap between the old va_offset and va is not recorded as a hole */
+    } else {
+        struct radeon_bo_va_hole *hole, *n;
+        uint64_t stmp, etmp;
+
+        /* free every hole whose range [offset, offset + size) contains va
+         * NOTE that we might lose virtual address space
+         */
+        LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) {
+            stmp = hole->offset;
+            etmp = stmp + hole->size;
+            if (va >= stmp && va < etmp) { /* NOTE(review): only the start address is tested; a hole starting inside [va, va + size) is kept - confirm */
+                list_del(&hole->list);
+                FREE(hole);
+            }
+        }
+    }
+    pipe_mutex_unlock(mgr->bo_handles_mutex);
+}
+
+static void radeon_bomgr_free_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t size)
+{   /* Give the range [va, va + size) back to the manager. */
+    pipe_mutex_lock(mgr->bo_handles_mutex);
+    if ((va + size) == mgr->va_offset) { /* range ends exactly at va_offset: just lower va_offset */
+        mgr->va_offset = va;
+    } else {
+        struct radeon_bo_va_hole *hole;
+
+        /* FIXME on allocation failure we just lose virtual address space
+         * maybe print a warning
+         */
+        hole = CALLOC_STRUCT(radeon_bo_va_hole);
+        if (hole) {
+            hole->size = size;
+            hole->offset = va;
+            list_add(&hole->list, &mgr->va_holes); /* recorded as its own hole; neighbouring holes are not merged */
+        }
+    }
+    pipe_mutex_unlock(mgr->bo_handles_mutex);
+}
+
 static void radeon_bo_destroy(struct pb_buffer *_buf)
 {
     struct radeon_bo *bo = radeon_bo(_buf);
+    struct radeon_bomgr *mgr = bo->mgr;
     struct drm_gem_close args;
 
     memset(&args, 0, sizeof(args));
@@ -168,6 +256,10 @@ static void radeon_bo_destroy(struct pb_buffer *_buf)
     if (bo->ptr)
         os_munmap(bo->ptr, bo->base.size);
 
+    if (mgr->va) {
+        radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
+    }
+
     /* Close object. */
     args.handle = bo->handle;
     drmIoctl(bo->rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
@@ -343,6 +435,7 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
     struct radeon_bo *bo;
     struct drm_radeon_gem_create args;
     struct radeon_bo_desc *rdesc = (struct radeon_bo_desc*)desc;
+    int r;
 
     memset(&args, 0, sizeof(args));
 
@@ -378,8 +471,38 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
     bo->rws = mgr->rws;
     bo->handle = args.handle;
     bo->reloc_domains = rdesc->reloc_domains;
+    bo->va = 0;
     pipe_mutex_init(bo->map_mutex);
 
+    if (mgr->va) {
+        struct drm_radeon_gem_va va;
+
+        bo->va_size = ((size + 4095) & ~4095);
+        bo->va = radeon_bomgr_find_va(mgr, bo->va_size);
+
+        va.handle = bo->handle;
+        va.vm_id = 0;
+        va.operation = RADEON_VA_MAP;
+        va.flags = RADEON_VM_PAGE_READABLE |
+                   RADEON_VM_PAGE_WRITEABLE |
+                   RADEON_VM_PAGE_SNOOPED;
+        va.offset = bo->va;
+        r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
+        if (r && va.operation == RADEON_VA_RESULT_ERROR) {
+            fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
+            fprintf(stderr, "radeon:    size      : %d bytes\n", size);
+            fprintf(stderr, "radeon:    alignment : %d bytes\n", desc->alignment);
+            fprintf(stderr, "radeon:    domains   : %d\n", args.initial_domain);
+            radeon_bo_destroy(&bo->base);
+            return NULL;
+        }
+        if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
+            radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
+            bo->va = va.offset;
+            radeon_bomgr_force_va(mgr, bo->va, bo->va_size);
+        }
+    }
+
     return &bo->base;
 }
 
@@ -441,6 +564,11 @@ struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws)
     mgr->rws = rws;
     mgr->bo_handles = util_hash_table_create(handle_hash, handle_compare);
     pipe_mutex_init(mgr->bo_handles_mutex);
+
+    mgr->va = rws->info.r600_va;
+    mgr->va_offset = rws->info.r600_va_start;
+    list_inithead(&mgr->va_holes);
+
     return &mgr->base;
 }
 
@@ -584,6 +712,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
     struct radeon_bo *bo;
     struct radeon_bomgr *mgr = radeon_bomgr(ws->kman);
     struct drm_gem_open open_arg = {};
+    int r;
 
     memset(&open_arg, 0, sizeof(open_arg));
 
@@ -628,6 +757,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
     bo->base.vtbl = &radeon_bo_vtbl;
     bo->mgr = mgr;
     bo->rws = mgr->rws;
+    bo->va = 0;
     pipe_mutex_init(bo->map_mutex);
 
     util_hash_table_set(mgr->bo_handles, (void*)(uintptr_t)whandle->handle, bo);
@@ -638,6 +768,33 @@ done:
     if (stride)
         *stride = whandle->stride;
 
+    if (mgr->va) {
+        struct drm_radeon_gem_va va;
+
+        bo->va_size = ((bo->base.size + 4095) & ~4095);
+        bo->va = radeon_bomgr_find_va(mgr, bo->va_size);
+
+        va.handle = bo->handle;
+        va.operation = RADEON_VA_MAP;
+        va.vm_id = 0;
+        va.offset = bo->va;
+        va.flags = RADEON_VM_PAGE_READABLE |
+                   RADEON_VM_PAGE_WRITEABLE |
+                   RADEON_VM_PAGE_SNOOPED;
+        va.offset = bo->va;
+        r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
+        if (r && va.operation == RADEON_VA_RESULT_ERROR) {
+            fprintf(stderr, "radeon: Failed to open a buffer:\n");
+            radeon_bo_destroy(&bo->base);
+            return NULL;
+        }
+        if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
+            radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
+            bo->va = va.offset;
+            radeon_bomgr_force_va(mgr, bo->va, bo->va_size);
+        }
+    }
+
     return (struct pb_buffer*)bo;
 
 fail:
@@ -674,6 +831,13 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
     return TRUE;
 }
 
+static uint64_t radeon_winsys_bo_va(struct pb_buffer *buffer)
+{   /* buffer_va winsys hook: return the va stored on the radeon_bo behind `buffer`. */
+    struct radeon_bo *bo = get_radeon_bo(buffer);
+
+    return bo->va; /* the creation paths in this patch leave this at 0 when mgr->va is false */
+}
+
 void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
 {
     ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle;
@@ -686,4 +850,5 @@ void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
     ws->base.buffer_create = radeon_winsys_bo_create;
     ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
     ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
+    ws->base.buffer_va = radeon_winsys_bo_va;
 }
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
index ba71cfb..0fc00ae 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
@@ -61,6 +61,8 @@ struct radeon_bo {
     uint32_t reloc_domains;
     uint32_t handle;
     uint32_t name;
+    uint64_t va;
+    uint64_t va_size;
 
     /* how many command streams is this bo referenced in? */
     int num_cs_references;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 8d5a6b3..a745937 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -73,9 +73,10 @@
 
 #define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))
 
-static boolean radeon_init_cs_context(struct radeon_cs_context *csc, int fd)
+static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
+                                      struct radeon_drm_winsys *ws)
 {
-    csc->fd = fd;
+    csc->fd = ws->fd;
     csc->nrelocs = 512;
     csc->relocs_bo = (struct radeon_bo**)
                      CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
@@ -90,17 +91,34 @@ static boolean radeon_init_cs_context(struct radeon_cs_context *csc, int fd)
         return FALSE;
     }
 
+    csc->cs_flags = (uint32_t*)
+                  CALLOC(1, 3 * sizeof(uint32_t));
+    if (!csc->cs_flags) {
+        FREE(csc->relocs_bo);
+        FREE(csc->relocs);
+        return FALSE;
+    }
+
     csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
     csc->chunks[0].length_dw = 0;
     csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
     csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
     csc->chunks[1].length_dw = 0;
     csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
+    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
+    csc->chunks[2].length_dw = 3;
+    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)csc->cs_flags;
+    csc->cs_flags[0] = 0;
+    csc->cs_flags[1] = RADEON_CS_RING_GFX;
+    csc->cs_flags[2] = 0;
+    if (ws->info.r600_va)
+        csc->cs_flags[0] |= RADEON_CS_USE_VM;
 
     csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
     csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
+    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];
 
-    csc->cs.num_chunks = 2;
+    csc->cs.num_chunks = 3;
     csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;
     return TRUE;
 }
@@ -128,6 +146,7 @@ static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
     radeon_cs_context_cleanup(csc);
     FREE(csc->relocs_bo);
     FREE(csc->relocs);
+    FREE(csc->cs_flags);
 }
 
 DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE)
@@ -147,11 +166,11 @@ static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws)
 
     cs->ws = ws;
 
-    if (!radeon_init_cs_context(&cs->csc1, cs->ws->fd)) {
+    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
         FREE(cs);
         return NULL;
     }
-    if (!radeon_init_cs_context(&cs->csc2, cs->ws->fd)) {
+    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
         radeon_destroy_cs_context(&cs->csc1);
         FREE(cs);
         return NULL;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
index f316b5e..26e0c76 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -35,8 +35,8 @@ struct radeon_cs_context {
 
     int fd;
     struct drm_radeon_cs        cs;
-    struct drm_radeon_cs_chunk  chunks[2];
-    uint64_t                    chunk_array[2];
+    struct drm_radeon_cs_chunk  chunks[3];
+    uint64_t                    chunk_array[3];
 
     /* Relocs. */
     unsigned                    nrelocs;
@@ -44,6 +44,7 @@ struct radeon_cs_context {
     unsigned                    validated_crelocs;
     struct radeon_bo            **relocs_bo;
     struct drm_radeon_cs_reloc  *relocs;
+    uint32_t                    *cs_flags;
 
     /* 0 = BO not added, 1 = BO added */
     char                        is_handle_added[256];
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 442bd2a..8124016 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -138,9 +138,11 @@ static boolean radeon_get_drm_value(int fd, unsigned request,
     info.request = request;
 
     retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
-    if (retval && errname) {
-        fprintf(stderr, "radeon: Failed to get %s, error number %d\n",
-                errname, retval);
+    if (retval) {
+        if (errname) {
+            fprintf(stderr, "radeon: Failed to get %s, error number %d\n",
+                    errname, retval);
+        }
         return FALSE;
     }
     return TRUE;
@@ -263,6 +265,16 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
                                       &ws->info.r600_backend_map))
                 ws->info.r600_backend_map_valid = TRUE;
         }
+        ws->info.r600_va = FALSE;
+        if (ws->info.drm_minor >= 12) {
+            ws->info.r600_va = TRUE;
+            if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL,
+                                      &ws->info.r600_va_start))
+                ws->info.r600_va = FALSE;
+            if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
+                                      &ws->info.r600_ib_vm_max_size))
+                ws->info.r600_va = FALSE;
+        }
     }
 
     return TRUE;
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index c4ea655..69c42c2 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -96,6 +96,9 @@ struct radeon_info {
     uint32_t r600_num_tile_pipes;
     uint32_t r600_backend_map;
     boolean r600_backend_map_valid;
+    boolean r600_va;
+    uint32_t r600_va_start;
+    uint32_t r600_ib_vm_max_size;
 };
 
 enum radeon_feature_id {
@@ -242,6 +245,14 @@ struct radeon_winsys {
                                  unsigned stride,
                                  struct winsys_handle *whandle);
 
+    /**
+     * Return the GPU virtual address of a buffer.
+     *
+     * \param buf       A winsys buffer object
+     * \return          GPU virtual address of the buffer
+     */
+    uint64_t (*buffer_va)(struct pb_buffer *buf);
+
     /**************************************************************************
      * Command submission.
      *
-- 
1.7.7.1



More information about the mesa-dev mailing list