[PATCH 2/5] r600g/radeonsi: Use write-combined CPU mappings of some BOs in GTT
Michel Dänzer
michel at daenzer.net
Thu Jul 17 03:01:12 PDT 2014
From: Michel Dänzer <michel.daenzer at amd.com>
Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
---
src/gallium/drivers/r300/r300_query.c | 2 +-
src/gallium/drivers/r300/r300_render.c | 2 +-
src/gallium/drivers/r300/r300_screen_buffer.c | 4 ++--
src/gallium/drivers/r300/r300_texture.c | 2 +-
src/gallium/drivers/radeon/r600_buffer_common.c | 9 ++++++--
src/gallium/drivers/radeon/r600_texture.c | 2 ++
src/gallium/drivers/radeon/radeon_uvd.c | 8 +++++---
src/gallium/drivers/radeon/radeon_vce.c | 8 ++++----
src/gallium/drivers/radeon/radeon_video.c | 11 ++++++----
src/gallium/drivers/radeon/radeon_video.h | 4 +++-
src/gallium/drivers/radeonsi/si_state.c | 2 +-
src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 25 +++++++++++++++++++----
src/gallium/winsys/radeon/drm/radeon_drm_bo.h | 1 +
src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 2 +-
src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 12 +++++++++++
src/gallium/winsys/radeon/drm/radeon_drm_winsys.h | 2 ++
src/gallium/winsys/radeon/drm/radeon_winsys.h | 7 ++++++-
17 files changed, 77 insertions(+), 26 deletions(-)
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 5305ebd..1679433 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -59,7 +59,7 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe,
q->num_pipes = r300screen->info.r300_num_gb_pipes;
q->buf = r300->rws->buffer_create(r300->rws, 4096, 4096, TRUE,
- RADEON_DOMAIN_GTT);
+ RADEON_DOMAIN_GTT, 0);
if (!q->buf) {
FREE(q);
return NULL;
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 175b83a..6e5b381 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -907,7 +907,7 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render,
r300->vbo = rws->buffer_create(rws,
MAX2(R300_MAX_DRAW_VBO_SIZE, size),
R300_BUFFER_ALIGNMENT, TRUE,
- RADEON_DOMAIN_GTT);
+ RADEON_DOMAIN_GTT, 0);
if (!r300->vbo) {
return FALSE;
}
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c
index 86e4478..de557b5 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.c
+++ b/src/gallium/drivers/r300/r300_screen_buffer.c
@@ -103,7 +103,7 @@ r300_buffer_transfer_map( struct pipe_context *context,
/* Create a new one in the same pipe_resource. */
new_buf = r300->rws->buffer_create(r300->rws, rbuf->b.b.width0,
R300_BUFFER_ALIGNMENT, TRUE,
- rbuf->domain);
+ rbuf->domain, 0);
if (new_buf) {
/* Discard the old buffer. */
pb_reference(&rbuf->buf, NULL);
@@ -185,7 +185,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
rbuf->buf =
r300screen->rws->buffer_create(r300screen->rws, rbuf->b.b.width0,
R300_BUFFER_ALIGNMENT, TRUE,
- rbuf->domain);
+ rbuf->domain, 0);
if (!rbuf->buf) {
FREE(rbuf);
return NULL;
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 4ea69dc..ffe8c00 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -1042,7 +1042,7 @@ r300_texture_create_object(struct r300_screen *rscreen,
/* Create the backing buffer if needed. */
if (!tex->buf) {
tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048, TRUE,
- tex->domain);
+ tex->domain, 0);
if (!tex->buf) {
goto fail;
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index 0eaa817..4e6b897 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -107,11 +107,14 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
{
struct r600_texture *rtex = (struct r600_texture*)res;
struct pb_buffer *old_buf, *new_buf;
+ enum radeon_bo_flag flags = 0;
switch (res->b.b.usage) {
- case PIPE_USAGE_STAGING:
case PIPE_USAGE_DYNAMIC:
case PIPE_USAGE_STREAM:
+ flags = RADEON_FLAG_GTT_WC;
+ /* fall through */
+ case PIPE_USAGE_STAGING:
/* Transfers are likely to occur more often with these resources. */
res->domains = RADEON_DOMAIN_GTT;
break;
@@ -120,6 +123,7 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
default:
/* Not listing GTT here improves performance in some apps. */
res->domains = RADEON_DOMAIN_VRAM;
+ flags = RADEON_FLAG_GTT_WC;
break;
}
@@ -129,6 +133,7 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
res->domains = RADEON_DOMAIN_GTT;
+ flags = 0;
}
/* Tiled textures are unmappable. Always put them in VRAM. */
@@ -140,7 +145,7 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
/* Allocate a new resource. */
new_buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment,
use_reusable_pool,
- res->domains);
+ res->domains, flags);
if (!new_buf) {
return false;
}
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index bfda69e..6dd84a4 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1027,6 +1027,8 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
r600_init_temp_resource_from_box(&resource, texture, box, level,
R600_RESOURCE_FLAG_TRANSFER);
+ resource.usage = (usage & PIPE_TRANSFER_READ) ?
+ PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
/* Create the temporary texture. */
staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource);
diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
index 137c69c..d77217c 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -816,12 +816,14 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
for (i = 0; i < NUM_BUFFERS; ++i) {
unsigned msg_fb_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);
- if (!rvid_create_buffer(dec->ws, &dec->msg_fb_buffers[i], msg_fb_size, RADEON_DOMAIN_VRAM)) {
+ if (!rvid_create_buffer(dec->ws, &dec->msg_fb_buffers[i], msg_fb_size,
+ RADEON_DOMAIN_VRAM, 0)) {
RVID_ERR("Can't allocated message buffers.\n");
goto error;
}
- if (!rvid_create_buffer(dec->ws, &dec->bs_buffers[i], bs_buf_size, RADEON_DOMAIN_GTT)) {
+ if (!rvid_create_buffer(dec->ws, &dec->bs_buffers[i], bs_buf_size,
+ RADEON_DOMAIN_GTT, 0)) {
RVID_ERR("Can't allocated bitstream buffers.\n");
goto error;
}
@@ -830,7 +832,7 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
rvid_clear_buffer(dec->ws, dec->cs, &dec->bs_buffers[i]);
}
- if (!rvid_create_buffer(dec->ws, &dec->dpb, dpb_size, RADEON_DOMAIN_VRAM)) {
+ if (!rvid_create_buffer(dec->ws, &dec->dpb, dpb_size, RADEON_DOMAIN_VRAM, 0)) {
RVID_ERR("Can't allocated dpb.\n");
goto error;
}
diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c
index f5395b3..9174c97 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -191,7 +191,7 @@ static void rvce_destroy(struct pipe_video_codec *encoder)
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
if (enc->stream_handle) {
struct rvid_buffer fb;
- rvid_create_buffer(enc->ws, &fb, 512, RADEON_DOMAIN_GTT);
+ rvid_create_buffer(enc->ws, &fb, 512, RADEON_DOMAIN_GTT, 0);
enc->fb = &fb;
enc->session(enc);
enc->feedback(enc);
@@ -233,7 +233,7 @@ static void rvce_begin_frame(struct pipe_video_codec *encoder,
if (!enc->stream_handle) {
struct rvid_buffer fb;
enc->stream_handle = rvid_alloc_stream_handle();
- rvid_create_buffer(enc->ws, &fb, 512, RADEON_DOMAIN_GTT);
+ rvid_create_buffer(enc->ws, &fb, 512, RADEON_DOMAIN_GTT, 0);
enc->fb = &fb;
enc->session(enc);
enc->create(enc);
@@ -265,7 +265,7 @@ static void rvce_encode_bitstream(struct pipe_video_codec *encoder,
enc->bs_size = destination->width0;
*fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
- if (!rvid_create_buffer(enc->ws, enc->fb, 512, RADEON_DOMAIN_GTT)) {
+ if (!rvid_create_buffer(enc->ws, enc->fb, 512, RADEON_DOMAIN_GTT, 0)) {
RVID_ERR("Can't create feedback buffer.\n");
return;
}
@@ -390,7 +390,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
cpb_size = cpb_size * 3 / 2;
cpb_size = cpb_size * enc->cpb_num;
tmp_buf->destroy(tmp_buf);
- if (!rvid_create_buffer(enc->ws, &enc->cpb, cpb_size, RADEON_DOMAIN_VRAM)) {
+ if (!rvid_create_buffer(enc->ws, &enc->cpb, cpb_size, RADEON_DOMAIN_VRAM, 0)) {
RVID_ERR("Can't create CPB buffer.\n");
goto error;
}
diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c
index eae533e..17e9a59 100644
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -61,11 +61,13 @@ unsigned rvid_alloc_stream_handle()
/* create a buffer in the winsys */
bool rvid_create_buffer(struct radeon_winsys *ws, struct rvid_buffer *buffer,
- unsigned size, enum radeon_bo_domain domain)
+ unsigned size, enum radeon_bo_domain domain,
+ enum radeon_bo_flag flags)
{
buffer->domain = domain;
+ buffer->flags = flags;
- buffer->buf = ws->buffer_create(ws, size, 4096, false, domain);
+ buffer->buf = ws->buffer_create(ws, size, 4096, false, domain, flags);
if (!buffer->buf)
return false;
@@ -91,7 +93,8 @@ bool rvid_resize_buffer(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
struct rvid_buffer old_buf = *new_buf;
void *src = NULL, *dst = NULL;
- if (!rvid_create_buffer(ws, new_buf, new_size, new_buf->domain))
+ if (!rvid_create_buffer(ws, new_buf, new_size, new_buf->domain,
+ new_buf->flags))
goto error;
src = ws->buffer_map(old_buf.cs_handle, cs, PIPE_TRANSFER_READ);
@@ -191,7 +194,7 @@ void rvid_join_surfaces(struct radeon_winsys* ws, unsigned bind,
/* TODO: 2D tiling workaround */
alignment *= 2;
- pb = ws->buffer_create(ws, size, alignment, bind, RADEON_DOMAIN_VRAM);
+ pb = ws->buffer_create(ws, size, alignment, bind, RADEON_DOMAIN_VRAM, 0);
if (!pb)
return;
diff --git a/src/gallium/drivers/radeon/radeon_video.h b/src/gallium/drivers/radeon/radeon_video.h
index 55d2ca4..42de5a9 100644
--- a/src/gallium/drivers/radeon/radeon_video.h
+++ b/src/gallium/drivers/radeon/radeon_video.h
@@ -44,6 +44,7 @@
struct rvid_buffer
{
enum radeon_bo_domain domain;
+ enum radeon_bo_flag flags;
struct pb_buffer* buf;
struct radeon_winsys_cs_handle* cs_handle;
};
@@ -53,7 +54,8 @@ unsigned rvid_alloc_stream_handle(void);
/* create a buffer in the winsys */
bool rvid_create_buffer(struct radeon_winsys *ws, struct rvid_buffer *buffer,
- unsigned size, enum radeon_bo_domain domain);
+ unsigned size, enum radeon_bo_domain domain,
+ enum radeon_bo_flag flags);
/* destroy a buffer */
void rvid_destroy_buffer(struct rvid_buffer *buffer);
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index c64958a..1388b50 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2781,7 +2781,7 @@ static void si_set_sampler_states(struct si_context *sctx,
sctx->border_color_table =
si_resource_create_custom(&sctx->screen->b.b,
- PIPE_USAGE_STAGING,
+ PIPE_USAGE_DYNAMIC,
4096 * 4 * 4);
}
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index d06bb34..73f8d38 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -477,6 +477,10 @@ const struct pb_vtbl radeon_bo_vtbl = {
radeon_bo_get_base_buffer,
};
+#ifndef RADEON_GEM_GTT_WC
+#define RADEON_GEM_GTT_WC (1 << 2)
+#endif
+
static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
pb_size size,
const struct pb_desc *desc)
@@ -497,6 +501,10 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
args.size = size;
args.alignment = desc->alignment;
args.initial_domain = rdesc->initial_domains;
+ args.flags = 0;
+
+ if (rdesc->flags & RADEON_FLAG_GTT_WC)
+ args.flags |= RADEON_GEM_GTT_WC;
if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
&args, sizeof(args))) {
@@ -504,6 +512,7 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
fprintf(stderr, "radeon: size : %d bytes\n", size);
fprintf(stderr, "radeon: alignment : %d bytes\n", desc->alignment);
fprintf(stderr, "radeon: domains : %d\n", args.initial_domain);
+ fprintf(stderr, "radeon: flags : %d\n", args.flags);
return NULL;
}
@@ -784,7 +793,8 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
unsigned size,
unsigned alignment,
boolean use_reusable_pool,
- enum radeon_bo_domain domain)
+ enum radeon_bo_domain domain,
+ enum radeon_bo_flag flags)
{
struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
struct radeon_bomgr *mgr = radeon_bomgr(ws->kman);
@@ -798,13 +808,20 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
/* Additional criteria for the cache manager. */
desc.base.usage = domain;
desc.initial_domains = domain;
+ desc.flags = flags;
/* Assign a buffer manager. */
if (use_reusable_pool) {
- if (domain == RADEON_DOMAIN_VRAM)
- provider = ws->cman_vram;
- else
+ if (domain == RADEON_DOMAIN_VRAM) {
+ if (flags & RADEON_FLAG_GTT_WC)
+ provider = ws->cman_vram_gtt_wc;
+ else
+ provider = ws->cman_vram;
+ } else if (flags & RADEON_FLAG_GTT_WC) {
+ provider = ws->cman_gtt_wc;
+ } else {
provider = ws->cman_gtt;
+ }
} else {
provider = ws->kman;
}
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
index f5b122f..1c00a13 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
@@ -42,6 +42,7 @@ struct radeon_bo_desc {
struct pb_desc base;
unsigned initial_domains;
+ unsigned flags;
};
struct radeon_bo {
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 67375dc..3596f8d 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -606,7 +606,7 @@ radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
/* Create a fence, which is a dummy BO. */
fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
- RADEON_DOMAIN_GTT);
+ RADEON_DOMAIN_GTT, 0);
/* Add the fence as a dummy relocation. */
cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 0834cbd..398c089 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -418,7 +418,9 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws)
pipe_mutex_destroy(ws->cs_stack_lock);
ws->cman_vram->destroy(ws->cman_vram);
+ ws->cman_vram_gtt_wc->destroy(ws->cman_vram_gtt_wc);
ws->cman_gtt->destroy(ws->cman_gtt);
+ ws->cman_gtt_wc->destroy(ws->cman_gtt_wc);
ws->kman->destroy(ws->kman);
if (ws->gen >= DRV_R600) {
radeon_surface_manager_free(ws->surf_man);
@@ -636,9 +638,15 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
ws->cman_vram = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0);
if (!ws->cman_vram)
goto fail;
+ ws->cman_vram_gtt_wc = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0);
+ if (!ws->cman_vram_gtt_wc)
+ goto fail;
ws->cman_gtt = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0);
if (!ws->cman_gtt)
goto fail;
+ ws->cman_gtt_wc = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0);
+ if (!ws->cman_gtt_wc)
+ goto fail;
if (ws->gen >= DRV_R600) {
ws->surf_man = radeon_surface_manager_new(fd);
@@ -695,8 +703,12 @@ fail:
pipe_mutex_unlock(fd_tab_mutex);
if (ws->cman_gtt)
ws->cman_gtt->destroy(ws->cman_gtt);
+ if (ws->cman_gtt_wc)
+ ws->cman_gtt_wc->destroy(ws->cman_gtt_wc);
if (ws->cman_vram)
ws->cman_vram->destroy(ws->cman_vram);
+ if (ws->cman_vram_gtt_wc)
+ ws->cman_vram_gtt_wc->destroy(ws->cman_vram_gtt_wc);
if (ws->kman)
ws->kman->destroy(ws->kman);
if (ws->surf_man)
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
index fc6f53b..ea6f7f0 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
@@ -58,7 +58,9 @@ struct radeon_drm_winsys {
struct pb_manager *kman;
struct pb_manager *cman_vram;
+ struct pb_manager *cman_vram_gtt_wc;
struct pb_manager *cman_gtt;
+ struct pb_manager *cman_gtt_wc;
struct radeon_surface_manager *surf_man;
uint32_t num_cpus; /* Number of CPUs. */
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 6df1987..766071f 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -65,6 +65,10 @@ enum radeon_bo_domain { /* bitfield */
RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT
};
+enum radeon_bo_flag { /* bitfield */
+ RADEON_FLAG_GTT_WC = (1 << 0)
+};
+
enum radeon_bo_usage { /* bitfield */
RADEON_USAGE_READ = 2,
RADEON_USAGE_WRITE = 4,
@@ -285,7 +289,8 @@ struct radeon_winsys {
unsigned size,
unsigned alignment,
boolean use_reusable_pool,
- enum radeon_bo_domain domain);
+ enum radeon_bo_domain domain,
+ enum radeon_bo_flag flags);
struct radeon_winsys_cs_handle *(*buffer_get_cs_handle)(
struct pb_buffer *buf);
--
2.0.0
More information about the dri-devel mailing list