<div dir="ltr"><div>Reviewed-by: Marek Olšák <<a href="mailto:marek.olsak@amd.com">marek.olsak@amd.com</a>></div><div><br></div><div>Marek</div></div><br><div class="gmail_quote"><div dir="ltr">On Thu, Nov 22, 2018 at 6:32 AM Nicolai Hähnle <<a href="mailto:nhaehnle@gmail.com">nhaehnle@gmail.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">From: Nicolai Hähnle <<a href="mailto:nicolai.haehnle@amd.com" target="_blank">nicolai.haehnle@amd.com</a>><br>
<br>
Introduce a new driver-private transfer flag RADEON_TRANSFER_TEMPORARY<br>
that specifies whether the caller will use buffer_unmap or not. The<br>
default behavior is set to permanent maps, because that's what drivers<br>
do for Gallium buffer maps.<br>
<br>
This should eliminate the need for hacks in libdrm. Assertions are added<br>
to catch when the buffer_unmap calls don't match the (temporary)<br>
buffer_map calls.<br>
<br>
I did my best to update r600 for consistency (r300 needs no changes<br>
because it never calls buffer_unmap), even though the radeon winsys<br>
ignores the new flag.<br>
<br>
As an added bonus, this should actually improve the performance of<br>
the normal fast path, because we no longer call into libdrm at all<br>
after the first map, and there's one fewer atomic in the winsys itself<br>
(there are now no atomics left in the UNSYNCHRONIZED fast path).<br>
<br>
Cc: Leo Liu <<a href="mailto:leo.liu@amd.com" target="_blank">leo.liu@amd.com</a>><br>
v2:<br>
- remove comment about visible VRAM (Marek)<br>
- don't rely on amdgpu_bo_cpu_map doing an atomic write<br>
---<br>
src/gallium/drivers/r600/evergreen_compute.c | 4 +-<br>
src/gallium/drivers/r600/r600_asm.c | 4 +-<br>
src/gallium/drivers/r600/r600_shader.c | 4 +-<br>
src/gallium/drivers/r600/radeon_uvd.c | 8 +-<br>
src/gallium/drivers/r600/radeon_vce.c | 4 +-<br>
src/gallium/drivers/r600/radeon_video.c | 6 +-<br>
src/gallium/drivers/radeon/radeon_uvd.c | 10 +-<br>
src/gallium/drivers/radeon/radeon_uvd_enc.c | 6 +-<br>
src/gallium/drivers/radeon/radeon_vce.c | 4 +-<br>
src/gallium/drivers/radeon/radeon_vcn_dec.c | 18 ++--<br>
src/gallium/drivers/radeon/radeon_vcn_enc.c | 4 +-<br>
src/gallium/drivers/radeon/radeon_video.c | 6 +-<br>
src/gallium/drivers/radeon/radeon_winsys.h | 14 ++-<br>
src/gallium/drivers/radeonsi/si_shader.c | 3 +-<br>
src/gallium/include/pipe/p_defines.h | 8 +-<br>
src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 96 +++++++++++++-------<br>
src/gallium/winsys/amdgpu/drm/amdgpu_bo.h | 3 +-<br>
17 files changed, 140 insertions(+), 62 deletions(-)<br>
<br>
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c<br>
index a77f58242e3..9085be4e2f3 100644<br>
--- a/src/gallium/drivers/r600/evergreen_compute.c<br>
+++ b/src/gallium/drivers/r600/evergreen_compute.c<br>
@@ -431,21 +431,23 @@ static void *evergreen_create_compute_state(struct pipe_context *ctx,<br>
COMPUTE_DBG(rctx->screen, "*** evergreen_create_compute_state\n");<br>
header = cso->prog;<br>
code = cso->prog + sizeof(struct pipe_llvm_program_header);<br>
radeon_shader_binary_init(&shader->binary);<br>
r600_elf_read(code, header->num_bytes, &shader->binary);<br>
r600_create_shader(&shader->bc, &shader->binary, &use_kill);<br>
<br>
/* Upload code + ROdata */<br>
shader->code_bo = r600_compute_buffer_alloc_vram(rctx->screen,<br>
shader->bc.ndw * 4);<br>
- p = r600_buffer_map_sync_with_rings(&rctx->b, shader->code_bo, PIPE_TRANSFER_WRITE);<br>
+ p = r600_buffer_map_sync_with_rings(<br>
+ &rctx->b, shader->code_bo,<br>
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);<br>
//TODO: use util_memcpy_cpu_to_le32 ?<br>
memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4);<br>
rctx->b.ws->buffer_unmap(shader->code_bo->buf);<br>
#endif<br>
<br>
return shader;<br>
}<br>
<br>
static void evergreen_delete_compute_state(struct pipe_context *ctx, void *state)<br>
{<br>
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c<br>
index 7029be24f4b..4ba77c535f9 100644<br>
--- a/src/gallium/drivers/r600/r600_asm.c<br>
+++ b/src/gallium/drivers/r600/r600_asm.c<br>
@@ -2765,21 +2765,23 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,<br>
<br>
u_suballocator_alloc(rctx->allocator_fetch_shader, fs_size, 256,<br>
&shader->offset,<br>
(struct pipe_resource**)&shader->buffer);<br>
if (!shader->buffer) {<br>
r600_bytecode_clear(&bc);<br>
FREE(shader);<br>
return NULL;<br>
}<br>
<br>
- bytecode = r600_buffer_map_sync_with_rings(&rctx->b, shader->buffer, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED);<br>
+ bytecode = r600_buffer_map_sync_with_rings<br>
+ (&rctx->b, shader->buffer,<br>
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED | RADEON_TRANSFER_TEMPORARY);<br>
bytecode += shader->offset / 4;<br>
<br>
if (R600_BIG_ENDIAN) {<br>
for (i = 0; i < fs_size / 4; ++i) {<br>
bytecode[i] = util_cpu_to_le32(bc.bytecode[i]);<br>
}<br>
} else {<br>
memcpy(bytecode, bc.bytecode, fs_size);<br>
}<br>
rctx->b.ws->buffer_unmap(shader->buffer->buf);<br>
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c<br>
index 408939d1105..fc826470d69 100644<br>
--- a/src/gallium/drivers/r600/r600_shader.c<br>
+++ b/src/gallium/drivers/r600/r600_shader.c<br>
@@ -134,21 +134,23 @@ static int store_shader(struct pipe_context *ctx,<br>
{<br>
struct r600_context *rctx = (struct r600_context *)ctx;<br>
uint32_t *ptr, i;<br>
<br>
if (shader->bo == NULL) {<br>
shader->bo = (struct r600_resource*)<br>
pipe_buffer_create(ctx->screen, 0, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4);<br>
if (shader->bo == NULL) {<br>
return -ENOMEM;<br>
}<br>
- ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE);<br>
+ ptr = r600_buffer_map_sync_with_rings(<br>
+ &rctx->b, shader->bo,<br>
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);<br>
if (R600_BIG_ENDIAN) {<br>
for (i = 0; i < shader->shader.bc.ndw; ++i) {<br>
ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]);<br>
}<br>
} else {<br>
memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr));<br>
}<br>
rctx->b.ws->buffer_unmap(shader->bo->buf);<br>
}<br>
<br>
diff --git a/src/gallium/drivers/r600/radeon_uvd.c b/src/gallium/drivers/r600/radeon_uvd.c<br>
index 495a93dc55a..5568f2138e4 100644<br>
--- a/src/gallium/drivers/r600/radeon_uvd.c<br>
+++ b/src/gallium/drivers/r600/radeon_uvd.c<br>
@@ -145,21 +145,22 @@ static bool have_it(struct ruvd_decoder *dec)<br>
/* map the next available message/feedback/itscaling buffer */<br>
static void map_msg_fb_it_buf(struct ruvd_decoder *dec)<br>
{<br>
struct rvid_buffer* buf;<br>
uint8_t *ptr;<br>
<br>
/* grab the current message/feedback buffer */<br>
buf = &dec->msg_fb_it_buffers[dec->cur_buffer];<br>
<br>
/* and map it for CPU access */<br>
- ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);<br>
+ ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,<br>
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);<br>
<br>
/* calc buffer offsets */<br>
dec->msg = (struct ruvd_msg *)ptr;<br>
memset(dec->msg, 0, sizeof(*dec->msg));<br>
<br>
dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);<br>
if (have_it(dec))<br>
dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + dec->fb_size);<br>
}<br>
<br>
@@ -1061,21 +1062,21 @@ static void ruvd_begin_frame(struct pipe_video_codec *decoder,<br>
<br>
assert(decoder);<br>
<br>
frame = ++dec->frame_number;<br>
vl_video_buffer_set_associated_data(target, decoder, (void *)frame,<br>
&ruvd_destroy_associated_data);<br>
<br>
dec->bs_size = 0;<br>
dec->bs_ptr = dec->ws->buffer_map(<br>
dec->bs_buffers[dec->cur_buffer].res->buf,<br>
- dec->cs, PIPE_TRANSFER_WRITE);<br>
+ dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);<br>
}<br>
<br>
/**<br>
* decode a macroblock<br>
*/<br>
static void ruvd_decode_macroblock(struct pipe_video_codec *decoder,<br>
struct pipe_video_buffer *target,<br>
struct pipe_picture_desc *picture,<br>
const struct pipe_macroblock *macroblocks,<br>
unsigned num_macroblocks)<br>
@@ -1114,21 +1115,22 @@ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,<br>
new_size += 2; /* save for EOI */<br>
<br>
if (new_size > buf->res->buf->size) {<br>
dec->ws->buffer_unmap(buf->res->buf);<br>
if (!rvid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {<br>
RVID_ERR("Can't resize bitstream buffer!");<br>
return;<br>
}<br>
<br>
dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,<br>
- PIPE_TRANSFER_WRITE);<br>
+ PIPE_TRANSFER_WRITE |<br>
+ RADEON_TRANSFER_TEMPORARY);<br>
if (!dec->bs_ptr)<br>
return;<br>
<br>
dec->bs_ptr += dec->bs_size;<br>
}<br>
<br>
memcpy(dec->bs_ptr, buffers[i], sizes[i]);<br>
dec->bs_size += sizes[i];<br>
dec->bs_ptr += sizes[i];<br>
}<br>
diff --git a/src/gallium/drivers/r600/radeon_vce.c b/src/gallium/drivers/r600/radeon_vce.c<br>
index 60ba12a593a..e38b927b1d4 100644<br>
--- a/src/gallium/drivers/r600/radeon_vce.c<br>
+++ b/src/gallium/drivers/r600/radeon_vce.c<br>
@@ -346,21 +346,23 @@ static void rvce_end_frame(struct pipe_video_codec *encoder,<br>
}<br>
}<br>
<br>
static void rvce_get_feedback(struct pipe_video_codec *encoder,<br>
void *feedback, unsigned *size)<br>
{<br>
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;<br>
struct rvid_buffer *fb = feedback;<br>
<br>
if (size) {<br>
- uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);<br>
+ uint32_t *ptr = enc->ws->buffer_map(<br>
+ fb->res->buf, enc->cs,<br>
+ PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY);<br>
<br>
if (ptr[1]) {<br>
*size = ptr[4] - ptr[9];<br>
} else {<br>
*size = 0;<br>
}<br>
<br>
enc->ws->buffer_unmap(fb->res->buf);<br>
}<br>
//dump_feedback(enc, fb);<br>
diff --git a/src/gallium/drivers/r600/radeon_video.c b/src/gallium/drivers/r600/radeon_video.c<br>
index 02fcf77d4ff..8e0af448be5 100644<br>
--- a/src/gallium/drivers/r600/radeon_video.c<br>
+++ b/src/gallium/drivers/r600/radeon_video.c<br>
@@ -90,25 +90,27 @@ bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_cmdbuf *cs,<br>
{<br>
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;<br>
struct radeon_winsys* ws = rscreen->ws;<br>
unsigned bytes = MIN2(new_buf->res->buf->size, new_size);<br>
struct rvid_buffer old_buf = *new_buf;<br>
void *src = NULL, *dst = NULL;<br>
<br>
if (!rvid_create_buffer(screen, new_buf, new_size, new_buf->usage))<br>
goto error;<br>
<br>
- src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ);<br>
+ src = ws->buffer_map(old_buf.res->buf, cs,<br>
+ PIPE_TRANSFER_READ | RADEON_TRANSFER_TEMPORARY);<br>
if (!src)<br>
goto error;<br>
<br>
- dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE);<br>
+ dst = ws->buffer_map(new_buf->res->buf, cs,<br>
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);<br>
if (!dst)<br>
goto error;<br>
<br>
memcpy(dst, src, bytes);<br>
if (new_size > bytes) {<br>
new_size -= bytes;<br>
dst += bytes;<br>
memset(dst, 0, new_size);<br>
}<br>
ws->buffer_unmap(new_buf->res->buf);<br>
diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c<br>
index 62af1a311c2..ca066e89823 100644<br>
--- a/src/gallium/drivers/radeon/radeon_uvd.c<br>
+++ b/src/gallium/drivers/radeon/radeon_uvd.c<br>
@@ -141,21 +141,22 @@ static bool have_it(struct ruvd_decoder *dec)<br>
/* map the next available message/feedback/itscaling buffer */<br>
static void map_msg_fb_it_buf(struct ruvd_decoder *dec)<br>
{<br>
struct rvid_buffer* buf;<br>
uint8_t *ptr;<br>
<br>
/* grab the current message/feedback buffer */<br>
buf = &dec->msg_fb_it_buffers[dec->cur_buffer];<br>
<br>
/* and map it for CPU access */<br>
- ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);<br>
+ ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,<br>
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);<br>
<br>
/* calc buffer offsets */<br>
dec->msg = (struct ruvd_msg *)ptr;<br>
memset(dec->msg, 0, sizeof(*dec->msg));<br>
<br>
dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);<br>
if (have_it(dec))<br>
dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + dec->fb_size);<br>
}<br>
<br>
@@ -1008,21 +1009,21 @@ static void ruvd_begin_frame(struct pipe_video_codec *decoder,<br>
<br>
assert(decoder);<br>
<br>
frame = ++dec->frame_number;<br>
vl_video_buffer_set_associated_data(target, decoder, (void *)frame,<br>
&ruvd_destroy_associated_data);<br>
<br>
dec->bs_size = 0;<br>
dec->bs_ptr = dec->ws->buffer_map(<br>
dec->bs_buffers[dec->cur_buffer].res->buf,<br>
- dec->cs, PIPE_TRANSFER_WRITE);<br>
+ dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);<br>
}<br>
<br>
/**<br>
* decode a macroblock<br>
*/<br>
static void ruvd_decode_macroblock(struct pipe_video_codec *decoder,<br>
struct pipe_video_buffer *target,<br>
struct pipe_picture_desc *picture,<br>
const struct pipe_macroblock *macroblocks,<br>
unsigned num_macroblocks)<br>
@@ -1053,22 +1054,23 @@ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,<br>
struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];<br>
unsigned new_size = dec->bs_size + sizes[i];<br>
<br>
if (new_size > buf->res->buf->size) {<br>
dec->ws->buffer_unmap(buf->res->buf);<br>
if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {<br>
RVID_ERR("Can't resize bitstream buffer!");<br>
return;<br>
}<br>
<br>
- dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,<br>
- PIPE_TRANSFER_WRITE);<br>
+ dec->bs_ptr = dec->ws->buffer_map(<br>
+ buf->res->buf, dec->cs,<br>
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);<br>
if (!dec->bs_ptr)<br>
return;<br>
<br>
dec->bs_ptr += dec->bs_size;<br>
}<br>
<br>
memcpy(dec->bs_ptr, buffers[i], sizes[i]);<br>
dec->bs_size += sizes[i];<br>
dec->bs_ptr += sizes[i];<br>
}<br>
diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c b/src/gallium/drivers/radeon/radeon_uvd_enc.c<br>
index 4384e5e1646..3164dbb2c20 100644<br>
--- a/src/gallium/drivers/radeon/radeon_uvd_enc.c<br>
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c<br>
@@ -256,23 +256,23 @@ radeon_uvd_enc_destroy(struct pipe_video_codec *encoder)<br>
<br>
static void<br>
radeon_uvd_enc_get_feedback(struct pipe_video_codec *encoder,<br>
void *feedback, unsigned *size)<br>
{<br>
struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;<br>
struct rvid_buffer *fb = feedback;<br>
<br>
if (NULL != size) {<br>
radeon_uvd_enc_feedback_t *fb_data =<br>
- (radeon_uvd_enc_feedback_t *) enc->ws->buffer_map(fb->res->buf,<br>
- enc->cs,<br>
- PIPE_TRANSFER_READ_WRITE);<br>
+ (radeon_uvd_enc_feedback_t *) enc->ws->buffer_map(<br>
+ fb->res->buf, enc->cs,<br>
+ PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY);<br>
<br>
if (!fb_data->status)<br>
*size = fb_data->bitstream_size;<br>
else<br>
*size = 0;<br>
enc->ws->buffer_unmap(fb->res->buf);<br>
}<br>
<br>
si_vid_destroy_buffer(fb);<br>
FREE(fb);<br>
diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c<br>
index 310d1654b05..94df06e88c6 100644<br>
--- a/src/gallium/drivers/radeon/radeon_vce.c<br>
+++ b/src/gallium/drivers/radeon/radeon_vce.c<br>
@@ -345,21 +345,23 @@ static void rvce_end_frame(struct pipe_video_codec *encoder,<br>
}<br>
}<br>
<br>
static void rvce_get_feedback(struct pipe_video_codec *encoder,<br>
void *feedback, unsigned *size)<br>
{<br>
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;<br>
struct rvid_buffer *fb = feedback;<br>
<br>
if (size) {<br>
- uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);<br>
+ uint32_t *ptr = enc->ws->buffer_map(<br>
+ fb->res->buf, enc->cs,<br>
+ PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY);<br>
<br>
if (ptr[1]) {<br>
*size = ptr[4] - ptr[9];<br>
} else {<br>
*size = 0;<br>
}<br>
<br>
enc->ws->buffer_unmap(fb->res->buf);<br>
}<br>
//dump_feedback(enc, fb);<br>
diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.c b/src/gallium/drivers/radeon/radeon_vcn_dec.c<br>
index 1ee85ae3d3f..e402af21a64 100644<br>
--- a/src/gallium/drivers/radeon/radeon_vcn_dec.c<br>
+++ b/src/gallium/drivers/radeon/radeon_vcn_dec.c<br>
@@ -934,21 +934,23 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,<br>
ctx_size += 8 * 2 * 4096;<br>
<br>
if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2)<br>
ctx_size += 8 * 2 * 4096;<br>
<br>
if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT))<br>
RVID_ERR("Can't allocated context buffer.\n");<br>
si_vid_clear_buffer(dec->base.context, &dec->ctx);<br>
<br>
/* ctx needs probs table */<br>
- ptr = dec->ws->buffer_map(dec->ctx.res->buf, dec->cs, PIPE_TRANSFER_WRITE);<br>
+ ptr = dec->ws->buffer_map(<br>
+ dec->ctx.res->buf, dec->cs,<br>
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);<br>
fill_probs_table(ptr);<br>
dec->ws->buffer_unmap(dec->ctx.res->buf);<br>
}<br>
break;<br>
}<br>
default:<br>
assert(0);<br>
return NULL;<br>
}<br>
<br>
@@ -1027,21 +1029,22 @@ static bool have_probs(struct radeon_decoder *dec)<br>
/* map the next available message/feedback/itscaling buffer */<br>
static void map_msg_fb_it_probs_buf(struct radeon_decoder *dec)<br>
{<br>
struct rvid_buffer* buf;<br>
uint8_t *ptr;<br>
<br>
/* grab the current message/feedback buffer */<br>
buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer];<br>
<br>
/* and map it for CPU access */<br>
- ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);<br>
+ ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,<br>
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);<br>
<br>
/* calc buffer offsets */<br>
dec->msg = ptr;<br>
<br>
dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);<br>
if (have_it(dec))<br>
dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE);<br>
else if (have_probs(dec))<br>
dec->probs = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE);<br>
}<br>
@@ -1305,21 +1308,21 @@ static void radeon_dec_begin_frame(struct pipe_video_codec *decoder,<br>
assert(decoder);<br>
<br>
frame = ++dec->frame_number;<br>
if (dec->stream_type != RDECODE_CODEC_VP9)<br>
vl_video_buffer_set_associated_data(target, decoder, (void *)frame,<br>
&radeon_dec_destroy_associated_data);<br>
<br>
dec->bs_size = 0;<br>
dec->bs_ptr = dec->ws->buffer_map(<br>
dec->bs_buffers[dec->cur_buffer].res->buf,<br>
- dec->cs, PIPE_TRANSFER_WRITE);<br>
+ dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);<br>
}<br>
<br>
/**<br>
* decode a macroblock<br>
*/<br>
static void radeon_dec_decode_macroblock(struct pipe_video_codec *decoder,<br>
struct pipe_video_buffer *target,<br>
struct pipe_picture_desc *picture,<br>
const struct pipe_macroblock *macroblocks,<br>
unsigned num_macroblocks)<br>
@@ -1350,22 +1353,23 @@ static void radeon_dec_decode_bitstream(struct pipe_video_codec *decoder,<br>
struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];<br>
unsigned new_size = dec->bs_size + sizes[i];<br>
<br>
if (new_size > buf->res->buf->size) {<br>
dec->ws->buffer_unmap(buf->res->buf);<br>
if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {<br>
RVID_ERR("Can't resize bitstream buffer!");<br>
return;<br>
}<br>
<br>
- dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,<br>
- PIPE_TRANSFER_WRITE);<br>
+ dec->bs_ptr = dec->ws->buffer_map(<br>
+ buf->res->buf, dec->cs,<br>
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);<br>
if (!dec->bs_ptr)<br>
return;<br>
<br>
dec->bs_ptr += dec->bs_size;<br>
}<br>
<br>
memcpy(dec->bs_ptr, buffers[i], sizes[i]);<br>
dec->bs_size += sizes[i];<br>
dec->bs_ptr += sizes[i];<br>
}<br>
@@ -1536,21 +1540,23 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,<br>
}<br>
<br>
si_vid_clear_buffer(context, &dec->msg_fb_it_probs_buffers[i]);<br>
si_vid_clear_buffer(context, &dec->bs_buffers[i]);<br>
<br>
if (have_probs(dec)) {<br>
struct rvid_buffer* buf;<br>
void *ptr;<br>
<br>
buf = &dec->msg_fb_it_probs_buffers[i];<br>
- ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);<br>
+ ptr = dec->ws->buffer_map(<br>
+ buf->res->buf, dec->cs,<br>
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);<br>
ptr += FB_BUFFER_OFFSET + FB_BUFFER_SIZE;<br>
fill_probs_table(ptr);<br>
dec->ws->buffer_unmap(buf->res->buf);<br>
}<br>
}<br>
<br>
dpb_size = calc_dpb_size(dec);<br>
if (dpb_size) {<br>
if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {<br>
RVID_ERR("Can't allocated dpb.\n");<br>
diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c b/src/gallium/drivers/radeon/radeon_vcn_enc.c<br>
index e8676f6c721..7d64a28a405 100644<br>
--- a/src/gallium/drivers/radeon/radeon_vcn_enc.c<br>
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c<br>
@@ -237,21 +237,23 @@ static void radeon_enc_destroy(struct pipe_video_codec *encoder)<br>
FREE(enc);<br>
}<br>
<br>
static void radeon_enc_get_feedback(struct pipe_video_codec *encoder,<br>
void *feedback, unsigned *size)<br>
{<br>
struct radeon_encoder *enc = (struct radeon_encoder*)encoder;<br>
struct rvid_buffer *fb = feedback;<br>
<br>
if (size) {<br>
- uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);<br>
+ uint32_t *ptr = enc->ws->buffer_map(<br>
+ fb->res->buf, enc->cs,<br>
+ PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY);<br>
if (ptr[1])<br>
*size = ptr[6];<br>
else<br>
*size = 0;<br>
enc->ws->buffer_unmap(fb->res->buf);<br>
}<br>
<br>
si_vid_destroy_buffer(fb);<br>
FREE(fb);<br>
}<br>
diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c<br>
index a39ce4cc73e..bb1173e8005 100644<br>
--- a/src/gallium/drivers/radeon/radeon_video.c<br>
+++ b/src/gallium/drivers/radeon/radeon_video.c<br>
@@ -81,25 +81,27 @@ bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_cmdbuf *cs,<br>
{<br>
struct si_screen *sscreen = (struct si_screen *)screen;<br>
struct radeon_winsys* ws = sscreen->ws;<br>
unsigned bytes = MIN2(new_buf->res->buf->size, new_size);<br>
struct rvid_buffer old_buf = *new_buf;<br>
void *src = NULL, *dst = NULL;<br>
<br>
if (!si_vid_create_buffer(screen, new_buf, new_size, new_buf->usage))<br>
goto error;<br>
<br>
- src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ);<br>
+ src = ws->buffer_map(old_buf.res->buf, cs,<br>
+ PIPE_TRANSFER_READ | RADEON_TRANSFER_TEMPORARY);<br>
if (!src)<br>
goto error;<br>
<br>
- dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE);<br>
+ dst = ws->buffer_map(new_buf->res->buf, cs,<br>
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);<br>
if (!dst)<br>
goto error;<br>
<br>
memcpy(dst, src, bytes);<br>
if (new_size > bytes) {<br>
new_size -= bytes;<br>
dst += bytes;<br>
memset(dst, 0, new_size);<br>
}<br>
ws->buffer_unmap(new_buf->res->buf);<br>
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h<br>
index 49f8bb279e5..a56ff75ad24 100644<br>
--- a/src/gallium/drivers/radeon/radeon_winsys.h<br>
+++ b/src/gallium/drivers/radeon/radeon_winsys.h<br>
@@ -69,20 +69,29 @@ enum radeon_bo_usage { /* bitfield */<br>
RADEON_USAGE_READ = 2,<br>
RADEON_USAGE_WRITE = 4,<br>
RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE,<br>
<br>
/* The winsys ensures that the CS submission will be scheduled after<br>
* previously flushed CSs referencing this BO in a conflicting way.<br>
*/<br>
RADEON_USAGE_SYNCHRONIZED = 8<br>
};<br>
<br>
+enum radeon_transfer_flags {<br>
+ /* Indicates that the caller will unmap the buffer.<br>
+ *<br>
+ * Not unmapping buffers is an important performance optimization for<br>
+ * OpenGL (avoids kernel overhead for frequently mapped buffers).<br>
+ */<br>
+ RADEON_TRANSFER_TEMPORARY = (PIPE_TRANSFER_DRV_PRV << 0),<br>
+};<br>
+<br>
#define RADEON_SPARSE_PAGE_SIZE (64 * 1024)<br>
<br>
enum ring_type {<br>
RING_GFX = 0,<br>
RING_COMPUTE,<br>
RING_DMA,<br>
RING_UVD,<br>
RING_VCE,<br>
RING_UVD_ENC,<br>
RING_VCN_DEC,<br>
@@ -287,23 +296,26 @@ struct radeon_winsys {<br>
struct pb_buffer *(*buffer_create)(struct radeon_winsys *ws,<br>
uint64_t size,<br>
unsigned alignment,<br>
enum radeon_bo_domain domain,<br>
enum radeon_bo_flag flags);<br>
<br>
/**<br>
* Map the entire data store of a buffer object into the client's address<br>
* space.<br>
*<br>
+ * Callers are expected to unmap buffers again if and only if the<br>
+ * RADEON_TRANSFER_TEMPORARY flag is set in \p usage.<br>
+ *<br>
* \param buf A winsys buffer object to map.<br>
* \param cs A command stream to flush if the buffer is referenced by it.<br>
- * \param usage A bitmask of the PIPE_TRANSFER_* flags.<br>
+ * \param usage A bitmask of the PIPE_TRANSFER_* and RADEON_TRANSFER_* flags.<br>
* \return The pointer at the beginning of the buffer.<br>
*/<br>
void *(*buffer_map)(struct pb_buffer *buf,<br>
struct radeon_cmdbuf *cs,<br>
enum pipe_transfer_usage usage);<br>
<br>
/**<br>
* Unmap a buffer object from the client's address space.<br>
*<br>
* \param buf A winsys buffer object to unmap.<br>
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c<br>
index 19522cc97b1..d455fb5db6a 100644<br>
--- a/src/gallium/drivers/radeonsi/si_shader.c<br>
+++ b/src/gallium/drivers/radeonsi/si_shader.c<br>
@@ -5286,21 +5286,22 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)<br>
0 : SI_RESOURCE_FLAG_READ_ONLY,<br>
PIPE_USAGE_IMMUTABLE,<br>
align(bo_size, SI_CPDMA_ALIGNMENT),<br>
256);<br>
if (!shader->bo)<br>
return -ENOMEM;<br>
<br>
/* Upload. */<br>
ptr = sscreen->ws->buffer_map(shader->bo->buf, NULL,<br>
PIPE_TRANSFER_READ_WRITE |<br>
- PIPE_TRANSFER_UNSYNCHRONIZED);<br>
+ PIPE_TRANSFER_UNSYNCHRONIZED |<br>
+ RADEON_TRANSFER_TEMPORARY);<br>
<br>
/* Don't use util_memcpy_cpu_to_le32. LLVM binaries are<br>
* endian-independent. */<br>
if (prolog) {<br>
memcpy(ptr, prolog->code, prolog->code_size);<br>
ptr += prolog->code_size;<br>
}<br>
if (previous_stage) {<br>
memcpy(ptr, previous_stage->code, previous_stage->code_size);<br>
ptr += previous_stage->code_size;<br>
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h<br>
index 693f041b1da..e99895d30d8 100644<br>
--- a/src/gallium/include/pipe/p_defines.h<br>
+++ b/src/gallium/include/pipe/p_defines.h<br>
@@ -334,21 +334,27 @@ enum pipe_transfer_usage<br>
*/<br>
PIPE_TRANSFER_PERSISTENT = (1 << 13),<br>
<br>
/**<br>
* If PERSISTENT is set, this ensures any writes done by the device are<br>
* immediately visible to the CPU and vice versa.<br>
*<br>
* PIPE_RESOURCE_FLAG_MAP_COHERENT must be set when creating<br>
* the resource.<br>
*/<br>
- PIPE_TRANSFER_COHERENT = (1 << 14)<br>
+ PIPE_TRANSFER_COHERENT = (1 << 14),<br>
+<br>
+ /**<br>
+ * This and higher bits are reserved for private use by drivers. Drivers<br>
+ * should use this as (PIPE_TRANSFER_DRV_PRV << i).<br>
+ */<br>
+ PIPE_TRANSFER_DRV_PRV = (1 << 24)<br>
};<br>
<br>
/**<br>
* Flags for the flush function.<br>
*/<br>
enum pipe_flush_flags<br>
{<br>
PIPE_FLUSH_END_OF_FRAME = (1 << 0),<br>
PIPE_FLUSH_DEFERRED = (1 << 1),<br>
PIPE_FLUSH_FENCE_FD = (1 << 2),<br>
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c<br>
index 9f0d4c12482..99cd8cb31a5 100644<br>
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c<br>
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c<br>
@@ -49,20 +49,21 @@<br>
struct amdgpu_sparse_backing_chunk {<br>
uint32_t begin, end;<br>
};<br>
<br>
static struct pb_buffer *<br>
amdgpu_bo_create(struct radeon_winsys *rws,<br>
uint64_t size,<br>
unsigned alignment,<br>
enum radeon_bo_domain domain,<br>
enum radeon_bo_flag flags);<br>
+static void amdgpu_bo_unmap(struct pb_buffer *buf);<br>
<br>
static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,<br>
enum radeon_bo_usage usage)<br>
{<br>
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);<br>
struct amdgpu_winsys *ws = bo->ws;<br>
int64_t abs_timeout;<br>
<br>
if (timeout == 0) {<br>
if (p_atomic_read(&bo->num_active_ioctls))<br>
@@ -166,20 +167,26 @@ static void amdgpu_bo_remove_fences(struct amdgpu_winsys_bo *bo)<br>
bo->max_fences = 0;<br>
}<br>
<br>
void amdgpu_bo_destroy(struct pb_buffer *_buf)<br>
{<br>
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);<br>
struct amdgpu_winsys *ws = bo->ws;<br>
<br>
assert(bo->bo && "must not be called for slab entries");<br>
<br>
+ if (!bo->is_user_ptr && bo->cpu_ptr) {<br>
+ bo->cpu_ptr = NULL;<br>
+ amdgpu_bo_unmap(&bo->base);<br>
+ }<br>
+ assert(bo->is_user_ptr || bo->u.real.map_count == 0);<br>
+<br>
if (ws->debug_all_bos) {<br>
simple_mtx_lock(&ws->global_bo_list_lock);<br>
LIST_DEL(&bo->u.real.global_list_item);<br>
ws->num_buffers--;<br>
simple_mtx_unlock(&ws->global_bo_list_lock);<br>
}<br>
<br>
simple_mtx_lock(&ws->bo_export_table_lock);<br>
util_hash_table_remove(ws->bo_export_table, bo->bo);<br>
simple_mtx_unlock(&ws->bo_export_table_lock);<br>
@@ -188,54 +195,66 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)<br>
amdgpu_va_range_free(bo->u.real.va_handle);<br>
amdgpu_bo_free(bo->bo);<br>
<br>
amdgpu_bo_remove_fences(bo);<br>
<br>
if (bo->initial_domain & RADEON_DOMAIN_VRAM)<br>
ws->allocated_vram -= align64(bo->base.size, ws->info.gart_page_size);<br>
else if (bo->initial_domain & RADEON_DOMAIN_GTT)<br>
ws->allocated_gtt -= align64(bo->base.size, ws->info.gart_page_size);<br>
<br>
- if (bo->u.real.map_count >= 1) {<br>
- if (bo->initial_domain & RADEON_DOMAIN_VRAM)<br>
- ws->mapped_vram -= bo->base.size;<br>
- else if (bo->initial_domain & RADEON_DOMAIN_GTT)<br>
- ws->mapped_gtt -= bo->base.size;<br>
- ws->num_mapped_buffers--;<br>
- }<br>
-<br>
simple_mtx_destroy(&bo->lock);<br>
FREE(bo);<br>
}<br>
<br>
static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)<br>
{<br>
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);<br>
<br>
assert(bo->bo); /* slab buffers have a separate vtbl */<br>
<br>
if (bo->u.real.use_reusable_pool)<br>
pb_cache_add_buffer(&bo->u.real.cache_entry);<br>
else<br>
amdgpu_bo_destroy(_buf);<br>
}<br>
<br>
+static bool amdgpu_bo_do_map(struct amdgpu_winsys_bo *bo, void **cpu)<br>
+{<br>
+ assert(!bo->sparse && bo->bo && !bo->is_user_ptr);<br>
+ int r = amdgpu_bo_cpu_map(bo->bo, cpu);<br>
+ if (r) {<br>
+ /* Clear the cache and try again. */<br>
+ pb_cache_release_all_buffers(&bo->ws->bo_cache);<br>
+ r = amdgpu_bo_cpu_map(bo->bo, cpu);<br>
+ if (r)<br>
+ return false;<br>
+ }<br>
+<br>
+ if (p_atomic_inc_return(&bo->u.real.map_count) == 1) {<br>
+ if (bo->initial_domain & RADEON_DOMAIN_VRAM)<br>
+ bo->ws->mapped_vram += bo->base.size;<br>
+ else if (bo->initial_domain & RADEON_DOMAIN_GTT)<br>
+ bo->ws->mapped_gtt += bo->base.size;<br>
+ bo->ws->num_mapped_buffers++;<br>
+ }<br>
+<br>
+ return true;<br>
+}<br>
+<br>
static void *amdgpu_bo_map(struct pb_buffer *buf,<br>
struct radeon_cmdbuf *rcs,<br>
enum pipe_transfer_usage usage)<br>
{<br>
struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;<br>
struct amdgpu_winsys_bo *real;<br>
struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;<br>
- int r;<br>
- void *cpu = NULL;<br>
- uint64_t offset = 0;<br>
<br>
assert(!bo->sparse);<br>
<br>
/* If it's not unsynchronized bo_map, flush CS if needed and then wait. */<br>
if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {<br>
/* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */<br>
if (usage & PIPE_TRANSFER_DONTBLOCK) {<br>
if (!(usage & PIPE_TRANSFER_WRITE)) {<br>
/* Mapping for read.<br>
*<br>
@@ -306,63 +325,75 @@ static void *amdgpu_bo_map(struct pb_buffer *buf,<br>
}<br>
<br>
amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,<br>
RADEON_USAGE_READWRITE);<br>
}<br>
<br>
bo->ws->buffer_wait_time += os_time_get_nano() - time;<br>
}<br>
}<br>
<br>
- /* If the buffer is created from user memory, return the user pointer. */<br>
- if (bo->user_ptr)<br>
- return bo->user_ptr;<br>
+ /* Buffer synchronization has been checked, now actually map the buffer. */<br>
+ void *cpu = NULL;<br>
+ uint64_t offset = 0;<br>
<br>
if (bo->bo) {<br>
real = bo;<br>
} else {<br>
real = bo->u.slab.real;<br>
offset = bo->va - real->va;<br>
}<br>
<br>
- r = amdgpu_bo_cpu_map(real->bo, &cpu);<br>
- if (r) {<br>
- /* Clear the cache and try again. */<br>
- pb_cache_release_all_buffers(&real->ws->bo_cache);<br>
- r = amdgpu_bo_cpu_map(real->bo, &cpu);<br>
- if (r)<br>
- return NULL;<br>
+ if (usage & RADEON_TRANSFER_TEMPORARY) {<br>
+ if (real->is_user_ptr) {<br>
+ cpu = real->cpu_ptr;<br>
+ } else {<br>
+ if (!amdgpu_bo_do_map(real, &cpu))<br>
+ return NULL;<br>
+ }<br>
+ } else {<br>
+ cpu = p_atomic_read(&real->cpu_ptr);<br>
+ if (!cpu) {<br>
+ simple_mtx_lock(&real->lock);<br>
+ /* Must re-check due to the possibility of a race. Re-check need not<br>
+ * be atomic thanks to the lock. */<br>
+ cpu = real->cpu_ptr;<br>
+ if (!cpu) {<br>
+ if (!amdgpu_bo_do_map(real, &cpu)) {<br>
+ simple_mtx_unlock(&real->lock);<br>
+ return NULL;<br>
+ }<br>
+ p_atomic_set(&real->cpu_ptr, cpu);<br>
+ }<br>
+ simple_mtx_unlock(&real->lock);<br>
+ }<br>
}<br>
<br>
- if (p_atomic_inc_return(&real->u.real.map_count) == 1) {<br>
- if (real->initial_domain & RADEON_DOMAIN_VRAM)<br>
- real->ws->mapped_vram += real->base.size;<br>
- else if (real->initial_domain & RADEON_DOMAIN_GTT)<br>
- real->ws->mapped_gtt += real->base.size;<br>
- real->ws->num_mapped_buffers++;<br>
- }<br>
return (uint8_t*)cpu + offset;<br>
}<br>
<br>
static void amdgpu_bo_unmap(struct pb_buffer *buf)<br>
{<br>
struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;<br>
struct amdgpu_winsys_bo *real;<br>
<br>
assert(!bo->sparse);<br>
<br>
- if (bo->user_ptr)<br>
+ if (bo->is_user_ptr)<br>
return;<br>
<br>
real = bo->bo ? bo : bo->u.slab.real;<br>
-<br>
+ assert(real->u.real.map_count != 0 && "too many unmaps");<br>
if (p_atomic_dec_zero(&real->u.real.map_count)) {<br>
+ assert(!real->cpu_ptr &&<br>
+ "too many unmaps or forgot RADEON_TRANSFER_TEMPORARY flag");<br>
+<br>
if (real->initial_domain & RADEON_DOMAIN_VRAM)<br>
real->ws->mapped_vram -= real->base.size;<br>
else if (real->initial_domain & RADEON_DOMAIN_GTT)<br>
real->ws->mapped_gtt -= real->base.size;<br>
real->ws->num_mapped_buffers--;<br>
}<br>
<br>
amdgpu_bo_cpu_unmap(real->bo);<br>
}<br>
<br>
@@ -1446,28 +1477,29 @@ static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,<br>
<br>
if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,<br>
aligned_size, 1 << 12, 0, &va, &va_handle,<br>
AMDGPU_VA_RANGE_HIGH))<br>
goto error_va_alloc;<br>
<br>
if (amdgpu_bo_va_op(buf_handle, 0, aligned_size, va, 0, AMDGPU_VA_OP_MAP))<br>
goto error_va_map;<br>
<br>
/* Initialize it. */<br>
+ bo->is_user_ptr = true;<br>
pipe_reference_init(&bo->base.reference, 1);<br>
simple_mtx_init(&bo->lock, mtx_plain);<br>
bo->bo = buf_handle;<br>
bo->base.alignment = 0;<br>
bo->base.size = size;<br>
bo->base.vtbl = &amdgpu_winsys_bo_vtbl;<br>
bo->ws = ws;<br>
- bo->user_ptr = pointer;<br>
+ bo->cpu_ptr = pointer;<br>
bo->va = va;<br>
bo->u.real.va_handle = va_handle;<br>
bo->initial_domain = RADEON_DOMAIN_GTT;<br>
bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);<br>
<br>
ws->allocated_gtt += aligned_size;<br>
<br>
amdgpu_add_buffer_to_global_list(bo);<br>
<br>
amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->u.real.kms_handle);<br>
@@ -1480,21 +1512,21 @@ error_va_map:<br>
error_va_alloc:<br>
amdgpu_bo_free(buf_handle);<br>
<br>
error:<br>
FREE(bo);<br>
return NULL;<br>
}<br>
<br>
static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf)<br>
{<br>
- return ((struct amdgpu_winsys_bo*)buf)->user_ptr != NULL;<br>
+ return ((struct amdgpu_winsys_bo*)buf)->is_user_ptr;<br>
}<br>
<br>
static bool amdgpu_bo_is_suballocated(struct pb_buffer *buf)<br>
{<br>
struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;<br>
<br>
return !bo->bo && !bo->sparse;<br>
}<br>
<br>
static uint64_t amdgpu_bo_get_va(struct pb_buffer *buf)<br>
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h<br>
index 58e6eed733d..88f4241327d 100644<br>
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h<br>
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h<br>
@@ -81,24 +81,25 @@ struct amdgpu_winsys_bo {<br>
uint32_t num_backing_pages;<br>
<br>
struct list_head backing;<br>
<br>
/* Commitment information for each page of the virtual memory area. */<br>
struct amdgpu_sparse_commitment *commitments;<br>
} sparse;<br>
} u;<br>
<br>
struct amdgpu_winsys *ws;<br>
- void *user_ptr; /* from buffer_from_ptr */<br>
+ void *cpu_ptr; /* for user_ptr and permanent maps */<br>
<br>
amdgpu_bo_handle bo; /* NULL for slab entries and sparse buffers */<br>
bool sparse;<br>
+ bool is_user_ptr;<br>
bool is_local;<br>
uint32_t unique_id;<br>
uint64_t va;<br>
enum radeon_bo_domain initial_domain;<br>
<br>
/* how many command streams is this bo referenced in? */<br>
int num_cs_references;<br>
<br>
/* how many command streams, which are being emitted in a separate<br>
* thread, is this bo referenced in? */<br>
-- <br>
2.19.1<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div>