[Mesa-dev] [PATCH 03/14] gallium/radeon: add RADEON_USAGE_SYNCHRONIZED
Nicolai Hähnle
nhaehnle at gmail.com
Tue Sep 13 09:56:14 UTC 2016
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
Synchronized submission is really the behavior we want most of the time,
so an opt-out UNSYNCHRONIZED flag might seem the more obvious choice.
However, having an opt-in SYNCHRONIZED flag instead has the advantage that
OR'ing different flags together always results in stronger guarantees.
The parent BOs of sub-allocated buffers will be added unsynchronized.
---
src/gallium/drivers/r300/r300_emit.c | 19 +++++++++++--------
src/gallium/drivers/radeon/r600_cs.h | 6 ++++--
src/gallium/drivers/radeon/radeon_uvd.c | 3 ++-
src/gallium/drivers/radeon/radeon_vce.c | 3 ++-
src/gallium/drivers/radeon/radeon_winsys.h | 7 ++++++-
5 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 95971de..671aa62 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -1314,91 +1314,94 @@ boolean r300_emit_buffer_validate(struct r300_context *r300,
validate:
if (r300->fb_state.dirty) {
/* Color buffers... */
for (i = 0; i < fb->nr_cbufs; i++) {
if (!fb->cbufs[i])
continue;
tex = r300_resource(fb->cbufs[i]->texture);
assert(tex && tex->buf && "cbuf is marked, but NULL!");
r300->rws->cs_add_buffer(r300->cs, tex->buf,
- RADEON_USAGE_READWRITE,
+ RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED,
r300_surface(fb->cbufs[i])->domain,
tex->b.b.nr_samples > 1 ?
RADEON_PRIO_COLOR_BUFFER_MSAA :
RADEON_PRIO_COLOR_BUFFER);
}
/* ...depth buffer... */
if (fb->zsbuf) {
tex = r300_resource(fb->zsbuf->texture);
assert(tex && tex->buf && "zsbuf is marked, but NULL!");
r300->rws->cs_add_buffer(r300->cs, tex->buf,
- RADEON_USAGE_READWRITE,
+ RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED,
r300_surface(fb->zsbuf)->domain,
tex->b.b.nr_samples > 1 ?
RADEON_PRIO_DEPTH_BUFFER_MSAA :
RADEON_PRIO_DEPTH_BUFFER);
}
}
/* The AA resolve buffer. */
if (r300->aa_state.dirty) {
if (aa->dest) {
r300->rws->cs_add_buffer(r300->cs, aa->dest->buf,
- RADEON_USAGE_WRITE,
+ RADEON_USAGE_WRITE | RADEON_USAGE_SYNCHRONIZED,
aa->dest->domain,
RADEON_PRIO_COLOR_BUFFER);
}
}
if (r300->textures_state.dirty) {
/* ...textures... */
for (i = 0; i < texstate->count; i++) {
if (!(texstate->tx_enable & (1 << i))) {
continue;
}
tex = r300_resource(texstate->sampler_views[i]->base.texture);
- r300->rws->cs_add_buffer(r300->cs, tex->buf, RADEON_USAGE_READ,
+ r300->rws->cs_add_buffer(r300->cs, tex->buf,
+ RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED,
tex->domain, RADEON_PRIO_SAMPLER_TEXTURE);
}
}
/* ...occlusion query buffer... */
if (r300->query_current)
r300->rws->cs_add_buffer(r300->cs, r300->query_current->buf,
- RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT,
+ RADEON_USAGE_WRITE | RADEON_USAGE_SYNCHRONIZED,
+ RADEON_DOMAIN_GTT,
RADEON_PRIO_QUERY);
/* ...vertex buffer for SWTCL path... */
if (r300->vbo)
r300->rws->cs_add_buffer(r300->cs, r300->vbo,
- RADEON_USAGE_READ, RADEON_DOMAIN_GTT,
+ RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED,
+ RADEON_DOMAIN_GTT,
RADEON_PRIO_VERTEX_BUFFER);
/* ...vertex buffers for HWTCL path... */
if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) {
struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
struct pipe_vertex_buffer *last = r300->vertex_buffer +
r300->nr_vertex_buffers;
struct pipe_resource *buf;
for (; vbuf != last; vbuf++) {
buf = vbuf->buffer;
if (!buf)
continue;
r300->rws->cs_add_buffer(r300->cs, r300_resource(buf)->buf,
- RADEON_USAGE_READ,
+ RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED,
r300_resource(buf)->domain,
RADEON_PRIO_SAMPLER_BUFFER);
}
}
/* ...and index buffer for HWTCL path. */
if (index_buffer)
r300->rws->cs_add_buffer(r300->cs, r300_resource(index_buffer)->buf,
- RADEON_USAGE_READ,
+ RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED,
r300_resource(index_buffer)->domain,
RADEON_PRIO_INDEX_BUFFER);
/* Now do the validation (flush is called inside cs_validate on failure). */
if (!r300->rws->cs_validate(r300->cs)) {
/* Ooops, an infinite loop, give up. */
if (flushed)
return FALSE;
flushed = TRUE;
diff --git a/src/gallium/drivers/radeon/r600_cs.h b/src/gallium/drivers/radeon/r600_cs.h
index 6c15df8..28bdf15 100644
--- a/src/gallium/drivers/radeon/r600_cs.h
+++ b/src/gallium/drivers/radeon/r600_cs.h
@@ -66,22 +66,24 @@ radeon_cs_memory_below_limit(struct r600_common_screen *screen,
* The buffer list becomes empty after every context flush and must be
* rebuilt.
*/
static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rctx,
struct r600_ring *ring,
struct r600_resource *rbo,
enum radeon_bo_usage usage,
enum radeon_bo_priority priority)
{
assert(usage);
- return rctx->ws->cs_add_buffer(ring->cs, rbo->buf, usage,
- rbo->domains, priority) * 4;
+ return rctx->ws->cs_add_buffer(
+ ring->cs, rbo->buf,
+ (enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED),
+ rbo->domains, priority) * 4;
}
/**
* Same as above, but also checks memory usage and flushes the context
* accordingly.
*
* When this SHOULD NOT be used:
*
* - if r600_context_add_resource_size has been called for the buffer
* followed by *_need_cs_space for checking the memory usage
diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
index d5d654a..3ae0eaa 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -106,21 +106,22 @@ static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val)
radeon_emit(dec->cs, val);
}
/* send a command to the VCPU through the GPCOM registers */
static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
struct pb_buffer* buf, uint32_t off,
enum radeon_bo_usage usage, enum radeon_bo_domain domain)
{
int reloc_idx;
- reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage, domain,
+ reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
+ domain,
RADEON_PRIO_UVD);
if (!dec->use_legacy) {
uint64_t addr;
addr = dec->ws->buffer_get_virtual_address(buf);
addr = addr + off;
set_reg(dec, RUVD_GPCOM_VCPU_DATA0, addr);
set_reg(dec, RUVD_GPCOM_VCPU_DATA1, addr >> 32);
} else {
set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);
set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);
diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c
index 92cb8ce..8b5d277 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -530,21 +530,22 @@ bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
/**
* Add the buffer as relocation to the current command submission
*/
void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
enum radeon_bo_usage usage, enum radeon_bo_domain domain,
signed offset)
{
int reloc_idx;
- reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage, domain, RADEON_PRIO_VCE);
+ reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
+ domain, RADEON_PRIO_VCE);
if (enc->use_vm) {
uint64_t addr;
addr = enc->ws->buffer_get_virtual_address(buf);
addr = addr + offset;
RVCE_CS(addr >> 32);
RVCE_CS(addr);
} else {
RVCE_CS(reloc_idx * 4);
RVCE_CS(offset);
}
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 91f6e89..809a203 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -50,21 +50,26 @@ enum radeon_bo_domain { /* bitfield */
enum radeon_bo_flag { /* bitfield */
RADEON_FLAG_GTT_WC = (1 << 0),
RADEON_FLAG_CPU_ACCESS = (1 << 1),
RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
};
enum radeon_bo_usage { /* bitfield */
RADEON_USAGE_READ = 2,
RADEON_USAGE_WRITE = 4,
- RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE
+ RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE,
+
+ /* The winsys ensures that the CS submission will be scheduled after
+ * previously flushed CSs referencing this BO in a conflicting way.
+ */
+ RADEON_USAGE_SYNCHRONIZED = 8
};
enum ring_type {
RING_GFX = 0,
RING_COMPUTE,
RING_DMA,
RING_UVD,
RING_VCE,
RING_LAST,
};
--
2.7.4
More information about the mesa-dev mailing list