[Mesa-dev] [PATCH 12/24] cso: don't track the number of sampler states bound
Nicolai Hähnle
nhaehnle at gmail.com
Wed Jun 14 07:23:14 UTC 2017
On 12.06.2017 20:18, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> This removes 2 loops from hot codepaths and adds 1 loop to a rare codepath
> (restore_sampler_states), and makes sanitize_hash() slightly worse.
>
> Sampler states, when bound, are not unbound for draw calls that don't need
> them. That's OK, because bound sampler states don't add any overhead.
Is this really always true? Sampler states that are left bound might still
show up in texture decompression checks.
Cheers,
Nicolai
>
> This results in lower CPU overhead in most cases.
> ---
> src/gallium/auxiliary/cso_cache/cso_context.c | 59 +++++++++++----------------
> 1 file changed, 23 insertions(+), 36 deletions(-)
>
> diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
> index 5558385..4947b8e 100644
> --- a/src/gallium/auxiliary/cso_cache/cso_context.c
> +++ b/src/gallium/auxiliary/cso_cache/cso_context.c
> @@ -50,21 +50,20 @@
> #include "cso_context.h"
>
>
> /**
> * Per-shader sampler information.
> */
> struct sampler_info
> {
> struct cso_sampler *cso_samplers[PIPE_MAX_SAMPLERS];
> void *samplers[PIPE_MAX_SAMPLERS];
> - unsigned nr_samplers;
> };
>
>
>
> struct cso_context {
> struct pipe_context *pipe;
> struct cso_cache *cache;
> struct u_vbuf *vbuf;
>
> boolean has_geometry_shader;
> @@ -76,20 +75,25 @@ struct cso_context {
>
> struct pipe_sampler_view *fragment_views[PIPE_MAX_SHADER_SAMPLER_VIEWS];
> unsigned nr_fragment_views;
>
> struct pipe_sampler_view *fragment_views_saved[PIPE_MAX_SHADER_SAMPLER_VIEWS];
> unsigned nr_fragment_views_saved;
>
> struct sampler_info fragment_samplers_saved;
> struct sampler_info samplers[PIPE_SHADER_TYPES];
>
> + /* Temporary number until cso_single_sampler_done is called.
> + * It tracks the highest sampler seen in cso_single_sampler.
> + */
> + int max_sampler_seen;
> +
> struct pipe_vertex_buffer aux_vertex_buffer_current;
> struct pipe_vertex_buffer aux_vertex_buffer_saved;
> unsigned aux_vertex_buffer_index;
>
> struct pipe_constant_buffer aux_constbuf_current[PIPE_SHADER_TYPES];
> struct pipe_constant_buffer aux_constbuf_saved[PIPE_SHADER_TYPES];
>
> struct pipe_image_view fragment_image0_current;
> struct pipe_image_view fragment_image0_saved;
>
> @@ -233,21 +237,21 @@ sanitize_hash(struct cso_hash *hash, enum cso_cache_type type,
> if (type == CSO_SAMPLER) {
> int i, j;
>
> samplers_to_restore = MALLOC(PIPE_SHADER_TYPES * PIPE_MAX_SAMPLERS *
> sizeof(*samplers_to_restore));
>
> /* Temporarily remove currently bound sampler states from the hash
> * table, to prevent them from being deleted
> */
> for (i = 0; i < PIPE_SHADER_TYPES; i++) {
> - for (j = 0; j < ctx->samplers[i].nr_samplers; j++) {
> + for (j = 0; j < PIPE_MAX_SAMPLERS; j++) {
> struct cso_sampler *sampler = ctx->samplers[i].cso_samplers[j];
>
> if (sampler && cso_hash_take(hash, sampler->hash_key))
> samplers_to_restore[to_restore++] = sampler;
> }
> }
> }
>
> iter = cso_hash_first_node(hash);
> while (to_remove) {
> @@ -327,20 +331,21 @@ cso_create_context(struct pipe_context *pipe, unsigned u_vbuf_flags)
> PIPE_SHADER_CAP_SUPPORTED_IRS);
> if (supported_irs & (1 << PIPE_SHADER_IR_TGSI)) {
> ctx->has_compute_shader = TRUE;
> }
> }
> if (pipe->screen->get_param(pipe->screen,
> PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS) != 0) {
> ctx->has_streamout = TRUE;
> }
>
> + ctx->max_sampler_seen = -1;
> return ctx;
>
> out:
> cso_destroy_context( ctx );
> return NULL;
> }
>
> /**
> * Free the CSO context.
> */
> @@ -1223,116 +1228,98 @@ cso_single_sampler(struct cso_context *ctx, enum pipe_shader_type shader_stage,
> FREE(cso);
> return PIPE_ERROR_OUT_OF_MEMORY;
> }
> }
> else {
> cso = cso_hash_iter_data(iter);
> }
>
> ctx->samplers[shader_stage].cso_samplers[idx] = cso;
> ctx->samplers[shader_stage].samplers[idx] = cso->data;
> - } else {
> - ctx->samplers[shader_stage].cso_samplers[idx] = NULL;
> - ctx->samplers[shader_stage].samplers[idx] = NULL;
> + ctx->max_sampler_seen = MAX2(ctx->max_sampler_seen, (int)idx);
> }
>
> return PIPE_OK;
> }
>
>
> /**
> * Send staged sampler state to the driver.
> */
> void
> cso_single_sampler_done(struct cso_context *ctx,
> enum pipe_shader_type shader_stage)
> {
> struct sampler_info *info = &ctx->samplers[shader_stage];
> - const unsigned old_nr_samplers = info->nr_samplers;
> - unsigned i;
>
> - /* find highest non-null sampler */
> - for (i = PIPE_MAX_SAMPLERS; i > 0; i--) {
> - if (info->samplers[i - 1] != NULL)
> - break;
> - }
> + if (ctx->max_sampler_seen == -1)
> + return;
>
> - info->nr_samplers = i;
> ctx->pipe->bind_sampler_states(ctx->pipe, shader_stage, 0,
> - MAX2(old_nr_samplers, info->nr_samplers),
> + ctx->max_sampler_seen + 1,
> info->samplers);
> + ctx->max_sampler_seen = -1;
> }
>
>
> /*
> * If the function encouters any errors it will return the
> * last one. Done to always try to set as many samplers
> * as possible.
> */
> enum pipe_error
> cso_set_samplers(struct cso_context *ctx,
> enum pipe_shader_type shader_stage,
> unsigned nr,
> const struct pipe_sampler_state **templates)
> {
> - struct sampler_info *info = &ctx->samplers[shader_stage];
> unsigned i;
> enum pipe_error temp, error = PIPE_OK;
>
> for (i = 0; i < nr; i++) {
> temp = cso_single_sampler(ctx, shader_stage, i, templates[i]);
> if (temp != PIPE_OK)
> error = temp;
> }
>
> - for ( ; i < info->nr_samplers; i++) {
> - temp = cso_single_sampler(ctx, shader_stage, i, NULL);
> - if (temp != PIPE_OK)
> - error = temp;
> - }
> -
> cso_single_sampler_done(ctx, shader_stage);
>
> return error;
> }
>
> static void
> cso_save_fragment_samplers(struct cso_context *ctx)
> {
> struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
> struct sampler_info *saved = &ctx->fragment_samplers_saved;
>
> - saved->nr_samplers = info->nr_samplers;
> - memcpy(saved->cso_samplers, info->cso_samplers, info->nr_samplers *
> - sizeof(*info->cso_samplers));
> - memcpy(saved->samplers, info->samplers, info->nr_samplers *
> - sizeof(*info->samplers));
> + memcpy(saved->cso_samplers, info->cso_samplers,
> + sizeof(info->cso_samplers));
> + memcpy(saved->samplers, info->samplers, sizeof(info->samplers));
> }
>
>
> static void
> cso_restore_fragment_samplers(struct cso_context *ctx)
> {
> struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
> struct sampler_info *saved = &ctx->fragment_samplers_saved;
> - int delta = (int)info->nr_samplers - saved->nr_samplers;
>
> memcpy(info->cso_samplers, saved->cso_samplers,
> - saved->nr_samplers * sizeof(*info->cso_samplers));
> - memcpy(info->samplers, saved->samplers,
> - saved->nr_samplers * sizeof(*info->samplers));
> -
> - if (delta > 0) {
> - memset(&info->cso_samplers[saved->nr_samplers], 0,
> - delta * sizeof(*info->cso_samplers));
> - memset(&info->samplers[saved->nr_samplers], 0,
> - delta * sizeof(*info->samplers));
> + sizeof(info->cso_samplers));
> + memcpy(info->samplers, saved->samplers, sizeof(info->samplers));
> +
> + for (int i = PIPE_MAX_SAMPLERS - 1; i >= 0; i--) {
> + if (info->samplers[i]) {
> + ctx->max_sampler_seen = i;
> + break;
> + }
> }
>
> cso_single_sampler_done(ctx, PIPE_SHADER_FRAGMENT);
> }
>
>
> void
> cso_set_sampler_views(struct cso_context *ctx,
> enum pipe_shader_type shader_stage,
> unsigned count,
>
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
More information about the mesa-dev mailing list