[Mesa-dev] [RFC PATCH v2 6/6] nv50: Add shader disk caching
Mark Menzynski
mmenzyns at redhat.com
Thu Mar 19 20:57:48 UTC 2020
Adds shader disk caching for nv50 to avoid having to compile shaders every
time. Shaders are saved into the disk_shader_cache from the nv50_screen structure.
It serializes the input nv50_ir_prog_info to compute the hash key and
also to do a byte compare between the original nv50_ir_prog_info and the one
saved in the cache. If the keys match and the byte compare also reports that
they are equal, the shaders are the same, and the compiled nv50_ir_prog_info_out
from the cache can be used instead of compiling the input info.
Signed-off-by: Mark Menzynski <mmenzyns at redhat.com>
---
.../drivers/nouveau/nv50/nv50_program.c | 276 +++---------------
.../drivers/nouveau/nv50/nv50_program.h | 2 +
.../drivers/nouveau/nv50/nv50_shader_state.c | 4 +-
src/gallium/drivers/nouveau/nv50/nv50_state.c | 1 +
4 files changed, 47 insertions(+), 236 deletions(-)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index b5e36cf488d..156ac286a7f 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -37,108 +37,6 @@ bitcount4(const uint32_t val)
return cnt[val & 0xf];
}
-static int
-nv50_vertprog_assign_slots(struct nv50_ir_prog_info_out *info)
-{
- struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
- unsigned i, n, c;
-
- n = 0;
- for (i = 0; i < info->numInputs; ++i) {
- prog->in[i].id = i;
- prog->in[i].sn = info->in[i].sn;
- prog->in[i].si = info->in[i].si;
- prog->in[i].hw = n;
- prog->in[i].mask = info->in[i].mask;
-
- prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32);
-
- for (c = 0; c < 4; ++c)
- if (info->in[i].mask & (1 << c))
- info->in[i].slot[c] = n++;
-
- if (info->in[i].sn == TGSI_SEMANTIC_PRIMID)
- prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID;
- }
- prog->in_nr = info->numInputs;
-
- for (i = 0; i < info->numSysVals; ++i) {
- switch (info->sv[i].sn) {
- case TGSI_SEMANTIC_INSTANCEID:
- prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID;
- continue;
- case TGSI_SEMANTIC_VERTEXID:
- prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
- prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
- continue;
- default:
- break;
- }
- }
-
- /*
- * Corner case: VP has no inputs, but we will still need to submit data to
- * draw it. HW will shout at us and won't draw anything if we don't enable
- * any input, so let's just pretend it's the first one.
- */
- if (prog->vp.attrs[0] == 0 &&
- prog->vp.attrs[1] == 0 &&
- prog->vp.attrs[2] == 0)
- prog->vp.attrs[0] |= 0xf;
-
- /* VertexID before InstanceID */
- if (info->io.vertexId < info->numSysVals)
- info->sv[info->io.vertexId].slot[0] = n++;
- if (info->io.instanceId < info->numSysVals)
- info->sv[info->io.instanceId].slot[0] = n++;
-
- n = 0;
- for (i = 0; i < info->numOutputs; ++i) {
- switch (info->out[i].sn) {
- case TGSI_SEMANTIC_PSIZE:
- prog->vp.psiz = i;
- break;
- case TGSI_SEMANTIC_CLIPDIST:
- prog->vp.clpd[info->out[i].si] = n;
- break;
- case TGSI_SEMANTIC_EDGEFLAG:
- prog->vp.edgeflag = i;
- break;
- case TGSI_SEMANTIC_BCOLOR:
- prog->vp.bfc[info->out[i].si] = i;
- break;
- case TGSI_SEMANTIC_LAYER:
- prog->gp.has_layer = true;
- prog->gp.layerid = n;
- break;
- case TGSI_SEMANTIC_VIEWPORT_INDEX:
- prog->gp.has_viewport = true;
- prog->gp.viewportid = n;
- break;
- default:
- break;
- }
- prog->out[i].id = i;
- prog->out[i].sn = info->out[i].sn;
- prog->out[i].si = info->out[i].si;
- prog->out[i].hw = n;
- prog->out[i].mask = info->out[i].mask;
-
- for (c = 0; c < 4; ++c)
- if (info->out[i].mask & (1 << c))
- info->out[i].slot[c] = n++;
- }
- prog->out_nr = info->numOutputs;
- prog->max_out = n;
- if (!prog->max_out)
- prog->max_out = 1;
-
- if (prog->vp.psiz < info->numOutputs)
- prog->vp.psiz = prog->out[prog->vp.psiz].hw;
-
- return 0;
-}
-
static int
nv50_vertprog_assign_slots_info(struct nv50_ir_prog_info_out *info)
{
@@ -263,115 +161,6 @@ nv50_vertprog_assign_slots_prog(struct nv50_ir_prog_info_out *info)
return 0;
}
-static int
-nv50_fragprog_assign_slots(struct nv50_ir_prog_info_out *info)
-{
- struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
- unsigned i, n, m, c;
- unsigned nvary;
- unsigned nflat;
- unsigned nintp = 0;
-
- /* count recorded non-flat inputs */
- for (m = 0, i = 0; i < info->numInputs; ++i) {
- switch (info->in[i].sn) {
- case TGSI_SEMANTIC_POSITION:
- continue;
- default:
- m += info->in[i].flat ? 0 : 1;
- break;
- }
- }
- /* careful: id may be != i in info->in[prog->in[i].id] */
-
- /* Fill prog->in[] so that non-flat inputs are first and
- * kick out special inputs that don't use the RESULT_MAP.
- */
- for (n = 0, i = 0; i < info->numInputs; ++i) {
- if (info->in[i].sn == TGSI_SEMANTIC_POSITION) {
- prog->fp.interp |= info->in[i].mask << 24;
- for (c = 0; c < 4; ++c)
- if (info->in[i].mask & (1 << c))
- info->in[i].slot[c] = nintp++;
- } else {
- unsigned j = info->in[i].flat ? m++ : n++;
-
- if (info->in[i].sn == TGSI_SEMANTIC_COLOR)
- prog->vp.bfc[info->in[i].si] = j;
- else if (info->in[i].sn == TGSI_SEMANTIC_PRIMID)
- prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID;
-
- prog->in[j].id = i;
- prog->in[j].mask = info->in[i].mask;
- prog->in[j].sn = info->in[i].sn;
- prog->in[j].si = info->in[i].si;
- prog->in[j].linear = info->in[i].linear;
-
- prog->in_nr++;
- }
- }
- if (!(prog->fp.interp & (8 << 24))) {
- ++nintp;
- prog->fp.interp |= 8 << 24;
- }
-
- for (i = 0; i < prog->in_nr; ++i) {
- int j = prog->in[i].id;
-
- prog->in[i].hw = nintp;
- for (c = 0; c < 4; ++c)
- if (prog->in[i].mask & (1 << c))
- info->in[j].slot[c] = nintp++;
- }
- /* (n == m) if m never increased, i.e. no flat inputs */
- nflat = (n < m) ? (nintp - prog->in[n].hw) : 0;
- nintp -= bitcount4(prog->fp.interp >> 24); /* subtract position inputs */
- nvary = nintp - nflat;
-
- prog->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT;
- prog->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT;
-
- /* put front/back colors right after HPOS */
- prog->fp.colors = 4 << NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT;
- for (i = 0; i < 2; ++i)
- if (prog->vp.bfc[i] < 0xff)
- prog->fp.colors += bitcount4(prog->in[prog->vp.bfc[i]].mask) << 16;
-
- /* FP outputs */
-
- if (info->prop.fp.numColourResults > 1)
- prog->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS;
-
- for (i = 0; i < info->numOutputs; ++i) {
- prog->out[i].id = i;
- prog->out[i].sn = info->out[i].sn;
- prog->out[i].si = info->out[i].si;
- prog->out[i].mask = info->out[i].mask;
-
- if (i == info->io.fragDepth || i == info->io.sampleMask)
- continue;
- prog->out[i].hw = info->out[i].si * 4;
-
- for (c = 0; c < 4; ++c)
- info->out[i].slot[c] = prog->out[i].hw + c;
-
- prog->max_out = MAX2(prog->max_out, prog->out[i].hw + 4);
- }
-
- if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) {
- info->out[info->io.sampleMask].slot[0] = prog->max_out++;
- prog->fp.has_samplemask = 1;
- }
-
- if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
- info->out[info->io.fragDepth].slot[2] = prog->max_out++;
-
- if (!prog->max_out)
- prog->max_out = 4;
-
- return 0;
-}
-
static int
nv50_fragprog_assign_slots_info(struct nv50_ir_prog_info_out *info)
{
@@ -558,23 +347,6 @@ nv50_fragprog_assign_slots_prog(struct nv50_ir_prog_info_out *info)
return 0;
}
-static int
-nv50_program_assign_varying_slots(struct nv50_ir_prog_info_out *info)
-{
- switch (info->type) {
- case PIPE_SHADER_VERTEX:
- return nv50_vertprog_assign_slots(info);
- case PIPE_SHADER_GEOMETRY:
- return nv50_vertprog_assign_slots(info);
- case PIPE_SHADER_FRAGMENT:
- return nv50_fragprog_assign_slots(info);
- case PIPE_SHADER_COMPUTE:
- return 0;
- default:
- return -1;
- }
-}
-
static int
nv50_program_assign_varying_slots_info(struct nv50_ir_prog_info_out *info)
{
@@ -667,11 +439,21 @@ nv50_program_create_strmout_state(const struct nv50_ir_prog_info_out *info,
bool
nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
+ struct disk_cache *disk_shader_cache,
struct pipe_debug_callback *debug)
{
+ struct blob blob;
struct nv50_ir_prog_info *info;
struct nv50_ir_prog_info_out info_out = {};
- int i, ret;
+
+ void *cached_data = NULL;
+ size_t cached_size;
+ bool shader_found = false;
+
+ int i;
+ int ret = 0;
+ cache_key key;
+
const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80;
info = CALLOC_STRUCT(nv50_ir_prog_info);
@@ -705,8 +487,6 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
info->io.msInfoCBSlot = 15;
info->io.msInfoBase = NV50_CB_AUX_MS_OFFSET;
- info->assignSlots = nv50_program_assign_varying_slots;
-
prog->vp.bfc[0] = 0xff;
prog->vp.bfc[1] = 0xff;
prog->vp.edgeflag = 0xff;
@@ -727,16 +507,42 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
info->optLevel = 3;
#endif
+ info->assignSlots = nv50_program_assign_varying_slots_info;
info->bin.smemSize = prog->cp.smem_size;
info->io.genUserClip = prog->vp.clpd_nr;
+ blob_init(&blob);
+
+ if (disk_shader_cache) {
+ nv50_ir_prog_info_serialize(&blob, info);
+ disk_cache_compute_key(disk_shader_cache, blob.data, blob.size, key);
+ cached_data = disk_cache_get(disk_shader_cache, key, &cached_size);
+
+ if (cached_data && cached_size >= blob.size) { // blob.size is the size of serialized "info"
+ if (memcmp(cached_data, blob.data, blob.size) == 0) {
+ shader_found = true;
+ /* Blob contains only "info". In disk cache, "info_out" comes right after it */
+ size_t offset = blob.size;
+ nv50_ir_prog_info_out_deserialize(cached_data, cached_size, offset, &info_out);
+ }
+ }
+ free(cached_data);
+ }
info_out.driverPriv = prog;
- ret = nv50_ir_generate_code(info, &info_out);
- if (ret) {
- NOUVEAU_ERR("shader translation failed: %i\n", ret);
- goto out;
+ if (!shader_found) {
+ ret = nv50_ir_generate_code(info, &info_out);
+ if (ret) {
+ NOUVEAU_ERR("shader translation failed: %i\n", ret);
+ goto out;
+ }
+ if (disk_shader_cache) {
+ nv50_ir_prog_info_out_serialize(&blob, &info_out);
+ disk_cache_put(disk_shader_cache, key, blob.data, blob.size, NULL);
+ }
}
+ blob_finish(&blob);
+ nv50_program_assign_varying_slots_prog(&info_out);
prog->code = info_out.bin.code;
prog->code_size = info_out.bin.codeSize;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h
index 1a89e0d5067..528e1d01fa1 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -116,7 +116,9 @@ struct nv50_program {
struct nv50_stream_output_state *so;
};
+struct disk_cache;
bool nv50_program_translate(struct nv50_program *, uint16_t chipset,
+ struct disk_cache *,
struct pipe_debug_callback *);
bool nv50_program_upload_code(struct nv50_context *, struct nv50_program *);
void nv50_program_destroy(struct nv50_context *, struct nv50_program *);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
index 2cbbdc0cc35..65891108464 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -116,7 +116,9 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog)
{
if (!prog->translated) {
prog->translated = nv50_program_translate(
- prog, nv50->screen->base.device->chipset, &nv50->base.debug);
+ prog, nv50->screen->base.device->chipset,
+ nv50->screen->base.disk_shader_cache,
+ &nv50->base.debug);
if (!prog->translated)
return false;
} else
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index 6488c71f4ad..7250ee35895 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -779,6 +779,7 @@ nv50_sp_state_create(struct pipe_context *pipe,
prog->translated = nv50_program_translate(
prog, nv50_context(pipe)->screen->base.device->chipset,
+ nv50_context(pipe)->screen->base.disk_shader_cache,
&nouveau_context(pipe)->debug);
return (void *)prog;
--
2.24.1
More information about the mesa-dev
mailing list