[Mesa-dev] [PATCH 21/22] i965/gen7: Generalize gen7_vs_state in preparation for GS.
Eric Anholt
eric at anholt.net
Thu Aug 29 16:06:29 PDT 2013
Kenneth Graunke <kenneth at whitecape.org> writes:
> On 08/26/2013 03:12 PM, Paul Berry wrote:
>> ---
>> src/mesa/drivers/dri/i965/brw_state.h | 41 ++++++++++
>> src/mesa/drivers/dri/i965/gen7_vs_state.c | 123 ++++++++++++++++++++----------
>> 2 files changed, 122 insertions(+), 42 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
>> index b54338a..efef994 100644
>> --- a/src/mesa/drivers/dri/i965/brw_state.h
>> +++ b/src/mesa/drivers/dri/i965/brw_state.h
>> @@ -128,6 +128,38 @@ extern const struct brw_tracked_state gen7_wm_state;
>> extern const struct brw_tracked_state haswell_cut_index;
>>
>>
>> +/**
>> + * Parameters that differ between Gen7 VS and GS state upload commands.
>> + */
>> +struct gen7_vec4_upload_params
>> +{
>> + /**
>> + * Command used to set the binding table pointers for this stage.
>> + */
>> + unsigned binding_table_pointers_cmd;
>> +
>> + /**
>> + * Command used to set the sampler state pointers for this stage.
>> + */
>> + unsigned sampler_state_pointers_cmd;
>> +
>> + /**
>> + * Command used to send constants for this stage.
>> + */
>> + unsigned constant_cmd;
>> +
>> + /**
>> + * Command used to send state for this stage.
>> + */
>> + unsigned state_cmd;
>> +
>> + /**
>> + * Size of the state command for this stage.
>> + */
>> + unsigned state_cmd_size;
>> +};
>> +
>> +
>> /* brw_misc_state.c */
>> void brw_upload_invariant_state(struct brw_context *brw);
>> uint32_t
>> @@ -240,6 +272,15 @@ brw_vec4_upload_binding_table(struct brw_context *brw,
>> struct brw_vec4_context_base *vec4_ctx,
>> const struct brw_vec4_prog_data *prog_data);
>>
>> +/* gen7_vs_state.c */
>> +void
>> +gen7_upload_vec4_state(struct brw_context *brw,
>> + const struct gen7_vec4_upload_params *upload_params,
>> + const struct brw_vec4_context_base *vec4_ctx,
>> + bool active, bool alt_floating_point_mode,
>> + const struct brw_vec4_prog_data *prog_data,
>> + const unsigned *stage_specific_cmd_data);
>> +
>> #ifdef __cplusplus
>> }
>> #endif
>> diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
>> index 30fe802..fd81112 100644
>> --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
>> +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
>> @@ -29,33 +29,31 @@
>> #include "program/prog_statevars.h"
>> #include "intel_batchbuffer.h"
>>
>> -static void
>> -upload_vs_state(struct brw_context *brw)
>> -{
>> - struct gl_context *ctx = &brw->ctx;
>> - const struct brw_vec4_context_base *vec4_ctx = &brw->vs.base;
>> - uint32_t floating_point_mode = 0;
>> - const int max_threads_shift = brw->is_haswell ?
>> - HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT;
>>
>> - gen7_emit_vs_workaround_flush(brw);
>> -
>> - /* BRW_NEW_VS_BINDING_TABLE */
>> +void
>> +gen7_upload_vec4_state(struct brw_context *brw,
>> + const struct gen7_vec4_upload_params *upload_params,
>> + const struct brw_vec4_context_base *vec4_ctx,
>> + bool active, bool alt_floating_point_mode,
>> + const struct brw_vec4_prog_data *prog_data,
>> + const unsigned *stage_specific_cmd_data)
>> +{
>> + /* BRW_NEW_*_BINDING_TABLE */
>> BEGIN_BATCH(2);
>> - OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_VS << 16 | (2 - 2));
>> + OUT_BATCH(upload_params->binding_table_pointers_cmd << 16 | (2 - 2));
>> OUT_BATCH(vec4_ctx->bind_bo_offset);
>> ADVANCE_BATCH();
>>
>> /* CACHE_NEW_SAMPLER */
>> BEGIN_BATCH(2);
>> - OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_VS << 16 | (2 - 2));
>> + OUT_BATCH(upload_params->sampler_state_pointers_cmd << 16 | (2 - 2));
>> OUT_BATCH(vec4_ctx->sampler_offset);
>> ADVANCE_BATCH();
>>
>> - if (vec4_ctx->push_const_size == 0) {
>> + if (!active || vec4_ctx->push_const_size == 0) {
>> /* Disable the push constant buffers. */
>> BEGIN_BATCH(7);
>> - OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (7 - 2));
>> + OUT_BATCH(upload_params->constant_cmd << 16 | (7 - 2));
>> OUT_BATCH(0);
>> OUT_BATCH(0);
>> OUT_BATCH(0);
>> @@ -65,10 +63,10 @@ upload_vs_state(struct brw_context *brw)
>> ADVANCE_BATCH();
>> } else {
>> BEGIN_BATCH(7);
>> - OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (7 - 2));
>> + OUT_BATCH(upload_params->constant_cmd << 16 | (7 - 2));
>> OUT_BATCH(vec4_ctx->push_const_size);
>> OUT_BATCH(0);
>> - /* Pointer to the VS constant buffer. Covered by the set of
>> + /* Pointer to the stage's constant buffer. Covered by the set of
>> * state flags from gen6_prepare_wm_contants
>> */
>> OUT_BATCH(vec4_ctx->push_const_offset | GEN7_MOCS_L3);
>> @@ -78,36 +76,77 @@ upload_vs_state(struct brw_context *brw)
>> ADVANCE_BATCH();
>> }
>>
>> + BEGIN_BATCH(upload_params->state_cmd_size);
>> + OUT_BATCH(upload_params->state_cmd << 16 |
>> + (upload_params->state_cmd_size - 2));
>> + if (active) {
>> + OUT_BATCH(vec4_ctx->prog_offset);
>> + OUT_BATCH((alt_floating_point_mode ? GEN6_FLOATING_POINT_MODE_ALT
>> + : GEN6_FLOATING_POINT_MODE_IEEE_754) |
>> + ((ALIGN(vec4_ctx->sampler_count, 4)/4) <<
>> + GEN6_SAMPLER_COUNT_SHIFT));
>> +
>> + if (prog_data->total_scratch) {
>> + OUT_RELOC(vec4_ctx->scratch_bo,
>> + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
>> + ffs(prog_data->total_scratch) - 11);
>> + } else {
>> + OUT_BATCH(0);
>> + }
>> + } else {
>> + OUT_BATCH(0); /* prog_bo */
>> + OUT_BATCH((0 << GEN6_SAMPLER_COUNT_SHIFT) |
>> + (0 << GEN6_BINDING_TABLE_ENTRY_COUNT_SHIFT));
>> + OUT_BATCH(0); /* scratch space base offset */
>> + }
>> + for (int i = 0; i < upload_params->state_cmd_size - 4; ++i)
>> + OUT_BATCH(stage_specific_cmd_data[i]);
>> + ADVANCE_BATCH();
>> +}
>> +
>> +
>> +static const struct gen7_vec4_upload_params vs_upload_params = {
>> + .binding_table_pointers_cmd = _3DSTATE_BINDING_TABLE_POINTERS_VS,
>> + .sampler_state_pointers_cmd = _3DSTATE_SAMPLER_STATE_POINTERS_VS,
>> + .constant_cmd = _3DSTATE_CONSTANT_VS,
>> + .state_cmd = _3DSTATE_VS,
>> + .state_cmd_size = 6,
>> +};
>> +
>> +
>> +static void
>> +upload_vs_state(struct brw_context *brw)
>> +{
>> + struct gl_context *ctx = &brw->ctx;
>> + const struct brw_vec4_context_base *vec4_ctx = &brw->vs.base;
>> + const int max_threads_shift = brw->is_haswell ?
>> + HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT;
>> + /* CACHE_NEW_VS_PROG */
>> + const struct brw_vec4_prog_data *prog_data = &brw->vs.prog_data->base;
>> +
>> + gen7_emit_vs_workaround_flush(brw);
>> +
>> /* Use ALT floating point mode for ARB vertex programs, because they
>> * require 0^0 == 1.
>> */
>> - if (ctx->Shader.CurrentVertexProgram == NULL)
>> - floating_point_mode = GEN6_FLOATING_POINT_MODE_ALT;
>> -
>> - BEGIN_BATCH(6);
>> - OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
>> - OUT_BATCH(vec4_ctx->prog_offset);
>> - OUT_BATCH(floating_point_mode |
>> - ((ALIGN(vec4_ctx->sampler_count, 4)/4) <<
>> - GEN6_SAMPLER_COUNT_SHIFT));
>> -
>> - if (brw->vs.prog_data->base.total_scratch) {
>> - OUT_RELOC(vec4_ctx->scratch_bo,
>> - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
>> - ffs(brw->vs.prog_data->base.total_scratch) - 11);
>> - } else {
>> - OUT_BATCH(0);
>> - }
>> + bool alt_floating_point_mode = (ctx->Shader.CurrentVertexProgram == NULL);
>>
>> - OUT_BATCH((brw->vs.prog_data->base.dispatch_grf_start_reg <<
>> - GEN6_VS_DISPATCH_START_GRF_SHIFT) |
>> - (brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
>> - (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
>> + unsigned stage_specific_cmd_data[2];
>> + stage_specific_cmd_data[0] =
>> + (prog_data->dispatch_grf_start_reg <<
>> + GEN6_VS_DISPATCH_START_GRF_SHIFT) |
>> + (prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
>> + (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT);
>> + stage_specific_cmd_data[1] =
>> + ((brw->max_vs_threads - 1) << max_threads_shift) |
>> + GEN6_VS_STATISTICS_ENABLE |
>> + GEN6_VS_ENABLE;
>>
>> - OUT_BATCH(((brw->max_vs_threads - 1) << max_threads_shift) |
>> - GEN6_VS_STATISTICS_ENABLE |
>> - GEN6_VS_ENABLE);
>> - ADVANCE_BATCH();
>> + /* BRW_NEW_VS_BINDING_TABLE */
>> + /* CACHE_NEW_SAMPLER */
>> + gen7_upload_vec4_state(brw, &vs_upload_params, vec4_ctx, true /* active */,
>> + alt_floating_point_mode, prog_data,
>> + stage_specific_cmd_data);
>> }
>>
>> const struct brw_tracked_state gen7_vs_state = {
>>
>
> I can't quite put my finger on why, but I really dislike this code
> reuse. It just seems really awkward to me.
The struct argument is what makes me cringe. If upload_vs_state() was:
static void
upload_vs_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
uint32_t floating_point_mode = 0;
const int max_threads_shift = brw->is_haswell ?
HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT;
/* the VS has a special requirement for synchronization due to a bug
* in its constant handling.
*/
gen7_emit_vs_workaround_flush(brw);
emit_binding_table_pointers(_3DSTATE_BINDING_TABLE_POINTERS_VS, brw->vs.bind_bo_offset);
emit_sampler(_3DSTATE_SAMPLER_STATE_POINTERS_VS, brw->vs.sampler_offset);
emit_constants(3DSTATE_CONSTANT_VS, &brw->vs.stage_state);
/* inline emit of 3DSTATE_VS just like we have today, since there's a
* whole bunch of stage-specific stuff as is evident from the
* alt_floating_point_mode, state_cmd, state_cmd_size,
* stage_specific_cmd_data arguments.
*/
}
that would share the definitely-common code without the contortions to
make 3DSTATE_VS be "shared". Plus you get to fairly trivially reuse
those 3 shared packets from blorp, which this patch didn't do today.
(only "fairly" trivially because you'd want to have individual
enable/disable packets for the constants to really trivially reuse from
blorp).
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 835 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20130829/4c38de92/attachment-0001.pgp>
More information about the mesa-dev
mailing list