<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Sun, May 29, 2016 at 3:38 PM, Jordan Justen <span dir="ltr"><<a href="mailto:jordan.l.justen@intel.com" target="_blank">jordan.l.justen@intel.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">We need information about push constants in a few places for the GL<br>
driver, and a couple of other places for the Vulkan driver.<br>
<br>
When we add support for uploading both a common (cross-thread) set of<br>
push constants, combined with the previous per-thread push constant<br>
data, things are going to get even more complicated. To simplify<br>
things, we add push constant info into the cs prog_data struct.<br>
<br>
The cross-thread constant support is added as of Haswell. To support<br>
it we need to make sure all push constants with uniform values are<br>
added to earlier registers. The register that varies per thread and<br>
holds the thread invocation's unique local ID needs to be added last.<br>
<br>
Signed-off-by: Jordan Justen <<a href="mailto:jordan.l.justen@intel.com">jordan.l.justen@intel.com</a>><br>
---<br>
src/mesa/drivers/dri/i965/brw_compiler.h | 12 +++++++<br>
src/mesa/drivers/dri/i965/brw_fs.cpp | 58 ++++++++++++++++++++++++++++++++<br>
2 files changed, 70 insertions(+)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h<br>
index f1f9e56..dda6297 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_compiler.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h<br>
@@ -424,6 +424,12 @@ struct brw_wm_prog_data {<br>
int urb_setup[VARYING_SLOT_MAX];<br>
};<br>
<br>
+struct brw_push_const_block {<br>
+ unsigned dwords; /* Dword count, not reg aligned */<br>
+ unsigned regs;<br>
+ unsigned size; /* Bytes, register aligned */<br>
+};<br>
+<br>
struct brw_cs_prog_data {<br>
struct brw_stage_prog_data base;<br>
<br>
@@ -437,6 +443,12 @@ struct brw_cs_prog_data {<br>
int thread_local_id_index;<br>
<br>
struct {<br>
+ struct brw_push_const_block cross_thread;<br>
+ struct brw_push_const_block per_thread;<br>
+ struct brw_push_const_block total;<br>
+ } push;<br>
+<br>
+ struct {<br>
/** @{<br>
* surface indices the CS-specific surfaces<br>
*/<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
index 836ade0..bd37fbd 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
@@ -6479,6 +6479,61 @@ fs_visitor::emit_cs_work_group_id_setup()<br>
}<br>
<br>
static void<br>
+fill_push_const_block_info(struct brw_push_const_block *block, unsigned dwords)<br>
+{<br>
+ block->dwords = dwords;<br>
+ block->regs = DIV_ROUND_UP(dwords, 8);<br>
+ block->size = block->regs * 32;<br>
+}<br>
+<br>
+static void<br>
+cs_fill_push_const_info(const struct brw_device_info *devinfo,<br>
+ struct brw_cs_prog_data *cs_prog_data)<br>
+{<br>
+ const struct brw_stage_prog_data *prog_data =<br>
+ (struct brw_stage_prog_data*) cs_prog_data;<br>
+ bool fill_thread_id =<br>
+ cs_prog_data->thread_local_id_index >= 0 &&<br>
+ cs_prog_data->thread_local_id_index < (int)prog_data->nr_params;<br>
+ bool cross_thread_supported = devinfo->gen > 7 || devinfo->is_haswell;<br>
+<br>
+ /* The thread ID should be stored in the last param dword */<br>
+ assert(prog_data->nr_params > 0 || !fill_thread_id);<br>
+ assert(!fill_thread_id ||<br>
+ cs_prog_data->thread_local_id_index ==<br>
+ (int)prog_data->nr_params - 1);<br>
+<br>
+ unsigned cross_thread_dwords, per_thread_dwords;<br>
+ if (cross_thread_supported && fill_thread_id) {<br>
+ /* Fill all but the last register with cross-thread payload */<br>
+ cross_thread_dwords = 8 * (cs_prog_data->thread_local_id_index / 8);<br>
+ per_thread_dwords = prog_data->nr_params - cross_thread_dwords;<br>
+ assert(per_thread_dwords > 0 && per_thread_dwords <= 8);<br></blockquote><div><br></div><div>If I understand you correctly here, you're putting the bottom registers (aligned down to 8) into the cross-thread space and putting whatever is left, including the thread_local_id, into the per-thread space. Seems reasonable. I probably would have been lazier and burned a whole register on the local_id. :-)<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ } else if (cross_thread_supported && !fill_thread_id) {<br>
+ /* Fill all data using cross-thread payload */<br>
+ cross_thread_dwords = prog_data->nr_params;<br>
+ per_thread_dwords = 0u;<br>
+ } else {<br>
+ cross_thread_dwords = 0u;<br>
+ per_thread_dwords = prog_data->nr_params;<br></blockquote><div><br></div><div>Mind putting the !cross_thread_supported case first?<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ }<br>
+<br>
+ fill_push_const_block_info(&cs_prog_data->push.cross_thread, cross_thread_dwords);<br>
+ fill_push_const_block_info(&cs_prog_data->push.per_thread, per_thread_dwords);<br>
+<br>
+ unsigned total_dwords =<br>
+ (cs_prog_data->push.per_thread.size * cs_prog_data->threads +<br>
+ cs_prog_data->push.cross_thread.size) / 4;<br>
+ fill_push_const_block_info(&cs_prog_data->push.total, total_dwords);<br>
+<br>
+ assert(cs_prog_data->push.cross_thread.dwords % 8 == 0 ||<br>
+ cs_prog_data->push.per_thread.size == 0);<br>
+ assert(cs_prog_data->push.cross_thread.dwords +<br>
+ cs_prog_data->push.per_thread.dwords ==<br>
+ prog_data->nr_params);<br>
+}<br>
+<br>
+static void<br>
cs_set_simd_size(struct brw_cs_prog_data *cs_prog_data, unsigned size)<br>
{<br>
cs_prog_data->simd_size = size;<br>
@@ -6536,6 +6591,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,<br>
} else {<br>
cfg = v8.cfg;<br>
cs_set_simd_size(prog_data, 8);<br>
+ cs_fill_push_const_info(compiler->devinfo, prog_data);<br>
prog_data->base.dispatch_grf_start_reg = v8.payload.num_regs;<br>
}<br>
}<br>
@@ -6561,6 +6617,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,<br>
} else {<br>
cfg = v16.cfg;<br>
cs_set_simd_size(prog_data, 16);<br>
+ cs_fill_push_const_info(compiler->devinfo, prog_data);<br>
prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs;<br>
}<br>
}<br>
@@ -6588,6 +6645,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,<br>
} else {<br>
cfg = v32.cfg;<br>
cs_set_simd_size(prog_data, 32);<br>
+ cs_fill_push_const_info(compiler->devinfo, prog_data);<br>
}<br>
}<br>
<span class="HOEnZb"><font color="#888888"><br>
--<br>
2.8.1<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>