[Mesa-dev] [PATCH v03 22/38] i965: Port gen6+ 3DSTATE_SF to genxml.
Rafael Antognolli
rafael.antognolli at intel.com
Tue May 2 01:43:10 UTC 2017
Emit SF state on Gen6+ through the brw_batch_emit helper, using the pack
structs from genxml.
v3:
- Reorganize code and reduce #if/#endif's (Ken)
- Style fixes (Ken)
- Always set AALINEDISTANCE_TRUE (Ken)
Signed-off-by: Rafael Antognolli <rafael.antognolli at intel.com>
---
src/mesa/drivers/dri/i965/brw_state.h | 3 +-
src/mesa/drivers/dri/i965/gen6_sf_state.c | 189 +---------
src/mesa/drivers/dri/i965/gen7_sf_state.c | 156 +-------
src/mesa/drivers/dri/i965/gen8_sf_state.c | 73 +---
src/mesa/drivers/dri/i965/genX_state_upload.c | 420 ++++++++++++++++++-
5 files changed, 417 insertions(+), 424 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 3a10a8a..594757c 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -119,7 +119,6 @@ extern const struct brw_tracked_state gen6_renderbuffer_surfaces;
extern const struct brw_tracked_state gen6_sampler_state;
extern const struct brw_tracked_state gen6_scissor_state;
extern const struct brw_tracked_state gen6_sol_surface;
-extern const struct brw_tracked_state gen6_sf_state;
extern const struct brw_tracked_state gen6_sf_vp;
extern const struct brw_tracked_state gen6_urb;
extern const struct brw_tracked_state gen6_viewport_state;
@@ -137,7 +136,6 @@ extern const struct brw_tracked_state gen7_ps_state;
extern const struct brw_tracked_state gen7_push_constant_space;
extern const struct brw_tracked_state gen7_sbe_state;
extern const struct brw_tracked_state gen7_sf_clip_viewport;
-extern const struct brw_tracked_state gen7_sf_state;
extern const struct brw_tracked_state gen7_sol_state;
extern const struct brw_tracked_state gen7_te_state;
extern const struct brw_tracked_state gen7_tes_push_constants;
@@ -157,7 +155,6 @@ extern const struct brw_tracked_state gen8_ps_extra;
extern const struct brw_tracked_state gen8_ps_state;
extern const struct brw_tracked_state gen8_wm_state;
extern const struct brw_tracked_state gen8_sbe_state;
-extern const struct brw_tracked_state gen8_sf_state;
extern const struct brw_tracked_state gen8_sf_clip_viewport;
extern const struct brw_tracked_state gen8_vertices;
extern const struct brw_tracked_state gen8_vf_topology;
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index 0f118b6..45b5769 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -263,192 +263,3 @@ calculate_attr_overrides(const struct brw_context *brw,
*/
*urb_entry_read_length = ALIGN(max_source_attr + 1, 2) / 2;
}
-
-
-static void
-upload_sf_state(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_FS_PROG_DATA */
- const struct brw_wm_prog_data *wm_prog_data =
- brw_wm_prog_data(brw->wm.base.prog_data);
- uint32_t num_outputs = wm_prog_data->num_varying_inputs;
- uint32_t dw1, dw2, dw3, dw4;
- uint32_t point_sprite_enables;
- int i;
- /* _NEW_BUFFER */
- bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
- const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
-
- float point_size;
- uint16_t attr_overrides[16];
- uint32_t point_sprite_origin;
-
- dw1 = GEN6_SF_SWIZZLE_ENABLE | num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT;
- dw2 = GEN6_SF_STATISTICS_ENABLE;
- dw3 = GEN6_SF_SCISSOR_ENABLE | GEN6_SF_LINE_AA_MODE_TRUE;
- dw4 = 0;
-
- if (brw->sf.viewport_transform_enable)
- dw2 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
-
- /* _NEW_POLYGON */
- if (ctx->Polygon._FrontBit == render_to_fbo)
- dw2 |= GEN6_SF_WINDING_CCW;
-
- if (ctx->Polygon.OffsetFill)
- dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
-
- if (ctx->Polygon.OffsetLine)
- dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
-
- if (ctx->Polygon.OffsetPoint)
- dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
-
- switch (ctx->Polygon.FrontMode) {
- case GL_FILL:
- dw2 |= GEN6_SF_FRONT_SOLID;
- break;
-
- case GL_LINE:
- dw2 |= GEN6_SF_FRONT_WIREFRAME;
- break;
-
- case GL_POINT:
- dw2 |= GEN6_SF_FRONT_POINT;
- break;
-
- default:
- unreachable("not reached");
- }
-
- switch (ctx->Polygon.BackMode) {
- case GL_FILL:
- dw2 |= GEN6_SF_BACK_SOLID;
- break;
-
- case GL_LINE:
- dw2 |= GEN6_SF_BACK_WIREFRAME;
- break;
-
- case GL_POINT:
- dw2 |= GEN6_SF_BACK_POINT;
- break;
-
- default:
- unreachable("not reached");
- }
-
- /* _NEW_POLYGON */
- if (ctx->Polygon.CullFlag) {
- switch (ctx->Polygon.CullFaceMode) {
- case GL_FRONT:
- dw3 |= GEN6_SF_CULL_FRONT;
- break;
- case GL_BACK:
- dw3 |= GEN6_SF_CULL_BACK;
- break;
- case GL_FRONT_AND_BACK:
- dw3 |= GEN6_SF_CULL_BOTH;
- break;
- default:
- unreachable("not reached");
- }
- } else {
- dw3 |= GEN6_SF_CULL_NONE;
- }
-
- /* _NEW_LINE */
- {
- uint32_t line_width_u3_7 = brw_get_line_width(brw);
- dw3 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
- }
- if (ctx->Line.SmoothFlag) {
- dw3 |= GEN6_SF_LINE_AA_ENABLE;
- dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
- }
- /* _NEW_MULTISAMPLE */
- if (multisampled_fbo && ctx->Multisample.Enabled)
- dw3 |= GEN6_SF_MSRAST_ON_PATTERN;
-
- /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */
- if (use_state_point_size(brw))
- dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH;
-
- /* _NEW_POINT - Clamp to ARB_point_parameters user limits */
- point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
-
- /* Clamp to the hardware limits and convert to fixed point */
- dw4 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3);
-
- /*
- * Window coordinates in an FBO are inverted, which means point
- * sprite origin must be inverted, too.
- */
- if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) {
- point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT;
- } else {
- point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT;
- }
- dw1 |= point_sprite_origin;
-
- /* _NEW_LIGHT */
- if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
- dw4 |=
- (2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
- (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
- (1 << GEN6_SF_LINE_PROVOKE_SHIFT);
- } else {
- dw4 |=
- (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
- }
-
- /* BRW_NEW_VUE_MAP_GEOM_OUT | BRW_NEW_FRAGMENT_PROGRAM |
- * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA
- */
- uint32_t urb_entry_read_length;
- uint32_t urb_entry_read_offset;
- calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables,
- &urb_entry_read_length, &urb_entry_read_offset);
- dw1 |= (urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
- urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
-
- BEGIN_BATCH(20);
- OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2));
- OUT_BATCH(dw1);
- OUT_BATCH(dw2);
- OUT_BATCH(dw3);
- OUT_BATCH(dw4);
- OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant. copied from gen4 */
- OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
- OUT_BATCH_F(ctx->Polygon.OffsetClamp); /* global depth offset clamp */
- for (i = 0; i < 8; i++) {
- OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16);
- }
- OUT_BATCH(point_sprite_enables); /* dw16 */
- OUT_BATCH(wm_prog_data->flat_inputs);
- OUT_BATCH(0); /* wrapshortest enables 0-7 */
- OUT_BATCH(0); /* wrapshortest enables 8-15 */
- ADVANCE_BATCH();
-}
-
-const struct brw_tracked_state gen6_sf_state = {
- .dirty = {
- .mesa = _NEW_BUFFERS |
- _NEW_LIGHT |
- _NEW_LINE |
- _NEW_MULTISAMPLE |
- _NEW_POINT |
- _NEW_POLYGON |
- _NEW_PROGRAM,
- .brw = BRW_NEW_BLORP |
- BRW_NEW_CONTEXT |
- BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_GS_PROG_DATA |
- BRW_NEW_PRIMITIVE |
- BRW_NEW_TES_PROG_DATA |
- BRW_NEW_VUE_MAP_GEOM_OUT,
- },
- .emit = upload_sf_state,
-};
diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c
index d577a36..7ab8a99 100644
--- a/src/mesa/drivers/dri/i965/gen7_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c
@@ -107,159 +107,3 @@ const struct brw_tracked_state gen7_sbe_state = {
},
.emit = upload_sbe_state,
};
-
-static void
-upload_sf_state(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- uint32_t dw1, dw2, dw3;
- float point_size;
- /* _NEW_BUFFERS */
- bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
- const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
-
- dw1 = GEN6_SF_STATISTICS_ENABLE;
-
- if (brw->sf.viewport_transform_enable)
- dw1 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
-
- /* _NEW_BUFFERS */
- dw1 |= (brw_depthbuffer_format(brw) << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
-
- /* _NEW_POLYGON */
- if (ctx->Polygon._FrontBit == render_to_fbo)
- dw1 |= GEN6_SF_WINDING_CCW;
-
- if (ctx->Polygon.OffsetFill)
- dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
-
- if (ctx->Polygon.OffsetLine)
- dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
-
- if (ctx->Polygon.OffsetPoint)
- dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
-
- switch (ctx->Polygon.FrontMode) {
- case GL_FILL:
- dw1 |= GEN6_SF_FRONT_SOLID;
- break;
-
- case GL_LINE:
- dw1 |= GEN6_SF_FRONT_WIREFRAME;
- break;
-
- case GL_POINT:
- dw1 |= GEN6_SF_FRONT_POINT;
- break;
-
- default:
- unreachable("not reached");
- }
-
- switch (ctx->Polygon.BackMode) {
- case GL_FILL:
- dw1 |= GEN6_SF_BACK_SOLID;
- break;
-
- case GL_LINE:
- dw1 |= GEN6_SF_BACK_WIREFRAME;
- break;
-
- case GL_POINT:
- dw1 |= GEN6_SF_BACK_POINT;
- break;
-
- default:
- unreachable("not reached");
- }
-
- dw2 = GEN6_SF_SCISSOR_ENABLE;
-
- if (ctx->Polygon.CullFlag) {
- switch (ctx->Polygon.CullFaceMode) {
- case GL_FRONT:
- dw2 |= GEN6_SF_CULL_FRONT;
- break;
- case GL_BACK:
- dw2 |= GEN6_SF_CULL_BACK;
- break;
- case GL_FRONT_AND_BACK:
- dw2 |= GEN6_SF_CULL_BOTH;
- break;
- default:
- unreachable("not reached");
- }
- } else {
- dw2 |= GEN6_SF_CULL_NONE;
- }
-
- /* _NEW_LINE */
- {
- uint32_t line_width_u3_7 = brw_get_line_width(brw);
- dw2 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
- }
- if (ctx->Line.SmoothFlag) {
- dw2 |= GEN6_SF_LINE_AA_ENABLE;
- dw2 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
- }
- if (ctx->Line.StippleFlag && brw->is_haswell) {
- dw2 |= HSW_SF_LINE_STIPPLE_ENABLE;
- }
- /* _NEW_MULTISAMPLE */
- if (multisampled_fbo && ctx->Multisample.Enabled)
- dw2 |= GEN6_SF_MSRAST_ON_PATTERN;
-
- /* FINISHME: Last Pixel Enable? Vertex Sub Pixel Precision Select?
- */
-
- dw3 = GEN6_SF_LINE_AA_MODE_TRUE;
-
- /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */
- if (use_state_point_size(brw))
- dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;
-
- /* _NEW_POINT - Clamp to ARB_point_parameters user limits */
- point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
-
- /* Clamp to the hardware limits and convert to fixed point */
- dw3 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3);
-
- /* _NEW_LIGHT */
- if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
- dw3 |=
- (2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
- (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
- (1 << GEN6_SF_LINE_PROVOKE_SHIFT);
- } else {
- dw3 |= (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
- }
-
- BEGIN_BATCH(7);
- OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2));
- OUT_BATCH(dw1);
- OUT_BATCH(dw2);
- OUT_BATCH(dw3);
- OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant. copied from gen4 */
- OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
- OUT_BATCH_F(ctx->Polygon.OffsetClamp); /* global depth offset clamp */
- ADVANCE_BATCH();
-}
-
-const struct brw_tracked_state gen7_sf_state = {
- .dirty = {
- .mesa = _NEW_BUFFERS |
- _NEW_LIGHT |
- _NEW_LINE |
- _NEW_MULTISAMPLE |
- _NEW_POINT |
- _NEW_POLYGON |
- _NEW_PROGRAM,
- .brw = BRW_NEW_BLORP |
- BRW_NEW_CONTEXT |
- BRW_NEW_GS_PROG_DATA |
- BRW_NEW_PRIMITIVE |
- BRW_NEW_TES_PROG_DATA |
- BRW_NEW_VUE_MAP_GEOM_OUT,
- },
- .emit = upload_sf_state,
-};
diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c b/src/mesa/drivers/dri/i965/gen8_sf_state.c
index d47adcd..3b1dd61 100644
--- a/src/mesa/drivers/dri/i965/gen8_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c
@@ -151,76 +151,3 @@ const struct brw_tracked_state gen8_sbe_state = {
},
.emit = upload_sbe,
};
-
-static void
-upload_sf(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- uint32_t dw1 = 0, dw2 = 0, dw3 = 0;
- float point_size;
-
- dw1 = GEN6_SF_STATISTICS_ENABLE;
-
- if (brw->sf.viewport_transform_enable)
- dw1 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
-
- /* _NEW_LINE */
- uint32_t line_width_u3_7 = brw_get_line_width(brw);
- if (brw->gen >= 9 || brw->is_cherryview) {
- dw1 |= line_width_u3_7 << GEN9_SF_LINE_WIDTH_SHIFT;
- } else {
- dw2 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
- }
-
- if (ctx->Line.SmoothFlag) {
- dw2 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
- }
-
- /* _NEW_POINT - Clamp to ARB_point_parameters user limits */
- point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
-
- /* Clamp to the hardware limits and convert to fixed point */
- dw3 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3);
-
- /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */
- if (use_state_point_size(brw))
- dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;
-
- /* _NEW_POINT | _NEW_MULTISAMPLE */
- if ((ctx->Point.SmoothFlag || _mesa_is_multisample_enabled(ctx)) &&
- !ctx->Point.PointSprite) {
- dw3 |= GEN8_SF_SMOOTH_POINT_ENABLE;
- }
-
- dw3 |= GEN6_SF_LINE_AA_MODE_TRUE;
-
- /* _NEW_LIGHT */
- if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
- dw3 |= (2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
- (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
- (1 << GEN6_SF_LINE_PROVOKE_SHIFT);
- } else {
- dw3 |= (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
- }
-
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_SF << 16 | (4 - 2));
- OUT_BATCH(dw1);
- OUT_BATCH(dw2);
- OUT_BATCH(dw3);
- ADVANCE_BATCH();
-}
-
-const struct brw_tracked_state gen8_sf_state = {
- .dirty = {
- .mesa = _NEW_LIGHT |
- _NEW_PROGRAM |
- _NEW_LINE |
- _NEW_MULTISAMPLE |
- _NEW_POINT,
- .brw = BRW_NEW_BLORP |
- BRW_NEW_CONTEXT |
- BRW_NEW_VUE_MAP_GEOM_OUT,
- },
- .emit = upload_sf,
-};
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 722f38f..05076a7 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -28,6 +28,7 @@
#include "brw_context.h"
#include "brw_state.h"
+#include "brw_util.h"
#include "intel_batchbuffer.h"
#include "intel_fbo.h"
@@ -113,6 +114,235 @@ __gen_combine_address(struct brw_context *brw, void *location,
_brw_cmd_pack(cmd)(brw, (void *)_dst, &name), \
_dst = NULL)
+#if GEN_GEN == 6
+/**
+ * Determine the appropriate attribute override value to store into the
+ * 3DSTATE_SF structure for a given fragment shader attribute. The attribute
+ * override value contains two pieces of information: the location of the
+ * attribute in the VUE (relative to urb_entry_read_offset, see below), and a
+ * flag indicating whether to "swizzle" the attribute based on the direction
+ * the triangle is facing.
+ *
+ * If an attribute is "swizzled", then the given VUE location is used for
+ * front-facing triangles, and the VUE location that immediately follows is
+ * used for back-facing triangles. We use this to implement the mapping from
+ * gl_FrontColor/gl_BackColor to gl_Color.
+ *
+ * urb_entry_read_offset is the offset into the VUE at which the SF unit is
+ * being instructed to begin reading attribute data. It can be set to a
+ * nonzero value to prevent the SF unit from wasting time reading elements of
+ * the VUE that are not needed by the fragment shader. It is measured in
+ * 256-bit increments.
+ */
+static void
+genX(get_attr_override)(struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr,
+ const struct brw_vue_map *vue_map,
+ int urb_entry_read_offset, int fs_attr,
+ bool two_side_color, uint32_t *max_source_attr)
+{
+ /* Find the VUE slot for this attribute. */
+ int slot = vue_map->varying_to_slot[fs_attr];
+
+ /* Viewport and Layer are stored in the VUE header. We need to override
+ * them to zero if earlier stages didn't write them, as GL requires that
+ * they read back as zero when not explicitly set.
+ */
+ if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) {
+ attr->ComponentOverrideX = true;
+ attr->ComponentOverrideW = true;
+ attr->ConstantSource = CONST_0000;
+
+ if (!(vue_map->slots_valid & VARYING_BIT_LAYER))
+ attr->ComponentOverrideY = true;
+ if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT))
+ attr->ComponentOverrideZ = true;
+
+ return;
+ }
+
+ /* If there was only a back color written but not front, use back
+ * as the color instead of undefined
+ */
+ if (slot == -1 && fs_attr == VARYING_SLOT_COL0)
+ slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0];
+ if (slot == -1 && fs_attr == VARYING_SLOT_COL1)
+ slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1];
+
+ if (slot == -1) {
+ /* This attribute does not exist in the VUE--that means that the vertex
+ * shader did not write to it. This means that either:
+ *
+ * (a) This attribute is a texture coordinate, and it is going to be
+ * replaced with point coordinates (as a consequence of a call to
+ * glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the
+ * hardware will ignore whatever attribute override we supply.
+ *
+ * (b) This attribute is read by the fragment shader but not written by
+ * the vertex shader, so its value is undefined. Therefore the
+ * attribute override we supply doesn't matter.
+ *
+ * (c) This attribute is gl_PrimitiveID, and it wasn't written by the
+ * previous shader stage.
+ *
+ * Note that we don't have to worry about the cases where the attribute
+ * is gl_PointCoord or is undergoing point sprite coordinate
+ * replacement, because in those cases, this function isn't called.
+ *
+ * In case (c), we need to program the attribute overrides so that the
+ * primitive ID will be stored in this slot. In every other case, the
+ * attribute override we supply doesn't matter. So just go ahead and
+ * program primitive ID in every case.
+ */
+ attr->ComponentOverrideW = true;
+ attr->ComponentOverrideX = true;
+ attr->ComponentOverrideY = true;
+ attr->ComponentOverrideZ = true;
+ attr->ConstantSource = PRIM_ID;
+ return;
+ }
+
+ /* Compute the location of the attribute relative to urb_entry_read_offset.
+ * Each increment of urb_entry_read_offset represents a 256-bit value, so
+ * it counts for two 128-bit VUE slots.
+ */
+ int source_attr = slot - 2 * urb_entry_read_offset;
+ assert(source_attr >= 0 && source_attr < 32);
+
+ /* If we are doing two-sided color, and the VUE slot following this one
+ * represents a back-facing color, then we need to instruct the SF unit to
+ * do back-facing swizzling.
+ */
+ bool swizzling = two_side_color &&
+ ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 &&
+ vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) ||
+ (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 &&
+ vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1));
+
+ /* Update max_source_attr. If swizzling, the SF will read this slot + 1. */
+ if (*max_source_attr < source_attr + swizzling)
+ *max_source_attr = source_attr + swizzling;
+
+ attr->SourceAttribute = source_attr;
+ if (swizzling)
+ attr->SwizzleSelect = INPUTATTR_FACING;
+}
+
+
+static void
+genX(calculate_attr_overrides)(const struct brw_context *brw,
+ struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr_overrides,
+ uint32_t *point_sprite_enables,
+ uint32_t *urb_entry_read_length,
+ uint32_t *urb_entry_read_offset)
+{
+ const struct gl_context *ctx = &brw->ctx;
+
+ /* _NEW_POINT */
+ const struct gl_point_attrib *point = &ctx->Point;
+
+ /* BRW_NEW_FS_PROG_DATA */
+ const struct brw_wm_prog_data *wm_prog_data =
+ brw_wm_prog_data(brw->wm.base.prog_data);
+ uint32_t max_source_attr = 0;
+
+ *point_sprite_enables = 0;
+
+ /* BRW_NEW_FRAGMENT_PROGRAM
+ *
+ * If the fragment shader reads VARYING_SLOT_LAYER or VARYING_SLOT_VIEWPORT,
+ * we need to pass in the full vertex header. Otherwise, we can program the
+ * SF to start reading at an offset of 1 (2 varying slots) to skip unnecessary data:
+ * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
+ * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+
+ */
+
+ bool fs_needs_vue_header = brw->fragment_program->info.inputs_read &
+ (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
+
+ *urb_entry_read_offset = fs_needs_vue_header ? 0 : 1;
+
+ /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE,
+ * description of dw10 Point Sprite Texture Coordinate Enable:
+ *
+ * "This field must be programmed to zero when non-point primitives
+ * are rendered."
+ *
+ * The SandyBridge PRM doesn't explicitly say that point sprite enables
+ * must be programmed to zero when rendering non-point primitives, but
+ * the IvyBridge PRM does, and if we don't, we get garbage.
+ *
+ * This is not required on Haswell, as the hardware ignores this state
+ * when drawing non-points -- although we do still need to be careful to
+ * correctly set the attr overrides.
+ *
+ * _NEW_POLYGON
+ * BRW_NEW_PRIMITIVE | BRW_NEW_GS_PROG_DATA | BRW_NEW_TES_PROG_DATA
+ */
+ bool drawing_points = brw_is_drawing_points(brw);
+
+ for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
+ int input_index = wm_prog_data->urb_setup[attr];
+
+ if (input_index < 0)
+ continue;
+
+ /* _NEW_POINT */
+ bool point_sprite = false;
+ if (drawing_points) {
+ if (point->PointSprite &&
+ (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7) &&
+ (point->CoordReplace & (1u << (attr - VARYING_SLOT_TEX0)))) {
+ point_sprite = true;
+ }
+
+ if (attr == VARYING_SLOT_PNTC)
+ point_sprite = true;
+
+ if (point_sprite)
+ *point_sprite_enables |= (1 << input_index);
+ }
+
+ /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
+ struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attribute = { 0 };
+
+ if (!point_sprite) {
+ genX(get_attr_override)(&attribute,
+ &brw->vue_map_geom_out,
+ *urb_entry_read_offset, attr,
+ brw->ctx.VertexProgram._TwoSideEnabled,
+ &max_source_attr);
+ }
+
+ /* The hardware can only apply overrides to the first 16
+ * attributes; any further ones (up to 16 more) have to be lined up
+ * so that the input index = the output index. We'll need to do
+ * some tweaking to make sure that's the case.
+ */
+ if (input_index < 16)
+ attr_overrides[input_index] = attribute;
+ else
+ assert(attribute.SourceAttribute == input_index);
+ }
+
+ /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
+ * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
+ *
+ * "This field should be set to the minimum length required to read the
+ * maximum source attribute. The maximum source attribute is indicated
+ * by the maximum value of the enabled Attribute # Source Attribute if
+ * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
+ * enable is not set.
+ * read_length = ceiling((max_source_attr + 1) / 2)
+ *
+ * [errata] Corruption/Hang possible if length programmed larger than
+ * recommended"
+ *
+ * Similar text exists for Ivy Bridge.
+ */
+ *urb_entry_read_length = DIV_ROUND_UP(max_source_attr + 1, 2);
+}
+#endif
+
/* ---------------------------------------------------------------------- */
#if GEN_GEN >= 6
@@ -340,6 +570,190 @@ static const struct brw_tracked_state genX(clip_state) = {
.emit = genX(upload_clip_state),
};
+/* ---------------------------------------------------------------------- */
+
+static void
+genX(upload_sf)(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->ctx;
+ float point_size;
+
+#if GEN_GEN <= 7
+ /* _NEW_BUFFERS */
+ bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
+ const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
+#endif
+
+ brw_batch_emit(brw, GENX(3DSTATE_SF), sf) {
+ sf.StatisticsEnable = true;
+ sf.ViewportTransformEnable = brw->sf.viewport_transform_enable;
+
+#if GEN_GEN == 7
+ /* _NEW_BUFFERS */
+ sf.DepthBufferSurfaceFormat = brw_depthbuffer_format(brw);
+#endif
+
+#if GEN_GEN <= 7
+ /* _NEW_POLYGON */
+ sf.FrontWinding = ctx->Polygon._FrontBit == render_to_fbo;
+ sf.GlobalDepthOffsetEnableSolid = ctx->Polygon.OffsetFill;
+ sf.GlobalDepthOffsetEnableWireframe = ctx->Polygon.OffsetLine;
+ sf.GlobalDepthOffsetEnablePoint = ctx->Polygon.OffsetPoint;
+
+ switch (ctx->Polygon.FrontMode) {
+ case GL_FILL:
+ sf.FrontFaceFillMode = FILL_MODE_SOLID;
+ break;
+ case GL_LINE:
+ sf.FrontFaceFillMode = FILL_MODE_WIREFRAME;
+ break;
+ case GL_POINT:
+ sf.FrontFaceFillMode = FILL_MODE_POINT;
+ break;
+ default:
+ unreachable("not reached");
+ }
+
+ switch (ctx->Polygon.BackMode) {
+ case GL_FILL:
+ sf.BackFaceFillMode = FILL_MODE_SOLID;
+ break;
+ case GL_LINE:
+ sf.BackFaceFillMode = FILL_MODE_WIREFRAME;
+ break;
+ case GL_POINT:
+ sf.BackFaceFillMode = FILL_MODE_POINT;
+ break;
+ default:
+ unreachable("not reached");
+ }
+
+ sf.ScissorRectangleEnable = true;
+
+ if (ctx->Polygon.CullFlag) {
+ switch (ctx->Polygon.CullFaceMode) {
+ case GL_FRONT:
+ sf.CullMode = CULLMODE_FRONT;
+ break;
+ case GL_BACK:
+ sf.CullMode = CULLMODE_BACK;
+ break;
+ case GL_FRONT_AND_BACK:
+ sf.CullMode = CULLMODE_BOTH;
+ break;
+ default:
+ unreachable("not reached");
+ }
+ } else {
+ sf.CullMode = CULLMODE_NONE;
+ }
+
+#if GEN_IS_HASWELL
+ sf.LineStippleEnable = ctx->Line.StippleFlag;
+#endif
+
+ if (multisampled_fbo && ctx->Multisample.Enabled)
+ sf.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
+
+ sf.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2;
+ sf.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor;
+ sf.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp;
+#endif
+
+ /* _NEW_LINE */
+ sf.LineWidth = brw_get_line_width_float(brw);
+
+ if (ctx->Line.SmoothFlag) {
+ sf.LineEndCapAntialiasingRegionWidth = _10pixels;
+#if GEN_GEN <= 7
+ sf.AntiAliasingEnable = true;
+#endif
+ }
+
+ /* _NEW_POINT - Clamp to ARB_point_parameters user limits */
+ point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
+ /* Clamp to the hardware limits */
+ sf.PointWidth = CLAMP(point_size, 0.125f, 255.875f);
+
+ /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */
+ if (use_state_point_size(brw))
+ sf.PointWidthSource = State;
+
+#if GEN_GEN >= 8
+ /* _NEW_POINT | _NEW_MULTISAMPLE */
+ if ((ctx->Point.SmoothFlag || _mesa_is_multisample_enabled(ctx)) &&
+ !ctx->Point.PointSprite)
+ sf.SmoothPointEnable = true;
+#endif
+
+ sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
+
+ /* _NEW_LIGHT */
+ if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
+ sf.TriangleStripListProvokingVertexSelect = 2;
+ sf.TriangleFanProvokingVertexSelect = 2;
+ sf.LineStripListProvokingVertexSelect = 1;
+ } else {
+ sf.TriangleFanProvokingVertexSelect = 1;
+ }
+
+#if GEN_GEN == 6
+ /* BRW_NEW_FS_PROG_DATA */
+ const struct brw_wm_prog_data *wm_prog_data =
+ brw_wm_prog_data(brw->wm.base.prog_data);
+
+ sf.AttributeSwizzleEnable = true;
+ sf.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
+
+ /*
+ * Window coordinates in an FBO are inverted, which means point
+ * sprite origin must be inverted, too.
+ */
+ if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) {
+ sf.PointSpriteTextureCoordinateOrigin = LOWERLEFT;
+ } else {
+ sf.PointSpriteTextureCoordinateOrigin = UPPERLEFT;
+ }
+
+ /* BRW_NEW_VUE_MAP_GEOM_OUT | BRW_NEW_FRAGMENT_PROGRAM |
+ * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA
+ */
+ uint32_t urb_entry_read_length;
+ uint32_t urb_entry_read_offset;
+ uint32_t point_sprite_enables;
+ genX(calculate_attr_overrides)(brw, sf.Attribute, &point_sprite_enables,
+ &urb_entry_read_length,
+ &urb_entry_read_offset);
+ sf.VertexURBEntryReadLength = urb_entry_read_length;
+ sf.VertexURBEntryReadOffset = urb_entry_read_offset;
+ sf.PointSpriteTextureCoordinateEnable = point_sprite_enables;
+ sf.ConstantInterpolationEnable = wm_prog_data->flat_inputs;
+#endif
+ }
+}
+
+static const struct brw_tracked_state genX(sf_state) = {
+ .dirty = {
+ .mesa = _NEW_LIGHT |
+ _NEW_LINE |
+ _NEW_MULTISAMPLE |
+ _NEW_POINT |
+ _NEW_PROGRAM |
+ (GEN_GEN <= 7 ? _NEW_BUFFERS | _NEW_POLYGON : 0),
+ .brw = BRW_NEW_BLORP |
+ BRW_NEW_CONTEXT |
+ BRW_NEW_VUE_MAP_GEOM_OUT |
+ (GEN_GEN <= 7 ? BRW_NEW_GS_PROG_DATA |
+ BRW_NEW_PRIMITIVE |
+ BRW_NEW_TES_PROG_DATA
+ : 0) |
+ (GEN_GEN == 6 ? BRW_NEW_FS_PROG_DATA |
+ BRW_NEW_FRAGMENT_PROGRAM
+ : 0),
+ },
+ .emit = genX(upload_sf),
+};
+
#endif
/* ---------------------------------------------------------------------- */
@@ -572,7 +986,7 @@ genX(init_atoms)(struct brw_context *brw)
&gen6_vs_state,
&gen6_gs_state,
&genX(clip_state),
- &gen6_sf_state,
+ &genX(sf_state),
&gen6_wm_state,
&gen6_scissor_state,
@@ -661,7 +1075,7 @@ genX(init_atoms)(struct brw_context *brw)
&gen7_sol_state,
&genX(clip_state),
&gen7_sbe_state,
- &gen7_sf_state,
+ &genX(sf_state),
&gen7_wm_state,
&gen7_ps_state,
@@ -749,7 +1163,7 @@ genX(init_atoms)(struct brw_context *brw)
&genX(clip_state),
&genX(raster_state),
&gen8_sbe_state,
- &gen8_sf_state,
+ &genX(sf_state),
&gen8_ps_blend,
&gen8_ps_extra,
&gen8_ps_state,
--
git-series 0.9.1
More information about the mesa-dev
mailing list