[Mesa-dev] [PATCH 02/70] i965: Pack read-only booleans into a bitfield
Chris Wilson
chris at chris-wilson.co.uk
Fri Aug 7 13:13:06 PDT 2015
GCC's read access for single bits in a bitfield is reasonable (just a
move + flag comparison), so let's save some cachelines by packing the
write-once/read-many booleans together.
text data bss dec hex filename
6490134 191992 26192 6708318 665c5e lib64/i965_dri.so
6491766 191992 26192 6709950 6662be lib64/i965_dri.so
There is a small inflation in text size due to the extra immediate masks,
and it is entirely dubious whether it is worth it.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
src/mesa/drivers/dri/i965/brw_context.h | 76 +++++++++++++++--------------
src/mesa/drivers/dri/i965/brw_device_info.h | 32 ++++++------
src/mesa/drivers/dri/i965/intel_screen.h | 18 +++----
3 files changed, 65 insertions(+), 61 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index cd43ac5..62e39be 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1088,16 +1088,6 @@ struct brw_context
GLuint stats_wm;
- /**
- * drirc options:
- * @{
- */
- bool no_rast;
- bool always_flush_batch;
- bool always_flush_cache;
- bool disable_throttling;
- bool precompile;
-
driOptionCache optionCache;
/** @} */
@@ -1105,36 +1095,33 @@ struct brw_context
GLenum reduced_primitive;
- /**
- * Set if we're either a debug context or the INTEL_DEBUG=perf environment
- * variable is set, this is the flag indicating to do expensive work that
- * might lead to a perf_debug() call.
- */
- bool perf_debug;
-
uint32_t max_gtt_map_object_size;
int gen;
int gt;
- bool is_g4x;
- bool is_baytrail;
- bool is_haswell;
- bool is_cherryview;
- bool is_broxton;
-
- bool has_hiz;
- bool has_separate_stencil;
- bool must_use_separate_stencil;
- bool has_llc;
- bool has_swizzling;
- bool has_surface_tile_offset;
- bool has_compr4;
- bool has_negative_rhw_bug;
- bool has_pln;
- bool no_simd8;
- bool use_rep_send;
- bool use_resource_streamer;
+ /* So long as we do not frequently write to these booleans, we can
+ * pack them into a bitfield with fair efficiency (GCC converting the
+ * access into a mov + flag test).
+ */
+ bool is_g4x : 1;
+ bool is_baytrail : 1;
+ bool is_haswell : 1;
+ bool is_cherryview : 1;
+ bool is_broxton : 1;
+
+ bool has_hiz : 1;
+ bool has_separate_stencil : 1;
+ bool must_use_separate_stencil : 1;
+ bool has_llc : 1;
+ bool has_swizzling : 1;
+ bool has_surface_tile_offset : 1;
+ bool has_compr4 : 1;
+ bool has_negative_rhw_bug : 1;
+ bool has_pln : 1;
+ bool no_simd8 : 1;
+ bool use_rep_send : 1;
+ bool use_resource_streamer : 1;
/**
* Some versions of Gen hardware don't do centroid interpolation correctly
@@ -1143,7 +1130,24 @@ struct brw_context
* non-centroid interpolation for unlit pixels, at the expense of two extra
* fragment shader instructions.
*/
- bool needs_unlit_centroid_workaround;
+ bool needs_unlit_centroid_workaround : 1;
+
+ /**
+ * Set if we're either a debug context or the INTEL_DEBUG=perf environment
+ * variable is set, this is the flag indicating to do expensive work that
+ * might lead to a perf_debug() call.
+ */
+ bool perf_debug : 1;
+
+ /**
+ * drirc options:
+ * @{
+ */
+ bool no_rast : 1;
+ bool always_flush_batch : 1;
+ bool always_flush_cache : 1;
+ bool disable_throttling : 1;
+ bool precompile : 1;
GLuint NewGLState;
struct {
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h b/src/mesa/drivers/dri/i965/brw_device_info.h
index 2a73e93..b5502b8 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.h
+++ b/src/mesa/drivers/dri/i965/brw_device_info.h
@@ -30,29 +30,29 @@ struct brw_device_info
int gen; /**< Generation number: 4, 5, 6, 7, ... */
int gt;
- bool is_g4x;
- bool is_ivybridge;
- bool is_baytrail;
- bool is_haswell;
- bool is_cherryview;
- bool is_broxton;
+ bool is_g4x : 1;
+ bool is_ivybridge : 1;
+ bool is_baytrail : 1;
+ bool is_haswell : 1;
+ bool is_cherryview : 1;
+ bool is_broxton : 1;
- bool has_hiz_and_separate_stencil;
- bool must_use_separate_stencil;
+ bool has_hiz_and_separate_stencil : 1;
+ bool must_use_separate_stencil : 1;
- bool has_llc;
+ bool has_llc : 1;
- bool has_pln;
- bool has_compr4;
- bool has_surface_tile_offset;
- bool supports_simd16_3src;
- bool has_resource_streamer;
+ bool has_pln : 1;
+ bool has_compr4 : 1;
+ bool has_surface_tile_offset : 1;
+ bool supports_simd16_3src : 1;
+ bool has_resource_streamer : 1;
/**
* Quirks:
* @{
*/
- bool has_negative_rhw_bug;
+ bool has_negative_rhw_bug : 1;
/**
* Some versions of Gen hardware don't do centroid interpolation correctly
@@ -61,7 +61,7 @@ struct brw_device_info
* non-centroid interpolation for unlit pixels, at the expense of two extra
* fragment shader instructions.
*/
- bool needs_unlit_centroid_workaround;
+ bool needs_unlit_centroid_workaround : 1;
/** @} */
/**
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h
index fd5143e..0bae95e 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -46,14 +46,14 @@ struct intel_screen
__DRIscreen *driScrnPriv;
- bool no_hw;
-
- bool hw_must_use_separate_stencil;
-
- bool hw_has_swizzling;
-
- int hw_has_timestamp;
-
+ /* So long as we do not frequently write to these booleans, we can
+ * pack them into a bitfield with fair efficiency (GCC converting the
+ * access into a mov + flag test).
+ */
+ bool no_hw : 1;
+ bool hw_must_use_separate_stencil : 1;
+ bool hw_has_swizzling : 1;
+ unsigned hw_has_timestamp : 2;
/**
* Does the kernel support resource streamer?
*/
@@ -62,7 +62,7 @@ struct intel_screen
/**
* Does the kernel support context reset notifications?
*/
- bool has_context_reset_notification;
+ bool has_context_reset_notification : 1;
dri_bufmgr *bufmgr;
--
2.5.0
More information about the mesa-dev
mailing list