[Mesa-dev] [PATCH 10/11] mesa/glthread: decrease the batch size for better perf scaling

Thu Jun 22 01:03:05 UTC 2017

From: Marek Olšák <marek.olsak at amd.com>

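Smaller batches are the key to better performance scaling: shrink the batch size from 65535 bytes to 8 KB and raise the number of batch slots from 4 to 8.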
---
 src/mesa/main/glthread.h | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/glthread.h b/src/mesa/main/glthread.h
index 36692fe..dd65931 100644
--- a/src/mesa/main/glthread.h
+++ b/src/mesa/main/glthread.h
@@ -19,30 +19,38 @@
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */
 
 #ifndef _GLTHREAD_H
 #define _GLTHREAD_H
 
 #include "main/mtypes.h"
 
-/* Command size is a number of bytes stored in a short. */
-#define MARSHAL_MAX_CMD_SIZE 65535
+/* The size of one batch in bytes, which is also the maximum size of one call.
+ *
+ * This should be as low as possible, so that:
+ * - multiple synchronizations within a frame don't slow us down much
+ * - a smaller number of calls per frame can still get decent parallelism
+ * - the memory footprint of the queue is low, and with that comes a lower
+ *   chance of experiencing CPU cache thrashing
+ * but it should be high enough so that u_queue overhead remains negligible.
+ */
+#define MARSHAL_MAX_CMD_SIZE (8 * 1024)
 
 /* The number of batch slots in memory.
  *
  * One batch is being executed, one batch is being filled, the rest are
  * waiting batches. There must be at least 1 slot for a waiting batch,
  * so the minimum number of batches is 3.
  */
-#define MARSHAL_MAX_BATCHES 4
+#define MARSHAL_MAX_BATCHES 8
 
 #include <inttypes.h>
 #include <stdbool.h>
 #include <pthread.h>
 #include "util/u_queue.h"
 
 enum marshal_dispatch_cmd_id;
 
 /** A single batch of commands queued up for execution. */
 struct glthread_batch
-- 
2.7.4