[Intel-gfx] [PATCH 5/5] tools/null_state_gen: build cmd and state space separately

Mika Kuoppala mika.kuoppala at linux.intel.com
Fri Aug 1 20:19:56 CEST 2014


Instead of building the batch directly into memory, build it into
separate cmd and state arrays. This representation gives us more
flexibility in expressing batch state and in batch generation/relocation.

As a bonus, we can also attach the source expression that produced each
batch dword, which helps debugging.

There is no change in the output states produced. This can be considered
a preparatory patch for introducing the gen8 golden state.
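
To make the new representation concrete, below is a condensed,
standalone sketch of the cmd/state bookkeeping this patch introduces.
The names mirror the patch; the sizes and the main() driver are
illustrative only:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAX_ITEMS 16
#define MAX_STRLEN 64

/* Every emitted dword remembers the expression that produced it. */
struct bb_item {
	uint32_t data;
	char str[MAX_STRLEN];
};

struct bb_area {
	struct bb_item item[MAX_ITEMS];
	unsigned long num_items;
};

static void bb_area_emit(struct bb_area *a, uint32_t dword, const char *str)
{
	assert(a->num_items < MAX_ITEMS);
	a->item[a->num_items].data = dword;
	strncpy(a->item[a->num_items].str, str, MAX_STRLEN - 1);
	a->num_items++;
}

/* Stringifying the argument is what enables annotated output. */
#define OUT_BATCH(a, d) bb_area_emit(a, d, #d)

int main(void)
{
	struct bb_area cmds = { .num_items = 0 };
	unsigned long i;

	OUT_BATCH(&cmds, 0xA << 23); /* MI_BATCH_BUFFER_END */

	for (i = 0; i < cmds.num_items; i++)
		printf("\t0x%08x, /* %s */\n",
		       cmds.item[i].data, cmds.item[i].str);

	return 0;
}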

Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
---
 tools/null_state_gen/intel_batchbuffer.c      |  251 +++++++++++++++++--------
 tools/null_state_gen/intel_batchbuffer.h      |   86 +++++----
 tools/null_state_gen/intel_null_state_gen.c   |  100 +++-------
 tools/null_state_gen/intel_renderstate_gen6.c |  136 ++++++--------
 tools/null_state_gen/intel_renderstate_gen7.c |  126 ++++---------
 tools/null_state_gen/intel_renderstate_gen8.c |  167 ++++++----------
 6 files changed, 404 insertions(+), 462 deletions(-)
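
For illustration, here is a tiny standalone sketch of what the new
intel_batch_relocate_state() pass does with offsets: pad the cmd area
to 64-byte alignment, append the state area after it, then bump every
dword tagged as a state-relative offset by the final state start. The
dword values below are made up; only the arithmetic mirrors the patch.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Two cmd dwords; the second holds a state-relative offset. */
	uint32_t dwords[64] = { 0x78230000, 0x0 };
	int is_state_offset[64] = { 0, 1 };
	unsigned n = 2, i;

	/* Pad the cmd area to 64-byte alignment. */
	while ((n * 4) % 64)
		dwords[n++] = 0;

	unsigned state_start = n * 4;	/* state begins at 0x40 */
	dwords[n++] = 0xdeadbeef;	/* a single, made-up state dword */

	/* Fix up every state-relative offset. */
	for (i = 0; i < n; i++)
		if (is_state_offset[i])
			dwords[i] += state_start;

	printf("state_start=0x%x cmd[1]=0x%08x\n", state_start, dwords[1]);
	return 0;
}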

diff --git a/tools/null_state_gen/intel_batchbuffer.c b/tools/null_state_gen/intel_batchbuffer.c
index 62e052a..2a0b340 100644
--- a/tools/null_state_gen/intel_batchbuffer.c
+++ b/tools/null_state_gen/intel_batchbuffer.c
@@ -29,145 +29,248 @@
  **************************************************************************/
 
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #include <errno.h>
+#include <assert.h>
 
 #include "intel_batchbuffer.h"
 
-int intel_batch_reset(struct intel_batchbuffer *batch,
-		      void *p,
-		      uint32_t size,
-		      uint32_t off)
+void bb_area_emit(struct bb_area *a, uint32_t dword, item_type type, const char *str)
 {
-	batch->err = -EINVAL;
-	batch->base = batch->base_ptr = p;
-	batch->state_base = batch->state_ptr = p;
+	struct bb_item *item;
+	assert(a != NULL);
+	assert(a->num_items < MAX_ITEMS);
+	item = &a->item[a->num_items];
 
-	if (off >= size || ALIGN(off, 4) != off)
-		return -EINVAL;
+	item->data = dword;
+	item->type = type;
+	strncpy(item->str, str, MAX_STRLEN);
+	item->str[MAX_STRLEN - 1] = 0;
 
-	batch->size = size;
+	a->num_items++;
+}
 
-	batch->state_base = batch->state_ptr = &batch->base[off];
+void bb_area_emit_offset(struct bb_area *a, unsigned offset, uint32_t dword, item_type type, const char *str)
+{
+	const unsigned i = offset / 4;
+	struct bb_item *item;
+	assert(a != NULL);
+	assert(a->num_items < MAX_ITEMS);
+	assert(i < a->num_items);
+	item = &a->item[i];
+
+	item->data = dword;
+	item->type = type;
+	strncpy(item->str, str, MAX_STRLEN);
+	item->str[MAX_STRLEN - 1] = 0;
+}
 
-	batch->num_relocs = 0;
-	batch->err = 0;
+static struct bb_item *bb_area_get(struct bb_area *a, unsigned i)
+{
+	assert(i < a->num_items);
+	return &a->item[i];
+}
 
-	return batch->err;
+static unsigned bb_area_items(struct bb_area *a)
+{
+	return a->num_items;
 }
 
-uint32_t intel_batch_state_used(struct intel_batchbuffer *batch)
+static unsigned long bb_area_used(struct bb_area *a)
 {
-	return batch->state_ptr - batch->state_base;
+	assert(a != NULL);
+	assert(a->num_items <= MAX_ITEMS);
+
+	return a->num_items * 4;
 }
 
-uint32_t intel_batch_state_offset(struct intel_batchbuffer *batch)
+static unsigned long bb_area_room(struct bb_area *a)
 {
-	return batch->state_ptr - batch->base;
+	assert(a != NULL);
+	assert(a->num_items <= MAX_ITEMS);
+
+	return (MAX_ITEMS - a->num_items) * 4;
 }
 
-void *intel_batch_state_alloc(struct intel_batchbuffer *batch,
-			      uint32_t size,
-			      uint32_t align)
+struct intel_batchbuffer *intel_batchbuffer_create(void)
 {
-	uint32_t cur;
-	uint32_t offset;
+	struct intel_batchbuffer *batch;
 
-	if (batch->err)
+	batch = calloc(1, sizeof(*batch));
+	if (batch == NULL)
 		return NULL;
 
-	cur  = intel_batch_state_offset(batch);
-	offset = ALIGN(cur, align);
+	batch->cmds = calloc(1, sizeof(struct bb_area));
+	if (batch->cmds == NULL) {
+		free(batch);
+		return NULL;
+	}
 
-	if (offset + size > batch->size) {
-		batch->err = -ENOSPC;
+	batch->state = calloc(1, sizeof(struct bb_area));
+	if (batch->state == NULL) {
+		free(batch->cmds);
+		free(batch);
 		return NULL;
 	}
 
-	batch->state_ptr = batch->base + offset + size;
+	batch->state_start_offset = -1;
+	batch->cmds_end_offset = -1;
 
-	memset(batch->base + cur, 0, size);
+	return batch;
+}
 
-	return batch->base + offset;
+static void bb_area_align(struct bb_area *a, unsigned align)
+{
+	if (align == 0)
+		return;
+
+	assert((align % 4) == 0);
+
+	while ((a->num_items * 4) % align != 0)
+		bb_area_emit(a, 0, PAD, "align pad");
 }
 
-int intel_batch_offset(struct intel_batchbuffer *batch, const void *ptr)
+static int reloc_exists(struct intel_batchbuffer *batch, uint32_t offset)
 {
-	return (uint8_t *)ptr - batch->base;
+	int i;
+
+	for (i = 0; i < batch->cmds->num_items; i++)
+		if ((batch->cmds->item[i].type == RELOC ||
+		     batch->cmds->item[i].type == RELOC_STATE) &&
+		    i * 4 == offset)
+			return 1;
+
+	return 0;
 }
 
-int intel_batch_state_copy(struct intel_batchbuffer *batch,
-			   const void *ptr,
-			   const uint32_t size,
-			   const uint32_t align)
+int intel_batch_is_reloc(struct intel_batchbuffer *batch, unsigned i)
 {
-	void * const p = intel_batch_state_alloc(batch, size, align);
+	return reloc_exists(batch, i * 4);
+}
 
-	if (p == NULL)
-		return -1;
+static void intel_batch_cmd_align(struct intel_batchbuffer *batch, unsigned align)
+{
+	bb_area_align(batch->cmds, align);
+}
 
-	return intel_batch_offset(batch, memcpy(p, ptr, size));
+static void intel_batch_state_align(struct intel_batchbuffer *batch, unsigned align)
+{
+	bb_area_align(batch->state, align);
 }
 
-uint32_t intel_batch_cmds_used(struct intel_batchbuffer *batch)
+unsigned intel_batch_num_cmds(struct intel_batchbuffer *batch)
 {
-	return batch->base_ptr - batch->base;
+	return bb_area_items(batch->cmds);
 }
 
-uint32_t intel_batch_total_used(struct intel_batchbuffer *batch)
+static unsigned intel_batch_num_state(struct intel_batchbuffer *batch)
 {
-	return batch->state_ptr - batch->base;
+	return bb_area_items(batch->state);
 }
 
-static uint32_t intel_batch_space(struct intel_batchbuffer *batch)
+struct bb_item *intel_batch_cmd_get(struct intel_batchbuffer *batch, unsigned i)
 {
-	return batch->state_base - batch->base_ptr;
+	return bb_area_get(batch->cmds, i);
 }
 
-int intel_batch_emit_dword(struct intel_batchbuffer *batch, uint32_t dword)
+struct bb_item *intel_batch_state_get(struct intel_batchbuffer *batch, unsigned i)
 {
-	uint32_t offset;
+	return bb_area_get(batch->state, i);
+}
 
-	if (batch->err)
-		return -1;
+uint32_t intel_batch_state_offset(struct intel_batchbuffer *batch, unsigned align)
+{
+	intel_batch_state_align(batch, align);
+	return bb_area_used(batch->state);
+}
 
-	if (intel_batch_space(batch) < 4) {
-		batch->err = -ENOSPC;
-		return -1;
-	}
+uint32_t intel_batch_state_alloc(struct intel_batchbuffer *batch, unsigned bytes, unsigned align,
+				 const char *str)
+{
+	unsigned offset;
+	unsigned dwords = bytes / 4;
+	assert((bytes % 4) == 0);
+	assert(bb_area_room(batch->state) >= bytes);
 
-	offset = intel_batch_offset(batch, batch->base_ptr);
+	offset = intel_batch_state_offset(batch, align);
 
-	*(uint32_t *) (batch->base_ptr) = dword;
-	batch->base_ptr += 4;
+	while (dwords--)
+		bb_area_emit(batch->state, 0, UNINITIALIZED, str);
 
 	return offset;
 }
 
-int intel_batch_emit_reloc(struct intel_batchbuffer *batch,
-			   const uint32_t delta)
+uint32_t intel_batch_state_copy(struct intel_batchbuffer *batch,
+				void *d, unsigned bytes,
+				unsigned align,
+				const char *str)
 {
-	uint32_t offset;
+	unsigned offset;
+	unsigned i;
+	unsigned dwords = bytes / 4;
+	assert(d != NULL);
+	assert((bytes % 4) == 0);
+	assert(bb_area_room(batch->state) >= bytes);
+
+	offset = intel_batch_state_offset(batch, align);
 
-	if (batch->err)
-		return -1;
+	for (i = 0; i < dwords; i++) {
+		char offsetinside[80];
+		snprintf(offsetinside, sizeof(offsetinside), "%s: 0x%x", str, i * 4);
 
-	if (delta >= batch->size) {
-		batch->err = -EINVAL;
-		return -1;
+		uint32_t *s = (uint32_t *)d + i;
+		bb_area_emit(batch->state, *s, STATE, offsetinside);
 	}
 
-	offset = intel_batch_emit_dword(batch, delta);
+	return offset;
+}
+
+void intel_batch_relocate_state(struct intel_batchbuffer *batch)
+{
+	unsigned int i;
+
+	assert(batch->state_start_offset == -1);
 
-	if (batch->err)
-		return -1;
+	batch->cmds_end_offset = bb_area_used(batch->cmds) - 4;
 
-	if (batch->num_relocs >= MAX_RELOCS) {
-		batch->err = -ENOSPC;
-		return -1;
+	/* Hardcoded alignment; we could also track the maximum alignment used */
+	intel_batch_cmd_align(batch, 64);
+
+	batch->state_start_offset = bb_area_used(batch->cmds);
+
+	for (i = 0; i < bb_area_items(batch->state); i++) {
+		const struct bb_item *s = bb_area_get(batch->state, i);
+
+		bb_area_emit(batch->cmds, s->data, s->type, s->str);
 	}
 
-	batch->relocs[batch->num_relocs++] = offset;
+	for (i = 0; i < bb_area_items(batch->cmds); i++) {
+		struct bb_item *s = bb_area_get(batch->cmds, i);
 
-	return offset;
+		if (s->type == STATE_OFFSET || s->type == RELOC_STATE)
+			s->data += batch->state_start_offset;
+	}
+}
+
+const char *intel_batch_type_as_str(const struct bb_item *item)
+{
+	switch (item->type) {
+	case UNINITIALIZED:
+		return "UNINITIALIZED";
+	case CMD:
+		return "CMD";
+	case STATE:
+		return "STATE";
+	case PAD:
+		return "PAD";
+	case RELOC:
+		return "RELOC";
+	case RELOC_STATE:
+		return "RELOC_STATE";
+	case STATE_OFFSET:
+		return "STATE_OFFSET";
+	}
+
+	return "UNKNOWN";
 }
diff --git a/tools/null_state_gen/intel_batchbuffer.h b/tools/null_state_gen/intel_batchbuffer.h
index f5c29db..e44c5c9 100644
--- a/tools/null_state_gen/intel_batchbuffer.h
+++ b/tools/null_state_gen/intel_batchbuffer.h
@@ -34,58 +34,64 @@
 #include <stdint.h>
 
 #define MAX_RELOCS 64
+#define MAX_ITEMS 4096
+#define MAX_STRLEN 256
+
 #define ALIGN(x, y) (((x) + (y)-1) & ~((y)-1))
 
-struct intel_batchbuffer {
-	int err;
-	uint8_t *base;
-	uint8_t *base_ptr;
-	uint8_t *state_base;
-	uint8_t *state_ptr;
-	int size;
-
-	uint32_t relocs[MAX_RELOCS];
-	uint32_t num_relocs;
+typedef enum {
+	UNINITIALIZED,
+	CMD,
+	STATE,
+	RELOC,
+	RELOC_STATE,
+	STATE_OFFSET,
+	PAD,
+} item_type;
+
+struct bb_item {
+	uint32_t data;
+	item_type type;
+	char str[MAX_STRLEN];
 };
 
-#define OUT_BATCH(d) intel_batch_emit_dword(batch, d)
-#define OUT_RELOC(batch, read_domains, write_domain, delta) \
-	intel_batch_emit_reloc(batch, delta)
-
-int intel_batch_reset(struct intel_batchbuffer *batch,
-		       void *p,
-		       uint32_t size, uint32_t split_off);
-
-uint32_t intel_batch_state_used(struct intel_batchbuffer *batch);
+struct bb_area {
+	struct bb_item item[MAX_ITEMS];
+	unsigned long num_items;
+};
 
-void *intel_batch_state_alloc(struct intel_batchbuffer *batch,
-			      uint32_t size,
-			      uint32_t align);
+struct intel_batchbuffer {
+	struct bb_area *cmds;
+	struct bb_area *state;
+	unsigned long cmds_end_offset;
+	unsigned long state_start_offset;
+};
 
-int intel_batch_offset(struct intel_batchbuffer *batch, const void *ptr);
+struct intel_batchbuffer *intel_batchbuffer_create(void);
 
-int intel_batch_state_copy(struct intel_batchbuffer *batch,
-			   const void *ptr,
-			   const uint32_t size,
-			   const uint32_t align);
+#define OUT_BATCH(d) bb_area_emit(batch->cmds, d, CMD, #d)
+#define OUT_BATCH_STATE_OFFSET(d) bb_area_emit(batch->cmds, d, STATE_OFFSET, #d)
+#define OUT_RELOC(batch, read_domain, write_domain, d) bb_area_emit(batch->cmds, d, RELOC, #d)
+#define OUT_RELOC_STATE(batch, read_domain, write_domain, d) bb_area_emit(batch->cmds, d, RELOC_STATE, #d)
+#define OUT_STATE(d) bb_area_emit(batch->state, d, STATE, #d)
+#define OUT_STATE_OFFSET(offset) bb_area_emit(batch->state, offset, STATE_OFFSET, #offset)
+#define OUT_STATE_STRUCT(name, align) intel_batch_state_copy(batch, &name, sizeof(name), align, #name " " #align)
 
-uint32_t intel_batch_cmds_used(struct intel_batchbuffer *batch);
+uint32_t intel_batch_state_copy(struct intel_batchbuffer *batch, void *d, unsigned bytes, unsigned align,
+				const char *name);
+uint32_t intel_batch_state_alloc(struct intel_batchbuffer *batch, unsigned bytes, unsigned align,
+				 const char *name);
 
-int intel_batch_emit_dword(struct intel_batchbuffer *batch, uint32_t dword);
+unsigned intel_batch_num_cmds(struct intel_batchbuffer *batch);
 
-int intel_batch_emit_reloc(struct intel_batchbuffer *batch,
-			   const uint32_t delta);
+struct bb_item *intel_batch_cmd_get(struct intel_batchbuffer *batch, unsigned i);
+int intel_batch_is_reloc(struct intel_batchbuffer *batch, unsigned i);
 
-uint32_t intel_batch_total_used(struct intel_batchbuffer *batch);
+void intel_batch_relocate_state(struct intel_batchbuffer *batch);
 
-static inline int intel_batch_error(struct intel_batchbuffer *batch)
-{
-	return batch->err;
-}
+const char *intel_batch_type_as_str(const struct bb_item *item);
 
-static inline uint32_t intel_batch_state_start(struct intel_batchbuffer *batch)
-{
-	return batch->state_base - batch->base;
-}
+void bb_area_emit(struct bb_area *a, uint32_t dword, item_type type, const char *str);
+void bb_area_emit_offset(struct bb_area *a, unsigned i, uint32_t dword, item_type type, const char *str);
 
 #endif
diff --git a/tools/null_state_gen/intel_null_state_gen.c b/tools/null_state_gen/intel_null_state_gen.c
index 945926f..b337706 100644
--- a/tools/null_state_gen/intel_null_state_gen.c
+++ b/tools/null_state_gen/intel_null_state_gen.c
@@ -11,6 +11,8 @@ extern int gen6_setup_null_render_state(struct intel_batchbuffer *batch);
 extern int gen7_setup_null_render_state(struct intel_batchbuffer *batch);
 extern int gen8_setup_null_render_state(struct intel_batchbuffer *batch);
 
+static int debug = 0;
+
 static void print_usage(char *s)
 {
 	fprintf(stderr, "%s: <gen>\n"
@@ -18,17 +20,6 @@ static void print_usage(char *s)
 	       s);
 }
 
-static int is_reloc(struct intel_batchbuffer *batch, uint32_t offset)
-{
-	int i;
-
-	for (i = 0; i < batch->num_relocs; i++)
-		if (batch->relocs[i] == offset)
-			return 1;
-
-	return 0;
-}
-
 static int print_state(int gen, struct intel_batchbuffer *batch)
 {
 	int i;
@@ -36,33 +27,37 @@ static int print_state(int gen, struct intel_batchbuffer *batch)
 	printf("#include \"intel_renderstate.h\"\n\n");
 
 	printf("static const u32 gen%d_null_state_relocs[] = {\n", gen);
-	for (i = 0; i < batch->num_relocs; i++) {
-		printf("\t0x%08x,\n", batch->relocs[i]);
+	for (i = 0; i < batch->cmds->num_items; i++) {
+		if (intel_batch_is_reloc(batch, i))
+			printf("\t0x%08x,\n", i * 4);
 	}
 	printf("\t%d,\n", -1);
 	printf("};\n\n");
 
 	printf("static const u32 gen%d_null_state_batch[] = {\n", gen);
-	for (i = 0; i < batch->size; i += 4) {
-		const uint32_t *p = (void *)batch->base + i;
-		printf("\t0x%08x,", *p);
+	for (i = 0; i < intel_batch_num_cmds(batch); i++) {
+		const struct bb_item *cmd = intel_batch_cmd_get(batch, i);
+		printf("\t0x%08x,", cmd->data);
+
+		if (debug)
+			printf("\t /* 0x%08x %s '%s' */", i * 4,
+			       intel_batch_type_as_str(cmd), cmd->str);
 
-		if (i == intel_batch_cmds_used(batch) - 4)
+		if (i * 4 == batch->cmds_end_offset)
 			printf("\t /* cmds end */");
 
-		if (i == intel_batch_state_start(batch))
-			printf("\t /* state start */");
+		if (intel_batch_is_reloc(batch, i))
+			printf("\t /* reloc */");
 
+		if (i * 4 == batch->state_start_offset)
+			printf("\t /* state start */");
 
-		if (i == intel_batch_state_start(batch) +
-		    intel_batch_state_used(batch) - 4)
+		if (i == intel_batch_num_cmds(batch) - 1)
 			printf("\t /* state end */");
 
-		if (is_reloc(batch, i))
-			printf("\t /* reloc */");
-
 		printf("\n");
 	}
+
 	printf("};\n\nRO_RENDERSTATE(%d);\n", gen);
 
 	return 0;
@@ -70,23 +65,14 @@ static int print_state(int gen, struct intel_batchbuffer *batch)
 
 static int do_generate(int gen)
 {
-	int initial_size = 8192;
-	struct intel_batchbuffer batch;
-	void *p;
+	struct intel_batchbuffer *batch;
 	int ret = -EINVAL;
-	uint32_t cmd_len, state_len, size;
 	int (*null_state_gen)(struct intel_batchbuffer *batch) = NULL;
 
-	p = malloc(initial_size);
-	if (p == NULL)
+	batch = intel_batchbuffer_create();
+	if (batch == NULL)
 		return -ENOMEM;
 
-	assert(ALIGN(initial_size/2, STATE_ALIGN) == initial_size/2);
-
-	ret = intel_batch_reset(&batch, p, initial_size, initial_size/2);
-	if (ret)
-		goto out;
-
 	switch (gen) {
 	case 6:
 		null_state_gen = gen6_setup_null_render_state;
@@ -103,50 +89,26 @@ static int do_generate(int gen)
 
 	if (null_state_gen == NULL) {
 		printf("no generator found for %d\n", gen);
-		ret = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
-	ret = null_state_gen(&batch);
-	if (ret < 0)
-		goto out;
-
-	cmd_len = intel_batch_cmds_used(&batch);
-	state_len = intel_batch_state_used(&batch);
-
-	size = cmd_len + state_len + ALIGN(cmd_len, STATE_ALIGN) - cmd_len;
-
-	ret = intel_batch_reset(&batch, p, size, ALIGN(cmd_len, STATE_ALIGN));
-	if (ret)
-		goto out;
+	null_state_gen(batch);
+	intel_batch_relocate_state(batch);
 
-	ret = null_state_gen(&batch);
-	if (ret < 0)
-		goto out;
+	ret = print_state(gen, batch);
 
-	assert(cmd_len == intel_batch_cmds_used(&batch));
-	assert(state_len == intel_batch_state_used(&batch));
-	assert(size == ret);
-
-	/* Batch buffer needs to end */
-	assert(*(uint32_t *)(p + cmd_len - 4) == (0xA << 23));
-
-	ret = print_state(gen, &batch);
-out:
-	free(p);
-
-	if (ret < 0)
-		return ret;
-
-	return 0;
+	return ret;
 }
 
 int main(int argc, char *argv[])
 {
-	if (argc != 2) {
+	if (argc < 2) {
 		print_usage(argv[0]);
 		return 1;
 	}
 
+	if (argc > 2)
+		debug = 1;
+
 	return do_generate(atoi(argv[1]));
 }
diff --git a/tools/null_state_gen/intel_renderstate_gen6.c b/tools/null_state_gen/intel_renderstate_gen6.c
index f169d02..5f922f7 100644
--- a/tools/null_state_gen/intel_renderstate_gen6.c
+++ b/tools/null_state_gen/intel_renderstate_gen6.c
@@ -33,31 +33,23 @@ static const uint32_t ps_kernel_nomask_affine[][4] = {
 static uint32_t
 gen6_bind_buf_null(struct intel_batchbuffer *batch)
 {
-	struct gen6_surface_state *ss;
-	int ret;
+	struct gen6_surface_state ss;
+	memset(&ss, 0, sizeof(ss));
 
-	ss = intel_batch_state_alloc(batch, sizeof(*ss), 32);
-	if (ss == NULL)
-		return -1;
-
-	memset(ss, 0, sizeof(*ss));
-
-	return intel_batch_offset(batch, ss);
+	return OUT_STATE_STRUCT(ss, 32);
 }
 
 static uint32_t
 gen6_bind_surfaces(struct intel_batchbuffer *batch)
 {
-	uint32_t *binding_table;
+	unsigned offset;
 
-	binding_table = intel_batch_state_alloc(batch, 32, 32);
-	if (binding_table == NULL)
-		return -1;
+	offset = intel_batch_state_alloc(batch, 32, 32, "bind surfaces");
 
-	binding_table[0] = gen6_bind_buf_null(batch);
-	binding_table[1] = gen6_bind_buf_null(batch);
+	bb_area_emit_offset(batch->state, offset, gen6_bind_buf_null(batch), STATE_OFFSET, "bind 1");
+	bb_area_emit_offset(batch->state, offset + 4, gen6_bind_buf_null(batch), STATE_OFFSET, "bind 2");
 
-	return intel_batch_offset(batch, binding_table);
+	return offset;
 }
 
 static void
@@ -108,7 +100,7 @@ gen6_emit_viewports(struct intel_batchbuffer *batch, uint32_t cc_vp)
 		  (4 - 2));
 	OUT_BATCH(0);
 	OUT_BATCH(0);
-	OUT_BATCH(cc_vp);
+	OUT_BATCH_STATE_OFFSET(cc_vp);
 }
 
 static void
@@ -202,7 +194,7 @@ static void
 gen6_emit_cc(struct intel_batchbuffer *batch, uint32_t blend)
 {
 	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
-	OUT_BATCH(blend | 1);
+	OUT_BATCH_STATE_OFFSET(blend | 1);
 	OUT_BATCH(1024 | 1);
 	OUT_BATCH(1024 | 1);
 }
@@ -215,7 +207,7 @@ gen6_emit_sampler(struct intel_batchbuffer *batch, uint32_t state)
 		  (4 - 2));
 	OUT_BATCH(0); /* VS */
 	OUT_BATCH(0); /* GS */
-	OUT_BATCH(state);
+	OUT_BATCH_STATE_OFFSET(state);
 }
 
 static void
@@ -249,7 +241,7 @@ static void
 gen6_emit_wm(struct intel_batchbuffer *batch, int kernel)
 {
 	OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
-	OUT_BATCH(kernel);
+	OUT_BATCH_STATE_OFFSET(kernel);
 	OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
 		  2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
 	OUT_BATCH(0);
@@ -271,7 +263,7 @@ gen6_emit_binding_table(struct intel_batchbuffer *batch, uint32_t wm_table)
 		  (4 - 2));
 	OUT_BATCH(0);		/* vs */
 	OUT_BATCH(0);		/* gs */
-	OUT_BATCH(wm_table);
+	OUT_BATCH_STATE_OFFSET(wm_table);
 }
 
 static void
@@ -325,36 +317,32 @@ gen6_emit_vertex_elements(struct intel_batchbuffer *batch)
 static uint32_t
 gen6_create_cc_viewport(struct intel_batchbuffer *batch)
 {
-	struct gen6_cc_viewport *vp;
+	struct gen6_cc_viewport vp;
 
-	vp = intel_batch_state_alloc(batch, sizeof(*vp), 32);
-	if (vp == NULL)
-		return -1;
+	memset(&vp, 0, sizeof(vp));
 
-	vp->min_depth = -1.e35;
-	vp->max_depth = 1.e35;
+	vp.min_depth = -1.e35;
+	vp.max_depth = 1.e35;
 
-	return intel_batch_offset(batch, vp);
+	return OUT_STATE_STRUCT(vp, 32);
 }
 
 static uint32_t
 gen6_create_cc_blend(struct intel_batchbuffer *batch)
 {
-	struct gen6_blend_state *blend;
+	struct gen6_blend_state blend;
 
-	blend = intel_batch_state_alloc(batch, sizeof(*blend), 64);
-	if (blend == NULL)
-		return -1;
+	memset(&blend, 0, sizeof(blend));
 
-	blend->blend0.dest_blend_factor = GEN6_BLENDFACTOR_ZERO;
-	blend->blend0.source_blend_factor = GEN6_BLENDFACTOR_ONE;
-	blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD;
-	blend->blend0.blend_enable = 1;
+	blend.blend0.dest_blend_factor = GEN6_BLENDFACTOR_ZERO;
+	blend.blend0.source_blend_factor = GEN6_BLENDFACTOR_ONE;
+	blend.blend0.blend_func = GEN6_BLENDFUNCTION_ADD;
+	blend.blend0.blend_enable = 1;
 
-	blend->blend1.post_blend_clamp_enable = 1;
-	blend->blend1.pre_blend_clamp_enable = 1;
+	blend.blend1.post_blend_clamp_enable = 1;
+	blend.blend1.pre_blend_clamp_enable = 1;
 
-	return intel_batch_offset(batch, blend);
+	return OUT_STATE_STRUCT(blend, 64);
 }
 
 static uint32_t
@@ -362,7 +350,7 @@ gen6_create_kernel(struct intel_batchbuffer *batch)
 {
 	return intel_batch_state_copy(batch, ps_kernel_nomask_affine,
 				      sizeof(ps_kernel_nomask_affine),
-				      64);
+				      64, "ps_kernel");
 }
 
 static uint32_t
@@ -370,70 +358,64 @@ gen6_create_sampler(struct intel_batchbuffer *batch,
 		    sampler_filter_t filter,
 		   sampler_extend_t extend)
 {
-	struct gen6_sampler_state *ss;
+	struct gen6_sampler_state ss;
 
-	ss = intel_batch_state_alloc(batch, sizeof(*ss), 32);
-	if (ss == NULL)
-		return -1;
+	memset(&ss, 0, sizeof(ss));
 
-	ss->ss0.lod_preclamp = 1;	/* GL mode */
+	ss.ss0.lod_preclamp = 1;	/* GL mode */
 
 	/* We use the legacy mode to get the semantics specified by
 	 * the Render extension. */
-	ss->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY;
+	ss.ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY;
 
 	switch (filter) {
 	default:
 	case SAMPLER_FILTER_NEAREST:
-		ss->ss0.min_filter = GEN6_MAPFILTER_NEAREST;
-		ss->ss0.mag_filter = GEN6_MAPFILTER_NEAREST;
+		ss.ss0.min_filter = GEN6_MAPFILTER_NEAREST;
+		ss.ss0.mag_filter = GEN6_MAPFILTER_NEAREST;
 		break;
 	case SAMPLER_FILTER_BILINEAR:
-		ss->ss0.min_filter = GEN6_MAPFILTER_LINEAR;
-		ss->ss0.mag_filter = GEN6_MAPFILTER_LINEAR;
+		ss.ss0.min_filter = GEN6_MAPFILTER_LINEAR;
+		ss.ss0.mag_filter = GEN6_MAPFILTER_LINEAR;
 		break;
 	}
 
 	switch (extend) {
 	default:
 	case SAMPLER_EXTEND_NONE:
-		ss->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
-		ss->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
-		ss->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+		ss.ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+		ss.ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+		ss.ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
 		break;
 	case SAMPLER_EXTEND_REPEAT:
-		ss->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
-		ss->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
-		ss->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
+		ss.ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
+		ss.ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
+		ss.ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
 		break;
 	case SAMPLER_EXTEND_PAD:
-		ss->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
-		ss->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
-		ss->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
+		ss.ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
+		ss.ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
+		ss.ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
 		break;
 	case SAMPLER_EXTEND_REFLECT:
-		ss->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
-		ss->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
-		ss->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
+		ss.ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
+		ss.ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
+		ss.ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
 		break;
 	}
 
-	return intel_batch_offset(batch, ss);
+	return OUT_STATE_STRUCT(ss, 32);
 }
 
 static uint32_t
 gen6_create_vertex_buffer(struct intel_batchbuffer *batch)
 {
-	uint16_t *v;
-
-	v = intel_batch_state_alloc(batch, 2 * sizeof(uint16_t), 8);
-	if (v == NULL)
-		return -1;
+	uint16_t v[2];
 
 	v[0] = 0;
 	v[1] = 0;
 
-	return intel_batch_offset(batch, v);
+	return intel_batch_state_copy(batch, v, sizeof(v), 8, "vertex buffer");
 }
 
 static void gen6_emit_vertex_buffer(struct intel_batchbuffer *batch)
@@ -447,17 +429,15 @@ static void gen6_emit_vertex_buffer(struct intel_batchbuffer *batch)
 		  0 << VB0_BUFFER_INDEX_SHIFT |
 		  VB0_NULL_VERTEX_BUFFER |
 		  0 << VB0_BUFFER_PITCH_SHIFT);
-	OUT_RELOC(batch, I915_GEM_DOMAIN_VERTEX, 0, offset);
-	OUT_RELOC(batch, I915_GEM_DOMAIN_VERTEX, 0, offset);
+	OUT_RELOC_STATE(batch, I915_GEM_DOMAIN_VERTEX, 0, offset);
+	OUT_RELOC_STATE(batch, I915_GEM_DOMAIN_VERTEX, 0, offset);
 	OUT_BATCH(0);
 }
 
-int gen6_setup_null_render_state(struct intel_batchbuffer *batch)
+void gen6_setup_null_render_state(struct intel_batchbuffer *batch)
 {
 	uint32_t wm_state, wm_kernel, wm_table;
-	uint32_t cc_vp, cc_blend, offset;
-	uint32_t batch_end;
-	int ret;
+	uint32_t cc_vp, cc_blend;
 
 	wm_table  = gen6_bind_surfaces(batch);
 	wm_kernel = gen6_create_kernel(batch);
@@ -492,10 +472,4 @@ int gen6_setup_null_render_state(struct intel_batchbuffer *batch)
 	gen6_emit_vertex_buffer(batch);
 
 	OUT_BATCH(MI_BATCH_BUFFER_END);
-
-	ret = intel_batch_error(batch);
-	if (ret == 0)
-		ret = intel_batch_total_used(batch);
-
-	return ret;
 }
diff --git a/tools/null_state_gen/intel_renderstate_gen7.c b/tools/null_state_gen/intel_renderstate_gen7.c
index 8fe8a80..22cd268 100644
--- a/tools/null_state_gen/intel_renderstate_gen7.c
+++ b/tools/null_state_gen/intel_renderstate_gen7.c
@@ -25,6 +25,7 @@
 #include "intel_batchbuffer.h"
 #include <lib/gen7_render.h>
 #include <lib/intel_reg.h>
+#include <string.h>
 #include <stdio.h>
 
 static const uint32_t ps_kernel[][4] = {
@@ -41,22 +42,7 @@ static const uint32_t ps_kernel[][4] = {
 static uint32_t
 gen7_bind_buf_null(struct intel_batchbuffer *batch)
 {
-	uint32_t *ss;
-
-	ss = intel_batch_state_alloc(batch, 8 * sizeof(*ss), 32);
-	if (ss == NULL)
-		return -1;
-
-	ss[0] = 0;
-	ss[1] = 0;
-	ss[2] = 0;
-	ss[3] = 0;
-	ss[4] = 0;
-	ss[5] = 0;
-	ss[6] = 0;
-	ss[7] = 0;
-
-	return intel_batch_offset(batch, ss);
+	return intel_batch_state_alloc(batch, 32, 32, "bind buf null");
 }
 
 static void
@@ -99,26 +85,7 @@ gen7_create_vertex_buffer(struct intel_batchbuffer *batch)
 {
 	uint16_t *v;
 
-	v = intel_batch_state_alloc(batch, 12*sizeof(*v), 8);
-	if (v == NULL)
-		return -1;
-
-	v[0] = 0;
-	v[1] = 0;
-	v[2] = 0;
-	v[3] = 0;
-
-	v[4] = 0;
-	v[5] = 0;
-	v[6] = 0;
-	v[7] = 0;
-
-	v[8] = 0;
-	v[9] = 0;
-	v[10] = 0;
-	v[11] = 0;
-
-	return intel_batch_offset(batch, v);
+	return intel_batch_state_alloc(batch, 12*sizeof(*v), 8, "vertex buffer");
 }
 
 static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch)
@@ -134,7 +101,7 @@ static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch)
 		  GEN7_VB0_NULL_VERTEX_BUFFER |
 		  4*2 << GEN7_VB0_BUFFER_PITCH_SHIFT);
 
-	OUT_RELOC(batch, I915_GEM_DOMAIN_VERTEX, 0, offset);
+	OUT_RELOC_STATE(batch, I915_GEM_DOMAIN_VERTEX, 0, offset);
 	OUT_BATCH(~0);
 	OUT_BATCH(0);
 }
@@ -142,23 +109,21 @@ static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch)
 static uint32_t
 gen7_bind_surfaces(struct intel_batchbuffer *batch)
 {
-	uint32_t *binding_table;
+	unsigned offset;
 
-	binding_table = intel_batch_state_alloc(batch, 8, 32);
-	if (binding_table == NULL)
-		return -1;
+	offset = intel_batch_state_alloc(batch, 8, 32, "bind surfaces");
 
-	binding_table[0] = gen7_bind_buf_null(batch);
-	binding_table[1] = gen7_bind_buf_null(batch);
+	bb_area_emit_offset(batch->state, offset, gen7_bind_buf_null(batch), STATE_OFFSET, "bind 1");
+	bb_area_emit_offset(batch->state, offset + 4, gen7_bind_buf_null(batch), STATE_OFFSET, "bind 2");
 
-	return intel_batch_offset(batch, binding_table);
+	return offset;
 }
 
 static void
 gen7_emit_binding_table(struct intel_batchbuffer *batch)
 {
 	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
-	OUT_BATCH(gen7_bind_surfaces(batch));
+	OUT_BATCH_STATE_OFFSET(gen7_bind_surfaces(batch));
 }
 
 static void
@@ -174,19 +139,16 @@ gen7_emit_drawing_rectangle(struct intel_batchbuffer *batch)
 static uint32_t
 gen7_create_blend_state(struct intel_batchbuffer *batch)
 {
-	struct gen7_blend_state *blend;
-
-	blend = intel_batch_state_alloc(batch, sizeof(*blend), 64);
-	if (blend == NULL)
-		return -1;
+	struct gen7_blend_state blend;
+	memset(&blend, 0, sizeof(blend));
 
-	blend->blend0.dest_blend_factor = GEN7_BLENDFACTOR_ZERO;
-	blend->blend0.source_blend_factor = GEN7_BLENDFACTOR_ONE;
-	blend->blend0.blend_func = GEN7_BLENDFUNCTION_ADD;
-	blend->blend1.post_blend_clamp_enable = 1;
-	blend->blend1.pre_blend_clamp_enable = 1;
+	blend.blend0.dest_blend_factor = GEN7_BLENDFACTOR_ZERO;
+	blend.blend0.source_blend_factor = GEN7_BLENDFACTOR_ONE;
+	blend.blend0.blend_func = GEN7_BLENDFUNCTION_ADD;
+	blend.blend1.post_blend_clamp_enable = 1;
+	blend.blend1.pre_blend_clamp_enable = 1;
 
-	return intel_batch_offset(batch, blend);
+	return OUT_STATE_STRUCT(blend, 64);
 }
 
 static void
@@ -208,54 +170,48 @@ gen7_emit_state_base_address(struct intel_batchbuffer *batch)
 static uint32_t
 gen7_create_cc_viewport(struct intel_batchbuffer *batch)
 {
-	struct gen7_cc_viewport *vp;
+	struct gen7_cc_viewport vp;
+	memset(&vp, 0, sizeof(vp));
 
-	vp = intel_batch_state_alloc(batch, sizeof(*vp), 32);
-	if (vp == NULL)
-		return -1;
+	vp.min_depth = -1.e35;
+	vp.max_depth = 1.e35;
 
-	vp->min_depth = -1.e35;
-	vp->max_depth = 1.e35;
-
-	return intel_batch_offset(batch, vp);
+	return OUT_STATE_STRUCT(vp, 32);
 }
 
 static void
 gen7_emit_cc(struct intel_batchbuffer *batch)
 {
 	OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
-	OUT_BATCH(gen7_create_blend_state(batch));
+	OUT_BATCH_STATE_OFFSET(gen7_create_blend_state(batch));
 
 	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
-	OUT_BATCH(gen7_create_cc_viewport(batch));
+	OUT_BATCH_STATE_OFFSET(gen7_create_cc_viewport(batch));
 }
 
 static uint32_t
 gen7_create_sampler(struct intel_batchbuffer *batch)
 {
-	struct gen7_sampler_state *ss;
-
-	ss = intel_batch_state_alloc(batch, sizeof(*ss), 32);
-	if (ss == NULL)
-		return -1;
+	struct gen7_sampler_state ss;
+	memset(&ss, 0, sizeof(ss));
 
-	ss->ss0.min_filter = GEN7_MAPFILTER_NEAREST;
-	ss->ss0.mag_filter = GEN7_MAPFILTER_NEAREST;
+	ss.ss0.min_filter = GEN7_MAPFILTER_NEAREST;
+	ss.ss0.mag_filter = GEN7_MAPFILTER_NEAREST;
 
-	ss->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
-	ss->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
-	ss->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
+	ss.ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
+	ss.ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
+	ss.ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
 
-	ss->ss3.non_normalized_coord = 1;
+	ss.ss3.non_normalized_coord = 1;
 
-	return intel_batch_offset(batch, ss);
+	return OUT_STATE_STRUCT(ss, 32);
 }
 
 static void
 gen7_emit_sampler(struct intel_batchbuffer *batch)
 {
 	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
-	OUT_BATCH(gen7_create_sampler(batch));
+	OUT_BATCH_STATE_OFFSET(gen7_create_sampler(batch));
 }
 
 static void
@@ -406,8 +362,8 @@ gen7_emit_ps(struct intel_batchbuffer *batch)
 		threads = 40 << IVB_PS_MAX_THREADS_SHIFT;
 
 	OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2));
-	OUT_BATCH(intel_batch_state_copy(batch, ps_kernel,
-					 sizeof(ps_kernel), 64));
+	OUT_BATCH_STATE_OFFSET(intel_batch_state_copy(batch, ps_kernel,
+						      sizeof(ps_kernel), 64, "ps kernel"));
 	OUT_BATCH(1 << GEN7_PS_SAMPLER_COUNT_SHIFT |
 		  2 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
 	OUT_BATCH(0); /* scratch address */
@@ -458,7 +414,6 @@ gen7_emit_null_depth_buffer(struct intel_batchbuffer *batch)
 	OUT_BATCH(0);
 }
 
-int gen7_setup_null_render_state(struct intel_batchbuffer *batch)
+void gen7_setup_null_render_state(struct intel_batchbuffer *batch)
 {
-	int ret;
 
@@ -496,10 +451,4 @@ int gen7_setup_null_render_state(struct intel_batchbuffer *batch)
 	OUT_BATCH(0);   /* index buffer offset, ignored */
 
 	OUT_BATCH(MI_BATCH_BUFFER_END);
-
-	ret = intel_batch_error(batch);
-	if (ret == 0)
-		ret = intel_batch_total_used(batch);
-
-	return ret;
 }
diff --git a/tools/null_state_gen/intel_renderstate_gen8.c b/tools/null_state_gen/intel_renderstate_gen8.c
index 807c2c8..4812b51 100644
--- a/tools/null_state_gen/intel_renderstate_gen8.c
+++ b/tools/null_state_gen/intel_renderstate_gen8.c
@@ -39,32 +39,21 @@ static const uint32_t ps_kernel[][4] = {
 static uint32_t
 gen8_bind_buf_null(struct intel_batchbuffer *batch)
 {
-	struct gen8_surface_state *ss;
+	struct gen8_surface_state ss;
+	memset(&ss, 0, sizeof(ss));
 
-	ss = intel_batch_state_alloc(batch, sizeof(*ss), 64);
-	if (ss == NULL)
-		return -1;
-
-	memset(ss, 0, sizeof(*ss));
-
-	return intel_batch_offset(batch, ss);
+	return OUT_STATE_STRUCT(ss, 64);
 }
 
 static uint32_t
 gen8_bind_surfaces(struct intel_batchbuffer *batch)
 {
-	uint32_t *binding_table, offset;
-
-	binding_table = intel_batch_state_alloc(batch, 8, 32);
-	if (binding_table == NULL)
-		return -1;
+	unsigned offset;
 
-	offset = intel_batch_offset(batch, binding_table);
+	offset = intel_batch_state_alloc(batch, 8, 32, "bind surfaces");
 
-	binding_table[0] =
-		gen8_bind_buf_null(batch);
-	binding_table[1] =
-		gen8_bind_buf_null(batch);
+	bb_area_emit_offset(batch->state, offset, gen8_bind_buf_null(batch), STATE_OFFSET, "bind 1");
+	bb_area_emit_offset(batch->state, offset + 4, gen8_bind_buf_null(batch), STATE_OFFSET, "bind 2");
 
 	return offset;
 }
@@ -72,26 +61,20 @@ gen8_bind_surfaces(struct intel_batchbuffer *batch)
 /* Mostly copy+paste from gen6, except wrap modes moved */
 static uint32_t
 gen8_create_sampler(struct intel_batchbuffer *batch) {
-	struct gen8_sampler_state *ss;
-	uint32_t offset;
-
-	ss = intel_batch_state_alloc(batch, sizeof(*ss), 64);
-	if (ss == NULL)
-		return -1;
-
-	offset = intel_batch_offset(batch, ss);
+	struct gen8_sampler_state ss;
+	memset(&ss, 0, sizeof(ss));
 
-	ss->ss0.min_filter = GEN6_MAPFILTER_NEAREST;
-	ss->ss0.mag_filter = GEN6_MAPFILTER_NEAREST;
-	ss->ss3.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
-	ss->ss3.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
-	ss->ss3.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
+	ss.ss0.min_filter = GEN6_MAPFILTER_NEAREST;
+	ss.ss0.mag_filter = GEN6_MAPFILTER_NEAREST;
+	ss.ss3.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
+	ss.ss3.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
+	ss.ss3.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
 
 	/* I've experimented with non-normalized coordinates and using the LD
 	 * sampler fetch, but couldn't make it work. */
-	ss->ss3.non_normalized_coord = 0;
+	ss.ss3.non_normalized_coord = 0;
 
-	return offset;
+	return OUT_STATE_STRUCT(ss, 64);
 }
 
 static uint32_t
@@ -99,7 +82,7 @@ gen8_fill_ps(struct intel_batchbuffer *batch,
 	     const uint32_t kernel[][4],
 	     size_t size)
 {
-	return intel_batch_state_copy(batch, kernel, size, 64);
+	return intel_batch_state_copy(batch, kernel, size, 64, "ps kernel");
 }
 
 /**
@@ -115,13 +98,9 @@ gen8_fill_ps(struct intel_batchbuffer *batch,
 static uint32_t
 gen7_fill_vertex_buffer_data(struct intel_batchbuffer *batch)
 {
-	uint16_t *start;
-
-	start = intel_batch_state_alloc(batch, 2 * sizeof(*start), 8);
-	start[0] = 0;
-	start[1] = 0;
+	uint16_t *v;
 
-	return intel_batch_offset(batch, start);
+	return intel_batch_state_alloc(batch, 2 * sizeof(*v), 8, "vertex buffer");
 }
 
 /**
@@ -194,7 +173,7 @@ static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch,
 		  GEN7_VB0_BUFFER_ADDR_MOD_EN | /* Address Modify Enable */
 		  VB0_NULL_VERTEX_BUFFER |
 		  0 << VB0_BUFFER_PITCH_SHIFT);
-	OUT_RELOC(batch, I915_GEM_DOMAIN_VERTEX, 0, offset);
+	OUT_RELOC_STATE(batch, I915_GEM_DOMAIN_VERTEX, 0, offset);
 	OUT_BATCH(0);
 	OUT_BATCH(0);
 }
@@ -202,94 +181,68 @@ static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch,
 static uint32_t
 gen6_create_cc_state(struct intel_batchbuffer *batch)
 {
-	struct gen6_color_calc_state *cc_state;
-	uint32_t offset;
-
-	cc_state = intel_batch_state_alloc(batch, sizeof(*cc_state), 64);
-	if (cc_state == NULL)
-		return -1;
+	struct gen6_color_calc_state cc_state;
+	memset(&cc_state, 0, sizeof(cc_state));
 
-	offset = intel_batch_offset(batch, cc_state);
-
-	return offset;
+	return OUT_STATE_STRUCT(cc_state, 64);
 }
 
 static uint32_t
 gen8_create_blend_state(struct intel_batchbuffer *batch)
 {
-	struct gen8_blend_state *blend;
+	struct gen8_blend_state blend;
 	int i;
-	uint32_t offset;
 
-	blend = intel_batch_state_alloc(batch, sizeof(*blend), 64);
-	if (blend == NULL)
-		return -1;
-
-	offset = intel_batch_offset(batch, blend);
+	memset(&blend, 0, sizeof(blend));
 
 	for (i = 0; i < 16; i++) {
-		blend->bs[i].dest_blend_factor = GEN6_BLENDFACTOR_ZERO;
-		blend->bs[i].source_blend_factor = GEN6_BLENDFACTOR_ONE;
-		blend->bs[i].color_blend_func = GEN6_BLENDFUNCTION_ADD;
-		blend->bs[i].pre_blend_color_clamp = 1;
-		blend->bs[i].color_buffer_blend = 0;
+		blend.bs[i].dest_blend_factor = GEN6_BLENDFACTOR_ZERO;
+		blend.bs[i].source_blend_factor = GEN6_BLENDFACTOR_ONE;
+		blend.bs[i].color_blend_func = GEN6_BLENDFUNCTION_ADD;
+		blend.bs[i].pre_blend_color_clamp = 1;
+		blend.bs[i].color_buffer_blend = 0;
 	}
 
-	return offset;
+	return OUT_STATE_STRUCT(blend, 64);
 }
 
 static uint32_t
 gen6_create_cc_viewport(struct intel_batchbuffer *batch)
 {
-	struct gen6_cc_viewport *vp;
-	uint32_t offset;
-
-	vp = intel_batch_state_alloc(batch, sizeof(*vp), 32);
-	if (vp == NULL)
-		return -1;
+	struct gen6_cc_viewport vp;
 
-	offset = intel_batch_offset(batch, vp);
+	memset(&vp, 0, sizeof(vp));
 
 	/* XXX I don't understand this */
-	vp->min_depth = -1.e35;
-	vp->max_depth = 1.e35;
+	vp.min_depth = -1.e35;
+	vp.max_depth = 1.e35;
 
-	return offset;
+	return OUT_STATE_STRUCT(vp, 32);
 }
 
 static uint32_t
 gen7_create_sf_clip_viewport(struct intel_batchbuffer *batch) {
 	/* XXX these are likely not needed */
-	struct gen7_sf_clip_viewport *scv_state;
-	uint32_t offset;
-
-	scv_state = intel_batch_state_alloc(batch, sizeof(*scv_state), 64);
-	if (scv_state == NULL)
-		return -1;
+	struct gen7_sf_clip_viewport scv_state;
 
-	offset = intel_batch_offset(batch, scv_state);
+	memset(&scv_state, 0, sizeof(scv_state));
 
-	scv_state->guardband.xmin = 0;
-	scv_state->guardband.xmax = 1.0f;
-	scv_state->guardband.ymin = 0;
-	scv_state->guardband.ymax = 1.0f;
+	scv_state.guardband.xmin = 0;
+	scv_state.guardband.xmax = 1.0f;
+	scv_state.guardband.ymin = 0;
+	scv_state.guardband.ymax = 1.0f;
 
-	return offset;
+	return OUT_STATE_STRUCT(scv_state, 64);
 }
 
 static uint32_t
 gen6_create_scissor_rect(struct intel_batchbuffer *batch)
 {
-	struct gen6_scissor_rect *scissor;
-	uint32_t offset;
-
-	scissor = intel_batch_state_alloc(batch, sizeof(*scissor), 64);
-	if (scissor == NULL)
-		return -1;
+	struct gen6_scissor_rect scissor;
 
-	offset = intel_batch_offset(batch, scissor);
+	memset(&scissor, 0, sizeof(scissor));
 
-	return offset;
+	return OUT_STATE_STRUCT(scissor, 64);
 }
 
 static void
@@ -371,10 +324,10 @@ gen7_emit_urb(struct intel_batchbuffer *batch) {
 static void
 gen8_emit_cc(struct intel_batchbuffer *batch) {
 	OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS);
-	OUT_BATCH(cc.blend_state | 1);
+	OUT_BATCH_STATE_OFFSET(cc.blend_state | 1);
 
 	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS);
-	OUT_BATCH(cc.cc_state | 1);
+	OUT_BATCH_STATE_OFFSET(cc.cc_state | 1);
 }
 
 static void
@@ -596,7 +549,7 @@ gen8_emit_ps(struct intel_batchbuffer *batch, uint32_t kernel) {
 	OUT_BATCH(0);
 
 	OUT_BATCH(GEN7_3DSTATE_PS | (12-2));
-	OUT_BATCH(kernel);
+	OUT_BATCH_STATE_OFFSET(kernel);
 	OUT_BATCH(0); /* kernel hi */
 	OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
 		  2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
@@ -664,7 +617,7 @@ static void gen8_emit_vf_topology(struct intel_batchbuffer *batch)
 }
 
 /* Vertex elements MUST be defined before this according to spec */
-static void gen8_emit_primitive(struct intel_batchbuffer *batch, uint32_t offset)
+static void gen8_emit_primitive(struct intel_batchbuffer *batch)
 {
 	OUT_BATCH(GEN8_3DSTATE_VF_INSTANCING | (3 - 2));
 	OUT_BATCH(0);
@@ -679,7 +632,7 @@ static void gen8_emit_primitive(struct intel_batchbuffer *batch, uint32_t offset
 	OUT_BATCH(0);	/* index buffer offset, ignored */
 }
 
-int gen8_setup_null_render_state(struct intel_batchbuffer *batch)
+void gen8_setup_null_render_state(struct intel_batchbuffer *batch)
 {
 	uint32_t ps_sampler_state, ps_kernel_off, ps_binding_table;
 	uint32_t scissor_state;
@@ -709,9 +662,9 @@ int gen8_setup_null_render_state(struct intel_batchbuffer *batch)
 	gen8_emit_state_base_address(batch);
 
 	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC);
-	OUT_BATCH(viewport.cc_state);
+	OUT_BATCH_STATE_OFFSET(viewport.cc_state);
 	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
-	OUT_BATCH(viewport.sf_clip_state);
+	OUT_BATCH_STATE_OFFSET(viewport.sf_clip_state);
 
 	gen7_emit_urb(batch);
 
@@ -732,15 +685,15 @@ int gen8_setup_null_render_state(struct intel_batchbuffer *batch)
 	gen8_emit_sf(batch);
 
 	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS);
-	OUT_BATCH(ps_binding_table);
+	OUT_BATCH_STATE_OFFSET(ps_binding_table);
 
 	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS);
-	OUT_BATCH(ps_sampler_state);
+	OUT_BATCH_STATE_OFFSET(ps_sampler_state);
 
 	gen8_emit_ps(batch, ps_kernel_off);
 
 	OUT_BATCH(GEN6_3DSTATE_SCISSOR_STATE_POINTERS);
-	OUT_BATCH(scissor_state);
+	OUT_BATCH_STATE_OFFSET(scissor_state);
 
 	gen8_emit_depth(batch);
 
@@ -752,13 +705,7 @@ int gen8_setup_null_render_state(struct intel_batchbuffer *batch)
 	gen6_emit_vertex_elements(batch);
 
 	gen8_emit_vf_topology(batch);
-	gen8_emit_primitive(batch, vertex_buffer);
+	gen8_emit_primitive(batch);
 
 	OUT_BATCH(MI_BATCH_BUFFER_END);
-
-	ret = intel_batch_error(batch);
-	if (ret == 0)
-		ret = intel_batch_total_used(batch);
-
-	return ret;
 }
-- 
1.7.9.5