[igt-dev] [PATCH i-g-t v3 4/4] lib/intel_batchbuffer: Move batch functions from media/render/gpgpu libs

Daniele Ceraolo Spurio daniele.ceraolospurio at intel.com
Tue Apr 17 20:29:12 UTC 2018



On 17/04/18 02:11, Lukasz Kalamarz wrote:
> Batch functions were copy/pasted across several libs.
> By moving them into the intel_batchbuffer lib, tests can now be
> easily maintained without worrying that we forgot to modify an
> older version of the lib.
> 
> v2: Added documentation into lib and rebased patch
> v3: Fixed typos and rebased patch
> 
> Signed-off-by: Lukasz Kalamarz <lukasz.kalamarz at intel.com>
> Cc: Katarzyna Dec <katarzyna.dec at intel.com>
> Cc: Radoslaw Szwichtenberg <radoslaw.szwichtenberg at intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> ---
>   lib/gpgpu_fill.c        |  6 ++--
>   lib/gpu_fill.c          | 68 ++++++++++----------------------------
>   lib/gpu_fill.h          | 15 ---------
>   lib/intel_batchbuffer.c | 88 +++++++++++++++++++++++++++++++++++++++++++++----
>   lib/intel_batchbuffer.h | 18 ++++++++--
>   lib/media_fill_gen7.c   |  2 +-
>   lib/media_fill_gen8.c   |  2 +-
>   lib/media_fill_gen9.c   |  2 +-
>   lib/media_spin.c        | 63 ++++++++---------------------------
>   lib/rendercopy_gen6.c   | 77 ++++++++++---------------------------------
>   lib/rendercopy_gen7.c   | 65 +++++++++---------------------------
>   lib/rendercopy_gen8.c   | 82 ++++++++++++++-------------------------------
>   lib/rendercopy_gen9.c   | 82 ++++++++++++++-------------------------------
>   13 files changed, 218 insertions(+), 352 deletions(-)
> 
> diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
> index 72a1445..010dde0 100644
> --- a/lib/gpgpu_fill.c
> +++ b/lib/gpgpu_fill.c
> @@ -137,7 +137,7 @@ gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
>   
>   	OUT_BATCH(MI_BATCH_BUFFER_END);
>   
> -	batch_end = batch_align(batch, 8);
> +	batch_end = intel_batchbuffer_align(batch, 8);
>   	igt_assert(batch_end < BATCH_STATE_SPLIT);
>   
>   	gen7_render_flush(batch, batch_end);
> @@ -185,7 +185,7 @@ gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
>   
>   	OUT_BATCH(MI_BATCH_BUFFER_END);
>   
> -	batch_end = batch_align(batch, 8);
> +	batch_end = intel_batchbuffer_align(batch, 8);
>   	igt_assert(batch_end < BATCH_STATE_SPLIT);
>   
>   	gen7_render_flush(batch, batch_end);
> @@ -234,7 +234,7 @@ gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
>   
>   	OUT_BATCH(MI_BATCH_BUFFER_END);
>   
> -	batch_end = batch_align(batch, 8);
> +	batch_end = intel_batchbuffer_align(batch, 8);
>   	igt_assert(batch_end < BATCH_STATE_SPLIT);
>   
>   	gen7_render_flush(batch, batch_end);
> diff --git a/lib/gpu_fill.c b/lib/gpu_fill.c
> index f1fe5b3..5c1e217 100644
> --- a/lib/gpu_fill.c
> +++ b/lib/gpu_fill.c
> @@ -24,41 +24,6 @@
>   
>   #include "gpu_fill.h"
>   
> -uint32_t
> -batch_used(struct intel_batchbuffer *batch)
> -{
> -	return batch->ptr - batch->buffer;
> -}
> -
> -uint32_t
> -batch_align(struct intel_batchbuffer *batch, uint32_t align)
> -{
> -	uint32_t offset = batch_used(batch);
> -	offset = ALIGN(offset, align);
> -	batch->ptr = batch->buffer + offset;
> -	return offset;
> -}
> -
> -void *
> -batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
> -{
> -	uint32_t offset = batch_align(batch, align);
> -	batch->ptr += size;
> -	return memset(batch->buffer + offset, 0, size);
> -}
> -
> -uint32_t
> -batch_offset(struct intel_batchbuffer *batch, void *ptr)
> -{
> -	return (uint8_t *)ptr - batch->buffer;
> -}
> -
> -uint32_t
> -batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
> -{
> -	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
> -}
> -
>   void
>   gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
>   {
> @@ -78,8 +43,10 @@ gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
>   	uint8_t *curbe_buffer;
>   	uint32_t offset;
>   
> -	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
> -	offset = batch_offset(batch, curbe_buffer);
> +	curbe_buffer = intel_batchbuffer_subdata_alloc(batch,
> +						       sizeof(uint32_t) * 8,
> +						       64);
> +	offset = intel_batchbuffer_subdata_offset(batch, curbe_buffer);
>   	*curbe_buffer = color;
>   
>   	return offset;
> @@ -102,8 +69,8 @@ gen7_fill_surface_state(struct intel_batchbuffer *batch,
>   		read_domain = I915_GEM_DOMAIN_SAMPLER;
>   	}
>   
> -	ss = batch_alloc(batch, sizeof(*ss), 64);
> -	offset = batch_offset(batch, ss);
> +	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, ss);
>   
>   	ss->ss0.surface_type = GEN7_SURFACE_2D;
>   	ss->ss0.surface_format = format;
> @@ -116,7 +83,7 @@ gen7_fill_surface_state(struct intel_batchbuffer *batch,
>   
>   	ss->ss1.base_addr = buf->bo->offset;
>   	ret = drm_intel_bo_emit_reloc(batch->bo,
> -				batch_offset(batch, ss) + 4,
> +				intel_batchbuffer_subdata_offset(batch, ss) + 4,
>   				buf->bo, 0,
>   				read_domain, write_domain);
>   	igt_assert(ret == 0);
> @@ -140,8 +107,8 @@ gen7_fill_binding_table(struct intel_batchbuffer *batch,
>   {
>   	uint32_t *binding_table, offset;
>   
> -	binding_table = batch_alloc(batch, 32, 64);
> -	offset = batch_offset(batch, binding_table);
> +	binding_table = intel_batchbuffer_subdata_alloc(batch, 32, 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, binding_table);
>   	if (IS_GEN7(batch->devid))
>   		binding_table[0] = gen7_fill_surface_state(batch, dst,
>   						GEN7_SURFACEFORMAT_R8_UNORM, 1);
> @@ -159,7 +126,7 @@ gen7_fill_kernel(struct intel_batchbuffer *batch,
>   {
>   	uint32_t offset;
>   
> -	offset = batch_copy(batch, kernel, size, 64);
> +	offset = intel_batchbuffer_copy_data(batch, kernel, size, 64);
>   
>   	return offset;
>   }
> @@ -175,8 +142,8 @@ gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *
>   	binding_table_offset = gen7_fill_binding_table(batch, dst);
>   	kernel_offset = gen7_fill_kernel(batch, kernel, size);
>   
> -	idd = batch_alloc(batch, sizeof(*idd), 64);
> -	offset = batch_offset(batch, idd);
> +	idd = intel_batchbuffer_subdata_alloc(batch, sizeof(*idd), 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, idd);
>   
>   	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
>   
> @@ -401,8 +368,8 @@ gen8_fill_surface_state(struct intel_batchbuffer *batch,
>   		read_domain = I915_GEM_DOMAIN_SAMPLER;
>   	}
>   
> -	ss = batch_alloc(batch, sizeof(*ss), 64);
> -	offset = batch_offset(batch, ss);
> +	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, ss);
>   
>   	ss->ss0.surface_type = GEN8_SURFACE_2D;
>   	ss->ss0.surface_format = format;
> @@ -418,7 +385,8 @@ gen8_fill_surface_state(struct intel_batchbuffer *batch,
>   	ss->ss8.base_addr = buf->bo->offset;
>   
>   	ret = drm_intel_bo_emit_reloc(batch->bo,
> -				batch_offset(batch, ss) + 8 * 4,
> +				intel_batchbuffer_subdata_offset(batch,
> +				ss) + 8 * 4,

For readability I would keep "ss" on the same line as 
intel_batchbuffer_subdata_offset, even if we go over 80 chars. If you 
prefer to use a new line, you need to at least indent "ss" so it is 
aligned with the other parameter passed to 
intel_batchbuffer_subdata_offset. This applies to several other places 
in this patch.
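
For example, either of these would work (illustrative sketch only, not 
the exact lines from the patch):

	ret = drm_intel_bo_emit_reloc(batch->bo,
				      intel_batchbuffer_subdata_offset(batch, ss) + 8 * 4,
				      buf->bo, 0,
				      read_domain, write_domain);

or, with the continuation aligned to the first argument of 
intel_batchbuffer_subdata_offset:

	ret = drm_intel_bo_emit_reloc(batch->bo,
				      intel_batchbuffer_subdata_offset(batch,
								       ss) + 8 * 4,
				      buf->bo, 0,
				      read_domain, write_domain);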

>   				buf->bo, 0,
>   				read_domain, write_domain);
>   	igt_assert(ret == 0);
> @@ -445,8 +413,8 @@ gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *
>   	binding_table_offset = gen7_fill_binding_table(batch, dst);
>   	kernel_offset = gen7_fill_kernel(batch, kernel, size);
>   
> -	idd = batch_alloc(batch, sizeof(*idd), 64);
> -	offset = batch_offset(batch, idd);
> +	idd = intel_batchbuffer_subdata_alloc(batch, sizeof(*idd), 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, idd);
>   
>   	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
>   
> diff --git a/lib/gpu_fill.h b/lib/gpu_fill.h
> index 072e9f7..067d498 100644
> --- a/lib/gpu_fill.h
> +++ b/lib/gpu_fill.h
> @@ -37,21 +37,6 @@
>   #include "intel_chipset.h"
>   #include <assert.h>
>   
> -uint32_t
> -batch_used(struct intel_batchbuffer *batch);
> -
> -uint32_t
> -batch_align(struct intel_batchbuffer *batch, uint32_t align);
> -
> -void *
> -batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align);
> -
> -uint32_t
> -batch_offset(struct intel_batchbuffer *batch, void *ptr);
> -
> -uint32_t
> -batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align);
> -
>   void
>   gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end);
>   
> diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
> index 10d4dce..54eab56 100644
> --- a/lib/intel_batchbuffer.c
> +++ b/lib/intel_batchbuffer.c
> @@ -66,9 +66,75 @@
>    */
>   
>   /**
> + * intel_batchbuffer_align:
> + * @batch: batchbuffer object
> + * @align: value in bytes to which we want to align
> + *

Could use a 1-liner here to describe what the function does, something like

"aligns the current in-batch offset to the given value"

> + * Returns: Batchbuffer offset aligned to a given value.

s/a given/the given/ ?
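
Putting the two suggestions together, the kerneldoc could read 
something like this (just a sketch):

/**
 * intel_batchbuffer_align:
 * @batch: batchbuffer object
 * @align: value in bytes to which we want to align
 *
 * Aligns the current in-batch offset to the given value.
 *
 * Returns: Batchbuffer offset aligned to the given value.
 */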

> + */
> +uint32_t
> +intel_batchbuffer_align(struct intel_batchbuffer *batch, uint32_t align)
> +{
> +	uint32_t offset = batch->ptr - batch->buffer;
> +
> +	offset = ALIGN(offset, align);
> +	batch->ptr = batch->buffer + offset;
> +	return offset;
> +}
> +
> +/**
> + * intel_batchbuffer_round_upto:
> + * @batch: batchbuffer object
> + * @divisor: amount of bytes need to allocate

Wrong description for @divisor. Also, this function does something 
similar to the _align function, since they both round the offset up to 
a multiple of the given value, so a 1-line description here as well to 
highlight the difference would be nice.
Alternatively, since this function only has one user, we could leave it 
in the user file for now.
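
Something along these lines, maybe (a sketch, assuming ALIGN() is the 
usual mask-based macro that only works for power-of-two alignments):

/**
 * intel_batchbuffer_round_upto:
 * @batch: batchbuffer object
 * @divisor: value in bytes to which the offset is rounded up
 *
 * Rounds the current in-batch offset up to the next multiple of
 * @divisor. Unlike intel_batchbuffer_align(), @divisor does not have
 * to be a power of two.
 *
 * Returns: Batchbuffer offset rounded up to the given divisor.
 */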

> + *
> + * Returns: Calculated offset rounded up to a given divisor.
> + */
> +uint32_t
> +intel_batchbuffer_round_upto(struct intel_batchbuffer *batch, uint32_t divisor)
> +{
> +	uint32_t offset = batch->ptr - batch->buffer;
> +
> +	offset = (offset + divisor-1) / divisor * divisor;
> +	batch->ptr = batch->buffer + offset;
> +	return offset;
> +}
> +
> +/**
> + * intel_batchbuffer_subdata_alloc:
> + * @batch: batchbuffer object
> + * @size: amount of bytes need to allocate
> + * @align: value in bytes to which we want to align
> + *
> + * Allocate @size bytes within @batch.

Missing description of the return value. Also, we could either add an 
assert that there is enough space, or a note saying that checking that 
the batch was not overflowed is left to the caller.
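
With the assert it could look something like this (a sketch, assuming 
the backing store is the fixed-size buffer[] array in struct 
intel_batchbuffer):

void *
intel_batchbuffer_subdata_alloc(struct intel_batchbuffer *batch,
				uint32_t size, uint32_t align)
{
	uint32_t offset = intel_batchbuffer_align(batch, align);

	/* make sure the allocation fits within the batch */
	igt_assert(offset + size <= sizeof(batch->buffer));

	batch->ptr += size;
	return memset(batch->buffer + offset, 0, size);
}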

> + */
> +void *
> +intel_batchbuffer_subdata_alloc(struct intel_batchbuffer *batch, uint32_t size,
> +				uint32_t align)
> +{
> +	uint32_t offset = intel_batchbuffer_align(batch, align);
> +
> +	batch->ptr += size;
> +	return memset(batch->buffer + offset, 0, size);
> +}
> +
> +/**
> + * intel_batchbuffer_subdata_offset:
> + * @batch: batchbuffer object
> + * @ptr: pointer to given data
> + *
> + * Returns: Offset between given pointer and batchbuffer.

This sentence is a bit unclear to me. What we return here is the 
location of the data within the batch (i.e. the offset from the base of 
the batch).
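
Maybe something like:

 * Returns: Offset of @ptr within @batch, i.e. relative to the start
 * of the batchbuffer.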

> + */
> +uint32_t
> +intel_batchbuffer_subdata_offset(struct intel_batchbuffer *batch, void *ptr)
> +{
> +	return (uint8_t *)ptr - batch->buffer;
> +}
> +
> +/**
>    * intel_batchbuffer_reset:
>    * @batch: batchbuffer object
>    *
> + *

nitpick: this extra newline can be dropped.

>    * Resets @batch by allocating a new gem buffer object as backing storage.
>    */
>   void
> @@ -288,22 +354,30 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
>   }
>   
>   /**
> - * intel_batchbuffer_data:
> + * intel_batchbuffer_copy_data:
>    * @batch: batchbuffer object
> - * @data: pointer to the data to write into the batchbuffer
> + * @data: pointer to data, which will be copied

The original description for @data felt clearer to me.

>    * @bytes: number of bytes to write into the batchbuffer
> + * @align: value in bytes to which we want to align
>    *
>    * This transfers the given @data into the batchbuffer. Note that the length
>    * must be DWORD aligned, i.e. multiples of 32bits.
> + *
> + * Returns: Offset of copied data.
>    */
> -void
> -intel_batchbuffer_data(struct intel_batchbuffer *batch,
> -                       const void *data, unsigned int bytes)
> +uint32_t
> +intel_batchbuffer_copy_data(struct intel_batchbuffer *batch,
> +			    const void *data, unsigned int bytes,
> +			    uint32_t align)
>   {
> +	uint32_t *subdata, *copied_data;
> +
>   	igt_assert((bytes & 3) == 0);
>   	intel_batchbuffer_require_space(batch, bytes);

intel_batchbuffer_require_space can potentially cause weird behaviour 
in your use-case, because it will submit the batch and create a new one 
if there is not enough space available. AFAICS intel_batchbuffer_data 
had no users before this patch, and all the users you're adding do 
their own validation of the used space, so we could potentially remove 
the intel_batchbuffer_require_space check (adding a note in the 
description) and add an igt_assert for the space instead. The assert 
could go in intel_batchbuffer_subdata_alloc, as mentioned above, to 
cover the other use-cases as well.
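
I.e., something like this (a sketch, relying on the overflow assert 
proposed for intel_batchbuffer_subdata_alloc above; it also drops the 
redundant copied_data variable, see below):

uint32_t
intel_batchbuffer_copy_data(struct intel_batchbuffer *batch,
			    const void *data, unsigned int bytes,
			    uint32_t align)
{
	uint32_t *subdata;

	igt_assert((bytes & 3) == 0);

	/* subdata_alloc asserts that the batch has enough space left */
	subdata = intel_batchbuffer_subdata_alloc(batch, bytes, align);
	memcpy(subdata, data, bytes);

	return intel_batchbuffer_subdata_offset(batch, subdata);
}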

> -	memcpy(batch->ptr, data, bytes);
> -	batch->ptr += bytes;
> +	subdata = intel_batchbuffer_subdata_alloc(batch, bytes, align);
> +	copied_data = memcpy(subdata, data, bytes);
> +

You don't need both subdata and copied_data here since they're going to 
be identical.

Daniele

> +	return intel_batchbuffer_subdata_offset(batch, copied_data);
>   }
>   
>   /**
> diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
> index 6744bcb..3183fee 100644
> --- a/lib/intel_batchbuffer.h
> +++ b/lib/intel_batchbuffer.h
> @@ -40,8 +40,9 @@ void intel_batchbuffer_flush_with_context(struct intel_batchbuffer *batch,
>   
>   void intel_batchbuffer_reset(struct intel_batchbuffer *batch);
>   
> -void intel_batchbuffer_data(struct intel_batchbuffer *batch,
> -                            const void *data, unsigned int bytes);
> +uint32_t intel_batchbuffer_copy_data(struct intel_batchbuffer *batch,
> +				const void *data, unsigned int bytes,
> +				uint32_t align);
>   
>   void intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
>   				  drm_intel_bo *buffer,
> @@ -50,6 +51,19 @@ void intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
>   				  uint32_t write_domain,
>   				  int fenced);
>   
> +uint32_t
> +intel_batchbuffer_align(struct intel_batchbuffer *batch, uint32_t align);
> +
> +uint32_t
> +intel_batchbuffer_round_upto(struct intel_batchbuffer *batch, uint32_t divisor);
> +
> +void *
> +intel_batchbuffer_subdata_alloc(struct intel_batchbuffer *batch,
> +				uint32_t size, uint32_t align);
> +
> +uint32_t
> +intel_batchbuffer_subdata_offset(struct intel_batchbuffer *batch, void *ptr);
> +
>   /* Inline functions - might actually be better off with these
>    * non-inlined.  Certainly better off switching all command packets to
>    * be passed as structs rather than dwords, but that's a little bit of
> diff --git a/lib/media_fill_gen7.c b/lib/media_fill_gen7.c
> index 5a8c32f..3dc5617 100644
> --- a/lib/media_fill_gen7.c
> +++ b/lib/media_fill_gen7.c
> @@ -79,7 +79,7 @@ gen7_media_fillfunc(struct intel_batchbuffer *batch,
>   
>   	OUT_BATCH(MI_BATCH_BUFFER_END);
>   
> -	batch_end = batch_align(batch, 8);
> +	batch_end = intel_batchbuffer_align(batch, 8);
>   	igt_assert(batch_end < BATCH_STATE_SPLIT);
>   
>   	gen7_render_flush(batch, batch_end);
> diff --git a/lib/media_fill_gen8.c b/lib/media_fill_gen8.c
> index d6dd741..63fe72e 100644
> --- a/lib/media_fill_gen8.c
> +++ b/lib/media_fill_gen8.c
> @@ -82,7 +82,7 @@ gen8_media_fillfunc(struct intel_batchbuffer *batch,
>   
>   	OUT_BATCH(MI_BATCH_BUFFER_END);
>   
> -	batch_end = batch_align(batch, 8);
> +	batch_end = intel_batchbuffer_align(batch, 8);
>   	igt_assert(batch_end < BATCH_STATE_SPLIT);
>   
>   	gen7_render_flush(batch, batch_end);
> diff --git a/lib/media_fill_gen9.c b/lib/media_fill_gen9.c
> index a9a829f..78e892f 100644
> --- a/lib/media_fill_gen9.c
> +++ b/lib/media_fill_gen9.c
> @@ -91,7 +91,7 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
>   
>   	OUT_BATCH(MI_BATCH_BUFFER_END);
>   
> -	batch_end = batch_align(batch, 8);
> +	batch_end = intel_batchbuffer_align(batch, 8);
>   	assert(batch_end < BATCH_STATE_SPLIT);
>   
>   	gen7_render_flush(batch, batch_end);
> diff --git a/lib/media_spin.c b/lib/media_spin.c
> index 580c109..20af549 100644
> --- a/lib/media_spin.c
> +++ b/lib/media_spin.c
> @@ -45,42 +45,6 @@ static const uint32_t spin_kernel[][4] = {
>   	{ 0x07800031, 0x20000a40, 0x0e000e00, 0x82000010 }, /* send.ts (16)null<1> r112<0;1;0>:d 0x82000010 */
>   };
>   
> -static uint32_t
> -batch_used(struct intel_batchbuffer *batch)
> -{
> -	return batch->ptr - batch->buffer;
> -}
> -
> -static uint32_t
> -batch_align(struct intel_batchbuffer *batch, uint32_t align)
> -{
> -	uint32_t offset = batch_used(batch);
> -	offset = ALIGN(offset, align);
> -	batch->ptr = batch->buffer + offset;
> -	return offset;
> -}
> -
> -static void *
> -batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
> -{
> -	uint32_t offset = batch_align(batch, align);
> -	batch->ptr += size;
> -	return memset(batch->buffer + offset, 0, size);
> -}
> -
> -static uint32_t
> -batch_offset(struct intel_batchbuffer *batch, void *ptr)
> -{
> -	return (uint8_t *)ptr - batch->buffer;
> -}
> -
> -static uint32_t
> -batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size,
> -	   uint32_t align)
> -{
> -	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
> -}
> -
>   static void
>   gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
>   {
> @@ -100,8 +64,8 @@ gen8_spin_curbe_buffer_data(struct intel_batchbuffer *batch,
>   	uint32_t *curbe_buffer;
>   	uint32_t offset;
>   
> -	curbe_buffer = batch_alloc(batch, 64, 64);
> -	offset = batch_offset(batch, curbe_buffer);
> +	curbe_buffer = intel_batchbuffer_subdata_alloc(batch, 64, 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, curbe_buffer);
>   	*curbe_buffer = iters;
>   
>   	return offset;
> @@ -124,8 +88,8 @@ gen8_spin_surface_state(struct intel_batchbuffer *batch,
>   		read_domain = I915_GEM_DOMAIN_SAMPLER;
>   	}
>   
> -	ss = batch_alloc(batch, sizeof(*ss), 64);
> -	offset = batch_offset(batch, ss);
> +	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, ss);
>   
>   	ss->ss0.surface_type = GEN8_SURFACE_2D;
>   	ss->ss0.surface_format = format;
> @@ -141,7 +105,8 @@ gen8_spin_surface_state(struct intel_batchbuffer *batch,
>   	ss->ss8.base_addr = buf->bo->offset;
>   
>   	ret = drm_intel_bo_emit_reloc(batch->bo,
> -				batch_offset(batch, ss) + 8 * 4,
> +				intel_batchbuffer_subdata_offset(batch,
> +				ss) + 8 * 4,
>   				buf->bo, 0,
>   				read_domain, write_domain);
>   	igt_assert_eq(ret, 0);
> @@ -164,8 +129,8 @@ gen8_spin_binding_table(struct intel_batchbuffer *batch,
>   {
>   	uint32_t *binding_table, offset;
>   
> -	binding_table = batch_alloc(batch, 32, 64);
> -	offset = batch_offset(batch, binding_table);
> +	binding_table = intel_batchbuffer_subdata_alloc(batch, 32, 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, binding_table);
>   
>   	binding_table[0] = gen8_spin_surface_state(batch, dst,
>   					GEN8_SURFACEFORMAT_R8_UNORM, 1);
> @@ -180,7 +145,7 @@ gen8_spin_media_kernel(struct intel_batchbuffer *batch,
>   {
>   	uint32_t offset;
>   
> -	offset = batch_copy(batch, kernel, size, 64);
> +	offset = intel_batchbuffer_copy_data(batch, kernel, size, 64);
>   
>   	return offset;
>   }
> @@ -197,8 +162,8 @@ gen8_spin_interface_descriptor(struct intel_batchbuffer *batch,
>   	kernel_offset = gen8_spin_media_kernel(batch, spin_kernel,
>   					       sizeof(spin_kernel));
>   
> -	idd = batch_alloc(batch, sizeof(*idd), 64);
> -	offset = batch_offset(batch, idd);
> +	idd = intel_batchbuffer_subdata_alloc(batch, sizeof(*idd), 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, idd);
>   
>   	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
>   
> @@ -444,7 +409,7 @@ gen8_media_spinfunc(struct intel_batchbuffer *batch,
>   
>   	OUT_BATCH(MI_BATCH_BUFFER_END);
>   
> -	batch_end = batch_align(batch, 8);
> +	batch_end = intel_batchbuffer_align(batch, 8);
>   	igt_assert(batch_end < BATCH_STATE_SPLIT);
>   
>   	gen8_render_flush(batch, batch_end);
> @@ -482,7 +447,7 @@ gen8lp_media_spinfunc(struct intel_batchbuffer *batch,
>   
>   	OUT_BATCH(MI_BATCH_BUFFER_END);
>   
> -	batch_end = batch_align(batch, 8);
> +	batch_end = intel_batchbuffer_align(batch, 8);
>   	igt_assert(batch_end < BATCH_STATE_SPLIT);
>   
>   	gen8_render_flush(batch, batch_end);
> @@ -532,7 +497,7 @@ gen9_media_spinfunc(struct intel_batchbuffer *batch,
>   
>   	OUT_BATCH(MI_BATCH_BUFFER_END);
>   
> -	batch_end = batch_align(batch, 8);
> +	batch_end = intel_batchbuffer_align(batch, 8);
>   	igt_assert(batch_end < BATCH_STATE_SPLIT);
>   
>   	gen8_render_flush(batch, batch_end);
> diff --git a/lib/rendercopy_gen6.c b/lib/rendercopy_gen6.c
> index 8c24cf8..9dcfb86 100644
> --- a/lib/rendercopy_gen6.c
> +++ b/lib/rendercopy_gen6.c
> @@ -48,50 +48,6 @@ static const uint32_t ps_kernel_nomask_affine[][4] = {
>   	{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
>   };
>   
> -static uint32_t
> -batch_used(struct intel_batchbuffer *batch)
> -{
> -	return batch->ptr - batch->buffer;
> -}
> -
> -static uint32_t
> -batch_align(struct intel_batchbuffer *batch, uint32_t align)
> -{
> -	uint32_t offset = batch_used(batch);
> -	offset = ALIGN(offset, align);
> -	batch->ptr = batch->buffer + offset;
> -	return offset;
> -}
> -
> -static uint32_t
> -batch_round_upto(struct intel_batchbuffer *batch, uint32_t divisor)
> -{
> -	uint32_t offset = batch_used(batch);
> -	offset = (offset + divisor-1) / divisor * divisor;
> -	batch->ptr = batch->buffer + offset;
> -	return offset;
> -}
> -
> -static void *
> -batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
> -{
> -	uint32_t offset = batch_align(batch, align);
> -	batch->ptr += size;
> -	return memset(batch->buffer + offset, 0, size);
> -}
> -
> -static uint32_t
> -batch_offset(struct intel_batchbuffer *batch, void *ptr)
> -{
> -	return (uint8_t *)ptr - batch->buffer;
> -}
> -
> -static uint32_t
> -batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
> -{
> -	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
> -}
> -
>   static void
>   gen6_render_flush(struct intel_batchbuffer *batch,
>   		  drm_intel_context *context, uint32_t batch_end)
> @@ -120,7 +76,7 @@ gen6_bind_buf(struct intel_batchbuffer *batch, struct igt_buf *buf,
>   		read_domain = I915_GEM_DOMAIN_SAMPLER;
>   	}
>   
> -	ss = batch_alloc(batch, sizeof(*ss), 32);
> +	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 32);
>   	ss->ss0.surface_type = GEN6_SURFACE_2D;
>   	ss->ss0.surface_format = format;
>   
> @@ -129,7 +85,8 @@ gen6_bind_buf(struct intel_batchbuffer *batch, struct igt_buf *buf,
>   	ss->ss1.base_addr = buf->bo->offset;
>   
>   	ret = drm_intel_bo_emit_reloc(batch->bo,
> -				      batch_offset(batch, ss) + 4,
> +				      intel_batchbuffer_subdata_offset(batch,
> +				      ss) + 4,
>   				      buf->bo, 0,
>   				      read_domain, write_domain);
>   	igt_assert(ret == 0);
> @@ -140,7 +97,7 @@ gen6_bind_buf(struct intel_batchbuffer *batch, struct igt_buf *buf,
>   	ss->ss3.tiled_surface = buf->tiling != I915_TILING_NONE;
>   	ss->ss3.tile_walk     = buf->tiling == I915_TILING_Y;
>   
> -	return batch_offset(batch, ss);
> +	return intel_batchbuffer_subdata_offset(batch, ss);
>   }
>   
>   static uint32_t
> @@ -150,14 +107,14 @@ gen6_bind_surfaces(struct intel_batchbuffer *batch,
>   {
>   	uint32_t *binding_table;
>   
> -	binding_table = batch_alloc(batch, 32, 32);
> +	binding_table = intel_batchbuffer_subdata_alloc(batch, 32, 32);
>   
>   	binding_table[0] =
>   		gen6_bind_buf(batch, dst, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 1);
>   	binding_table[1] =
>   		gen6_bind_buf(batch, src, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 0);
>   
> -	return batch_offset(batch, binding_table);
> +	return intel_batchbuffer_subdata_offset(batch, binding_table);
>   }
>   
>   static void
> @@ -427,12 +384,12 @@ gen6_create_cc_viewport(struct intel_batchbuffer *batch)
>   {
>   	struct gen6_cc_viewport *vp;
>   
> -	vp = batch_alloc(batch, sizeof(*vp), 32);
> +	vp = intel_batchbuffer_subdata_alloc(batch, sizeof(*vp), 32);
>   
>   	vp->min_depth = -1.e35;
>   	vp->max_depth = 1.e35;
>   
> -	return batch_offset(batch, vp);
> +	return intel_batchbuffer_subdata_offset(batch, vp);
>   }
>   
>   static uint32_t
> @@ -440,7 +397,7 @@ gen6_create_cc_blend(struct intel_batchbuffer *batch)
>   {
>   	struct gen6_blend_state *blend;
>   
> -	blend = batch_alloc(batch, sizeof(*blend), 64);
> +	blend = intel_batchbuffer_subdata_alloc(batch, sizeof(*blend), 64);
>   
>   	blend->blend0.dest_blend_factor = GEN6_BLENDFACTOR_ZERO;
>   	blend->blend0.source_blend_factor = GEN6_BLENDFACTOR_ONE;
> @@ -450,13 +407,13 @@ gen6_create_cc_blend(struct intel_batchbuffer *batch)
>   	blend->blend1.post_blend_clamp_enable = 1;
>   	blend->blend1.pre_blend_clamp_enable = 1;
>   
> -	return batch_offset(batch, blend);
> +	return intel_batchbuffer_subdata_offset(batch, blend);
>   }
>   
>   static uint32_t
>   gen6_create_kernel(struct intel_batchbuffer *batch)
>   {
> -	return batch_copy(batch, ps_kernel_nomask_affine,
> +	return intel_batchbuffer_copy_data(batch, ps_kernel_nomask_affine,
>   			  sizeof(ps_kernel_nomask_affine),
>   			  64);
>   }
> @@ -468,7 +425,7 @@ gen6_create_sampler(struct intel_batchbuffer *batch,
>   {
>   	struct gen6_sampler_state *ss;
>   
> -	ss = batch_alloc(batch, sizeof(*ss), 32);
> +	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 32);
>   	ss->ss0.lod_preclamp = 1;	/* GL mode */
>   
>   	/* We use the legacy mode to get the semantics specified by
> @@ -511,7 +468,7 @@ gen6_create_sampler(struct intel_batchbuffer *batch,
>   		break;
>   	}
>   
> -	return batch_offset(batch, ss);
> +	return intel_batchbuffer_subdata_offset(batch, ss);
>   }
>   
>   static void gen6_emit_vertex_buffer(struct intel_batchbuffer *batch)
> @@ -535,7 +492,7 @@ static uint32_t gen6_emit_primitive(struct intel_batchbuffer *batch)
>   		  0 << 9 |
>   		  4);
>   	OUT_BATCH(3);	/* vertex count */
> -	offset = batch_used(batch);
> +	offset = batch->ptr - batch->buffer;
>   	OUT_BATCH(0);	/* vertex_index */
>   	OUT_BATCH(1);	/* single instance */
>   	OUT_BATCH(0);	/* start instance location */
> @@ -557,7 +514,7 @@ void gen6_render_copyfunc(struct intel_batchbuffer *batch,
>   	intel_batchbuffer_flush_with_context(batch, context);
>   
>   	batch->ptr = batch->buffer + 1024;
> -	batch_alloc(batch, 64, 64);
> +	intel_batchbuffer_subdata_alloc(batch, 64, 64);
>   	wm_table  = gen6_bind_surfaces(batch, src, dst);
>   	wm_kernel = gen6_create_kernel(batch);
>   	wm_state  = gen6_create_sampler(batch,
> @@ -594,10 +551,10 @@ void gen6_render_copyfunc(struct intel_batchbuffer *batch,
>   	offset = gen6_emit_primitive(batch);
>   
>   	OUT_BATCH(MI_BATCH_BUFFER_END);
> -	batch_end = batch_align(batch, 8);
> +	batch_end = intel_batchbuffer_align(batch, 8);
>   
>   	*(uint32_t*)(batch->buffer + offset) =
> -		batch_round_upto(batch, VERTEX_SIZE)/VERTEX_SIZE;
> +		intel_batchbuffer_round_upto(batch, VERTEX_SIZE)/VERTEX_SIZE;
>   
>   	emit_vertex_2s(batch, dst_x + width, dst_y + height);
>   	emit_vertex_normalized(batch, src_x + width, igt_buf_width(src));
> diff --git a/lib/rendercopy_gen7.c b/lib/rendercopy_gen7.c
> index 0049e27..785adb3 100644
> --- a/lib/rendercopy_gen7.c
> +++ b/lib/rendercopy_gen7.c
> @@ -32,41 +32,6 @@ static const uint32_t ps_kernel[][4] = {
>   	{ 0x05800031, 0x20001fa8, 0x008d0e20, 0x90031000 },
>   };
>   
> -static uint32_t
> -batch_used(struct intel_batchbuffer *batch)
> -{
> -	return batch->ptr - batch->buffer;
> -}
> -
> -static uint32_t
> -batch_align(struct intel_batchbuffer *batch, uint32_t align)
> -{
> -	uint32_t offset = batch_used(batch);
> -	offset = ALIGN(offset, align);
> -	batch->ptr = batch->buffer + offset;
> -	return offset;
> -}
> -
> -static void *
> -batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
> -{
> -	uint32_t offset = batch_align(batch, align);
> -	batch->ptr += size;
> -	return memset(batch->buffer + offset, 0, size);
> -}
> -
> -static uint32_t
> -batch_offset(struct intel_batchbuffer *batch, void *ptr)
> -{
> -	return (uint8_t *)ptr - batch->buffer;
> -}
> -
> -static uint32_t
> -batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
> -{
> -	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
> -}
> -
>   static void
>   gen7_render_flush(struct intel_batchbuffer *batch,
>   		  drm_intel_context *context, uint32_t batch_end)
> @@ -108,7 +73,7 @@ gen7_bind_buf(struct intel_batchbuffer *batch,
>   		read_domain = I915_GEM_DOMAIN_SAMPLER;
>   	}
>   
> -	ss = batch_alloc(batch, 8 * sizeof(*ss), 32);
> +	ss = intel_batchbuffer_subdata_alloc(batch, 8 * sizeof(*ss), 32);
>   
>   	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
>   		 gen7_tiling_bits(buf->tiling) |
> @@ -125,12 +90,13 @@ gen7_bind_buf(struct intel_batchbuffer *batch,
>   		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
>   
>   	ret = drm_intel_bo_emit_reloc(batch->bo,
> -				      batch_offset(batch, ss) + 4,
> +				      intel_batchbuffer_subdata_offset(batch,
> +				      ss) + 4,
>   				      buf->bo, 0,
>   				      read_domain, write_domain);
>   	igt_assert(ret == 0);
>   
> -	return batch_offset(batch, ss);
> +	return intel_batchbuffer_subdata_offset(batch, ss);
>   }
>   
>   static void
> @@ -175,7 +141,7 @@ gen7_create_vertex_buffer(struct intel_batchbuffer *batch,
>   {
>   	uint16_t *v;
>   
> -	v = batch_alloc(batch, 12 * sizeof(*v), 8);
> +	v = intel_batchbuffer_subdata_alloc(batch, 12 * sizeof(*v), 8);
>   
>   	v[0] = dst_x + width;
>   	v[1] = dst_y + height;
> @@ -192,7 +158,7 @@ gen7_create_vertex_buffer(struct intel_batchbuffer *batch,
>   	v[10] = src_x;
>   	v[11] = src_y;
>   
> -	return batch_offset(batch, v);
> +	return intel_batchbuffer_subdata_offset(batch, v);
>   }
>   
>   static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch,
> @@ -219,14 +185,14 @@ gen7_bind_surfaces(struct intel_batchbuffer *batch,
>   {
>   	uint32_t *binding_table;
>   
> -	binding_table = batch_alloc(batch, 8, 32);
> +	binding_table = intel_batchbuffer_subdata_alloc(batch, 8, 32);
>   
>   	binding_table[0] =
>   		gen7_bind_buf(batch, dst, GEN7_SURFACEFORMAT_B8G8R8A8_UNORM, 1);
>   	binding_table[1] =
>   		gen7_bind_buf(batch, src, GEN7_SURFACEFORMAT_B8G8R8A8_UNORM, 0);
>   
> -	return batch_offset(batch, binding_table);
> +	return intel_batchbuffer_subdata_offset(batch, binding_table);
>   }
>   
>   static void
> @@ -253,7 +219,7 @@ gen7_create_blend_state(struct intel_batchbuffer *batch)
>   {
>   	struct gen7_blend_state *blend;
>   
> -	blend = batch_alloc(batch, sizeof(*blend), 64);
> +	blend = intel_batchbuffer_subdata_alloc(batch, sizeof(*blend), 64);
>   
>   	blend->blend0.dest_blend_factor = GEN7_BLENDFACTOR_ZERO;
>   	blend->blend0.source_blend_factor = GEN7_BLENDFACTOR_ONE;
> @@ -261,7 +227,7 @@ gen7_create_blend_state(struct intel_batchbuffer *batch)
>   	blend->blend1.post_blend_clamp_enable = 1;
>   	blend->blend1.pre_blend_clamp_enable = 1;
>   
> -	return batch_offset(batch, blend);
> +	return intel_batchbuffer_subdata_offset(batch, blend);
>   }
>   
>   static void
> @@ -285,11 +251,11 @@ gen7_create_cc_viewport(struct intel_batchbuffer *batch)
>   {
>   	struct gen7_cc_viewport *vp;
>   
> -	vp = batch_alloc(batch, sizeof(*vp), 32);
> +	vp = intel_batchbuffer_subdata_alloc(batch, sizeof(*vp), 32);
>   	vp->min_depth = -1.e35;
>   	vp->max_depth = 1.e35;
>   
> -	return batch_offset(batch, vp);
> +	return intel_batchbuffer_subdata_offset(batch, vp);
>   }
>   
>   static void
> @@ -308,7 +274,7 @@ gen7_create_sampler(struct intel_batchbuffer *batch)
>   {
>   	struct gen7_sampler_state *ss;
>   
> -	ss = batch_alloc(batch, sizeof(*ss), 32);
> +	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 32);
>   
>   	ss->ss0.min_filter = GEN7_MAPFILTER_NEAREST;
>   	ss->ss0.mag_filter = GEN7_MAPFILTER_NEAREST;
> @@ -319,7 +285,7 @@ gen7_create_sampler(struct intel_batchbuffer *batch)
>   
>   	ss->ss3.non_normalized_coord = 1;
>   
> -	return batch_offset(batch, ss);
> +	return intel_batchbuffer_subdata_offset(batch, ss);
>   }
>   
>   static void
> @@ -544,7 +510,8 @@ void gen7_render_copyfunc(struct intel_batchbuffer *batch,
>   	blend_state = gen7_create_blend_state(batch);
>   	cc_viewport = gen7_create_cc_viewport(batch);
>   	ps_sampler_off = gen7_create_sampler(batch);
> -	ps_kernel_off = batch_copy(batch, ps_kernel, sizeof(ps_kernel), 64);
> +	ps_kernel_off = intel_batchbuffer_copy_data(batch, ps_kernel,
> +						    sizeof(ps_kernel), 64);
>   	vertex_buffer = gen7_create_vertex_buffer(batch,
>   						  src_x, src_y,
>   						  dst_x, dst_y,
> diff --git a/lib/rendercopy_gen8.c b/lib/rendercopy_gen8.c
> index fe3fedf..fbf049f 100644
> --- a/lib/rendercopy_gen8.c
> +++ b/lib/rendercopy_gen8.c
> @@ -129,41 +129,6 @@ static void annotation_flush(struct annotations_context *aub,
>   						 aub->index);
>   }
>   
> -static uint32_t
> -batch_used(struct intel_batchbuffer *batch)
> -{
> -	return batch->ptr - batch->buffer;
> -}
> -
> -static uint32_t
> -batch_align(struct intel_batchbuffer *batch, uint32_t align)
> -{
> -	uint32_t offset = batch_used(batch);
> -	offset = ALIGN(offset, align);
> -	batch->ptr = batch->buffer + offset;
> -	return offset;
> -}
> -
> -static void *
> -batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
> -{
> -	uint32_t offset = batch_align(batch, align);
> -	batch->ptr += size;
> -	return memset(batch->buffer + offset, 0, size);
> -}
> -
> -static uint32_t
> -batch_offset(struct intel_batchbuffer *batch, void *ptr)
> -{
> -	return (uint8_t *)ptr - batch->buffer;
> -}
> -
> -static uint32_t
> -batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
> -{
> -	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
> -}
> -
>   static void
>   gen6_render_flush(struct intel_batchbuffer *batch,
>   		  drm_intel_context *context, uint32_t batch_end)
> @@ -195,8 +160,8 @@ gen8_bind_buf(struct intel_batchbuffer *batch,
>   		read_domain = I915_GEM_DOMAIN_SAMPLER;
>   	}
>   
> -	ss = batch_alloc(batch, sizeof(*ss), 64);
> -	offset = batch_offset(batch, ss);
> +	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, ss);
>   	annotation_add_state(aub, AUB_TRACE_SURFACE_STATE, offset, sizeof(*ss));
>   
>   	ss->ss0.surface_type = GEN6_SURFACE_2D;
> @@ -212,7 +177,8 @@ gen8_bind_buf(struct intel_batchbuffer *batch,
>   	ss->ss8.base_addr = buf->bo->offset;
>   
>   	ret = drm_intel_bo_emit_reloc(batch->bo,
> -				      batch_offset(batch, ss) + 8 * 4,
> +				      intel_batchbuffer_subdata_offset(batch,
> +				      ss) + 8 * 4,
>   				      buf->bo, 0,
>   				      read_domain, write_domain);
>   	igt_assert(ret == 0);
> @@ -237,8 +203,8 @@ gen8_bind_surfaces(struct intel_batchbuffer *batch,
>   {
>   	uint32_t *binding_table, offset;
>   
> -	binding_table = batch_alloc(batch, 8, 32);
> -	offset = batch_offset(batch, binding_table);
> +	binding_table = intel_batchbuffer_subdata_alloc(batch, 8, 32);
> +	offset = intel_batchbuffer_subdata_offset(batch, binding_table);
>   	annotation_add_state(aub, AUB_TRACE_BINDING_TABLE, offset, 8);
>   
>   	binding_table[0] =
> @@ -259,8 +225,8 @@ gen8_create_sampler(struct intel_batchbuffer *batch,
>   	struct gen8_sampler_state *ss;
>   	uint32_t offset;
>   
> -	ss = batch_alloc(batch, sizeof(*ss), 64);
> -	offset = batch_offset(batch, ss);
> +	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, ss);
>   	annotation_add_state(aub, AUB_TRACE_SAMPLER_STATE,
>   			     offset, sizeof(*ss));
>   
> @@ -285,7 +251,7 @@ gen8_fill_ps(struct intel_batchbuffer *batch,
>   {
>   	uint32_t offset;
>   
> -	offset = batch_copy(batch, kernel, size, 64);
> +	offset = intel_batchbuffer_copy_data(batch, kernel, size, 64);
>   	annotation_add_state(aub, AUB_TRACE_KERNEL_INSTRUCTIONS, offset, size);
>   
>   	return offset;
> @@ -312,7 +278,7 @@ gen7_fill_vertex_buffer_data(struct intel_batchbuffer *batch,
>   	void *start;
>   	uint32_t offset;
>   
> -	batch_align(batch, 8);
> +	intel_batchbuffer_align(batch, 8);
>   	start = batch->ptr;
>   
>   	emit_vertex_2s(batch, dst_x + width, dst_y + height);
> @@ -327,7 +293,7 @@ gen7_fill_vertex_buffer_data(struct intel_batchbuffer *batch,
>   	emit_vertex_normalized(batch, src_x, igt_buf_width(src));
>   	emit_vertex_normalized(batch, src_y, igt_buf_height(src));
>   
> -	offset = batch_offset(batch, start);
> +	offset = intel_batchbuffer_subdata_offset(batch, start);
>   	annotation_add_state(aub, AUB_TRACE_VERTEX_BUFFER,
>   			     offset, 3 * VERTEX_SIZE);
>   	return offset;
> @@ -413,8 +379,9 @@ gen6_create_cc_state(struct intel_batchbuffer *batch,
>   	struct gen6_color_calc_state *cc_state;
>   	uint32_t offset;
>   
> -	cc_state = batch_alloc(batch, sizeof(*cc_state), 64);
> -	offset = batch_offset(batch, cc_state);
> +	cc_state = intel_batchbuffer_subdata_alloc(batch,
> +						   sizeof(*cc_state), 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, cc_state);
>   	annotation_add_state(aub, AUB_TRACE_CC_STATE,
>   			     offset, sizeof(*cc_state));
>   
> @@ -429,8 +396,8 @@ gen8_create_blend_state(struct intel_batchbuffer *batch,
>   	int i;
>   	uint32_t offset;
>   
> -	blend = batch_alloc(batch, sizeof(*blend), 64);
> -	offset = batch_offset(batch, blend);
> +	blend = intel_batchbuffer_subdata_alloc(batch, sizeof(*blend), 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, blend);
>   	annotation_add_state(aub, AUB_TRACE_BLEND_STATE,
>   			     offset, sizeof(*blend));
>   
> @@ -452,8 +419,8 @@ gen6_create_cc_viewport(struct intel_batchbuffer *batch,
>   	struct gen6_cc_viewport *vp;
>   	uint32_t offset;
>   
> -	vp = batch_alloc(batch, sizeof(*vp), 32);
> -	offset = batch_offset(batch, vp);
> +	vp = intel_batchbuffer_subdata_alloc(batch, sizeof(*vp), 32);
> +	offset = intel_batchbuffer_subdata_offset(batch, vp);
>   	annotation_add_state(aub, AUB_TRACE_CC_VP_STATE,
>   			     offset, sizeof(*vp));
>   
> @@ -472,8 +439,9 @@ gen7_create_sf_clip_viewport(struct intel_batchbuffer *batch,
>   	struct gen7_sf_clip_viewport *scv_state;
>   	uint32_t offset;
>   
> -	scv_state = batch_alloc(batch, sizeof(*scv_state), 64);
> -	offset = batch_offset(batch, scv_state);
> +	scv_state = intel_batchbuffer_subdata_alloc(batch,
> +						    sizeof(*scv_state), 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, scv_state);
>   	annotation_add_state(aub, AUB_TRACE_CLIP_VP_STATE,
>   			     offset, sizeof(*scv_state));
>   
> @@ -492,8 +460,8 @@ gen6_create_scissor_rect(struct intel_batchbuffer *batch,
>   	struct gen6_scissor_rect *scissor;
>   	uint32_t offset;
>   
> -	scissor = batch_alloc(batch, sizeof(*scissor), 64);
> -	offset = batch_offset(batch, scissor);
> +	scissor = intel_batchbuffer_subdata_alloc(batch, sizeof(*scissor), 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, scissor);
>   	annotation_add_state(aub, AUB_TRACE_SCISSOR_STATE,
>   			     offset, sizeof(*scissor));
>   
> @@ -934,7 +902,7 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
>   
>   	intel_batchbuffer_flush_with_context(batch, context);
>   
> -	batch_align(batch, 8);
> +	intel_batchbuffer_align(batch, 8);
>   
>   	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
>   
> @@ -1019,7 +987,7 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
>   
>   	OUT_BATCH(MI_BATCH_BUFFER_END);
>   
> -	batch_end = batch_align(batch, 8);
> +	batch_end = intel_batchbuffer_align(batch, 8);
>   	igt_assert(batch_end < BATCH_STATE_SPLIT);
>   	annotation_add_batch(&aub_annotations, batch_end);
>   
> diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
> index e646e97..9bd1cbb 100644
> --- a/lib/rendercopy_gen9.c
> +++ b/lib/rendercopy_gen9.c
> @@ -130,41 +130,6 @@ static void annotation_flush(struct annotations_context *ctx,
>   						 ctx->index);
>   }
>   
> -static uint32_t
> -batch_used(struct intel_batchbuffer *batch)
> -{
> -	return batch->ptr - batch->buffer;
> -}
> -
> -static uint32_t
> -batch_align(struct intel_batchbuffer *batch, uint32_t align)
> -{
> -	uint32_t offset = batch_used(batch);
> -	offset = ALIGN(offset, align);
> -	batch->ptr = batch->buffer + offset;
> -	return offset;
> -}
> -
> -static void *
> -batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
> -{
> -	uint32_t offset = batch_align(batch, align);
> -	batch->ptr += size;
> -	return memset(batch->buffer + offset, 0, size);
> -}
> -
> -static uint32_t
> -batch_offset(struct intel_batchbuffer *batch, void *ptr)
> -{
> -	return (uint8_t *)ptr - batch->buffer;
> -}
> -
> -static uint32_t
> -batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
> -{
> -	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
> -}
> -
>   static void
>   gen6_render_flush(struct intel_batchbuffer *batch,
>   		  drm_intel_context *context, uint32_t batch_end)
> @@ -193,8 +158,8 @@ gen8_bind_buf(struct intel_batchbuffer *batch, struct igt_buf *buf,
>   		read_domain = I915_GEM_DOMAIN_SAMPLER;
>   	}
>   
> -	ss = batch_alloc(batch, sizeof(*ss), 64);
> -	offset = batch_offset(batch, ss);
> +	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, ss);
>   	annotation_add_state(&aub_annotations, AUB_TRACE_SURFACE_STATE,
>   			     offset, sizeof(*ss));
>   
> @@ -211,7 +176,8 @@ gen8_bind_buf(struct intel_batchbuffer *batch, struct igt_buf *buf,
>   	ss->ss8.base_addr = buf->bo->offset;
>   
>   	ret = drm_intel_bo_emit_reloc(batch->bo,
> -				      batch_offset(batch, ss) + 8 * 4,
> +				      intel_batchbuffer_subdata_offset(batch,
> +				      ss) + 8 * 4,
>   				      buf->bo, 0,
>   				      read_domain, write_domain);
>   	assert(ret == 0);
> @@ -235,8 +201,8 @@ gen8_bind_surfaces(struct intel_batchbuffer *batch,
>   {
>   	uint32_t *binding_table, offset;
>   
> -	binding_table = batch_alloc(batch, 8, 32);
> -	offset = batch_offset(batch, binding_table);
> +	binding_table = intel_batchbuffer_subdata_alloc(batch, 8, 32);
> +	offset = intel_batchbuffer_subdata_offset(batch, binding_table);
>   	annotation_add_state(&aub_annotations, AUB_TRACE_BINDING_TABLE,
>   			     offset, 8);
>   
> @@ -254,8 +220,8 @@ gen8_create_sampler(struct intel_batchbuffer *batch) {
>   	struct gen8_sampler_state *ss;
>   	uint32_t offset;
>   
> -	ss = batch_alloc(batch, sizeof(*ss), 64);
> -	offset = batch_offset(batch, ss);
> +	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, ss);
>   	annotation_add_state(&aub_annotations, AUB_TRACE_SAMPLER_STATE,
>   			     offset, sizeof(*ss));
>   
> @@ -279,7 +245,7 @@ gen8_fill_ps(struct intel_batchbuffer *batch,
>   {
>   	uint32_t offset;
>   
> -	offset = batch_copy(batch, kernel, size, 64);
> +	offset = intel_batchbuffer_copy_data(batch, kernel, size, 64);
>   	annotation_add_state(&aub_annotations, AUB_TRACE_KERNEL_INSTRUCTIONS,
>   			     offset, size);
>   
> @@ -306,7 +272,7 @@ gen7_fill_vertex_buffer_data(struct intel_batchbuffer *batch,
>   	void *start;
>   	uint32_t offset;
>   
> -	batch_align(batch, 8);
> +	intel_batchbuffer_align(batch, 8);
>   	start = batch->ptr;
>   
>   	emit_vertex_2s(batch, dst_x + width, dst_y + height);
> @@ -321,7 +287,7 @@ gen7_fill_vertex_buffer_data(struct intel_batchbuffer *batch,
>   	emit_vertex_normalized(batch, src_x, igt_buf_width(src));
>   	emit_vertex_normalized(batch, src_y, igt_buf_height(src));
>   
> -	offset = batch_offset(batch, start);
> +	offset = intel_batchbuffer_subdata_offset(batch, start);
>   	annotation_add_state(&aub_annotations, AUB_TRACE_VERTEX_BUFFER,
>   			     offset, 3 * VERTEX_SIZE);
>   	return offset;
> @@ -406,8 +372,9 @@ gen6_create_cc_state(struct intel_batchbuffer *batch)
>   	struct gen6_color_calc_state *cc_state;
>   	uint32_t offset;
>   
> -	cc_state = batch_alloc(batch, sizeof(*cc_state), 64);
> -	offset = batch_offset(batch, cc_state);
> +	cc_state = intel_batchbuffer_subdata_alloc(batch,
> +						   sizeof(*cc_state), 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, cc_state);
>   	annotation_add_state(&aub_annotations, AUB_TRACE_CC_STATE,
>   			     offset, sizeof(*cc_state));
>   
> @@ -421,8 +388,8 @@ gen8_create_blend_state(struct intel_batchbuffer *batch)
>   	int i;
>   	uint32_t offset;
>   
> -	blend = batch_alloc(batch, sizeof(*blend), 64);
> -	offset = batch_offset(batch, blend);
> +	blend = intel_batchbuffer_subdata_alloc(batch, sizeof(*blend), 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, blend);
>   	annotation_add_state(&aub_annotations, AUB_TRACE_BLEND_STATE,
>   			     offset, sizeof(*blend));
>   
> @@ -443,8 +410,8 @@ gen6_create_cc_viewport(struct intel_batchbuffer *batch)
>   	struct gen6_cc_viewport *vp;
>   	uint32_t offset;
>   
> -	vp = batch_alloc(batch, sizeof(*vp), 32);
> -	offset = batch_offset(batch, vp);
> +	vp = intel_batchbuffer_subdata_alloc(batch, sizeof(*vp), 32);
> +	offset = intel_batchbuffer_subdata_offset(batch, vp);
>   	annotation_add_state(&aub_annotations, AUB_TRACE_CC_VP_STATE,
>   			     offset, sizeof(*vp));
>   
> @@ -461,8 +428,9 @@ gen7_create_sf_clip_viewport(struct intel_batchbuffer *batch) {
>   	struct gen7_sf_clip_viewport *scv_state;
>   	uint32_t offset;
>   
> -	scv_state = batch_alloc(batch, sizeof(*scv_state), 64);
> -	offset = batch_offset(batch, scv_state);
> +	scv_state = intel_batchbuffer_subdata_alloc(batch,
> +						    sizeof(*scv_state), 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, scv_state);
>   	annotation_add_state(&aub_annotations, AUB_TRACE_CLIP_VP_STATE,
>   			     offset, sizeof(*scv_state));
>   
> @@ -480,8 +448,8 @@ gen6_create_scissor_rect(struct intel_batchbuffer *batch)
>   	struct gen6_scissor_rect *scissor;
>   	uint32_t offset;
>   
> -	scissor = batch_alloc(batch, sizeof(*scissor), 64);
> -	offset = batch_offset(batch, scissor);
> +	scissor = intel_batchbuffer_subdata_alloc(batch, sizeof(*scissor), 64);
> +	offset = intel_batchbuffer_subdata_offset(batch, scissor);
>   	annotation_add_state(&aub_annotations, AUB_TRACE_SCISSOR_STATE,
>   			     offset, sizeof(*scissor));
>   
> @@ -940,7 +908,7 @@ void gen9_render_copyfunc(struct intel_batchbuffer *batch,
>   
>   	intel_batchbuffer_flush_with_context(batch, context);
>   
> -	batch_align(batch, 8);
> +	intel_batchbuffer_align(batch, 8);
>   
>   	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
>   
> @@ -1023,7 +991,7 @@ void gen9_render_copyfunc(struct intel_batchbuffer *batch,
>   
>   	OUT_BATCH(MI_BATCH_BUFFER_END);
>   
> -	batch_end = batch_align(batch, 8);
> +	batch_end = intel_batchbuffer_align(batch, 8);
>   	assert(batch_end < BATCH_STATE_SPLIT);
>   	annotation_add_batch(&aub_annotations, batch_end);
>   
> 

