[Mesa-dev] [PATCH 18/25] i965: Import surface lowering code.

Francisco Jerez currojerez at riseup.net
Mon Dec 2 11:39:26 PST 2013


This patch implements a hierarchy of helper classes that take care of
lowering image and atomic counter built-in calls into typed and
untyped surface reads, writes and atomics: a common brw_surface_visitor
base class and FS- and VEC4-specific subclasses that are meant to be
invoked from the respective visitors.  Loads and stores on images of an
unsupported format are transformed into typed surface operations on the
closest supported format, generating code that handles type conversion
and component packing and unpacking as necessary.

In cases where the hardware has no suitable closest format (formats of
more than 64bpp on IVB and 128bpp on HSW), untyped surface access is
used instead.  This can be slow because the tiling calculation has to
be handled in the shader, but all the alternatives we've come up with
so far -- using the texture sampler for unsupported surface reads,
multiplying the texture width by some factor to cover the whole image
memory, and preprocessing images to convert them to a supported format
-- seem to be even slower.
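
For reference, the tiling calculation this generates for the 2D case
is roughly equivalent to the following scalar sketch (names are
illustrative; tile_x and tile_y hold the base-2 logarithm of the tile
dimensions, stride_x is the Bpp value and stride_y the row pitch in
bytes, matching the parameters emit_coordinate_address_calculation()
reads from the image argument; the fixed surface offset and the bit-6
address swizzling steps are omitted):

   unsigned
   image_address_2d(unsigned x, unsigned y,
                    unsigned tile_x, unsigned tile_y,
                    unsigned stride_x, unsigned stride_y)
   {
      /* Decompose the coordinates into tile and intra-tile indices. */
      const unsigned high_x = x >> tile_x, low_x = x & ((1 << tile_x) - 1);
      const unsigned high_y = y >> tile_y, low_y = y & ((1 << tile_y) - 1);

      /* Pixel index from the start of the tile row, scaled by Bpp. */
      const unsigned bytes =
         ((((high_x << tile_y) + low_y) << tile_x) + low_x) * stride_x;

      /* Add the byte offset of the start of the tile row. */
      return bytes + ((stride_y * high_y) << tile_y);
   }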
---
 src/mesa/drivers/dri/i965/Makefile.sources         |    3 +
 .../drivers/dri/i965/brw_fs_surface_visitor.cpp    |  829 ++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs_surface_visitor.h |  165 +++
 src/mesa/drivers/dri/i965/brw_reg.h                |    5 +
 src/mesa/drivers/dri/i965/brw_surface_visitor.cpp  | 1208 ++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_surface_visitor.h    |  233 ++++
 .../drivers/dri/i965/brw_vec4_surface_visitor.cpp  |  846 ++++++++++++++
 .../drivers/dri/i965/brw_vec4_surface_visitor.h    |  169 +++
 8 files changed, 3458 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/brw_fs_surface_visitor.cpp
 create mode 100644 src/mesa/drivers/dri/i965/brw_fs_surface_visitor.h
 create mode 100644 src/mesa/drivers/dri/i965/brw_surface_visitor.cpp
 create mode 100644 src/mesa/drivers/dri/i965/brw_surface_visitor.h
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_surface_visitor.cpp
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_surface_visitor.h

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 193f2db..c71909a 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -61,6 +61,7 @@ i965_FILES = \
 	brw_fs_generator.cpp \
 	brw_fs_live_variables.cpp \
 	brw_fs_reg_allocate.cpp \
+	brw_fs_surface_visitor.cpp \
 	brw_fs_vector_splitting.cpp \
 	brw_fs_visitor.cpp \
 	brw_gs.c \
@@ -88,6 +89,7 @@ i965_FILES = \
 	brw_state_dump.c \
 	brw_state_upload.c \
 	brw_surface_formats.c \
+	brw_surface_visitor.cpp \
 	brw_tex.c \
 	brw_tex_layout.c \
 	brw_urb.c \
@@ -99,6 +101,7 @@ i965_FILES = \
 	brw_vec4_gs_visitor.cpp \
 	brw_vec4_live_variables.cpp \
 	brw_vec4_reg_allocate.cpp \
+	brw_vec4_surface_visitor.cpp \
 	brw_vec4_visitor.cpp \
 	brw_vec4_vp.cpp \
 	brw_vec4_vs_visitor.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_visitor.cpp
new file mode 100644
index 0000000..dca0706
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_surface_visitor.cpp
@@ -0,0 +1,829 @@
+/*
+ * Copyright 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Francisco Jerez <currojerez at riseup.net>
+ */
+
+#include "brw_fs_surface_visitor.h"
+
+namespace {
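+   /**
+    * Force the instruction to be executed on all channels, regardless
+    * of the normal per-channel execution masking.
+    */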
+   fs_inst &
+   exec_all(fs_inst &inst)
+   {
+      inst.force_writemask_all = true;
+      return inst;
+   }
+
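+   /**
+    * Force the instruction to be executed on the given half (0 or 1)
+    * of a SIMD16 vector.
+    */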
+   fs_inst &
+   exec_half(unsigned half, fs_inst &inst)
+   {
+      if (half == 1)
+         inst.force_sechalf = true;
+      else
+         inst.force_uncompressed = true;
+
+      return inst;
+   }
+
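+   /**
+    * Predicate the instruction on the given flag register, if any.
+    */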
+   fs_inst &
+   exec_predicated(backend_reg flag, fs_inst &inst)
+   {
+      if (flag.file != BAD_FILE) {
+         inst.predicate = BRW_PREDICATE_NORMAL;
+         inst.flag_subreg = flag.fixed_hw_reg.subnr / 2;
+      }
+
+      return inst;
+   }
+
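+   /**
+    * Get the mask of enabled fragment channels: the flag register
+    * maintained by discard if the shader uses KIL, otherwise a copy
+    * of the pixel dispatch mask from the thread payload.
+    */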
+   struct brw_reg
+   get_sample_mask(fs_visitor *v)
+   {
+      if (v->fp->UsesKill) {
+         return brw_flag_reg(0, 1);
+      } else {
+         if (v->brw->gen >= 6)
+            return retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD);
+         else
+            return retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD);
+      }
+   }
+}
+
+brw_fs_surface_visitor::brw_fs_surface_visitor(fs_visitor *v) :
+   brw_surface_visitor(v), v(v)
+{
+}
+
+fs_inst &
+brw_fs_surface_visitor::emit(opcode op, fs_reg dst,
+                             fs_reg src0,
+                             fs_reg src1,
+                             fs_reg src2) const
+{
+   fs_inst inst(op, dst, src0, src1, src2);
+
+   return *v->emit(inst);
+}
+
+fs_reg
+brw_fs_surface_visitor::make_grf(unsigned type, unsigned size) const
+{
+   return fs_reg(GRF, v->virtual_grf_alloc(size), type);
+}
+
+fs_reg
+brw_fs_surface_visitor::make_mrf(unsigned reg) const
+{
+   return fs_reg(MRF, reg, BRW_REGISTER_TYPE_UD);
+}
+
+void
+brw_fs_surface_visitor::emit_assign_vector(
+   backend_reg dst, backend_reg src, unsigned size) const
+{
+   for (unsigned i = 0; i < size; ++i)
+      emit(BRW_OPCODE_MOV, offset(dst, i), offset(src, i));
+}
+
+/**
+ * Copy one of the halves of a SIMD16 vector to a SIMD8 vector.
+ */
+void
+brw_fs_surface_visitor::emit_pack_vector_half(
+   fs_reg dst, fs_reg src,
+   unsigned i, unsigned size) const
+{
+   const unsigned w = v->dispatch_width / 8;
+
+   for (unsigned j = 0; j < size; ++j)
+      exec_half(i,
+                emit(BRW_OPCODE_MOV,
+                     half(offset(dst, j / w), j % w),
+                     half(offset(src, j), i)));
+}
+
+/**
+ * Copy a SIMD8 vector to one of the halves of a SIMD16 vector.
+ */
+void
+brw_fs_surface_visitor::emit_unpack_vector_half(
+   fs_reg dst, fs_reg src,
+   unsigned i, unsigned size) const
+{
+   const unsigned w = v->dispatch_width / 8;
+
+   for (unsigned j = 0; j < size; ++j)
+      exec_half(i,
+                emit(BRW_OPCODE_MOV,
+                     half(offset(dst, j), i),
+                     half(offset(src, j / w), j % w)));
+}
+
+/**
+ * Initialize the header present in some surface access messages.
+ */
+void
+brw_fs_surface_visitor::emit_surface_header(struct fs_reg dst) const
+{
+   assert(dst.file == MRF);
+   exec_all(exec_half(0, emit(BRW_OPCODE_MOV, dst, 0)));
+   exec_all(emit(BRW_OPCODE_MOV, brw_uvec_mrf(1, dst.reg, 7),
+                 get_sample_mask(v)));
+}
+
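+/**
+ * Check that the given coordinates lie within the bounds of the image,
+ * returning a flag register that will be enabled only for channels
+ * with in-bounds coordinates.  The unsigned comparison also catches
+ * negative coordinates.
+ */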
+backend_reg
+brw_fs_surface_visitor::emit_coordinate_check(
+   backend_reg image, backend_reg addr, unsigned dims) const
+{
+   fs_reg size = offset(image, BRW_IMAGE_PARAM_SIZE_OFFSET);
+
+   for (unsigned i = 0; i < dims; ++i) {
+      fs_inst &inst = emit(BRW_OPCODE_CMP, reg_null_d,
+                           offset(retype(addr, BRW_REGISTER_TYPE_UD), i),
+                           offset(size, i));
+
+      if (i > 0)
+         inst.predicate = BRW_PREDICATE_NORMAL;
+
+      inst.conditional_mod = BRW_CONDITIONAL_L;
+      inst.flag_subreg = 0;
+   }
+
+   return brw_flag_reg(0, 0);
+}
+
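+/**
+ * Calculate the byte offset of the pixel at the given coordinates,
+ * taking the tiling and swizzling mode of the surface into account.
+ * Used for raw untyped surface access, where the hardware performs no
+ * address calculation of its own.
+ */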
+backend_reg
+brw_fs_surface_visitor::emit_coordinate_address_calculation(
+   backend_reg image, backend_reg addr, unsigned dims) const
+{
+   fs_reg x = retype(offset(addr, 0), BRW_REGISTER_TYPE_UD);
+   fs_reg y = retype(offset(addr, 1), BRW_REGISTER_TYPE_UD);
+   fs_reg z = retype(offset(addr, 2), BRW_REGISTER_TYPE_UD);
+   fs_reg offset_x = offset(image, BRW_IMAGE_PARAM_OFFSET_OFFSET + 0);
+   fs_reg offset_y = offset(image, BRW_IMAGE_PARAM_OFFSET_OFFSET + 1);
+   fs_reg stride_x = offset(image, BRW_IMAGE_PARAM_STRIDE_OFFSET + 0);
+   fs_reg stride_y = offset(image, BRW_IMAGE_PARAM_STRIDE_OFFSET + 1);
+   fs_reg stride_z = offset(image, BRW_IMAGE_PARAM_STRIDE_OFFSET + 2);
+   fs_reg stride_w = offset(image, BRW_IMAGE_PARAM_STRIDE_OFFSET + 3);
+   fs_reg tile_x = offset(image, BRW_IMAGE_PARAM_TILING_OFFSET + 0);
+   fs_reg tile_y = offset(image, BRW_IMAGE_PARAM_TILING_OFFSET + 1);
+   fs_reg tile_z = offset(image, BRW_IMAGE_PARAM_TILING_OFFSET + 2);
+   fs_reg swz_x = offset(image, BRW_IMAGE_PARAM_SWIZZLING_OFFSET + 0);
+   fs_reg swz_y = offset(image, BRW_IMAGE_PARAM_SWIZZLING_OFFSET + 1);
+   fs_reg high_x = make_grf(BRW_REGISTER_TYPE_UD, 1);
+   fs_reg high_y = make_grf(BRW_REGISTER_TYPE_UD, 1);
+   fs_reg high_z = make_grf(BRW_REGISTER_TYPE_UD, 1);
+   fs_reg dst = make_grf(BRW_REGISTER_TYPE_UD, 1);
+   fs_reg zero = make_grf(BRW_REGISTER_TYPE_UD, 1)
+      .apply_stride(0);
+
+   exec_all(emit(BRW_OPCODE_MOV, zero, 0));
+
+   /* Shift the coordinates by the fixed surface offset. */
+   emit(BRW_OPCODE_ADD, x, x, offset_x);
+   if (dims > 1)
+      emit(BRW_OPCODE_ADD, y, y, offset_y);
+
+   if (dims > 2) {
+      /* Decompose z into a major and a minor index. */
+      emit(BRW_OPCODE_SHR, high_z, z, tile_z);
+      emit(BRW_OPCODE_BFE, z, tile_z, zero, z);
+
+      /* Calculate the vertical slice offset. */
+      emit(BRW_OPCODE_MUL, high_z, stride_w, high_z);
+      emit(BRW_OPCODE_ADD, y, y, high_z);
+
+      /* Calculate the horizontal slice offset. */
+      emit(BRW_OPCODE_MUL, z, stride_z, z);
+      emit(BRW_OPCODE_ADD, x, x, z);
+   }
+
+   if (dims > 1) {
+      /* Decompose x and y into major and minor indices. */
+      emit(BRW_OPCODE_SHR, high_x, x, tile_x);
+      emit(BRW_OPCODE_SHR, high_y, y, tile_y);
+
+      emit(BRW_OPCODE_BFE, x, tile_x, zero, x);
+      emit(BRW_OPCODE_BFE, y, tile_y, zero, y);
+
+      /* Calculate the pixel index from the start of the tile row.
+       * Equivalent to:
+       *   dst = (high_x << tile_y << tile_x) + (low_y << tile_x) + low_x
+       */
+      emit(BRW_OPCODE_SHL, high_x, high_x, tile_y);
+      emit(BRW_OPCODE_ADD, dst, high_x, y);
+      emit(BRW_OPCODE_SHL, dst, dst, tile_x);
+      emit(BRW_OPCODE_ADD, dst, dst, x);
+
+      /* Multiply by the Bpp value. */
+      emit(BRW_OPCODE_MUL, dst, dst, stride_x);
+
+      /* Add it to the start offset of the tile row. */
+      emit(BRW_OPCODE_MUL, high_y, stride_y, high_y);
+      emit(BRW_OPCODE_SHL, high_y, high_y, tile_y);
+      emit(BRW_OPCODE_ADD, dst, dst, high_y);
+
+      if (v->brw->has_swizzling) {
+         fs_reg bit_x = make_grf(BRW_REGISTER_TYPE_UD, 1);
+         fs_reg bit_y = make_grf(BRW_REGISTER_TYPE_UD, 1);
+
+         /* Take into account the two dynamically specified shifts. */
+         emit(BRW_OPCODE_SHR, bit_x, dst, swz_x);
+         emit(BRW_OPCODE_SHR, bit_y, dst, swz_y);
+
+         /* XOR the two selected address bits together and use the
+          * result to flip bit 6 of the memory address.
+          */
+         emit(BRW_OPCODE_XOR, bit_x, bit_x, bit_y);
+         emit(BRW_OPCODE_AND, bit_x, bit_x, 1 << 6);
+         emit(BRW_OPCODE_XOR, dst, dst, bit_x);
+      }
+
+   } else {
+      /* Multiply by the Bpp value. */
+      emit(BRW_OPCODE_MUL, dst, x, stride_x);
+   }
+
+   return dst;
+}
+
+backend_reg
+brw_fs_surface_visitor::emit_untyped_read(
+   backend_reg flag, backend_reg surface, backend_reg addr,
+   unsigned dims, unsigned size) const
+{
+   fs_reg dst = make_grf(BRW_REGISTER_TYPE_UD, size);
+   unsigned mlen = 0;
+
+   /* Initialize the message header. */
+   emit_surface_header(make_mrf(mlen));
+   mlen++;
+
+   /* Set the surface read offset. */
+   emit_assign_vector(make_mrf(mlen), addr, dims);
+   mlen += dims * v->dispatch_width / 8;
+
+   /* Emit the instruction. */
+   fs_inst &inst = exec_predicated(
+      flag, emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst,
+                 surface, size));
+   inst.base_mrf = 0;
+   inst.mlen = mlen;
+   inst.regs_written = size;
+
+   return dst;
+}
+
+void
+brw_fs_surface_visitor::emit_untyped_write(
+   backend_reg flag, backend_reg surface, backend_reg addr,
+   backend_reg src, unsigned dims, unsigned size) const
+{
+   unsigned mlen = 0;
+
+   /* Initialize the message header. */
+   emit_surface_header(make_mrf(mlen));
+   mlen++;
+
+   /* Set the surface write offset. */
+   emit_assign_vector(make_mrf(mlen), addr, dims);
+   mlen += dims * v->dispatch_width / 8;
+
+   /* Set the source value. */
+   emit_assign_vector(make_mrf(mlen), src, size);
+   mlen += size * v->dispatch_width / 8;
+
+   /* Emit the instruction. */
+   fs_inst &inst = exec_predicated(
+      flag, emit(SHADER_OPCODE_UNTYPED_SURFACE_WRITE, fs_reg(),
+                 surface, size));
+   inst.base_mrf = 0;
+   inst.mlen = mlen;
+}
+
+backend_reg
+brw_fs_surface_visitor::emit_untyped_atomic(
+   backend_reg flag, backend_reg surface, backend_reg addr,
+   backend_reg src0, backend_reg src1,
+   unsigned dims, unsigned op) const
+{
+   fs_reg dst = make_grf(BRW_REGISTER_TYPE_UD, 1);
+   unsigned mlen = 0;
+
+   /* Initialize the message header. */
+   emit_surface_header(make_mrf(mlen));
+   mlen++;
+
+   /* Set the atomic operation offset. */
+   emit_assign_vector(make_mrf(mlen), addr, dims);
+   mlen += dims * v->dispatch_width / 8;
+
+   /* Set the atomic operation arguments. */
+   if (src0.file != BAD_FILE) {
+      emit(BRW_OPCODE_MOV, make_mrf(mlen), src0);
+      mlen += v->dispatch_width / 8;
+   }
+
+   if (src1.file != BAD_FILE) {
+      emit(BRW_OPCODE_MOV, make_mrf(mlen), src1);
+      mlen += v->dispatch_width / 8;
+   }
+
+   /* Emit the instruction. */
+   fs_inst &inst = exec_predicated(
+      flag, emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
+                 surface, op));
+   inst.base_mrf = 0;
+   inst.mlen = mlen;
+
+   return dst;
+}
+
+backend_reg
+brw_fs_surface_visitor::emit_typed_read(
+   backend_reg flag, backend_reg surface, backend_reg addr,
+   unsigned dims, unsigned size) const
+{
+   fs_reg dst = make_grf(BRW_REGISTER_TYPE_UD, size);
+   const unsigned w = v->dispatch_width / 8;
+
+   for (unsigned i = 0; i < w; ++i) {
+      const unsigned rlen = (size + w - 1) / w;
+      fs_reg tmp = make_grf(BRW_REGISTER_TYPE_UD, rlen);
+      unsigned mlen = 0;
+
+      /* Initialize the message header. */
+      emit_surface_header(make_mrf(mlen));
+      mlen++;
+
+      /* Set the surface read address. */
+      emit_pack_vector_half(make_mrf(mlen), addr, i, dims);
+      mlen += dims;
+
+      /* Emit the instruction. */
+      fs_inst &inst = exec_half(i, exec_predicated(flag,
+         emit(SHADER_OPCODE_TYPED_SURFACE_READ, tmp,
+              surface, size)));
+      inst.base_mrf = 0;
+      inst.mlen = mlen;
+      inst.regs_written = rlen;
+
+      /* Unpack the result. */
+      emit_unpack_vector_half(dst, tmp, i, size);
+   }
+
+   return dst;
+}
+
+void
+brw_fs_surface_visitor::emit_typed_write(
+   backend_reg flag, backend_reg surface, backend_reg addr,
+   backend_reg src, unsigned dims, unsigned size) const
+{
+   for (unsigned i = 0; i < v->dispatch_width / 8; ++i) {
+      unsigned mlen = 0;
+
+      /* Initialize the message header. */
+      emit_surface_header(make_mrf(mlen));
+      mlen++;
+
+      /* Set the surface write address. */
+      emit_pack_vector_half(make_mrf(mlen), addr, i, dims);
+      mlen += dims;
+
+      /* Set the source value. */
+      emit_pack_vector_half(make_mrf(mlen), src, i, size);
+      mlen += size;
+
+      /* Emit the instruction. */
+      fs_inst &inst = exec_half(i, exec_predicated(flag,
+         emit(SHADER_OPCODE_TYPED_SURFACE_WRITE, fs_reg(),
+              surface, size)));
+      inst.base_mrf = 0;
+      inst.mlen = mlen;
+   }
+}
+
+backend_reg
+brw_fs_surface_visitor::emit_typed_atomic(
+   backend_reg flag, backend_reg surface, backend_reg addr,
+   backend_reg src0, backend_reg src1,
+   unsigned dims, unsigned op) const
+{
+   fs_reg dst = make_grf(BRW_REGISTER_TYPE_UD, 1);
+
+   for (unsigned i = 0; i < v->dispatch_width / 8; ++i) {
+      unsigned mlen = 0;
+
+      /* Initialize the message header. */
+      emit_surface_header(make_mrf(mlen));
+      mlen++;
+
+      /* Set the atomic operation address. */
+      emit_pack_vector_half(make_mrf(mlen), addr, i, dims);
+      mlen += dims;
+
+      /* Set the source arguments. */
+      if (src0.file != BAD_FILE) {
+         emit_pack_vector_half(make_mrf(mlen), src0, i, 1);
+         mlen++;
+      }
+
+      if (src1.file != BAD_FILE) {
+         emit_pack_vector_half(make_mrf(mlen), src1, i, 1);
+         mlen++;
+      }
+
+      /* Emit the instruction. */
+      fs_inst &inst = exec_half(i, exec_predicated(flag,
+         emit(SHADER_OPCODE_TYPED_ATOMIC, half(dst, i),
+              surface, op)));
+      inst.base_mrf = 0;
+      inst.mlen = mlen;
+   }
+
+   return dst;
+}
+
+void
+brw_fs_surface_visitor::emit_memory_fence() const
+{
+   emit(SHADER_OPCODE_MEMORY_FENCE);
+}
+
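+/**
+ * Pad a vector out to four components, filling the missing components
+ * with zero (or one for the alpha component), and replacing the value
+ * of any channel whose coordinates were flagged as out-of-bounds with
+ * the same padding value.
+ */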
+backend_reg
+brw_fs_surface_visitor::emit_pad(
+   backend_reg flag, backend_reg src, unsigned size) const
+{
+   fs_reg dst = make_grf(src.type, 4);
+
+   for (unsigned i = 0; i < 4; ++i) {
+      unsigned x = (i == 3 ? 1 : 0);
+      fs_reg pad = (src.type == BRW_REGISTER_TYPE_F ?
+                    fs_reg(float(x)) : fs_reg(x));
+
+      if (i < size) {
+         if (flag.file != BAD_FILE)
+            exec_predicated(flag, emit(BRW_OPCODE_SEL, offset(dst, i),
+                                       offset(src, i), pad));
+         else
+            emit(BRW_OPCODE_MOV, offset(dst, i), offset(src, i));
+
+      } else {
+         emit(BRW_OPCODE_MOV, offset(dst, i), pad);
+      }
+   }
+
+   return dst;
+}
+
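+/**
+ * Pack up to four vector components into a bit field, inserting the
+ * i-th component at bit offset shift_i with width_i bits.  Arbitrary,
+ * not necessarily byte-aligned, offsets and widths are supported.
+ */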
+backend_reg
+brw_fs_surface_visitor::emit_pack_generic(
+   backend_reg src,
+   unsigned shift_r, unsigned width_r,
+   unsigned shift_g, unsigned width_g,
+   unsigned shift_b, unsigned width_b,
+   unsigned shift_a, unsigned width_a) const
+{
+   const unsigned shift[] = { shift_r, shift_g, shift_b, shift_a };
+   const unsigned width[] = { width_r, width_g, width_b, width_a };
+   const unsigned bits = width_r + width_g + width_b + width_a;
+   fs_reg dst = make_grf(BRW_REGISTER_TYPE_UD, bits / 32);
+   bool seen[4] = {};
+
+   for (unsigned i = 0; i < Elements(width); ++i) {
+      if (width[i]) {
+         const unsigned j = shift[i] / 32;
+         const unsigned k = shift[i] % 32;
+         const unsigned m = (1ull << width[i]) - 1;
+         fs_reg tmp = make_grf(BRW_REGISTER_TYPE_UD, 1);
+
+         if (seen[j]) {
+            /* Insert the source value into the bit field if we have
+             * already written to this dword.
+             */
+            emit(BRW_OPCODE_MOV, tmp, m << k);
+            emit(BRW_OPCODE_BFI2, offset(dst, j),
+                 tmp, offset(src, i), offset(dst, j));
+
+         } else {
+            /* Otherwise just mask and copy the value over. */
+            emit(BRW_OPCODE_AND, offset(dst, j),
+                 offset(src, i), m);
+
+            if (k)
+               emit(BRW_OPCODE_SHL, offset(dst, j),
+                    offset(dst, j), k);
+
+            seen[j] = true;
+         }
+      }
+   }
+
+   return dst;
+}
+
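+/**
+ * Inverse of emit_pack_generic(): extract up to four bit fields from
+ * the source into separate vector components, using an arithmetic
+ * shift so that signed types get sign extension.
+ */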
+backend_reg
+brw_fs_surface_visitor::emit_unpack_generic(
+   backend_reg src,
+   unsigned shift_r, unsigned width_r,
+   unsigned shift_g, unsigned width_g,
+   unsigned shift_b, unsigned width_b,
+   unsigned shift_a, unsigned width_a) const
+{
+   const unsigned shift[] = { shift_r, shift_g, shift_b, shift_a };
+   const unsigned width[] = { width_r, width_g, width_b, width_a };
+   const unsigned n = !!width_r + !!width_g + !!width_b + !!width_a;
+   fs_reg dst = make_grf(src.type, n);
+
+   for (unsigned i = 0; i < Elements(width); ++i) {
+      if (width[i]) {
+         /* Discard the most significant bits. */
+         emit(BRW_OPCODE_SHL, offset(dst, i),
+              offset(src, shift[i] / 32),
+              32 - shift[i] % 32 - width[i]);
+
+         /* Shift it back to the least significant bits using an
+          * arithmetic shift to get sign extension on signed types.
+          */
+         emit(BRW_OPCODE_ASR, offset(dst, i),
+              offset(dst, i), 32 - width[i]);
+      }
+   }
+
+   return dst;
+}
+
+namespace {
+   unsigned
+   type_for_width(unsigned width)
+   {
+      switch (width) {
+      case 8:
+         return BRW_REGISTER_TYPE_UB;
+      case 16:
+         return BRW_REGISTER_TYPE_UW;
+      case 32:
+         return BRW_REGISTER_TYPE_UD;
+      default:
+         unreachable();
+      }
+   }
+}
+
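+/**
+ * Pack up to four vector components into a bit field, like
+ * emit_pack_generic(), but assuming that all fields have the same
+ * 8, 16 or 32-bit width and byte-aligned offsets, which allows a more
+ * efficient implementation based on sub-register byte accesses.
+ */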
+backend_reg
+brw_fs_surface_visitor::emit_pack_homogeneous(
+   backend_reg src,
+   unsigned shift_r, unsigned width_r,
+   unsigned shift_g, unsigned width_g,
+   unsigned shift_b, unsigned width_b,
+   unsigned shift_a, unsigned width_a) const
+{
+   const unsigned shift[] = { shift_r, shift_g, shift_b, shift_a };
+   const unsigned width[] = { width_r, width_g, width_b, width_a };
+   const unsigned type = type_for_width(width[0]);
+   fs_reg dst = make_grf(BRW_REGISTER_TYPE_UD, type_sz(type));
+   fs_reg csrc = retype(fs_reg(src), type).apply_stride(4 / type_sz(type));
+   fs_reg cdst = retype(dst, type).apply_stride(4 / type_sz(type));
+   bool seen[4] = {};
+
+   for (unsigned i = 0; i < 4; ++i) {
+      if (width[i]) {
+         const unsigned j = shift[i] / 32;
+         const unsigned k = shift[i] % 32;
+
+         if (seen[j]) {
+            /* Insert the source value into the bit field if we have
+             * already written to this dword.
+             */
+            emit(BRW_OPCODE_MOV, offset(byte_offset(cdst, k / 8), j),
+                 offset(csrc, i));
+
+         } else {
+            /* Otherwise overwrite the whole dword to make sure that
+             * unused fields are initialized to zero.
+             */
+            emit(BRW_OPCODE_SHL, offset(dst, j),
+                 offset(csrc, i), k);
+
+            seen[j] = true;
+         }
+      }
+   }
+
+   return dst;
+}
+
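+/**
+ * Inverse of emit_pack_homogeneous(): extract up to four equally
+ * sized, byte-aligned bit fields from the source using sub-register
+ * byte accesses.
+ */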
+backend_reg
+brw_fs_surface_visitor::emit_unpack_homogeneous(
+   backend_reg src,
+   unsigned shift_r, unsigned width_r,
+   unsigned shift_g, unsigned width_g,
+   unsigned shift_b, unsigned width_b,
+   unsigned shift_a, unsigned width_a) const
+{
+   const unsigned shift[] = { shift_r, shift_g, shift_b, shift_a };
+   const unsigned width[] = { width_r, width_g, width_b, width_a };
+   const unsigned type = type_for_width(width[0]);
+   fs_reg tmp = retype(fs_reg(src), type).apply_stride(4 / type_sz(type));
+   fs_reg dst = make_grf(src.type, 4);
+
+   for (unsigned i = 0; i < 4; ++i) {
+      if (width[i])
+         emit(BRW_OPCODE_MOV,
+              offset(dst, i),
+              offset(byte_offset(tmp, shift[i] % 32 / 8), shift[i] / 32));
+   }
+
+   return dst;
+}
+
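+/**
+ * Saturate the components selected by mask_i to the range representable
+ * by an integer of width_i bits, taking the signedness of the source
+ * type into account.
+ */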
+backend_reg
+brw_fs_surface_visitor::emit_convert_to_integer(
+   backend_reg src,
+   unsigned mask0, unsigned width0,
+   unsigned mask1, unsigned width1) const
+{
+   const unsigned mask[] = { mask0, mask1 };
+   const unsigned width[] = { width0, width1 };
+
+   for (unsigned i = 0; i < Elements(mask); ++i) {
+      for (unsigned j = 0; j < 4; ++j) {
+         if (mask[i] & (1 << j)) {
+            const int32_t max = (type_is_signed(src.type) ?
+                                 (1 << (width[i] - 1)) - 1 :
+                                 (1 << width[i]) - 1);
+
+            /* Clamp to the minimum value. */
+            if (type_is_signed(src.type))
+               emit(BRW_OPCODE_SEL, offset(src, j),
+                    offset(src, j), - max - 1)
+               .conditional_mod = BRW_CONDITIONAL_G;
+
+            /* Clamp to the maximum value. */
+            emit(BRW_OPCODE_SEL, offset(src, j),
+                 offset(src, j), max)
+               .conditional_mod = BRW_CONDITIONAL_L;
+         }
+      }
+   }
+
+   return src;
+}
+
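+/**
+ * Convert a normalized fixed-point vector to floating point, dividing
+ * the components selected by mask_i by the normalization constant
+ * scale_i and clamping signed values to a minimum of -1.0.
+ */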
+backend_reg
+brw_fs_surface_visitor::emit_convert_from_scaled(
+   backend_reg src,
+   unsigned mask0, float scale0,
+   unsigned mask1, float scale1) const
+{
+   const unsigned mask[] = { mask0, mask1 };
+   const float scale[] = { scale0, scale1 };
+   fs_reg dst = retype(src, BRW_REGISTER_TYPE_F);
+
+   for (unsigned i = 0; i < Elements(mask); ++i) {
+      for (unsigned j = 0; j < 4; ++j) {
+         if (mask[i] & (1 << j)) {
+            /* Convert to float and divide by the normalization
+             * constant.
+             */
+            emit(BRW_OPCODE_MOV, offset(dst, j), offset(src, j));
+            emit(BRW_OPCODE_MUL, offset(dst, j), offset(dst, j),
+                    fs_reg(1.0f / scale[i]));
+
+            /* Clamp to the minimum value. */
+            if (type_is_signed(src.type))
+               emit(BRW_OPCODE_SEL, offset(dst, j),
+                    offset(dst, j), -1.0f)
+                  .conditional_mod = BRW_CONDITIONAL_G;
+         }
+      }
+   }
+
+   return dst;
+}
+
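+/**
+ * Convert a floating point vector to normalized fixed-point: clamp the
+ * components selected by mask_i to the representable range, multiply
+ * them by the normalization constant scale_i and convert the result to
+ * the integer type given as argument.
+ */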
+backend_reg
+brw_fs_surface_visitor::emit_convert_to_scaled(
+   backend_reg src, unsigned type,
+   unsigned mask0, float scale0,
+   unsigned mask1, float scale1) const
+{
+   const unsigned mask[] = { mask0, mask1 };
+   const float scale[] = { scale0, scale1 };
+   fs_reg dst = retype(src, type);
+
+   for (unsigned i = 0; i < Elements(mask); ++i) {
+      for (unsigned j = 0; j < 4; ++j) {
+         if (mask[i] & (1 << j)) {
+            /* Clamp to the minimum value. */
+            if (type_is_signed(type))
+               emit(BRW_OPCODE_SEL, offset(src, j),
+                    offset(src, j), -1.0f)
+                  .conditional_mod = BRW_CONDITIONAL_G;
+
+            /* Clamp to the maximum value. */
+            emit(BRW_OPCODE_SEL, offset(src, j),
+                 offset(src, j), 1.0f)
+               .conditional_mod = BRW_CONDITIONAL_L;
+
+            /* Multiply by the normalization constant and convert to
+             * integer.
+             */
+            emit(BRW_OPCODE_MUL, offset(src, j), offset(src, j),
+                    scale[i]);
+            emit(BRW_OPCODE_MOV, offset(dst, j), offset(src, j));
+         }
+      }
+   }
+
+   return dst;
+}
+
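+/**
+ * Convert a vector of packed floating point values of width_i bits
+ * (16 or less) to 32-bit floating point, for the components selected
+ * by mask_i.
+ */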
+backend_reg
+brw_fs_surface_visitor::emit_convert_from_float(
+   backend_reg src,
+   unsigned mask0, unsigned width0,
+   unsigned mask1, unsigned width1) const
+{
+   const unsigned mask[] = { mask0, mask1 };
+   const unsigned width[] = { width0, width1 };
+   fs_reg dst = retype(src, BRW_REGISTER_TYPE_F);
+
+   for (unsigned i = 0; i < Elements(mask); ++i) {
+      for (unsigned j = 0; j < 4; ++j) {
+         if (mask[i] & (1 << j)) {
+            /* Extend 10-bit and 11-bit floating point numbers to 15
+             * bits.  This works because they have a 5-bit exponent
+             * just like the 16-bit floating point format, and they
+             * have no sign bit.
+             */
+            if (width[i] < 16)
+               emit(BRW_OPCODE_SHL, offset(src, j),
+                       offset(src, j), 15 - width[i]);
+
+            /* Convert to a 32-bit float. */
+            emit(BRW_OPCODE_F16TO32, offset(dst, j), offset(src, j));
+         }
+      }
+   }
+
+   return dst;
+}
+
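+/**
+ * Inverse of emit_convert_from_float(): convert a vector of 32-bit
+ * floating point values to packed floating point values of width_i
+ * bits, for the components selected by mask_i.
+ */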
+backend_reg
+brw_fs_surface_visitor::emit_convert_to_float(
+   backend_reg src,
+   unsigned mask0, unsigned width0,
+   unsigned mask1, unsigned width1) const
+{
+   const unsigned mask[] = { mask0, mask1 };
+   const unsigned width[] = { width0, width1 };
+   fs_reg dst = retype(src, BRW_REGISTER_TYPE_UD);
+
+   for (unsigned i = 0; i < Elements(mask); ++i) {
+      for (unsigned j = 0; j < 4; ++j) {
+         if (mask[i] & (1 << j)) {
+            /* Clamp negative values to zero, as the 10-bit and 11-bit
+             * floating point formats have no sign bit.
+             */
+            if (width[i] < 16)
+               emit(BRW_OPCODE_SEL, offset(src, j),
+                       offset(src, j), 0.0f)
+                  .conditional_mod = BRW_CONDITIONAL_G;
+
+            /* Convert to a 16-bit float. */
+            emit(BRW_OPCODE_F32TO16, offset(dst, j), offset(src, j));
+
+            /* Discard the least significant bits to get a floating
+             * point number of the requested width.  This works
+             * because the 10-bit and 11-bit floating point formats
+             * have a 5-bit exponent just like the 16-bit format, and
+             * they have no sign bit.
+             */
+            if (width[i] < 16)
+               emit(BRW_OPCODE_SHR, offset(dst, j),
+                    offset(dst, j), 15 - width[i]);
+         }
+      }
+   }
+
+   return dst;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_visitor.h b/src/mesa/drivers/dri/i965/brw_fs_surface_visitor.h
new file mode 100644
index 0000000..635289d
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_surface_visitor.h
@@ -0,0 +1,165 @@
+/*
+ * Copyright 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Francisco Jerez <currojerez at riseup.net>
+ */
+#ifndef BRW_FS_SURFACE_VISITOR_H
+#define BRW_FS_SURFACE_VISITOR_H
+
+#include "brw_surface_visitor.h"
+#include "brw_fs.h"
+
+class brw_fs_surface_visitor : public brw_surface_visitor {
+public:
+   brw_fs_surface_visitor(fs_visitor *v);
+
+protected:
+   fs_inst &
+   emit(opcode op, fs_reg dst = fs_reg(),
+        fs_reg src0 = fs_reg(),
+        fs_reg src1 = fs_reg(),
+        fs_reg src2 = fs_reg()) const;
+
+   fs_reg
+   make_grf(unsigned type, unsigned size) const;
+
+   fs_reg
+   make_mrf(unsigned reg) const;
+
+   virtual void
+   emit_assign_vector(backend_reg dst, backend_reg src, unsigned size) const;
+
+   void
+   emit_pack_vector_half(fs_reg dst, fs_reg src,
+                         unsigned i, unsigned size) const;
+
+   void
+   emit_unpack_vector_half(fs_reg dst, fs_reg src,
+                           unsigned i, unsigned size) const;
+
+   void
+   emit_surface_header(fs_reg dst) const;
+
+   virtual backend_reg
+   emit_coordinate_check(backend_reg image, backend_reg addr,
+                         unsigned dims) const;
+
+   virtual backend_reg
+   emit_coordinate_address_calculation(backend_reg image, backend_reg addr,
+                                       unsigned dims) const;
+
+   virtual backend_reg
+   emit_untyped_read(backend_reg flag, backend_reg surface,
+                     backend_reg addr,
+                     unsigned dims, unsigned size) const;
+
+   virtual void
+   emit_untyped_write(backend_reg flag, backend_reg surface,
+                      backend_reg addr, backend_reg src,
+                      unsigned dims, unsigned size) const;
+
+   virtual backend_reg
+   emit_untyped_atomic(backend_reg flag, backend_reg surface,
+                       backend_reg addr,
+                       backend_reg src0, backend_reg src1,
+                       unsigned dims, unsigned op) const;
+
+   virtual backend_reg
+   emit_typed_read(backend_reg flag, backend_reg surface,
+                   backend_reg addr,
+                   unsigned dims, unsigned size) const;
+
+   virtual void
+   emit_typed_write(backend_reg flag, backend_reg surface,
+                    backend_reg addr, backend_reg src,
+                    unsigned dims, unsigned size) const;
+
+   virtual backend_reg
+   emit_typed_atomic(backend_reg flag, backend_reg surface,
+                     backend_reg addr,
+                     backend_reg src0, backend_reg src1,
+                     unsigned dims, unsigned op) const;
+
+   virtual void
+   emit_memory_fence() const;
+
+   virtual backend_reg
+   emit_pad(backend_reg flag, backend_reg src, unsigned size) const;
+
+   virtual backend_reg
+   emit_pack_generic(backend_reg src,
+                     unsigned shift_r = 0, unsigned width_r = 0,
+                     unsigned shift_g = 0, unsigned width_g = 0,
+                     unsigned shift_b = 0, unsigned width_b = 0,
+                     unsigned shift_a = 0, unsigned width_a = 0) const;
+
+   virtual backend_reg
+   emit_unpack_generic(backend_reg src,
+                       unsigned shift_r = 0, unsigned width_r = 0,
+                       unsigned shift_g = 0, unsigned width_g = 0,
+                       unsigned shift_b = 0, unsigned width_b = 0,
+                       unsigned shift_a = 0, unsigned width_a = 0) const;
+
+   virtual backend_reg
+   emit_pack_homogeneous(backend_reg src,
+                         unsigned shift_r = 0, unsigned width_r = 0,
+                         unsigned shift_g = 0, unsigned width_g = 0,
+                         unsigned shift_b = 0, unsigned width_b = 0,
+                         unsigned shift_a = 0, unsigned width_a = 0) const;
+
+   virtual backend_reg
+   emit_unpack_homogeneous(backend_reg src,
+                           unsigned shift_r = 0, unsigned width_r = 0,
+                           unsigned shift_g = 0, unsigned width_g = 0,
+                           unsigned shift_b = 0, unsigned width_b = 0,
+                           unsigned shift_a = 0, unsigned width_a = 0) const;
+
+   virtual backend_reg
+   emit_convert_to_integer(backend_reg src,
+                           unsigned mask0 = 0, unsigned width0 = 0,
+                           unsigned mask1 = 0, unsigned width1 = 0) const;
+
+   virtual backend_reg
+   emit_convert_from_scaled(backend_reg src,
+                            unsigned mask0 = 0, float scale0 = 0,
+                            unsigned mask1 = 0, float scale1 = 0) const;
+
+   virtual backend_reg
+   emit_convert_to_scaled(backend_reg src, unsigned type,
+                          unsigned mask0 = 0, float scale0 = 0,
+                          unsigned mask1 = 0, float scale1 = 0) const;
+
+   virtual backend_reg
+   emit_convert_from_float(backend_reg src,
+                           unsigned mask0 = 0, unsigned width0 = 0,
+                           unsigned mask1 = 0, unsigned width1 = 0) const;
+
+   virtual backend_reg
+   emit_convert_to_float(backend_reg src,
+                         unsigned mask0 = 0, unsigned width0 = 0,
+                         unsigned mask1 = 0, unsigned width1 = 0) const;
+
+   fs_visitor *v;
+};
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index a1d25cf..b809df2 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -78,8 +78,13 @@ extern "C" {
 #define BRW_SWIZZLE_YYYY      BRW_SWIZZLE4(1,1,1,1)
 #define BRW_SWIZZLE_ZZZZ      BRW_SWIZZLE4(2,2,2,2)
 #define BRW_SWIZZLE_WWWW      BRW_SWIZZLE4(3,3,3,3)
+#define BRW_SWIZZLE_XXXY      BRW_SWIZZLE4(0,0,0,1)
 #define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
+#define BRW_SWIZZLE_XZXZ      BRW_SWIZZLE4(0,2,0,2)
+#define BRW_SWIZZLE_YYZZ      BRW_SWIZZLE4(1,1,2,2)
 #define BRW_SWIZZLE_YZXW      BRW_SWIZZLE4(1,2,0,3)
+#define BRW_SWIZZLE_YZYZ      BRW_SWIZZLE4(1,2,1,2)
+#define BRW_SWIZZLE_YWYW      BRW_SWIZZLE4(1,3,1,3)
 #define BRW_SWIZZLE_ZXYW      BRW_SWIZZLE4(2,0,1,3)
 #define BRW_SWIZZLE_ZWZW      BRW_SWIZZLE4(2,3,2,3)
 
diff --git a/src/mesa/drivers/dri/i965/brw_surface_visitor.cpp b/src/mesa/drivers/dri/i965/brw_surface_visitor.cpp
new file mode 100644
index 0000000..07511b5
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_surface_visitor.cpp
@@ -0,0 +1,1208 @@
+/*
+ * Copyright 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Francisco Jerez <currojerez at riseup.net>
+ */
+
+#include "brw_surface_visitor.h"
+#include "brw_context.h"
+
+brw_surface_visitor::brw_surface_visitor(backend_visitor *v) :
+   v(v)
+{
+}
+
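+/**
+ * Lower an atomic counter built-in call into an untyped surface read
+ * or atomic operation on the atomic counter buffer.
+ */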
+void
+brw_surface_visitor::visit_atomic_counter_intrinsic(ir_call *ir) const
+{
+   const char *callee = ir->callee->function_name();
+   ir_dereference *deref = static_cast<ir_dereference *>(
+      ir->actual_parameters.get_head());
+   const backend_reg offset = v->visit_result(deref);
+   const backend_reg surface =
+      brw_imm_ud(v->stage_prog_data->binding_table.abo_start +
+                 deref->variable_referenced()->atomic.buffer_index);
+   backend_reg tmp;
+
+   if (!strcmp("__intrinsic_atomic_read", callee)) {
+      tmp = emit_untyped_read(backend_reg(), surface, offset, 1, 1);
+
+   } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
+      tmp = emit_untyped_atomic(backend_reg(), surface, offset,
+                                backend_reg(), backend_reg(),
+                                1, BRW_AOP_INC);
+
+   } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
+      tmp = emit_untyped_atomic(backend_reg(), surface, offset,
+                                backend_reg(), backend_reg(),
+                                1, BRW_AOP_PREDEC);
+   }
+
+   if (ir->return_deref) {
+      backend_reg dst = v->visit_result(ir->return_deref);
+      emit_assign_vector(dst, tmp, 1);
+   }
+}
+
+namespace {
+   /**
+    * Process the parameters passed to an image intrinsic call.
+    */
+   struct image_intrinsic_parameters {
+      image_intrinsic_parameters(backend_visitor *v, ir_call *ir)
+      {
+         exec_list_iterator it = ir->actual_parameters.iterator();
+
+         image_var = static_cast<ir_dereference *>(it.get())->
+            variable_referenced();
+
+         image = visit_next(v, it);
+         addr = visit_next(v, it);
+
+         if (image_var->type->fields.image.dimension == GLSL_IMAGE_DIM_MS)
+            sample = visit_next(v, it);
+
+         for (int i = 0; it.has_next(); ++i)
+            src[i] = visit_next(v, it);
+
+         if (ir->return_deref)
+            dst = v->visit_result(ir->return_deref);
+      }
+
+      ir_variable *image_var;
+
+      backend_reg image;
+      backend_reg addr;
+      backend_reg sample;
+      backend_reg src[2];
+      backend_reg dst;
+
+   private:
+      backend_reg
+      visit_next(backend_visitor *v, exec_list_iterator &it) const
+      {
+         ir_dereference *deref = static_cast<ir_dereference *>(it.get());
+         it.next();
+         return v->visit_result(deref);
+      }
+   };
+
+   /**
+    * Get the appropriate atomic op for an image atomic intrinsic.
+    */
+   unsigned
+   get_image_atomic_op(const char *callee, ir_variable *image)
+   {
+      const glsl_base_type base_type = image->type->fields.image.type;
+
+      if (!strcmp("__intrinsic_image_atomic_add", callee))
+         return BRW_AOP_ADD;
+
+      else if (!strcmp("__intrinsic_image_atomic_min", callee))
+         return (base_type == GLSL_TYPE_UINT ? BRW_AOP_UMIN : BRW_AOP_IMIN);
+
+      else if (!strcmp("__intrinsic_image_atomic_max", callee))
+         return (base_type == GLSL_TYPE_UINT ? BRW_AOP_UMAX : BRW_AOP_IMAX);
+
+      else if (!strcmp("__intrinsic_image_atomic_and", callee))
+         return BRW_AOP_AND;
+
+      else if (!strcmp("__intrinsic_image_atomic_or", callee))
+         return BRW_AOP_OR;
+
+      else if (!strcmp("__intrinsic_image_atomic_xor", callee))
+         return BRW_AOP_XOR;
+
+      else if (!strcmp("__intrinsic_image_atomic_exchange", callee))
+         return BRW_AOP_MOV;
+
+      else if (!strcmp("__intrinsic_image_atomic_comp_swap", callee))
+         return BRW_AOP_CMPWR;
+
+      else
+         unreachable();
+   }
+}
+
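+/**
+ * Lower an image built-in call into the load, store or atomic
+ * implementation appropriate for the image format and hardware.
+ */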
+void
+brw_surface_visitor::visit_image_intrinsic(ir_call *ir) const
+{
+   image_intrinsic_parameters p(v, ir);
+   const char *callee = ir->callee->function_name();
+   const unsigned dims = p.image_var->type->coordinate_components();
+   const GLenum format = (p.image_var->image.write_only ? GL_NONE :
+                          p.image_var->image.format);
+   backend_reg tmp;
+
+   if (!strcmp("__intrinsic_image_load", callee))
+      tmp = emit_image_load(p.image, p.addr, format, dims);
+
+   else if (!strcmp("__intrinsic_image_store", callee))
+      emit_image_store(p.image, p.addr, p.src[0], format, dims);
+
+   else
+      tmp = emit_image_atomic(p.image, p.addr, p.src[0], p.src[1],
+                              format, get_image_atomic_op(callee, p.image_var),
+                              dims);
+
+   if (ir->return_deref) {
+      const unsigned size = (ir->return_deref->variable_referenced()->
+                             type->components());
+      emit_assign_vector(p.dst, tmp, size);
+   }
+}
+
+void
+brw_surface_visitor::visit_barrier_intrinsic(ir_call *ir) const
+{
+   emit_memory_fence();
+}
+
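+/**
+ * Lower an image load into the typed or untyped surface read supported
+ * by the hardware for the given format, followed by the unpacking and
+ * type conversion required to obtain the result the shader expects.
+ */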
+backend_reg
+brw_surface_visitor::emit_image_load(backend_reg image, backend_reg addr,
+                                     GLenum format, unsigned dims) const
+{
+   backend_reg flag, tmp;
+
+   switch (format) {
+   case GL_RGBA32F:
+      /* Hardware surface format: RAW */
+      flag = emit_coordinate_check(image, addr, dims);
+      addr = emit_coordinate_address_calculation(image, addr, dims);
+      tmp = emit_untyped_read(flag, image, addr, 1, 4);
+      tmp = retype(tmp, BRW_REGISTER_TYPE_F);
+      tmp = emit_pad(flag, tmp, 4);
+      return tmp;
+
+   case GL_RGBA16F:
+      flag = emit_coordinate_check(image, addr, dims);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16B16A16_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 4);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+      } else {
+         /* Hardware surface format: RAW */
+         addr = emit_coordinate_address_calculation(image, addr, dims);
+         tmp = emit_untyped_read(flag, image, addr, 1, 2);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+         tmp = emit_unpack_homogeneous(tmp, 0, 16, 16, 16, 32, 16, 48, 16);
+      }
+
+      tmp = emit_convert_from_float(tmp, WRITEMASK_XYZW, 16);
+      tmp = emit_pad(flag, tmp, 4);
+      return tmp;
+
+   case GL_RG32F:
+      flag = emit_coordinate_check(image, addr, dims);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16B16A16_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 4);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+         tmp = emit_pack_homogeneous(tmp, 0, 16, 16, 16, 32, 16, 48, 16);
+      } else {
+         /* Hardware surface format: RAW */
+         addr = emit_coordinate_address_calculation(image, addr, dims);
+         tmp = emit_untyped_read(flag, image, addr, 1, 2);
+      }
+
+      tmp = retype(tmp, BRW_REGISTER_TYPE_F);
+      tmp = emit_pad(flag, tmp, 2);
+      return tmp;
+
+   case GL_RG16F:
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 2);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+      } else {
+         /* Hardware surface format: R32_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 1);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+         tmp = emit_unpack_homogeneous(tmp, 0, 16, 16, 16);
+      }
+
+      tmp = emit_convert_from_float(tmp, WRITEMASK_XY, 16);
+      tmp = emit_pad(flag, tmp, 2);
+      return tmp;
+
+   case GL_R11F_G11F_B10F:
+      /* Hardware surface format: R32_UINT */
+      tmp = emit_typed_read(flag, image, addr, dims, 1);
+      tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+      tmp = emit_unpack_generic(tmp, 0, 11, 11, 11, 22, 10);
+      tmp = emit_convert_from_float(tmp, WRITEMASK_XY, 11,
+                                    WRITEMASK_Z, 10);
+      tmp = emit_pad(flag, tmp, 3);
+      return tmp;
+
+   case GL_R32F:
+      /* Hardware surface format: R32_FLOAT */
+      tmp = emit_typed_read(flag, image, addr, dims, 4);
+      tmp = retype(tmp, BRW_REGISTER_TYPE_F);
+      return tmp;
+
+   case GL_R16F:
+      /* Hardware surface format: R16_UINT */
+      tmp = emit_typed_read(flag, image, addr, dims, 1);
+      tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+      tmp = emit_convert_from_float(tmp, WRITEMASK_X, 16);
+      tmp = emit_pad(flag, tmp, 1);
+      return tmp;
+
+   case GL_RGBA32UI:
+      /* Hardware surface format: RAW */
+      flag = emit_coordinate_check(image, addr, dims);
+      addr = emit_coordinate_address_calculation(image, addr, dims);
+      tmp = emit_untyped_read(flag, image, addr, 1, 4);
+      tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+      tmp = emit_pad(flag, tmp, 4);
+      return tmp;
+
+   case GL_RGBA16UI:
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16B16A16_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 4);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+      } else {
+         /* Hardware surface format: RAW */
+         flag = emit_coordinate_check(image, addr, dims);
+         addr = emit_coordinate_address_calculation(image, addr, dims);
+         tmp = emit_untyped_read(flag, image, addr, 1, 2);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+         tmp = emit_unpack_homogeneous(tmp, 0, 16, 16, 16, 32, 16, 48, 16);
+         tmp = emit_pad(flag, tmp, 4);
+      }
+      return tmp;
+
+   case GL_RGB10_A2UI:
+      /* Hardware surface format: R32_UINT */
+      flag = emit_coordinate_check(image, addr, dims);
+      tmp = emit_typed_read(flag, image, addr, dims, 1);
+      tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+      tmp = emit_unpack_generic(tmp, 0, 10, 10, 10, 20, 10, 30, 2);
+      tmp = emit_pad(flag, tmp, 4);
+      return tmp;
+
+   case GL_RGBA8UI:
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R8G8B8A8_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 4);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+      } else {
+         /* Hardware surface format: R32_UINT */
+         flag = emit_coordinate_check(image, addr, dims);
+         tmp = emit_typed_read(flag, image, addr, dims, 1);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+         tmp = emit_unpack_homogeneous(tmp, 0, 8, 8, 8, 16, 8, 24, 8);
+         tmp = emit_pad(flag, tmp, 4);
+      }
+      return tmp;
+
+   case GL_RG32UI:
+      flag = emit_coordinate_check(image, addr, dims);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16B16A16_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 4);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+         tmp = emit_pack_homogeneous(tmp, 0, 16, 16, 16, 32, 16, 48, 16);
+      } else {
+         /* Hardware surface format: RAW */
+         addr = emit_coordinate_address_calculation(image, addr, dims);
+         tmp = emit_untyped_read(flag, image, addr, 1, 2);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+      }
+
+      tmp = emit_pad(flag, tmp, 2);
+      return tmp;
+
+   case GL_RG16UI:
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 4);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+      } else {
+         /* Hardware surface format: R32_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 1);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+         tmp = emit_unpack_homogeneous(tmp, 0, 16, 16, 16);
+         tmp = emit_pad(flag, tmp, 2);
+      }
+      return tmp;
+
+   case GL_RG8UI:
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R8G8_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 4);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+      } else {
+         /* Hardware surface format: R16_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 1);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+         tmp = emit_unpack_homogeneous(tmp, 0, 8, 8, 8);
+         tmp = emit_pad(flag, tmp, 2);
+      }
+      return tmp;
+
+   case GL_R32UI:
+      /* Hardware surface format: R32_UINT */
+      tmp = emit_typed_read(flag, image, addr, dims, 4);
+      tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+      return tmp;
+
+   case GL_R16UI:
+      /* Hardware surface format: R16_UINT */
+      if (v->brw->is_haswell) {
+         tmp = emit_typed_read(flag, image, addr, dims, 4);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+      } else {
+         tmp = emit_typed_read(flag, image, addr, dims, 1);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+         tmp = emit_unpack_homogeneous(tmp, 0, 16);
+         tmp = emit_pad(flag, tmp, 1);
+      }
+      return tmp;
+
+   case GL_R8UI:
+      /* Hardware surface format: R8_UINT */
+      if (v->brw->is_haswell) {
+         tmp = emit_typed_read(flag, image, addr, dims, 4);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+      } else {
+         tmp = emit_typed_read(flag, image, addr, dims, 1);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+         tmp = emit_unpack_homogeneous(tmp, 0, 8);
+         tmp = emit_pad(flag, tmp, 1);
+      }
+      return tmp;
+
+   case GL_RGBA32I:
+      /* Hardware surface format: RAW */
+      flag = emit_coordinate_check(image, addr, dims);
+      addr = emit_coordinate_address_calculation(image, addr, dims);
+      tmp = emit_untyped_read(flag, image, addr, 1, 4);
+      tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+      tmp = emit_pad(flag, tmp, 4);
+      return tmp;
+
+   case GL_RGBA16I:
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16B16A16_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 4);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+         tmp = emit_unpack_homogeneous(tmp, 0, 16, 32, 16, 64, 16, 96, 16);
+      } else {
+         /* Hardware surface format: RAW */
+         flag = emit_coordinate_check(image, addr, dims);
+         addr = emit_coordinate_address_calculation(image, addr, dims);
+         tmp = emit_untyped_read(flag, image, addr, 1, 2);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+         tmp = emit_unpack_homogeneous(tmp, 0, 16, 16, 16, 32, 16, 48, 16);
+         tmp = emit_pad(flag, tmp, 4);
+      }
+      return tmp;
+
+   case GL_RGBA8I:
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R8G8B8A8_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 4);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+         tmp = emit_unpack_homogeneous(tmp, 0, 8, 32, 8, 64, 8, 96, 8);
+      } else {
+         /* Hardware surface format: R32_UINT */
+         flag = emit_coordinate_check(image, addr, dims);
+         tmp = emit_typed_read(flag, image, addr, dims, 1);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+         tmp = emit_unpack_homogeneous(tmp, 0, 8, 8, 8, 16, 8, 24, 8);
+         tmp = emit_pad(flag, tmp, 4);
+      }
+      return tmp;
+
+   case GL_RG32I:
+      flag = emit_coordinate_check(image, addr, dims);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16B16A16_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 4);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+         tmp = emit_pack_homogeneous(tmp, 0, 16, 16, 16, 32, 16, 48, 16);
+      } else {
+         /* Hardware surface format: RAW */
+         addr = emit_coordinate_address_calculation(image, addr, dims);
+         tmp = emit_untyped_read(flag, image, addr, 1, 2);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+      }
+
+      tmp = emit_pad(flag, tmp, 2);
+      return tmp;
+
+   case GL_RG16I:
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 2);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+         tmp = emit_unpack_homogeneous(tmp, 0, 16, 32, 16);
+      } else {
+         /* Hardware surface format: R32_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 1);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+         tmp = emit_unpack_homogeneous(tmp, 0, 16, 16, 16);
+      }
+
+      tmp = emit_pad(flag, tmp, 2);
+      return tmp;
+
+   case GL_RG8I:
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R8G8_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 2);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+         tmp = emit_unpack_homogeneous(tmp, 0, 8, 32, 8);
+      } else {
+         /* Hardware surface format: R16_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 1);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+         tmp = emit_unpack_homogeneous(tmp, 0, 8, 8, 8);
+      }
+
+      tmp = emit_pad(flag, tmp, 2);
+      return tmp;
+
+   case GL_R32I:
+      /* Hardware surface format: R32_INT */
+      tmp = emit_typed_read(flag, image, addr, dims, 4);
+      tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+      return tmp;
+
+   case GL_R16I:
+      /* Hardware surface format: R16_UINT */
+      tmp = emit_typed_read(flag, image, addr, dims, 1);
+      tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+      tmp = emit_unpack_homogeneous(tmp, 0, 16);
+      tmp = emit_pad(flag, tmp, 1);
+      return tmp;
+
+   case GL_R8I:
+      /* Hardware surface format: R8_UINT */
+      tmp = emit_typed_read(flag, image, addr, dims, 1);
+      tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+      tmp = emit_unpack_homogeneous(tmp, 0, 8);
+      tmp = emit_pad(flag, tmp, 1);
+      return tmp;
+
+   case GL_RGBA16:
+      flag = emit_coordinate_check(image, addr, dims);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16B16A16_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 4);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+      } else {
+         /* Hardware surface format: RAW */
+         addr = emit_coordinate_address_calculation(image, addr, dims);
+         tmp = emit_untyped_read(flag, image, addr, 1, 2);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+         tmp = emit_unpack_homogeneous(tmp, 0, 16, 16, 16, 32, 16, 48, 16);
+      }
+
+      tmp = emit_convert_from_scaled(tmp, WRITEMASK_XYZW, 65535.0);
+      tmp = emit_pad(flag, tmp, 4);
+      return tmp;
+
+   case GL_RGB10_A2:
+      /* Hardware surface format: R32_UINT */
+      flag = emit_coordinate_check(image, addr, dims);
+      tmp = emit_typed_read(flag, image, addr, dims, 1);
+      tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+      tmp = emit_unpack_generic(tmp, 0, 10, 10, 10, 20, 10, 30, 2);
+      tmp = emit_convert_from_scaled(tmp, WRITEMASK_XYZ, 1023.0,
+                                     WRITEMASK_W, 3.0);
+      tmp = emit_pad(flag, tmp, 4);
+      return tmp;
+
+   case GL_RGBA8:
+      flag = emit_coordinate_check(image, addr, dims);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R8G8B8A8_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 4);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+      } else {
+         /* Hardware surface format: R32_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 1);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+         tmp = emit_unpack_homogeneous(tmp, 0, 8, 8, 8, 16, 8, 24, 8);
+      }
+
+      tmp = emit_convert_from_scaled(tmp, WRITEMASK_XYZW, 255.0);
+      tmp = emit_pad(flag, tmp, 4);
+      return tmp;
+
+   case GL_RG16:
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 2);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+      } else {
+         /* Hardware surface format: R32_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 1);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+         tmp = emit_unpack_homogeneous(tmp, 0, 16, 16, 16);
+      }
+
+      tmp = emit_convert_from_scaled(tmp, WRITEMASK_XY, 65535.0);
+      tmp = emit_pad(flag, tmp, 2);
+      return tmp;
+
+   case GL_RG8:
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R8G8_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 2);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+      } else {
+         /* Hardware surface format: R16_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 1);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+         tmp = emit_unpack_homogeneous(tmp, 0, 8, 8, 8);
+      }
+
+      tmp = emit_convert_from_scaled(tmp, WRITEMASK_XY, 255.0);
+      tmp = emit_pad(flag, tmp, 2);
+      return tmp;
+
+   case GL_R16:
+      /* Hardware surface format: R16_UINT */
+      tmp = emit_typed_read(flag, image, addr, dims, 1);
+      tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+
+      if (!v->brw->is_haswell)
+         tmp = emit_unpack_homogeneous(tmp, 0, 16);
+
+      tmp = emit_convert_from_scaled(tmp, WRITEMASK_X, 65535.0);
+      tmp = emit_pad(flag, tmp, 1);
+      return tmp;
+
+   case GL_R8:
+      /* Hardware surface format: R8_UINT */
+      tmp = emit_typed_read(flag, image, addr, dims, 1);
+      tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+
+      if (!v->brw->is_haswell)
+         tmp = emit_unpack_homogeneous(tmp, 0, 8);
+
+      tmp = emit_convert_from_scaled(tmp, WRITEMASK_X, 255.0);
+      tmp = emit_pad(flag, tmp, 1);
+      return tmp;
+
+   case GL_RGBA16_SNORM:
+      flag = emit_coordinate_check(image, addr, dims);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16B16A16_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 4);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+         tmp = emit_unpack_homogeneous(tmp, 0, 16, 32, 16, 64, 16, 96, 16);
+      } else {
+         /* Hardware surface format: RAW */
+         addr = emit_coordinate_address_calculation(image, addr, dims);
+         tmp = emit_untyped_read(flag, image, addr, 1, 2);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+         tmp = emit_unpack_homogeneous(tmp, 0, 16, 16, 16, 32, 16, 48, 16);
+      }
+
+      tmp = emit_convert_from_scaled(tmp, WRITEMASK_XYZW, 32767.0);
+      tmp = emit_pad(flag, tmp, 4);
+      return tmp;
+
+   case GL_RGBA8_SNORM:
+      flag = emit_coordinate_check(image, addr, dims);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R8G8B8A8_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 4);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+         tmp = emit_unpack_homogeneous(tmp, 0, 8, 32, 8, 64, 8, 96, 8);
+      } else {
+         /* Hardware surface format: R32_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 1);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+         tmp = emit_unpack_homogeneous(tmp, 0, 8, 8, 8, 16, 8, 24, 8);
+      }
+
+      tmp = emit_convert_from_scaled(tmp, WRITEMASK_XYZW, 127.0);
+      tmp = emit_pad(flag, tmp, 4);
+      return tmp;
+
+   case GL_RG16_SNORM:
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 2);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+         tmp = emit_unpack_homogeneous(tmp, 0, 16, 32, 16);
+      } else {
+         /* Hardware surface format: R32_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 1);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+         tmp = emit_unpack_homogeneous(tmp, 0, 16, 16, 16);
+      }
+
+      tmp = emit_convert_from_scaled(tmp, WRITEMASK_XY, 32767.0);
+      tmp = emit_pad(flag, tmp, 2);
+      return tmp;
+
+   case GL_RG8_SNORM:
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R8G8_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 2);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+         tmp = emit_unpack_homogeneous(tmp, 0, 8, 32, 8);
+      } else {
+         /* Hardware surface format: R16_UINT */
+         tmp = emit_typed_read(flag, image, addr, dims, 1);
+         tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+         tmp = emit_unpack_homogeneous(tmp, 0, 8, 8, 8);
+      }
+
+      tmp = emit_convert_from_scaled(tmp, WRITEMASK_XY, 127.0);
+      tmp = emit_pad(flag, tmp, 2);
+      return tmp;
+
+   case GL_R16_SNORM:
+      /* Hardware surface format: R16_UINT */
+      tmp = emit_typed_read(flag, image, addr, dims, 1);
+      tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+      tmp = emit_unpack_homogeneous(tmp, 0, 16);
+      tmp = emit_convert_from_scaled(tmp, WRITEMASK_X, 32767.0);
+      tmp = emit_pad(flag, tmp, 1);
+      return tmp;
+
+   case GL_R8_SNORM:
+      /* Hardware surface format: R8_UINT */
+      tmp = emit_typed_read(flag, image, addr, dims, 1);
+      tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+      tmp = emit_unpack_homogeneous(tmp, 0, 8);
+      tmp = emit_convert_from_scaled(tmp, WRITEMASK_X, 127.0);
+      tmp = emit_pad(flag, tmp, 1);
+      return tmp;
+
+   default:
+      unreachable();
+   }
+}
+
+void
+brw_surface_visitor::emit_image_store(backend_reg image, backend_reg addr,
+                                      backend_reg src,
+                                      GLenum format, unsigned dims) const
+{
+   backend_reg flag, tmp;
+
+   switch (format) {
+   case GL_NONE:
+      emit_typed_write(flag, image, addr, src, dims, 4);
+      return;
+
+   case GL_RGBA32F:
+      /* Hardware surface format: RAW */
+      tmp = retype(src, BRW_REGISTER_TYPE_F);
+      flag = emit_coordinate_check(image, addr, dims);
+      addr = emit_coordinate_address_calculation(image, addr, dims);
+      emit_untyped_write(flag, image, addr, tmp, 1, 4);
+      return;
+
+   case GL_RGBA16F:
+      tmp = retype(src, BRW_REGISTER_TYPE_F);
+      tmp = emit_convert_to_float(tmp, WRITEMASK_XYZW, 16);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16B16A16_UINT */
+         emit_typed_write(flag, image, addr, tmp, dims, 4);
+      } else {
+         /* Hardware surface format: RAW */
+         tmp = emit_pack_homogeneous(tmp, 0, 16, 16, 16, 32, 16, 48, 16);
+         flag = emit_coordinate_check(image, addr, dims);
+         addr = emit_coordinate_address_calculation(image, addr, dims);
+         emit_untyped_write(flag, image, addr, tmp, 1, 2);
+      }
+      return;
+
+   case GL_RG32F:
+      tmp = retype(src, BRW_REGISTER_TYPE_UD);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16B16A16_UINT */
+         tmp = emit_unpack_homogeneous(tmp, 0, 16, 16, 16, 32, 16, 48, 16);
+         emit_typed_write(flag, image, addr, tmp, dims, 4);
+      } else {
+         /* Hardware surface format: RAW */
+         flag = emit_coordinate_check(image, addr, dims);
+         addr = emit_coordinate_address_calculation(image, addr, dims);
+         emit_untyped_write(flag, image, addr, tmp, 1, 2);
+      }
+      return;
+
+   case GL_RG16F:
+      tmp = retype(src, BRW_REGISTER_TYPE_F);
+      tmp = emit_convert_to_float(tmp, WRITEMASK_XY, 16);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16_UINT */
+         emit_typed_write(flag, image, addr, tmp, dims, 2);
+      } else {
+         /* Hardware surface format: R32_UINT */
+         tmp = emit_pack_homogeneous(tmp, 0, 16, 16, 16);
+         emit_typed_write(flag, image, addr, tmp, dims, 1);
+      }
+      return;
+
+   case GL_R11F_G11F_B10F:
+      /* Hardware surface format: R32_UINT */
+      tmp = retype(src, BRW_REGISTER_TYPE_F);
+      tmp = emit_convert_to_float(tmp, WRITEMASK_XY, 11,
+                                  WRITEMASK_Z, 10);
+      tmp = emit_pack_generic(tmp, 0, 11, 11, 11, 22, 10);
+      emit_typed_write(flag, image, addr, tmp, dims, 1);
+      return;
+
+   case GL_R32F:
+      /* Hardware surface format: R32_FLOAT */
+      tmp = retype(src, BRW_REGISTER_TYPE_F);
+      emit_typed_write(flag, image, addr, tmp, dims, 1);
+      return;
+
+   case GL_R16F:
+      /* Hardware surface format: R16_UINT */
+      tmp = retype(src, BRW_REGISTER_TYPE_F);
+      tmp = emit_convert_to_float(tmp, WRITEMASK_X, 16);
+      emit_typed_write(flag, image, addr, tmp, dims, 1);
+      return;
+
+   case GL_RGBA32UI:
+      /* Hardware surface format: RAW */
+      tmp = retype(src, BRW_REGISTER_TYPE_UD);
+      flag = emit_coordinate_check(image, addr, dims);
+      addr = emit_coordinate_address_calculation(image, addr, dims);
+      emit_untyped_write(flag, image, addr, tmp, 1, 4);
+      return;
+
+   case GL_RGBA16UI:
+      tmp = retype(src, BRW_REGISTER_TYPE_UD);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16B16A16_UINT */
+         emit_typed_write(flag, image, addr, tmp, dims, 4);
+      } else {
+         /* Hardware surface format: RAW */
+         tmp = emit_convert_to_integer(tmp, WRITEMASK_XYZW, 16);
+         tmp = emit_pack_homogeneous(tmp, 0, 16, 16, 16, 32, 16, 48, 16);
+         flag = emit_coordinate_check(image, addr, dims);
+         addr = emit_coordinate_address_calculation(image, addr, dims);
+         emit_untyped_write(flag, image, addr, tmp, 1, 2);
+      }
+      return;
+
+   case GL_RGB10_A2UI:
+      /* Hardware surface format: R32_UINT */
+      tmp = retype(src, BRW_REGISTER_TYPE_UD);
+      tmp = emit_convert_to_integer(tmp, WRITEMASK_XYZ, 10,
+                                    WRITEMASK_W, 2);
+      tmp = emit_pack_generic(tmp, 0, 10, 10, 10, 20, 10, 30, 2);
+      flag = emit_coordinate_check(image, addr, dims);
+      emit_typed_write(flag, image, addr, tmp, dims, 1);
+      return;
+
+   case GL_RGBA8UI:
+      tmp = retype(src, BRW_REGISTER_TYPE_UD);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R8G8B8A8_UINT */
+         emit_typed_write(flag, image, addr, tmp, dims, 4);
+      } else {
+         /* Hardware surface format: R32_UINT */
+         tmp = emit_convert_to_integer(tmp, WRITEMASK_XYZW, 8);
+         tmp = emit_pack_homogeneous(tmp, 0, 8, 8, 8, 16, 8, 24, 8);
+         flag = emit_coordinate_check(image, addr, dims);
+         emit_typed_write(flag, image, addr, tmp, dims, 1);
+      }
+      return;
+
+   case GL_RG32UI:
+      tmp = retype(src, BRW_REGISTER_TYPE_UD);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16B16A16_UINT */
+         tmp = emit_unpack_homogeneous(tmp, 0, 16, 16, 16, 32, 16, 48, 16);
+         emit_typed_write(flag, image, addr, tmp, dims, 4);
+      } else {
+         /* Hardware surface format: RAW */
+         flag = emit_coordinate_check(image, addr, dims);
+         addr = emit_coordinate_address_calculation(image, addr, dims);
+         emit_untyped_write(flag, image, addr, tmp, 1, 2);
+      }
+      return;
+
+   case GL_RG16UI:
+      tmp = retype(src, BRW_REGISTER_TYPE_UD);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16_UINT */
+         emit_typed_write(flag, image, addr, tmp, dims, 2);
+      } else {
+         /* Hardware surface format: R32_UINT */
+         tmp = emit_convert_to_integer(tmp, WRITEMASK_XY, 16);
+         tmp = emit_pack_homogeneous(tmp, 0, 16, 16, 16);
+         emit_typed_write(flag, image, addr, tmp, dims, 1);
+      }
+      return;
+
+   case GL_RG8UI:
+      tmp = retype(src, BRW_REGISTER_TYPE_UD);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R8G8_UINT */
+         emit_typed_write(flag, image, addr, tmp, dims, 2);
+      } else {
+         /* Hardware surface format: R16_UINT */
+         tmp = emit_convert_to_integer(tmp, WRITEMASK_XY, 8);
+         tmp = emit_pack_homogeneous(tmp, 0, 8, 8, 8);
+         emit_typed_write(flag, image, addr, tmp, dims, 1);
+      }
+      return;
+
+   case GL_R32UI:
+      /* Hardware surface format: R32_UINT */
+      tmp = retype(src, BRW_REGISTER_TYPE_UD);
+      emit_typed_write(flag, image, addr, tmp, dims, 1);
+      return;
+
+   case GL_R16UI:
+      /* Hardware surface format: R16_UINT */
+      tmp = retype(src, BRW_REGISTER_TYPE_UD);
+      emit_typed_write(flag, image, addr, tmp, dims, 1);
+      return;
+
+   case GL_R8UI:
+      /* Hardware surface format: R8_UINT */
+      tmp = retype(src, BRW_REGISTER_TYPE_UD);
+      emit_typed_write(flag, image, addr, tmp, dims, 1);
+      return;
+
+   case GL_RGBA32I:
+      /* Hardware surface format: RAW */
+      tmp = retype(src, BRW_REGISTER_TYPE_D);
+      flag = emit_coordinate_check(image, addr, dims);
+      addr = emit_coordinate_address_calculation(image, addr, dims);
+      emit_untyped_write(flag, image, addr, tmp, 1, 4);
+      return;
+
+   case GL_RGBA16I:
+      tmp = retype(src, BRW_REGISTER_TYPE_D);
+      tmp = emit_convert_to_integer(tmp, WRITEMASK_XYZW, 16);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16B16A16_UINT */
+         emit_typed_write(flag, image, addr, tmp, dims, 4);
+      } else {
+         /* Hardware surface format: RAW */
+         tmp = emit_pack_homogeneous(tmp, 0, 16, 16, 16, 32, 16, 48, 16);
+         flag = emit_coordinate_check(image, addr, dims);
+         addr = emit_coordinate_address_calculation(image, addr, dims);
+         emit_untyped_write(flag, image, addr, tmp, 1, 2);
+      }
+      return;
+
+   case GL_RGBA8I:
+      tmp = retype(src, BRW_REGISTER_TYPE_D);
+      tmp = emit_convert_to_integer(tmp, WRITEMASK_XYZW, 8);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R8G8B8A8_UINT */
+         emit_typed_write(flag, image, addr, tmp, dims, 4);
+      } else {
+         /* Hardware surface format: R32_UINT */
+         tmp = emit_pack_homogeneous(tmp, 0, 8, 8, 8, 16, 8, 24, 8);
+         flag = emit_coordinate_check(image, addr, dims);
+         emit_typed_write(flag, image, addr, tmp, dims, 1);
+      }
+      return;
+
+   case GL_RG32I:
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16B16A16_UINT */
+         tmp = retype(src, BRW_REGISTER_TYPE_UD);
+         tmp = emit_unpack_homogeneous(tmp, 0, 16, 16, 16, 32, 16, 48, 16);
+         emit_typed_write(flag, image, addr, tmp, dims, 4);
+      } else {
+         /* Hardware surface format: RAW */
+         tmp = retype(src, BRW_REGISTER_TYPE_D);
+         flag = emit_coordinate_check(image, addr, dims);
+         addr = emit_coordinate_address_calculation(image, addr, dims);
+         emit_untyped_write(flag, image, addr, tmp, 1, 2);
+      }
+      return;
+
+   case GL_RG16I:
+      tmp = retype(src, BRW_REGISTER_TYPE_D);
+      tmp = emit_convert_to_integer(tmp, WRITEMASK_XY, 16);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16_UINT */
+         emit_typed_write(flag, image, addr, tmp, dims, 2);
+      } else {
+         /* Hardware surface format: R32_UINT */
+         tmp = emit_pack_homogeneous(tmp, 0, 16, 16, 16);
+         emit_typed_write(flag, image, addr, tmp, dims, 1);
+      }
+      return;
+
+   case GL_RG8I:
+      tmp = retype(src, BRW_REGISTER_TYPE_D);
+      tmp = emit_convert_to_integer(tmp, WRITEMASK_XY, 8);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R8G8_UINT */
+         emit_typed_write(flag, image, addr, tmp, dims, 2);
+      } else {
+         /* Hardware surface format: R16_UINT */
+         tmp = emit_pack_homogeneous(tmp, 0, 8, 8, 8);
+         emit_typed_write(flag, image, addr, tmp, dims, 1);
+      }
+      return;
+
+   case GL_R32I:
+      /* Hardware surface format: R32_INT */
+      tmp = retype(src, BRW_REGISTER_TYPE_D);
+      emit_typed_write(flag, image, addr, tmp, dims, 1);
+      return;
+
+   case GL_R16I:
+      /* Hardware surface format: R16_UINT */
+      tmp = retype(src, BRW_REGISTER_TYPE_D);
+      tmp = emit_convert_to_integer(tmp, WRITEMASK_X, 16);
+      emit_typed_write(flag, image, addr, tmp, dims, 1);
+      return;
+
+   case GL_R8I:
+      /* Hardware surface format: R8_UINT */
+      tmp = retype(src, BRW_REGISTER_TYPE_D);
+      tmp = emit_convert_to_integer(tmp, WRITEMASK_X, 8);
+      emit_typed_write(flag, image, addr, tmp, dims, 1);
+      return;
+
+   case GL_RGBA16:
+      tmp = retype(src, BRW_REGISTER_TYPE_F);
+      tmp = emit_convert_to_scaled(tmp, BRW_REGISTER_TYPE_UD,
+                                   WRITEMASK_XYZW, 65535.0);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16B16A16_UINT */
+         emit_typed_write(flag, image, addr, tmp, dims, 4);
+      } else {
+         /* Hardware surface format: RAW */
+         tmp = emit_pack_homogeneous(tmp, 0, 16, 16, 16, 32, 16, 48, 16);
+         flag = emit_coordinate_check(image, addr, dims);
+         addr = emit_coordinate_address_calculation(image, addr, dims);
+         emit_untyped_write(flag, image, addr, tmp, 1, 2);
+      }
+      return;
+
+   case GL_RGB10_A2:
+      /* Hardware surface format: R32_UINT */
+      tmp = retype(src, BRW_REGISTER_TYPE_F);
+      tmp = emit_convert_to_scaled(tmp, BRW_REGISTER_TYPE_UD,
+                                   WRITEMASK_XYZ, 1023.0,
+                                   WRITEMASK_W, 3.0);
+      tmp = emit_pack_generic(tmp, 0, 10, 10, 10, 20, 10, 30, 2);
+      flag = emit_coordinate_check(image, addr, dims);
+      emit_typed_write(flag, image, addr, tmp, dims, 1);
+      return;
+
+   case GL_RGBA8:
+      tmp = retype(src, BRW_REGISTER_TYPE_F);
+      tmp = emit_convert_to_scaled(tmp, BRW_REGISTER_TYPE_UD,
+                                   WRITEMASK_XYZW, 255.0);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R8G8B8A8_UINT */
+         emit_typed_write(flag, image, addr, tmp, dims, 4);
+      } else {
+         /* Hardware surface format: R32_UINT */
+         tmp = emit_pack_homogeneous(tmp, 0, 8, 8, 8, 16, 8, 24, 8);
+         flag = emit_coordinate_check(image, addr, dims);
+         emit_typed_write(flag, image, addr, tmp, dims, 1);
+      }
+      return;
+
+   case GL_RG16:
+      tmp = retype(src, BRW_REGISTER_TYPE_F);
+      tmp = emit_convert_to_scaled(tmp, BRW_REGISTER_TYPE_UD,
+                                   WRITEMASK_XY, 65535.0);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16_UINT */
+         emit_typed_write(flag, image, addr, tmp, dims, 2);
+      } else {
+         /* Hardware surface format: R32_UINT */
+         tmp = emit_pack_homogeneous(tmp, 0, 16, 16, 16);
+         emit_typed_write(flag, image, addr, tmp, dims, 1);
+      }
+      return;
+
+   case GL_RG8:
+      tmp = retype(src, BRW_REGISTER_TYPE_F);
+      tmp = emit_convert_to_scaled(tmp, BRW_REGISTER_TYPE_UD,
+                                   WRITEMASK_XY, 255.0);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R8G8_UINT */
+         emit_typed_write(flag, image, addr, tmp, dims, 2);
+      } else {
+         /* Hardware surface format: R16_UINT */
+         tmp = emit_pack_homogeneous(tmp, 0, 8, 8, 8);
+         emit_typed_write(flag, image, addr, tmp, dims, 1);
+      }
+      return;
+
+   case GL_R16:
+      /* Hardware surface format: R16_UINT */
+      tmp = retype(src, BRW_REGISTER_TYPE_F);
+      tmp = emit_convert_to_scaled(tmp, BRW_REGISTER_TYPE_UD,
+                                   WRITEMASK_X, 65535.0);
+      emit_typed_write(flag, image, addr, tmp, dims, 1);
+      return;
+
+   case GL_R8:
+      /* Hardware surface format: R8_UINT */
+      tmp = retype(src, BRW_REGISTER_TYPE_F);
+      tmp = emit_convert_to_scaled(tmp, BRW_REGISTER_TYPE_UD,
+                                   WRITEMASK_X, 255.0);
+      emit_typed_write(flag, image, addr, tmp, dims, 1);
+      return;
+
+   case GL_RGBA16_SNORM:
+      tmp = retype(src, BRW_REGISTER_TYPE_F);
+      tmp = emit_convert_to_scaled(tmp, BRW_REGISTER_TYPE_D,
+                                   WRITEMASK_XYZW, 32767.0);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16B16A16_UINT */
+         emit_typed_write(flag, image, addr, tmp, dims, 4);
+      } else {
+         /* Hardware surface format: RAW */
+         tmp = emit_pack_homogeneous(tmp, 0, 16, 16, 16, 32, 16, 48, 16);
+         flag = emit_coordinate_check(image, addr, dims);
+         addr = emit_coordinate_address_calculation(image, addr, dims);
+         emit_untyped_write(flag, image, addr, tmp, 1, 2);
+      }
+      return;
+
+   case GL_RGBA8_SNORM:
+      tmp = retype(src, BRW_REGISTER_TYPE_F);
+      tmp = emit_convert_to_scaled(tmp, BRW_REGISTER_TYPE_D,
+                                   WRITEMASK_XYZW, 127.0);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R8G8B8A8_UINT */
+         emit_typed_write(flag, image, addr, tmp, dims, 4);
+      } else {
+         /* Hardware surface format: R32_UINT */
+         tmp = emit_pack_homogeneous(tmp, 0, 8, 8, 8, 16, 8, 24, 8);
+         flag = emit_coordinate_check(image, addr, dims);
+         emit_typed_write(flag, image, addr, tmp, dims, 1);
+      }
+      return;
+
+   case GL_RG16_SNORM:
+      tmp = retype(src, BRW_REGISTER_TYPE_F);
+      tmp = emit_convert_to_scaled(tmp, BRW_REGISTER_TYPE_D,
+                                   WRITEMASK_XY, 32767.0);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R16G16_UINT */
+         emit_typed_write(flag, image, addr, tmp, dims, 2);
+      } else {
+         /* Hardware surface format: R32_UINT */
+         tmp = emit_pack_homogeneous(tmp, 0, 16, 16, 16);
+         emit_typed_write(flag, image, addr, tmp, dims, 1);
+      }
+      return;
+
+   case GL_RG8_SNORM:
+      tmp = retype(src, BRW_REGISTER_TYPE_F);
+      tmp = emit_convert_to_scaled(tmp, BRW_REGISTER_TYPE_D,
+                                   WRITEMASK_XY, 127.0);
+
+      if (v->brw->is_haswell) {
+         /* Hardware surface format: R8G8_UINT */
+         emit_typed_write(flag, image, addr, tmp, dims, 2);
+      } else {
+         /* Hardware surface format: R16_UINT */
+         tmp = emit_pack_homogeneous(tmp, 0, 8, 8, 8);
+         emit_typed_write(flag, image, addr, tmp, dims, 1);
+      }
+      return;
+
+   case GL_R16_SNORM:
+      /* Hardware surface format: R16_UINT */
+      tmp = retype(src, BRW_REGISTER_TYPE_F);
+      tmp = emit_convert_to_scaled(tmp, BRW_REGISTER_TYPE_D,
+                                   WRITEMASK_X, 32767.0);
+      emit_typed_write(flag, image, addr, tmp, dims, 1);
+      return;
+
+   case GL_R8_SNORM:
+      /* Hardware surface format: R8_UINT */
+      tmp = retype(src, BRW_REGISTER_TYPE_F);
+      tmp = emit_convert_to_scaled(tmp, BRW_REGISTER_TYPE_D,
+                                   WRITEMASK_X, 127.0);
+      emit_typed_write(flag, image, addr, tmp, dims, 1);
+      return;
+
+   default:
+      unreachable();
+   }
+}
+
+backend_reg
+brw_surface_visitor::emit_image_atomic(backend_reg image, backend_reg addr,
+                                       backend_reg src0, backend_reg src1,
+                                       GLenum format, unsigned op,
+                                       unsigned dims) const
+{
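+   /* Only the 32-bit integer formats are handled here.  Both map
+    * directly to native surface formats, so the operation requires no
+    * conversion.
+    */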
+   switch (format) {
+   case GL_R32UI:
+      /* Hardware surface format: R32_UINT */
+      return emit_typed_atomic(backend_reg(), image, addr, src0, src1,
+                               dims, op);
+
+   case GL_R32I:
+      /* Hardware surface format: R32_INT */
+      return emit_typed_atomic(backend_reg(), image, addr, src0, src1,
+                               dims, op);
+
+   default:
+      unreachable();
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_surface_visitor.h b/src/mesa/drivers/dri/i965/brw_surface_visitor.h
new file mode 100644
index 0000000..1dcc1aa
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_surface_visitor.h
@@ -0,0 +1,233 @@
+/*
+ * Copyright 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Francisco Jerez <currojerez at riseup.net>
+ */
+#ifndef BRW_SURFACE_VISITOR_H
+#define BRW_SURFACE_VISITOR_H
+
+#include "brw_shader.h"
+
+class brw_surface_visitor {
+public:
+   brw_surface_visitor(backend_visitor *v);
+
+   /**
+    * Lower an atomic counter intrinsic call.
+    */
+   void
+   visit_atomic_counter_intrinsic(ir_call *ir) const;
+
+   /**
+    * Lower an image intrinsic call.
+    */
+   void
+   visit_image_intrinsic(ir_call *ir) const;
+
+   /**
+    * Lower a memory barrier intrinsic call.
+    */
+   void
+   visit_barrier_intrinsic(ir_call *ir) const;
+
+protected:
+   backend_reg
+   emit_image_load(backend_reg image, backend_reg addr,
+                   GLenum format, unsigned dims) const;
+
+   void
+   emit_image_store(backend_reg image, backend_reg addr,
+                    backend_reg src,
+                    GLenum format, unsigned dims) const;
+
+   backend_reg
+   emit_image_atomic(backend_reg image, backend_reg addr,
+                     backend_reg src0, backend_reg src1,
+                     GLenum format, unsigned op, unsigned dims) const;
+
+   virtual void
+   emit_assign_vector(backend_reg dst, backend_reg src,
+                      unsigned size) const = 0;
+
+   /**
+    * Check if the surface coordinates \p addr are within the bounds
+    * of the surface \p image and return the comparison result in a
+    * flag register.
+    */
+   virtual backend_reg
+   emit_coordinate_check(backend_reg image, backend_reg addr,
+                         unsigned dims) const = 0;
+
+   /**
+    * Calculate the memory offset for surface coordinate \p addr.
+    */
+   virtual backend_reg
+   emit_coordinate_address_calculation(backend_reg surface, backend_reg addr,
+                                       unsigned dims) const = 0;
+
+   virtual backend_reg
+   emit_untyped_read(backend_reg flag, backend_reg surface,
+                     backend_reg addr,
+                     unsigned dims, unsigned size) const = 0;
+
+   virtual void
+   emit_untyped_write(backend_reg flag, backend_reg surface,
+                      backend_reg addr, backend_reg src,
+                      unsigned dims, unsigned size) const = 0;
+
+   virtual backend_reg
+   emit_untyped_atomic(backend_reg flag, backend_reg surface,
+                       backend_reg addr,
+                       backend_reg src0, backend_reg src1,
+                       unsigned dims, unsigned op) const = 0;
+
+   virtual backend_reg
+   emit_typed_read(backend_reg flag, backend_reg surface,
+                   backend_reg addr,
+                   unsigned dims, unsigned size) const = 0;
+
+   virtual void
+   emit_typed_write(backend_reg flag, backend_reg surface,
+                    backend_reg addr, backend_reg src,
+                    unsigned dims, unsigned size) const = 0;
+
+   virtual backend_reg
+   emit_typed_atomic(backend_reg flag, backend_reg surface,
+                     backend_reg addr,
+                     backend_reg src0, backend_reg src1,
+                     unsigned dims, unsigned op) const = 0;
+
+   virtual void
+   emit_memory_fence() const = 0;
+
+   /**
+    * If the flag register evaluates to true, extend the input vector
+    * \p src from \p size components to four components, padding with
+    * (0, 0, 0, 1).  Otherwise discard the input and return
+    * (0, 0, 0, 1).
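+    * E.g. a two-component input (r, g) comes out as (r, g, 0, 1).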
+    */
+   virtual backend_reg
+   emit_pad(backend_reg flag, backend_reg src, unsigned size) const = 0;
+
+   /**
+    * Pack up to four vector components into a scalar value using the
+    * specified bit field positions.
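+    * E.g. shifts 0, 10, 20, 30 with widths 10, 10, 10, 2 yield the
+    * RGB10_A2 layout.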
+    */
+   virtual backend_reg
+   emit_pack_generic(backend_reg src,
+                     unsigned shift_r = 0, unsigned width_r = 0,
+                     unsigned shift_g = 0, unsigned width_g = 0,
+                     unsigned shift_b = 0, unsigned width_b = 0,
+                     unsigned shift_a = 0, unsigned width_a = 0) const = 0;
+
+   /**
+    * Unpack up to four vector components from a scalar value using the
+    * specified bit field positions.
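+    * Signed components are sign-extended to the full register width.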
+    */
+   virtual backend_reg
+   emit_unpack_generic(backend_reg src,
+                       unsigned shift_r = 0, unsigned width_r = 0,
+                       unsigned shift_g = 0, unsigned width_g = 0,
+                       unsigned shift_b = 0, unsigned width_b = 0,
+                       unsigned shift_a = 0, unsigned width_a = 0) const = 0;
+
+   /**
+    * Pack up to four vector components into a scalar value using the
+    * specified bit field positions.  The widths are assumed to be
+    * equal to each other and to the size of a supported register data
+    * type.  The shifts are assumed to be width-aligned.
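+    * E.g. four 16-bit components are packed with shifts 0, 16, 32 and
+    * 48.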
+    */
+   virtual backend_reg
+   emit_pack_homogeneous(backend_reg src,
+                         unsigned shift_r = 0, unsigned width_r = 0,
+                         unsigned shift_g = 0, unsigned width_g = 0,
+                         unsigned shift_b = 0, unsigned width_b = 0,
+                         unsigned shift_a = 0, unsigned width_a = 0) const = 0;
+
+   /**
+    * Unpack up to four vector components from a scalar value using
+    * the specified bit field positions.  The widths are assumed to be
+    * equal to each other and to the size of a supported register data
+    * type.  The shifts are assumed to be width-aligned.
+    */
+   virtual backend_reg
+   emit_unpack_homogeneous(backend_reg src,
+                           unsigned shift_r = 0, unsigned width_r = 0,
+                           unsigned shift_g = 0, unsigned width_g = 0,
+                           unsigned shift_b = 0, unsigned width_b = 0,
+                           unsigned shift_a = 0, unsigned width_a = 0) const = 0;
+
+   /**
+    * Convert to an integer data type of variable width, clamping the
+    * source as necessary.  Different width values can be specified
+    * for two different subsets of the input components.
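+    * E.g. a signed 8-bit width clamps to [-128, 127] and an unsigned
+    * one to [0, 255].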
+    */
+   virtual backend_reg
+   emit_convert_to_integer(backend_reg src,
+                           unsigned mask0 = 0, unsigned width0 = 0,
+                           unsigned mask1 = 0, unsigned width1 = 0) const = 0;
+
+   /**
+    * Convert from a signed or unsigned normalized fixed point
+    * fraction.  Different normalization constants can be specified
+    * for two different subsets of the input components.
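+    * E.g. an 8-bit UNORM value x is mapped to x / 255.0.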
+    */
+   virtual backend_reg
+   emit_convert_from_scaled(backend_reg src,
+                            unsigned mask0 = 0, float scale0 = 0,
+                            unsigned mask1 = 0, float scale1 = 0) const = 0;
+
+   /**
+    * Convert to a signed or unsigned normalized fixed point fraction.
+    * Different normalization constants can be specified for two
+    * different subsets of the input components.
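+    * E.g. an 8-bit UNORM destination clamps the input to at most 1.0
+    * and scales it by 255.0.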
+    */
+   virtual backend_reg
+   emit_convert_to_scaled(backend_reg src, unsigned type,
+                          unsigned mask0 = 0, float scale0 = 0,
+                          unsigned mask1 = 0, float scale1 = 0) const = 0;
+
+   /**
+    * Convert from a packed floating point number of variable width.
+    * Different width values can be specified for two different
+    * subsets of the input components.
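+    * E.g. the 11- and 10-bit components of the R11F_G11F_B10F format.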
+    */
+   virtual backend_reg
+   emit_convert_from_float(backend_reg src,
+                           unsigned mask0 = 0, unsigned width0 = 0,
+                           unsigned mask1 = 0, unsigned width1 = 0) const = 0;
+
+   /**
+    * Convert to a packed floating point number of variable width.
+    * Different width values can be specified for two different
+    * subsets of the input components.
+    */
+   virtual backend_reg
+   emit_convert_to_float(backend_reg src,
+                         unsigned mask0 = 0, unsigned width0 = 0,
+                         unsigned mask1 = 0, unsigned width1 = 0) const = 0;
+
+   backend_visitor *v;
+};
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_surface_visitor.cpp
new file mode 100644
index 0000000..3528bbe
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_surface_visitor.cpp
@@ -0,0 +1,846 @@
+/*
+ * Copyright 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Francisco Jerez <currojerez at riseup.net>
+ */
+
+#include "brw_vec4_surface_visitor.h"
+
+using namespace brw;
+
+namespace {
+   vec4_instruction &
+   exec_all(vec4_instruction &inst)
+   {
+      inst.force_writemask_all = true;
+      return inst;
+   }
+
+   vec4_instruction &
+   exec_predicated(backend_reg flag, vec4_instruction &inst)
+   {
+      if (flag.file != BAD_FILE)
+         inst.predicate = BRW_PREDICATE_ALIGN16_ALL4H;
+
+      return inst;
+   }
+}
+
+brw_vec4_surface_visitor::brw_vec4_surface_visitor(vec4_visitor *v) :
+   brw_surface_visitor(v), v(v)
+{
+}
+
+vec4_instruction &
+brw_vec4_surface_visitor::emit(opcode op, dst_reg dst,
+                               src_reg src0,
+                               src_reg src1,
+                               src_reg src2) const
+{
+   return *v->emit(op, dst, src0, src1, src2);
+}
+
+src_reg
+brw_vec4_surface_visitor::make_grf(unsigned type, unsigned size) const
+{
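+   /* Round the component count up to a whole number of vec4
+    * registers.
+    */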
+   const unsigned num_registers = (size + 3) / 4;
+   return retype(src_reg(GRF, v->virtual_grf_alloc(num_registers), NULL),
+                 type);
+}
+
+src_reg
+brw_vec4_surface_visitor::make_mrf(unsigned reg) const
+{
+   return retype(src_reg(MRF, reg, NULL), BRW_REGISTER_TYPE_UD);
+}
+
+void
+brw_vec4_surface_visitor::emit_assign_vector(
+   backend_reg dst, backend_reg src, unsigned size) const
+{
+   const unsigned mask = (1 << size) - 1;
+
+   emit(BRW_OPCODE_MOV, writemask(dst, mask), src);
+}
+
+void
+brw_vec4_surface_visitor::emit_assign_with_pad(
+   dst_reg dst, src_reg src, unsigned size) const
+{
+   const unsigned mask = (1 << size) - 1;
+
+   emit(BRW_OPCODE_MOV, writemask(dst, mask), src);
+
+   if (dst.writemask & ~mask)
+      emit(BRW_OPCODE_MOV, writemask(dst, ~mask), 0);
+}
+
+/**
+ * Copy a SIMD4x2 vector to its SIMD8x4 transpose.
+ */
+void
+brw_vec4_surface_visitor::emit_assign_to_transpose(
+   dst_reg dst, src_reg src, unsigned size) const
+{
+   for (unsigned i = 0; i < size; ++i) {
+      emit(BRW_OPCODE_MOV,
+           writemask(offset(dst, i), WRITEMASK_X),
+           swizzle(src, BRW_SWIZZLE4(i, i, i, i)));
+   }
+}
+
+/**
+ * Copy a SIMD4x2 vector from its SIMD8x4 transpose.
+ */
+void
+brw_vec4_surface_visitor::emit_assign_from_transpose(
+   dst_reg dst, src_reg src, unsigned size) const
+{
+   for (unsigned i = 0; i < size; ++i) {
+      emit(BRW_OPCODE_MOV,
+           writemask(dst, 1 << i),
+           swizzle(offset(src, i), BRW_SWIZZLE_XXXX));
+   }
+}
+
+/**
+ * Initialize the header present in some surface access messages.
+ */
+void
+brw_vec4_surface_visitor::emit_surface_header(struct dst_reg dst) const
+{
+   assert(dst.file == MRF);
+   exec_all(emit(BRW_OPCODE_MOV, dst, 0));
+
+   if (!v->brw->is_haswell) {
+      /* The sample mask is used on IVB for the SIMD8 messages that
+       * have no SIMD4x2 counterpart.  We only use the two X channels
+       * in that case, so mask everything else out.
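+       * The immediate 0x11 sets bits 0 and 4, i.e. the first channel
+       * of each SIMD4x2 half.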
+       */
+      exec_all(emit(BRW_OPCODE_MOV,
+                    brw_writemask(brw_uvec_mrf(4, dst.reg, 4), WRITEMASK_W),
+                    0x11));
+   }
+}
+
+backend_reg
+brw_vec4_surface_visitor::emit_coordinate_check(
+   backend_reg image, backend_reg addr, unsigned dims) const
+{
+   src_reg size = offset(image, BRW_IMAGE_PARAM_SIZE_OFFSET / 4);
+   struct brw_reg flag = brw_flag_reg(0, 0);
+
+   /* Using swizzle_for_size() in the source values makes sure that
+    * the flag register result has valid comparison bits replicated to
+    * all four channels, so that we can use the ALL4H predication mode
+    * later on.
+    */
+   emit(BRW_OPCODE_CMP, brw_writemask(brw_null_reg(), WRITEMASK_XYZW),
+        swizzle(retype(addr, BRW_REGISTER_TYPE_UD), swizzle_for_size(dims)),
+        swizzle(size, swizzle_for_size(dims)))
+      .conditional_mod = BRW_CONDITIONAL_L;
+
+   return flag;
+}
+
+backend_reg
+brw_vec4_surface_visitor::emit_coordinate_address_calculation(
+   backend_reg image, backend_reg addr, unsigned dims) const
+{
+   const unsigned mask = (1 << dims) - 1;
+   src_reg off = offset(image, BRW_IMAGE_PARAM_OFFSET_OFFSET / 4);
+   src_reg stride = offset(image, BRW_IMAGE_PARAM_STRIDE_OFFSET / 4);
+   src_reg tile = offset(image, BRW_IMAGE_PARAM_TILING_OFFSET / 4);
+   src_reg swz = offset(image, BRW_IMAGE_PARAM_SWIZZLING_OFFSET / 4);
+   src_reg dst = make_grf(BRW_REGISTER_TYPE_UD, 1);
+   src_reg tmp = make_grf(BRW_REGISTER_TYPE_UD, 4);
+
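+   /* The code below computes the linear byte offset of the texel at
+    * \p addr: each coordinate is decomposed into a minor (intra-tile)
+    * and a major (inter-tile) index, which are scaled by the Bpp, the
+    * surface stride and the tile dimensions as appropriate and summed
+    * up.
+    */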
+   /* Shift the coordinates by the fixed surface offset. */
+   emit(BRW_OPCODE_ADD, writemask(addr, WRITEMASK_XY & mask),
+        addr, off);
+
+   if (dims > 2) {
+      /* Decompose z into a major (tmp.w) and a minor (tmp.z)
+       * index.
+       */
+      emit(BRW_OPCODE_SHL, writemask(tmp, WRITEMASK_Z),
+           addr, negate(tile));
+
+      emit(BRW_OPCODE_SHR, writemask(tmp, WRITEMASK_Z),
+           tmp, negate(tile));
+
+      emit(BRW_OPCODE_SHR, writemask(tmp, WRITEMASK_W),
+           swizzle(addr, BRW_SWIZZLE_ZZZZ),
+           swizzle(tile, BRW_SWIZZLE_ZZZZ));
+
+      /* Calculate the horizontal (tmp.z) and vertical (tmp.w) slice
+       * offset.
+       */
+      emit(BRW_OPCODE_MUL, writemask(tmp, WRITEMASK_ZW),
+           stride, tmp);
+      emit(BRW_OPCODE_ADD, writemask(addr, WRITEMASK_XY),
+           addr, swizzle(tmp, BRW_SWIZZLE_ZWZW));
+   }
+
+   if (dims > 1) {
+      /* Calculate the minor x (tmp.x) and y (tmp.y) indices. */
+      emit(BRW_OPCODE_SHL, writemask(tmp, WRITEMASK_XY),
+           addr, negate(tile));
+
+      emit(BRW_OPCODE_SHR, writemask(tmp, WRITEMASK_XY),
+           tmp, negate(tile));
+
+      /* Calculate the major x (tmp.z) and y (tmp.w) indices. */
+      emit(BRW_OPCODE_SHR, writemask(tmp, WRITEMASK_ZW),
+           swizzle(addr, BRW_SWIZZLE_XYXY),
+           swizzle(tile, BRW_SWIZZLE_XYXY));
+
+      /* Multiply the minor indices and the major x index (tmp.x,
+       * tmp.y and tmp.z) by the Bpp, and the major y index (tmp.w) by
+       * the vertical stride.
+       */
+      emit(BRW_OPCODE_MUL, writemask(tmp, WRITEMASK_XYZW),
+           swizzle(stride, BRW_SWIZZLE_XXXY), tmp);
+
+      /* Multiply by the tile dimensions using two shift instructions.
+       * Equivalent to:
+       *   minor.y = minor.y << tile.x
+       *   major.x = major.x << tile.x << tile.y
+       *   major.y = major.y << tile.y
+       */
+      emit(BRW_OPCODE_SHL, writemask(tmp, WRITEMASK_ZW),
+           swizzle(tmp, BRW_SWIZZLE_ZWZW),
+           swizzle(tile, BRW_SWIZZLE_YYYY));
+
+      emit(BRW_OPCODE_SHL, writemask(tmp, WRITEMASK_YZ),
+           swizzle(tmp, BRW_SWIZZLE_YYZZ),
+           swizzle(tile, BRW_SWIZZLE_XXXX));
+
+      /* Add everything up. */
+      emit(BRW_OPCODE_ADD, writemask(tmp, WRITEMASK_XY),
+           swizzle(tmp, BRW_SWIZZLE_XYXY),
+           swizzle(tmp, BRW_SWIZZLE_ZWZW));
+
+      emit(BRW_OPCODE_ADD, writemask(dst, WRITEMASK_X),
+           swizzle(tmp, BRW_SWIZZLE_XXXX),
+           swizzle(tmp, BRW_SWIZZLE_YYYY));
+
+      if (v->brw->has_swizzling) {
+         /* Take into account the two dynamically specified shifts. */
+         emit(BRW_OPCODE_SHR, writemask(tmp, WRITEMASK_XY),
+              swizzle(dst, BRW_SWIZZLE_XXXX), swz);
+
+         /* XOR tmp.x and tmp.y together, keep bit 6 of the result,
+          * and flip that bit of the memory address. */
+         emit(BRW_OPCODE_XOR, writemask(tmp, WRITEMASK_X),
+              swizzle(tmp, BRW_SWIZZLE_XXXX),
+              swizzle(tmp, BRW_SWIZZLE_YYYY));
+
+         emit(BRW_OPCODE_AND, writemask(tmp, WRITEMASK_X),
+              tmp, 1 << 6);
+
+         emit(BRW_OPCODE_XOR, writemask(dst, WRITEMASK_X),
+              dst, tmp);
+      }
+
+   } else {
+      /* Multiply by the Bpp value. */
+      emit(BRW_OPCODE_MUL, writemask(dst, WRITEMASK_X),
+           addr, stride);
+   }
+
+   return dst;
+}
+
+backend_reg
+brw_vec4_surface_visitor::emit_untyped_read(
+   backend_reg flag, backend_reg surface, backend_reg addr,
+   unsigned dims, unsigned size) const
+{
+   src_reg dst = make_grf(BRW_REGISTER_TYPE_UD, size);
+   unsigned mlen = 0;
+
+   /* Set the surface read address. */
+   emit_assign_with_pad(make_mrf(mlen), addr, dims);
+   mlen++;
+
+   /* Emit the instruction. */
+   vec4_instruction &inst = exec_predicated(
+      flag, emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst,
+                 surface, size));
+   inst.base_mrf = 0;
+   inst.mlen = mlen;
+
+   return dst;
+}
+
+void
+brw_vec4_surface_visitor::emit_untyped_write(
+   backend_reg flag, backend_reg surface, backend_reg addr,
+   backend_reg src, unsigned dims, unsigned size) const
+{
+   const unsigned mask = (v->brw->is_haswell ? (1 << size) - 1 : 1);
+   unsigned mlen = 0;
+
+   /* Set the surface write address. */
+   if (v->brw->is_haswell) {
+      emit_assign_with_pad(make_mrf(mlen), addr, dims);
+      mlen++;
+   } else {
+      emit_assign_to_transpose(make_mrf(mlen), addr, dims);
+      mlen += dims;
+   }
+
+   /* Set the source value. */
+   if (v->brw->is_haswell) {
+      emit_assign_with_pad(make_mrf(mlen), src, size);
+      mlen++;
+   } else {
+      emit_assign_to_transpose(make_mrf(mlen), src, size);
+      mlen += size;
+   }
+
+   /* Emit the instruction.  Note that this is translated into the
+    * SIMD8 untyped surface write message on IVB because the
+    * hardware lacks a SIMD4x2 counterpart.
+    */
+   vec4_instruction &inst = exec_predicated(
+      flag, emit(SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
+                 brw_writemask(brw_null_reg(), mask),
+                 surface, size));
+   inst.base_mrf = 0;
+   inst.mlen = mlen;
+}
+
+backend_reg
+brw_vec4_surface_visitor::emit_untyped_atomic(
+   backend_reg flag, backend_reg surface, backend_reg addr,
+   backend_reg src0, backend_reg src1,
+   unsigned dims, unsigned op) const
+{
+   src_reg dst = make_grf(BRW_REGISTER_TYPE_UD, 1);
+   unsigned mlen = 0;
+
+   /* Set the atomic operation address. */
+   if (v->brw->is_haswell) {
+      emit_assign_with_pad(make_mrf(mlen), addr, dims);
+      mlen++;
+   } else {
+      emit_assign_to_transpose(make_mrf(mlen), addr, dims);
+      mlen += dims;
+   }
+
+   /* Set the source arguments. */
+   if (v->brw->is_haswell) {
+      if (src0.file != BAD_FILE)
+         emit(BRW_OPCODE_MOV, writemask(make_mrf(mlen), WRITEMASK_X),
+              src0);
+
+      if (src1.file != BAD_FILE)
+         emit(BRW_OPCODE_MOV, writemask(make_mrf(mlen), WRITEMASK_Y),
+              swizzle(src1, BRW_SWIZZLE_XXXX));
+
+      mlen++;
+
+   } else {
+      if (src0.file != BAD_FILE) {
+         emit(BRW_OPCODE_MOV, writemask(make_mrf(mlen), WRITEMASK_X),
+              src0);
+         mlen++;
+      }
+
+      if (src1.file != BAD_FILE) {
+         emit(BRW_OPCODE_MOV, writemask(make_mrf(mlen), WRITEMASK_X),
+              src1);
+         mlen++;
+      }
+   }
+
+   /* Emit the instruction.  Note that this is translated into the
+    * SIMD8 untyped atomic message on IVB because the hardware lacks a
+    * SIMD4x2 counterpart.
+    */
+   vec4_instruction &inst = exec_predicated(
+      flag, emit(SHADER_OPCODE_UNTYPED_ATOMIC,
+                 writemask(dst, WRITEMASK_X),
+                 surface, op));
+   inst.base_mrf = 0;
+   inst.mlen = mlen;
+
+   return dst;
+}
+
+backend_reg
+brw_vec4_surface_visitor::emit_typed_read(
+   backend_reg flag, backend_reg surface, backend_reg addr,
+   unsigned dims, unsigned size) const
+{
+   const unsigned rlen = size * (v->brw->is_haswell ? 1 : 8);
+   src_reg tmp = make_grf(BRW_REGISTER_TYPE_UD, rlen);
+   src_reg dst = make_grf(BRW_REGISTER_TYPE_UD, size);
+   unsigned mlen = 0;
+
+   /* Initialize the message header. */
+   emit_surface_header(make_mrf(mlen));
+   mlen++;
+
+   /* Set the surface read address. */
+   if (v->brw->is_haswell) {
+      emit_assign_with_pad(make_mrf(mlen), addr, dims);
+      mlen++;
+   } else {
+      emit_assign_to_transpose(make_mrf(mlen), addr, dims);
+      mlen += dims;
+   }
+
+   /* Emit the instruction.  Note that this is translated into the
+    * SIMD8 typed surface read message on IVB because the hardware
+    * lacks a SIMD4x2 counterpart.
+    */
+   vec4_instruction &inst = exec_predicated(
+      flag, emit(SHADER_OPCODE_TYPED_SURFACE_READ, tmp,
+                 surface, size));
+   inst.base_mrf = 0;
+   inst.mlen = mlen;
+
+   /* Transpose the result. */
+   if (v->brw->is_haswell)
+      dst = tmp;
+   else
+      emit_assign_from_transpose(dst, tmp, size);
+
+   return dst;
+}
+
+void
+brw_vec4_surface_visitor::emit_typed_write(
+   backend_reg flag, backend_reg surface, backend_reg addr,
+   backend_reg src, unsigned dims, unsigned size) const
+{
+   unsigned mlen = 0;
+
+   /* Initialize the message header. */
+   emit_surface_header(make_mrf(mlen));
+   mlen++;
+
+   /* Set the surface write address. */
+   if (v->brw->is_haswell) {
+      emit_assign_with_pad(make_mrf(mlen), addr, dims);
+      mlen++;
+   } else {
+      emit_assign_to_transpose(make_mrf(mlen), addr, dims);
+      mlen += dims;
+   }
+
+   /* Set the source value. */
+   if (v->brw->is_haswell) {
+      emit_assign_with_pad(make_mrf(mlen), src, size);
+      mlen++;
+   } else {
+      emit_assign_to_transpose(make_mrf(mlen), src, size);
+      mlen += size;
+   }
+
+   /* Emit the instruction.  Note that this is translated into the
+    * SIMD8 typed surface write message on IVB because the hardware
+    * lacks a SIMD4x2 counterpart.
+    */
+   vec4_instruction &inst = exec_predicated(
+      flag, emit(SHADER_OPCODE_TYPED_SURFACE_WRITE, brw_null_reg(),
+                 surface, size));
+   inst.base_mrf = 0;
+   inst.mlen = mlen;
+}
+
+backend_reg
+brw_vec4_surface_visitor::emit_typed_atomic(
+   backend_reg flag, backend_reg surface, backend_reg addr,
+   backend_reg src0, backend_reg src1,
+   unsigned dims, unsigned op) const
+{
+   src_reg dst = make_grf(BRW_REGISTER_TYPE_UD, 1);
+   unsigned mlen = 0;
+
+   /* Initialize the message header. */
+   emit_surface_header(make_mrf(mlen));
+   mlen++;
+
+   /* Set the atomic operation address. */
+   if (v->brw->is_haswell) {
+      emit_assign_with_pad(make_mrf(mlen), addr, dims);
+      mlen++;
+   } else {
+      emit_assign_to_transpose(make_mrf(mlen), addr, dims);
+      mlen += dims;
+   }
+
+   /* Set the source arguments. */
+   if (v->brw->is_haswell) {
+      if (src0.file != BAD_FILE)
+         emit(BRW_OPCODE_MOV, writemask(make_mrf(mlen), WRITEMASK_X),
+              src0);
+
+      if (src1.file != BAD_FILE)
+         emit(BRW_OPCODE_MOV, writemask(make_mrf(mlen), WRITEMASK_Y),
+              swizzle(src1, BRW_SWIZZLE_XXXX));
+
+      mlen++;
+
+   } else {
+      if (src0.file != BAD_FILE) {
+         emit(BRW_OPCODE_MOV, writemask(make_mrf(mlen), WRITEMASK_X),
+              src0);
+         mlen++;
+      }
+
+      if (src1.file != BAD_FILE) {
+         emit(BRW_OPCODE_MOV, writemask(make_mrf(mlen), WRITEMASK_X),
+              src1);
+         mlen++;
+      }
+   }
+
+   /* Emit the instruction.  Note that this is translated into the
+    * SIMD8 typed atomic message on IVB because the hardware lacks a
+    * SIMD4x2 counterpart.
+    */
+   vec4_instruction &inst = exec_predicated(
+      flag, emit(SHADER_OPCODE_TYPED_ATOMIC,
+                 writemask(dst, WRITEMASK_X),
+                 surface, op));
+   inst.base_mrf = 0;
+   inst.mlen = mlen;
+
+   return dst;
+}
+
+void
+brw_vec4_surface_visitor::emit_memory_fence() const
+{
+   emit(SHADER_OPCODE_MEMORY_FENCE);
+}
+
+backend_reg
+brw_vec4_surface_visitor::emit_pad(
+   backend_reg flag, backend_reg src, unsigned size) const
+{
+   const unsigned src_mask = (1 << size) - 1;
+   const unsigned pad_mask = (0xf & ~src_mask);
+   struct brw_reg pad = brw_imm_vf4(0, 0, 0, 1);
+
+   if (flag.file != BAD_FILE) {
+      src_reg dst = make_grf(src.type, 4);
+
+      emit(BRW_OPCODE_MOV, writemask(dst, WRITEMASK_XYZW), pad);
+      exec_predicated(flag, emit(BRW_OPCODE_SEL, writemask(dst, src_mask),
+                                 src, dst));
+      return dst;
+
+   } else {
+      if (pad_mask)
+         emit(BRW_OPCODE_MOV, writemask(src, pad_mask), pad);
+
+      return src;
+   }
+}
+
+backend_reg
+brw_vec4_surface_visitor::emit_pack_generic(
+   backend_reg src,
+   unsigned shift_r, unsigned width_r,
+   unsigned shift_g, unsigned width_g,
+   unsigned shift_b, unsigned width_b,
+   unsigned shift_a, unsigned width_a) const
+{
+   const unsigned mask = (!!width_r << 0 | !!width_g << 1 |
+                          !!width_b << 2 | !!width_a << 3);
+   const bool homogeneous = ((!width_g || width_r == width_g) &&
+                             (!width_b || width_g == width_b) &&
+                             (!width_a || width_b == width_a));
+   const unsigned bits = width_r + width_g + width_b + width_a;
+   src_reg shift = make_grf(BRW_REGISTER_TYPE_UD, 4);
+
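+   /* Each enabled component is shifted left to discard the bits above
+    * its field width, shifted right into its final field position and
+    * finally ORed together with the other components.
+    */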
+   /* Shift left to discard the most significant bits. */
+   emit(BRW_OPCODE_MOV, writemask(shift, mask),
+        (homogeneous ? brw_imm_ud(32 - width_r) :
+         brw_imm_vf4(32 - width_r, 32 - width_g,
+                     32 - width_b, 32 - width_a)));
+
+   emit(BRW_OPCODE_SHL, writemask(src, mask), src, shift);
+
+   /* Shift right to the final bit field positions. */
+   emit(BRW_OPCODE_MOV, writemask(shift, mask),
+        brw_imm_vf4(32 - shift_r % 32 - width_r,
+                    32 - shift_g % 32 - width_g,
+                    32 - shift_b % 32 - width_b,
+                    32 - shift_a % 32 - width_a));
+
+   emit(BRW_OPCODE_SHR, writemask(src, mask), src, shift);
+
+   /* Add everything up. */
+   if (mask >> 2)
+      emit(BRW_OPCODE_OR,
+           writemask(src, WRITEMASK_XY),
+           swizzle(src, BRW_SWIZZLE_XZXZ),
+           swizzle(src, (mask >> 3 ? BRW_SWIZZLE_YWYW :
+                         BRW_SWIZZLE_YZYZ)));
+
+   if (mask >> 1 && bits <= 32)
+      emit(BRW_OPCODE_OR,
+           writemask(src, WRITEMASK_X),
+           swizzle(src, BRW_SWIZZLE_XXXX),
+           swizzle(src, BRW_SWIZZLE_YYYY));
+
+   return src;
+}
+
+backend_reg
+brw_vec4_surface_visitor::emit_unpack_generic(
+   backend_reg src,
+   unsigned shift_r, unsigned width_r,
+   unsigned shift_g, unsigned width_g,
+   unsigned shift_b, unsigned width_b,
+   unsigned shift_a, unsigned width_a) const
+{
+   const unsigned mask = (!!width_r << 0 | !!width_g << 1 |
+                          !!width_b << 2 | !!width_a << 3);
+   const bool homogeneous = ((!width_g || width_r == width_g) &&
+                             (!width_b || width_g == width_b) &&
+                             (!width_a || width_b == width_a));
+   src_reg shift = make_grf(BRW_REGISTER_TYPE_UD, 4);
+   src_reg dst = make_grf(src.type, 4);
+
+   /* Shift left to discard the most significant bits. */
+   emit(BRW_OPCODE_MOV, writemask(shift, mask),
+        brw_imm_vf4(32 - shift_r % 32 - width_r,
+                    32 - shift_g % 32 - width_g,
+                    32 - shift_b % 32 - width_b,
+                    32 - shift_a % 32 - width_a));
+
+   emit(BRW_OPCODE_SHL, writemask(dst, mask),
+        swizzle(src, BRW_SWIZZLE4(shift_r / 32, shift_g / 32,
+                                  shift_b / 32, shift_a / 32)),
+        shift);
+
+   /* Shift back to the least significant bits using an arithmetic
+    * shift to get sign extension on signed types.
+    */
+   emit(BRW_OPCODE_MOV, writemask(shift, mask),
+        (homogeneous ? brw_imm_ud(32 - width_r) :
+         brw_imm_vf4(32 - width_r, 32 - width_g,
+                     32 - width_b, 32 - width_a)));
+
+   emit(BRW_OPCODE_ASR, writemask(dst, mask), dst, shift);
+
+   return dst;
+}
+
+backend_reg
+brw_vec4_surface_visitor::emit_pack_homogeneous(
+   backend_reg src,
+   unsigned shift_r, unsigned width_r,
+   unsigned shift_g, unsigned width_g,
+   unsigned shift_b, unsigned width_b,
+   unsigned shift_a, unsigned width_a) const
+{
+   /* We could do the same with fewer instructions if we had some way
+    * to use Align1 addressing in the VEC4 visitor.  Just use the
+    * general path for now...
+    */
+   return emit_pack_generic(src, shift_r, width_r, shift_g, width_g,
+                            shift_b, width_b, shift_a, width_a);
+}
+
+backend_reg
+brw_vec4_surface_visitor::emit_unpack_homogeneous(
+   backend_reg src,
+   unsigned shift_r, unsigned width_r,
+   unsigned shift_g, unsigned width_g,
+   unsigned shift_b, unsigned width_b,
+   unsigned shift_a, unsigned width_a) const
+{
+   /* We could do the same with fewer instructions if we had some way
+    * to use Align1 addressing in the VEC4 visitor.  Just use the
+    * general path for now...
+    */
+   return emit_unpack_generic(src, shift_r, width_r, shift_g, width_g,
+                              shift_b, width_b, shift_a, width_a);
+}
+
+backend_reg
+brw_vec4_surface_visitor::emit_convert_to_integer(
+   backend_reg src,
+   unsigned mask0, unsigned width0,
+   unsigned mask1, unsigned width1) const
+{
+   const unsigned mask[] = { mask0, mask1 };
+   const unsigned width[] = { width0, width1 };
+
+   for (unsigned i = 0; i < Elements(mask); ++i) {
+      if (mask[i]) {
+         const int32_t max = (type_is_signed(src.type) ?
+                              (1 << (width[i] - 1)) - 1 :
+                              (1 << width[i]) - 1);
+
+         /* Clamp to the minimum value. */
+         if (type_is_signed(src.type))
+            emit(BRW_OPCODE_SEL, writemask(src, mask[i]),
+                 src, - max - 1)
+               .conditional_mod = BRW_CONDITIONAL_G;
+
+         /* Clamp to the maximum value. */
+         emit(BRW_OPCODE_SEL, writemask(src, mask[i]),
+              src, max)
+            .conditional_mod = BRW_CONDITIONAL_L;
+      }
+   }
+
+   return src;
+}
+
+backend_reg
+brw_vec4_surface_visitor::emit_convert_from_scaled(
+   backend_reg src,
+   unsigned mask0, float scale0,
+   unsigned mask1, float scale1) const
+{
+   const unsigned mask[] = { mask0, mask1 };
+   const unsigned full_mask = mask0 | mask1;
+   const float scale[] = { scale0, scale1 };
+   src_reg dst = retype(src, BRW_REGISTER_TYPE_F);
+
+   /* Convert to float. */
+   emit(BRW_OPCODE_MOV, writemask(dst, full_mask), src);
+
+   /* Divide by the normalization constants. */
+   for (unsigned i = 0; i < Elements(mask); ++i) {
+      if (mask[i])
+         emit(BRW_OPCODE_MUL, writemask(dst, mask[i]),
+              dst, 1.0f / scale[i]);
+   }
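+
+   /* For illustration: with scale == 127.0f (e.g. an SNORM8 component)
+    * the raw value -128 maps to about -1.008 before the clamp below.
+    */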
+
+   /* Clamp to the minimum value. */
+   if (type_is_signed(src.type))
+      emit(BRW_OPCODE_SEL, writemask(dst, full_mask),
+           dst, -1.0f)
+         .conditional_mod = BRW_CONDITIONAL_G;
+
+   return dst;
+}
+
+backend_reg
+brw_vec4_surface_visitor::emit_convert_to_scaled(
+   backend_reg src, unsigned type,
+   unsigned mask0, float scale0,
+   unsigned mask1, float scale1) const
+{
+   const unsigned mask[] = { mask0, mask1 };
+   const unsigned full_mask = mask0 | mask1;
+   const float scale[] = { scale0, scale1 };
+   src_reg dst = retype(src, type);
+
+   /* Clamp to the minimum value. */
+   if (type_is_signed(type))
+      emit(BRW_OPCODE_SEL, writemask(src, full_mask),
+           src, -1.0f)
+         .conditional_mod = BRW_CONDITIONAL_G;
+
+   /* Clamp to the maximum value. */
+   emit(BRW_OPCODE_SEL, writemask(src, full_mask),
+        src, 1.0f)
+      .conditional_mod = BRW_CONDITIONAL_L;
+
+   /* Multiply by the normalization constants. */
+   for (unsigned i = 0; i < Elements(mask); ++i) {
+      if (mask[i])
+         emit(BRW_OPCODE_MUL, writemask(src, mask[i]),
+              src, scale[i]);
+   }
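+
+   /* E.g. a clamped input of 1.0f with scale == 127.0f ends up as
+    * 127.0f here, ready for the integer conversion below.
+    */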
+
+   /* Convert to integer. */
+   emit(BRW_OPCODE_MOV, writemask(dst, full_mask), src);
+
+   return dst;
+}
+
+backend_reg
+brw_vec4_surface_visitor::emit_convert_from_float(
+   backend_reg src,
+   unsigned mask0, unsigned width0,
+   unsigned mask1, unsigned width1) const
+{
+   const unsigned mask[] = { mask0, mask1 };
+   const unsigned full_mask = mask0 | mask1;
+   const unsigned width[] = { width0, width1 };
+   src_reg dst = retype(src, BRW_REGISTER_TYPE_F);
+
+   /* Extend 10-bit and 11-bit floating point numbers to 15 bits.
+    * This works because they have a 5-bit exponent just like the
+    * 16-bit floating point format, and they have no sign bit.
+    */
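+   /* E.g. an 11-bit float (5-bit exponent, 6-bit mantissa) shifted
+    * left by 15 - 11 == 4 bits lines up its exponent and mantissa
+    * fields with those of a half-float, with the sign bit left clear.
+    */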
+   for (unsigned i = 0; i < Elements(mask); ++i) {
+      if (mask[i] && width[i] < 16)
+         emit(BRW_OPCODE_SHL, writemask(src, mask[i]),
+              src, 15 - width[i]);
+   }
+
+   /* Convert to 32-bit floating point. */
+   emit(BRW_OPCODE_F16TO32, writemask(dst, full_mask), src);
+
+   return dst;
+}
+
+backend_reg
+brw_vec4_surface_visitor::emit_convert_to_float(
+   backend_reg src,
+   unsigned mask0, unsigned width0,
+   unsigned mask1, unsigned width1) const
+{
+   const unsigned mask[] = { mask0, mask1 };
+   const unsigned width[] = { width0, width1 };
+   const unsigned full_mask = mask0 | mask1;
+   const unsigned clamp_mask = ((width0 < 16 ? mask0 : 0) |
+                                (width1 < 16 ? mask1 : 0));
+   src_reg dst = retype(src, BRW_REGISTER_TYPE_UD);
+
+   /* Clamp negative values to zero, as the 10-bit and 11-bit formats
+    * have no sign bit.
+    */
+   if (clamp_mask)
+      emit(BRW_OPCODE_SEL, writemask(src, clamp_mask),
+           src, 0.0f)
+         .conditional_mod = BRW_CONDITIONAL_G;
+
+   /* Convert to 16-bit floating-point. */
+   emit(BRW_OPCODE_F32TO16, writemask(dst, full_mask), src);
+
+   /* Discard the least significant bits to get floating point numbers
+    * of the requested width.  This works because the 10-bit and
+    * 11-bit floating point formats have a 5-bit exponent just like
+    * the 16-bit format, and they have no sign bit.
+    */
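+   /* E.g. shifting a half-float right by 15 - 11 == 4 bits yields the
+    * 11-bit encoding, truncating the four least significant mantissa
+    * bits.
+    */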
+   for (unsigned i = 0; i < Elements(mask); ++i) {
+      if (mask[i] && width[i] < 16)
+         emit(BRW_OPCODE_SHR, writemask(dst, mask[i]),
+              dst, 15 - width[i]);
+   }
+
+   return dst;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_surface_visitor.h
new file mode 100644
index 0000000..76389f2
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_surface_visitor.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Francisco Jerez <currojerez at riseup.net>
+ */
+#ifndef BRW_VEC4_SURFACE_VISITOR_H
+#define BRW_VEC4_SURFACE_VISITOR_H
+
+#include "brw_surface_visitor.h"
+#include "brw_vec4.h"
+
+class brw_vec4_surface_visitor : public brw_surface_visitor {
+public:
+   brw_vec4_surface_visitor(brw::vec4_visitor *v);
+
+protected:
+   brw::vec4_instruction &
+   emit(opcode op, brw::dst_reg dst = brw::dst_reg(),
+        brw::src_reg src0 = brw::src_reg(),
+        brw::src_reg src1 = brw::src_reg(),
+        brw::src_reg src2 = brw::src_reg()) const;
+
+   brw::src_reg
+   make_grf(unsigned type, unsigned size) const;
+
+   brw::src_reg
+   make_mrf(unsigned reg) const;
+
+   virtual void
+   emit_assign_vector(backend_reg dst, backend_reg src, unsigned size) const;
+
+   void
+   emit_assign_with_pad(brw::dst_reg dst, brw::src_reg src,
+                        unsigned size) const;
+
+   void
+   emit_assign_to_transpose(brw::dst_reg dst, brw::src_reg src,
+                            unsigned size) const;
+
+   void
+   emit_assign_from_transpose(brw::dst_reg dst, brw::src_reg src,
+                              unsigned size) const;
+
+   void
+   emit_surface_header(brw::dst_reg dst) const;
+
+   virtual backend_reg
+   emit_coordinate_check(backend_reg image, backend_reg addr,
+                         unsigned dims) const;
+
+   virtual backend_reg
+   emit_coordinate_address_calculation(backend_reg image, backend_reg addr,
+                                       unsigned dims) const;
+
+   virtual backend_reg
+   emit_untyped_read(backend_reg flag, backend_reg surface,
+                     backend_reg addr,
+                     unsigned dims, unsigned size) const;
+
+   virtual void
+   emit_untyped_write(backend_reg flag, backend_reg surface,
+                      backend_reg addr, backend_reg src,
+                      unsigned dims, unsigned size) const;
+
+   virtual backend_reg
+   emit_untyped_atomic(backend_reg flag, backend_reg surface,
+                       backend_reg addr,
+                       backend_reg src0, backend_reg src1,
+                       unsigned dims, unsigned op) const;
+
+   virtual backend_reg
+   emit_typed_read(backend_reg flag, backend_reg surface,
+                   backend_reg addr,
+                   unsigned dims, unsigned size) const;
+
+   virtual void
+   emit_typed_write(backend_reg flag, backend_reg surface,
+                    backend_reg addr, backend_reg src,
+                    unsigned dims, unsigned size) const;
+
+   virtual backend_reg
+   emit_typed_atomic(backend_reg flag, backend_reg surface,
+                     backend_reg addr,
+                     backend_reg src0, backend_reg src1,
+                     unsigned dims, unsigned op) const;
+
+   virtual void
+   emit_memory_fence() const;
+
+   virtual backend_reg
+   emit_pad(backend_reg flag, backend_reg src, unsigned size) const;
+
+   virtual backend_reg
+   emit_pack_generic(backend_reg src,
+                     unsigned shift_r = 0, unsigned width_r = 0,
+                     unsigned shift_g = 0, unsigned width_g = 0,
+                     unsigned shift_b = 0, unsigned width_b = 0,
+                     unsigned shift_a = 0, unsigned width_a = 0) const;
+
+   virtual backend_reg
+   emit_unpack_generic(backend_reg src,
+                       unsigned shift_r = 0, unsigned width_r = 0,
+                       unsigned shift_g = 0, unsigned width_g = 0,
+                       unsigned shift_b = 0, unsigned width_b = 0,
+                       unsigned shift_a = 0, unsigned width_a = 0) const;
+
+   virtual backend_reg
+   emit_pack_homogeneous(backend_reg src,
+                         unsigned shift_r = 0, unsigned width_r = 0,
+                         unsigned shift_g = 0, unsigned width_g = 0,
+                         unsigned shift_b = 0, unsigned width_b = 0,
+                         unsigned shift_a = 0, unsigned width_a = 0) const;
+
+   virtual backend_reg
+   emit_unpack_homogeneous(backend_reg src,
+                           unsigned shift_r = 0, unsigned width_r = 0,
+                           unsigned shift_g = 0, unsigned width_g = 0,
+                           unsigned shift_b = 0, unsigned width_b = 0,
+                           unsigned shift_a = 0, unsigned width_a = 0) const;
+
+   virtual backend_reg
+   emit_convert_to_integer(backend_reg src,
+                           unsigned mask0 = 0, unsigned width0 = 0,
+                           unsigned mask1 = 0, unsigned width1 = 0) const;
+
+   virtual backend_reg
+   emit_convert_from_scaled(backend_reg src,
+                            unsigned mask0 = 0, float scale0 = 0,
+                            unsigned mask1 = 0, float scale1 = 0) const;
+
+   virtual backend_reg
+   emit_convert_to_scaled(backend_reg src, unsigned type,
+                          unsigned mask0 = 0, float scale0 = 0,
+                          unsigned mask1 = 0, float scale1 = 0) const;
+
+   virtual backend_reg
+   emit_convert_from_float(backend_reg src,
+                           unsigned mask0 = 0, unsigned width0 = 0,
+                           unsigned mask1 = 0, unsigned width1 = 0) const;
+
+   virtual backend_reg
+   emit_convert_to_float(backend_reg src,
+                         unsigned mask0 = 0, unsigned width0 = 0,
+                         unsigned mask1 = 0, unsigned width1 = 0) const;
+
+   brw::vec4_visitor *v;
+};
+
+#endif
-- 
1.8.3.4


