[Mesa-dev] [PATCH] clover: Pass image attributes to the kernel

Fri Jul 10 11:08:19 PDT 2015

Zoltan Gilian <zoltan.gilian at gmail.com> writes:

> Read-only and write-only image arguments are recognized and
> distinguished.
> Attributes of the image arguments are passed to the kernel as implicit
> arguments.

Thanks, this looks much better.  One thing that still seems kind of
unfortunate is the fact that you've added a single "image_attributes"
argument that lumps image dimensions with format.  I expect the set of
targets that need format metadata to be a strict superset of the targets
that need image dimensions, so it would be nice if the target could
specify them as separate arguments (e.g. semantic::image_size and
::image_format).

Another related point is that you've chosen to pass the metadata for all
images together at the end of the input buffer.  I have the suspicion
that it would simplify both the OpenCL front-end and compiler back-end
code if the image metadata was interleaved with images themselves.
E.g. for each image argument and kernel the target would request an
argument list like

 type::imageNd semantic::general,
 type::scalar semantic::image_format,
 type::scalar semantic::image_size

and assume a struct-like layout for each image argument in the input
buffer:

 struct image_argument {
        uint32_t index;
        uint32_t size[3];
        uint32_t format[2];
 };

For the back-end this would imply that the offset between a given image
argument and metadata field would be fixed, independent of how many
other arguments and how many images are being passed to the kernel, and
for the front-end it would mean you could get rid of the first pass of
the argument list you've added to exec_context::bind() (you could just
take the image from the last explicit_arg argument seen).

Some more nit-picks below.

> ---
>  src/gallium/state_trackers/clover/core/kernel.cpp  |  27 ++++++
>  src/gallium/state_trackers/clover/core/kernel.hpp  |  13 ++-
>  src/gallium/state_trackers/clover/core/memory.cpp  |   2 +-
>  src/gallium/state_trackers/clover/core/module.hpp  |   3 +-
>  .../state_trackers/clover/llvm/invocation.cpp      | 102 ++++++++++++++++++++-
>  5 files changed, 140 insertions(+), 7 deletions(-)
>
> diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp b/src/gallium/state_trackers/clover/core/kernel.cpp
> index 0756f06..d7d42a6 100644
> --- a/src/gallium/state_trackers/clover/core/kernel.cpp
> +++ b/src/gallium/state_trackers/clover/core/kernel.cpp
> @@ -159,6 +159,14 @@ kernel::exec_context::bind(intrusive_ptr<command_queue> _q,
>     auto msec = find(type_equals(module::section::text), m.secs);
>     auto explicit_arg = kern._args.begin();
>  
> +   std::vector<image_argument*> image_args;
> +   for (const auto& arg: kern._args) {
> +      if (auto img_arg = dynamic_cast<image_argument*>(arg.get())) {
> +         image_args.push_back(img_arg);
> +      }
> +   }
> +   auto image_arg = image_args.begin();
> +
>     for (auto &marg : margs) {
>        switch (marg.semantic) {
>        case module::argument::general:
> @@ -182,9 +190,28 @@ kernel::exec_context::bind(intrusive_ptr<command_queue> _q,
>           }
>           break;
>        }
> +      case module::argument::image_attributes: {
> +         auto img = (*image_arg++)->get_image();
> +         cl_image_format fmt = img->format();
> +         auto attributes = std::vector<cl_int>({
> +               static_cast<cl_int>(img->width()),
> +               static_cast<cl_int>(img->height()),
> +               static_cast<cl_int>(img->depth()),
> +               static_cast<cl_int>(fmt.image_channel_data_type),
> +               static_cast<cl_int>(fmt.image_channel_order)});

How about casting to cl_uint instead?  And you could do:

 std::vector<cl_uint> attributes {
   ...
 };

> +
> +         for (auto x: attributes) {
> +            auto arg = argument::create(marg);
> +
> +            arg->set(sizeof(x), &x);
> +            arg->bind(*this, marg);
> +         }
> +         break;
> +      }
>        }
>     }
>  
> +

Unnecessary whitespace. 

>     // Create a new compute state if anything changed.
>     if (!st || q != _q ||
>         cs.req_local_mem != mem_local ||
> diff --git a/src/gallium/state_trackers/clover/core/kernel.hpp b/src/gallium/state_trackers/clover/core/kernel.hpp
> index d6432a4..be9f783 100644
> --- a/src/gallium/state_trackers/clover/core/kernel.hpp
> +++ b/src/gallium/state_trackers/clover/core/kernel.hpp
> @@ -190,7 +190,14 @@ namespace clover {
>           pipe_surface *st;
>        };
>  
> -      class image_rd_argument : public argument {
> +      class image_argument : public argument {
> +      public:
> +         const image *get_image() const { return img; }

Can we call this method get() so the duality with set() is more obvious?

> +      protected:
> +         image *img;
> +      };
> +
> +      class image_rd_argument : public image_argument {
>        public:
>           virtual void set(size_t size, const void *value);
>           virtual void bind(exec_context &ctx,
> @@ -198,11 +205,10 @@ namespace clover {
>           virtual void unbind(exec_context &ctx);
>  
>        private:
> -         image *img;
>           pipe_sampler_view *st;
>        };
>  
> -      class image_wr_argument : public argument {
> +      class image_wr_argument : public image_argument {
>        public:
>           virtual void set(size_t size, const void *value);
>           virtual void bind(exec_context &ctx,
> @@ -210,7 +216,6 @@ namespace clover {
>           virtual void unbind(exec_context &ctx);
>  
>        private:
> -         image *img;
>           pipe_surface *st;
>        };
>  
> diff --git a/src/gallium/state_trackers/clover/core/memory.cpp b/src/gallium/state_trackers/clover/core/memory.cpp
> index 055336a..b852e68 100644
> --- a/src/gallium/state_trackers/clover/core/memory.cpp
> +++ b/src/gallium/state_trackers/clover/core/memory.cpp
> @@ -189,7 +189,7 @@ image2d::image2d(clover::context &ctx, cl_mem_flags flags,
>                   const cl_image_format *format, size_t width,
>                   size_t height, size_t row_pitch,
>                   void *host_ptr) :
> -   image(ctx, flags, format, width, height, 0,
> +   image(ctx, flags, format, width, height, 1,
>           row_pitch, 0, height * row_pitch, host_ptr) {
>  }
>  
> diff --git a/src/gallium/state_trackers/clover/core/module.hpp b/src/gallium/state_trackers/clover/core/module.hpp
> index 9d65688..99b1490 100644
> --- a/src/gallium/state_trackers/clover/core/module.hpp
> +++ b/src/gallium/state_trackers/clover/core/module.hpp
> @@ -72,7 +72,8 @@ namespace clover {
>           enum semantic {
>              general,
>              grid_dimension,
> -            grid_offset
> +            grid_offset,
> +            image_attributes
>           };
>  
>           argument(enum type type, size_t size,
> diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> index 9b91fee..d0c9fcb 100644
> --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
> +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> @@ -80,6 +80,7 @@
>  using namespace clover;
>  
>  namespace {
> +

Unnecessary whitespace.

>  #if 0
>     void
>     build_binary(const std::string &source, const std::string &target,
> @@ -340,17 +341,66 @@ namespace {
>        PM.run(*mod);
>     }
>  
> +   const llvm::MDNode *
> +   get_kernel_metadata(const llvm::Function *kernel_func) {
> +      auto mod = kernel_func->getParent();
> +      auto kernels_node = mod->getNamedMetadata("opencl.kernels");
> +      if (!kernels_node) {
> +         return nullptr;
> +      }
> +
> +      const llvm::MDNode *kernel_node = nullptr;
> +      for (unsigned i = 0; i < kernels_node->getNumOperands(); ++i) {
> +#if HAVE_LLVM >= 0x0306
> +         auto func = llvm::mdconst::dyn_extract<llvm::Function>(
> +#else
> +         auto func = llvm::dyn_cast<llvm::Function>(
> +#endif
> +                                    kernels_node->getOperand(i)->getOperand(0));
> +         if (func == kernel_func) {
> +            kernel_node = kernels_node->getOperand(i);
> +            break;
> +         }
> +      }
> +
> +      return kernel_node;
> +   }
> +
> +   std::vector<llvm::StringRef>
> +   get_kernel_access_qualifiers(const llvm::Function *kernel_func) {
> +      auto num_args = kernel_func->getArgumentList().size();
> +      auto access_quals = std::vector<llvm::StringRef>(num_args);
> +
> +      auto kernel_node = get_kernel_metadata(kernel_func);
> +      auto aq_node = llvm::cast<llvm::MDNode>(kernel_node->getOperand(2));
> +      auto str_node = llvm::cast<llvm::MDString>(aq_node->getOperand(0));
> +      assert(str_node->getString() == "kernel_arg_access_qual" &&
> +             "Cannot find kernel_arg_access_qual metadata node.");
> +      assert(aq_node->getNumOperands() == num_args + 1 &&
> +             "Wrong number of operands in kernel_arg_access_qual metadata.");
> +
> +      for (unsigned i = 1; i < aq_node->getNumOperands(); ++i) {
> +        auto aq = llvm::cast<llvm::MDString>(aq_node->getOperand(i));

access_quals.push_back(llvm::cast...)?

> +        access_quals[i-1] = aq->getString();
> +      }
> +
> +      return access_quals;
> +   }
> +
>     std::vector<module::argument>
>     get_kernel_args(const llvm::Module *mod, const std::string &kernel_name,
>                     const clang::LangAS::Map &address_spaces) {
>  
>        std::vector<module::argument> args;
>        llvm::Function *kernel_func = mod->getFunction(kernel_name);
> +      auto access_quals = get_kernel_access_qualifiers(kernel_func);
>  
>        llvm::DataLayout TD(mod);
>  
> +      unsigned arg_idx = 0;
> +      unsigned num_img_args = 0;
>        for (llvm::Function::const_arg_iterator I = kernel_func->arg_begin(),
> -                                      E = kernel_func->arg_end(); I != E; ++I) {
> +                           E = kernel_func->arg_end(); I != E; ++I, ++arg_idx) {
>           const llvm::Argument &arg = *I;
>  
>           llvm::Type *arg_type = arg.getType();
> @@ -375,6 +425,43 @@ namespace {
>           }
>  
>           if (arg_type->isPointerTy()) {
> +            // XXX: Figure out LLVM->OpenCL address space mappings for each
> +            // target.  I think we need to ask clang what these are.  For now,
> +            // pretend everything is in the global address space.
> +            llvm::Type *elem_type = arg_type->getPointerElementType();
> +            if (elem_type->isStructTy()) {
> +               llvm::StringRef type_name = elem_type->getStructName();
> +               llvm::StringRef access_qual = access_quals[arg_idx];
> +
> +               bool is_image2d = type_name.startswith("opencl.image2d_t");
> +               bool is_image3d = type_name.startswith("opencl.image3d_t");

How about you open the 'if (is_image2d || is_image3d)' block here?  The
assertion and marg_type assignment below would look less confusing.

Thanks.

> +               bool is_write_only = access_qual == "write_only";
> +               bool is_read_only = access_qual == "read_only";
> +
> +               assert(((!is_image2d && !is_image3d) ||
> +                       is_write_only || is_read_only) &&
> +                      "Wrong image access qualifier");
> +
> +               typename module::argument::type marg_type;
> +               if (is_image2d && is_read_only) {
> +                  marg_type = module::argument::image2d_rd;
> +               } else if (is_image2d && is_write_only) {
> +                  marg_type = module::argument::image2d_wr;
> +               } else if (is_image3d && is_read_only) {
> +                  marg_type = module::argument::image3d_rd;
> +               } else if (is_image3d && is_write_only) {
> +                  marg_type = module::argument::image3d_wr;
> +               }
> +
> +               if (is_image2d || is_image3d) {
> +                  args.push_back(module::argument(marg_type,
> +                                                  arg_store_size, target_size,
> +                                                  target_align,
> +                                                  module::argument::zero_ext));
> +                  num_img_args++;
> +                  continue;
> +               }
> +            }
>              unsigned address_space = llvm::cast<llvm::PointerType>(arg_type)->getAddressSpace();
>              if (address_space == address_spaces[clang::LangAS::opencl_local
>                                                       - clang::LangAS::Offset]) {
> @@ -430,6 +517,19 @@ namespace {
>                            module::argument::zero_ext,
>                            module::argument::grid_offset));
>  
> +      llvm::Type *img_attr_type =
> +         TD.getSmallestLegalIntType(mod->getContext(), sizeof(cl_int) * 8);
> +
> +      auto img_attr_arg =
> +         module::argument(module::argument::scalar, sizeof(cl_int),
> +                          TD.getTypeStoreSize(img_attr_type),
> +                          TD.getABITypeAlignment(img_attr_type),
> +                          module::argument::zero_ext,
> +                          module::argument::image_attributes);
> +      for (unsigned i = 0; i < num_img_args; ++i) {
> +         args.push_back(img_attr_arg);
> +      }
> +
>        return args;
>     }
>  
> -- 
> 2.4.2
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 212 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150710/9190baca/attachment.sig>