[Mesa-dev] [PATCH] clover: Pass image attributes to the kernel

Francisco Jerez currojerez at riseup.net
Sat Jul 25 04:06:03 PDT 2015


Zoltan Gilian <zoltan.gilian at gmail.com> writes:

> Read-only and write-only image arguments are recognized and
> distinguished.
> Attributes of the image arguments are passed to the kernel as implicit
> arguments.
> ---
>  src/gallium/state_trackers/clover/core/kernel.cpp  |  46 ++++++-
>  src/gallium/state_trackers/clover/core/kernel.hpp  |  13 +-
>  src/gallium/state_trackers/clover/core/memory.cpp  |   2 +-
>  src/gallium/state_trackers/clover/core/module.hpp  |   4 +-
>  .../state_trackers/clover/llvm/invocation.cpp      | 147 ++++++++++++++++++++-
>  5 files changed, 198 insertions(+), 14 deletions(-)
>
> diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp b/src/gallium/state_trackers/clover/core/kernel.cpp
> index 0756f06..1a6c28f 100644
> --- a/src/gallium/state_trackers/clover/core/kernel.cpp
> +++ b/src/gallium/state_trackers/clover/core/kernel.cpp
> @@ -158,13 +158,18 @@ kernel::exec_context::bind(intrusive_ptr<command_queue> _q,
>     auto margs = find(name_equals(kern.name()), m.syms).args;
>     auto msec = find(type_equals(module::section::text), m.secs);
>     auto explicit_arg = kern._args.begin();
> +   image_argument *last_image_arg = nullptr;
>  
>     for (auto &marg : margs) {
>        switch (marg.semantic) {
> -      case module::argument::general:
> +      case module::argument::general: {
> +         auto image_arg = dynamic_cast<image_argument*>(explicit_arg->get());
> +         if (image_arg) {
> +            last_image_arg = image_arg;
> +         }
>           (*(explicit_arg++))->bind(*this, marg);
>           break;
> -
> +      }
>        case module::argument::grid_dimension: {
>           const cl_uint dimension = grid_offset.size();
>           auto arg = argument::create(marg);
> @@ -182,6 +187,36 @@ kernel::exec_context::bind(intrusive_ptr<command_queue> _q,
>           }
>           break;
>        }
> +      case module::argument::image_size: {
> +         assert(last_image_arg);
> +         auto img = last_image_arg->get();

Instead of carrying around an extra variable during the loop, you could
achieve the same effect more locally by doing:

|            auto img = dynamic_cast<image_argument &>(**(explicit_arg - 1))
|                       .get();

The cast to reference would also make sure that the argument is of the
specified type or otherwise throw std::bad_cast which is as good as an
assertion failure.

> +         std::vector<cl_uint> image_size({
> +               img->width(),
> +               img->height(),
> +               img->depth()});

Parentheses are not necessary around the curly braces, you could just:

|         std::vector<cl_uint> image_size {
|               img->width(),
|               img->height(),
|               img->depth()
|         };

It also seems like you've got rid of the static casts you had
previously?  These image methods return a size_t value which may be of
different size than cl_uint and cause a warning.

> +         for (auto x: image_size) {

Leave a space around the colon for consistency, also in the cases below.

> +            auto arg = argument::create(marg);
> +
> +            arg->set(sizeof(x), &x);
> +            arg->bind(*this, marg);
> +         }
> +         break;
> +      }
> +      case module::argument::image_format: {
> +         assert(last_image_arg);
> +         auto img = last_image_arg->get();
> +         cl_image_format fmt = img->format();
> +         std::vector<cl_uint> image_format({
> +               fmt.image_channel_data_type,
> +               fmt.image_channel_order});

Same comments apply here.

> +         for (auto x: image_format) {
> +            auto arg = argument::create(marg);
> +
> +            arg->set(sizeof(x), &x);
> +            arg->bind(*this, marg);
> +         }
> +         break;
> +      }
>        }
>     }
>  
> @@ -532,6 +567,13 @@ kernel::sampler_argument::set(size_t size, const void *value) {
>  void
>  kernel::sampler_argument::bind(exec_context &ctx,
>                                 const module::argument &marg) {
> +   auto v = bytes(ctx.samplers.size());
> +
> +   extend(v, module::argument::zero_ext, marg.target_size);
> +   byteswap(v, ctx.q->device().endianness());
> +   align(ctx.input, marg.target_align);
> +   insert(ctx.input, v);
> +

My expectation here was that the compiler would be able to hard-code
sampler indices in the shader without the API passing any actual data in
the input buffer.  Doesn't that work for you?

>     st = s->bind(*ctx.q);
>     ctx.samplers.push_back(st);
>  }
> diff --git a/src/gallium/state_trackers/clover/core/kernel.hpp b/src/gallium/state_trackers/clover/core/kernel.hpp
> index d6432a4..007ffcc 100644
> --- a/src/gallium/state_trackers/clover/core/kernel.hpp
> +++ b/src/gallium/state_trackers/clover/core/kernel.hpp
> @@ -190,7 +190,14 @@ namespace clover {
>           pipe_surface *st;
>        };
>  
> -      class image_rd_argument : public argument {
> +      class image_argument : public argument {
> +      public:
> +         const image *get() const { return img; }

Please write the return statement on its own line for consistency.

> +      protected:
> +         image *img;
> +      };
> +
> +      class image_rd_argument : public image_argument {
>        public:
>           virtual void set(size_t size, const void *value);
>           virtual void bind(exec_context &ctx,
> @@ -198,11 +205,10 @@ namespace clover {
>           virtual void unbind(exec_context &ctx);
>  
>        private:
> -         image *img;
>           pipe_sampler_view *st;
>        };
>  
> -      class image_wr_argument : public argument {
> +      class image_wr_argument : public image_argument {
>        public:
>           virtual void set(size_t size, const void *value);
>           virtual void bind(exec_context &ctx,
> @@ -210,7 +216,6 @@ namespace clover {
>           virtual void unbind(exec_context &ctx);
>  
>        private:
> -         image *img;
>           pipe_surface *st;
>        };
>  
> diff --git a/src/gallium/state_trackers/clover/core/memory.cpp b/src/gallium/state_trackers/clover/core/memory.cpp
> index 055336a..b852e68 100644
> --- a/src/gallium/state_trackers/clover/core/memory.cpp
> +++ b/src/gallium/state_trackers/clover/core/memory.cpp
> @@ -189,7 +189,7 @@ image2d::image2d(clover::context &ctx, cl_mem_flags flags,
>                   const cl_image_format *format, size_t width,
>                   size_t height, size_t row_pitch,
>                   void *host_ptr) :
> -   image(ctx, flags, format, width, height, 0,
> +   image(ctx, flags, format, width, height, 1,
>           row_pitch, 0, height * row_pitch, host_ptr) {
>  }
>  
> diff --git a/src/gallium/state_trackers/clover/core/module.hpp b/src/gallium/state_trackers/clover/core/module.hpp
> index 9d65688..5db0548 100644
> --- a/src/gallium/state_trackers/clover/core/module.hpp
> +++ b/src/gallium/state_trackers/clover/core/module.hpp
> @@ -72,7 +72,9 @@ namespace clover {
>           enum semantic {
>              general,
>              grid_dimension,
> -            grid_offset
> +            grid_offset,
> +            image_size,
> +            image_format
>           };
>  
>           argument(enum type type, size_t size,
> diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> index 967284d..e5cd59c 100644
> --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
> +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> @@ -340,18 +340,91 @@ namespace {
>        PM.run(*mod);
>     }
>  
> +   // Kernel metadata
> +
> +   const llvm::MDNode *
> +   get_kernel_metadata(const llvm::Function *kernel_func) {
> +      auto mod = kernel_func->getParent();
> +      auto kernels_node = mod->getNamedMetadata("opencl.kernels");
> +      if (!kernels_node) {
> +         return nullptr;
> +      }
> +
> +      const llvm::MDNode *kernel_node = nullptr;
> +      for (unsigned i = 0; i < kernels_node->getNumOperands(); ++i) {
> +#if HAVE_LLVM >= 0x0306
> +         auto func = llvm::mdconst::dyn_extract<llvm::Function>(
> +#else
> +         auto func = llvm::dyn_cast<llvm::Function>(
> +#endif
> +                                    kernels_node->getOperand(i)->getOperand(0));
> +         if (func == kernel_func) {
> +            kernel_node = kernels_node->getOperand(i);
> +            break;
> +         }
> +      }
> +
> +      return kernel_node;
> +   }
> +
> +   llvm::MDNode*
> +   node_from_op_checked(const llvm::MDOperand &md_operand,
> +                        llvm::StringRef expect_name,
> +                        unsigned expect_num_args)
> +   {
> +      auto node = llvm::cast<llvm::MDNode>(md_operand);
> +      assert(node->getNumOperands() == expect_num_args &&
> +             "Wrong number of operands.");
> +
> +      auto str_node = llvm::cast<llvm::MDString>(node->getOperand(0));
> +      assert(str_node->getString() == expect_name &&
> +             "Wrong metadata node name.");
> +
> +      return node;
> +   }
> +
> +   struct kernel_arg_md {
> +      llvm::StringRef type_name;
> +      llvm::StringRef access_qual;
> +      kernel_arg_md(llvm::StringRef type_name_, llvm::StringRef access_qual_):
> +         type_name(type_name_), access_qual(access_qual_) {}
> +   };
> +
> +   std::vector<kernel_arg_md>
> +   get_kernel_arg_md(const llvm::Function *kernel_func) {
> +      auto num_args = kernel_func->getArgumentList().size();
> +
> +      auto kernel_node = get_kernel_metadata(kernel_func);
> +      auto aq = node_from_op_checked(kernel_node->getOperand(2),
> +                                     "kernel_arg_access_qual", num_args + 1);
> +      auto ty = node_from_op_checked(kernel_node->getOperand(3),
> +                                     "kernel_arg_type", num_args + 1);
> +
> +      auto res = std::vector<kernel_arg_md>();
> +      res.reserve(num_args);

std::vector<kernel_arg_md> res;

> +      for (unsigned i = 0; i < num_args; ++i) {
> +         res.push_back(kernel_arg_md(
> +            llvm::cast<llvm::MDString>(ty->getOperand(i+1))->getString(),
> +            llvm::cast<llvm::MDString>(aq->getOperand(i+1))->getString()));
> +      }
> +
> +      return res;
> +   }
> +
>     std::vector<module::argument>
>     get_kernel_args(const llvm::Module *mod, const std::string &kernel_name,
>                     const clang::LangAS::Map &address_spaces) {
>  
>        std::vector<module::argument> args;
>        llvm::Function *kernel_func = mod->getFunction(kernel_name);
> +      assert(kernel_func && "Kernel name not found in module.");
> +      auto arg_md = get_kernel_arg_md(kernel_func);
>  
>        llvm::DataLayout TD(mod);
> +      llvm::Type *size_type =
> +         TD.getSmallestLegalIntType(mod->getContext(), sizeof(cl_uint) * 8);
>  
> -      for (llvm::Function::const_arg_iterator I = kernel_func->arg_begin(),
> -                                      E = kernel_func->arg_end(); I != E; ++I) {
> -         const llvm::Argument &arg = *I;
> +      for (const auto &arg: kernel_func->args()) {
>  
>           llvm::Type *arg_type = arg.getType();
>           const unsigned arg_store_size = TD.getTypeStoreSize(arg_type);
> @@ -369,6 +442,68 @@ namespace {
>           unsigned target_size = TD.getTypeStoreSize(target_type);
>           unsigned target_align = TD.getABITypeAlignment(target_type);
>  
> +         llvm::StringRef type_name = arg_md[arg.getArgNo()].type_name;
> +         llvm::StringRef access_qual = arg_md[arg.getArgNo()].access_qual;
> +
> +         // Image
> +         bool is_image2d = type_name == "image2d_t";
> +         bool is_image3d = type_name == "image3d_t";

These should be marked const.

> +         if (is_image2d || is_image3d) {
> +            bool is_write_only = access_qual == "write_only";
> +            bool is_read_only = access_qual == "read_only";
> +

And these too.

> +            typename module::argument::type marg_type;
> +            if (is_image2d && is_read_only) {
> +               marg_type = module::argument::image2d_rd;
> +            } else if (is_image2d && is_write_only) {
> +               marg_type = module::argument::image2d_wr;
> +            } else if (is_image3d && is_read_only) {
> +               marg_type = module::argument::image3d_rd;
> +            } else if (is_image3d && is_write_only) {
> +               marg_type = module::argument::image3d_wr;
> +            } else {
> +               assert(0 && "Wrong image access qualifier");
> +            }
> +
> +            args.push_back(module::argument(marg_type,
> +                                            arg_store_size, target_size,
> +                                            target_align,
> +                                            module::argument::zero_ext));
> +            continue;
> +         }
> +
> +         // Sampler
> +         if (type_name == "sampler_t") {
> +            args.push_back(module::argument(module::argument::sampler,
> +                                            arg_store_size, target_size,
> +                                            target_align,
> +                                            module::argument::zero_ext));
> +            continue;
> +         }
> +
> +         // Image size implicit argument
> +         if (type_name == "image_size") {

I don't think it's a good idea to use such obvious names for the
implicit argument types.  Couldn't they collide with some type declared
by the user that happens to have the same name?  How about
"__clover_image_size" or something similar?  (Identifiers starting with
double underscore are reserved for the implementation at least on C and
GLSL, not sure about OpenCL-C)

> +            args.push_back(module::argument(module::argument::scalar,
> +                                            sizeof(cl_uint),
> +                                            TD.getTypeStoreSize(size_type),
> +                                            TD.getABITypeAlignment(size_type),
> +                                            module::argument::zero_ext,
> +                                            module::argument::image_size));
> +            continue;
> +         }
> +
> +         // Image format implicit argument
> +         if (type_name == "image_format") {
> +            args.push_back(module::argument(module::argument::scalar,
> +                                            sizeof(cl_uint),
> +                                            TD.getTypeStoreSize(size_type),
> +                                            TD.getABITypeAlignment(size_type),
> +                                            module::argument::zero_ext,
> +                                            module::argument::image_format));
> +            continue;
> +         }
> +
> +         // Other types
>           if (llvm::isa<llvm::PointerType>(arg_type) && arg.hasByValAttr()) {
>              arg_type =
>                    llvm::dyn_cast<llvm::PointerType>(arg_type)->getElementType();
> @@ -413,9 +548,6 @@ namespace {
>        // Append implicit arguments.  XXX - The types, ordering and
>        // vector size of the implicit arguments should depend on the
>        // target according to the selected calling convention.
> -      llvm::Type *size_type =
> -         TD.getSmallestLegalIntType(mod->getContext(), sizeof(cl_uint) * 8);
> -
>        args.push_back(
>           module::argument(module::argument::scalar, sizeof(cl_uint),
>                            TD.getTypeStoreSize(size_type),
> @@ -744,6 +876,9 @@ clover::compile_program_llvm(const std::string &source,
>           std::vector<char> code = compile_native(mod, triple, processor,
>                                                   get_debug_flags() & DBG_ASM,
>                                                   r_log);
> +         // Target-specific passes may alter functions
> +         kernels.clear();
> +         find_kernels(mod, kernels);

Bah...  To make this sort of mistake impossible I'd get rid of the
"kernels" argument of optimize() and build_module_native/llvm(), and
make them call find_kernels() directly.  It would also be nice to have
find_kernels() return its result as return value instead of returning
void, so you could declare kernel arrays as const and initialize them at
the same point.

>           m = build_module_native(code, mod, kernels, address_spaces, r_log);
>           break;
>        }
> -- 
> 2.4.6
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 212 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150725/3511074d/attachment-0001.sig>


More information about the mesa-dev mailing list