[Mesa-dev] [PATCH 6/6] clover: Add support for handling reqd_work_group_size attribute v2
Tom Stellard
thomas.stellard at amd.com
Mon Mar 30 08:31:42 PDT 2015
This patch enables clover to return the correct value for
CL_KERNEL_COMPILE_WORK_GROUP_SIZE and also verify that the correct
local_work_size is used when enqueuing kernels with this attribute.
v2:
- Store the work group size as part of the symbol rather than as a
kernel argument.
---
src/gallium/state_trackers/clover/api/kernel.cpp | 9 ++++--
src/gallium/state_trackers/clover/core/kernel.cpp | 11 +++++--
src/gallium/state_trackers/clover/core/kernel.hpp | 2 +-
src/gallium/state_trackers/clover/core/module.cpp | 1 +
src/gallium/state_trackers/clover/core/module.hpp | 9 ++++--
.../state_trackers/clover/llvm/invocation.cpp | 36 ++++++++++++++++++++--
.../state_trackers/clover/tgsi/compiler.cpp | 7 ++++-
7 files changed, 64 insertions(+), 11 deletions(-)
diff --git a/src/gallium/state_trackers/clover/api/kernel.cpp b/src/gallium/state_trackers/clover/api/kernel.cpp
index 05cc392..8d8694c 100644
--- a/src/gallium/state_trackers/clover/api/kernel.cpp
+++ b/src/gallium/state_trackers/clover/api/kernel.cpp
@@ -24,6 +24,8 @@
#include "core/kernel.hpp"
#include "core/event.hpp"
+#include <functional>
+
using namespace clover;
CLOVER_API cl_kernel
@@ -161,7 +163,7 @@ clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id d_dev,
break;
case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
- buf.as_vector<size_t>() = kern.required_block_size();
+ buf.as_vector<size_t>() = kern.required_block_size(dev);
break;
case CL_KERNEL_LOCAL_MEM_SIZE:
@@ -242,12 +244,15 @@ namespace {
if (d_block_size) {
auto block_size = range(d_block_size, dims);
+ auto reqd_block_size = kern.required_block_size(q.device());
if (any_of(is_zero(), block_size) ||
any_of(greater(), block_size, q.device().max_block_size()))
throw error(CL_INVALID_WORK_ITEM_SIZE);
- if (any_of(modulus(), grid_size, block_size))
+ if (any_of(modulus(), grid_size, block_size) ||
+ (fold(multiplies(), 1u, reqd_block_size) &&
+ any_of(std::not_equal_to<size_t>(), block_size, reqd_block_size)))
throw error(CL_INVALID_WORK_GROUP_SIZE);
if (fold(multiplies(), 1u, block_size) >
diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp b/src/gallium/state_trackers/clover/core/kernel.cpp
index 442762c..606e4e9 100644
--- a/src/gallium/state_trackers/clover/core/kernel.cpp
+++ b/src/gallium/state_trackers/clover/core/kernel.cpp
@@ -120,8 +120,15 @@ kernel::optimal_block_size(const command_queue &q,
}
std::vector<size_t>
-kernel::required_block_size() const {
- return { 0, 0, 0 };
+kernel::required_block_size(const device &dev) const {
+
+ const clover::module &m = program().binary(dev);
+ auto symbol = find(name_equals(name()), m.syms);
+
+ std::vector<size_t> block_size(symbol.reqd_wgs.begin(),
+ symbol.reqd_wgs.end());
+ block_size.resize(3);
+ return block_size;
}
kernel::argument_range
diff --git a/src/gallium/state_trackers/clover/core/kernel.hpp b/src/gallium/state_trackers/clover/core/kernel.hpp
index d6432a4..4bef5b8 100644
--- a/src/gallium/state_trackers/clover/core/kernel.hpp
+++ b/src/gallium/state_trackers/clover/core/kernel.hpp
@@ -130,7 +130,7 @@ namespace clover {
optimal_block_size(const command_queue &q,
const std::vector<size_t> &grid_size) const;
std::vector<size_t>
- required_block_size() const;
+ required_block_size(const device &dev) const;
argument_range args();
const_argument_range args() const;
diff --git a/src/gallium/state_trackers/clover/core/module.cpp b/src/gallium/state_trackers/clover/core/module.cpp
index be10e35..f7bb0bf 100644
--- a/src/gallium/state_trackers/clover/core/module.cpp
+++ b/src/gallium/state_trackers/clover/core/module.cpp
@@ -171,6 +171,7 @@ namespace {
_proc(s, x.section);
_proc(s, x.offset);
_proc(s, x.args);
+ _proc(s, x.reqd_wgs);
}
};
diff --git a/src/gallium/state_trackers/clover/core/module.hpp b/src/gallium/state_trackers/clover/core/module.hpp
index ee6caf9..c0bc320 100644
--- a/src/gallium/state_trackers/clover/core/module.hpp
+++ b/src/gallium/state_trackers/clover/core/module.hpp
@@ -101,14 +101,17 @@ namespace clover {
struct symbol {
symbol(const compat::vector<char> &name, resource_id section,
- size_t offset, const compat::vector<argument> &args) :
- name(name), section(section), offset(offset), args(args) { }
- symbol() : name(), section(0), offset(0), args() { }
+ size_t offset, const compat::vector<argument> &args,
+ const compat::vector<size_t> reqd_wgs) :
+ name(name), section(section), offset(offset), args(args),
+ reqd_wgs(reqd_wgs){ }
+ symbol() : name(), section(0), offset(0), args() { }
compat::vector<char> name;
resource_id section;
size_t offset;
compat::vector<argument> args;
+ compat::vector<size_t> reqd_wgs;
};
void serialize(compat::ostream &os) const;
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index 28198a5..70ef526 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -29,6 +29,7 @@
#include <clang/Basic/TargetInfo.h>
#include <llvm/Bitcode/BitstreamWriter.h>
#include <llvm/Bitcode/ReaderWriter.h>
+#include <llvm/IR/Constants.h>
#if HAVE_LLVM < 0x0305
#include <llvm/Linker.h>
#else
@@ -135,6 +136,7 @@ namespace {
llvm::Function *fn;
size_t offset;
+ compat::vector<size_t> reqd_work_group_size;
};
void debug_log(const std::string &msg, const std::string &suffix) {
@@ -306,6 +308,34 @@ namespace {
kernel.fn = llvm::dyn_cast<llvm::Function>(
#endif
kernel_node->getOperand(i)->getOperand(0));
+ for (unsigned md_idx = 1,
+ md_e = kernel_node->getOperand(i)->getNumOperands();
+ md_idx != md_e; ++md_idx) {
+ const llvm::MDNode *md_node = llvm::dyn_cast<llvm::MDNode>(
+ kernel_node->getOperand(i)->getOperand(md_idx).get());
+
+ const llvm::MDString *md_name =
+ llvm::dyn_cast<llvm::MDString>(md_node->getOperand(0));
+ if (!md_name)
+ continue;
+
+ if (!md_name->getString().equals("reqd_work_group_size"))
+ continue;
+
+ for (unsigned reqd_idx = 0; reqd_idx < 3; ++reqd_idx) {
+ const llvm::ConstantInt *reqd_size =
+#if HAVE_LLVM >= 0x0306
+ llvm::mdconst::dyn_extract<llvm::ConstantInt>(
+#else
+ llvm::dyn_cast<llvm::ConstantInt>(
+#endif
+ md_node->getOperand(reqd_idx + 1).get());
+
+ if (!reqd_size)
+ break;
+ kernel.reqd_work_group_size.push_back(reqd_size->getZExtValue());
+ }
+ }
}
}
@@ -478,7 +508,8 @@ namespace {
get_kernel_args(mod, kernel, address_spaces);
std::string kernel_name = kernel.fn->getName().str();
- m.syms.push_back(module::symbol(kernel_name, 0, i, args ));
+ m.syms.push_back(module::symbol(kernel_name, 0, i, args,
+ kernel.reqd_work_group_size));
}
header.num_bytes = llvm_bitcode.size();
@@ -656,7 +687,8 @@ namespace {
compat::vector<module::argument> args =
get_kernel_args(mod, *i, address_spaces);
std::string kernel_name = (*i).fn->getName().str();
- m.syms.push_back(module::symbol(kernel_name, 0, (*i).offset, args ));
+ m.syms.push_back(module::symbol(kernel_name, 0, (*i).offset, args,
+ (*i).reqd_work_group_size));
}
return m;
diff --git a/src/gallium/state_trackers/clover/tgsi/compiler.cpp b/src/gallium/state_trackers/clover/tgsi/compiler.cpp
index 93dfeb5..448f4cd 100644
--- a/src/gallium/state_trackers/clover/tgsi/compiler.cpp
+++ b/src/gallium/state_trackers/clover/tgsi/compiler.cpp
@@ -41,6 +41,11 @@ namespace {
std::string name, tok;
module::size_t offset;
compat::vector<module::argument> args;
+ compat::vector<size_t> reqd_wgs;
+
+ reqd_wgs.push_back(0);
+ reqd_wgs.push_back(0);
+ reqd_wgs.push_back(0);
if (!(ts >> name))
continue;
@@ -71,7 +76,7 @@ namespace {
throw build_error("invalid kernel argument");
}
- m.syms.push_back({ name, 0, offset, args });
+ m.syms.push_back({ name, 0, offset, args, reqd_wgs });
}
}
--
2.0.4
More information about the mesa-dev mailing list