[Mesa-dev] [PATCH] Bounding box avx2 intrinsic algorithm for openGL/GLES
kedar.j.karanje at intel.com
kedar.j.karanje at intel.com
Fri Aug 31 05:25:21 UTC 2018
From: "J Karanje, Kedar" <kedar.j.karanje at intel.com>
The feature is compiled in by default; however, the following option must be
added to drirc to enable it at runtime:
<option name="enable_bounding_box_culling" value="true"/>
vbo: Main algorithm & code to check for MVP & vertex position location
Build Files: Flags to enable BBOX Code and check AVX version
compiler: Code to recognize simple shader
(gl_Position is a simple function of the MVP matrix and the vertex position)
i965 & util: dri query to check if feature is enabled
vbo: Implements a bounding-box algorithm for Mesa. We hook into the default
DrawElements and DrawRangeElements paths and obtain the locations of the MVP
matrix and of the vertex position from the corresponding program. Using this
data we re-create the frustum planes, build a box around the object, and use
its 8 vertices (the box vertices) to check whether the box is within the
frustum. We drop the draw calls that are completely outside the view frustum
and fall back to sub-boxes for objects that intersect the frustum planes.
The current patch has been verified on KBL + Ubuntu 16.04. We noticed an
8~10% improvement in GFXBench T-Rex offscreen and ~2% in Manhattan offscreen.
Platforms where AVX2 is not supported should still see a ~6-8% improvement;
the other KPIs were not impacted.
Based on empirical data we have set the minimum vertex count to 999 and the
sub-box size to 198, which provides the best results. We have also implemented
some level of caching for the box coordinates and the frustum-plane
coordinates, and we have optimized some algorithms to use AVX2 when the target
supports it.
The shader classification code currently lives in the HIR path (ast_to_hir);
we have received review comments asking us to move it to NIR.
Signed-off-by: Aravindan Muthukumar <aravindan.muthukumar at intel.com>
Signed-off-by: Yogesh Marathe <yogesh.marathe at intel.com>
---
Android.common.mk | 19 +
configure.ac | 34 +-
src/compiler/glsl/ast_to_hir.cpp | 168 +++-
src/compiler/glsl/glsl_parser_extras.cpp | 10 +
src/compiler/glsl/glsl_parser_extras.h | 7 +
src/compiler/glsl/linker.cpp | 18 +
src/intel/common/gen_debug.c | 7 +
src/mesa/Makefile.sources | 11 +
src/mesa/drivers/dri/i965/brw_context.c | 17 +
src/mesa/drivers/dri/i965/intel_screen.c | 4 +
src/mesa/main/bufferobj.c | 19 +
src/mesa/main/mtypes.h | 51 +
src/mesa/program/Android.mk | 1 +
src/mesa/program/program.c | 3 +
src/mesa/vbo/vbo_bbox.c | 1538 ++++++++++++++++++++++++++++++
src/mesa/vbo/vbo_bbox.h | 383 ++++++++
src/mesa/vbo/vbo_bbox_cache.c | 195 ++++
src/mesa/vbo/vbo_context.c | 11 +-
src/mesa/vbo/vbo_exec_array.c | 37 +-
src/util/00-mesa-defaults.conf | 4 +
src/util/xmlpool/t_options.h | 5 +
21 files changed, 2535 insertions(+), 7 deletions(-)
mode change 100644 => 100755 src/compiler/glsl/ast_to_hir.cpp
create mode 100644 src/mesa/vbo/vbo_bbox.c
create mode 100644 src/mesa/vbo/vbo_bbox.h
create mode 100644 src/mesa/vbo/vbo_bbox_cache.c
diff --git a/Android.common.mk b/Android.common.mk
index aa1b266..efd6792 100644
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -21,6 +21,8 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
+MESA_BBOX_ENABLE=true
+
ifeq ($(LOCAL_IS_HOST_MODULE),true)
LOCAL_CFLAGS += -D_GNU_SOURCE
endif
@@ -80,6 +82,10 @@ LOCAL_CFLAGS += \
-fno-trapping-math \
-Wno-sign-compare
+ifeq ($(MESA_BBOX_ENABLE),true)
+LOCAL_CFLAGS += -DMESA_BBOX_OPT
+endif
+
LOCAL_CPPFLAGS += \
-D__STDC_CONSTANT_MACROS \
-D__STDC_FORMAT_MACROS \
@@ -87,6 +93,10 @@ LOCAL_CPPFLAGS += \
-Wno-error=non-virtual-dtor \
-Wno-non-virtual-dtor
+ifeq ($(MESA_BBOX_ENABLE),true)
+LOCAL_CPPFLAGS += -DMESA_BBOX_OPT
+endif
+
# mesa requires at least c99 compiler
LOCAL_CONLYFLAGS += \
-std=c99
@@ -98,6 +108,15 @@ ifeq ($(filter 5 6 7 8 9, $(MESA_ANDROID_MAJOR_VERSION)),)
LOCAL_CFLAGS += -DHAVE_TIMESPEC_GET
endif
+ifeq ($(MESA_BBOX_ENABLE),true)
+#if defined(CONFIG_AS_AVX)
+LOCAL_CONLYFLAGS += -mavx
+#elif
+LOCAL_CONLYFLAGS += -msse4.1
+#endif
+endif
+
+
ifeq ($(strip $(MESA_ENABLE_ASM)),true)
ifeq ($(TARGET_ARCH),x86)
LOCAL_CFLAGS += \
diff --git a/configure.ac b/configure.ac
index 4d9d9e5..dcdbcf3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -278,7 +278,8 @@ _SAVE_LDFLAGS="$LDFLAGS"
_SAVE_CPPFLAGS="$CPPFLAGS"
dnl Compiler macros
-DEFINES="-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS"
+DEFINES="-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DMESA_BBOX_OPT"
+dnl DEFINES="-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS"
AC_SUBST([DEFINES])
android=no
case "$host_os" in
@@ -295,10 +296,38 @@ esac
AM_CONDITIONAL(HAVE_ANDROID, test "x$android" = xyes)
+
+dnl Conditional parameters for enabling BBOX file compilation in Makefile
+dnl bbox=yes
+dnl MESA_BBOX_ENABLE=true
+dnl AM_CONDITIONAL([MESA_BBOX_ENABLE], [test x$bbox= xyes])
+
dnl
dnl Check compiler flags
dnl
-AX_CHECK_COMPILE_FLAG([-Wall], [CFLAGS="$CFLAGS -Wall"])
+
+AC_ARG_WITH([swr-archs],
+ [AS_HELP_STRING([--with-swr-archs@<:@=DIRS...@:>@],
+ [comma delimited swr architectures list, e.g.
+ "avx,avx2,sse4.1,sse4.2" @<:@default="sse4.1,sse4.2"@:>@])],
+ [with_swr_archs="$withval"],
+ [with_swr_archs="sse4.1,sse4.2"])
+
+swr_archs=`IFS=', '; echo $with_swr_archs`
+if test "$swr_archs" = "avx"; then
+AX_CHECK_COMPILE_FLAG([-Wall], [CFLAGS="$CFLAGS -Wall -msse4.1 -mavx"])
+AX_CHECK_COMPILE_FLAG([-Wall], [CXXFLAGS="$CXXFLAGS -Wall -msse4.1 -mavx"])
+elif test "$swr_archs" = "avx2"; then
+AX_CHECK_COMPILE_FLAG([-Wall], [CFLAGS="$CFLAGS -Wall -msse4.1 -mavx -mavx2"])
+AX_CHECK_COMPILE_FLAG([-Wall], [CXXFLAGS="$CXXFLAGS -Wall -msse4.1 -mavx -mavx2"])
+elif test "$swr_archs" = "sse4.1"; then
+AX_CHECK_COMPILE_FLAG([-Wall], [CFLAGS="$CFLAGS -Wall -msse4.1"])
+AX_CHECK_COMPILE_FLAG([-Wall], [CXXFLAGS="$CXXFLAGS -Wall -msse4.1"])
+elif test "$swr_archs" = "sse4.2"; then
+AX_CHECK_COMPILE_FLAG([-Wall], [CFLAGS="$CFLAGS -Wall -msse4.1 -msse4.2"])
+AX_CHECK_COMPILE_FLAG([-Wall], [CXXFLAGS="$CXXFLAGS -Wall -msse4.1 -msse4.2"])
+fi
+
AX_CHECK_COMPILE_FLAG([-Werror=implicit-function-declaration], [CFLAGS="$CFLAGS -Werror=implicit-function-declaration"])
AX_CHECK_COMPILE_FLAG([-Werror=missing-prototypes], [CFLAGS="$CFLAGS -Werror=missing-prototypes"])
AX_CHECK_COMPILE_FLAG([-Wmissing-prototypes], [CFLAGS="$CFLAGS -Wmissing-prototypes"])
@@ -313,7 +342,6 @@ dnl
dnl Check C++ compiler flags
dnl
AC_LANG_PUSH([C++])
-AX_CHECK_COMPILE_FLAG([-Wall], [CXXFLAGS="$CXXFLAGS -Wall"])
AX_CHECK_COMPILE_FLAG([-fno-math-errno], [CXXFLAGS="$CXXFLAGS -fno-math-errno"])
AX_CHECK_COMPILE_FLAG([-fno-trapping-math], [CXXFLAGS="$CXXFLAGS -fno-trapping-math"])
AX_CHECK_COMPILE_FLAG([-fvisibility=hidden], [VISIBILITY_CXXFLAGS="-fvisibility=hidden"])
diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
old mode 100644
new mode 100755
index 5d3f10b..f4e8dea
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -62,7 +62,6 @@
#include "builtin_functions.h"
using namespace ir_builder;
-
static void
detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,
exec_list *instructions);
@@ -1325,6 +1324,124 @@ ast_expression::set_is_lhs(bool new_value)
this->subexpressions[0]->set_is_lhs(new_value);
}
+#ifdef MESA_BBOX_OPT
+/**
+ * Copy a NUL-terminated identifier into a fixed-size buffer.
+ *
+ * Returns false and leaves \p dst untouched when \p src is NULL or does not
+ * fit together with its terminator, so a name is never truncated and the
+ * destination is never overrun or left unterminated.
+ */
+static bool
+bbox_copy_identifier(char *dst, size_t dst_size, const char *src)
+{
+   if (src == NULL)
+      return false;
+
+   const size_t len = strlen(src);
+   if (len >= dst_size)
+      return false;
+
+   memcpy(dst, src, len + 1);
+   return true;
+}
+
+/**
+ * Decide whether an assignment matches the "simple position" pattern used by
+ * the bounding-box optimization.
+ *
+ * Recognized form: <lhs> = <mat4 identifier> * <vec4 expression>.
+ *  - Identifier entries of the vec4 operand's expression list are recorded in
+ *    state->stateVertPosition; any entry that is not an identifier, int
+ *    constant or float constant rejects the shader.
+ *  - When <lhs> is gl_Position the matrix name is stored in state->stateMVP
+ *    and true is returned.
+ *  - For any other identifier on the left the matrix name is still recorded
+ *    but false is returned so that later assignments keep being examined.
+ *
+ * Names that do not fit the fixed-size state buffers reject the shader
+ * instead of overflowing them.
+ *
+ * Side effects: marks the LHS with set_is_lhs(true) and calls hir() on both
+ * operands of the multiplication (presumably appending to \p instructions).
+ *
+ * \param instructions    instruction list handed to hir()
+ * \param simple_ast_root the assignment expression being examined
+ * \param state           parse state receiving stateMVP / stateVertPosition
+ * \return true when the assignment is gl_Position = mat4 * vec4
+ */
+static bool
+is_simple_shader(exec_list *instructions, ast_expression *simple_ast_root,
+                 struct _mesa_glsl_parse_state *state)
+{
+   ast_expression *lhs = simple_ast_root->subexpressions[0];
+   ast_expression *rhs = simple_ast_root->subexpressions[1];
+
+   /* NOTE(review): this buffer is local, so a name stored while visiting
+    * "tmp = mvp * pos" is gone by the time "gl_Position = tmp" is visited.
+    * It is zero-initialized so that the second form is deterministically
+    * rejected instead of being compared against stack garbage.  Move it into
+    * _mesa_glsl_parse_state to make the two-statement form work.
+    */
+   char temp_identifier[100] = {'\0'};
+
+   if (lhs == NULL || rhs == NULL)
+      return false;
+
+   lhs->set_is_lhs(true);
+
+   /* primary_expression.identifier is only meaningful for ast_identifier. */
+   const char *lhs_name =
+      lhs->oper == ast_identifier ? lhs->primary_expression.identifier : NULL;
+
+   if (rhs->oper != ast_mul) {
+      /* Candidate for "gl_Position = tmp;" following "tmp = mvp * pos;". */
+      if (lhs_name == NULL || strcmp(lhs_name, "gl_Position") != 0)
+         return false;
+      if (temp_identifier[0] == '\0' || rhs->oper != ast_identifier ||
+          rhs->primary_expression.identifier == NULL)
+         return false;
+      return strcmp(temp_identifier,
+                    rhs->primary_expression.identifier) == 0;
+   }
+
+   ast_expression *matrix = rhs->subexpressions[0];
+   ast_expression *vector = rhs->subexpressions[1];
+   if (matrix == NULL || vector == NULL)
+      return false;
+
+   ir_rvalue *matrix_ir = matrix->hir(instructions, state);
+   ir_rvalue *vector_ir = vector->hir(instructions, state);
+
+   /* Only mat4 * vec4 is a position transform; anything else is not simple. */
+   if (matrix_ir == NULL || vector_ir == NULL ||
+       matrix_ir->type->gl_type != GL_FLOAT_MAT4 ||
+       vector_ir->type->gl_type != GL_FLOAT_VEC4)
+      return false;
+
+   foreach_list_typed (ast_node, ast, link, &vector->expressions) {
+      ast_expression *arg = (ast_expression *) ast;
+
+      if (arg->oper != ast_identifier &&
+          arg->oper != ast_int_constant &&
+          arg->oper != ast_float_constant)
+         return false;
+
+      if (arg->oper == ast_identifier &&
+          arg->primary_expression.identifier != NULL &&
+          !bbox_copy_identifier(state->stateVertPosition,
+                                sizeof(state->stateVertPosition),
+                                arg->primary_expression.identifier))
+         return false;
+   }
+
+   if (lhs_name == NULL)
+      return false;
+
+   const char *matrix_name = matrix->oper == ast_identifier ?
+      matrix->primary_expression.identifier : NULL;
+
+   if (strcmp(lhs_name, "gl_Position") == 0) {
+      /* gl_Position = mvp * pos: simple only if the MVP name was captured. */
+      return bbox_copy_identifier(state->stateMVP, sizeof(state->stateMVP),
+                                  matrix_name);
+   }
+
+   /* tmp = mvp * pos: remember the names and keep scanning, gl_Position may
+    * be assigned from tmp by a later statement.
+    */
+   (void) bbox_copy_identifier(temp_identifier, sizeof(temp_identifier),
+                               lhs_name);
+   (void) bbox_copy_identifier(state->stateMVP, sizeof(state->stateMVP),
+                               matrix_name);
+   return false;
+}
+/**
+ * Check whether the variable named by state->stateVertPosition is a shader
+ * input (e.g. "in_position").
+ *
+ * The name is looked up in the symbol table of \p state; false is returned
+ * when no such variable exists or when its mode is not ir_var_shader_in.
+ * The function only inspects the variable; it allocates nothing.
+ */
+static bool
+is_attribute(struct _mesa_glsl_parse_state *state)
+{
+   const ir_variable *const var =
+      state->symbols->get_variable(state->stateVertPosition);
+
+   return var != NULL && var->data.mode == ir_var_shader_in;
+}
+#endif //MESA_BBOX_OPT
+
ir_rvalue *
ast_expression::do_hir(exec_list *instructions,
struct _mesa_glsl_parse_state *state,
@@ -1400,6 +1517,55 @@ ast_expression::do_hir(exec_list *instructions,
unreachable("ast_aggregate: Should never get here.");
case ast_assign: {
+#ifdef MESA_BBOX_OPT
+ if (state->stage == MESA_SHADER_VERTEX &&
+ !state->state_bbox_simple_shader &&
+ is_simple_shader(instructions,this,state) &&
+ !state->state_shader_analysis_complete)
+ {
+ state->state_bbox_simple_shader = true;
+ }
+
+ if (state->state_bbox_simple_shader &&
+ !state->state_shader_analysis_complete) {
+ if (!is_attribute(state)) {
+ if ((ir_dereference_variable *)op[0] != NULL &&
+ ((ir_dereference_variable *)op[0])->var->name != NULL &&
+ strlen(state->stateVertPosition) != 0)
+ if (!strcmp(((ir_dereference_variable *)op[0])->var->name,
+ state->stateVertPosition) ) {
+ if (((ir_instruction *)op[0])->ir_type == ir_type_variable ||
+ ((ir_instruction *)op[0])->ir_type ==
+ ir_type_dereference_variable &&
+ (ir_dereference_variable *)op[1] != NULL &&
+ ((ir_dereference_variable *)op[1])->ir_type ==
+ ir_type_dereference_variable) {
+ if ((((ir_instruction *)op[1])->ir_type == ir_type_variable ||
+ ((ir_instruction *)op[1])->ir_type ==
+ ir_type_dereference_variable) &&
+ ((ir_dereference_variable *)op[1])->var->name) {
+ strncpy((char *)state->stateVertPosition,
+ (char *)((ir_dereference_variable *)op[1])->var->name,
+ strlen(((ir_dereference_variable *)op[1])->var->name));
+ state->state_shader_analysis_complete = true;
+ }
+ }
+ else {
+ state->state_bbox_simple_shader = false;
+ state->state_shader_analysis_complete = true;
+ }
+ }
+ else {
+ state->state_bbox_simple_shader = false;
+ state->state_shader_analysis_complete = true;
+
+ }
+ }
+ else {
+ state->state_shader_analysis_complete = true;
+ }
+ }
+#endif //MESA_BBOX_OPT
this->subexpressions[0]->set_is_lhs(true);
op[0] = this->subexpressions[0]->hir(instructions, state);
op[1] = this->subexpressions[1]->hir(instructions, state);
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp
index 42ba88f..c540b3a 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -2121,6 +2121,16 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
if (!state->error && !state->translation_unit.is_empty())
_mesa_ast_to_hir(shader->ir, state);
+#ifdef MESA_BBOX_OPT
+   /* Publish the bounding-box classification gathered in the parse state.
+    * The copies are bounded by the destination size and always terminated;
+    * strncpy() zero-pads, so a shorter name also clears a previous longer one.
+    */
+   shader->shader_bbox_simple_shader = state->state_bbox_simple_shader;
+   if (shader->shader_bbox_simple_shader) {
+      strncpy(shader->shaderMVP, state->stateMVP,
+              sizeof(shader->shaderMVP) - 1);
+      shader->shaderMVP[sizeof(shader->shaderMVP) - 1] = '\0';
+
+      strncpy(shader->shaderVertPosition, state->stateVertPosition,
+              sizeof(shader->shaderVertPosition) - 1);
+      shader->shaderVertPosition[sizeof(shader->shaderVertPosition) - 1] = '\0';
+   }
+#endif //MESA_BBOX_OPT
if (!state->error) {
validate_ir_tree(shader->ir);
diff --git a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h
index da44d37..b951a66 100644
--- a/src/compiler/glsl/glsl_parser_extras.h
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -887,6 +887,13 @@ struct _mesa_glsl_parse_state {
* so we can check totals aren't too large.
*/
unsigned clip_dist_size, cull_dist_size;
+
+#ifdef MESA_BBOX_OPT
+ bool state_shader_analysis_complete;
+ bool state_bbox_simple_shader;
+ char stateMVP[20] = {'\0'};
+ char stateVertPosition[100] = {'\0'};
+#endif
};
# define YYLLOC_DEFAULT(Current, Rhs, N) \
diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index 3ce78fe..210f37f 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -4876,6 +4876,24 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
switch (stage) {
case MESA_SHADER_VERTEX:
+#ifdef MESA_BBOX_OPT
+         if (shader_list[MESA_SHADER_VERTEX][0] != NULL) {
+            const struct gl_shader *vs = shader_list[MESA_SHADER_VERTEX][0];
+
+            sh->linked_bbox_simple_shader = vs->shader_bbox_simple_shader;
+            /* TBD: How do we handle multiple vertex shaders being linked?
+             * Only the first one is consulted here.
+             *
+             * MVP name, copied so the VBO module can look up the MVP.  The
+             * copy is bounded by the destination size and always terminated.
+             */
+            strncpy(sh->linkedshaderMVP, vs->shaderMVP,
+                    sizeof(sh->linkedshaderMVP) - 1);
+            sh->linkedshaderMVP[sizeof(sh->linkedshaderMVP) - 1] = '\0';
+
+            /* Vertex position attribute name */
+            strncpy(sh->linkedshaderVertPosition, vs->shaderVertPosition,
+                    sizeof(sh->linkedshaderVertPosition) - 1);
+            sh->linkedshaderVertPosition[
+               sizeof(sh->linkedshaderVertPosition) - 1] = '\0';
+         }
+#endif
validate_vertex_shader_executable(prog, sh, ctx);
break;
case MESA_SHADER_TESS_CTRL:
diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c
index a978f2f..c677208 100644
--- a/src/intel/common/gen_debug.c
+++ b/src/intel/common/gen_debug.c
@@ -106,6 +106,13 @@ intel_debug_flag_for_shader_stage(gl_shader_stage stage)
static void
brw_process_intel_debug_variable_once(void)
{
+#if defined(__ANDROID__) || defined(ANDROID)
+ setenv("MESA_GLSL_CACHE_DISABLE","true",1);
+ setenv("MESA_BBOX_MIN_VERTEX_CNT", "999", 1);
+ setenv("MESA_BBOX_OPT_ENABLE", "3", 1);
+ setenv("MESA_OPT_SPLIT_SIZE", "198", 1);
+#endif
+
INTEL_DEBUG = parse_debug_string(getenv("INTEL_DEBUG"), debug_control);
}
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 0d3c277..8148622 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -1,4 +1,5 @@
### Lists of source files, included by Makefiles
+MESA_BBOX_ENABLE = false
# this is part of MAIN_FILES
MAIN_ES_FILES = \
@@ -400,6 +401,7 @@ TNL_FILES = \
tnl/t_vp_build.c \
tnl/t_vp_build.h
+
VBO_FILES = \
vbo/vbo_attrib.h \
vbo/vbo_attrib_tmp.h \
@@ -422,6 +424,15 @@ VBO_FILES = \
vbo/vbo_save.h \
vbo/vbo_save_loopback.c
+#ifeq($(MESA_BBOX_ENABLE), true)
+
+VBO_FILES += \
+ vbo/vbo_bbox_cache.c \
+ vbo/vbo_bbox.c \
+ vbo/vbo_bbox.h
+
+#endif
+
STATETRACKER_FILES = \
state_tracker/st_atifs_to_tgsi.c \
state_tracker/st_atifs_to_tgsi.h \
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 6ba64e4..03cf861 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -48,6 +48,10 @@
#include "vbo/vbo.h"
+#ifdef MESA_BBOX_OPT
+#include "vbo/vbo_bbox.h"
+#endif
+
#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
#include "utils.h"
@@ -890,6 +894,11 @@ brw_process_driconf_options(struct brw_context *brw)
ctx->Const.AllowGLSLCrossStageInterpolationMismatch =
driQueryOptionb(options, "allow_glsl_cross_stage_interpolation_mismatch");
+#ifdef MESA_BBOX_OPT
+ ctx->Const.EnableBoundingBoxCulling =
+ driQueryOptionb(options, "enable_bounding_box_culling");
+#endif //MESA_BBOX_OPT
+
ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20);
driComputeOptionsSha1(&brw->screen->optionCache,
ctx->Const.dri_config_options_sha1);
@@ -1001,6 +1010,14 @@ brwCreateContext(gl_api api,
brw_process_driconf_options(brw);
+#ifdef MESA_BBOX_OPT
+ if (ctx->Const.EnableBoundingBoxCulling) {
+ MESA_BBOX("EnableBoundingBoxCulling True\n");
+ vbo_bbox_init(ctx);
+ } else
+ MESA_BBOX("EnableBoundingBoxCulling False\n");
+#endif //MESA_BBOX_OPT
+
if (INTEL_DEBUG & DEBUG_PERF)
brw->perf_debug = true;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index eaf5a3b..35c7624 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -88,6 +88,10 @@ DRI_CONF_BEGIN
DRI_CONF_ALLOW_HIGHER_COMPAT_VERSION("false")
DRI_CONF_FORCE_GLSL_ABS_SQRT("false")
+#ifdef MESA_BBOX_OPT
+ DRI_CONF_ENABLE_BOUNDING_BOX_CULLING("true")
+#endif
+
DRI_CONF_OPT_BEGIN_B(shader_precompile, "true")
DRI_CONF_DESC(en, "Perform code generation at shader link time.")
DRI_CONF_OPT_END
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 1d1e51b..67a369b 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -48,6 +48,9 @@
#include "varray.h"
#include "util/u_atomic.h"
+#ifdef MESA_BBOX_OPT
+#include "vbo/vbo_bbox.h"
+#endif
/* Debug flags */
/*#define VBO_DEBUG*/
@@ -2254,6 +2257,10 @@ buffer_sub_data(GLenum target, GLuint buffer, GLintptr offset,
if (no_error || validate_buffer_sub_data(ctx, bufObj, offset, size, func))
_mesa_buffer_sub_data(ctx, bufObj, offset, size, data);
+
+#ifdef MESA_BBOX_OPT
+ vbo_bbox_element_buffer_update(ctx,bufObj,data,offset,size);
+#endif
}
@@ -2589,9 +2596,17 @@ validate_and_unmap_buffer(struct gl_context *ctx,
#endif
#ifdef VBO_DEBUG
+#ifdef MESA_BBOX_OPT
+ if (bufObj->StorageFlags & GL_MAP_WRITE_BIT) {
+#else
if (bufObj->AccessFlags & GL_MAP_WRITE_BIT) {
+#endif
GLuint i, unchanged = 0;
+#ifdef MESA_BBOX_OPT
+ GLubyte *b = (GLubyte *) bufObj->Data;
+#else
GLubyte *b = (GLubyte *) bufObj->Pointer;
+#endif
GLint pos = -1;
/* check which bytes changed */
for (i = 0; i < bufObj->Size - 1; i++) {
@@ -3154,7 +3169,11 @@ map_buffer_range(struct gl_context *ctx, struct gl_buffer_object *bufObj,
/* Access must be write only */
if ((access & GL_MAP_WRITE_BIT) && (!(access & ~GL_MAP_WRITE_BIT))) {
GLuint i;
+#ifdef MESA_BBOX_OPT
+ GLubyte *b = (GLubyte *) bufObj->Data;
+#else
GLubyte *b = (GLubyte *) bufObj->Pointer;
+#endif
for (i = 0; i < bufObj->Size; i++)
b[i] = i & 0xff;
}
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 9fd577d..c262173 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1359,6 +1359,9 @@ struct gl_buffer_object
bool MinMaxCacheDirty;
bool HandleAllocated; /**< GL_ARB_bindless_texture */
+#ifdef MESA_BBOX_OPT
+ int data_change_counter; //TBD: Same as RefCount to check
+#endif
};
@@ -2535,6 +2538,12 @@ struct gl_linked_shader
* stores information that is also needed during linking.
*/
struct gl_shader_spirv_data *spirv_data;
+
+#ifdef MESA_BBOX_OPT
+ bool linked_bbox_simple_shader;
+ char linkedshaderMVP[20];
+ char linkedshaderVertPosition[100];
+#endif
};
@@ -2631,6 +2640,11 @@ struct gl_shader
/* ARB_gl_spirv related data */
struct gl_shader_spirv_data *spirv_data;
+#ifdef MESA_BBOX_OPT
+ bool shader_bbox_simple_shader;
+ char shaderMVP[20];
+ char shaderVertPosition[100];
+#endif
};
@@ -2902,6 +2916,9 @@ struct gl_shader_program_data
* ARB_gl_spirv extension.
*/
bool spirv;
+#ifdef MESA_BBOX_OPT
+ GLuint vbo_bbox_mvp_location;
+#endif
};
/**
@@ -3108,6 +3125,10 @@ struct gl_pipeline_shader_state
/** Pipeline objects */
struct _mesa_HashTable *Objects;
+#ifdef MESA_BBOX_OPT
+ /* Bounding box draw optimization control structure */
+ struct mesa_bbox_opt *BboxOpt;
+#endif
};
/**
@@ -4076,6 +4097,12 @@ struct gl_constants
/** GL_ARB_gl_spirv */
struct spirv_supported_capabilities SpirVCapabilities;
+
+#ifdef MESA_BBOX_OPT
+ /** MESA_BBOX_OPT Runtime enable_bounding_box_culling*/
+ bool EnableBoundingBoxCulling;
+#endif
+
};
@@ -4720,6 +4747,21 @@ struct gl_semaphore_object
GLuint Name; /**< hash table ID/name */
};
+#ifdef MESA_BBOX_OPT
+/**
+ * Bounding volume classification types
+ */
+typedef enum
+{
+   BOUNDING_VOLUME_AABB = 0,
+   BOUNDING_VOLUME_OBB = 1,
+   BOUNDING_VOLUME_SPHERE = 2,
+   BOUNDING_VOLUME_DOP = 3,
+   BOUNDING_VOLUME_MIXED = 4,
+   /* Misspelled name, kept as an alias so code that already uses it keeps
+    * compiling.
+    */
+   BOUNDING_VOULME_MIXED = BOUNDING_VOLUME_MIXED,
+   BOUNDING_VOLUME_MAX = 5,
+} bounding_volume_type;
+#endif //MESA_BBOX_OPT
+
/**
* Mesa rendering context.
*
@@ -5096,6 +5138,15 @@ struct gl_context
struct hash_table_u64 *ResidentTextureHandles;
struct hash_table_u64 *ResidentImageHandles;
/*@}*/
+
+#ifdef MESA_BBOX_OPT
+ /**
+ * Bounding volume type
+ *
+ */
+ bounding_volume_type volume_type;
+#endif //MESA_BBOX_OPT
+
};
/**
diff --git a/src/mesa/program/Android.mk b/src/mesa/program/Android.mk
index c6470e6..6489d3e 100644
--- a/src/mesa/program/Android.mk
+++ b/src/mesa/program/Android.mk
@@ -75,6 +75,7 @@ $(intermediates)/program/lex.yy.c: $(LOCAL_PATH)/program_lexer.l
LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/mapi \
$(MESA_TOP)/src/mesa \
+ $(MESA_TOP)/src/mesa/vbo \
$(MESA_TOP)/src/compiler/nir \
$(MESA_TOP)/src/gallium/auxiliary \
$(MESA_TOP)/src/gallium/include
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index 6ab1bf5..e0b3563 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -43,6 +43,9 @@
#include "util/ralloc.h"
#include "util/u_atomic.h"
+#ifdef MESA_BBOX_OPT
+#include "vbo/vbo_bbox.h"
+#endif
/**
* A pointer to this dummy program is put into the hash table when
diff --git a/src/mesa/vbo/vbo_bbox.c b/src/mesa/vbo/vbo_bbox.c
new file mode 100644
index 0000000..f1e153d
--- /dev/null
+++ b/src/mesa/vbo/vbo_bbox.c
@@ -0,0 +1,1538 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * \brief VBO BBOX module implementation
+ * \author Kedar Karanje
+ */
+
+#pragma GCC optimize (0)
+#include "vbo_bbox.h"
+#include <fcntl.h>
+#define BBOX_MAX_FRAMES_TO_DYNAMICALLY_UPDATE (5)
+#define BBOX_MIN_EFFECTIVE_DRAWS_TO_DYNAMICALLY_UPDATE (5)
+#define BBOX_MIN_VAL_FOR_EFFECTIVE_DRAWS_COUNTER (-100)
+#define BBOX_MAX_VAL_FOR_EFFECTIVE_DRAWS_COUNTER (100)
+#define BBOX_MIN_GPU_HEAD_ROOM_TO_PROCESS_ELEMENT_BUFFER (100)
+
+/**
+ * Min part of split draw that we want to drop
+ */
+#define BBOX_MIN_SPLITTED_DRAW_TO_DROP (1)
+
+#ifdef __AVX__
+static __m256 fullMin;
+static __m256 fullMax;
+#else
+static __m128 fullMin;
+static __m128 fullMax;
+#endif
+
+/* Segment functions */
+
+/* True when the two segments overlap; both comparisons are strict, so
+ * segments that merely touch at an endpoint do not intersect.
+ */
+static inline GLboolean
+intersect(const struct gl_segment *s_src,const struct gl_segment *s_tar)
+{
+   const GLboolean starts_before_end = s_src->Left < s_tar->Right;
+   const GLboolean ends_after_start = s_src->Right > s_tar->Left;
+
+   return starts_before_end && ends_after_start;
+}
+
+
+/* True when s_tar lies entirely within s_src (shared endpoints allowed). */
+static inline GLboolean
+subsegment(const struct gl_segment *s_src,const struct gl_segment *s_tar)
+{
+   const GLboolean left_covered = s_src->Left <= s_tar->Left;
+   const GLboolean right_covered = s_src->Right >= s_tar->Right;
+
+   return left_covered && right_covered;
+}
+
+
+/* True when s_src lies entirely within s_tar, i.e. subsegment() with the
+ * roles of the two arguments swapped.
+ */
+static inline GLboolean
+superset(const struct gl_segment *s_src,const struct gl_segment *s_tar)
+{
+   const struct gl_segment *outer = s_tar;
+   const struct gl_segment *inner = s_src;
+
+   return subsegment(outer, inner);
+}
+
+
+/**
+ * Scale a frustum plane so that its normal (a, b, c) has unit length; d is
+ * scaled by the same factor.
+ *
+ * A plane whose squared normal length is not positive (zero or NaN) is left
+ * unchanged rather than being filled with inf/NaN by a division by zero.
+ */
+static void
+normalize(vbo_bbox_frustum_plane *fr_plane)
+{
+   const GLfloat a = fr_plane->a;
+   const GLfloat b = fr_plane->b;
+   const GLfloat c = fr_plane->c;
+   const GLfloat length_sq = a*a + b*b + c*c;
+
+   if (!(length_sq > 0.0f))
+      return;
+
+   const GLfloat norm = 1.0f/sqrt(length_sq);
+
+   fr_plane->a *= norm;
+   fr_plane->b *= norm;
+   fr_plane->c *= norm;
+   fr_plane->d *= norm;
+}
+
+/* Returns the current calculation delay of opt and then increments it.  The
+ * delay wraps to BBOX_CALC_MIN_DELAY once it has exceeded
+ * BBOX_CALC_MAX_DELAY.  When bbox_enable is not below
+ * MESA_BBOX_ENABLE_FORCE_RECALC the delay is always 0 and opt is untouched.
+ */
+static inline
+int vbo_bbox_get_delay(struct mesa_bbox_opt *opt)
+{
+   if (!(mesa_bbox_env_variables.bbox_enable < MESA_BBOX_ENABLE_FORCE_RECALC))
+      return 0;
+
+   if (opt->calc_delay > BBOX_CALC_MAX_DELAY)
+      opt->calc_delay = BBOX_CALC_MIN_DELAY;
+
+   const int current_delay = opt->calc_delay;
+   opt->calc_delay++;
+   return current_delay;
+}
+
+/**
+ * Make sure data->sub_bbox_array can hold bbox_count entries.
+ *
+ * An existing array of exactly the requested size is reused; otherwise it is
+ * freed and a new one is allocated (contents uninitialized).
+ *
+ * \return true on success.  On failure (negative count or allocation
+ *         failure) the array pointer is NULL and sub_bbox_cnt is 0, so the
+ *         cached count never describes memory that no longer exists.
+ */
+static inline
+bool vbo_init_sub_bbox_array(int bbox_count, struct vbo_bbox_cache_data* data)
+{
+   if (data->sub_bbox_array != NULL && data->sub_bbox_cnt == bbox_count)
+      return true;
+
+   free(data->sub_bbox_array);
+   data->sub_bbox_array = NULL;
+   data->sub_bbox_cnt = 0;
+
+   /* A negative count would wrap to a huge size once converted to size_t. */
+   if (bbox_count < 0)
+      return false;
+
+   data->sub_bbox_array = (struct bounding_info*) malloc(
+      (size_t) bbox_count * sizeof (struct bounding_info));
+   if (!data->sub_bbox_array)
+      return false;
+
+   data->sub_bbox_cnt = bbox_count;
+   return true;
+}
+
+/**
+ * Look up the uniform location of the MVP matrix recorded for the vertex
+ * shader of the active program (linkedshaderMVP).
+ *
+ * \return the value of _mesa_GetUniformLocation(), or -1 when there is no
+ *         active program or it has no linked vertex stage.
+ */
+static inline GLint
+vbo_bbox_get_mvp(struct gl_context *ctx)
+{
+   const struct gl_shader_program *prog = ctx->Shader.ActiveProgram;
+   if (prog == NULL)
+      return -1;
+
+   const struct gl_linked_shader *linked_shader =
+      prog->_LinkedShaders[MESA_SHADER_VERTEX];
+   if (linked_shader == NULL)
+      return -1;
+
+   return _mesa_GetUniformLocation(prog->Name,
+                                   linked_shader->linkedshaderMVP);
+}
+
+/*
+ * Gets the "simple shader" flag of the vertex shader linked into the
+ * currently active program.
+ *
+ * Returns 0 when there is no active program or it has no linked vertex
+ * stage.  NOTE(review): vbo_bbox_check_supported_draw_call() validates
+ * gc->_Shader->ActiveProgram, which is not the pointer read here, so the
+ * NULL checks below are required.
+ */
+static inline int
+vbo_is_simple_shader(struct gl_context *ctx)
+{
+   const struct gl_shader_program *prog = ctx->Shader.ActiveProgram;
+   const struct gl_linked_shader *linked_shader =
+      prog ? prog->_LinkedShaders[MESA_SHADER_VERTEX] : NULL;
+
+   if (linked_shader == NULL)
+      return 0;
+
+   return linked_shader->linked_bbox_simple_shader;
+}
+
+/**
+ * Return the vertex array object currently bound in the context
+ * (gc->Array.VAO).  Both the context and the VAO are asserted non-NULL.
+ */
+static inline struct gl_vertex_array_object*
+vbo_get_current_vao(struct gl_context *const gc)
+{
+   assert(gc);
+   assert(gc->Array.VAO);
+
+   return gc->Array.VAO;
+}
+
+/**
+ * Returns the location of the "position" attribute (the name stored in
+ * linkedshaderVertPosition) in the currently active program.
+ *
+ * \return the attribute location, or -1 when the lookup yields a negative
+ *         value, when there is no active program, or when the program has
+ *         no linked vertex stage.
+ */
+static inline
+int vbo_get_simple_vs_position_attr_location(
+   struct gl_context *const gc)
+{
+   assert(gc);
+   const struct gl_shader_program *prog = gc->Shader.ActiveProgram;
+   if (prog == NULL)
+      return -1;
+
+   const struct gl_linked_shader *linked_shader =
+      prog->_LinkedShaders[MESA_SHADER_VERTEX];
+   if (linked_shader == NULL)
+      return -1;
+
+   const GLint vertexPosLocation = _mesa_GetAttribLocation(
+      prog->Name, linked_shader->linkedshaderVertPosition);
+
+   return vertexPosLocation >= 0 ? vertexPosLocation : -1;
+}
+
+/**
+ * Return the element (index) buffer object of the current VAO, i.e. its
+ * IndexBufferObj.  The context and the buffer are asserted non-NULL.
+ */
+static inline struct gl_buffer_object*
+vbo_get_current_element_buffer(struct gl_context *const gc)
+{
+   assert(gc);
+
+   struct gl_vertex_array_object *current_vao = vbo_get_current_vao(gc);
+   struct gl_buffer_object *index_buffer = current_vao->IndexBufferObj;
+
+   assert(index_buffer);
+   return index_buffer;
+}
+
+/**
+ * Get a vertex-buffer binding from the current VAO.
+ *
+ * The binding belongs to the lowest-numbered attribute that is both enabled
+ * and present in VertexAttribBufferMask.  NOTE(review): despite the function
+ * name, that attribute is not necessarily the position attribute -- confirm.
+ *
+ * When no attribute qualifies, the binding of the VERT_ATTRIB_POS slot is
+ * returned instead of indexing VertexAttrib[-1].
+ */
+static inline struct gl_vertex_buffer_binding*
+vbo_get_current_vertex_buffer_binding_of_position(struct gl_context *const gc)
+{
+   assert(gc);
+   struct gl_vertex_array_object* vao = vbo_get_current_vao(gc);
+
+   const GLbitfield mask = vao->_Enabled & vao->VertexAttribBufferMask;
+   /* ffs() returns 0 for an empty mask, which would yield index -1. */
+   const int attrib_index = mask ? ffs(mask) - 1 : VERT_ATTRIB_POS;
+   const struct gl_array_attributes *attrib_array =
+      &vao->VertexAttrib[attrib_index];
+
+   return &vao->BufferBinding[attrib_array->BufferBindingIndex];
+}
+
+/**
+ * Return the buffer object (BufferObj) of the binding selected by
+ * vbo_get_current_vertex_buffer_binding_of_position() for the current VAO.
+ * The context and the buffer are asserted non-NULL.
+ */
+static inline struct gl_buffer_object*
+vbo_get_current_vertex_buffer(struct gl_context *const gc)
+{
+   assert(gc);
+
+   struct gl_vertex_buffer_binding *binding =
+      vbo_get_current_vertex_buffer_binding_of_position(gc);
+   struct gl_buffer_object *vertex_buffer = binding->BufferObj;
+
+   assert(vertex_buffer);
+   return vertex_buffer;
+}
+
+
+/**
+ * Condition to enter bounding box optimization
+ */
+static inline bool
+vbo_bbox_check_supported_draw_call(struct gl_context *const gc,
+ GLenum mode, GLsizei count, GLenum type,
+ const GLvoid *indices, GLint basevertex)
+{
+
+ assert(gc);
+ int shader_scenario;
+ struct gl_linked_shader *_LinkedShaders;
+
+ /* Check if the minimum vertex count is met. */
+ if (count < (GLsizei) mesa_bbox_env_variables.bbox_min_vrtx_count) {
+ /* Count is most common cause to bail out form optimization
+ * so should be first.
+ */
+ MESA_BBOX("Aborting MESA_BBOX :%d: Vertex count too small, minimum count = %d\n",
+ count,mesa_bbox_env_variables.bbox_min_vrtx_count);
+ return false;
+ }
+
+ if (mode != GL_TRIANGLES) {
+ MESA_BBOX("Aborting MESA_BBOX :%d: Primitive mode is not GL_TRIANGLES, \
+ mode = %d\n", count, mode);
+ return false;
+ }
+
+ /* Examine current shader */
+ if (!gc->_Shader->ActiveProgram) {
+ MESA_BBOX("Aborting MESA_BBOX:%d: No active GLSL program.\n", count);
+ return false;
+ }
+
+ /* BASIC Shader scenario is when we have just VS & FS */
+ if (gc->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL &&
+ gc->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT] != NULL &&
+ gc->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL] == NULL &&
+ gc->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL] == NULL &&
+ gc->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY] == NULL) {
+ shader_scenario = 0;
+ }
+ else
+ shader_scenario = 1;
+
+ if (shader_scenario) {
+ MESA_BBOX("Aborting MESA_BBOX:%d: GLSL program must contain only vertex and \
+ fragment shaders, shader scenario = \n", count );
+ return false;
+ }
+
+ if (!vbo_is_simple_shader(gc)) {
+ MESA_BBOX("Aborting MESA_BBOX:%d: GLSL vertex shader does not have simple \
+ position calculation \n", count);
+ return false;
+ }
+ if (gc->Shader.ActiveProgram->_LinkedShaders[MESA_SHADER_VERTEX]) {
+ _LinkedShaders =
+ gc->Shader.ActiveProgram->_LinkedShaders[MESA_SHADER_VERTEX];
+
+ MESA_BBOX("MVP:%s, VertPos:%s\n",_LinkedShaders->linkedshaderMVP,
+ _LinkedShaders->linkedshaderVertPosition);
+ }
+
+ /* Examine element buffer */
+ struct gl_buffer_object* element_buffer = vbo_get_current_element_buffer(gc);
+ if ((!element_buffer) || element_buffer->Name == 0) {
+ MESA_BBOX("Aborting MESA_BBOX:%d: Element buffer name is 0\n", count);
+ return false;
+ }
+
+ if (!(element_buffer->StorageFlags &
+ (GL_CLIENT_STORAGE_BIT | GL_DYNAMIC_STORAGE_BIT))){
+ MESA_BBOX("Aborting MESA_BBOX:%d: Element buffer not resident: %#x\n", count,
+ element_buffer->StorageFlags);
+ return false;
+ }
+
+ /* Get VertexPosLocation */
+ int vertexPosLocation = 0;
+ if (gc->Shader.ActiveProgram)
+ vertexPosLocation = vbo_get_simple_vs_position_attr_location(gc);
+ if (vertexPosLocation < 0)
+ {
+ MESA_BBOX("Aborting MESA_BBOX:%d: VertexPosition Location is inValid:\n", count);
+ return false;
+ }
+
+ struct gl_vertex_array_object* vao = vbo_get_current_vao(gc);
+ int posAttribMapMode =
+ _mesa_vao_attribute_map[vao->_AttributeMapMode][vertexPosLocation];
+
+ if (((vao->_Enabled >> posAttribMapMode) & 0x1) != 1)
+ {
+ MESA_BBOX("Aborting MESA_BBOX:%d: Vertex data does not come from VBO , GL-API:%d\n", count,gc->API);
+//#if !defined(__ANDROID__) || !defined(ANDROID)
+//This is not specific to Android but to the GLES API
+ if (gc->API != API_OPENGLES && gc->API != API_OPENGLES2)
+ return false;
+//#endif
+ }
+
+ struct gl_buffer_object* vertexattrib_buffer =
+ vbo_get_current_vertex_buffer(gc);
+ if ((!vertexattrib_buffer) || vertexattrib_buffer->Name == 0) {
+ MESA_BBOX("Aborting MESA_BBOX:%d: Vertex buffer %p name is %d\n", count,
+ vertexattrib_buffer,vertexattrib_buffer->Name);
+#if !defined(__ANDROID__) || !defined(ANDROID)
+ return false;
+#endif
+ }
+
+ if (!(vertexattrib_buffer->StorageFlags &
+ (GL_CLIENT_STORAGE_BIT | GL_DYNAMIC_STORAGE_BIT))){
+ MESA_BBOX("Aborting MESA_BBOX:%d:Vertex buffer not resident %#x \n", count,
+ vertexattrib_buffer->StorageFlags);
+ MESA_BBOX("Aborting MESA_BBOX:VAO AttributeMode is %d\n", vao->_AttributeMapMode);
+#if !defined(__ANDROID__) || !defined(ANDROID)
+ return false;
+#endif
+ }
+
+ /* Examine vertex position attribute configuration */
+ if (vao->VertexAttrib[posAttribMapMode].Enabled) {
+ if (vao->VertexAttrib[posAttribMapMode].Size != 3)
+ {
+ MESA_BBOX("Aborting MESA_BBOX:%d: Vertex attrib size :%d, only 3 supported\n",
+ count, vao->VertexAttrib[VERT_ATTRIB_POS].Size);
+#if !defined(__ANDROID__) || !defined(ANDROID)
+ return false;
+#endif
+ }
+ if (vao->VertexAttrib[posAttribMapMode].Type != GL_FLOAT)
+ {
+ MESA_BBOX("Aborting MESA_BBOX:%d: Vertex attrib type is %d, only GL_FLOAT \
+ supported\n", count, vao->VertexAttrib[VERT_ATTRIB_POS].Type);
+ return false;
+ }
+ }
+
+ if (type != GL_UNSIGNED_SHORT) {
+ MESA_BBOX("Aborting MESA_BBOX:%d: type is %d, only GL_UNSIGNED_SHORT \
+ supported\n", count, type);
+ return false;
+ }
+ if (basevertex != 0) {
+ MESA_BBOX("Aborting MESA_BBOX:%d: basevertex is 0 \n", count);
+ return false;
+ }
+
+ /* If size ==3 and type == GL_FLOAT, then element stride must be 12. */
+ assert(vao->VertexAttrib[VERT_ATTRIB_POS].StrideB == 12);
+
+ /* When transform feedback is capturing we cannot do early clipping since
+ * xfb must write unclipped vertices
+ * Note - we could check for IsCapturing() but that would require
+ * more elaborate checking for VBO modifications.
+ */
+ if (gc->TransformFeedback.CurrentObject->Active) {
+ MESA_BBOX("MESA_BBOX:%d: Transform feedback is active, \
+ cannot clip\n", count);
+ return false;
+ }
+ return true;
+}
+
+
+/**
+ * Fill in the cache key describing the geometry of a draw call.
+ *
+ * The key combines the draw parameters passed in with the names of the
+ * currently bound element and vertex buffers and the offset/stride of the
+ * position buffer binding.  The key is cleared before it is populated; the
+ * whole structure is later handed to the bbox cache together with
+ * sizeof(vbo_bbox_cache_key).
+ *
+ * No validation is done here: the objects returned by the
+ * vbo_get_current_* helpers are dereferenced unconditionally.
+ */
+static inline
+void vbo_bbox_prepare_key(struct gl_context *const gc, GLenum mode,
+                          GLsizei count, GLenum type, GLuint type_size,
+                          const GLvoid *indices, GLint basevertex,
+                          vbo_bbox_cache_key *key)
+{
+   assert(gc);
+
+   struct gl_buffer_object *const ibo = vbo_get_current_element_buffer(gc);
+   struct gl_buffer_object *const vbo = vbo_get_current_vertex_buffer(gc);
+   struct gl_vertex_buffer_binding *const pos_binding =
+      vbo_get_current_vertex_buffer_binding_of_position(gc);
+
+   memset(key, 0, sizeof(*key));
+
+   /* Bound buffers and layout of the position attribute */
+   key->element_buf_name = ibo->Name;
+   key->vertex_buf_name = vbo->Name;
+   key->offset = (GLint)pos_binding->Offset;
+   key->stride = pos_binding->Stride;
+
+   /* Draw call parameters; 'indices' is stored as an integer offset */
+   key->mode = mode;
+   key->count = count;
+   key->indices_type = type;
+   key->type_size = type_size;
+   key->indices = (GLint) (uintptr_t) indices;
+   key->basevertex = basevertex;
+}
+
+
+/**
+ * Expand a (min, max) pair into the 8 corners of an axis-aligned box.
+ *
+ * Corner n is written as four floats (x, y, z, 1.0).  Bit 0 of n selects
+ * max-X, bit 1 selects max-Y and bit 2 selects max-Z; a clear bit selects
+ * the corresponding minimum.  The clipping test indexes the corners by a
+ * 3-bit octant value, so this ordering is significant.
+ */
+static
+void vbo_bbox_create_bounding_box(float* const minVec3f, float* const maxVec3F,
+                                  vbo_vec4f* vertices4)
+{
+   assert(minVec3f);
+   assert(maxVec3F);
+   assert(vertices4);
+
+   /* Snapshot the inputs before any output is written */
+   const float lo[3] = { minVec3f[0], minVec3f[1], minVec3f[2] };
+   const float hi[3] = { maxVec3F[0], maxVec3F[1], maxVec3F[2] };
+
+   float *out = (float*)vertices4;
+   for (int corner = 0; corner < 8; corner++, out += 4) {
+      out[0] = (corner & 1) ? hi[0] : lo[0];
+      out[1] = (corner & 2) ? hi[1] : lo[1];
+      out[2] = (corner & 4) ? hi[2] : lo[2];
+      out[3] = 1.0f;
+   }
+}
+
+#ifdef __AVX__
+/*
+ * AVX variant: compute the min/max extents of two sub-boxes in one pass.
+ *
+ * Sub-box 'first_idx' occupies floats 0..2 of the 256-bit accumulators and
+ * sub-box 'second_idx' occupies floats 4..6.  Both sub-boxes start at
+ * <idx> * vertSubBox in the index array and vertCount indices are read
+ * from each.  The file-level fullMin/fullMax accumulators are widened
+ * with the result and the 8-corner boxes are stored in
+ * data->sub_bbox_array[].
+ */
+static void
+vbo_bbox_calc_subbox_coordinates(unsigned int vertSubBox,unsigned int vertCount,
+                                 unsigned int first_idx,unsigned int second_idx,
+                                 unsigned short* indices,float* vertices,
+                                 unsigned int stride,
+                                 struct vbo_bbox_cache_data *data)
+{
+   /* Index runs belonging to each of the two sub-boxes */
+   const unsigned short *idxA = indices + first_idx * vertSubBox;
+   const unsigned short *idxB = indices + second_idx * vertSubBox;
+   const char *base = (const char *)vertices;
+
+   float packed[8] = {0.0};
+   float boxMin[8] = {0.0};
+   float boxMax[8] = {0.0};
+
+   __m256 lo = _mm256_set1_ps(FLT_MAX);
+   __m256 hi = _mm256_set1_ps(-FLT_MAX);
+
+   for (unsigned int n = 0; n < vertCount; n++) {
+      /* xyz of the first sub-box vertex goes to the low half,
+       * xyz of the second sub-box vertex to the high half
+       */
+      memcpy(&packed[0], base + stride*idxA[n], 3 * sizeof(float));
+      memcpy(&packed[4], base + stride*idxB[n], 3 * sizeof(float));
+
+      const __m256 both = _mm256_loadu_ps(packed);
+      lo = _mm256_min_ps(lo, both);
+      hi = _mm256_max_ps(hi, both);
+   }
+
+   /* Widen the full-object extents */
+   fullMin = _mm256_min_ps(fullMin, lo);
+   fullMax = _mm256_max_ps(fullMax, hi);
+
+   _mm256_storeu_ps(boxMin, lo);
+   _mm256_storeu_ps(boxMax, hi);
+
+   /* Low half -> first sub-box, high half -> second sub-box */
+   vbo_bbox_create_bounding_box(boxMin, boxMax,
+      &(data->sub_bbox_array[first_idx].bounding_volume.vert_vec4[0]));
+   vbo_bbox_create_bounding_box(boxMin+4, boxMax+4,
+      &(data->sub_bbox_array[second_idx].bounding_volume.vert_vec4[0]));
+}
+#else
+/*
+ * SSE variant: compute the min/max extents of one sub-box.
+ *
+ * Sub-box 'idx' starts at idx * vertSubBox in the index array and
+ * vertCount indices are read from it.  The file-level fullMin/fullMax
+ * accumulators are widened with the result and the 8-corner box is
+ * stored in data->sub_bbox_array[idx].
+ */
+static void
+vbo_bbox_calc_subbox_coordinates(
+   unsigned int vertSubBox,
+   unsigned int vertCount,
+   unsigned int idx,
+   unsigned short *indices,
+   float *vertices,
+   unsigned int stride,
+   struct vbo_bbox_cache_data *data)
+{
+   /* Index run belonging to this sub-box */
+   const unsigned short *run = indices + idx * vertSubBox;
+   const char *base = (const char *)vertices;
+
+   float packed[4] = {0.0};
+   float boxMin[4] = {0.0};
+   float boxMax[4] = {0.0};
+
+   __m128 lo = _mm_set1_ps(FLT_MAX);
+   __m128 hi = _mm_set1_ps(-FLT_MAX);
+
+   for (unsigned int n = 0; n < vertCount; n++) {
+      /* Only xyz is copied; the fourth lane keeps its initial 0.0 */
+      memcpy(packed, base + stride*run[n], 3 * sizeof(float));
+
+      const __m128 v = _mm_loadu_ps(packed);
+      lo = _mm_min_ps(lo, v);
+      hi = _mm_max_ps(hi, v);
+   }
+
+   /* Widen the full-object extents */
+   fullMin = _mm_min_ps(fullMin, lo);
+   fullMax = _mm_max_ps(fullMax, hi);
+
+   _mm_storeu_ps(boxMin, lo);
+   _mm_storeu_ps(boxMax, hi);
+
+   vbo_bbox_create_bounding_box(boxMin, boxMax,
+      &(data->sub_bbox_array[idx].bounding_volume.vert_vec4[0]));
+}
+#endif
+
+/**
+ * Get a CPU pointer to the contents of a buffer object.
+ *
+ * The buffer is mapped for reading only; callers must not write through
+ * the returned pointer and must call vbo_bbox_release_vbo_ptr() when done.
+ *
+ * \param gc        current context (only asserted here)
+ * \param vbo       buffer object to map
+ * \param offset    byte offset into the buffer; must be in [0, vbo->Size)
+ * \param data      out: pointer to the mapped data at \p offset
+ * \param dataSize  out: number of bytes available from \p offset to the
+ *                  end of the buffer
+ * \return true on success; false if the offset is out of range or the
+ *         buffer could not be mapped (outputs are then left untouched)
+ */
+static
+bool vbo_bbox_get_vbo_ptr(struct gl_context* gc, struct gl_buffer_object* vbo,
+                          int offset, void** data, int* dataSize)
+{
+   assert(gc);
+   assert(vbo);
+   GLubyte* vboDataPtr = NULL;
+
+   /* Reject offsets before the start as well as past the end */
+   if (offset < 0 || offset >= vbo->Size) {
+      return false;
+   }
+   /* The data is only ever read, so request a read mapping.  Reading
+    * through a write-only mapping is not something GL guarantees to work.
+    */
+   vboDataPtr = _mesa_MapNamedBuffer(vbo->Name,GL_READ_ONLY_ARB);
+   if (vboDataPtr == NULL) {
+      return false;
+   }
+   *data = vboDataPtr + offset;
+   *dataSize = vbo->Size - offset;
+
+   return true;
+}
+
+/**
+ * Unmap a buffer object previously mapped with vbo_bbox_get_vbo_ptr().
+ *
+ * The buffer is looked up again by name; \p gc is only asserted.
+ */
+static inline
+void vbo_bbox_release_vbo_ptr(struct gl_context* gc,
+                              struct gl_buffer_object* vbo)
+{
+   assert(gc);
+   assert(vbo);
+
+   const GLuint name = vbo->Name;
+   _mesa_UnmapNamedBuffer_no_error(name);
+}
+
+/**
+ * Tell whether a run of 16-bit indices contains only degenerate triangles.
+ *
+ * A triangle is degenerate when at least two of its three indices are
+ * equal.  \p count_in is the number of indices and must be a multiple
+ * of 3.  An empty run is reported as degenerate.
+ */
+static
+bool vbo_bbox_is_degenerate(GLvoid *indices, GLuint count_in)
+{
+   assert(indices);
+   assert(count_in % 3 == 0);
+
+   const GLushort *tri = (const GLushort*)indices;
+   const GLushort *const last = tri + (count_in / 3) * 3;
+
+   for (; tri != last; tri += 3) {
+      /* Three distinct indices: a real triangle, stop looking */
+      if (tri[0] != tri[1] && tri[0] != tri[2] && tri[1] != tri[2]) {
+         return false;
+      }
+   }
+   return true;
+}
+
+
+/**
+ * Calculate the full bounding box and all sub bounding boxes for the
+ * geometry described by \p key, reading indices from \p indexData.
+ *
+ * The vertex buffer named in the key is mapped for the duration of the
+ * call.  The index range is cut into sub-boxes of bbox_split_size indices
+ * (the last one may be shorter); sub-boxes made only of degenerate
+ * triangles are flagged and skipped, extents are computed for the rest.
+ *
+ * \param gc             current context
+ * \param key            cache key describing the draw call
+ * \param data           cache entry receiving the boxes; data->valid is set
+ *                       to true on success
+ * \param indexData      pointer to the first index of the draw
+ * \param indexDataSize  number of bytes readable at \p indexData
+ * \return true on success; false if a buffer cannot be found or mapped,
+ *         the split size is not positive, or memory allocation fails
+ */
+static
+bool vbo_bbox_calculate_bounding_boxes_with_indices(struct gl_context *const gc,
+                                          const vbo_bbox_cache_key *key,
+                                          struct vbo_bbox_cache_data *data,
+                                          void* indexData,
+                                          int indexDataSize)
+{
+   assert(gc);
+
+   void* vertex_data = NULL;
+   int vertex_datasize = 0;
+   const int vert_per_subBbox = mesa_bbox_env_variables.bbox_split_size;
+   int sub_bbox_cnt;
+   int *subbox_array;
+   int non_degen_count = 0;
+
+   /* A split size of zero (or less) would divide by zero below */
+   if (vert_per_subBbox <= 0) {
+      return false;
+   }
+   sub_bbox_cnt = (key->count + vert_per_subBbox -1)/vert_per_subBbox;
+
+   assert(sub_bbox_cnt);
+
+   struct gl_buffer_object* element_buffer = _mesa_lookup_bufferobj(gc,
+      key->element_buf_name);
+
+   struct gl_buffer_object * vertexattrib_buffer = _mesa_lookup_bufferobj(gc,
+      key->vertex_buf_name);
+
+   if (element_buffer == NULL || vertexattrib_buffer == NULL) {
+      return false;
+   }
+
+   if (!vbo_bbox_get_vbo_ptr(gc, vertexattrib_buffer,(int) key->offset,
+                             &vertex_data, &vertex_datasize)) {
+      return false;
+   }
+
+   assert(vertex_data);
+   assert(vertex_datasize > 0);
+   assert(indexData);
+   assert(indexDataSize > 0);
+   assert(key->indices_type == GL_UNSIGNED_SHORT);
+   /* An index range that ends exactly at the end of the data is valid */
+   assert(indexDataSize >= key->count * (int)key->type_size);
+
+   /* Allocate memory for bounding boxes */
+   if (!vbo_init_sub_bbox_array(sub_bbox_cnt,data)) {
+      vbo_bbox_release_vbo_ptr(gc, vertexattrib_buffer);
+      return false;
+   }
+   /* Initialize size of bounding boxes; only the last one may be short */
+   for (int i = 0; i < sub_bbox_cnt; i++) {
+      data->sub_bbox_array[i].vert_count = (i==sub_bbox_cnt-1)?
+         (key->count - i*vert_per_subBbox):vert_per_subBbox;
+      data->sub_bbox_array[i].start_offset = i*vert_per_subBbox * key->type_size;
+   }
+
+   /* List of sub-box numbers that still need their extents computed */
+   subbox_array = malloc(sub_bbox_cnt * sizeof(int));
+   if (subbox_array == NULL) {
+      vbo_bbox_release_vbo_ptr(gc, vertexattrib_buffer);
+      return false;
+   }
+
+   /* Check if all triangles within a bbox are degenerate (i.e. triangles
+    * with zero area) and collect the ones that are not
+    */
+   for (int i = 0; i < sub_bbox_cnt; i++) {
+      GLubyte* ptr = (GLubyte *)indexData;
+      data->sub_bbox_array[i].is_degenerate =
+         vbo_bbox_is_degenerate((ptr + data->sub_bbox_array[i].start_offset),
+                                data->sub_bbox_array[i].vert_count);
+
+      if(!data->sub_bbox_array[i].is_degenerate)
+      {
+         subbox_array[non_degen_count++] = i;
+      }
+   }
+
+   float tmp_buf_min[8] = {0.0};
+   float tmp_buf_max[8] = {0.0};
+
+#ifdef __AVX__
+   /* Sub-boxes are processed two at a time when their sizes match */
+   int odd = non_degen_count % 2;
+   int num_iter = non_degen_count/2;
+   int iter;
+   int idx;
+
+   fullMin = _mm256_set1_ps(FLT_MAX);
+   fullMax = _mm256_set1_ps(-FLT_MAX);
+   for(iter = 0; iter < num_iter;iter++){
+      idx = 2*iter;
+      if(data->sub_bbox_array[subbox_array[idx]].vert_count ==
+         data->sub_bbox_array[subbox_array[idx+1]].vert_count)
+      {
+         /* call the algorithm with the count */
+         vbo_bbox_calc_subbox_coordinates(
+            vert_per_subBbox,
+            data->sub_bbox_array[subbox_array[idx]].vert_count,
+            subbox_array[idx],
+            subbox_array[idx+1],
+            (GLushort*)indexData,
+            (GLfloat*)vertex_data,
+            key->stride,
+            data
+            );
+      }
+      else
+      {
+         /* Different sizes: run each sub-box on its own by passing the
+          * same number for both lanes
+          */
+         vbo_bbox_calc_subbox_coordinates(vert_per_subBbox,
+            data->sub_bbox_array[subbox_array[idx]].vert_count,
+            subbox_array[idx],
+            subbox_array[idx],
+            (GLushort*)indexData,
+            (GLfloat*)vertex_data,
+            key->stride,
+            data);
+
+         vbo_bbox_calc_subbox_coordinates(vert_per_subBbox,
+            data->sub_bbox_array[subbox_array[idx+1]].vert_count,
+            subbox_array[idx+1],
+            subbox_array[idx+1],
+            (GLushort*)indexData,
+            (GLfloat*)vertex_data,
+            key->stride,
+            data);
+      }
+   }
+
+   if(odd)
+   {
+      idx = 2*num_iter;
+      /* call the last one separately */
+      vbo_bbox_calc_subbox_coordinates(vert_per_subBbox,
+         data->sub_bbox_array[subbox_array[idx]].vert_count,
+         subbox_array[idx],
+         subbox_array[idx],
+         (GLushort*)indexData,
+         (GLfloat*)vertex_data,
+         key->stride,
+         data);
+   }
+
+   /* Finding the minimum from the full box 256: fold both lanes */
+   __m128 firstlane = _mm256_extractf128_ps(fullMin,0);
+   __m128 secondlane = _mm256_extractf128_ps(fullMin,1);
+   firstlane = _mm_min_ps(firstlane,secondlane);
+   _mm_storeu_ps(tmp_buf_min,firstlane);
+
+   /* Finding the maximum from the full box 256: fold both lanes */
+   firstlane = _mm256_extractf128_ps(fullMax,0);
+   secondlane = _mm256_extractf128_ps(fullMax,1);
+   firstlane = _mm_max_ps(firstlane,secondlane);
+   _mm_storeu_ps(tmp_buf_max,firstlane);
+
+#else
+   fullMin = _mm_set1_ps(FLT_MAX);
+   fullMax = _mm_set1_ps(-FLT_MAX);
+
+   for(int i=0; i< non_degen_count; i++){
+      /* call the algorithm with the count */
+      vbo_bbox_calc_subbox_coordinates(vert_per_subBbox,
+         data->sub_bbox_array[subbox_array[i]].vert_count,
+         subbox_array[i],
+         (GLushort*)indexData,
+         (GLfloat*)vertex_data,
+         key->stride,
+         data);
+   }
+   _mm_storeu_ps(tmp_buf_min, fullMin);
+   _mm_storeu_ps(tmp_buf_max, fullMax);
+#endif
+
+   /* Set up bounding box as 8 vertices and store in bbox data */
+   vbo_bbox_create_bounding_box(tmp_buf_min, tmp_buf_max,
+      &(data->full_box.bounding_volume.vert_vec4[0]));
+
+   free(subbox_array);
+   vbo_bbox_release_vbo_ptr(gc, vertexattrib_buffer);
+   data->valid = true;
+
+   return true;
+}
+
+
+/**
+ * Calculate bounding boxes for the geometry described by \p key.
+ *
+ * Maps the element buffer named in the key at offset key->indices, runs
+ * vbo_bbox_calculate_bounding_boxes_with_indices() on the mapped indices
+ * and unmaps the buffer again.
+ *
+ * \return the result of the calculation, or false if the element buffer
+ *         cannot be found or mapped
+ */
+static inline
+bool vbo_bbox_calculate_bounding_boxes(struct gl_context *const gc,
+                                       const vbo_bbox_cache_key *key,
+                                       struct vbo_bbox_cache_data* data)
+{
+   assert(gc);
+   assert(key->indices_type == GL_UNSIGNED_SHORT);
+
+   void* mapped = NULL;
+   int mapped_size = 0;
+   bool ok = false;
+
+   struct gl_buffer_object* ibo =
+      _mesa_lookup_bufferobj(gc, key->element_buf_name);
+
+   if (ibo != NULL &&
+       vbo_bbox_get_vbo_ptr(gc, ibo, (int) key->indices,
+                            &mapped, &mapped_size)) {
+      ok = vbo_bbox_calculate_bounding_boxes_with_indices(gc, key, data,
+                                                          mapped, mapped_size);
+      vbo_bbox_release_vbo_ptr(gc, ibo);
+   }
+
+   return ok;
+}
+
+
+/**
+ * Create a new bounding box cache entry for \p key and insert it into
+ * the context's bbox cache.
+ *
+ * The entry starts out not valid and flagged as needing calculation; it
+ * records the current data_change_counter of the bound vertex and element
+ * buffers so later changes can be detected.
+ *
+ * \return the new entry, or NULL if no vertex/element buffer is bound or
+ *         memory allocation fails
+ */
+static
+struct vbo_bbox_cache_data* vbo_bbox_create_data(struct gl_context *const gc,
+                                                 const vbo_bbox_cache_key *key)
+{
+   assert(gc);
+
+   struct gl_buffer_object* element_buffer = vbo_get_current_element_buffer(gc);
+   struct gl_buffer_object* vertexattrib_buffer =
+      vbo_get_current_vertex_buffer(gc);
+
+   if ((vertexattrib_buffer == NULL) ||
+       (element_buffer == NULL)){
+      return NULL;
+   }
+
+   mesa_bbox_opt * BboxOpt = gc->Pipeline.BboxOpt;
+   assert(BboxOpt);
+
+   struct vbo_bbox_cache_data* data = (struct vbo_bbox_cache_data *) malloc(
+      sizeof (struct vbo_bbox_cache_data));
+
+   /* Check the allocation before touching any field */
+   if (data == NULL){
+      return NULL;
+   }
+
+   /* Initialize the cache data and variables in cache data */
+   data->full_box.is_degenerate = false;
+   data->valid = false;
+   data->need_new_calculation = true;
+   data->init_delaycnt = 0;
+   data->sub_bbox_cnt = 0;
+   data->sub_bbox_array = NULL;
+   data->drawcnt_bbox_helped = 0;
+   data->last_use_frame = 0;
+   data->vertpos_vbo_changecnt = vertexattrib_buffer->data_change_counter;
+   data->indices_vbo_changecnt = element_buffer->data_change_counter;
+
+   /* This defines for how many cache hits we wait before actually creating
+    * the data
+    */
+   data->init_delaylimit = vbo_bbox_get_delay(BboxOpt);
+
+   /* At this point data is not valid yet */
+   _mesa_bbox_cache_insert(gc,BboxOpt->cache,
+                           key,sizeof(vbo_bbox_cache_key),data);
+
+   return data;
+}
+
+
+/**
+ * Check whether a cache entry still matches its buffers.
+ *
+ * The element and vertex buffers named in \p key are looked up and their
+ * data_change_counter values are compared with the counters stored in
+ * \p data.
+ *
+ * \return true only if both buffers exist and both counters match
+ */
+static inline
+bool vbo_bbox_validate_data(
+   struct gl_context *const gc,
+   const vbo_bbox_cache_key *key,
+   struct vbo_bbox_cache_data* data)
+{
+   assert(gc);
+
+   struct gl_buffer_object* ibo =
+      _mesa_lookup_bufferobj(gc, key->element_buf_name);
+   struct gl_buffer_object* vbo =
+      _mesa_lookup_bufferobj(gc, key->vertex_buf_name);
+
+   if (!ibo || !vbo) {
+      return false;
+   }
+
+   return ibo->data_change_counter == data->indices_vbo_changecnt &&
+          vbo->data_change_counter == data->vertpos_vbo_changecnt;
+}
+
+/**
+ * Retrieve bounding box data for \p key from the cache.
+ *
+ * - No entry: one is created (initially flagged as needing calculation).
+ * - Entry that does not need calculation: returned if it is valid and its
+ *   buffers are unchanged, otherwise NULL is returned.
+ * - Entry that needs calculation: its delay counter is advanced; once the
+ *   delay limit is reached the boxes are calculated (AABB volume type
+ *   only) and the entry is returned on success.
+ *
+ * \return usable cache data, or NULL when the caller has to draw without
+ *         bounding box information
+ */
+static inline
+struct vbo_bbox_cache_data* vbo_bbox_get_bounding_boxes(
+   struct gl_context *const gc,
+   const vbo_bbox_cache_key *key)
+{
+   assert(gc);
+   mesa_bbox_opt * BboxOpt = gc->Pipeline.BboxOpt;
+   assert(BboxOpt);
+
+   struct vbo_bbox_cache_data* entry = _mesa_search_bbox_cache(BboxOpt->cache,
+      key, sizeof(vbo_bbox_cache_key));
+
+   if (entry == NULL) {
+      /* Data does not exist, create it */
+      entry = vbo_bbox_create_data(gc, key);
+      if (entry == NULL) {
+         return NULL;
+      }
+   }
+   else if (entry->need_new_calculation == false) {
+      if (!entry->valid) {
+         entry->valid = false;
+         return NULL;
+      }
+      /* Data is initialized and valid */
+      if (vbo_bbox_validate_data(gc, key, entry)) {
+         entry->mvp_valid = true;
+         return entry;
+      }
+      /* Buffers changed: falls through; nothing below applies because
+       * need_new_calculation is false
+       */
+   }
+
+   if (!entry->need_new_calculation) {
+      return NULL;
+   }
+   /* Wait for the configured number of hits before calculating */
+   if (entry->init_delaycnt++ < entry->init_delaylimit) {
+      return NULL;
+   }
+
+   entry->valid = false;
+   entry->need_new_calculation = false;
+   entry->mvp_valid = false;
+
+   if (!vbo_bbox_validate_data(gc, key, entry)) {
+      /* Re-sync the stored change counters with the buffers */
+      struct gl_buffer_object * ibo =
+         _mesa_lookup_bufferobj(gc,key->element_buf_name);
+      struct gl_buffer_object * vbo =
+         _mesa_lookup_bufferobj(gc,key->vertex_buf_name);
+
+      if (vbo == NULL || ibo == NULL) {
+         return NULL;
+      }
+      entry->vertpos_vbo_changecnt = vbo->data_change_counter;
+      entry->indices_vbo_changecnt = ibo->data_change_counter;
+   }
+
+   /* Calculate bounding boxes */
+   if (gc->volume_type == BOUNDING_VOLUME_AABB &&
+       vbo_bbox_calculate_bounding_boxes(gc, key, entry)) {
+      return entry;
+   }
+   return NULL;
+}
+
+/**
+ * Called when the contents of an element buffer are being updated.
+ * Every cache entry whose key refers to \p buffer is brought up to date.
+ *
+ * For each matching entry the stored index change counter is refreshed.
+ * If the entry is valid and its index range intersects the updated range
+ * it is invalidated and flagged for recalculation; when the superset()
+ * test on the two ranges also passes, the boxes are recalculated right
+ * away from the new data.
+ *
+ * \param gc      current context
+ * \param buffer  element buffer being updated
+ * \param data    the new data
+ * \param offset  byte offset of the update within the buffer
+ * \param size    size of the update in bytes
+ */
+void vbo_bbox_element_buffer_update(struct gl_context *const gc,
+                                    struct gl_buffer_object *buffer,
+                                    const void* data,
+                                    int offset,
+                                    int size)
+{
+   mesa_bbox_opt * BboxOpt = gc->Pipeline.BboxOpt;
+   if (!BboxOpt) {
+      return;
+   }
+
+   mesa_bbox_cache * buffer_map = BboxOpt->cache;
+   if (!buffer_map) {
+      return;
+   }
+
+   struct gl_segment updateSegment;
+   updateSegment.Left = offset;
+   updateSegment.Right = offset+size;
+
+   for (GLuint i = 0; i < buffer_map->size; i++) {
+      struct vbo_bbox_cache_data *c;
+      for (c = buffer_map->items[i]; c != NULL ; c = c->next) {
+         /* Process the entry the chain walk is currently on, not the
+          * head of the chain
+          */
+         struct vbo_bbox_cache_data * bbox_data = c;
+         struct gl_buffer_object *buffObj = _mesa_lookup_bufferobj(gc,
+            bbox_data->key->element_buf_name);
+
+         if(buffObj != buffer)
+            continue;
+
+         bbox_data->indices_vbo_changecnt = buffer->data_change_counter;
+
+         /* Byte range of the indices this entry was built from */
+         struct gl_segment element_bufferSegment;
+         element_bufferSegment.Left = bbox_data->key->indices;
+         element_bufferSegment.Right = bbox_data->key->indices +
+            (bbox_data->key->count * bbox_data->key->type_size);
+
+         if (bbox_data->valid &&
+             intersect(&element_bufferSegment, &updateSegment)) {
+            bbox_data->valid = false;
+            bbox_data->need_new_calculation = true;
+
+            if (superset(&element_bufferSegment,&updateSegment))
+            {
+               int offset_in_newdata = bbox_data->key->indices - offset;
+               int newsize = size - offset_in_newdata;
+
+               assert(offset_in_newdata >= 0);
+               assert(newsize > 0);
+
+               GLchar * start_data = (GLchar *)data + offset_in_newdata;
+
+               if (vbo_bbox_calculate_bounding_boxes_with_indices(gc,
+                     bbox_data->key, bbox_data, start_data, newsize)) {
+                  bbox_data->need_new_calculation = false;
+                  bbox_data->mvp_valid = false;
+               }
+            }
+         }
+      }
+   }
+}
+
+/**
+ * Generate the 6 clip planes from an MVP matrix.
+ *
+ * mvpin->m is read as 16 floats with element (row, col) at m[col*4 + row].
+ * Each plane is the last matrix row plus or minus one of the first three
+ * rows:
+ *    plane 0 (left)   = row3 + row0      plane 1 (right) = row3 - row0
+ *    plane 2 (top)    = row3 - row1      plane 3 (bottom)= row3 + row1
+ *    plane 4 (far)    = row3 - row2      plane 5 (near)  = row3 + row2
+ *
+ * For every plane a 3-bit octant is stored (bit 0/1/2 set when a/b/c is
+ * non-negative) before the plane is passed to normalize().
+ */
+static
+void vbo_bbox_get_frustum_from_mvp(vbo_bbox_frustum *frustum, GLmatrix* mvpin)
+{
+   /* Plane numbers receiving (row3 + row[axis]) and (row3 - row[axis]) */
+   static const int sum_plane[3] = { 0, 3, 5 };
+   static const int diff_plane[3] = { 1, 2, 4 };
+
+   GLfloat rows[4][4];
+
+   for (int r = 0; r < 4; r++) {
+      for (int c = 0; c < 4; c++) {
+         rows[r][c] = mvpin->m[c*4+r];
+      }
+   }
+
+   /* Frustum plane calculation */
+   for (int axis = 0; axis < 3; axis++) {
+      vbo_bbox_frustum_plane *sum = &(frustum->plane[sum_plane[axis]]);
+      vbo_bbox_frustum_plane *diff = &(frustum->plane[diff_plane[axis]]);
+
+      sum->a = rows[3][0] + rows[axis][0];
+      sum->b = rows[3][1] + rows[axis][1];
+      sum->c = rows[3][2] + rows[axis][2];
+      sum->d = rows[3][3] + rows[axis][3];
+
+      diff->a = rows[3][0] - rows[axis][0];
+      diff->b = rows[3][1] - rows[axis][1];
+      diff->c = rows[3][2] - rows[axis][2];
+      diff->d = rows[3][3] - rows[axis][3];
+   }
+
+   /* Calculate octants, then normalize each plane */
+   for (int n = 0; n < 6; ++n) {
+      int octant = 0;
+      if (frustum->plane[n].a >= 0)
+         octant |= 1;
+      if (frustum->plane[n].b >= 0)
+         octant |= 2;
+      if (frustum->plane[n].c >= 0)
+         octant |= 4;
+      frustum->octant[n] = octant;
+      normalize(&(frustum->plane[n]));
+   }
+}
+
+
+/**
+ * Calculate the signed distance from a point to a plane.
+ *
+ * Evaluates a*x + b*y + c*z + d; the w component of the point is not
+ * used.
+ */
+static inline
+float vbo_bbox_dist_from_point_to_plane(
+   const vbo_bbox_frustum_plane *plane,
+   const vbo_vec4f *point)
+{
+   const float ax = plane->a * point->x;
+   const float by = plane->b * point->y;
+   const float cz = plane->c * point->z;
+
+   return (ax + by + cz + plane->d);
+}
+
+
+/**
+ * Bounding box clipping algorithm.
+ *
+ * For each of the 6 frustum planes two box corners are tested: the corner
+ * selected by the plane's octant and the opposite corner (octant ^ 7).
+ *
+ * \return BBOX_CLIP_OUTSIDE   as soon as the octant corner is behind a plane
+ *         BBOX_CLIP_INTERSECT if no plane rejected the box but at least one
+ *                             opposite corner is behind a plane
+ *         BBOX_CLIP_INSIDE    otherwise
+ */
+static
+enum vbo_bbox_clip_result vbo_bbox_fast_clipping_test(bounding_info* bbox,
+                                            const vbo_bbox_frustum *frustum)
+{
+   assert(bbox);
+   vbo_vec4f *corners = bbox->bounding_volume.vert_vec4;
+   enum vbo_bbox_clip_result verdict = BBOX_CLIP_INSIDE;
+
+   for (int p = 0; p < 6; ++p)
+   {
+      const vbo_bbox_frustum_plane *pl = &(frustum->plane[p]);
+      const unsigned char oct = frustum->octant[p];
+
+      /* Corner furthest along the plane normal */
+      if (vbo_bbox_dist_from_point_to_plane(pl, &(corners[oct])) < 0.0f) {
+         return BBOX_CLIP_OUTSIDE;
+      }
+
+      /* Opposite corner */
+      if (vbo_bbox_dist_from_point_to_plane(pl, &(corners[oct ^ 7])) < 0.0f) {
+         verdict = BBOX_CLIP_INTERSECT;
+      }
+   }
+
+   return verdict;
+}
+
+/**
+ * Wrapper for the clip algorithm: boxes flagged as degenerate report
+ * BBOX_CLIP_DEGEN without being tested against the frustum.
+ */
+static inline
+enum vbo_bbox_clip_result vbo_bbox_clip(bounding_info* bbox,
+                                        const vbo_bbox_frustum *frustum)
+{
+   assert(bbox);
+   return bbox->is_degenerate ? BBOX_CLIP_DEGEN
+                              : vbo_bbox_fast_clipping_test(bbox, frustum);
+}
+
+/**
+ * Bounding box drawelements implementation.
+ *
+ * Draws through vbo_validated_drawrangeelements(), but first tries to cull
+ * geometry using cached bounding boxes:
+ *  - unsupported draw call, optimization not enabled, no usable cache data
+ *    or no MVP location: the draw is forwarded unchanged;
+ *  - full box outside the frustum: nothing is drawn;
+ *  - full box inside the frustum: the draw is forwarded unchanged;
+ *  - otherwise the sub-boxes are tested and only ranges of indices whose
+ *    sub-boxes are not outside/degenerate are drawn.
+ *
+ * Clip results are cached in the cache entry and only recomputed when the
+ * MVP uniform differs from the copy stored in the entry.
+ */
+void
+vbo_bbox_drawelements(struct gl_context *ctx, GLenum mode,
+ GLboolean index_bounds_valid, GLuint start, GLuint end,
+ GLsizei count, GLenum type, const GLvoid * indices,
+ GLint basevertex, GLuint numInstances,
+ GLuint baseInstance)
+{
+ assert(ctx);
+ GLuint type_size;
+ /* BOUNDING VOLUME: Checks would remain same I guess */
+ bool draw_call_supported = vbo_bbox_check_supported_draw_call(ctx,
+ mode,
+ count,
+ type,
+ indices,
+ basevertex);
+ /* Fall back to the regular path when the draw is not supported or the
+ * enable level is below MESA_BBOX_ENABLE_FORCE_CLIPPING */
+ if (!draw_call_supported ||
+ (mesa_bbox_env_variables.bbox_enable < MESA_BBOX_ENABLE_FORCE_CLIPPING)) {
+ MESA_BBOX("Aborting MESA_BBOX : BBOX Is not ENABLED !!! \n");
+ vbo_validated_drawrangeelements(ctx, mode, index_bounds_valid, start,
+ end, count, type, indices, basevertex, numInstances, baseInstance);
+ return;
+ }
+
+ /* Indices are treated as GLushort from here on */
+ type_size = sizeof(GLushort);
+ vbo_bbox_cache_key key;/* Need not initialize key its done in prepare key */
+ vbo_bbox_prepare_key(ctx, mode, count, type, type_size, indices, basevertex,
+ &key);
+ /*BOUNDING VOLUME: Call bounding volume creation based on bounding volume
+ * type */
+
+ struct vbo_bbox_cache_data* cached_bbox = vbo_bbox_get_bounding_boxes(ctx,
+ &key);
+
+ /* No usable bounding box data for this draw: draw everything */
+ if (cached_bbox == NULL) {
+ MESA_BBOX("Aborting MESA_BBOX : New Object not in Cache!!! \n");
+ vbo_validated_drawrangeelements(ctx, mode, index_bounds_valid, start, end,
+ count, type, indices, basevertex,
+ numInstances, baseInstance);
+ return;
+ }
+
+ /* loc is used below as an index into the program's UniformStorage */
+ GLint loc = vbo_bbox_get_mvp(ctx);
+ if (loc < 0) {
+ MESA_BBOX("MVP Location Error\n");
+ /* TBD: Free cache here */
+ vbo_validated_drawrangeelements(ctx, mode, index_bounds_valid, start,
+ end, count, type, indices, basevertex,
+ numInstances, baseInstance);
+ return;
+ }
+
+ GLfloat *mvp_ptr = (GLfloat *)
+ ctx->_Shader->ActiveProgram->data->UniformStorage[loc].storage;
+ /* frustum is only filled in, and only read, when the MVP changed */
+ vbo_bbox_frustum frustum;
+ bool recalculate_subbox_clip = false;
+
+ /* Recompute the frustum and the full-box clip result only if the cached
+ * MVP is flagged invalid or differs from the current uniform value */
+ if(cached_bbox->mvp_valid == false ||
+ memcmp(cached_bbox->mvp, mvp_ptr,16*sizeof(GLfloat))) {
+ memcpy(&(cached_bbox->mvp), mvp_ptr, 16*sizeof(GLfloat));
+ cached_bbox->mvpin.m = cached_bbox->mvp;
+ vbo_bbox_get_frustum_from_mvp(&frustum,&(cached_bbox->mvpin));
+
+ /* BOUNDING VOLUME: Call specific function to calculate clip results */
+
+ if (ctx->volume_type == BOUNDING_VOLUME_AABB) {
+ cached_bbox->full_box.clip_result =
+ vbo_bbox_clip(&(cached_bbox->full_box), &frustum);
+ }
+ recalculate_subbox_clip = true;
+ }
+
+ /* Act on the (possibly cached) clip result of the full box */
+ MESA_BBOX("MESA_BBOX: Full Box ClipResult:%d \n",
+ cached_bbox->full_box.clip_result);
+ switch (cached_bbox->full_box.clip_result) {
+ case BBOX_CLIP_OUTSIDE:
+ /* Geometry outside view frustum, dont draw it */
+ return;
+ case BBOX_CLIP_INSIDE:
+ vbo_validated_drawrangeelements(ctx, mode, index_bounds_valid, start,
+ end, count, type, indices, basevertex,
+ numInstances, baseInstance);
+ return;
+ /* case BBOX_CLIP_INTERSECT: */
+ default:
+ MESA_BBOX("MESA_BBOX: Vertices INTERSECTING with the frustum, going"
+ " for Sub Bboxes: \n");
+ break;
+ }
+
+ /* Batching state:
+ * count_to_send / offset_to_send - the open batch of indices to draw
+ * count_to_drop - indices of rejected sub-boxes seen since
+ * the last kept sub-box
+ * clipped - set once a batch was split because enough
+ * indices were dropped
+ * potential_clipped - total indices of all rejected sub-boxes
+ */
+ GLsizei count_to_send = 0;
+ GLsizei count_to_drop = 0;
+ GLvoid* offset_to_send = NULL;
+ bool clipped = false;
+ unsigned potential_clipped = 0;
+
+ for (int i = 0; i < cached_bbox->sub_bbox_cnt; i++) {
+ int new_count = cached_bbox->sub_bbox_array[i].vert_count;
+
+ /* Sub-box clip results are refreshed only when the MVP changed */
+ if(recalculate_subbox_clip) {
+ if (ctx->volume_type == BOUNDING_VOLUME_AABB) {
+ cached_bbox->sub_bbox_array[i].clip_result =
+ vbo_bbox_clip(&(cached_bbox->sub_bbox_array[i]), &frustum);
+ }
+ }
+
+ switch (cached_bbox->sub_bbox_array[i].clip_result)
+ {
+ case BBOX_CLIP_OUTSIDE:
+ count_to_drop += new_count;
+ potential_clipped += new_count;
+
+ break;
+ case BBOX_CLIP_DEGEN:
+ count_to_drop += new_count;
+ potential_clipped += new_count;
+ break;
+ default:
+ /* Sub bounding box intersects with view, draw/save it
+ * for later draw
+ */
+ if (count_to_send == 0) {
+ /* Starting new batch */
+ count_to_send = new_count;
+ offset_to_send = (char*)indices +
+ cached_bbox->sub_bbox_array[i].start_offset;
+ }
+ else {
+ /* A batch is open. If enough indices were dropped since the
+ * last kept sub-box, flush the batch and start a new one here;
+ * otherwise the dropped indices are added to the batch so it
+ * stays one contiguous range */
+ if (count_to_drop >=
+ (int)(mesa_bbox_env_variables.bbox_split_size *
+ BBOX_MIN_SPLITTED_DRAW_TO_DROP)) {
+
+ /* Draw accumulated geometry */
+ vbo_validated_drawrangeelements(ctx, mode,
+ index_bounds_valid, start, end, count_to_send, type,
+ offset_to_send, basevertex, numInstances, baseInstance);
+
+ /* Reset accumulated draws */
+ count_to_send = 0;
+ offset_to_send = (char*)indices +
+ cached_bbox->sub_bbox_array[i].start_offset;
+
+ clipped = true;
+ }
+ else
+ {
+ count_to_send += count_to_drop;
+ }
+
+ /* append to current batch of sent primitives */
+ count_to_send += new_count;
+ }
+
+ count_to_drop = 0;
+ break;
+ }
+ }
+
+ /* Flush the last open batch, if any */
+ if (count_to_send > 0)
+ {
+ vbo_validated_drawrangeelements(ctx, mode, index_bounds_valid, start,
+ end, count_to_send, type,
+ offset_to_send, basevertex,
+ numInstances, baseInstance);
+ }
+
+
+ /* Rejected sub-boxes at the tail count as clipped when there are
+ * enough of them */
+ clipped |= (count_to_drop >= (int)(mesa_bbox_env_variables.bbox_split_size *
+ BBOX_MIN_SPLITTED_DRAW_TO_DROP));
+
+ /* Update the per-entry counter: raised (up to a maximum) when this draw
+ * was clipped, lowered (down to a minimum) when no sub-box at all was
+ * rejected, left alone otherwise */
+ if (clipped)
+ {
+ cached_bbox->drawcnt_bbox_helped =
+ MIN(cached_bbox->drawcnt_bbox_helped + 1,
+ BBOX_MAX_VAL_FOR_EFFECTIVE_DRAWS_COUNTER);
+ }
+ else
+ {
+ if (potential_clipped == 0)
+ {
+ cached_bbox->drawcnt_bbox_helped =
+ MAX(cached_bbox->drawcnt_bbox_helped - 1,
+ BBOX_MIN_VAL_FOR_EFFECTIVE_DRAWS_COUNTER);
+ }
+ }
+}
+
+/**
+ * Initialization for bounding box optimization.
+ *
+ * Reads the tuning knobs from the environment (MESA_BBOX_OPT_ENABLE,
+ * MESA_OPT_SPLIT_SIZE, MESA_BBOX_MIN_VERTEX_CNT, MESA_OPT_TRACE_LEVEL),
+ * forces the feature off when gc->Const.EnableBoundingBoxCulling is not
+ * set, resolves the AUTO enable level and, for the AABB volume type,
+ * allocates gc->Pipeline.BboxOpt together with its cache.
+ *
+ * If any allocation fails the optimization is disabled and
+ * gc->Pipeline.BboxOpt is left NULL.
+ */
+void vbo_bbox_init(struct gl_context* const gc)
+{
+   assert(gc);
+
+   /* Do not call this local 'mesa_bbox_opt': that is the name of the
+    * optimization state type and the name is needed below.
+    */
+   const char * env_value = getenv("MESA_BBOX_OPT_ENABLE");
+   if (env_value !=NULL)
+      mesa_bbox_env_variables.bbox_enable = atoi(env_value);
+
+   env_value = getenv("MESA_OPT_SPLIT_SIZE");
+   if (env_value !=NULL)
+      mesa_bbox_env_variables.bbox_split_size = atoi(env_value);
+
+   env_value = getenv("MESA_BBOX_MIN_VERTEX_CNT");
+   if (env_value != NULL)
+      mesa_bbox_env_variables.bbox_min_vrtx_count = atoi(env_value);
+
+   /* The driver option overrides the environment */
+   if (!gc->Const.EnableBoundingBoxCulling) {
+      mesa_bbox_env_variables.bbox_enable = MESA_BBOX_ENABLE_OFF;
+      mesa_bbox_env_variables.bbox_split_size = 0x7fffffff;
+      mesa_bbox_env_variables.bbox_min_vrtx_count = 0;
+   }
+
+   env_value = getenv("MESA_OPT_TRACE_LEVEL");
+   if (env_value !=NULL)
+      mesa_bbox_env_variables.bbox_trace_level = atoi(env_value);
+
+   if (mesa_bbox_env_variables.bbox_enable == MESA_BBOX_ENABLE_AUTO) {
+      /* Android/Linux: enable of Gen7.5, Gen8 and Gen9 */
+      #if (IGFX_GEN == IGFX_GEN9)
+      mesa_bbox_env_variables.bbox_enable = MESA_BBOX_ENABLE_SMART;
+      #else
+      mesa_bbox_env_variables.bbox_enable = MESA_BBOX_ENABLE_OFF;
+      #endif
+      if (mesa_bbox_env_variables.bbox_enable == MESA_BBOX_ENABLE_SMART)
+      {
+         mesa_bbox_env_variables.bbox_enable =
+            MESA_BBOX_ENABLE_FORCE_CLIPPING;
+      }
+   }
+   /* BOUNDING VOLUME: Add initializations based on bounding volume here */
+   switch (gc->volume_type) {
+   case BOUNDING_VOLUME_AABB:
+      if (mesa_bbox_env_variables.bbox_enable) {
+         /* Round both counts down to a whole number of triangles */
+         mesa_bbox_env_variables.bbox_split_size =
+            (mesa_bbox_env_variables.bbox_split_size / 3) * 3;
+         mesa_bbox_env_variables.bbox_min_vrtx_count =
+            (mesa_bbox_env_variables.bbox_min_vrtx_count / 3) * 3;
+
+         assert(gc->Pipeline.BboxOpt == NULL);
+
+         /* Size the allocation from the pointee so it always matches the
+          * structure being stored
+          */
+         gc->Pipeline.BboxOpt = malloc(sizeof(*gc->Pipeline.BboxOpt));
+         if (!gc->Pipeline.BboxOpt) {
+            /* No memory, disable bbox optimization */
+            mesa_bbox_env_variables.bbox_enable =
+               MESA_BBOX_ENABLE_OFF;
+            mesa_bbox_env_variables.bbox_split_size = 0x7fffffff;
+            mesa_bbox_env_variables.bbox_min_vrtx_count = 0;
+            return;
+         }
+         else {
+            gc->Pipeline.BboxOpt->calc_delay = BBOX_CALC_MIN_DELAY;
+            gc->Pipeline.BboxOpt->cache = _mesa_new_bbox_cache();
+
+            if (!gc->Pipeline.BboxOpt->cache) {
+               /* Do not leave a dangling pointer behind, and disable the
+                * optimization just like the allocation failure above
+                */
+               free(gc->Pipeline.BboxOpt);
+               gc->Pipeline.BboxOpt = NULL;
+               mesa_bbox_env_variables.bbox_enable =
+                  MESA_BBOX_ENABLE_OFF;
+               mesa_bbox_env_variables.bbox_split_size = 0x7fffffff;
+               mesa_bbox_env_variables.bbox_min_vrtx_count = 0;
+               MESA_BBOX("MESA_BBOX: Cache creation failed\n");
+               return;
+            }
+         }
+         if (mesa_bbox_env_variables.bbox_trace_level > 0) {
+ MESA_BBOX("\nMESA BBOX OPT config: \
+ bboxOptEnable = %d, bboxOptMinVertexCount = %d,\
+ bboxOptSplitSize = %d\n",
+               mesa_bbox_env_variables.bbox_enable,
+               mesa_bbox_env_variables.bbox_min_vrtx_count,
+               mesa_bbox_env_variables.bbox_split_size);
+         }
+      }
+      /* Initialize some function pointers so that we dont have to
+       * check bounding volume type for every draw call */
+      break;
+   case BOUNDING_VOLUME_OBB:
+      /* Init for OBB bounding volume */
+      break;
+   case BOUNDING_VOLUME_SPHERE:
+      /* Init for SPHERE bounding volume */
+      break;
+   case BOUNDING_VOLUME_DOP:
+      /* Init for DOP bounding volume */
+      break;
+   default:
+      MESA_BBOX("BOUNDING VOLUME TYPE IS INCORRECT\n");
+      break;
+   }
+}
+
+
+/**
+ * Free resources associated with bounding box optimization.
+ * To be called when context is destroyed.
+ *
+ * Deletes the bbox cache (if any), frees the optimization state and
+ * resets gc->Pipeline.BboxOpt to NULL.  Does nothing when the state was
+ * never allocated.
+ */
+void vbo_bbox_free(struct gl_context* const gc)
+{
+   assert(gc);
+
+   mesa_bbox_opt *opt = gc->Pipeline.BboxOpt;
+   if (opt == NULL) {
+      return;
+   }
+
+   if (opt->cache != NULL) {
+      _mesa_delete_bbox_cache(gc, opt->cache);
+      opt->cache = NULL;
+   }
+
+   free(opt);
+   gc->Pipeline.BboxOpt = NULL;
+}
diff --git a/src/mesa/vbo/vbo_bbox.h b/src/mesa/vbo/vbo_bbox.h
new file mode 100644
index 0000000..1dd98f2
--- /dev/null
+++ b/src/mesa/vbo/vbo_bbox.h
@@ -0,0 +1,383 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * \brief VBO BBOX module implementation
+ * \author Kedar Karanje
+ */
+
+#ifndef _VBO_BBOX_H_
+#define _VBO_BBOX_H_
+
+#include <stdio.h>
+#include "main/arrayobj.h"
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/state.h"
+#include "main/varray.h"
+#include "main/bufferobj.h"
+#include "main/arrayobj.h"
+#include "main/enums.h"
+#include "main/macros.h"
+#include "main/transformfeedback.h"
+#include "main/mtypes.h"
+#include "compiler/glsl/ir_uniform.h"
+#include "main/shaderapi.h"
+#include "main/uniforms.h"
+#include "sys/param.h"
+#include "program/prog_cache.h"
+/* For Intrinsic functions */
+#include <smmintrin.h>
+#include <tmmintrin.h>
+#include <mmintrin.h>
+#include <immintrin.h>
+
+#if defined(__ANDROID__) || defined(ANDROID)
+#include <cutils/log.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FRUSTUM_PLANE_COUNT 6
+#define BBOX_CALC_MIN_DELAY 2
+#define BBOX_CALC_MAX_DELAY 5
+
+#undef LOG_TAG
+#define LOG_TAG "MESA_BBOX_LOG"
+
+/* Default, driver will select mode for given GPU and OS */
+#define MESA_BBOX_ENABLE_AUTO -1
+/* Disable BBOX */
+#define MESA_BBOX_ENABLE_OFF 0
+/* Enable BBOX, bot clipping */
+#define MESA_BBOX_ENABLE_SMART 1
+/* Enable BBOX, clipping will be done regardless of GPU utilization */
+#define MESA_BBOX_ENABLE_FORCE_CLIPPING 2
+/* Enable BBOX, force immediate bbox recalculation and clipping */
+#define MESA_BBOX_ENABLE_FORCE_RECALC 3
+
+/**
+ * MESA BBOX PRINTS
+ * Compile-time debug log level, selected by the value of MESA_BBOX_DEBUG:
+ *   2 - both MESA_BBOX_PRINT() and MESA_BBOX() produce output
+ *   1 - only MESA_BBOX_PRINT() produces output
+ *   any other value (default 0) - both macros expand to nothing
+ * Output goes through ALOGE() on Android builds and printf() otherwise.
+ */
+#define MESA_BBOX_DEBUG 0
+
+#if defined(__ANDROID__) || defined (ANDROID)
+
+#if MESA_BBOX_DEBUG == 2
+#define MESA_BBOX_PRINT(...) ALOGE(__VA_ARGS__)
+#define MESA_BBOX(...) ALOGE(__VA_ARGS__)
+
+#elif MESA_BBOX_DEBUG == 1
+#define MESA_BBOX_PRINT(...) ALOGE(__VA_ARGS__)
+#define MESA_BBOX(...)
+
+#else
+#define MESA_BBOX_PRINT(...)
+#define MESA_BBOX(...)
+#endif //MESA_BBOX_DEBUG
+
+#else //ANDROID
+
+#if MESA_BBOX_DEBUG == 2
+#define MESA_BBOX_PRINT(...) printf(__VA_ARGS__)
+#define MESA_BBOX(...) printf(__VA_ARGS__)
+
+#elif MESA_BBOX_DEBUG == 1
+#define MESA_BBOX_PRINT(...) printf(__VA_ARGS__)
+#define MESA_BBOX(...)
+
+#else
+#define MESA_BBOX_PRINT(...)
+#define MESA_BBOX(...)
+#endif //MESA_BBOX_DEBUG
+
+#endif //ANDROID
+
+/**
+ * MESA Bbox options environment variables
+ *
+ * NOTE(review): env_opt_val, env_opt and mesa_bbox_env_variables are
+ * defined (not declared extern) in this header, and the header is included
+ * from several .c files.  Toolchains building with -fno-common will report
+ * multiple definitions at link time; consider extern declarations here with
+ * a single definition in one .c file.
+ */
+int env_opt_val;
+const char *env_opt;
+
+/* Tunables controlling the bounding box optimization */
+typedef struct vbo_bbox_env_variable {
+ GLuint bbox_min_vrtx_count;
+ /* Holds one of the MESA_BBOX_ENABLE_* values.
+  * NOTE(review): the field is unsigned while MESA_BBOX_ENABLE_AUTO is -1 */
+ GLuint bbox_enable;
+ GLuint bbox_split_size;
+ /* Values > 0 enable printing of the active configuration at init time */
+ GLuint bbox_trace_level;
+} bbox_env;
+bbox_env mesa_bbox_env_variables;
+
+void
+vbo_bbox_init(struct gl_context *const gc);
+
+void
+vbo_bbox_free(struct gl_context *const gc);
+
+
+void
+vbo_bbox_element_buffer_update(struct gl_context *const gc,
+ struct gl_buffer_object *buffer,
+ const void* data,
+ int offset,
+ int size);
+
+void
+vbo_validated_drawrangeelements(struct gl_context *ctx,
+ GLenum mode,
+ GLboolean index_bounds_valid,
+ GLuint start,
+ GLuint end,
+ GLsizei count,
+ GLenum type,
+ const GLvoid * indices,
+ GLint basevertex,
+ GLuint numInstances,
+ GLuint baseInstance);
+
+void
+vbo_bbox_drawelements(struct gl_context *ctx,
+ GLenum mode,
+ GLboolean index_bounds_valid,
+ GLuint start,
+ GLuint end,
+ GLsizei count,
+ GLenum type,
+ const GLvoid * indices,
+ GLint basevertex,
+ GLuint numInstances,
+ GLuint baseInstance);
+
+/**
+ * Segment described by a pair of GLint bounds, Left and Right.
+ * NOTE(review): the original header called this "Segment Functions", but
+ * no functions operating on segments are declared in this header.
+ */
+typedef struct gl_segment
+{
+ GLint Left;
+ GLint Right;
+
+}segment;
+
+/**
+ * Clip algorithm result
+ */
+enum vbo_bbox_clip_result
+{
+ BBOX_CLIP_OUTSIDE=0,
+ BBOX_CLIP_INTERSECT=1,
+ BBOX_CLIP_INSIDE=2,
+ BBOX_CLIP_DEGEN = 3,
+ BBOX_CLIP_ERROR = 4,
+};
+
+typedef struct gl_matrixRec
+{
+ GLfloat melem[4][4];
+ GLenum matrixType;
+} gl_matrix;
+
+/**
+ * Structure to describe a plane by four float coefficients
+ * (presumably the plane equation a*x + b*y + c*z + d = 0 - TODO confirm
+ * against the code that fills it in)
+ */
+typedef struct vbo_bbox_frustum_plane
+{
+ GLfloat a, b, c, d;
+} vbo_bbox_frustum_plane;
+
+
+/**
+ * Planes and octants with their normals
+ *
+ * Both arrays hold FRUSTUM_PLANE_COUNT entries, one per frustum plane.
+ * planeCount is const, so it can only be given a value when the structure
+ * is initialized and the structure cannot be assigned as a whole afterwards.
+ */
+typedef struct vbo_bbox_frustum
+{
+ const unsigned char planeCount;
+ struct vbo_bbox_frustum_plane plane[FRUSTUM_PLANE_COUNT];
+ unsigned char octant[FRUSTUM_PLANE_COUNT];
+} vbo_bbox_frustum;
+
+/*
+ * Four-component float vector, accessible either as data[0..3] or through
+ * the named members x, y, z, w.  It is the element type of the axis aligned
+ * bounding box storage (vert_vec4[8]) in bounding_volume_info.
+ */
+typedef union vbo_vec4f {
+ GLfloat data[4];
+ struct {
+ GLfloat x, y, z, w;
+ };
+} vbo_vec4f;
+
+/*
+ * Oriented Bounding Box
+ */
+typedef struct oriented_bounding_box {
+ GLfloat x,y,z,w;
+} oriented_box;
+
+/*
+ * Spherical Bounding volume
+ */
+typedef struct spherical_bounding_volume {
+ GLfloat x,y,z,r; // x^2+y^2+z^2 = r^2
+} spherical_volume;
+
+/*
+ * 8-Discrete oriented polytopes
+ */
+typedef struct dop_bounding_volume {
+ GLfloat x,y,z,r,a,b;//TBD Not sure of the representation for 8-DOP yet!
+} dop_volume;
+
+/*
+ * Bounding volumes for AABB, OBB, SPHERE, DOP etc
+ *
+ * This is a union: all representations share the same storage, so only the
+ * member matching the bounding volume type in use is meaningful.
+ */
+typedef union bounding_volume_info {
+ vbo_vec4f vert_vec4[8]; /* Bbox min/max coordinates (8 entries) - TODO confirm layout */
+ oriented_box obb[8];
+ spherical_volume sphere;
+ dop_volume dop[8];
+} bounding_volume_info;
+
+
+/*
+ * Key identifying one cached draw call.  The cache hashes the key as raw
+ * GLuint words, so callers are presumably expected to zero the whole
+ * structure before filling it in - TODO confirm at the call sites.
+ */
+typedef struct vbo_bbox_cache_key
+{
+ /* From API call */
+ GLenum mode; /* GL_TRIANGLES is the only mode supported currently */
+ GLsizei count; /* Number of indices in draw call */
+ GLenum indices_type; /* must be GL_UNSIGNED_SHORT for now */
+ GLuint type_size;
+ GLuint type_sizeShift;
+ GLint indices; /* Offset to index VBO */
+ GLint basevertex; /* Only 0 supported for now. */
+
+ /* VBO objects names */
+ GLuint element_buf_name;
+ GLuint vertex_buf_name;
+
+ /* Vertex position attribute configuration */
+ GLint offset;
+ GLint size; /* Size of attribute, must be 3 for now */
+ GLenum vertDataType; /* Must be GL_FLOAT */
+ GLsizei stride; /* Any */
+ GLuint elementStride;
+} vbo_bbox_cache_key;
+
+/* One bounding volume together with the range of the draw call it covers */
+typedef struct bounding_info
+{
+ int vert_count; /* Number of vertices this bbox covers */
+ int start_offset; /* Start offset for this bbox */
+ bool is_degenerate; /* Triangle can not be formed */
+ enum vbo_bbox_clip_result clip_result;
+
+ bounding_volume_info bounding_volume; /* Bbox min/max coordinates */
+
+} bounding_info;
+
+/**
+ * Cached information about (multiple) bounding boxes
+ *
+ * One instance is one cache entry.  The cache frees key, sub_bbox_array
+ * and the entry itself when it is cleared or deleted.
+ */
+struct vbo_bbox_cache_data
+{
+ /* Valid data indicator, will be set to false if VBO have been
+ * modified by application
+ */
+ bool valid;
+
+ /* Indicates if bounding boxes were calculated for this geometry
+ * NOTE(review): the name suggests "true" means a (re)calculation is still
+ * pending - confirm the polarity against the users of this flag
+ */
+ bool need_new_calculation;
+
+ /* Controls delay with which bounding boxes are calculated */
+ int init_delaycnt;
+ int init_delaylimit;
+
+ /* Data change indicator for VBOs */
+ int vertpos_vbo_changecnt;
+ int indices_vbo_changecnt;
+
+ /* How many bounding boxes are stored for this geometry */
+ int sub_bbox_cnt;
+
+ /* How many draws call the bbox was effective */
+ int drawcnt_bbox_helped;
+
+ int last_use_frame;
+
+ /* Hash of the key, stored by _mesa_bbox_cache_insert and used to pick
+ * the bucket
+ */
+ GLuint hash;
+
+ /* Size in bytes of the buffer pointed to by key */
+ unsigned keysize;
+
+ /* Heap copy of the key, allocated by _mesa_bbox_cache_insert */
+ vbo_bbox_cache_key *key;
+
+ bool mvp_valid;
+ GLmatrix mvpin;
+ GLfloat mvp[16];
+ vbo_bbox_frustum frustum;
+
+ /* Pointer to array of bboxes */
+ bounding_info* sub_bbox_array;
+
+ /* Bounding box that covers whole geometry */
+ bounding_info full_box;
+
+ /* Next entry in the same hash bucket */
+ struct vbo_bbox_cache_data *next;
+};
+
+/*
+ * Hash table of cached bounding box entries, chained per bucket.
+ *   items   - array of `size` bucket heads
+ *   last    - entry returned by the most recent successful hashed lookup;
+ *             reset to NULL whenever the table is rehashed or cleared
+ *   size    - number of buckets in items
+ *   n_items - number of entries currently stored
+ */
+typedef struct mesa_bbox_cache {
+ struct vbo_bbox_cache_data **items;
+ struct vbo_bbox_cache_data *last;
+ GLuint size, n_items;
+} mesa_bbox_cache;
+
+/*
+ * Per-context state of the bounding box optimization.
+ *   cache      - bbox cache created with _mesa_new_bbox_cache()
+ *   calc_delay - initialized to BBOX_CALC_MIN_DELAY when the state is set up
+ */
+typedef struct mesa_bbox_opt
+{
+ mesa_bbox_cache * cache;
+ GLuint calc_delay;
+} mesa_bbox_opt;
+
+/**
+ * VBO BBox Cache functions, replica of program cache functions
+ *
+ */
+struct mesa_bbox_cache *
+_mesa_new_bbox_cache(void);
+
+void
+_mesa_delete_bbox_cache(struct gl_context *ctx,
+ struct mesa_bbox_cache *cache);
+
+struct vbo_bbox_cache_data *
+_mesa_search_bbox_cache(struct mesa_bbox_cache *cache,
+ const void *key, GLuint keysize);
+
+void
+_mesa_bbox_cache_insert(struct gl_context *ctx,
+ struct mesa_bbox_cache *cache,
+ const void *key,
+ GLuint keysize,
+ struct vbo_bbox_cache_data *CachedData);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif //_VBO_BBOX_H_
diff --git a/src/mesa/vbo/vbo_bbox_cache.c b/src/mesa/vbo/vbo_bbox_cache.c
new file mode 100644
index 0000000..09bd1dd
--- /dev/null
+++ b/src/mesa/vbo/vbo_bbox_cache.c
@@ -0,0 +1,195 @@
+
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * \brief VBO BBOX module implementation
+ * \author Kedar Karanje
+ */
+
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "vbo/vbo_bbox.h"
+
+#define CACHE_SIZE 17
+
+/**
+ * Compute the hash of a cache key.
+ *
+ * The key is consumed as a sequence of GLuint words; trailing bytes that do
+ * not fill a whole word do not contribute to the result.
+ *
+ * \param key       pointer to the key bytes
+ * \param key_size  size of the key in bytes, must be at least 4
+ * \return hash value of the key
+ */
+static GLuint
+bbox_hash_key(const void *key, GLuint key_size)
+{
+   const GLuint *word = (const GLuint *) key;
+   const GLuint *const end = word + key_size / sizeof(*word);
+   GLuint h = 0;
+
+   assert(key_size >= 4);
+
+   /* Add each word, then stir with a shift-add and a shift-xor */
+   while (word != end) {
+      h += *word++;
+      h += h << 10;
+      h ^= h >> 6;
+   }
+
+   return h;
+}
+
+
+/**
+ * Rebuild/expand the hash table to accommodate more entries.
+ *
+ * Allocates a bucket array three times the current size and relinks every
+ * cached entry into it according to its stored hash.  When the new array
+ * cannot be allocated the table is left as it is, so the cache keeps
+ * working with its current (smaller) number of buckets.
+ *
+ * \param cache  cache to grow
+ */
+static void
+bbox_rehash(struct mesa_bbox_cache *cache)
+{
+   struct vbo_bbox_cache_data **items;
+   struct vbo_bbox_cache_data *c, *next;
+   GLuint size, i;
+
+   cache->last = NULL;
+
+   size = cache->size * 3;
+   items = calloc(size, sizeof(*items));
+   if (!items) {
+      /* Out of memory: keep the existing table instead of writing
+       * through a NULL pointer below.
+       */
+      return;
+   }
+
+   /* Move every entry to the head of its bucket in the new table */
+   for (i = 0; i < cache->size; i++) {
+      for (c = cache->items[i]; c; c = next) {
+         next = c->next;
+         c->next = items[c->hash % size];
+         items[c->hash % size] = c;
+      }
+   }
+
+   free(cache->items);
+   cache->items = items;
+   cache->size = size;
+}
+
+
+/**
+ * Free every entry stored in the cache and leave all buckets empty.
+ * The bucket array itself is kept; ctx is not used.
+ */
+static void
+bbox_clear_cache(struct gl_context *ctx, mesa_bbox_cache *cache)
+{
+   GLuint bucket;
+
+   cache->last = NULL;
+
+   for (bucket = 0; bucket < cache->size; bucket++) {
+      struct vbo_bbox_cache_data *entry = cache->items[bucket];
+
+      while (entry) {
+         /* Remember the successor before the entry is released */
+         struct vbo_bbox_cache_data *const following = entry->next;
+
+         free(entry->key);
+         free(entry->sub_bbox_array);
+         free(entry);
+         entry = following;
+      }
+      cache->items[bucket] = NULL;
+   }
+
+   cache->n_items = 0;
+}
+
+
+
+/**
+ * Create an empty bounding box cache with CACHE_SIZE hash buckets.
+ *
+ * \return the new cache, or NULL when memory could not be allocated
+ */
+mesa_bbox_cache *
+_mesa_new_bbox_cache(void)
+{
+   mesa_bbox_cache *cache = CALLOC_STRUCT(mesa_bbox_cache);
+   if (!cache) {
+      MESA_BBOX("cache is Null in Func:%s\n",__func__);
+      return NULL;
+   }
+
+   cache->size = CACHE_SIZE;
+   /* items is an array of bucket head pointers, so each slot is sized as
+    * a pointer and not as a whole cache entry.
+    */
+   cache->items = calloc(cache->size, sizeof(*cache->items));
+   if (!cache->items) {
+      MESA_BBOX("Func:%s cache-size=%d "
+                "Cannot allocate items freeing cache\n",
+                __func__,cache->size);
+      free(cache);
+      return NULL;
+   }
+
+   /* %p with a void pointer is the portable way to print an address */
+   MESA_BBOX("Func:%s cache:%p cache->size=%d \n",
+             __func__,(void *)cache,cache->size);
+   return cache;
+}
+
+
+/**
+ * Destroy a cache created with _mesa_new_bbox_cache(), including every
+ * entry still stored in it.
+ *
+ * \param ctx    GL context, forwarded to the clearing helper
+ * \param cache  cache to destroy; NULL is accepted and ignored
+ */
+void
+_mesa_delete_bbox_cache(struct gl_context *ctx, mesa_bbox_cache *cache)
+{
+   /* Behave like free(): deleting nothing is not an error */
+   if (!cache)
+      return;
+
+   bbox_clear_cache(ctx, cache);
+   free(cache->items);
+   free(cache);
+}
+
+/**
+ * Look up the cache entry stored for a key.
+ *
+ * An entry matches only when its whole stored key is byte-for-byte equal
+ * to the given key, so two draws that differ in any key field (buffer
+ * names, stride, attribute layout, ...) never share an entry, regardless
+ * of which entry was found by the previous lookup.
+ *
+ * \param cache    cache to search
+ * \param key      key to look for
+ * \param keysize  size of the key in bytes
+ * \return the matching entry, or NULL when the key is not cached
+ */
+struct vbo_bbox_cache_data *
+_mesa_search_bbox_cache(mesa_bbox_cache *cache,
+                        const void *key, GLuint keysize)
+{
+   MESA_BBOX("Func:%s cache:%p \n",__func__,(void *)cache);
+
+   /* Fast path: the entry returned by the previous successful lookup */
+   if (cache->last &&
+       cache->last->keysize == keysize &&
+       memcmp(cache->last->key, key, keysize) == 0) {
+      return cache->last;
+   }
+   else {
+      const GLuint hash = bbox_hash_key(key, keysize);
+      struct vbo_bbox_cache_data *c;
+      MESA_BBOX("cache:%p,hash:%u,cache->size:%u\n",
+                (void *)cache,hash,cache->size);
+      for (c = cache->items[hash % cache->size]; c; c = c->next) {
+         /* The hash check is a cheap filter; the memcmp decides */
+         if (c->hash == hash &&
+             c->keysize == keysize &&
+             memcmp(c->key, key, keysize) == 0) {
+            cache->last = c;
+            return c;
+         }
+      }
+      return NULL;
+   }
+}
+
+
+/**
+ * Store an entry in the cache under the given key.
+ *
+ * The key is copied, so the caller's key buffer need not outlive the call.
+ * When the table is getting crowded it is either grown or, once it has
+ * reached 1000 buckets, emptied completely before the new entry is linked.
+ *
+ * If the key copy cannot be allocated the entry is NOT inserted: its key is
+ * left NULL, its keysize 0, and it remains owned by the caller.
+ *
+ * \param ctx         GL context, forwarded to the clearing helper
+ * \param cache       cache to insert into
+ * \param key         key bytes identifying the entry
+ * \param keysize     size of the key in bytes
+ * \param CachedData  entry to store; owned by the cache after insertion
+ */
+void
+_mesa_bbox_cache_insert(struct gl_context *ctx,struct mesa_bbox_cache *cache,
+                        const void *key, GLuint keysize,
+                        struct vbo_bbox_cache_data *CachedData)
+{
+   const GLuint hash = bbox_hash_key(key, keysize);
+
+   CachedData->hash = hash;
+
+   CachedData->key = calloc(1, keysize);
+   if (!CachedData->key) {
+      /* Out of memory: lookups dereference the stored key, so an entry
+       * without one must not be linked into the table.
+       */
+      CachedData->keysize = 0;
+      return;
+   }
+   memcpy(CachedData->key, key, keysize);
+   CachedData->keysize = keysize;
+
+   if (cache->n_items > cache->size * 1.5) {
+      if (cache->size < 1000)
+         bbox_rehash(cache);
+      else
+         bbox_clear_cache(ctx, cache);
+   }
+
+   /* Link the entry at the head of its bucket */
+   cache->n_items++;
+   CachedData->next = cache->items[hash % cache->size];
+   cache->items[hash % cache->size] = CachedData;
+}
diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
index cf9405d..8c608c2 100644
--- a/src/mesa/vbo/vbo_context.c
+++ b/src/mesa/vbo/vbo_context.c
@@ -36,6 +36,9 @@
#include "vbo.h"
#include "vbo_private.h"
+#ifdef MESA_BBOX_OPT
+#include "vbo_bbox.h"
+#endif
static GLuint
check_size(const GLfloat *attr)
@@ -199,6 +202,10 @@ _vbo_CreateContext(struct gl_context *ctx)
if (ctx->API == API_OPENGL_COMPAT)
vbo_save_init(ctx);
+#ifdef MESA_BBOX_OPT //Hard coded to AABB for now
+ ctx->volume_type = BOUNDING_VOLUME_AABB;
+#endif
+
vbo->VAO = _mesa_new_vao(ctx, ~((GLuint)0));
/* The exec VAO assumes to have all arributes bound to binding 0 */
for (unsigned i = 0; i < VERT_ATTRIB_MAX; ++i)
@@ -219,7 +226,9 @@ _vbo_DestroyContext(struct gl_context *ctx)
_ae_destroy_context(ctx);
ctx->aelt_context = NULL;
}
-
+#ifdef MESA_BBOX_OPT
+ vbo_bbox_free(ctx);
+#endif
if (vbo) {
_mesa_reference_buffer_object(ctx, &vbo->binding.BufferObj, NULL);
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 51c000e..3321a21 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -39,6 +39,10 @@
#include "main/macros.h"
#include "main/transformfeedback.h"
+#ifdef MESA_BBOX_OPT
+#include "vbo_bbox.h"
+#endif
+
typedef struct {
GLuint count;
GLuint primCount;
@@ -784,6 +788,16 @@ skip_draw_elements(struct gl_context *ctx, GLsizei count,
* Do the rendering for a glDrawElements or glDrawRangeElements call after
* we've validated buffer bounds, etc.
*/
+#ifdef MESA_BBOX_OPT
+void
+vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode,
+ GLboolean index_bounds_valid,
+ GLuint start, GLuint end,
+ GLsizei count, GLenum type,
+ const GLvoid * indices,
+ GLint basevertex, GLuint numInstances,
+ GLuint baseInstance)
+#else
static void
vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode,
GLboolean index_bounds_valid,
@@ -792,6 +806,7 @@ vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode,
const GLvoid * indices,
GLint basevertex, GLuint numInstances,
GLuint baseInstance)
+#endif
{
struct _mesa_index_buffer ib;
struct _mesa_prim prim;
@@ -997,6 +1012,11 @@ vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type,
_mesa_debug(ctx, "glDrawElements(%s, %u, %s, %p)\n",
_mesa_enum_to_string(mode), count,
_mesa_enum_to_string(type), indices);
+#ifdef MESA_BBOX_OPT
+ MESA_BBOX_PRINT("glDrawElements(%s, %u, %s, %p)\n",
+ _mesa_enum_to_string(mode), count,
+ _mesa_enum_to_string(type), indices);
+#endif
FLUSH_FOR_DRAW(ctx);
@@ -1011,9 +1031,13 @@ vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type,
if (!_mesa_validate_DrawElements(ctx, mode, count, type, indices))
return;
}
-
+#ifdef MESA_BBOX_OPT
+ vbo_bbox_drawelements(ctx, mode, GL_FALSE, 0, ~0,
+ count, type, indices, 0, 1, 0);
+#else
vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, 0, ~0,
count, type, indices, 0, 1, 0);
+#endif
}
@@ -1045,8 +1069,13 @@ vbo_exec_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type,
return;
}
+#ifdef MESA_BBOX_OPT
+ vbo_bbox_drawelements(ctx, mode, GL_FALSE, 0, ~0,
+ count, type, indices, basevertex, 1, 0);
+#else
vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, 0, ~0,
count, type, indices, basevertex, 1, 0);
+#endif
}
@@ -1078,9 +1107,13 @@ vbo_exec_DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type,
indices, numInstances))
return;
}
-
+#ifdef MESA_BBOX_OPT
+ vbo_bbox_drawelements(ctx, mode, GL_FALSE, 0, ~0,
+ count, type, indices, 0, numInstances, 0);
+#else
vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, 0, ~0,
count, type, indices, 0, numInstances, 0);
+#endif
}
diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf
index ad59efb..15f064b 100644
--- a/src/util/00-mesa-defaults.conf
+++ b/src/util/00-mesa-defaults.conf
@@ -197,6 +197,10 @@ TODO: document the other workarounds.
<option name="force_glsl_extensions_warn" value="true" />
</application>
+ <application name="GLBenchmark" executable="GLBenchmark">
+ <option name="enable_bounding_box_culling" value="true"/>
+ </application>
+
<!-- The GL thread whitelist is below, workarounds are above.
Keep it that way. -->
diff --git a/src/util/xmlpool/t_options.h b/src/util/xmlpool/t_options.h
index ecf495a..c3f8928 100644
--- a/src/util/xmlpool/t_options.h
+++ b/src/util/xmlpool/t_options.h
@@ -135,6 +135,11 @@ DRI_CONF_OPT_BEGIN_B(allow_glsl_cross_stage_interpolation_mismatch, def) \
DRI_CONF_DESC(en,gettext("Allow interpolation qualifier mismatch across shader stages")) \
DRI_CONF_OPT_END
+#define DRI_CONF_ENABLE_BOUNDING_BOX_CULLING(def) \
+DRI_CONF_OPT_BEGIN_B(enable_bounding_box_culling, def) \
+ DRI_CONF_DESC(en,gettext("Enable bounding box culling in CPU")) \
+DRI_CONF_OPT_END
+
/**
* \brief Image quality-related options
*/
--
2.7.4
More information about the mesa-dev
mailing list