[Mesa-dev] [PATCH] Bounding box avx2 intrinsic algorithm for openGL/GLES

kedar.j.karanje at intel.com kedar.j.karanje at intel.com
Fri Aug 31 05:25:21 UTC 2018


From: "J Karanje, Kedar" <kedar.j.karanje at intel.com>

The feature is compiled in by default at build time; however, the following
option must be added to drirc to enable it at runtime:
<option name="enable_bounding_box_culling" value="true"/>

vbo: Main algorithm and code to look up the MVP and vertex position locations
Build files: Flags to enable the BBOX code and check the AVX version
compiler: Code to recognize a simple shader
          (gl_Position is a simple function of the MVP and the vertex)
i965 & util: DRI query to check whether the feature is enabled

vbo: Implements a bounding box algorithm for Mesa. We hook into the default
    DrawElements and DrawRangeElements paths and obtain the locations of the
    MVP matrix and the vertex positions, together with the corresponding
    program. Using this data we re-create the frustum planes, build a box
    around the object and test the 8 box vertices against the frustum.
    Draw calls that are completely outside the view frustum are dropped, and
    objects that intersect the frustum planes are split into sub-boxes.

The current patch has been verified on KBL + Ubuntu 16.04. We noticed an
8~10% improvement in GFXBench T-Rex offscreen and ~2% in Manhattan offscreen.
Platforms where AVX2 is not supported should still see a ~6-8% improvement;
the other KPIs were not impacted.

Based on empirical data we have set the minimum vertex count to 999 and the
sub-box size to 198, as these values give the best results. We have also
implemented some level of caching for the box coordinates and the frustum
plane coordinates, and optimized some algorithms to use AVX2 when the target supports it.

The shader classification code currently lives in HIR; we have received review
comments asking us to move it to NIR.

Signed-off-by: Aravindan Muthukumar <aravindan.muthukumar at intel.com>
Signed-off-by: Yogesh Marathe <yogesh.marathe at intel.com>
---
 Android.common.mk                        |   19 +
 configure.ac                             |   34 +-
 src/compiler/glsl/ast_to_hir.cpp         |  168 +++-
 src/compiler/glsl/glsl_parser_extras.cpp |   10 +
 src/compiler/glsl/glsl_parser_extras.h   |    7 +
 src/compiler/glsl/linker.cpp             |   18 +
 src/intel/common/gen_debug.c             |    7 +
 src/mesa/Makefile.sources                |   11 +
 src/mesa/drivers/dri/i965/brw_context.c  |   17 +
 src/mesa/drivers/dri/i965/intel_screen.c |    4 +
 src/mesa/main/bufferobj.c                |   19 +
 src/mesa/main/mtypes.h                   |   51 +
 src/mesa/program/Android.mk              |    1 +
 src/mesa/program/program.c               |    3 +
 src/mesa/vbo/vbo_bbox.c                  | 1538 ++++++++++++++++++++++++++++++
 src/mesa/vbo/vbo_bbox.h                  |  383 ++++++++
 src/mesa/vbo/vbo_bbox_cache.c            |  195 ++++
 src/mesa/vbo/vbo_context.c               |   11 +-
 src/mesa/vbo/vbo_exec_array.c            |   37 +-
 src/util/00-mesa-defaults.conf           |    4 +
 src/util/xmlpool/t_options.h             |    5 +
 21 files changed, 2535 insertions(+), 7 deletions(-)
 mode change 100644 => 100755 src/compiler/glsl/ast_to_hir.cpp
 create mode 100644 src/mesa/vbo/vbo_bbox.c
 create mode 100644 src/mesa/vbo/vbo_bbox.h
 create mode 100644 src/mesa/vbo/vbo_bbox_cache.c

diff --git a/Android.common.mk b/Android.common.mk
index aa1b266..efd6792 100644
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -21,6 +21,8 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 # DEALINGS IN THE SOFTWARE.
 
+MESA_BBOX_ENABLE=true
+
 ifeq ($(LOCAL_IS_HOST_MODULE),true)
 LOCAL_CFLAGS += -D_GNU_SOURCE
 endif
@@ -80,6 +82,10 @@ LOCAL_CFLAGS += \
 	-fno-trapping-math \
 	-Wno-sign-compare
 
+ifeq ($(MESA_BBOX_ENABLE),true)
+LOCAL_CFLAGS += -DMESA_BBOX_OPT
+endif
+
 LOCAL_CPPFLAGS += \
 	-D__STDC_CONSTANT_MACROS \
 	-D__STDC_FORMAT_MACROS \
@@ -87,6 +93,10 @@ LOCAL_CPPFLAGS += \
 	-Wno-error=non-virtual-dtor \
 	-Wno-non-virtual-dtor
 
+ifeq ($(MESA_BBOX_ENABLE),true)
+LOCAL_CPPFLAGS += -DMESA_BBOX_OPT
+endif
+
 # mesa requires at least c99 compiler
 LOCAL_CONLYFLAGS += \
 	-std=c99
@@ -98,6 +108,15 @@ ifeq ($(filter 5 6 7 8 9, $(MESA_ANDROID_MAJOR_VERSION)),)
 LOCAL_CFLAGS += -DHAVE_TIMESPEC_GET
 endif
 
+ifeq ($(MESA_BBOX_ENABLE),true)
+#if defined(CONFIG_AS_AVX)
+LOCAL_CONLYFLAGS += -mavx
+#elif
+LOCAL_CONLYFLAGS += -msse4.1
+#endif
+endif
+
+
 ifeq ($(strip $(MESA_ENABLE_ASM)),true)
 ifeq ($(TARGET_ARCH),x86)
 LOCAL_CFLAGS += \
diff --git a/configure.ac b/configure.ac
index 4d9d9e5..dcdbcf3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -278,7 +278,8 @@ _SAVE_LDFLAGS="$LDFLAGS"
 _SAVE_CPPFLAGS="$CPPFLAGS"
 
 dnl Compiler macros
-DEFINES="-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS"
+DEFINES="-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DMESA_BBOX_OPT"
+dnl DEFINES="-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS"
 AC_SUBST([DEFINES])
 android=no
 case "$host_os" in
@@ -295,10 +296,38 @@ esac
 
 AM_CONDITIONAL(HAVE_ANDROID, test "x$android" = xyes)
 
+
+dnl Conditional parameters for enabling BBOX file compilation in Makefile
+dnl bbox=yes
+dnl MESA_BBOX_ENABLE=true
+dnl AM_CONDITIONAL([MESA_BBOX_ENABLE], [test x$bbox= xyes])
+
 dnl
 dnl Check compiler flags
 dnl
-AX_CHECK_COMPILE_FLAG([-Wall],                                 [CFLAGS="$CFLAGS -Wall"])
+
+AC_ARG_WITH([swr-archs],
+    [AS_HELP_STRING([--with-swr-archs@<:@=DIRS...@:>@],
+        [comma delimited swr architectures list, e.g.
+        "avx,avx2,sse4.1,sse4.2" @<:@default="sse4.1,sse4.2"@:>@])],
+    [with_swr_archs="$withval"],
+    [with_swr_archs="sse4.1,sse4.2"])
+
+swr_archs=`IFS=', '; echo $with_swr_archs`
+if test "$swr_archs" = "avx"; then
+AX_CHECK_COMPILE_FLAG([-Wall],                                 [CFLAGS="$CFLAGS -Wall -msse4.1 -mavx"])
+AX_CHECK_COMPILE_FLAG([-Wall],                                 [CXXFLAGS="$CXXFLAGS -Wall -msse4.1 -mavx"])
+elif test "$swr_archs" = "avx2"; then
+AX_CHECK_COMPILE_FLAG([-Wall],                                 [CFLAGS="$CFLAGS -Wall -msse4.1 -mavx -mavx2"])
+AX_CHECK_COMPILE_FLAG([-Wall],                                 [CXXFLAGS="$CXXFLAGS -Wall -msse4.1 -mavx -mavx2"])
+elif test "$swr_archs" = "sse4.1"; then
+AX_CHECK_COMPILE_FLAG([-Wall],                                 [CFLAGS="$CFLAGS -Wall -msse4.1"])
+AX_CHECK_COMPILE_FLAG([-Wall],                                 [CXXFLAGS="$CXXFLAGS -Wall -msse4.1"])
+elif test "$swr_archs" = "sse4.2"; then
+AX_CHECK_COMPILE_FLAG([-Wall],                                 [CFLAGS="$CFLAGS -Wall -msse4.1 -msse4.2"])
+AX_CHECK_COMPILE_FLAG([-Wall],                                 [CXXFLAGS="$CXXFLAGS -Wall -msse4.1 -msse4.2"])
+fi
+
 AX_CHECK_COMPILE_FLAG([-Werror=implicit-function-declaration], [CFLAGS="$CFLAGS -Werror=implicit-function-declaration"])
 AX_CHECK_COMPILE_FLAG([-Werror=missing-prototypes],            [CFLAGS="$CFLAGS -Werror=missing-prototypes"])
 AX_CHECK_COMPILE_FLAG([-Wmissing-prototypes],                  [CFLAGS="$CFLAGS -Wmissing-prototypes"])
@@ -313,7 +342,6 @@ dnl
 dnl Check C++ compiler flags
 dnl
 AC_LANG_PUSH([C++])
-AX_CHECK_COMPILE_FLAG([-Wall],                                 [CXXFLAGS="$CXXFLAGS -Wall"])
 AX_CHECK_COMPILE_FLAG([-fno-math-errno],                       [CXXFLAGS="$CXXFLAGS -fno-math-errno"])
 AX_CHECK_COMPILE_FLAG([-fno-trapping-math],                    [CXXFLAGS="$CXXFLAGS -fno-trapping-math"])
 AX_CHECK_COMPILE_FLAG([-fvisibility=hidden],                   [VISIBILITY_CXXFLAGS="-fvisibility=hidden"])
diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
old mode 100644
new mode 100755
index 5d3f10b..f4e8dea
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -62,7 +62,6 @@
 #include "builtin_functions.h"
 
 using namespace ir_builder;
-
 static void
 detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,
                                exec_list *instructions);
@@ -1325,6 +1324,124 @@ ast_expression::set_is_lhs(bool new_value)
       this->subexpressions[0]->set_is_lhs(new_value);
 }
 
+#ifdef MESA_BBOX_OPT
+static bool
+bbox_copy_name(char *dst, size_t dst_size, const char *src)
+{
+   /* Store src in the fixed-size buffer dst. A NULL or too-long name is
+    * rejected (dst is emptied) instead of being silently truncated. */
+   const bool fits = src != NULL && strlen(src) < dst_size;
+   /* strncpy() NUL-pads the rest of dst, wiping any earlier, longer name. */
+   strncpy(dst, fits ? src : "", dst_size);
+   return fits;
+}
+
+/*
+ * Check whether the assignment rooted at simple_ast_root has the shape
+ * "gl_Position = <mat4> * <vec4>".
+ *
+ * instructions/state are forwarded to hir() when the two operands of the
+ * multiplication are lowered to obtain their types.
+ *
+ * Side effects: the matrix operand's identifier may be stored in
+ * state->stateMVP, and the last identifier among the vec4 operand's
+ * sub-expressions in state->stateVertPosition.
+ *
+ * Returns true when gl_Position is assigned the product directly. The
+ * "gl_Position = tmp;" form cannot match yet; see the NOTE in the body.
+ * NOTE(review): both operands are lowered with hir() here and presumably
+ * again by the caller; confirm that emitting their IR twice is harmless.
+ */
+static bool
+is_simple_shader(exec_list *instructions, ast_expression *simple_ast_root,
+                 struct _mesa_glsl_parse_state *state)
+{
+   ast_expression *const lhs = simple_ast_root->subexpressions[0];
+   ast_expression *const rhs = simple_ast_root->subexpressions[1];
+   /* Zero-filled so the strlen() below never reads indeterminate bytes.
+    * NOTE(review): this is a local, so a name stored by one call is gone
+    * by the next; recognising "tmp = M * v; gl_Position = tmp;" presumably
+    * needs this buffer to live in the parse state instead. */
+   char temp_identifier[100] = {'\0'};
+
+   lhs->set_is_lhs(true);
+
+   if (rhs->oper != ast_mul) {
+      /* Candidate for the "gl_Position = tmp;" form. */
+      const char *const lhs_name = lhs->primary_expression.identifier;
+      if (lhs_name == NULL || strcmp(lhs_name, "gl_Position") != 0 ||
+          strlen(temp_identifier) == 0 || rhs->oper != ast_identifier)
+         return false;
+      return strcmp(temp_identifier, rhs->primary_expression.identifier) == 0;
+   }
+
+   ast_expression *const matrix = rhs->subexpressions[0];
+   ast_expression *const vector = rhs->subexpressions[1];
+   ir_rvalue *const matrix_ir = matrix->hir(instructions, state);
+   ir_rvalue *const vector_ir = vector->hir(instructions, state);
+   const char *const matrix_name = matrix->primary_expression.identifier;
+
+   if (matrix_ir->type->gl_type == GL_FLOAT_MAT4 &&
+       vector_ir->type->gl_type == GL_FLOAT_VEC4) {
+      foreach_list_typed (ast_node, ast, link, &vector->expressions) {
+         const ast_expression *const arg = (const ast_expression *)ast;
+
+         if (arg->oper != ast_identifier &&
+             arg->oper != ast_int_constant &&
+             arg->oper != ast_float_constant)
+            return false;
+         if (arg->oper == ast_identifier &&
+             arg->primary_expression.identifier != NULL &&
+             !bbox_copy_name(state->stateVertPosition,
+                             sizeof(state->stateVertPosition),
+                             arg->primary_expression.identifier))
+            return false;
+      }
+   }
+
+   /* Only a named variable on the left-hand side can qualify. */
+   if (lhs->oper != ast_identifier ||
+       lhs->primary_expression.identifier == NULL)
+      return false;
+   /* Kept for the "gl_Position = tmp;" check; see the NOTE above. */
+   (void) bbox_copy_name(temp_identifier, sizeof(temp_identifier),
+                         lhs->primary_expression.identifier);
+
+   if (strcmp(lhs->primary_expression.identifier, "gl_Position") == 0)
+      return bbox_copy_name(state->stateMVP, sizeof(state->stateMVP),
+                            matrix_name);
+
+   /* "tmp = M * v": remember the matrix name but answer "not simple yet"
+    * so that later assignments keep being examined. */
+   if (matrix->oper == ast_identifier)
+      (void) bbox_copy_name(state->stateMVP, sizeof(state->stateMVP),
+                            matrix_name);
+   return false;
+}
+/*
+ * Report whether the variable currently named by state->stateVertPosition
+ * resolves, in the symbol table, to a shader input (e.g. "in_position").
+ *
+ * state  parse state providing the symbol table and the vertex-position
+ *        name recorded by is_simple_shader().
+ *
+ * Returns true for a variable whose mode is ir_var_shader_in, and false
+ * when the name is unknown to the symbol table or has any other mode.
+ */
+
+static bool
+is_attribute(struct _mesa_glsl_parse_state *state)
+{
+   const ir_variable *const var =
+      state->symbols->get_variable(state->stateVertPosition);
+
+   /* The lookup alone answers the question. The ir_dereference_variable
+    * that used to be allocated here was never used and only consumed
+    * memory from the parse state's allocation context on every call. */
+   return var != NULL && var->data.mode == ir_var_shader_in;
+}
+#endif //MESA_BBOX_OPT
+
 ir_rvalue *
 ast_expression::do_hir(exec_list *instructions,
                        struct _mesa_glsl_parse_state *state,
@@ -1400,6 +1517,55 @@ ast_expression::do_hir(exec_list *instructions,
       unreachable("ast_aggregate: Should never get here.");
 
    case ast_assign: {
+#ifdef MESA_BBOX_OPT
+      if (state->stage == MESA_SHADER_VERTEX &&
+          !state->state_bbox_simple_shader &&
+          is_simple_shader(instructions,this,state) &&
+          !state->state_shader_analysis_complete)
+      {
+         state->state_bbox_simple_shader = true;
+      }
+
+      if (state->state_bbox_simple_shader &&
+          !state->state_shader_analysis_complete) {
+         if (!is_attribute(state)) {
+            if ((ir_dereference_variable *)op[0] != NULL &&
+                ((ir_dereference_variable *)op[0])->var->name != NULL &&
+                strlen(state->stateVertPosition) != 0)
+            if (!strcmp(((ir_dereference_variable *)op[0])->var->name,
+                          state->stateVertPosition) ) {
+               if (((ir_instruction *)op[0])->ir_type == ir_type_variable ||
+                    ((ir_instruction *)op[0])->ir_type ==
+                                      ir_type_dereference_variable &&
+                    (ir_dereference_variable *)op[1] != NULL &&
+                    ((ir_dereference_variable *)op[1])->ir_type ==
+                     ir_type_dereference_variable) {
+                  if ((((ir_instruction *)op[1])->ir_type == ir_type_variable ||
+                      ((ir_instruction *)op[1])->ir_type ==
+                        ir_type_dereference_variable) &&
+                       ((ir_dereference_variable *)op[1])->var->name) {
+                     strncpy((char *)state->stateVertPosition,
+                           (char *)((ir_dereference_variable *)op[1])->var->name,
+                        strlen(((ir_dereference_variable *)op[1])->var->name));
+                        state->state_shader_analysis_complete = true;
+                  }
+               }
+               else {
+                  state->state_bbox_simple_shader = false;
+                  state->state_shader_analysis_complete = true;
+               }
+            }
+            else {
+               state->state_bbox_simple_shader = false;
+               state->state_shader_analysis_complete = true;
+
+            }
+         }
+         else {
+            state->state_shader_analysis_complete = true;
+         }
+      }
+#endif //MESA_BBOX_OPT
       this->subexpressions[0]->set_is_lhs(true);
       op[0] = this->subexpressions[0]->hir(instructions, state);
       op[1] = this->subexpressions[1]->hir(instructions, state);
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp
index 42ba88f..c540b3a 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -2121,6 +2121,16 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
    if (!state->error && !state->translation_unit.is_empty())
       _mesa_ast_to_hir(shader->ir, state);
 
+#ifdef MESA_BBOX_OPT
+   shader->shader_bbox_simple_shader = state->state_bbox_simple_shader;
+   if (shader->shader_bbox_simple_shader)
+   {
+        strncpy((char *)shader->shaderMVP,state->stateMVP,
+                        strlen(state->stateMVP));
+        strncpy((char *)shader->shaderVertPosition,state->stateVertPosition,
+                        strlen(state->stateVertPosition));
+   }
+#endif //MESA_BBOX_OPT
    if (!state->error) {
       validate_ir_tree(shader->ir);
 
diff --git a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h
index da44d37..b951a66 100644
--- a/src/compiler/glsl/glsl_parser_extras.h
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -887,6 +887,13 @@ struct _mesa_glsl_parse_state {
     * so we can check totals aren't too large.
     */
    unsigned clip_dist_size, cull_dist_size;
+
+#ifdef MESA_BBOX_OPT
+   bool state_shader_analysis_complete;
+   bool state_bbox_simple_shader;
+   char stateMVP[20] = {'\0'};
+   char stateVertPosition[100] = {'\0'};
+#endif
 };
 
 # define YYLLOC_DEFAULT(Current, Rhs, N)                        \
diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index 3ce78fe..210f37f 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -4876,6 +4876,24 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
 
          switch (stage) {
          case MESA_SHADER_VERTEX:
+#ifdef MESA_BBOX_OPT
+            if (shader_list[MESA_SHADER_VERTEX][0] != NULL) {
+               sh->linked_bbox_simple_shader =
+                  shader_list[MESA_SHADER_VERTEX][0]->shader_bbox_simple_shader;
+                 /*TBD: How do we handle multiple Vertex Shaders
+                  *     being linked ??
+                  *     MVP Name copied to get MVP in VBO
+                  */
+                 strncpy((char *)sh->linkedshaderMVP,
+                     shader_list[MESA_SHADER_VERTEX][0]->shaderMVP,
+                     strlen(shader_list[MESA_SHADER_VERTEX][0]->shaderMVP));
+                 /* Vertex Position attribute name */
+                 strncpy((char *)sh->linkedshaderVertPosition,
+                     shader_list[MESA_SHADER_VERTEX][0]->shaderVertPosition,
+                     strlen(
+                     shader_list[MESA_SHADER_VERTEX][0]->shaderVertPosition));
+              }
+#endif
             validate_vertex_shader_executable(prog, sh, ctx);
             break;
          case MESA_SHADER_TESS_CTRL:
diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c
index a978f2f..c677208 100644
--- a/src/intel/common/gen_debug.c
+++ b/src/intel/common/gen_debug.c
@@ -106,6 +106,13 @@ intel_debug_flag_for_shader_stage(gl_shader_stage stage)
 static void
 brw_process_intel_debug_variable_once(void)
 {
+#if defined(__ANDROID__) || defined(ANDROID) /* Android builds only; the final setenv() argument 1 overwrites any value already set in the environment */
+   setenv("MESA_GLSL_CACHE_DISABLE","true",1);
+   setenv("MESA_BBOX_MIN_VERTEX_CNT", "999", 1);
+   setenv("MESA_BBOX_OPT_ENABLE", "3", 1);
+   setenv("MESA_OPT_SPLIT_SIZE", "198", 1);
+#endif
+
    INTEL_DEBUG = parse_debug_string(getenv("INTEL_DEBUG"), debug_control);
 }
 
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 0d3c277..8148622 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -1,4 +1,5 @@
 ### Lists of source files, included by Makefiles
+MESA_BBOX_ENABLE = false
 
 # this is part of MAIN_FILES
 MAIN_ES_FILES = \
@@ -400,6 +401,7 @@ TNL_FILES = \
 	tnl/t_vp_build.c \
 	tnl/t_vp_build.h
 
+
 VBO_FILES = \
 	vbo/vbo_attrib.h \
 	vbo/vbo_attrib_tmp.h \
@@ -422,6 +424,15 @@ VBO_FILES = \
 	vbo/vbo_save.h \
 	vbo/vbo_save_loopback.c
 
+#ifeq($(MESA_BBOX_ENABLE), true)
+
+VBO_FILES += \
+	vbo/vbo_bbox_cache.c \
+	vbo/vbo_bbox.c \
+	vbo/vbo_bbox.h
+
+#endif
+
 STATETRACKER_FILES = \
 	state_tracker/st_atifs_to_tgsi.c \
 	state_tracker/st_atifs_to_tgsi.h \
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 6ba64e4..03cf861 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -48,6 +48,10 @@
 
 #include "vbo/vbo.h"
 
+#ifdef MESA_BBOX_OPT
+#include "vbo/vbo_bbox.h"
+#endif
+
 #include "drivers/common/driverfuncs.h"
 #include "drivers/common/meta.h"
 #include "utils.h"
@@ -890,6 +894,11 @@ brw_process_driconf_options(struct brw_context *brw)
    ctx->Const.AllowGLSLCrossStageInterpolationMismatch =
       driQueryOptionb(options, "allow_glsl_cross_stage_interpolation_mismatch");
 
+#ifdef MESA_BBOX_OPT
+   ctx->Const.EnableBoundingBoxCulling =
+      driQueryOptionb(options, "enable_bounding_box_culling");
+#endif //MESA_BBOX_OPT
+
    ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20);
    driComputeOptionsSha1(&brw->screen->optionCache,
                          ctx->Const.dri_config_options_sha1);
@@ -1001,6 +1010,14 @@ brwCreateContext(gl_api api,
 
    brw_process_driconf_options(brw);
 
+#ifdef MESA_BBOX_OPT
+   if (ctx->Const.EnableBoundingBoxCulling) {
+        MESA_BBOX("EnableBoundingBoxCulling True\n");
+        vbo_bbox_init(ctx);
+   } else
+        MESA_BBOX("EnableBoundingBoxCulling False\n");
+#endif //MESA_BBOX_OPT
+
    if (INTEL_DEBUG & DEBUG_PERF)
       brw->perf_debug = true;
 
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index eaf5a3b..35c7624 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -88,6 +88,10 @@ DRI_CONF_BEGIN
       DRI_CONF_ALLOW_HIGHER_COMPAT_VERSION("false")
       DRI_CONF_FORCE_GLSL_ABS_SQRT("false")
 
+#ifdef MESA_BBOX_OPT
+      DRI_CONF_ENABLE_BOUNDING_BOX_CULLING("true")
+#endif
+
       DRI_CONF_OPT_BEGIN_B(shader_precompile, "true")
 	 DRI_CONF_DESC(en, "Perform code generation at shader link time.")
       DRI_CONF_OPT_END
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 1d1e51b..67a369b 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -48,6 +48,9 @@
 #include "varray.h"
 #include "util/u_atomic.h"
 
+#ifdef MESA_BBOX_OPT
+#include "vbo/vbo_bbox.h"
+#endif
 
 /* Debug flags */
 /*#define VBO_DEBUG*/
@@ -2254,6 +2257,10 @@ buffer_sub_data(GLenum target, GLuint buffer, GLintptr offset,
 
    if (no_error || validate_buffer_sub_data(ctx, bufObj, offset, size, func))
       _mesa_buffer_sub_data(ctx, bufObj, offset, size, data);
+
+#ifdef MESA_BBOX_OPT
+   vbo_bbox_element_buffer_update(ctx,bufObj,data,offset,size);
+#endif
 }
 
 
@@ -2589,9 +2596,17 @@ validate_and_unmap_buffer(struct gl_context *ctx,
 #endif
 
 #ifdef VBO_DEBUG
+#ifdef MESA_BBOX_OPT
+   if (bufObj->StorageFlags & GL_MAP_WRITE_BIT) {
+#else
    if (bufObj->AccessFlags & GL_MAP_WRITE_BIT) {
+#endif
       GLuint i, unchanged = 0;
+#ifdef MESA_BBOX_OPT
+      GLubyte *b = (GLubyte *) bufObj->Data;
+#else
       GLubyte *b = (GLubyte *) bufObj->Pointer;
+#endif
       GLint pos = -1;
       /* check which bytes changed */
       for (i = 0; i < bufObj->Size - 1; i++) {
@@ -3154,7 +3169,11 @@ map_buffer_range(struct gl_context *ctx, struct gl_buffer_object *bufObj,
       /* Access must be write only */
       if ((access & GL_MAP_WRITE_BIT) && (!(access & ~GL_MAP_WRITE_BIT))) {
          GLuint i;
+#ifdef MESA_BBOX_OPT
+         GLubyte *b = (GLubyte *) bufObj->Data;
+#else
          GLubyte *b = (GLubyte *) bufObj->Pointer;
+#endif
          for (i = 0; i < bufObj->Size; i++)
             b[i] = i & 0xff;
       }
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 9fd577d..c262173 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1359,6 +1359,9 @@ struct gl_buffer_object
    bool MinMaxCacheDirty;
 
    bool HandleAllocated; /**< GL_ARB_bindless_texture */
+#ifdef MESA_BBOX_OPT
+   int data_change_counter; //TBD: Same as RefCount to check
+#endif
 };
 
 
@@ -2535,6 +2538,12 @@ struct gl_linked_shader
     * stores information that is also needed during linking.
     */
    struct gl_shader_spirv_data *spirv_data;
+
+#ifdef MESA_BBOX_OPT
+   bool linked_bbox_simple_shader;
+   char linkedshaderMVP[20];
+   char linkedshaderVertPosition[100];
+#endif
 };
 
 
@@ -2631,6 +2640,11 @@ struct gl_shader
 
    /* ARB_gl_spirv related data */
    struct gl_shader_spirv_data *spirv_data;
+#ifdef MESA_BBOX_OPT
+   bool shader_bbox_simple_shader;
+   char shaderMVP[20];
+   char shaderVertPosition[100];
+#endif
 };
 
 
@@ -2902,6 +2916,9 @@ struct gl_shader_program_data
     * ARB_gl_spirv extension.
     */
    bool spirv;
+#ifdef MESA_BBOX_OPT
+   GLuint vbo_bbox_mvp_location;
+#endif
 };
 
 /**
@@ -3108,6 +3125,10 @@ struct gl_pipeline_shader_state
 
    /** Pipeline objects */
    struct _mesa_HashTable *Objects;
+#ifdef MESA_BBOX_OPT
+   /* Bounding box draw optimization control structure */
+   struct mesa_bbox_opt *BboxOpt;
+#endif
 };
 
 /**
@@ -4076,6 +4097,12 @@ struct gl_constants
 
    /** GL_ARB_gl_spirv */
    struct spirv_supported_capabilities SpirVCapabilities;
+
+#ifdef MESA_BBOX_OPT
+   /** MESA_BBOX_OPT Runtime enable_bounding_box_culling*/
+   bool EnableBoundingBoxCulling;
+#endif
+
 };
 
 
@@ -4720,6 +4747,21 @@ struct gl_semaphore_object
    GLuint Name;            /**< hash table ID/name */
 };
 
+#ifdef MESA_BBOX_OPT
+/**
+ * Bounding volume classification types
+ */
+typedef enum
+{
+    BOUNDING_VOLUME_AABB = 0,
+    BOUNDING_VOLUME_OBB = 1,
+    BOUNDING_VOLUME_SPHERE = 2,
+    BOUNDING_VOLUME_DOP = 3,
+    BOUNDING_VOULME_MIXED = 4, /* NOTE(review): "VOULME" looks like a typo for "VOLUME"; check all users before renaming */
+    BOUNDING_VOLUME_MAX = 5, /* one past the last type above; presumably a count/sentinel -- confirm */
+} bounding_volume_type;
+#endif //MESA_BBOX_OPT
+
 /**
  * Mesa rendering context.
  *
@@ -5096,6 +5138,15 @@ struct gl_context
    struct hash_table_u64 *ResidentTextureHandles;
    struct hash_table_u64 *ResidentImageHandles;
    /*@}*/
+
+#ifdef MESA_BBOX_OPT
+   /**
+    * Bounding volume type
+    *
+    */
+    bounding_volume_type volume_type;
+#endif //MESA_BBOX_OPT
+
 };
 
 /**
diff --git a/src/mesa/program/Android.mk b/src/mesa/program/Android.mk
index c6470e6..6489d3e 100644
--- a/src/mesa/program/Android.mk
+++ b/src/mesa/program/Android.mk
@@ -75,6 +75,7 @@ $(intermediates)/program/lex.yy.c: $(LOCAL_PATH)/program_lexer.l
 LOCAL_C_INCLUDES := \
 	$(MESA_TOP)/src/mapi \
 	$(MESA_TOP)/src/mesa \
+	$(MESA_TOP)/src/mesa/vbo \
 	$(MESA_TOP)/src/compiler/nir \
 	$(MESA_TOP)/src/gallium/auxiliary \
 	$(MESA_TOP)/src/gallium/include
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index 6ab1bf5..e0b3563 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -43,6 +43,9 @@
 #include "util/ralloc.h"
 #include "util/u_atomic.h"
 
+#ifdef MESA_BBOX_OPT
+#include "vbo/vbo_bbox.h"
+#endif
 
 /**
  * A pointer to this dummy program is put into the hash table when
diff --git a/src/mesa/vbo/vbo_bbox.c b/src/mesa/vbo/vbo_bbox.c
new file mode 100644
index 0000000..f1e153d
--- /dev/null
+++ b/src/mesa/vbo/vbo_bbox.c
@@ -0,0 +1,1538 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * \brief  VBO BBOX module implementation
+ * \author Kedar Karanje
+ */
+/* NOTE(review): the pragma below selects -O0 for this whole file; confirm that is intended. */
+#pragma GCC optimize (0)
+#include "vbo_bbox.h"
+#include <fcntl.h>
+#define  BBOX_MAX_FRAMES_TO_DYNAMICALLY_UPDATE (5)
+#define  BBOX_MIN_EFFECTIVE_DRAWS_TO_DYNAMICALLY_UPDATE (5)
+#define  BBOX_MIN_VAL_FOR_EFFECTIVE_DRAWS_COUNTER (-100)
+#define  BBOX_MAX_VAL_FOR_EFFECTIVE_DRAWS_COUNTER (100)
+#define  BBOX_MIN_GPU_HEAD_ROOM_TO_PROCESS_ELEMENT_BUFFER (100)
+
+/**
+  *  Min part of split draw that we want to drop
+  */
+#define  BBOX_MIN_SPLITTED_DRAW_TO_DROP (1)
+
+#ifdef __AVX__
+static __m256 fullMin;
+static __m256 fullMax;
+#else
+static __m128 fullMin;
+static __m128 fullMax;
+#endif
+
+/* Segment functions */
+/* True when the two segments overlap; merely touching end points do not count. */
+static inline GLboolean
+intersect(const struct gl_segment *s_src,const struct gl_segment *s_tar)
+{
+   return (s_tar->Right > s_src->Left && s_tar->Left < s_src->Right);
+}
+
+/* True when s_src fully covers s_tar (end points inclusive). */
+static inline GLboolean
+subsegment(const struct gl_segment *s_src,const struct gl_segment *s_tar)
+{
+  return (s_tar->Left >= s_src->Left && s_tar->Right <= s_src->Right);
+}
+
+/* True when s_tar fully covers s_src, i.e. subsegment() with swapped arguments. */
+static inline GLboolean
+superset(const struct gl_segment *s_src,const struct gl_segment *s_tar)
+{
+  return (s_tar->Left <= s_src->Left && s_tar->Right >= s_src->Right);
+}
+
+/* Scale the plane (a,b,c,d) so that its normal (a,b,c) has unit length. */
+static void
+normalize(vbo_bbox_frustum_plane *fr_plane)
+{
+   const GLfloat a = fr_plane->a;
+   const GLfloat b = fr_plane->b;
+   const GLfloat c = fr_plane->c;
+   const GLfloat len_sq = a*a + b*b + c*c;
+   if (!(len_sq > 0.0f))
+      return;   /* degenerate normal: leave untouched, avoids 1/0 (also skips NaN) */
+   const GLfloat norm = 1.0f/sqrt(len_sq);
+   fr_plane->a *= norm;
+   fr_plane->b *= norm;
+   fr_plane->c *= norm;
+   fr_plane->d *= norm;
+}
+/* Next calculation delay from opt->calc_delay; always 0 when recalculation is forced. */
+static inline
+int vbo_bbox_get_delay(struct mesa_bbox_opt *opt)
+{
+   /* bbox_enable at or above FORCE_RECALC: never delay. */
+   if (mesa_bbox_env_variables.bbox_enable >= MESA_BBOX_ENABLE_FORCE_RECALC)
+      return 0;
+
+   /* Wrap back to the minimum once the counter has run past the maximum. */
+   if (opt->calc_delay > BBOX_CALC_MAX_DELAY)
+      opt->calc_delay = BBOX_CALC_MIN_DELAY;
+   /* Hand out the current value and advance the counter for the next call. */
+   return opt->calc_delay++;
+}
+/* (Re)allocate data->sub_bbox_array for bbox_count entries; false on failure. */
+static inline
+bool vbo_init_sub_bbox_array(int bbox_count, struct vbo_bbox_cache_data* data)
+{
+   /* An existing array of exactly the requested size is reused as is. */
+   if (data->sub_bbox_array != NULL && bbox_count == data->sub_bbox_cnt)
+      return true;
+
+   /* Drop the old array and its count together so they can never disagree. */
+   free(data->sub_bbox_array);
+   data->sub_bbox_array = NULL;
+   data->sub_bbox_cnt = 0;
+
+   /* A negative count would wrap to a huge size in the multiplication below. */
+   if (bbox_count < 0)
+      return false;
+
+   data->sub_bbox_array = (struct bounding_info*) malloc(
+                                    bbox_count * sizeof (struct bounding_info));
+   if (!data->sub_bbox_array)
+      return false;
+   data->sub_bbox_cnt = bbox_count;
+   return true;
+}
+/* Uniform location of the MVP name recorded on the active vertex shader, or -1. */
+static inline GLint
+vbo_bbox_get_mvp(struct gl_context *ctx)
+{
+   struct gl_shader_program *prog = ctx->Shader.ActiveProgram;
+   struct gl_linked_shader *vs = prog ? prog->_LinkedShaders[MESA_SHADER_VERTEX] : NULL;
+   if (vs == NULL)
+      return -1;   /* no active program or no linked vertex stage */
+   return _mesa_GetUniformLocation(prog->Name, vs->linkedshaderMVP);
+}
+
+/*
+ * Returns the "simple shader" flag of the active program's linked vertex
+ * shader, or 0 when there is no active program or no vertex stage.
+ */
+static inline int
+vbo_is_simple_shader(struct gl_context *ctx)
+{
+   struct gl_shader_program *prog = ctx->Shader.ActiveProgram;
+   struct gl_linked_shader *vs = prog ? prog->_LinkedShaders[MESA_SHADER_VERTEX] : NULL;
+
+   return vs ? vs->linked_bbox_simple_shader : 0;
+}
+
+/**
+ * Fetch the vertex array object referenced by gc->Array.VAO.
+ */
+static inline struct gl_vertex_array_object*
+vbo_get_current_vao(struct gl_context *const gc)
+{
+   assert(gc);
+   /* Both the context and its VAO are only debug-asserted, not checked. */
+   assert(gc->Array.VAO);
+
+   return gc->Array.VAO;
+}
+
+/**
+ *  Returns the location of the "position" in the attributes of the
+ *  currently active program, or -1 when it cannot be determined.
+ */
+static inline
+int vbo_get_simple_vs_position_attr_location(
+    struct gl_context *const gc)
+{
+   assert(gc);
+   struct gl_shader_program *prog = gc->Shader.ActiveProgram;
+   struct gl_linked_shader *vs = prog ? prog->_LinkedShaders[MESA_SHADER_VERTEX] : NULL;
+   /* Without an active program or a vertex stage there is nothing to query. */
+   if (vs == NULL)
+      return -1;
+
+   GLint vertexPosLocation = _mesa_GetAttribLocation(prog->Name,
+                                             vs->linkedshaderVertPosition);
+   return (vertexPosLocation >= 0) ? vertexPosLocation : -1;
+}
+
+/**
+ *   Element (index) buffer object attached to the current VAO.
+ */
+static inline struct gl_buffer_object*
+vbo_get_current_element_buffer(struct gl_context *const gc)
+{
+   assert(gc);
+   struct gl_vertex_array_object* vao = vbo_get_current_vao(gc);
+   /* Only debug builds insist on a buffer; otherwise NULL is passed through. */
+   assert(vao->IndexBufferObj);
+   return vao->IndexBufferObj;
+}
+
+/**
+ *   Get the binding treated as "position": that of the lowest attrib set in _Enabled & VertexAttribBufferMask
+ */
+static inline struct gl_vertex_buffer_binding*
+vbo_get_current_vertex_buffer_binding_of_position(struct gl_context *const gc)
+{
+   assert(gc);
+   struct gl_vertex_array_object* vao = vbo_get_current_vao(gc);
+   /* NOTE(review): an empty mask makes ffs() return 0, which indexes VertexAttrib[-1]. */
+   GLbitfield mask = vao->_Enabled & vao->VertexAttribBufferMask;
+   const struct gl_array_attributes *attrib_array =
+                                           &vao->VertexAttrib[ffs(mask) - 1];
+   struct gl_vertex_buffer_binding *buffer_binding =
+                       &vao->BufferBinding[attrib_array->BufferBindingIndex];
+
+   return buffer_binding;
+}
+
+/**
+ * Buffer object of the VAO binding that is used for the vertex position.
+ */
+static inline struct gl_buffer_object*
+vbo_get_current_vertex_buffer(struct gl_context *const gc)
+{
+   assert(gc);
+   struct gl_vertex_buffer_binding* binding =
+               vbo_get_current_vertex_buffer_binding_of_position(gc);
+   assert(binding->BufferObj);
+   return binding->BufferObj;
+}
+
+
+/**
+ * Condition to enter bounding box optimization: true only when every check
+ * below passes (each rejection is logged through MESA_BBOX). The \p indices
+ * argument is accepted but not examined here.
+ */
+static inline bool
+vbo_bbox_check_supported_draw_call(struct gl_context *const gc,
+                                   GLenum mode, GLsizei count, GLenum type,
+                                   const GLvoid *indices, GLint basevertex)
+{
+   assert(gc);
+   struct gl_linked_shader *_LinkedShaders;
+
+   /* Check if the minimum vertex count is met. */
+   if (count < (GLsizei) mesa_bbox_env_variables.bbox_min_vrtx_count) {
+     /* Count is most common cause to bail out form optimization
+      * so should be first.
+      */
+     MESA_BBOX("Aborting MESA_BBOX :%d: Vertex count too small, minimum count = %d\n",
+                     count,mesa_bbox_env_variables.bbox_min_vrtx_count);
+     return false;
+   }
+
+   if (mode != GL_TRIANGLES) {
+      MESA_BBOX("Aborting MESA_BBOX :%d: Primitive mode is not GL_TRIANGLES, \
+                      mode = %d\n", count, mode);
+      return false;
+   }
+
+   /* Examine current shader */
+   if (!gc->_Shader->ActiveProgram) {
+      MESA_BBOX("Aborting MESA_BBOX:%d: No active GLSL program.\n", count);
+      return false;
+   }
+
+   /* BASIC Shader scenario is when we have just VS & FS */
+   if (gc->_Shader->CurrentProgram[MESA_SHADER_VERTEX] == NULL ||
+       gc->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT] == NULL ||
+       gc->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL] != NULL ||
+       gc->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL] != NULL ||
+       gc->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY] != NULL) {
+     MESA_BBOX("Aborting MESA_BBOX:%d: GLSL program must contain only vertex and \
+                     fragment shaders, shader scenario = \n", count );
+     return false;
+   }
+
+   /* The helpers below read gc->Shader (not gc->_Shader): it needs a linked VS. */
+   if (!gc->Shader.ActiveProgram ||
+       !gc->Shader.ActiveProgram->_LinkedShaders[MESA_SHADER_VERTEX]) {
+      MESA_BBOX("Aborting MESA_BBOX:%d: No linked vertex shader.\n", count);
+      return false;
+   }
+
+   if (!vbo_is_simple_shader(gc)) {
+      MESA_BBOX("Aborting MESA_BBOX:%d: GLSL vertex shader does not have simple \
+                 position calculation \n", count);
+      return false;
+   }
+   _LinkedShaders =
+           gc->Shader.ActiveProgram->_LinkedShaders[MESA_SHADER_VERTEX];
+   MESA_BBOX("MVP:%s, VertPos:%s\n",_LinkedShaders->linkedshaderMVP,
+               _LinkedShaders->linkedshaderVertPosition);
+
+   /* Examine element buffer */
+   struct gl_buffer_object* element_buffer = vbo_get_current_element_buffer(gc);
+   if ((!element_buffer) || element_buffer->Name == 0) {
+      MESA_BBOX("Aborting MESA_BBOX:%d: Element buffer name is 0\n", count);
+      return false;
+   }
+
+   if (!(element_buffer->StorageFlags &
+      (GL_CLIENT_STORAGE_BIT | GL_DYNAMIC_STORAGE_BIT))){
+       MESA_BBOX("Aborting MESA_BBOX:%d: Element buffer not resident: %#x\n", count,
+                     element_buffer->StorageFlags);
+       return false;
+   }
+
+   /* Get VertexPosLocation */
+   int vertexPosLocation = vbo_get_simple_vs_position_attr_location(gc);
+   if (vertexPosLocation < 0)
+   {
+      MESA_BBOX("Aborting MESA_BBOX:%d: VertexPosition Location is inValid:\n", count);
+      return false;
+   }
+
+   struct gl_vertex_array_object*  vao = vbo_get_current_vao(gc);
+   int posAttribMapMode =
+            _mesa_vao_attribute_map[vao->_AttributeMapMode][vertexPosLocation];
+
+   if (((vao->_Enabled >> posAttribMapMode) & 0x1) != 1)
+   {
+      MESA_BBOX("Aborting MESA_BBOX:%d: Vertex data does not come from VBO , GL-API:%d\n", count,gc->API);
+      /* This is not specific to Android but to the GLES API */
+      if (gc->API != API_OPENGLES && gc->API != API_OPENGLES2)
+          return false;
+   }
+
+   struct gl_buffer_object* vertexattrib_buffer  =
+                            vbo_get_current_vertex_buffer(gc);
+   /* A missing buffer can never be processed: it is dereferenced below. */
+   if (!vertexattrib_buffer) {
+      MESA_BBOX("Aborting MESA_BBOX:%d: No vertex buffer bound\n", count);
+      return false;
+   }
+   if (vertexattrib_buffer->Name == 0) {
+     MESA_BBOX("Aborting MESA_BBOX:%d: Vertex buffer %p name is %d\n", count,
+                     vertexattrib_buffer,vertexattrib_buffer->Name);
+#if !defined(__ANDROID__) || !defined(ANDROID)
+     return false;
+#endif
+   }
+
+   if (!(vertexattrib_buffer->StorageFlags &
+        (GL_CLIENT_STORAGE_BIT | GL_DYNAMIC_STORAGE_BIT))){
+       MESA_BBOX("Aborting MESA_BBOX:%d:Vertex buffer not resident %#x \n", count,
+                     vertexattrib_buffer->StorageFlags);
+       MESA_BBOX("Aborting MESA_BBOX:VAO AttributeMode is %d\n", vao->_AttributeMapMode);
+#if !defined(__ANDROID__) || !defined(ANDROID)
+       return false;
+#endif
+   }
+
+    /* Examine vertex position attribute configuration */
+   if (vao->VertexAttrib[posAttribMapMode].Enabled) {
+      if (vao->VertexAttrib[posAttribMapMode].Size != 3)
+      {
+          MESA_BBOX("Aborting MESA_BBOX:%d: Vertex attrib size :%d, only 3 supported\n",
+                         count, vao->VertexAttrib[posAttribMapMode].Size);
+#if !defined(__ANDROID__) || !defined(ANDROID)
+          return false;
+#endif
+      }
+      if (vao->VertexAttrib[posAttribMapMode].Type != GL_FLOAT)
+      {
+         MESA_BBOX("Aborting MESA_BBOX:%d: Vertex attrib type is %d, only GL_FLOAT \
+                 supported\n", count, vao->VertexAttrib[posAttribMapMode].Type);
+         return false;
+      }
+   }
+
+   if (type != GL_UNSIGNED_SHORT) {
+      MESA_BBOX("Aborting MESA_BBOX:%d: type is %d, only GL_UNSIGNED_SHORT \
+                 supported\n", count, type);
+      return false;
+   }
+   if (basevertex != 0) {
+      MESA_BBOX("Aborting MESA_BBOX:%d: basevertex is %d, only 0 supported\n",
+                count, basevertex);
+      return false;
+   }
+
+   /* If size ==3 and type == GL_FLOAT, then element stride must be 12. */
+   assert(vao->VertexAttrib[VERT_ATTRIB_POS].StrideB == 12);
+
+   /* When transform feedback is capturing we cannot do early clipping since
+   * xfb must write unclipped vertices
+   * Note - we could check for IsCapturing() but that would require
+   * more elaborate checking for VBO modifications.
+   */
+   if (gc->TransformFeedback.CurrentObject->Active) {
+      MESA_BBOX("MESA_BBOX:%d: Transform feedback is active, \
+                      cannot clip\n", count);
+      return false;
+   }
+   return true;
+}
+
+
+/**
+ *  Fill \p key with the parameters of the draw call and with the names,
+ *  offset and stride of the buffers it reads. Nothing is validated here.
+ */
+static inline
+void vbo_bbox_prepare_key(struct gl_context *const gc, GLenum mode,
+                          GLsizei count, GLenum type, GLuint type_size,
+                          const GLvoid *indices, GLint basevertex,
+                          vbo_bbox_cache_key *key)
+{
+   assert(gc);
+
+   /* Buffers feeding the draw: the indices and the binding used for positions. */
+   struct gl_buffer_object* index_bo = vbo_get_current_element_buffer(gc);
+   struct gl_vertex_buffer_binding* pos_binding =
+                       vbo_get_current_vertex_buffer_binding_of_position(gc);
+   struct gl_buffer_object* vertex_bo = pos_binding->BufferObj;
+   assert(vertex_bo);
+
+   /* Zero the whole key first (presumably because lookups use its raw bytes). */
+   memset(key,0,sizeof(vbo_bbox_cache_key));
+   key->element_buf_name = index_bo->Name;
+   key->vertex_buf_name = vertex_bo->Name;
+   key->offset = (GLint)pos_binding->Offset;
+   key->stride = pos_binding->Stride;
+
+   key->mode = mode;
+   key->count = count;
+   key->indices_type = type;
+   key->type_size = type_size;
+   /* The index "pointer" is stored as an integer, narrowed to GLint. */
+   key->indices = (GLint) (uintptr_t) indices;
+   key->basevertex = basevertex;
+}
+
+
+/**
+ *  Expand a min/max corner pair into the 8 corners of the box, written as
+ *  (x, y, z, 1.0). The order is significant: bit 0 of the corner index
+ *  selects the max X, bit 1 the max Y and bit 2 the max Z.
+ *
+ *  \param minVec3f   minimum x, y, z (3 floats are read)
+ *  \param maxVec3F   maximum x, y, z (3 floats are read)
+ *  \param vertices4  receives the 8 corners (32 floats are written)
+ */
+static
+void vbo_bbox_create_bounding_box(float* const minVec3f, float* const maxVec3F,
+                                  vbo_vec4f* vertices4)
+{
+   assert(minVec3f);
+   assert(maxVec3F);
+   assert(vertices4);
+
+   /* Read all six inputs before the first write, so the result does not
+    * depend on whether the output overlaps the inputs.
+    */
+   const float lo[3] = { minVec3f[0], minVec3f[1], minVec3f[2] };
+   const float hi[3] = { maxVec3F[0], maxVec3F[1], maxVec3F[2] };
+   float* out = (float*)vertices4;
+
+   for (int corner = 0; corner < 8; corner++) {
+      /* Each axis takes its max value when its bit is set in the index. */
+      *out++ = (corner & 1) ? hi[0] : lo[0];
+      *out++ = (corner & 2) ? hi[1] : lo[1];
+      *out++ = (corner & 4) ? hi[2] : lo[2];
+
+      /* The fourth component is always 1. */
+      *out++ = 1.0f;
+   }
+}
+
+#ifdef __AVX__
+/* Calculate the boxes of two sub-boxes per call (one per 128-bit half); also widens fullMin/fullMax */
+static void
+vbo_bbox_calc_subbox_coordinates(unsigned int vertSubBox,unsigned int vertCount,
+                                 unsigned int first_idx,unsigned int second_idx,
+                                 unsigned short* indices,float* vertices,
+                                 unsigned int stride,
+                                 struct vbo_bbox_cache_data *data)
+{
+   /* NOTE(review): indices are used unchecked; nothing here bounds them to the vertex data. */
+   /* Retrieving the starting offset of the first and second subbox */
+   unsigned int first = first_idx * vertSubBox;
+   unsigned int second = second_idx * vertSubBox;
+
+   float tmpVertexBuf[8] = {0.0}; /* elements 3 and 7 are never written and stay 0.0 */
+   float tmp_buf_min[8] = {0.0};
+   float tmp_buf_max[8] = {0.0};
+
+   __m256 subMin = _mm256_set1_ps(FLT_MAX);
+   __m256 subMax = _mm256_set1_ps(-FLT_MAX);
+
+   /* Run both the subboxes for vertex count */
+   for(unsigned int iter = 0; iter < vertCount; iter++){
+      /* Calculate the vertex offset of first subbox */
+      unsigned short index1 = indices[first+iter];
+      /* Fetching the vertices for the first subbox */
+      float* vertex1 = (float *)((char *)(vertices) + stride*index1);
+      memcpy(tmpVertexBuf,vertex1, 3 * sizeof(float));
+
+      /* Calculate the vertex offset of second subbox */
+      unsigned short index2 = indices[second+iter];
+
+      /* Fetching the vertices for the second subbox */
+      float* vertex2 = (float *)((char *)(vertices) + stride*index2);
+      memcpy(tmpVertexBuf+4,vertex2, 3 * sizeof(float));
+
+      __m256 tmp = _mm256_loadu_ps(tmpVertexBuf);
+      subMin = _mm256_min_ps(subMin, tmp);
+      subMax = _mm256_max_ps(subMax, tmp);
+   }
+
+   /* compare full box values */
+   fullMin = _mm256_min_ps(fullMin, subMin);
+   fullMax = _mm256_max_ps(fullMax, subMax);
+
+   /* store results */
+   _mm256_storeu_ps(tmp_buf_min, subMin);
+   _mm256_storeu_ps(tmp_buf_max, subMax);
+
+
+   /* Update the min and max values in sub box coordinates for both
+    * the subboxes (floats 0-2 belong to first_idx, floats 4-6 to second_idx)
+    */
+   vbo_bbox_create_bounding_box(tmp_buf_min, tmp_buf_max,
+              &(data->sub_bbox_array[first_idx].bounding_volume.vert_vec4[0]));
+   vbo_bbox_create_bounding_box(tmp_buf_min+4, tmp_buf_max+4,
+             &(data->sub_bbox_array[second_idx].bounding_volume.vert_vec4[0]));
+}
+#else /* !__AVX__: 128-bit variant that handles a single sub-box per call */
+static void
+vbo_bbox_calc_subbox_coordinates(
+            unsigned int vertSubBox,
+            unsigned int vertCount,
+            unsigned int idx,
+            unsigned short *indices,
+            float *vertices,
+            unsigned int stride,
+            struct vbo_bbox_cache_data *data)
+{
+   /* Retrieving the starting offset of the subbox */
+   unsigned int first = idx * vertSubBox;
+
+   float tmpVertexBuf[4] = {0.0}; /* element 3 is never written and stays 0.0 */
+   float tmp_buf_min[4] = {0.0};
+   float tmp_buf_max[4] = {0.0};
+
+   __m128 subMin = _mm_set1_ps(FLT_MAX);
+   __m128 subMax = _mm_set1_ps(-FLT_MAX);
+
+   /* Run over the vertCount indices of the subbox */
+   for(unsigned int iter = 0; iter < vertCount; iter++){
+      /* NOTE(review): index is used unchecked; nothing here bounds it to the vertex data. */
+      /* Fetch the next index of the subbox */
+      unsigned short index = indices[first+iter];
+      /* Fetching the x, y, z of the vertex it refers to */
+      float* vertex = (float *)((char *)(vertices) + stride*index);
+      memcpy(tmpVertexBuf,vertex, 3 * sizeof(float));
+
+      __m128 tmp = _mm_loadu_ps(tmpVertexBuf);
+      subMin = _mm_min_ps(subMin, tmp);
+      subMax = _mm_max_ps(subMax, tmp);
+   }
+
+   /* compare full box values */
+   fullMin = _mm_min_ps(fullMin, subMin);
+   fullMax = _mm_max_ps(fullMax, subMax);
+
+   /* store results */
+   _mm_storeu_ps(tmp_buf_min, subMin);
+   _mm_storeu_ps(tmp_buf_max, subMax);
+
+   /* Update the min and max values in sub box coordinates of the
+    * subbox
+    */
+   vbo_bbox_create_bounding_box(tmp_buf_min, tmp_buf_max,
+                    &(data->sub_bbox_array[idx].bounding_volume.vert_vec4[0]));
+}
+#endif
+
+/**
+ *  Map \p vbo read-only; on success *data points \p offset bytes into it and
+ *  *dataSize holds the bytes remaining. False if offset is bad or map fails.
+ */
+static
+bool vbo_bbox_get_vbo_ptr(struct gl_context* gc, struct gl_buffer_object* vbo,
+                          int offset, void** data, int* dataSize)
+{
+   assert(gc);
+   assert(vbo);
+   GLubyte* vboDataPtr = NULL;
+   /* Reject offsets outside the buffer, including negative ones. */
+   if (offset < 0 || offset >= vbo->Size) {
+      return false;
+   }
+   /* The data is only read through this pointer, so map it for reading. */
+   vboDataPtr = _mesa_MapNamedBuffer(vbo->Name,GL_READ_ONLY_ARB);
+   if (vboDataPtr == NULL) {
+      return false;
+   }
+   *data = vboDataPtr + offset;
+   *dataSize = vbo->Size - offset;
+   return true;
+}
+
+/** Unmap, by name, a buffer that was mapped through vbo_bbox_get_vbo_ptr(). */
+static inline
+void vbo_bbox_release_vbo_ptr(struct gl_context* gc,
+                              struct gl_buffer_object* vbo)
+{
+    assert(gc);
+    assert(vbo);
+
+    const GLuint buffer_name = vbo->Name;
+    _mesa_UnmapNamedBuffer_no_error(buffer_name);
+}
+
+/**
+  * Report whether every triangle in a range of 16-bit indices is degenerate,
+  * i.e. repeats at least one of its indices. An empty range yields true.
+  */
+static
+bool vbo_bbox_is_degenerate(GLvoid *indices, GLuint count_in)
+{
+   assert(indices);
+   assert(count_in % 3 == 0);
+
+   const GLushort* tri = (const GLushort*)indices;
+   const GLushort* const end = tri + (count_in / 3) * 3;
+
+   /* Walk the indices three at a time; stop at the first real triangle. */
+   for (; tri != end; tri += 3) {
+      const bool has_repeat = tri[0] == tri[1] || tri[0] == tri[2] ||
+                              tri[1] == tri[2];
+      if (!has_repeat) {
+          return false;
+      }
+   }
+   return true;
+}
+
+
+/**
+ * Calculate bounding boxes for given geometry: one box per non-degenerate
+ * sub-box plus the enclosing full box, stored in \p data. Returns false when
+ * a buffer is missing, cannot be mapped or is too small for the draw, or
+ * when an allocation fails.
+ */
+static
+bool vbo_bbox_calculate_bounding_boxes_with_indices(struct gl_context *const gc,
+                                             const vbo_bbox_cache_key *key,
+                                             struct vbo_bbox_cache_data *data,
+                                             void* indexData,
+                                             int   indexDataSize)
+{
+   assert(gc);
+
+   void* vertex_data = NULL;
+   int vertex_datasize = 0;
+   int vert_per_subBbox = mesa_bbox_env_variables.bbox_split_size;
+   int *subbox_array = NULL;
+   int non_degen_count = 0;
+   /* A non-positive split size would divide by zero just below. */
+   if (vert_per_subBbox <= 0 || key->count <= 0)
+      return false;
+   int sub_bbox_cnt = (key->count + vert_per_subBbox -1)/vert_per_subBbox;
+
+   struct gl_buffer_object* element_buffer = _mesa_lookup_bufferobj(gc,
+                                                key->element_buf_name);
+   struct gl_buffer_object * vertexattrib_buffer = _mesa_lookup_bufferobj(gc,
+                                                       key->vertex_buf_name);
+
+   if (element_buffer == NULL || vertexattrib_buffer == NULL) {
+   return false;
+   }
+
+   if (!vbo_bbox_get_vbo_ptr(gc, vertexattrib_buffer,(int) key->offset,
+                           &vertex_data, &vertex_datasize)) {
+   return false;
+   }
+
+   assert(vertex_data);
+   assert(vertex_datasize > 0);
+   assert(indexData);
+   assert(key->indices_type == GL_UNSIGNED_SHORT);
+
+   /* The index data must cover the whole draw (an exact fit is fine) and
+    * every index must address a complete x,y,z triple inside the mapping. */
+   bool usable = indexDataSize > 0 &&
+          (size_t)indexDataSize >= (size_t)key->count * key->type_size;
+   for (int i = 0; usable && i < key->count; i++) {
+      size_t first_byte = (size_t)((GLushort*)indexData)[i] * key->stride;
+      usable = first_byte + 3 * sizeof(float) <= (size_t)vertex_datasize;
+   }
+   /* Allocate the list of non-degenerate boxes and the boxes themselves */
+   if (usable)
+      subbox_array = malloc(sub_bbox_cnt * sizeof(int));
+   if (!subbox_array || !vbo_init_sub_bbox_array(sub_bbox_cnt,data)) {
+      free(subbox_array);
+      vbo_bbox_release_vbo_ptr(gc, vertexattrib_buffer);
+      return false;
+   }
+   /* Initialize size of bounding boxes */
+   for (int i = 0; i < sub_bbox_cnt; i++) {
+   data->sub_bbox_array[i].vert_count = (i==sub_bbox_cnt-1)?
+                            (key->count - i*vert_per_subBbox):vert_per_subBbox;
+   data->sub_bbox_array[i].start_offset = i*vert_per_subBbox * key->type_size;
+   }
+   /* Check if all triangles withing bbox are degenerate (i.e triangles with
+      zero area) */
+   for (int i = 0; i < sub_bbox_cnt; i++) {
+      GLubyte* ptr = (GLubyte *)indexData;
+      data->sub_bbox_array[i].is_degenerate =
+          vbo_bbox_is_degenerate((ptr + data->sub_bbox_array[i].start_offset),
+                  data->sub_bbox_array[i].vert_count);
+       if(!data->sub_bbox_array[i].is_degenerate)
+          subbox_array[non_degen_count++] = i;
+   }
+
+   float tmp_buf_min[8] = {0.0};
+   float tmp_buf_max[8] = {0.0};
+
+#ifdef __AVX__
+   int odd = non_degen_count % 2;
+   int num_iter = non_degen_count/2;
+   int iter;
+
+   fullMin = _mm256_set1_ps(FLT_MAX);
+   fullMax = _mm256_set1_ps(-FLT_MAX);
+   int idx = 0;
+   for(iter = 0; iter < num_iter;iter++){
+      idx = 2*iter;
+      if(data->sub_bbox_array[subbox_array[idx]].vert_count ==
+         data->sub_bbox_array[subbox_array[idx+1]].vert_count)
+      {
+         /* call the algorithm with the count */
+         vbo_bbox_calc_subbox_coordinates(
+                       vert_per_subBbox,
+                       data->sub_bbox_array[subbox_array[idx]].vert_count,
+                       subbox_array[idx],
+                       subbox_array[idx+1],
+                       (GLushort*)indexData,
+                       (GLfloat*)vertex_data,
+                       key->stride,
+                       data);
+      }
+      else
+      {
+         /* call the first one separately */
+         vbo_bbox_calc_subbox_coordinates(vert_per_subBbox,
+                            data->sub_bbox_array[subbox_array[idx]].vert_count,
+                            subbox_array[idx],
+                            subbox_array[idx],
+                            (GLushort*)indexData,
+                            (GLfloat*)vertex_data,
+                            key->stride,
+                            data);
+
+         /* call the second one separately */
+         vbo_bbox_calc_subbox_coordinates(vert_per_subBbox,
+                          data->sub_bbox_array[subbox_array[idx+1]].vert_count,
+                          subbox_array[idx+1],
+                          subbox_array[idx+1],
+                          (GLushort*)indexData,
+                          (GLfloat*)vertex_data,
+                          key->stride,
+                          data);
+      }
+   }
+
+   if(odd)
+   {
+      idx = 2*iter;
+      /* call the last one separately */
+      vbo_bbox_calc_subbox_coordinates(vert_per_subBbox,
+                            data->sub_bbox_array[subbox_array[idx]].vert_count,
+                            subbox_array[idx],
+                            subbox_array[idx],
+                            (GLushort*)indexData,
+                            (GLfloat*)vertex_data,
+                            key->stride,
+                            data);
+   }
+
+   /* Finding the minimum from the full box 256 */
+   __m128 firstlane   = _mm256_extractf128_ps(fullMin,0);
+   __m128 secondlane = _mm256_extractf128_ps(fullMin,1);
+   firstlane = _mm_min_ps(firstlane,secondlane);
+   _mm_storeu_ps(tmp_buf_min,firstlane);
+
+   /* Finding the maximum from the full box 256 */
+    firstlane  = _mm256_extractf128_ps(fullMax,0);
+    secondlane = _mm256_extractf128_ps(fullMax,1);
+    firstlane = _mm_max_ps(firstlane,secondlane);
+    _mm_storeu_ps(tmp_buf_max,firstlane);
+
+#else
+    fullMin = _mm_set1_ps(FLT_MAX);
+    fullMax = _mm_set1_ps(-FLT_MAX);
+
+    for(int i=0; i< non_degen_count; i++){
+       //call the algorithm with the count
+       vbo_bbox_calc_subbox_coordinates(vert_per_subBbox,
+                              data->sub_bbox_array[subbox_array[i]].vert_count,
+                              subbox_array[i],
+                              (GLushort*)indexData,
+                              (GLfloat*)vertex_data,
+                              key->stride,
+                              data);
+       }
+       _mm_storeu_ps(tmp_buf_min, fullMin);
+       _mm_storeu_ps(tmp_buf_max, fullMax);
+#endif
+
+    /* Set up bounding box as 8 vertices and store in bbox data */
+    vbo_bbox_create_bounding_box(tmp_buf_min, tmp_buf_max,
+                          &(data->full_box.bounding_volume.vert_vec4[0]));
+
+    free(subbox_array);
+    vbo_bbox_release_vbo_ptr(gc, vertexattrib_buffer);
+    data->valid = true;
+
+    return true;
+}
+
+
+/**
+  * Map the element buffer named in \p key at the draw's index offset and
+  * compute the bounding boxes from it; the mapping is released afterwards.
+  */
+static inline
+bool vbo_bbox_calculate_bounding_boxes(struct gl_context *const gc,
+                                       const vbo_bbox_cache_key *key,
+                                       struct vbo_bbox_cache_data* data)
+{
+    assert(gc);
+    assert(key->indices_type == GL_UNSIGNED_SHORT);
+
+    struct gl_buffer_object* index_bo = _mesa_lookup_bufferobj(gc,
+                                                        key->element_buf_name);
+    void* index_ptr = NULL;
+    int   index_bytes = 0;
+
+    /* Bail out when the buffer is gone or cannot be mapped at key->indices. */
+    if (index_bo == NULL ||
+        !vbo_bbox_get_vbo_ptr(gc, index_bo, (int) key->indices,
+                              &index_ptr, &index_bytes)) {
+      return false;
+    }
+
+    /* Keep the result so the buffer can be unmapped before returning. */
+    const bool computed = vbo_bbox_calculate_bounding_boxes_with_indices(
+                                       gc, key, data, index_ptr, index_bytes);
+
+    vbo_bbox_release_vbo_ptr(gc, index_bo);
+    return computed;
+}
+
+
+/**
+  *  Create new bounding box cache entry: it starts out invalid and flagged
+  *  for calculation, and is inserted into the cache under \p key.
+  *  Returns NULL when a buffer is missing or the allocation fails.
+  */
+static
+struct vbo_bbox_cache_data* vbo_bbox_create_data(struct gl_context *const gc,
+                                                 const vbo_bbox_cache_key *key)
+{
+   assert(gc);
+
+   struct gl_buffer_object* element_buffer = vbo_get_current_element_buffer(gc);
+   struct gl_buffer_object* vertexattrib_buffer =
+                                             vbo_get_current_vertex_buffer(gc);
+
+   if ((vertexattrib_buffer == NULL) ||
+     (element_buffer == NULL)){
+      return NULL;
+   }
+
+   mesa_bbox_opt * BboxOpt = gc->Pipeline.BboxOpt;
+   assert(BboxOpt);
+
+   struct vbo_bbox_cache_data* data = (struct vbo_bbox_cache_data *) malloc(
+                                      sizeof (struct vbo_bbox_cache_data));
+   /* Check the allocation before touching any field. */
+   if (data == NULL){
+      return NULL;
+   }
+   /* Initialize the cache data and variables in cache data */
+   data->full_box.is_degenerate = false;
+   data->valid = false;
+   data->mvp_valid = false;
+   data->need_new_calculation = true;
+   data->init_delaycnt = 0;
+   data->sub_bbox_cnt = 0;
+   data->sub_bbox_array = NULL;
+   data->drawcnt_bbox_helped = 0;
+   data->last_use_frame = 0;
+   data->vertpos_vbo_changecnt   = vertexattrib_buffer->data_change_counter;
+   data->indices_vbo_changecnt   = element_buffer->data_change_counter;
+
+   /* This defines for how many cache hits we wait before actually creating the
+    * data */
+   data->init_delaylimit = vbo_bbox_get_delay(BboxOpt);
+
+   /* At this point data is not valid yet */
+   _mesa_bbox_cache_insert(gc,gc->Pipeline.BboxOpt->cache,
+                           key,sizeof(vbo_bbox_cache_key),data);
+
+   return data;
+}
+
+
+/**
+ * Tell whether a cache entry still matches its buffers: both buffers named
+ * in \p key must exist and carry the change counters recorded in \p data.
+ */
+static inline
+bool vbo_bbox_validate_data(
+    struct gl_context *const gc,
+    const vbo_bbox_cache_key *key,
+    struct vbo_bbox_cache_data* data)
+{
+   assert(gc);
+
+   struct gl_buffer_object* index_bo =
+                        _mesa_lookup_bufferobj(gc, key->element_buf_name);
+   struct gl_buffer_object* vertex_bo =
+                        _mesa_lookup_bufferobj(gc, key->vertex_buf_name);
+
+   /* A buffer that can no longer be looked up invalidates the entry. */
+   if (!index_bo || !vertex_bo) {
+      return false;
+   }
+
+   /* Valid only while both change counters equal the recorded ones. */
+   const bool indices_same =
+          index_bo->data_change_counter == data->indices_vbo_changecnt;
+   const bool vertices_same =
+          vertex_bo->data_change_counter == data->vertpos_vbo_changecnt;
+   return indices_same && vertices_same;
+}
+
+/**
+ *  Retrieve bounding box data from cache, creating the entry on a miss; NULL when no valid boxes are available.
+ */
+static inline
+struct vbo_bbox_cache_data* vbo_bbox_get_bounding_boxes(
+                                                 struct gl_context *const gc,
+                                                 const vbo_bbox_cache_key *key)
+ {
+   /* NOTE(review): a valid entry that fails validation ends at "return NULL"; nothing here sets need_new_calculation again. */
+   assert(gc);
+   mesa_bbox_opt * BboxOpt = gc->Pipeline.BboxOpt;
+   assert(BboxOpt);
+   struct vbo_bbox_cache_data* data = _mesa_search_bbox_cache(BboxOpt->cache,
+                                              key, sizeof(vbo_bbox_cache_key));
+   if (data) {
+      if (data->need_new_calculation == false)
+      {
+         /* Entry was already calculated: usable only if valid and unchanged */
+         if (data->valid) {
+             if (vbo_bbox_validate_data(gc, key, data)) {
+                data->mvp_valid = true;
+                return data;
+             }
+         }
+         else {
+             data->valid = false;
+             return NULL;
+         }
+      }
+   }
+   else {
+      /* Data does not exist, create it */
+      data = vbo_bbox_create_data(gc, key);
+      if (data == NULL)
+      {
+         return NULL;
+      }
+   }
+   if ((data->need_new_calculation) && /* init_delaycnt counts calls until init_delaylimit is reached */
+     (data->init_delaycnt++ >= data->init_delaylimit)) {
+     data->valid = false;
+     data->need_new_calculation = false;
+     data->mvp_valid = false;
+     /* Re-record the change counters when they no longer match the buffers */
+     if (!vbo_bbox_validate_data(gc, key, data)) {
+         struct gl_buffer_object * element_buffer =
+                 _mesa_lookup_bufferobj(gc,key->element_buf_name);
+
+         struct gl_buffer_object * vertexattrib_buffer =
+                 _mesa_lookup_bufferobj(gc,key->vertex_buf_name);
+
+         if ((vertexattrib_buffer == NULL) ||
+             (element_buffer == NULL)) {
+             return NULL;
+         }
+         data->vertpos_vbo_changecnt = vertexattrib_buffer->data_change_counter;
+         data->indices_vbo_changecnt = element_buffer->data_change_counter;
+      }
+      if (gc->volume_type == BOUNDING_VOLUME_AABB) {
+         /* Calculate bounding boxes */
+         if (vbo_bbox_calculate_bounding_boxes(gc, key, data)) {
+             return data;
+         }
+      }
+   }
+   return NULL;
+}
+
+/**
+ * Propagate an element (index) buffer update to the bounding box cache.
+ *
+ * Every cache entry whose key names \p buffer gets its recorded index change
+ * counter refreshed. If such an entry is currently valid and its index range
+ * intersects the updated range, it is invalidated and flagged for
+ * recalculation. When superset() additionally holds for the entry range and
+ * the update range, the boxes are recomputed immediately from \p data.
+ *
+ * All entries of each hash bucket chain are visited. The previous code
+ * iterated the chain but always operated on the bucket head, so chained
+ * entries were never invalidated.
+ *
+ * \param gc      GL context owning the cache
+ * \param buffer  element buffer object being updated
+ * \param data    new contents written at \p offset
+ * \param offset  start of the updated range (same units as key->indices)
+ * \param size    length of the updated range
+ */
+void vbo_bbox_element_buffer_update(struct gl_context *const gc,
+                                    struct gl_buffer_object *buffer,
+                                    const void* data,
+                                    int offset,
+                                    int size)
+{
+   mesa_bbox_opt * BboxOpt = gc->Pipeline.BboxOpt;
+   if (!BboxOpt)
+      return;
+
+   mesa_bbox_cache * buffer_map = BboxOpt->cache;
+   if (!buffer_map)
+      return;
+
+   struct gl_segment updateSegment;
+   updateSegment.Left = offset;
+   updateSegment.Right = offset+size;
+
+   for (GLuint i = 0; i < buffer_map->size; i++) {
+      struct vbo_bbox_cache_data *bbox_data;
+
+      for (bbox_data = buffer_map->items[i]; bbox_data != NULL;
+           bbox_data = bbox_data->next) {
+         struct gl_buffer_object *buffObj = _mesa_lookup_bufferobj(gc,
+                                            bbox_data->key->element_buf_name);
+
+         /* Only entries built from the updated buffer are affected */
+         if (buffObj != buffer)
+            continue;
+
+         bbox_data->indices_vbo_changecnt = buffer->data_change_counter;
+
+         struct gl_segment element_bufferSegment;
+         element_bufferSegment.Left = bbox_data->key->indices;
+         element_bufferSegment.Right = bbox_data->key->indices +
+                     (bbox_data->key->count * bbox_data->key->type_size);
+
+         if (!bbox_data->valid ||
+             !intersect(&element_bufferSegment, &updateSegment))
+            continue;
+
+         bbox_data->valid = false;
+         bbox_data->need_new_calculation = true;
+
+         if (superset(&element_bufferSegment, &updateSegment)) {
+            /* Position of this entry's indices inside the new data */
+            int offset_in_newdata = bbox_data->key->indices - offset;
+            int newsize           = size - offset_in_newdata;
+
+            assert(offset_in_newdata >= 0);
+            assert(newsize > 0);
+
+            GLchar * start_data = (GLchar *)data + offset_in_newdata;
+
+            if (vbo_bbox_calculate_bounding_boxes_with_indices(gc,
+                   bbox_data->key, bbox_data, start_data, newsize)) {
+               bbox_data->need_new_calculation = false;
+               bbox_data->mvp_valid = false;
+            }
+         }
+      }
+   }
+}
+
+/**
+ *  Build the six frustum clip planes from a model-view-projection matrix.
+ *
+ *  The matrix is read through mvpin->m with the element at (row, col)
+ *  stored at m[col*4+row]. Each plane is the fourth matrix row plus or
+ *  minus one of the first three rows. For every plane an octant code is
+ *  recorded (bit 0/1/2 set when a/b/c is >= 0) before the plane is passed
+ *  to normalize().
+ *
+ *  \param frustum  receives plane[0..5] and octant[0..5]
+ *  \param mvpin    source matrix
+ */
+static
+void vbo_bbox_get_frustum_from_mvp(vbo_bbox_frustum *frustum, GLmatrix* mvpin)
+{
+   GLfloat rows[4][4] = {0};
+
+   /* Unpack m[col*4+row] into rows[row][col] */
+   for (int r = 0; r < 4; ++r) {
+      for (int c = 0; c < 4; ++c)
+         rows[r][c] = mvpin->m[c*4+r];
+   }
+
+   /* plane[idx] = rows[3] <op> rows[row], component by component */
+#define SET_PLANE(idx, op, row)                                \
+   do {                                                        \
+      frustum->plane[idx].a = rows[3][0] op rows[row][0];      \
+      frustum->plane[idx].b = rows[3][1] op rows[row][1];      \
+      frustum->plane[idx].c = rows[3][2] op rows[row][2];      \
+      frustum->plane[idx].d = rows[3][3] op rows[row][3];      \
+   } while (0)
+
+   SET_PLANE(0, +, 0);   /* Left   */
+   SET_PLANE(1, -, 0);   /* Right  */
+   SET_PLANE(2, -, 1);   /* Top    */
+   SET_PLANE(3, +, 1);   /* Bottom */
+   SET_PLANE(4, -, 2);   /* Far    */
+   SET_PLANE(5, +, 2);   /* Near   */
+
+#undef SET_PLANE
+
+   /* Octant codes are taken from the signs of the un-normalized normal */
+   for (int p = 0; p < 6; ++p) {
+      vbo_bbox_frustum_plane *pl = &(frustum->plane[p]);
+      unsigned char code = 0;
+
+      if (pl->a >= 0)
+         code |= 1;
+      if (pl->b >= 0)
+         code |= 2;
+      if (pl->c >= 0)
+         code |= 4;
+
+      frustum->octant[p] = code;
+      normalize(pl);
+   }
+}
+
+
+/**
+ * Evaluate the plane equation a*x + b*y + c*z + d at a point.
+ *
+ * The point's w component is not used. For a plane whose (a, b, c) has
+ * unit length the result is the signed distance from the point to the plane.
+ */
+static inline
+float vbo_bbox_dist_from_point_to_plane(
+    const vbo_bbox_frustum_plane *plane,
+    const vbo_vec4f *point)
+{
+    const vbo_bbox_frustum_plane *p = plane;
+    const vbo_vec4f *v = point;
+
+    return p->a * v->x + p->b * v->y + p->c * v->z + p->d;
+}
+
+
+/**
+ * Bounding box clipping algorithm.
+ *
+ * For each of the six frustum planes only two box corners are tested: the
+ * corner indexed by the plane's octant code and the opposite corner
+ * (code ^ 7). Plane equations may be normalized to save some divisions.
+ *
+ * NOTE(review): this relies on vert_vec4[] being ordered so that the octant
+ * code selects the corner farthest along the plane normal; the ordering is
+ * established by the box calculation code - confirm there.
+ *
+ * \return BBOX_CLIP_OUTSIDE   - first tested corner is behind some plane
+ *         BBOX_CLIP_INTERSECT - not outside, but an opposite corner is
+ *                               behind at least one plane
+ *         BBOX_CLIP_INSIDE    - no tested corner is behind any plane
+ */
+static
+enum vbo_bbox_clip_result vbo_bbox_fast_clipping_test(bounding_info* bbox,
+                                               const vbo_bbox_frustum *frustum)
+{
+   assert(bbox);
+   vbo_vec4f *corners = bbox->bounding_volume.vert_vec4;
+   bool straddles = false;
+
+   for (int p = 0; p < 6; ++p) {
+      const vbo_bbox_frustum_plane *plane = &(frustum->plane[p]);
+      const unsigned char far_idx = frustum->octant[p];
+
+      /* Far corner behind the plane: the whole box is behind it */
+      if (vbo_bbox_dist_from_point_to_plane(plane,
+                                            &(corners[far_idx])) < 0.0f)
+         return BBOX_CLIP_OUTSIDE;
+
+      /* Near corner behind the plane: the box crosses this plane */
+      if (vbo_bbox_dist_from_point_to_plane(plane,
+                                            &(corners[far_idx ^ 7])) < 0.0f)
+         straddles = true;
+   }
+
+   return straddles ? BBOX_CLIP_INTERSECT : BBOX_CLIP_INSIDE;
+}
+
+/**
+ *  Clip a bounding box against a frustum.
+ *
+ *  Degenerate boxes are reported as BBOX_CLIP_DEGEN without being tested;
+ *  all others get the result of vbo_bbox_fast_clipping_test().
+ */
+static inline
+enum vbo_bbox_clip_result vbo_bbox_clip(bounding_info* bbox,
+                                        const vbo_bbox_frustum *frustum)
+{
+   assert(bbox);
+   return bbox->is_degenerate ? BBOX_CLIP_DEGEN
+                              : vbo_bbox_fast_clipping_test(bbox, frustum);
+}
+
+/**
+ * Bounding box drawelements implementation
+ *
+ * Draws indexed geometry through vbo_validated_drawrangeelements(), skipping
+ * the parts whose cached bounding boxes are outside the view frustum.
+ *
+ * The draw is forwarded unchanged when the call is not supported, when
+ * bbox_enable is below MESA_BBOX_ENABLE_FORCE_CLIPPING, when no cached boxes
+ * are available, or when no MVP uniform location is found.
+ *
+ * Otherwise the MVP uniform is compared with the copy stored in the cache
+ * entry. If it changed (or mvp_valid is false) the frustum is rebuilt and the
+ * full box and all sub-boxes are clipped again; if not, the clip results
+ * stored in the cache entry are reused. A full box that is OUTSIDE draws
+ * nothing, one that is INSIDE is drawn with a single call, and anything else
+ * is drawn sub-box by sub-box: visible sub-boxes are merged into batches and
+ * a batch is flushed once the run of dropped indices since the last visible
+ * sub-box reaches bbox_split_size * BBOX_MIN_SPLITTED_DRAW_TO_DROP. Shorter
+ * dropped runs are drawn as part of the batch.
+ *
+ * mode, index_bounds_valid, start, end, type, basevertex, numInstances and
+ * baseInstance are passed through to every vbo_validated_drawrangeelements()
+ * call; only count and indices differ for split draws.
+ *
+ * NOTE(review): a dropped run that precedes the first visible sub-box is
+ * reset without setting `clipped`, so it does not count towards
+ * drawcnt_bbox_helped - confirm this is intended.
+ */
+void
+vbo_bbox_drawelements(struct gl_context *ctx, GLenum mode,
+                      GLboolean index_bounds_valid, GLuint start, GLuint end,
+                      GLsizei count, GLenum type, const GLvoid * indices,
+                      GLint basevertex, GLuint numInstances,
+                      GLuint baseInstance)
+{
+   assert(ctx);
+   GLuint type_size;
+   /* BOUNDING VOLUME: the support check is expected to stay the same for
+    * other bounding volume types */
+   bool draw_call_supported = vbo_bbox_check_supported_draw_call(ctx,
+                                                               mode,
+                                                               count,
+                                                               type,
+                                                               indices,
+                                                               basevertex);
+   if (!draw_call_supported ||
+   (mesa_bbox_env_variables.bbox_enable < MESA_BBOX_ENABLE_FORCE_CLIPPING)) {
+      MESA_BBOX("Aborting MESA_BBOX : BBOX Is not ENABLED !!! \n");
+      vbo_validated_drawrangeelements(ctx, mode, index_bounds_valid, start,
+          end, count, type, indices, basevertex, numInstances, baseInstance);
+      return;
+   }
+
+   type_size = sizeof(GLushort);
+   vbo_bbox_cache_key key;/* Not initialized here, vbo_bbox_prepare_key does it */
+   vbo_bbox_prepare_key(ctx, mode, count, type, type_size, indices, basevertex,
+                        &key);
+   /*BOUNDING VOLUME: Call bounding volume creation based on bounding volume
+    * type */
+
+   struct vbo_bbox_cache_data* cached_bbox = vbo_bbox_get_bounding_boxes(ctx,
+                                                                         &key);
+
+   if (cached_bbox == NULL) {
+      MESA_BBOX("Aborting MESA_BBOX : New Object not in Cache!!! \n");
+      vbo_validated_drawrangeelements(ctx, mode, index_bounds_valid, start, end,
+                                      count, type, indices, basevertex,
+                                      numInstances, baseInstance);
+      return;
+   }
+
+   GLint loc = vbo_bbox_get_mvp(ctx);
+   if (loc < 0) {
+     MESA_BBOX("MVP Location Error\n");
+     /* TBD: Free cache here */
+     vbo_validated_drawrangeelements(ctx, mode, index_bounds_valid, start,
+                                     end, count, type, indices, basevertex,
+                                     numInstances, baseInstance);
+     return;
+   }
+
+   GLfloat *mvp_ptr = (GLfloat *)
+                ctx->_Shader->ActiveProgram->data->UniformStorage[loc].storage;
+   vbo_bbox_frustum frustum;
+   bool recalculate_subbox_clip = false;
+
+   if(cached_bbox->mvp_valid == false ||
+      memcmp(cached_bbox->mvp, mvp_ptr,16*sizeof(GLfloat))) {
+      memcpy(&(cached_bbox->mvp), mvp_ptr, 16*sizeof(GLfloat));
+      cached_bbox->mvpin.m = cached_bbox->mvp;
+      vbo_bbox_get_frustum_from_mvp(&frustum,&(cached_bbox->mvpin));
+
+      /* BOUNDING VOLUME: Call specific function to calculate clip results */
+
+      if (ctx->volume_type == BOUNDING_VOLUME_AABB) {
+          cached_bbox->full_box.clip_result =
+                  vbo_bbox_clip(&(cached_bbox->full_box), &frustum);
+      }
+      recalculate_subbox_clip = true;
+   }
+
+    /* Act on the clip result of the box covering the whole geometry */
+    MESA_BBOX("MESA_BBOX: Full Box ClipResult:%d \n",
+                    cached_bbox->full_box.clip_result);
+    switch (cached_bbox->full_box.clip_result) {
+      case BBOX_CLIP_OUTSIDE:
+         /* Geometry outside view frustum, dont draw it */
+        return;
+      case BBOX_CLIP_INSIDE:
+        vbo_validated_drawrangeelements(ctx, mode, index_bounds_valid, start,
+                                        end, count, type, indices, basevertex,
+                                        numInstances, baseInstance);
+        return;
+      /* case BBOX_CLIP_INTERSECT: (any other result also lands here) */
+      default:
+        MESA_BBOX("MESA_BBOX: Vertices INTERSECTING with the frustum, going"
+                  " for Sub Bboxes: \n");
+        break;
+    }
+
+    GLsizei count_to_send = 0;
+    GLsizei count_to_drop = 0;
+    GLvoid* offset_to_send = NULL;
+    bool clipped = false;
+    unsigned potential_clipped = 0;
+
+    for (int i = 0; i < cached_bbox->sub_bbox_cnt; i++) {
+      int new_count = cached_bbox->sub_bbox_array[i].vert_count;
+
+      if(recalculate_subbox_clip) {
+         if (ctx->volume_type == BOUNDING_VOLUME_AABB) {
+             cached_bbox->sub_bbox_array[i].clip_result =
+                 vbo_bbox_clip(&(cached_bbox->sub_bbox_array[i]), &frustum);
+         }
+      }
+
+      switch (cached_bbox->sub_bbox_array[i].clip_result)
+      {
+      case BBOX_CLIP_OUTSIDE:
+         count_to_drop += new_count;
+         potential_clipped += new_count;
+
+         break;
+      case BBOX_CLIP_DEGEN:
+         count_to_drop += new_count;
+         potential_clipped += new_count;
+         break;
+        default:
+            /* Sub bounding box intersects with view, draw/save it
+             * for later draw
+             */
+            if (count_to_send == 0) {
+                /* Starting new batch at this sub-box's offset */
+                count_to_send = new_count;
+                offset_to_send = (char*)indices +
+                             cached_bbox->sub_bbox_array[i].start_offset;
+            }
+            else {
+                if (count_to_drop >=
+                   (int)(mesa_bbox_env_variables.bbox_split_size *
+                   BBOX_MIN_SPLITTED_DRAW_TO_DROP)) {
+
+                    /* Draw accumulated geometry; the dropped run is long
+                     * enough to be worth splitting the draw */
+                    vbo_validated_drawrangeelements(ctx, mode,
+                       index_bounds_valid, start, end, count_to_send, type,
+                       offset_to_send, basevertex, numInstances, baseInstance);
+
+                    /* Reset accumulated draws, next batch starts here */
+                    count_to_send = 0;
+                    offset_to_send = (char*)indices +
+                             cached_bbox->sub_bbox_array[i].start_offset;
+
+                    clipped = true;
+                }
+                else
+                {
+                    count_to_send += count_to_drop;
+                }
+
+                /* append to current batch of sent primitives */
+                count_to_send += new_count;
+            }
+
+            count_to_drop = 0;
+            break;
+        }
+    }
+
+    if (count_to_send > 0)
+    {
+        vbo_validated_drawrangeelements(ctx, mode, index_bounds_valid, start,
+                                        end, count_to_send, type,
+                                        offset_to_send, basevertex,
+                                        numInstances, baseInstance);
+    }
+
+
+    clipped |= (count_to_drop >= (int)(mesa_bbox_env_variables.bbox_split_size *
+                                       BBOX_MIN_SPLITTED_DRAW_TO_DROP));
+
+    if (clipped)
+    {
+        cached_bbox->drawcnt_bbox_helped =
+                                      MIN(cached_bbox->drawcnt_bbox_helped + 1,
+                                      BBOX_MAX_VAL_FOR_EFFECTIVE_DRAWS_COUNTER);
+    }
+    else
+    {
+        if (potential_clipped == 0)
+        {
+            cached_bbox->drawcnt_bbox_helped =
+                                      MAX(cached_bbox->drawcnt_bbox_helped - 1,
+                                      BBOX_MIN_VAL_FOR_EFFECTIVE_DRAWS_COUNTER);
+        }
+    }
+}
+
+/**
+ * Switch the optimization off and put its tunables back to the values
+ * vbo_bbox_init() uses for the disabled state.
+ */
+static void
+vbo_bbox_force_disable(void)
+{
+   mesa_bbox_env_variables.bbox_enable = MESA_BBOX_ENABLE_OFF;
+   mesa_bbox_env_variables.bbox_split_size = 0x7fffffff;
+   mesa_bbox_env_variables.bbox_min_vrtx_count = 0;
+}
+
+/**
+ * Initialization for bounding box optimization
+ *
+ * Reads MESA_BBOX_OPT_ENABLE, MESA_OPT_SPLIT_SIZE, MESA_BBOX_MIN_VERTEX_CNT
+ * and MESA_OPT_TRACE_LEVEL from the environment into
+ * mesa_bbox_env_variables, forces the feature off when
+ * gc->Const.EnableBoundingBoxCulling is not set, resolves
+ * MESA_BBOX_ENABLE_AUTO to a concrete mode, and, for the AABB volume type
+ * with the feature enabled, allocates gc->Pipeline.BboxOpt and its cache.
+ *
+ * If either allocation fails the feature is disabled and
+ * gc->Pipeline.BboxOpt is left NULL, so vbo_bbox_free() and the draw path
+ * never see a dangling pointer.
+ *
+ * \param gc  GL context being initialized (must not be NULL)
+ */
+void vbo_bbox_init(struct gl_context* const gc)
+{
+   assert(gc);
+
+   /* Deliberately not named mesa_bbox_opt: a local of that name would hide
+    * the mesa_bbox_opt typedef and make sizeof(mesa_bbox_opt) yield the size
+    * of a pointer instead of the size of the structure.
+    */
+   const char *env_value = getenv("MESA_BBOX_OPT_ENABLE");
+   if (env_value != NULL)
+      mesa_bbox_env_variables.bbox_enable = atoi(env_value);
+
+   env_value = getenv("MESA_OPT_SPLIT_SIZE");
+   if (env_value != NULL)
+      mesa_bbox_env_variables.bbox_split_size = atoi(env_value);
+
+   env_value = getenv("MESA_BBOX_MIN_VERTEX_CNT");
+   if (env_value != NULL)
+      mesa_bbox_env_variables.bbox_min_vrtx_count = atoi(env_value);
+
+   /* The driver option overrides whatever the environment asked for */
+   if (!gc->Const.EnableBoundingBoxCulling)
+      vbo_bbox_force_disable();
+
+   env_value = getenv("MESA_OPT_TRACE_LEVEL");
+   if (env_value != NULL)
+      mesa_bbox_env_variables.bbox_trace_level = atoi(env_value);
+
+   if (mesa_bbox_env_variables.bbox_enable == MESA_BBOX_ENABLE_AUTO) {
+      /* Android/Linux: enable of Gen7.5, Gen8 and Gen9 */
+      #if (IGFX_GEN == IGFX_GEN9)
+      mesa_bbox_env_variables.bbox_enable = MESA_BBOX_ENABLE_SMART;
+      #else
+      mesa_bbox_env_variables.bbox_enable = MESA_BBOX_ENABLE_OFF;
+      #endif
+      /* SMART is currently always promoted to forced clipping */
+      if (mesa_bbox_env_variables.bbox_enable == MESA_BBOX_ENABLE_SMART)
+      {
+         mesa_bbox_env_variables.bbox_enable =
+                                             MESA_BBOX_ENABLE_FORCE_CLIPPING;
+      }
+   }
+   /* BOUNDING VOLUME: Add initializations based on bounding volume here */
+   switch (gc->volume_type) {
+   case BOUNDING_VOLUME_AABB:
+      if (mesa_bbox_env_variables.bbox_enable) {
+         /* Round both tunables down to a multiple of 3 */
+         mesa_bbox_env_variables.bbox_split_size =
+                (mesa_bbox_env_variables.bbox_split_size / 3) * 3;
+         mesa_bbox_env_variables.bbox_min_vrtx_count =
+                (mesa_bbox_env_variables.bbox_min_vrtx_count / 3) * 3;
+
+         assert(gc->Pipeline.BboxOpt == NULL);
+
+         /* Size taken from the pointee so it always matches the struct */
+         gc->Pipeline.BboxOpt = malloc(sizeof(*gc->Pipeline.BboxOpt));
+         if (!gc->Pipeline.BboxOpt) {
+            /* No memory, disable bbox optimization */
+            vbo_bbox_force_disable();
+            return;
+         }
+
+         gc->Pipeline.BboxOpt->calc_delay = BBOX_CALC_MIN_DELAY;
+         gc->Pipeline.BboxOpt->cache = _mesa_new_bbox_cache();
+
+         if (!gc->Pipeline.BboxOpt->cache) {
+            /* Do not leave a dangling pointer or an enabled feature
+             * behind: both would lead to use of freed memory later.
+             */
+            free(gc->Pipeline.BboxOpt);
+            gc->Pipeline.BboxOpt = NULL;
+            vbo_bbox_force_disable();
+            MESA_BBOX("MESA_BBOX: Cache creation failed\n");
+            return;
+         }
+
+         if (mesa_bbox_env_variables.bbox_trace_level > 0) {
+                          MESA_BBOX("\nMESA BBOX OPT config: \
+                               bboxOptEnable = %d, bboxOptMinVertexCount = %d,\
+                               bboxOptSplitSize = %d\n",
+                                   mesa_bbox_env_variables.bbox_enable,
+                                   mesa_bbox_env_variables.bbox_min_vrtx_count,
+                                   mesa_bbox_env_variables.bbox_split_size);
+         }
+      }
+      /* Initialize some function pointers so that we dont have to
+       * check bounding volume type for every draw call */
+      break;
+   case BOUNDING_VOLUME_OBB:
+      /* Init for OBB bounding volume */
+      break;
+   case BOUNDING_VOLUME_SPHERE:
+      /* Init for SPHERE bounding volume */
+      break;
+   case BOUNDING_VOLUME_DOP:
+      /* Init for DOP bounding volume */
+      break;
+   default:
+      MESA_BBOX("BOUNDING VOLUME TYPE IS INCORRECT\n");
+      break;
+   }
+}
+
+
+/**
+ *  Release everything vbo_bbox_init() attached to the context.
+ *  To be called when context is destroyed.
+ *
+ *  Deletes the bounding box cache (if any), frees the BboxOpt structure
+ *  and clears gc->Pipeline.BboxOpt. Does nothing when BboxOpt is NULL.
+ */
+void vbo_bbox_free(struct gl_context* const gc)
+{
+   assert(gc);
+
+   mesa_bbox_opt *opt = gc->Pipeline.BboxOpt;
+   if (opt == NULL)
+      return;
+
+   if (opt->cache != NULL) {
+      _mesa_delete_bbox_cache(gc, opt->cache);
+      opt->cache = NULL;
+   }
+
+   free(opt);
+   gc->Pipeline.BboxOpt = NULL;
+}
diff --git a/src/mesa/vbo/vbo_bbox.h b/src/mesa/vbo/vbo_bbox.h
new file mode 100644
index 0000000..1dd98f2
--- /dev/null
+++ b/src/mesa/vbo/vbo_bbox.h
@@ -0,0 +1,383 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * \brief  VBO BBOX module implementation
+ * \author Kedar Karanje
+ */
+
+#ifndef _VBO_BBOX_H_
+#define _VBO_BBOX_H_
+
+#include <stdio.h>
+#include "main/arrayobj.h"
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/state.h"
+#include "main/varray.h"
+#include "main/bufferobj.h"
+#include "main/arrayobj.h"
+#include "main/enums.h"
+#include "main/macros.h"
+#include "main/transformfeedback.h"
+#include "main/mtypes.h"
+#include "compiler/glsl/ir_uniform.h"
+#include "main/shaderapi.h"
+#include "main/uniforms.h"
+#include "sys/param.h"
+#include "program/prog_cache.h"
+/* For Intrinsic functions */
+#include <smmintrin.h>
+#include <tmmintrin.h>
+#include <mmintrin.h>
+#include <immintrin.h>
+
+#if defined(__ANDROID__) || defined(ANDROID)
+#include <cutils/log.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FRUSTUM_PLANE_COUNT  6
+#define BBOX_CALC_MIN_DELAY  2
+#define BBOX_CALC_MAX_DELAY  5
+
+#undef LOG_TAG
+#define LOG_TAG "MESA_BBOX_LOG"
+
+/* Default, vbo_bbox_init() selects a concrete mode for given GPU and OS */
+#define MESA_BBOX_ENABLE_AUTO               -1
+/* Disable BBOX */
+#define MESA_BBOX_ENABLE_OFF                 0
+/* Enable BBOX; vbo_bbox_drawelements does not clip below FORCE_CLIPPING */
+#define MESA_BBOX_ENABLE_SMART               1
+/* Enable BBOX, clipping will be done regardless of GPU utilization */
+#define MESA_BBOX_ENABLE_FORCE_CLIPPING      2
+/* Enable BBOX, force immediate bbox recalculation and clipping */
+#define MESA_BBOX_ENABLE_FORCE_RECALC        3
+
+/**
+ * MESA BBOX PRINTS
+ * Set MESA_BBOX_DEBUG to 1 to enable MESA_BBOX_PRINT, or to 2 to enable
+ * both MESA_BBOX_PRINT and MESA_BBOX; any other value compiles both out.
+ * Output goes to ALOGE on Android and to printf elsewhere.
+ */
+#define MESA_BBOX_DEBUG 0
+
+#if defined(__ANDROID__) || defined (ANDROID)
+
+#if MESA_BBOX_DEBUG == 2
+#define MESA_BBOX_PRINT(...) ALOGE(__VA_ARGS__)
+#define MESA_BBOX(...) ALOGE(__VA_ARGS__)
+
+#elif MESA_BBOX_DEBUG == 1
+#define MESA_BBOX_PRINT(...) ALOGE(__VA_ARGS__)
+#define MESA_BBOX(...)
+
+#else
+#define MESA_BBOX_PRINT(...)
+#define MESA_BBOX(...)
+#endif //MESA_BBOX_DEBUG
+
+#else //ANDROID
+
+#if MESA_BBOX_DEBUG == 2
+#define MESA_BBOX_PRINT(...) printf(__VA_ARGS__)
+#define MESA_BBOX(...) printf(__VA_ARGS__)
+
+#elif MESA_BBOX_DEBUG == 1
+#define MESA_BBOX_PRINT(...) printf(__VA_ARGS__)
+#define MESA_BBOX(...)
+
+#else
+#define MESA_BBOX_PRINT(...)
+#define MESA_BBOX(...)
+#endif //MESA_BBOX_DEBUG
+
+#endif //ANDROID
+
+/**
+ * MESA Bbox options environment variables
+ *
+ * mesa_bbox_env_variables is filled in by vbo_bbox_init() from the
+ * MESA_BBOX_OPT_ENABLE, MESA_OPT_SPLIT_SIZE, MESA_BBOX_MIN_VERTEX_CNT and
+ * MESA_OPT_TRACE_LEVEL environment variables.
+ *
+ * NOTE(review): the three objects below are defined (not declared extern)
+ * in this header; confirm this links when more than one source file
+ * includes it, e.g. with -fno-common.
+ * NOTE(review): bbox_enable is unsigned while MESA_BBOX_ENABLE_AUTO is -1;
+ * the comparison in vbo_bbox_init() relies on the implicit conversion.
+ */
+int env_opt_val;
+const char *env_opt;
+
+typedef struct vbo_bbox_env_variable {
+   GLuint bbox_min_vrtx_count;
+   GLuint bbox_enable;
+   GLuint bbox_split_size;
+   GLuint bbox_trace_level;
+} bbox_env;
+bbox_env mesa_bbox_env_variables;
+
+void
+vbo_bbox_init(struct gl_context *const gc); /* set up bbox optimization */
+
+void
+vbo_bbox_free(struct gl_context *const gc); /* call on context destruction */
+
+
+void /* notifies the bbox cache that an element buffer range was rewritten */
+vbo_bbox_element_buffer_update(struct gl_context *const gc,
+                               struct gl_buffer_object *buffer,
+                               const void* data,
+                               int offset,
+                               int size);
+
+void /* regular draw path; vbo_bbox_drawelements() forwards draws to it */
+vbo_validated_drawrangeelements(struct gl_context *ctx,
+                                GLenum mode,
+                                GLboolean index_bounds_valid,
+                                GLuint start,
+                                GLuint end,
+                                GLsizei count,
+                                GLenum type,
+                                const GLvoid * indices,
+                                GLint basevertex,
+                                GLuint numInstances,
+                                GLuint baseInstance);
+
+void /* same parameters as above, but culls geometry outside the frustum */
+vbo_bbox_drawelements(struct gl_context *ctx,
+                      GLenum mode,
+                      GLboolean index_bounds_valid,
+                      GLuint start,
+                      GLuint end,
+                      GLsizei count,
+                      GLenum type,
+                      const GLvoid * indices,
+                      GLint basevertex,
+                      GLuint numInstances,
+                      GLuint baseInstance);
+
+/**
+ * Segment: a range described by its two end points.
+ * vbo_bbox_element_buffer_update() fills Left with the start offset and
+ * Right with start offset + size, then compares segments with intersect()
+ * and superset().
+ */
+typedef struct gl_segment
+{
+  GLint Left;
+  GLint Right;
+
+}segment;
+
+/**
+ * Clip algorithm result
+ * OUTSIDE, INTERSECT and INSIDE are produced by the frustum test; DEGEN is
+ * reported by vbo_bbox_clip() for boxes flagged is_degenerate.
+ */
+enum vbo_bbox_clip_result
+{
+    BBOX_CLIP_OUTSIDE=0,
+    BBOX_CLIP_INTERSECT=1,
+    BBOX_CLIP_INSIDE=2,
+    BBOX_CLIP_DEGEN = 3,
+    BBOX_CLIP_ERROR = 4,
+};
+
+typedef struct gl_matrixRec /* 4x4 float matrix together with a type tag */
+{
+    GLfloat melem[4][4];
+    GLenum    matrixType;
+} gl_matrix;
+
+/**
+ * Structure to describe plane
+ * Coefficients of the plane equation a*x + b*y + c*z + d, as evaluated by
+ * vbo_bbox_dist_from_point_to_plane().
+ */
+typedef struct vbo_bbox_frustum_plane
+{
+    GLfloat a, b, c, d;
+} vbo_bbox_frustum_plane;
+
+
+/**
+ * Planes and octants with their normals
+ * plane[] order is left, right, top, bottom, far, near. octant[n] has bit
+ * 0/1/2 set when plane[n].a/.b/.c is >= 0 and is used as a box corner index
+ * by the clipping test.
+ * NOTE(review): planeCount is const and not written by
+ * vbo_bbox_get_frustum_from_mvp(); confirm whether anything reads it.
+ */
+typedef struct vbo_bbox_frustum
+{
+    const unsigned char planeCount;
+    struct vbo_bbox_frustum_plane plane[FRUSTUM_PLANE_COUNT];
+    unsigned char octant[FRUSTUM_PLANE_COUNT];
+} vbo_bbox_frustum;
+
+/*
+ * Four component float vector, accessible as data[4] or as x, y, z, w.
+ * An Axis Aligned Bounding Box is stored as eight of these, see
+ * bounding_volume_info.vert_vec4.
+ */
+typedef union vbo_vec4f {
+    GLfloat data[4];
+    struct {
+        GLfloat x, y, z, w;
+    };
+} vbo_vec4f;
+
+/*
+ * Oriented Bounding Box
+ * Placeholder: vbo_bbox_init() has no initialization for this volume type.
+ */
+typedef struct oriented_bounding_box {
+        GLfloat x,y,z,w;
+} oriented_box;
+
+/*
+ * Spherical Bounding volume
+ * Placeholder: vbo_bbox_init() has no initialization for this volume type.
+ */
+typedef struct spherical_bounding_volume {
+        GLfloat x,y,z,r; // x^2+y^2+z^2 = r^2
+} spherical_volume;
+
+/*
+ * 8-Discrete oriented polytopes
+ * Placeholder: vbo_bbox_init() has no initialization for this volume type.
+ */
+typedef struct dop_bounding_volume {
+        GLfloat x,y,z,r,a,b;//TBD Not sure of the representation for 8-DOP yet!
+} dop_volume;
+
+/*
+ * Bounding volumes for AABB, OBB, SPHERE, DOP etc
+ * Only one representation is stored at a time; the clipping code in
+ * vbo_bbox.c reads vert_vec4.
+ */
+typedef union bounding_volume_info {
+    vbo_vec4f vert_vec4[8]; /* AABB corners (presumably from min/max coords) */
+    oriented_box obb[8];
+    spherical_volume sphere;
+    dop_volume dop[8];
+} bounding_volume_info;
+
+
+typedef struct vbo_bbox_cache_key
+{
+    /* From API call; the key is hashed and looked up as raw memory */
+    GLenum mode;         /* GL_TRIANGLES is the only mode supported currently */
+    GLsizei count;       /* Number of indices in draw call */
+    GLenum indices_type;  /* must be GL_UNSIGNED_SHORT for now */
+    GLuint type_size;
+    GLuint type_sizeShift;
+    GLint   indices;     /* Offset to index VBO */
+    GLint basevertex;    /* Only 0 supported for now. */
+
+    /* VBO objects names */
+    GLuint element_buf_name;
+    GLuint vertex_buf_name;
+
+    /* Vertex position attribute configuration */
+    GLint    offset;
+    GLint    size;          /* Size of attribute, must be 3 for now */
+    GLenum   vertDataType;  /* Must be GL_FLOAT */
+    GLsizei  stride;        /* Any */
+    GLuint   elementStride;
+} vbo_bbox_cache_key;
+
+typedef struct bounding_info
+{
+    int vert_count;   /* Index count drawn for this bbox (used as draw count) */
+    int start_offset;   /* Byte offset added to the indices pointer */
+    bool is_degenerate; /* Triangle can not be formed */
+    enum vbo_bbox_clip_result clip_result;
+
+    bounding_volume_info bounding_volume; /* Volume data, box corners for AABB */
+
+} bounding_info;
+
+/**
+ * Cached information about (multiple) bounding boxes
+ * One entry exists per vbo_bbox_cache_key; entries that share a hash bucket
+ * are linked through `next`.
+ */
+struct vbo_bbox_cache_data
+{
+    /* Valid data indicator, will be set to false if VBO have been
+     * modified by application
+     */
+    bool valid;
+
+    /* True while the bounding boxes still have to be (re)calculated for
+     * this geometry */
+    bool need_new_calculation;
+
+    /* Controls delay with which bounding boxes are calculated: the
+     * calculation starts once init_delaycnt has reached init_delaylimit */
+    int init_delaycnt;
+    int init_delaylimit;
+
+    /* Data change indicator for VBOs, compared with the buffer objects'
+     * data_change_counter */
+    int vertpos_vbo_changecnt;
+    int indices_vbo_changecnt;
+
+    /* How many bounding boxes are stored for this geometry */
+    int sub_bbox_cnt;
+
+    /* How many draws call the bbox was effective */
+    int drawcnt_bbox_helped;
+
+    int last_use_frame;
+
+    GLuint hash;
+
+    unsigned keysize;
+
+    vbo_bbox_cache_key *key;
+
+    bool mvp_valid; /* false forces the frustum and clip results to be redone */
+    GLmatrix mvpin; /* its m pointer is aimed at mvp[] before frustum setup */
+    GLfloat mvp[16]; /* copy of the MVP uniform the clip results belong to */
+    vbo_bbox_frustum frustum;
+
+    /* Pointer to array of bboxes */
+    bounding_info* sub_bbox_array;
+
+    /* Bounding box that covers whole geometry */
+    bounding_info full_box;
+
+    struct vbo_bbox_cache_data *next; /* next entry in the same hash bucket */
+};
+
+typedef struct mesa_bbox_cache {
+     struct vbo_bbox_cache_data **items; /* `size` bucket heads, chained by next */
+     struct vbo_bbox_cache_data *last;
+     GLuint size, n_items;
+} mesa_bbox_cache;
+
+typedef struct mesa_bbox_opt /* per-context state, gc->Pipeline.BboxOpt */
+{
+    mesa_bbox_cache * cache;
+    GLuint calc_delay; /* set to BBOX_CALC_MIN_DELAY by vbo_bbox_init() */
+} mesa_bbox_opt;
+
+/**
+ * VBO BBox Cache functions, replica of program cache functions
+ *
+ * Keys are passed as untyped memory plus their size in bytes; vbo_bbox.c
+ * always passes a vbo_bbox_cache_key and sizeof(vbo_bbox_cache_key).
+ * _mesa_search_bbox_cache() returns NULL when nothing is cached for a key.
+ */
+struct mesa_bbox_cache *
+_mesa_new_bbox_cache(void);
+
+void
+_mesa_delete_bbox_cache(struct gl_context *ctx,
+                        struct mesa_bbox_cache *cache);
+
+struct vbo_bbox_cache_data *
+_mesa_search_bbox_cache(struct mesa_bbox_cache *cache,
+                        const void *key, GLuint keysize);
+
+void
+_mesa_bbox_cache_insert(struct gl_context *ctx,
+                        struct mesa_bbox_cache *cache,
+                        const void *key,
+                        GLuint keysize,
+                        struct vbo_bbox_cache_data *CachedData);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif //_VBO_BBOX_H_
diff --git a/src/mesa/vbo/vbo_bbox_cache.c b/src/mesa/vbo/vbo_bbox_cache.c
new file mode 100644
index 0000000..09bd1dd
--- /dev/null
+++ b/src/mesa/vbo/vbo_bbox_cache.c
@@ -0,0 +1,195 @@
+
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * \brief  VBO BBOX module implementation
+ * \author Kedar Karanje
+ */
+
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "vbo/vbo_bbox.h"
+
+#define CACHE_SIZE 17
+
+/**
+ * Hash a cache key by folding it in one GLuint-sized word at a time.
+ * Bytes after the last whole word do not contribute to the hash.
+ */
+static GLuint
+bbox_hash_key(const void *key, GLuint key_size)
+{
+   const GLuint *word = (const GLuint *) key;
+   const GLuint *const end = word + key_size / sizeof(*word);
+   GLuint h = 0;
+
+   /* The key has to be at least 4 bytes long. */
+   assert(key_size >= 4);
+
+   while (word != end) {
+      h += *word++;
+      h += (h << 10);
+      h ^= (h >> 6);
+   }
+
+   return h;
+}
+
+
+/**
+ * Triple the bucket count and relink every entry into the new table.
+ * If the larger table cannot be allocated the cache is left unchanged.
+ */
+static void
+bbox_rehash(struct mesa_bbox_cache *cache)
+{
+   const GLuint size = cache->size * 3;
+   struct vbo_bbox_cache_data **items = calloc(size, sizeof(*items));
+   struct vbo_bbox_cache_data *c, *next;
+   GLuint i;
+
+   if (!items)
+      return; /* out of memory: the old table is still valid, keep it */
+
+   cache->last = NULL;
+   for (i = 0; i < cache->size; i++)
+      for (c = cache->items[i]; c; c = next) {
+         next = c->next;
+         c->next = items[c->hash % size];
+         items[c->hash % size] = c;
+      }
+   free(cache->items);
+   cache->items = items;
+   cache->size = size;
+}
+
+/* Free every entry (its key, sub-box array and node); buckets are kept. */
+static void
+bbox_clear_cache(struct gl_context *ctx, mesa_bbox_cache *cache)
+{
+   GLuint bucket;
+
+   for (bucket = 0; bucket < cache->size; bucket++) {
+      struct vbo_bbox_cache_data *entry = cache->items[bucket];
+      while (entry) {
+         struct vbo_bbox_cache_data *const following = entry->next;
+         free(entry->key);
+         free(entry->sub_bbox_array);
+         free(entry);
+         entry = following;
+      }
+      cache->items[bucket] = NULL;
+   }
+   cache->n_items = 0;
+   cache->last = NULL;
+}
+
+
+
+/* Create an empty cache with CACHE_SIZE buckets.
+ * Returns NULL if the cache or its bucket array cannot be allocated. */
+mesa_bbox_cache *
+_mesa_new_bbox_cache(void)
+{
+   mesa_bbox_cache *cache = CALLOC_STRUCT(mesa_bbox_cache);
+   if (!cache) {
+      MESA_BBOX("cache is Null in Func:%s\n",__func__);
+      return NULL;
+   }
+   cache->size = CACHE_SIZE;
+   /* one chain-head pointer per bucket, not a whole entry struct */
+   cache->items = calloc(cache->size, sizeof(*cache->items));
+   if (!cache->items) {
+      MESA_BBOX("Func:%s cache-size=%u Cannot allocate items freeing cache\n",
+                __func__,cache->size);
+      free(cache);
+      return NULL;
+   }
+   MESA_BBOX("Func:%s cache:%p cache->size=%u \n",
+             __func__,(void *)cache,cache->size);
+   return cache;
+}
+
+/* Free all cached entries, the bucket array and the cache; cache != NULL. */
+void
+_mesa_delete_bbox_cache(struct gl_context *ctx, mesa_bbox_cache *cache)
+{
+   bbox_clear_cache(ctx, cache);
+   free(cache->items);
+   free(cache);
+}
+
+/* Find the entry whose key matches `key` in mode, count and indices.
+ * The last hit is tried first, then the hash chain; returns NULL on miss. */
+struct vbo_bbox_cache_data *
+_mesa_search_bbox_cache(mesa_bbox_cache *cache,
+                           const void *key, GLuint keysize)
+{
+   const vbo_bbox_cache_key *want = (const vbo_bbox_cache_key *)key;
+   struct vbo_bbox_cache_data *c = cache->last;
+   GLuint hash;
+   /* pointers are logged with %p; passing them to %x is undefined */
+   MESA_BBOX("Func:%s cache:%p \n",__func__,(void *)cache);
+   if (c && c->key->mode == want->mode && c->key->count == want->count &&
+       c->key->indices == want->indices)
+      return c;
+
+   hash = bbox_hash_key(key, keysize);
+   MESA_BBOX("cache:%p,hash:%u,cache->size:%u\n",
+             (void *)cache,hash,cache->size);
+   for (c = cache->items[hash % cache->size]; c; c = c->next) {
+      if (c->hash == hash && c->key->mode == want->mode &&
+          c->key->count == want->count && c->key->indices == want->indices) {
+         cache->last = c;
+         return c;
+      }
+   }
+   return NULL;
+}
+
+/* Copy `key` into CachedData and link it into the cache.  If the copy
+ * cannot be allocated, nothing is inserted and CachedData->key is NULL. */
+void
+_mesa_bbox_cache_insert(struct gl_context *ctx,struct mesa_bbox_cache *cache,
+                        const void *key, GLuint keysize,
+                        struct vbo_bbox_cache_data *CachedData)
+{
+   CachedData->hash = bbox_hash_key(key, keysize);
+   CachedData->keysize = keysize;
+   CachedData->key = calloc(1, keysize);
+   if (!CachedData->key)
+      return; /* lookups dereference ->key, so never link a keyless entry */
+   memcpy(CachedData->key, key, keysize);
+
+   if (cache->n_items > cache->size * 1.5) {
+      if (cache->size < 1000)
+         bbox_rehash(cache);
+      else
+         bbox_clear_cache(ctx, cache);
+   }
+
+   cache->n_items++;
+   CachedData->next = cache->items[CachedData->hash % cache->size];
+   cache->items[CachedData->hash % cache->size] = CachedData;
+}
diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
index cf9405d..8c608c2 100644
--- a/src/mesa/vbo/vbo_context.c
+++ b/src/mesa/vbo/vbo_context.c
@@ -36,6 +36,9 @@
 #include "vbo.h"
 #include "vbo_private.h"
 
+#ifdef MESA_BBOX_OPT
+#include "vbo_bbox.h"
+#endif
 
 static GLuint
 check_size(const GLfloat *attr)
@@ -199,6 +202,10 @@ _vbo_CreateContext(struct gl_context *ctx)
    if (ctx->API == API_OPENGL_COMPAT)
       vbo_save_init(ctx);
 
+#ifdef MESA_BBOX_OPT //Hard coded to AABB for now
+   ctx->volume_type = BOUNDING_VOLUME_AABB;
+#endif
+
    vbo->VAO = _mesa_new_vao(ctx, ~((GLuint)0));
    /* The exec VAO assumes to have all arributes bound to binding 0 */
    for (unsigned i = 0; i < VERT_ATTRIB_MAX; ++i)
@@ -219,7 +226,9 @@ _vbo_DestroyContext(struct gl_context *ctx)
       _ae_destroy_context(ctx);
       ctx->aelt_context = NULL;
    }
-
+#ifdef MESA_BBOX_OPT
+   vbo_bbox_free(ctx);
+#endif
    if (vbo) {
 
       _mesa_reference_buffer_object(ctx, &vbo->binding.BufferObj, NULL);
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 51c000e..3321a21 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -39,6 +39,10 @@
 #include "main/macros.h"
 #include "main/transformfeedback.h"
 
+#ifdef MESA_BBOX_OPT
+#include "vbo_bbox.h"
+#endif
+
 typedef struct {
    GLuint count;
    GLuint primCount;
@@ -784,6 +788,16 @@ skip_draw_elements(struct gl_context *ctx, GLsizei count,
  * Do the rendering for a glDrawElements or glDrawRangeElements call after
  * we've validated buffer bounds, etc.
  */
+#ifdef MESA_BBOX_OPT
+void
+vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode,
+                                GLboolean index_bounds_valid,
+                                GLuint start, GLuint end,
+                                GLsizei count, GLenum type,
+                                const GLvoid * indices,
+                                GLint basevertex, GLuint numInstances,
+                                GLuint baseInstance)
+#else
 static void
 vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode,
                                 GLboolean index_bounds_valid,
@@ -792,6 +806,7 @@ vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode,
                                 const GLvoid * indices,
                                 GLint basevertex, GLuint numInstances,
                                 GLuint baseInstance)
+#endif
 {
    struct _mesa_index_buffer ib;
    struct _mesa_prim prim;
@@ -997,6 +1012,11 @@ vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type,
       _mesa_debug(ctx, "glDrawElements(%s, %u, %s, %p)\n",
                   _mesa_enum_to_string(mode), count,
                   _mesa_enum_to_string(type), indices);
+#ifdef MESA_BBOX_OPT
+      MESA_BBOX_PRINT("glDrawElements(%s, %u, %s, %p)\n",
+                  _mesa_enum_to_string(mode), count,
+                  _mesa_enum_to_string(type), indices);
+#endif
 
    FLUSH_FOR_DRAW(ctx);
 
@@ -1011,9 +1031,13 @@ vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type,
       if (!_mesa_validate_DrawElements(ctx, mode, count, type, indices))
          return;
    }
-
+#ifdef MESA_BBOX_OPT
+   vbo_bbox_drawelements(ctx, mode, GL_FALSE, 0, ~0,
+                         count, type, indices, 0, 1, 0);
+#else
    vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, 0, ~0,
                                    count, type, indices, 0, 1, 0);
+#endif
 }
 
 
@@ -1045,8 +1069,13 @@ vbo_exec_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type,
          return;
    }
 
+#ifdef MESA_BBOX_OPT
+   vbo_bbox_drawelements(ctx, mode, GL_FALSE, 0, ~0,
+                                   count, type, indices, basevertex, 1, 0);
+#else
    vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, 0, ~0,
                                    count, type, indices, basevertex, 1, 0);
+#endif
 }
 
 
@@ -1078,9 +1107,13 @@ vbo_exec_DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type,
                                                 indices, numInstances))
          return;
    }
-
+#ifdef MESA_BBOX_OPT
+   vbo_bbox_drawelements(ctx, mode, GL_FALSE, 0, ~0,
+                                   count, type, indices, 0, numInstances, 0);
+#else
    vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, 0, ~0,
                                    count, type, indices, 0, numInstances, 0);
+#endif
 }
 
 
diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf
index ad59efb..15f064b 100644
--- a/src/util/00-mesa-defaults.conf
+++ b/src/util/00-mesa-defaults.conf
@@ -197,6 +197,10 @@ TODO: document the other workarounds.
             <option name="force_glsl_extensions_warn" value="true" />
         </application>
 
+        <application name="GLBenchmark" executable="GLBenchmark">
+            <option name="enable_bounding_box_culling" value="true"/>
+        </application>
+
         <!-- The GL thread whitelist is below, workarounds are above.
              Keep it that way. -->
 
diff --git a/src/util/xmlpool/t_options.h b/src/util/xmlpool/t_options.h
index ecf495a..c3f8928 100644
--- a/src/util/xmlpool/t_options.h
+++ b/src/util/xmlpool/t_options.h
@@ -135,6 +135,11 @@ DRI_CONF_OPT_BEGIN_B(allow_glsl_cross_stage_interpolation_mismatch, def) \
         DRI_CONF_DESC(en,gettext("Allow interpolation qualifier mismatch across shader stages")) \
 DRI_CONF_OPT_END
 
+#define DRI_CONF_ENABLE_BOUNDING_BOX_CULLING(def) \
+DRI_CONF_OPT_BEGIN_B(enable_bounding_box_culling, def) \
+        DRI_CONF_DESC(en,gettext("Enable bounding box culling in CPU")) \
+DRI_CONF_OPT_END
+
 /**
  * \brief Image quality-related options
  */
-- 
2.7.4



More information about the mesa-dev mailing list