<div dir="ltr"><div class="gmail_quote"><div dir="ltr">On Fri, Aug 31, 2018 at 1:09 AM <<a href="mailto:kedar.j.karanje@intel.com">kedar.j.karanje@intel.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">From: "J Karanje, Kedar" <<a href="mailto:kedar.j.karanje@intel.com" target="_blank">kedar.j.karanje@intel.com</a>><br>
<br>
The feature is enabled by default during make however we need to<br>
add the following to drirc to enable the feature at runtime.<br>
&lt;option name="enable_bounding_box_culling" value="true"/&gt;<br>
<br>
vbo: Main algorithm & code to check for MVP & vertex position location<br>
Build Files: Flags to enable BBOX Code and check AVX version<br>
compiler: Code to recognize simple shader<br>
          (gl_position is a simple function of mvp and vertex)<br>
i965 & util: dri query to check if feature is enabled<br>
<br>
vbo: Implements a bounding box algorithm for mesa,we hook into the default<br>
    drawelements and drawrangelements and the MVP & vertex positions location<br>
    and the corresponding program is got,we re-create the frustum planes<br>
    using this data and also create a box around the object and use the 8<br>
    vertices (box vertices) and check if the box is within the frustum or not,<br>
    we drop the draw calls that are completely outside the view frustum and<br>
    go for sub-boxes for objects that are intersecting with the frustum planes.<br>
<br>
The current patch has been verified on KBL+Ubuntu 16.04, we noticed<br>
8~10% improvements in GFxBench TREX offscreen and ~2% for Manhattan offscreen,<br>
Platforms where avx2 is not supported shall still see ~6-8% improvement, the<br>
other KPIs were not impacted.<br>
<br>
Based on empirical data we have set minimum vertex count as 999 and the<br>
sub-box size as 198, this provides the best results, we have also implemented<br>
some level of caching for the box co-od and frustum plane co-od.<br>
we have also optimized some algorithms to use avx2 when a target supports it.<br>
<br>
Shader classification code is currently in hir and we have got review comments<br>
to move the same to NIR.<br>
<br>
Signed-off-by: Aravindan Muthukumar <<a href="mailto:aravindan.muthukumar@intel.com" target="_blank">aravindan.muthukumar@intel.com</a>><br>
Signed-off-by: Yogesh Marathe <<a href="mailto:yogesh.marathe@intel.com" target="_blank">yogesh.marathe@intel.com</a>><br>
---<br>
 <a href="http://Android.common.mk" rel="noreferrer" target="_blank">Android.common.mk</a>                        |   19 +<br>
 <a href="http://configure.ac" rel="noreferrer" target="_blank">configure.ac</a>                             |   34 +-<br>
 src/compiler/glsl/ast_to_hir.cpp         |  168 +++-<br>
 src/compiler/glsl/glsl_parser_extras.cpp |   10 +<br>
 src/compiler/glsl/glsl_parser_extras.h   |    7 +<br>
 src/compiler/glsl/linker.cpp             |   18 +<br>
 src/intel/common/gen_debug.c             |    7 +<br>
 src/mesa/Makefile.sources                |   11 +<br>
 src/mesa/drivers/dri/i965/brw_context.c  |   17 +<br>
 src/mesa/drivers/dri/i965/intel_screen.c |    4 +<br>
 src/mesa/main/bufferobj.c                |   19 +<br>
 src/mesa/main/mtypes.h                   |   51 +<br>
 src/mesa/program/Android.mk              |    1 +<br>
 src/mesa/program/program.c               |    3 +<br>
 src/mesa/vbo/vbo_bbox.c                  | 1538 ++++++++++++++++++++++++++++++<br>
 src/mesa/vbo/vbo_bbox.h                  |  383 ++++++++<br>
 src/mesa/vbo/vbo_bbox_cache.c            |  195 ++++<br>
 src/mesa/vbo/vbo_context.c               |   11 +-<br>
 src/mesa/vbo/vbo_exec_array.c            |   37 +-<br>
 src/util/00-mesa-defaults.conf           |    4 +<br>
 src/util/xmlpool/t_options.h             |    5 +<br>
 21 files changed, 2535 insertions(+), 7 deletions(-)<br>
 mode change 100644 => 100755 src/compiler/glsl/ast_to_hir.cpp<br>
 create mode 100644 src/mesa/vbo/vbo_bbox.c<br>
 create mode 100644 src/mesa/vbo/vbo_bbox.h<br>
 create mode 100644 src/mesa/vbo/vbo_bbox_cache.c<br>
<br>
diff --git a/<a href="http://Android.common.mk" rel="noreferrer" target="_blank">Android.common.mk</a> b/<a href="http://Android.common.mk" rel="noreferrer" target="_blank">Android.common.mk</a><br>
index aa1b266..efd6792 100644<br>
--- a/<a href="http://Android.common.mk" rel="noreferrer" target="_blank">Android.common.mk</a><br>
+++ b/<a href="http://Android.common.mk" rel="noreferrer" target="_blank">Android.common.mk</a><br>
@@ -21,6 +21,8 @@<br>
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER<br>
 # DEALINGS IN THE SOFTWARE.<br>
<br>
+MESA_BBOX_ENABLE=true<br>
+<br>
 ifeq ($(LOCAL_IS_HOST_MODULE),true)<br>
 LOCAL_CFLAGS += -D_GNU_SOURCE<br>
 endif<br>
@@ -80,6 +82,10 @@ LOCAL_CFLAGS += \<br>
        -fno-trapping-math \<br>
        -Wno-sign-compare<br>
<br>
+ifeq ($(MESA_BBOX_ENABLE),true)<br>
+LOCAL_CFLAGS += -DMESA_BBOX_OPT<br>
+endif<br>
+<br>
 LOCAL_CPPFLAGS += \<br>
        -D__STDC_CONSTANT_MACROS \<br>
        -D__STDC_FORMAT_MACROS \<br>
@@ -87,6 +93,10 @@ LOCAL_CPPFLAGS += \<br>
        -Wno-error=non-virtual-dtor \<br>
        -Wno-non-virtual-dtor<br>
<br>
+ifeq ($(MESA_BBOX_ENABLE),true)<br>
+LOCAL_CPPFLAGS += -DMESA_BBOX_OPT<br>
+endif<br>
+<br>
 # mesa requires at least c99 compiler<br>
 LOCAL_CONLYFLAGS += \<br>
        -std=c99<br>
@@ -98,6 +108,15 @@ ifeq ($(filter 5 6 7 8 9, $(MESA_ANDROID_MAJOR_VERSION)),)<br>
 LOCAL_CFLAGS += -DHAVE_TIMESPEC_GET<br>
 endif<br>
<br>
+ifeq ($(MESA_BBOX_ENABLE),true)<br>
+#if defined(CONFIG_AS_AVX)<br>
+LOCAL_CONLYFLAGS += -mavx<br>
+#elif<br>
+LOCAL_CONLYFLAGS += -msse4.1<br>
+#endif<br>
+endif<br>
+<br>
+<br>
 ifeq ($(strip $(MESA_ENABLE_ASM)),true)<br>
 ifeq ($(TARGET_ARCH),x86)<br>
 LOCAL_CFLAGS += \<br>
diff --git a/<a href="http://configure.ac" rel="noreferrer" target="_blank">configure.ac</a> b/<a href="http://configure.ac" rel="noreferrer" target="_blank">configure.ac</a><br>
index 4d9d9e5..dcdbcf3 100644<br>
--- a/<a href="http://configure.ac" rel="noreferrer" target="_blank">configure.ac</a><br>
+++ b/<a href="http://configure.ac" rel="noreferrer" target="_blank">configure.ac</a><br>
@@ -278,7 +278,8 @@ _SAVE_LDFLAGS="$LDFLAGS"<br>
 _SAVE_CPPFLAGS="$CPPFLAGS"<br>
<br>
 dnl Compiler macros<br>
-DEFINES="-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS"<br>
+DEFINES="-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DMESA_BBOX_OPT"<br>
+dnl DEFINES="-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS"<br>
 AC_SUBST([DEFINES])<br>
 android=no<br>
 case "$host_os" in<br>
@@ -295,10 +296,38 @@ esac<br>
<br>
 AM_CONDITIONAL(HAVE_ANDROID, test "x$android" = xyes)<br>
<br>
+<br>
+dnl Conditional parameters for enabling BBOX file compilation in Makefile<br>
+dnl bbox=yes<br>
+dnl MESA_BBOX_ENABLE=true<br>
+dnl AM_CONDITIONAL([MESA_BBOX_ENABLE], [test x$bbox= xyes])<br>
+<br>
 dnl<br>
 dnl Check compiler flags<br>
 dnl<br>
-AX_CHECK_COMPILE_FLAG([-Wall],                                 [CFLAGS="$CFLAGS -Wall"])<br>
+<br>
+AC_ARG_WITH([swr-archs],<br>
+    [AS_HELP_STRING([--with-swr-archs@<:@=DIRS...@:>@],<br>
+        [comma delimited swr architectures list, e.g.<br>
+        "avx,avx2,sse4.1,sse4.2" @<:@default="sse4.1,sse4.2"@:>@])],<br>
+    [with_swr_archs="$withval"],<br>
+    [with_swr_archs="sse4.1,sse4.2"])<br>
+<br>
+swr_archs=`IFS=', '; echo $with_swr_archs`<br>
+if test "$swr_archs" = "avx"; then<br>
+AX_CHECK_COMPILE_FLAG([-Wall],                                 [CFLAGS="$CFLAGS -Wall -msse4.1 -mavx"])<br>
+AX_CHECK_COMPILE_FLAG([-Wall],                                 [CXXFLAGS="$CXXFLAGS -Wall -msse4.1 -mavx"])<br>
+elif test "$swr_archs" = "avx2"; then<br>
+AX_CHECK_COMPILE_FLAG([-Wall],                                 [CFLAGS="$CFLAGS -Wall -msse4.1 -mavx -mavx2"])<br>
+AX_CHECK_COMPILE_FLAG([-Wall],                                 [CXXFLAGS="$CXXFLAGS -Wall -msse4.1 -mavx -mavx2"])<br>
+elif test "$swr_archs" = "sse4.1"; then<br>
+AX_CHECK_COMPILE_FLAG([-Wall],                                 [CFLAGS="$CFLAGS -Wall -msse4.1"])<br>
+AX_CHECK_COMPILE_FLAG([-Wall],                                 [CXXFLAGS="$CXXFLAGS -Wall -msse4.1"])<br>
+elif test "$swr_archs" = "sse4.2"; then<br>
+AX_CHECK_COMPILE_FLAG([-Wall],                                 [CFLAGS="$CFLAGS -Wall -msse4.1 -msse4.2"])<br>
+AX_CHECK_COMPILE_FLAG([-Wall],                                 [CXXFLAGS="$CXXFLAGS -Wall -msse4.1 -msse4.2"])<br>
+fi<br>
+<br>
 AX_CHECK_COMPILE_FLAG([-Werror=implicit-function-declaration], [CFLAGS="$CFLAGS -Werror=implicit-function-declaration"])<br>
 AX_CHECK_COMPILE_FLAG([-Werror=missing-prototypes],            [CFLAGS="$CFLAGS -Werror=missing-prototypes"])<br>
 AX_CHECK_COMPILE_FLAG([-Wmissing-prototypes],                  [CFLAGS="$CFLAGS -Wmissing-prototypes"])<br>
@@ -313,7 +342,6 @@ dnl<br>
 dnl Check C++ compiler flags<br>
 dnl<br>
 AC_LANG_PUSH([C++])<br>
-AX_CHECK_COMPILE_FLAG([-Wall],                                 [CXXFLAGS="$CXXFLAGS -Wall"])<br>
 AX_CHECK_COMPILE_FLAG([-fno-math-errno],                       [CXXFLAGS="$CXXFLAGS -fno-math-errno"])<br>
 AX_CHECK_COMPILE_FLAG([-fno-trapping-math],                    [CXXFLAGS="$CXXFLAGS -fno-trapping-math"])<br>
 AX_CHECK_COMPILE_FLAG([-fvisibility=hidden],                   [VISIBILITY_CXXFLAGS="-fvisibility=hidden"])<br>
diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp<br>
old mode 100644<br>
new mode 100755<br>
index 5d3f10b..f4e8dea<br>
--- a/src/compiler/glsl/ast_to_hir.cpp<br>
+++ b/src/compiler/glsl/ast_to_hir.cpp<br>
@@ -62,7 +62,6 @@<br>
 #include "builtin_functions.h"<br>
<br>
 using namespace ir_builder;<br>
-<br>
 static void<br>
 detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,<br>
                                exec_list *instructions);<br>
@@ -1325,6 +1324,124 @@ ast_expression::set_is_lhs(bool new_value)<br>
       this->subexpressions[0]->set_is_lhs(new_value);<br>
 }<br>
<br>
+#ifdef MESA_BBOX_OPT<br>
+static bool<br>
+is_simple_shader(exec_list *instructions, ast_expression *simple_ast_root,<br>
+                 struct _mesa_glsl_parse_state *state)<br></blockquote><div><br></div><div>This is in desperate need of some comments.  I think what it's trying to do is detect a "simple" shader that's just</div><div><br></div><div>uniform mat4 MVP;</div><div>in vec4 v0;<br></div><div>void main()</div><div>{</div><div>   gl_Position = MVP * v0;</div><div>}</div><div><br></div><div>However, the mess of AST code doesn't make that obvious.</div><div><br></div><div>Also, as others have said, this should be done lower down in the stack in GLSL IR (HIR) or, better yet, in NIR so that we can detect the same case for ARB programs.</div><div><br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+{<br>
+   ast_expression * subex0 = simple_ast_root->subexpressions[0];<br>
+   ast_expression * subex1 = simple_ast_root->subexpressions[1];<br>
+<br>
+   char temp_identifier[100];<br>
+<br>
+   subex0->set_is_lhs(true);<br>
+<br>
+   if (subex1->oper == ast_mul)<br>
+   {<br>
+      ir_rvalue *rhsParts[3];<br>
+      rhsParts[0] = subex1->subexpressions[0]->hir(instructions, state);<br>
+      rhsParts[1] = subex1->subexpressions[1]->hir(instructions, state);<br>
+<br>
+      if (rhsParts[0]->type->gl_type == GL_FLOAT_MAT4 &&<br>
+          rhsParts[1]->type->gl_type == GL_FLOAT_VEC4) {<br>
+         foreach_list_typed (ast_node, ast, link,<br>
+                             &subex1->subexpressions[1]->expressions) {<br>
+            if (((ast_expression *)ast)->oper != ast_identifier &&<br>
+               (((ast_expression *)ast)->oper != ast_int_constant) &&<br>
+               (((ast_expression *)ast)->oper != ast_float_constant)) {<br>
+               return false;<br>
+            }<br>
+            if (((ast_expression *)ast)->oper == ast_identifier &&<br>
+                ((ast_expression *)ast)->primary_expression.identifier) {<br>
+                   strncpy((char *)state->stateVertPosition,<br>
+                   ((ast_expression *)ast)->primary_expression.identifier,<br>
+                strlen(((ast_expression *)ast)->primary_expression.identifier));<br>
+            }<br>
+         }<br>
+      }<br>
+      if (subex0->oper == ast_identifier) {<br>
+         if (!strcmp(subex0->primary_expression.identifier,"gl_Position")) {<br>
+            if(subex0 && subex0->primary_expression.identifier) {<br>
+               strncpy((char *)temp_identifier,<br>
+                    subex0->primary_expression.identifier,<br>
+                    strlen(subex0->primary_expression.identifier));<br>
+            } else {<br>
+              return false;<br>
+            }<br>
+            if(subex1 && subex1->subexpressions[0] &&<br>
+              subex1->subexpressions[0]->primary_expression.identifier) {<br>
+                strncpy((char *)state->stateMVP,<br>
+                   subex1->subexpressions[0]->primary_expression.identifier,<br>
+            strlen(subex1->subexpressions[0]->primary_expression.identifier));<br>
+            } else {<br>
+              return false;<br>
+            }<br>
+<br>
+            return true;<br>
+         } else {<br>
+            if(subex0 && subex0->primary_expression.identifier) {<br>
+               strncpy((char *)temp_identifier,<br>
+                  subex0->primary_expression.identifier,<br>
+                  strlen(subex0->primary_expression.identifier));<br>
+            } else {<br>
+              return true;<br>
+            }<br>
+<br>
+            if (subex1->subexpressions[0]->oper == ast_identifier) {<br>
+               if(subex1 && subex1->subexpressions[0] &&<br>
+                  subex1->subexpressions[0]->primary_expression.identifier) {<br>
+                     strncpy((char *)state->stateMVP,<br>
+                     subex1->subexpressions[0]->primary_expression.identifier,<br>
+              strlen(subex1->subexpressions[0]->primary_expression.identifier));<br>
+               } else {<br>
+                  return true;<br>
+               }<br>
+            }<br>
+            return false; //Return false to trigger further parsing.<br>
+         }<br>
+      } else {<br>
+        return false;<br>
+      }<br>
+   } else {<br>
+      if (subex0->primary_expression.identifier != NULL)<br>
+        if (!strcmp(subex0->primary_expression.identifier,"gl_Position") &&<br>
+          (strlen(temp_identifier) > 0)) { //gl_position = temp;<br>
+           if (subex1->oper == ast_identifier) {<br>
+              if (!strcmp((char *)temp_identifier,<br>
+                 subex1->primary_expression.identifier)) {<br>
+                   return true;<br>
+              }<br>
+           } else {<br>
+              return false;<br>
+           }<br>
+        }<br>
+   }<br>
+   return false;<br>
+}<br>
+/*<br>
+ * Function to check if LHS of assign is an input variable Eg: in_position<br>
+ */<br>
+<br>
+static bool<br>
+is_attribute(struct _mesa_glsl_parse_state *state)<br>
+{<br>
+   ir_variable *const var = state->symbols->get_variable(state->stateVertPosition);<br>
+   ir_rvalue *result = NULL;<br>
+   void *ctx = state;<br>
+   if (var != NULL)<br>
+   {<br>
+         result = new(ctx) ir_dereference_variable(var);<br>
+         (void)result;<br>
+         if (var->data.mode ==  ir_var_shader_in)<br>
+             return true;<br>
+<br>
+   }<br>
+<br>
+   return false;<br>
+<br>
+}<br>
+#endif //MESA_BBOX_OPT<br>
+<br>
 ir_rvalue *<br>
 ast_expression::do_hir(exec_list *instructions,<br>
                        struct _mesa_glsl_parse_state *state,<br>
@@ -1400,6 +1517,55 @@ ast_expression::do_hir(exec_list *instructions,<br>
       unreachable("ast_aggregate: Should never get here.");<br>
<br>
    case ast_assign: {<br>
+#ifdef MESA_BBOX_OPT<br>
+      if (state->stage == MESA_SHADER_VERTEX &&<br>
+          !state->state_bbox_simple_shader &&<br>
+          is_simple_shader(instructions,this,state) &&<br>
+          !state->state_shader_analysis_complete)<br>
+      {<br>
+         state->state_bbox_simple_shader = true;<br>
+      }<br>
+<br>
+      if (state->state_bbox_simple_shader &&<br>
+          !state->state_shader_analysis_complete) {<br>
+         if (!is_attribute(state)) {<br>
+            if ((ir_dereference_variable *)op[0] != NULL &&<br>
+                ((ir_dereference_variable *)op[0])->var->name != NULL &&<br>
+                strlen(state->stateVertPosition) != 0)<br>
+            if (!strcmp(((ir_dereference_variable *)op[0])->var->name,<br>
+                          state->stateVertPosition) ) {<br>
+               if (((ir_instruction *)op[0])->ir_type == ir_type_variable ||<br>
+                    ((ir_instruction *)op[0])->ir_type ==<br>
+                                      ir_type_dereference_variable &&<br>
+                    (ir_dereference_variable *)op[1] != NULL &&<br>
+                    ((ir_dereference_variable *)op[1])->ir_type ==<br>
+                     ir_type_dereference_variable) {<br>
+                  if ((((ir_instruction *)op[1])->ir_type == ir_type_variable ||<br>
+                      ((ir_instruction *)op[1])->ir_type ==<br>
+                        ir_type_dereference_variable) &&<br>
+                       ((ir_dereference_variable *)op[1])->var->name) {<br>
+                     strncpy((char *)state->stateVertPosition,<br>
+                           (char *)((ir_dereference_variable *)op[1])->var->name,<br>
+                        strlen(((ir_dereference_variable *)op[1])->var->name));<br>
+                        state->state_shader_analysis_complete = true;<br>
+                  }<br>
+               }<br>
+               else {<br>
+                  state->state_bbox_simple_shader = false;<br>
+                  state->state_shader_analysis_complete = true;<br>
+               }<br>
+            }<br>
+            else {<br>
+               state->state_bbox_simple_shader = false;<br>
+               state->state_shader_analysis_complete = true;<br>
+<br>
+            }<br>
+         }<br>
+         else {<br>
+            state->state_shader_analysis_complete = true;<br>
+         }<br>
+      }<br>
+#endif //MESA_BBOX_OPT<br>
       this->subexpressions[0]->set_is_lhs(true);<br>
       op[0] = this->subexpressions[0]->hir(instructions, state);<br>
       op[1] = this->subexpressions[1]->hir(instructions, state);<br>
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp<br>
index 42ba88f..c540b3a 100644<br>
--- a/src/compiler/glsl/glsl_parser_extras.cpp<br>
+++ b/src/compiler/glsl/glsl_parser_extras.cpp<br>
@@ -2121,6 +2121,16 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,<br>
    if (!state->error && !state->translation_unit.is_empty())<br>
       _mesa_ast_to_hir(shader->ir, state);<br>
<br>
+#ifdef MESA_BBOX_OPT<br>
+   shader->shader_bbox_simple_shader = state->state_bbox_simple_shader;<br>
+   if (shader->shader_bbox_simple_shader)<br>
+   {<br>
+        strncpy((char *)shader->shaderMVP,state->stateMVP,<br>
+                        strlen(state->stateMVP));<br>
+        strncpy((char *)shader->shaderVertPosition,state->stateVertPosition,<br>
+                        strlen(state->stateVertPosition));<br>
+   }<br>
+#endif //MESA_BBOX_OPT<br>
    if (!state->error) {<br>
       validate_ir_tree(shader->ir);<br>
<br>
diff --git a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h<br>
index da44d37..b951a66 100644<br>
--- a/src/compiler/glsl/glsl_parser_extras.h<br>
+++ b/src/compiler/glsl/glsl_parser_extras.h<br>
@@ -887,6 +887,13 @@ struct _mesa_glsl_parse_state {<br>
     * so we can check totals aren't too large.<br>
     */<br>
    unsigned clip_dist_size, cull_dist_size;<br>
+<br>
+#ifdef MESA_BBOX_OPT<br>
+   bool state_shader_analysis_complete;<br>
+   bool state_bbox_simple_shader;<br>
+   char stateMVP[20] = {'\0'};<br>
+   char stateVertPosition[100] = {'\0'};<br>
+#endif<br>
 };<br>
<br>
 # define YYLLOC_DEFAULT(Current, Rhs, N)                        \<br>
diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp<br>
index 3ce78fe..210f37f 100644<br>
--- a/src/compiler/glsl/linker.cpp<br>
+++ b/src/compiler/glsl/linker.cpp<br>
@@ -4876,6 +4876,24 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)<br>
<br>
          switch (stage) {<br>
          case MESA_SHADER_VERTEX:<br>
+#ifdef MESA_BBOX_OPT<br>
+            if (shader_list[MESA_SHADER_VERTEX][0] != NULL) {<br>
+               sh->linked_bbox_simple_shader =<br>
+                  shader_list[MESA_SHADER_VERTEX][0]->shader_bbox_simple_shader;<br>
+                 /*TBD: How do we handle multiple Vertex Shaders<br>
+                  *     being linked ??<br>
+                  *     MVP Name copied to get MVP in VBO<br>
+                  */<br>
+                 strncpy((char *)sh->linkedshaderMVP,<br>
+                     shader_list[MESA_SHADER_VERTEX][0]->shaderMVP,<br>
+                     strlen(shader_list[MESA_SHADER_VERTEX][0]->shaderMVP));<br>
+                 /* Vertex Position attribute name */<br>
+                 strncpy((char *)sh->linkedshaderVertPosition,<br>
+                     shader_list[MESA_SHADER_VERTEX][0]->shaderVertPosition,<br>
+                     strlen(<br>
+                     shader_list[MESA_SHADER_VERTEX][0]->shaderVertPosition));<br>
+              }<br>
+#endif<br>
             validate_vertex_shader_executable(prog, sh, ctx);<br>
             break;<br>
          case MESA_SHADER_TESS_CTRL:<br>
diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c<br>
index a978f2f..c677208 100644<br>
--- a/src/intel/common/gen_debug.c<br>
+++ b/src/intel/common/gen_debug.c<br>
@@ -106,6 +106,13 @@ intel_debug_flag_for_shader_stage(gl_shader_stage stage)<br>
 static void<br>
 brw_process_intel_debug_variable_once(void)<br>
 {<br>
+#if defined(__ANDROID__) || defined(ANDROID)<br>
+   setenv("MESA_GLSL_CACHE_DISABLE","true",1);<br>
+   setenv("MESA_BBOX_MIN_VERTEX_CNT", "999", 1);<br>
+   setenv("MESA_BBOX_OPT_ENABLE", "3", 1);<br>
+   setenv("MESA_OPT_SPLIT_SIZE", "198", 1);<br>
+#endif<br>
+<br>
    INTEL_DEBUG = parse_debug_string(getenv("INTEL_DEBUG"), debug_control);<br>
 }<br>
<br>
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources<br>
index 0d3c277..8148622 100644<br>
--- a/src/mesa/Makefile.sources<br>
+++ b/src/mesa/Makefile.sources<br>
@@ -1,4 +1,5 @@<br>
 ### Lists of source files, included by Makefiles<br>
+MESA_BBOX_ENABLE = false<br>
<br>
 # this is part of MAIN_FILES<br>
 MAIN_ES_FILES = \<br>
@@ -400,6 +401,7 @@ TNL_FILES = \<br>
        tnl/t_vp_build.c \<br>
        tnl/t_vp_build.h<br>
<br>
+<br>
 VBO_FILES = \<br>
        vbo/vbo_attrib.h \<br>
        vbo/vbo_attrib_tmp.h \<br>
@@ -422,6 +424,15 @@ VBO_FILES = \<br>
        vbo/vbo_save.h \<br>
        vbo/vbo_save_loopback.c<br>
<br>
+#ifeq($(MESA_BBOX_ENABLE), true)<br>
+<br>
+VBO_FILES += \<br>
+       vbo/vbo_bbox_cache.c \<br>
+       vbo/vbo_bbox.c \<br>
+       vbo/vbo_bbox.h<br>
+<br>
+#endif<br>
+<br>
 STATETRACKER_FILES = \<br>
        state_tracker/st_atifs_to_tgsi.c \<br>
        state_tracker/st_atifs_to_tgsi.h \<br>
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c<br>
index 6ba64e4..03cf861 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_context.c<br>
+++ b/src/mesa/drivers/dri/i965/brw_context.c<br>
@@ -48,6 +48,10 @@<br>
<br>
 #include "vbo/vbo.h"<br>
<br>
+#ifdef MESA_BBOX_OPT<br>
+#include "vbo/vbo_bbox.h"<br>
+#endif<br>
+<br>
 #include "drivers/common/driverfuncs.h"<br>
 #include "drivers/common/meta.h"<br>
 #include "utils.h"<br>
@@ -890,6 +894,11 @@ brw_process_driconf_options(struct brw_context *brw)<br>
    ctx->Const.AllowGLSLCrossStageInterpolationMismatch =<br>
       driQueryOptionb(options, "allow_glsl_cross_stage_interpolation_mismatch");<br>
<br>
+#ifdef MESA_BBOX_OPT<br>
+   ctx->Const.EnableBoundingBoxCulling =<br>
+      driQueryOptionb(options, "enable_bounding_box_culling");<br>
+#endif //MESA_BBOX_OPT<br>
+<br>
    ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20);<br>
    driComputeOptionsSha1(&brw->screen->optionCache,<br>
                          ctx->Const.dri_config_options_sha1);<br>
@@ -1001,6 +1010,14 @@ brwCreateContext(gl_api api,<br>
<br>
    brw_process_driconf_options(brw);<br>
<br>
+#ifdef MESA_BBOX_OPT<br>
+   if (ctx->Const.EnableBoundingBoxCulling) {<br>
+        MESA_BBOX("EnableBoundingBoxCulling True\n");<br>
+        vbo_bbox_init(ctx);<br>
+   } else<br>
+        MESA_BBOX("EnableBoundingBoxCulling False\n");<br>
+#endif //MESA_BBOX_OPT<br>
+<br>
    if (INTEL_DEBUG & DEBUG_PERF)<br>
       brw->perf_debug = true;<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c<br>
index eaf5a3b..35c7624 100644<br>
--- a/src/mesa/drivers/dri/i965/intel_screen.c<br>
+++ b/src/mesa/drivers/dri/i965/intel_screen.c<br>
@@ -88,6 +88,10 @@ DRI_CONF_BEGIN<br>
       DRI_CONF_ALLOW_HIGHER_COMPAT_VERSION("false")<br>
       DRI_CONF_FORCE_GLSL_ABS_SQRT("false")<br>
<br>
+#ifdef MESA_BBOX_OPT<br>
+      DRI_CONF_ENABLE_BOUNDING_BOX_CULLING("true")<br>
+#endif<br>
+<br>
       DRI_CONF_OPT_BEGIN_B(shader_precompile, "true")<br>
         DRI_CONF_DESC(en, "Perform code generation at shader link time.")<br>
       DRI_CONF_OPT_END<br>
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c<br>
index 1d1e51b..67a369b 100644<br>
--- a/src/mesa/main/bufferobj.c<br>
+++ b/src/mesa/main/bufferobj.c<br>
@@ -48,6 +48,9 @@<br>
 #include "varray.h"<br>
 #include "util/u_atomic.h"<br>
<br>
+#ifdef MESA_BBOX_OPT<br>
+#include "vbo/vbo_bbox.h"<br>
+#endif<br>
<br>
 /* Debug flags */<br>
 /*#define VBO_DEBUG*/<br>
@@ -2254,6 +2257,10 @@ buffer_sub_data(GLenum target, GLuint buffer, GLintptr offset,<br>
<br>
    if (no_error || validate_buffer_sub_data(ctx, bufObj, offset, size, func))<br>
       _mesa_buffer_sub_data(ctx, bufObj, offset, size, data);<br>
+<br>
+#ifdef MESA_BBOX_OPT<br>
+   vbo_bbox_element_buffer_update(ctx,bufObj,data,offset,size);<br>
+#endif<br>
 }<br>
<br>
<br>
@@ -2589,9 +2596,17 @@ validate_and_unmap_buffer(struct gl_context *ctx,<br>
 #endif<br>
<br>
 #ifdef VBO_DEBUG<br>
+#ifdef MESA_BBOX_OPT<br>
+   if (bufObj->StorageFlags & GL_MAP_WRITE_BIT) {<br>
+#else<br>
    if (bufObj->AccessFlags & GL_MAP_WRITE_BIT) {<br>
+#endif<br>
       GLuint i, unchanged = 0;<br>
+#ifdef MESA_BBOX_OPT<br>
+      GLubyte *b = (GLubyte *) bufObj->Data;<br>
+#else<br>
       GLubyte *b = (GLubyte *) bufObj->Pointer;<br>
+#endif<br>
       GLint pos = -1;<br>
       /* check which bytes changed */<br>
       for (i = 0; i < bufObj->Size - 1; i++) {<br>
@@ -3154,7 +3169,11 @@ map_buffer_range(struct gl_context *ctx, struct gl_buffer_object *bufObj,<br>
       /* Access must be write only */<br>
       if ((access & GL_MAP_WRITE_BIT) && (!(access & ~GL_MAP_WRITE_BIT))) {<br>
          GLuint i;<br>
+#ifdef MESA_BBOX_OPT<br>
+         GLubyte *b = (GLubyte *) bufObj->Data;<br>
+#else<br>
          GLubyte *b = (GLubyte *) bufObj->Pointer;<br>
+#endif<br>
          for (i = 0; i < bufObj->Size; i++)<br>
             b[i] = i & 0xff;<br>
       }<br>
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h<br>
index 9fd577d..c262173 100644<br>
--- a/src/mesa/main/mtypes.h<br>
+++ b/src/mesa/main/mtypes.h<br>
@@ -1359,6 +1359,9 @@ struct gl_buffer_object<br>
    bool MinMaxCacheDirty;<br>
<br>
    bool HandleAllocated; /**< GL_ARB_bindless_texture */<br>
+#ifdef MESA_BBOX_OPT<br>
+   int data_change_counter; //TBD: Same as RefCount to check<br>
+#endif<br>
 };<br>
<br>
<br>
@@ -2535,6 +2538,12 @@ struct gl_linked_shader<br>
     * stores information that is also needed during linking.<br>
     */<br>
    struct gl_shader_spirv_data *spirv_data;<br>
+<br>
+#ifdef MESA_BBOX_OPT<br>
+   bool linked_bbox_simple_shader;<br>
+   char linkedshaderMVP[20];<br>
+   char linkedshaderVertPosition[100];<br>
+#endif<br>
 };<br>
<br>
<br>
@@ -2631,6 +2640,11 @@ struct gl_shader<br>
<br>
    /* ARB_gl_spirv related data */<br>
    struct gl_shader_spirv_data *spirv_data;<br>
+#ifdef MESA_BBOX_OPT<br>
+   bool shader_bbox_simple_shader;<br>
+   char shaderMVP[20];<br>
+   char shaderVertPosition[100];<br>
+#endif<br>
 };<br>
<br>
<br>
@@ -2902,6 +2916,9 @@ struct gl_shader_program_data<br>
     * ARB_gl_spirv extension.<br>
     */<br>
    bool spirv;<br>
+#ifdef MESA_BBOX_OPT<br>
+   GLuint vbo_bbox_mvp_location;<br>
+#endif<br>
 };<br>
<br>
 /**<br>
@@ -3108,6 +3125,10 @@ struct gl_pipeline_shader_state<br>
<br>
    /** Pipeline objects */<br>
    struct _mesa_HashTable *Objects;<br>
+#ifdef MESA_BBOX_OPT<br>
+   /* Bounding box draw optimization control structure */<br>
+   struct mesa_bbox_opt *BboxOpt;<br>
+#endif<br>
 };<br>
<br>
 /**<br>
@@ -4076,6 +4097,12 @@ struct gl_constants<br>
<br>
    /** GL_ARB_gl_spirv */<br>
    struct spirv_supported_capabilities SpirVCapabilities;<br>
+<br>
+#ifdef MESA_BBOX_OPT<br>
+   /** MESA_BBOX_OPT Runtime enable_bounding_box_culling*/<br>
+   bool EnableBoundingBoxCulling;<br>
+#endif<br>
+<br>
 };<br>
<br>
<br>
@@ -4720,6 +4747,21 @@ struct gl_semaphore_object<br>
    GLuint Name;            /**< hash table ID/name */<br>
 };<br>
<br>
+#ifdef MESA_BBOX_OPT<br>
+/**<br>
+ * Bounding volume classification types<br>
+ */<br>
+typedef enum<br>
+{<br>
+    BOUNDING_VOLUME_AABB = 0,<br>
+    BOUNDING_VOLUME_OBB = 1,<br>
+    BOUNDING_VOLUME_SPHERE = 2,<br>
+    BOUNDING_VOLUME_DOP = 3,<br>
+    BOUNDING_VOULME_MIXED = 4,<br>
+    BOUNDING_VOLUME_MAX = 5,<br>
+} bounding_volume_type;<br>
+#endif //MESA_BBOX_OPT<br>
+<br>
 /**<br>
  * Mesa rendering context.<br>
  *<br>
@@ -5096,6 +5138,15 @@ struct gl_context<br>
    struct hash_table_u64 *ResidentTextureHandles;<br>
    struct hash_table_u64 *ResidentImageHandles;<br>
    /*@}*/<br>
+<br>
+#ifdef MESA_BBOX_OPT<br>
+   /**<br>
+    * Bounding volume type<br>
+    *<br>
+    */<br>
+    bounding_volume_type volume_type;<br>
+#endif //MESA_BBOX_OPT<br>
+<br>
 };<br>
<br>
 /**<br>
diff --git a/src/mesa/program/Android.mk b/src/mesa/program/Android.mk<br>
index c6470e6..6489d3e 100644<br>
--- a/src/mesa/program/Android.mk<br>
+++ b/src/mesa/program/Android.mk<br>
@@ -75,6 +75,7 @@ $(intermediates)/program/lex.yy.c: $(LOCAL_PATH)/program_lexer.l<br>
 LOCAL_C_INCLUDES := \<br>
        $(MESA_TOP)/src/mapi \<br>
        $(MESA_TOP)/src/mesa \<br>
+       $(MESA_TOP)/src/mesa/vbo \<br>
        $(MESA_TOP)/src/compiler/nir \<br>
        $(MESA_TOP)/src/gallium/auxiliary \<br>
        $(MESA_TOP)/src/gallium/include<br>
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c<br>
index 6ab1bf5..e0b3563 100644<br>
--- a/src/mesa/program/program.c<br>
+++ b/src/mesa/program/program.c<br>
@@ -43,6 +43,9 @@<br>
 #include "util/ralloc.h"<br>
 #include "util/u_atomic.h"<br>
<br>
+#ifdef MESA_BBOX_OPT<br>
+#include "vbo/vbo_bbox.h"<br>
+#endif<br>
<br>
 /**<br>
  * A pointer to this dummy program is put into the hash table when<br>
diff --git a/src/mesa/vbo/vbo_bbox.c b/src/mesa/vbo/vbo_bbox.c<br>
new file mode 100644<br>
index 0000000..f1e153d<br>
--- /dev/null<br>
+++ b/src/mesa/vbo/vbo_bbox.c<br>
@@ -0,0 +1,1538 @@<br>
+/*<br>
+ * Copyright © 2018 Intel Corporation<br>
+ *<br>
+ * Permission is hereby granted, free of charge, to any person obtaining a<br>
+ * copy of this software and associated documentation files (the "Software"),<br>
+ * to deal in the Software without restriction, including without limitation<br>
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,<br>
+ * and/or sell copies of the Software, and to permit persons to whom the<br>
+ * Software is furnished to do so, subject to the following conditions:<br>
+ *<br>
+ * The above copyright notice and this permission notice (including the next<br>
+ * paragraph) shall be included in all copies or substantial portions of the<br>
+ * Software.<br>
+ *<br>
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR<br>
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,<br>
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL<br>
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER<br>
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING<br>
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS<br>
+ * IN THE SOFTWARE.<br>
+ */<br>
+<br>
+/**<br>
+ * \brief  VBO BBOX module implementation<br>
+ * \author Kedar Karanje<br>
+ */<br>
+<br>
+#pragma GCC optimize (0)<br>
+#include "vbo_bbox.h"<br>
+#include <fcntl.h><br>
+#define  BBOX_MAX_FRAMES_TO_DYNAMICALLY_UPDATE (5)<br>
+#define  BBOX_MIN_EFFECTIVE_DRAWS_TO_DYNAMICALLY_UPDATE (5)<br>
+#define  BBOX_MIN_VAL_FOR_EFFECTIVE_DRAWS_COUNTER (-100)<br>
+#define  BBOX_MAX_VAL_FOR_EFFECTIVE_DRAWS_COUNTER (100)<br>
+#define  BBOX_MIN_GPU_HEAD_ROOM_TO_PROCESS_ELEMENT_BUFFER (100)<br>
+<br>
+/**<br>
+  *  Min part of split draw that we want to drop<br>
+  */<br>
+#define  BBOX_MIN_SPLITTED_DRAW_TO_DROP (1)<br>
+<br>
+#ifdef __AVX__<br>
+static __m256 fullMin;<br>
+static __m256 fullMax;<br>
+#else<br>
+static __m128 fullMin;<br>
+static __m128 fullMax;<br>
+#endif<br>
+<br>
+/* Segment functions */<br>
+<br>
+static inline GLboolean<br>
+intersect(const struct gl_segment *s_src,const struct gl_segment *s_tar)<br>
+{<br>
+   return (s_src->Left < s_tar->Right && s_src->Right > s_tar->Left);<br>
+}<br>
+<br>
+<br>
+static inline GLboolean<br>
+subsegment(const struct gl_segment *s_src,const struct gl_segment *s_tar)<br>
+{<br>
+  return (s_src->Left <= s_tar->Left && s_src->Right >= s_tar->Right);<br>
+}<br>
+<br>
+<br>
+static inline GLboolean<br>
+superset(const struct gl_segment *s_src,const struct gl_segment *s_tar)<br>
+{<br>
+  return subsegment(s_tar,s_src);<br>
+}<br>
+<br>
+<br>
+static void<br>
+normalize(vbo_bbox_frustum_plane *fr_plane)<br>
+{<br>
+        GLfloat a,b,c;<br>
+        a = fr_plane->a;<br>
+        b = fr_plane->b;<br>
+        c = fr_plane->c;<br>
+<br>
+        GLfloat norm = 1.0f/sqrt(a*a + b*b + c*c);<br>
+<br>
+        fr_plane->a *= norm;<br>
+        fr_plane->b *= norm;<br>
+        fr_plane->c *= norm;<br>
+        fr_plane->d *= norm;<br>
+};<br>
+<br>
+static inline<br>
+int vbo_bbox_get_delay(struct mesa_bbox_opt *opt)<br>
+{<br>
+   if (mesa_bbox_env_variables.bbox_enable < MESA_BBOX_ENABLE_FORCE_RECALC) {<br>
+      if (opt->calc_delay > BBOX_CALC_MAX_DELAY) {<br>
+         opt->calc_delay = BBOX_CALC_MIN_DELAY;<br>
+      }<br>
+      return opt->calc_delay++;<br>
+   }<br>
+   else {<br>
+      return 0;<br>
+   }<br>
+}<br>
+<br>
+static inline<br>
+bool vbo_init_sub_bbox_array(int bbox_count, struct vbo_bbox_cache_data* data)<br>
+{<br>
+   bool allocate_bbox = true;<br>
+   if(data->sub_bbox_array != NULL) {<br>
+      if(bbox_count == data->sub_bbox_cnt) {<br>
+         allocate_bbox = false;<br>
+      }<br>
+      else {<br>
+         free(data->sub_bbox_array);<br>
+         data->sub_bbox_array = NULL;<br>
+      }<br>
+   }<br>
+   if (allocate_bbox)<br>
+      data->sub_bbox_array = (struct bounding_info*) malloc(<br>
+                                    bbox_count * sizeof (struct bounding_info));<br>
+<br>
+   if (!data->sub_bbox_array)<br>
+   return false;<br>
+<br>
+   data->sub_bbox_cnt = bbox_count;<br>
+   return true;<br>
+}<br>
+<br>
+static inline GLint<br>
+vbo_bbox_get_mvp(struct gl_context *ctx)<br>
+{<br>
+   struct gl_linked_shader * linked_shader =<br>
+                  ctx->Shader.ActiveProgram->_LinkedShaders[MESA_SHADER_VERTEX];<br>
+<br>
+   return _mesa_GetUniformLocation(ctx->Shader.ActiveProgram->Name,<br>
+                                   linked_shader->linkedshaderMVP);<br>
+}<br>
+<br>
+/*<br>
+ * Gets the simple-shader flag of the currently linked vertex shader<br>
+ *<br>
+ */<br>
+static inline int<br>
+vbo_is_simple_shader(struct gl_context *ctx)<br>
+{<br>
+   struct gl_linked_shader * linked_shader =<br>
+                  ctx->Shader.ActiveProgram->_LinkedShaders[MESA_SHADER_VERTEX];<br>
+<br>
+   return linked_shader->linked_bbox_simple_shader;<br>
+}<br>
+<br>
+/**<br>
+ * Get current VAO<br>
+ */<br>
+static inline struct gl_vertex_array_object*<br>
+vbo_get_current_vao(struct gl_context *const gc)<br>
+{<br>
+   assert(gc);<br>
+   struct gl_vertex_array_object* vao = gc->Array.VAO;<br>
+   assert(vao);<br>
+<br>
+   return vao;<br>
+}<br>
+<br>
+/**<br>
+ *  Returns the location of the "position" in the attributes of the<br>
+ *  currently active program<br>
+ */<br>
+static inline<br>
+int vbo_get_simple_vs_position_attr_location(<br>
+    struct gl_context *const gc)<br>
+{<br>
+   assert(gc);<br>
+   struct gl_linked_shader * linked_shader =<br>
+                   gc->Shader.ActiveProgram->_LinkedShaders[MESA_SHADER_VERTEX];<br>
+   GLint vertexPosLocation = _mesa_GetAttribLocation(<br>
+                           gc->Shader.ActiveProgram->Name,<br>
+                           linked_shader->linkedshaderVertPosition);<br>
+   if (vertexPosLocation >= 0)<br>
+      return vertexPosLocation;<br>
+   else<br>
+      return -1;<br>
+}<br>
+<br>
+/**<br>
+ *   Get element-buffer handle of the current VAO<br>
+ */<br>
+static inline struct gl_buffer_object*<br>
+vbo_get_current_element_buffer(struct gl_context *const gc)<br>
+{<br>
+   assert(gc);<br>
+   struct gl_buffer_object* element_buffer =<br>
+                                        vbo_get_current_vao(gc)->IndexBufferObj;<br>
+   assert(element_buffer);<br>
+   return element_buffer;<br>
+}<br>
+<br>
+/**<br>
+ *   Get vertex-binding of position from the current VAO<br>
+ */<br>
+static inline struct gl_vertex_buffer_binding*<br>
+vbo_get_current_vertex_buffer_binding_of_position(struct gl_context *const gc)<br>
+{<br>
+   assert(gc);<br>
+   struct gl_vertex_array_object* vao = vbo_get_current_vao(gc);<br>
+<br>
+   GLbitfield mask = vao->_Enabled & vao->VertexAttribBufferMask;<br>
+   const struct gl_array_attributes *attrib_array =<br>
+                                           &vao->VertexAttrib[ffs(mask) - 1];<br>
+   struct gl_vertex_buffer_binding *buffer_binding =<br>
+                       &vao->BufferBinding[attrib_array->BufferBindingIndex];<br>
+<br>
+   return buffer_binding;<br>
+}<br>
+<br>
+/**<br>
+ * Get vertex-buffer handle of the current VAO<br>
+ */<br>
+static inline struct gl_buffer_object*<br>
+vbo_get_current_vertex_buffer(struct gl_context *const gc)<br>
+{<br>
+   assert(gc);<br>
+   struct gl_buffer_object* pVertexBuffer =<br>
+               vbo_get_current_vertex_buffer_binding_of_position(gc)->BufferObj;<br>
+   assert(pVertexBuffer);<br>
+   return pVertexBuffer;<br>
+}<br>
+<br>
+<br>
+/**<br>
+ * Condition to enter bounding box optimization<br>
+ */<br>
+static inline bool<br>
+vbo_bbox_check_supported_draw_call(struct gl_context *const gc,<br>
+                                   GLenum mode, GLsizei count, GLenum type,<br>
+                                   const GLvoid *indices, GLint basevertex)<br>
+{<br>
+<br>
+   assert(gc);<br>
+   int shader_scenario;<br>
+   struct gl_linked_shader *_LinkedShaders;<br>
+<br>
+   /* Check if the minimum vertex count is met. */<br>
+   if (count < (GLsizei) mesa_bbox_env_variables.bbox_min_vrtx_count) {<br>
+     /* Count is the most common reason to bail out of the optimization,<br>
+      * so it is checked first.<br>
+      */<br>
+     MESA_BBOX("Aborting MESA_BBOX :%d: Vertex count too small, minimum count = %d\n",<br>
+                     count,mesa_bbox_env_variables.bbox_min_vrtx_count);<br>
+     return false;<br>
+   }<br>
+<br>
+   if (mode != GL_TRIANGLES) {<br>
+      MESA_BBOX("Aborting MESA_BBOX :%d: Primitive mode is not GL_TRIANGLES, \<br>
+                      mode = %d\n", count, mode);<br>
+      return false;<br>
+   }<br>
+<br>
+   /* Examine current shader */<br>
+   if (!gc->_Shader->ActiveProgram) {<br>
+      MESA_BBOX("Aborting MESA_BBOX:%d: No active GLSL program.\n", count);<br>
+      return false;<br>
+   }<br>
+<br>
+   /* BASIC Shader scenario is when we have just VS & FS */<br>
+   if (gc->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL &&<br>
+       gc->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT] != NULL &&<br>
+       gc->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL] == NULL &&<br>
+       gc->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL] == NULL &&<br>
+       gc->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY] == NULL) {<br>
+      shader_scenario = 0;<br>
+   }<br>
+   else<br>
+     shader_scenario = 1;<br>
+<br>
+   if (shader_scenario) {<br>
+     MESA_BBOX("Aborting MESA_BBOX:%d: GLSL program must contain only vertex and \<br>
+                     fragment shaders, shader scenario = \n", count );<br>
+     return false;<br>
+   }<br>
+<br>
+   if (!vbo_is_simple_shader(gc)) {<br>
+      MESA_BBOX("Aborting MESA_BBOX:%d: GLSL vertex shader does not have simple \<br>
+                 position calculation \n", count);<br>
+      return false;<br>
+   }<br>
+   if (gc->Shader.ActiveProgram->_LinkedShaders[MESA_SHADER_VERTEX]) {<br>
+       _LinkedShaders =<br>
+               gc->Shader.ActiveProgram->_LinkedShaders[MESA_SHADER_VERTEX];<br>
+<br>
+       MESA_BBOX("MVP:%s, VertPos:%s\n",_LinkedShaders->linkedshaderMVP,<br>
+                   _LinkedShaders->linkedshaderVertPosition);<br>
+   }<br>
+<br>
+   /* Examine element buffer */<br>
+   struct gl_buffer_object* element_buffer = vbo_get_current_element_buffer(gc);<br>
+   if ((!element_buffer) || element_buffer->Name == 0) {<br>
+      MESA_BBOX("Aborting MESA_BBOX:%d: Element buffer name is 0\n", count);<br>
+      return false;<br>
+   }<br>
+<br>
+   if (!(element_buffer->StorageFlags &<br>
+      (GL_CLIENT_STORAGE_BIT | GL_DYNAMIC_STORAGE_BIT))){<br>
+       MESA_BBOX("Aborting MESA_BBOX:%d: Element buffer not resident: %#x\n", count,<br>
+                     element_buffer->StorageFlags);<br>
+       return false;<br>
+   }<br>
+<br>
+   /* Get VertexPosLocation */<br>
+   int vertexPosLocation = 0;<br>
+   if (gc->Shader.ActiveProgram)<br>
+      vertexPosLocation = vbo_get_simple_vs_position_attr_location(gc);<br>
+   if (vertexPosLocation < 0)<br>
+   {<br>
+      MESA_BBOX("Aborting MESA_BBOX:%d: VertexPosition Location is inValid:\n", count);<br>
+      return false;<br>
+   }<br>
+<br>
+   struct gl_vertex_array_object*  vao = vbo_get_current_vao(gc);<br>
+   int posAttribMapMode =<br>
+            _mesa_vao_attribute_map[vao->_AttributeMapMode][vertexPosLocation];<br>
+<br>
+   if (((vao->_Enabled >> posAttribMapMode) & 0x1) != 1)<br>
+   {<br>
+      MESA_BBOX("Aborting MESA_BBOX:%d: Vertex data does not come from VBO , GL-API:%d\n", count,gc->API);<br>
+//#if !defined(__ANDROID__) || !defined(ANDROID)<br>
+//This is not specific to Android but to the GLES API<br>
+      if (gc->API != API_OPENGLES && gc->API != API_OPENGLES2)<br>
+          return false;<br>
+//#endif<br>
+   }<br>
+<br>
+   struct gl_buffer_object* vertexattrib_buffer  =<br>
+                            vbo_get_current_vertex_buffer(gc);<br>
+   if ((!vertexattrib_buffer) || vertexattrib_buffer->Name == 0) {<br>
+     MESA_BBOX("Aborting MESA_BBOX:%d: Vertex buffer %p name is %d\n", count,<br>
+                     vertexattrib_buffer,vertexattrib_buffer->Name);<br>
+#if !defined(__ANDROID__) || !defined(ANDROID)<br>
+     return false;<br>
+#endif<br>
+   }<br>
+<br>
+   if (!(vertexattrib_buffer->StorageFlags &<br>
+        (GL_CLIENT_STORAGE_BIT | GL_DYNAMIC_STORAGE_BIT))){<br>
+       MESA_BBOX("Aborting MESA_BBOX:%d:Vertex buffer not resident %#x \n", count,<br>
+                     vertexattrib_buffer->StorageFlags);<br>
+       MESA_BBOX("Aborting MESA_BBOX:VAO AttributeMode is %d\n", vao->_AttributeMapMode);<br>
+#if !defined(__ANDROID__) || !defined(ANDROID)<br>
+       return false;<br>
+#endif<br>
+   }<br>
+<br>
+    /* Examine vertex position attribute configuration */<br>
+   if (vao->VertexAttrib[posAttribMapMode].Enabled) {<br>
+      if (vao->VertexAttrib[posAttribMapMode].Size != 3)<br>
+      {<br>
+          MESA_BBOX("Aborting MESA_BBOX:%d: Vertex attrib size :%d, only 3 supported\n",<br>
+                         count, vao->VertexAttrib[VERT_ATTRIB_POS].Size);<br>
+#if !defined(__ANDROID__) || !defined(ANDROID)<br>
+          return false;<br>
+#endif<br>
+      }<br>
+      if (vao->VertexAttrib[posAttribMapMode].Type != GL_FLOAT)<br>
+      {<br>
+         MESA_BBOX("Aborting MESA_BBOX:%d: Vertex attrib type is %d, only GL_FLOAT \<br>
+                 supported\n", count, vao->VertexAttrib[VERT_ATTRIB_POS].Type);<br>
+         return false;<br>
+      }<br>
+   }<br>
+<br>
+   if (type != GL_UNSIGNED_SHORT) {<br>
+      MESA_BBOX("Aborting MESA_BBOX:%d: type is %d, only GL_UNSIGNED_SHORT \<br>
+                 supported\n", count, type);<br>
+      return false;<br>
+   }<br>
+   if (basevertex != 0) {<br>
+      MESA_BBOX("Aborting MESA_BBOX:%d: basevertex is 0 \n", count);<br>
+      return false;<br>
+   }<br>
+<br>
+   /* If size ==3 and type == GL_FLOAT, then element stride must be 12. */<br>
+   assert(vao->VertexAttrib[VERT_ATTRIB_POS].StrideB == 12);<br>
+<br>
+   /* When transform feedback is capturing we cannot do early clipping since<br>
+   * xfb must write unclipped vertices<br>
+   * Note - we could check for IsCapturing() but that would require<br>
+   * more elaborate checking for VBO modifications.<br>
+   */<br>
+   if (gc->TransformFeedback.CurrentObject->Active) {<br>
+      MESA_BBOX("MESA_BBOX:%d: Transform feedback is active, \<br>
+                      cannot clip\n", count);<br>
+      return false;<br>
+   }<br>
+   return true;<br>
+}<br>
+<br>
+<br>
+/**<br>
+ *  Prepare the cache key describing the geometry of the given draw call.<br>
+ *  No suitability checks are done here; the conditions to enter bounding box<br>
+ *  optimization are checked in vbo_bbox_check_supported_draw_call().<br>
+ */<br>
+static inline<br>
+void vbo_bbox_prepare_key(struct gl_context *const gc, GLenum mode,<br>
+                          GLsizei count, GLenum type, GLuint type_size,<br>
+                          const GLvoid *indices, GLint basevertex,<br>
+                          vbo_bbox_cache_key *key)<br>
+{<br>
+   assert(gc);<br>
+<br>
+   /* Examine element buffer */<br>
+   struct gl_buffer_object* element_buffer = vbo_get_current_element_buffer(gc);<br>
+   struct gl_buffer_object* vertexattrib_buffer  =<br>
+                                           vbo_get_current_vertex_buffer(gc);<br>
+   struct gl_vertex_buffer_binding* vbinding =<br>
+                       vbo_get_current_vertex_buffer_binding_of_position(gc);<br>
+<br>
+   memset(key,0,sizeof(vbo_bbox_cache_key));<br>
+<br>
+   key->mode = mode;<br>
+   key->count = count;<br>
+   key->indices_type = type;<br>
+   key->type_size = type_size;<br>
+   key->indices = (GLint) (uintptr_t) indices;<br>
+   key->basevertex = basevertex;<br>
+<br>
+   key->element_buf_name = element_buffer->Name;<br>
+   key->vertex_buf_name = vertexattrib_buffer->Name;<br>
+<br>
+   key->offset = (GLint)vbinding->Offset;<br>
+   key->stride = vbinding->Stride;<br>
+}<br>
+<br>
+<br>
+/**<br>
+ *  Create a bounding box descriptor in a form of 8 correctly<br>
+ *  ordered vertex coordinates. The order of coordinates is significant.<br>
+ */<br>
+static<br>
+void vbo_bbox_create_bounding_box(float* const minVec3f, float* const maxVec3F,<br>
+                                  vbo_vec4f* vertices4)<br>
+{<br>
+   assert(minVec3f);<br>
+   assert(maxVec3F);<br>
+   assert(vertices4);<br>
+<br>
+   float Xmin = minVec3f[0];<br>
+   float Ymin = minVec3f[1];<br>
+   float Zmin = minVec3f[2];<br>
+   float Wmin = 1.0f;<br>
+<br>
+   float Xmax = maxVec3F[0];<br>
+   float Ymax = maxVec3F[1];<br>
+   float Zmax = maxVec3F[2];<br>
+   float Wmax = 1.0f;<br>
+<br>
+   float* v = (float*)vertices4;<br>
+   int i = 0;<br>
+   v[i+0] = Xmin; v[i+1] = Ymin; v[i+2] = Zmin;v[i+3]=Wmin; i+=4;<br>
+   v[i+0] = Xmax; v[i+1] = Ymin; v[i+2] = Zmin;v[i+3]=Wmin; i+=4;<br>
+   v[i+0] = Xmin; v[i+1] = Ymax; v[i+2] = Zmin;v[i+3]=Wmin; i+=4;<br>
+   v[i+0] = Xmax; v[i+1] = Ymax; v[i+2] = Zmin;v[i+3]=Wmin; i+=4;<br>
+<br>
+   v[i+0] = Xmin; v[i+1] = Ymin; v[i+2] = Zmax;v[i+3]=Wmax; i+=4;<br>
+   v[i+0] = Xmax; v[i+1] = Ymin; v[i+2] = Zmax;v[i+3]=Wmax; i+=4;<br>
+   v[i+0] = Xmin; v[i+1] = Ymax; v[i+2] = Zmax;v[i+3]=Wmax; i+=4;<br>
+   v[i+0] = Xmax; v[i+1] = Ymax; v[i+2] = Zmax;v[i+3]=Wmax; i+=4;<br>
+}<br>
+<br>
+#ifdef __AVX__<br>
+/* Calculate bbox Subbox Coordinates */<br>
+static void<br>
+vbo_bbox_calc_subbox_coordinates(unsigned int vertSubBox,unsigned int vertCount,<br>
+                                 unsigned int first_idx,unsigned int second_idx,<br>
+                                 unsigned short* indices,float* vertices,<br>
+                                 unsigned int stride,<br>
+                                 struct vbo_bbox_cache_data *data)<br>
+{<br>
+<br>
+   /* Retrieving the starting offset of the first and second subbox */<br>
+   unsigned int first = first_idx * vertSubBox;<br>
+   unsigned int second = second_idx * vertSubBox;<br>
+<br>
+   float tmpVertexBuf[8] = {0.0};<br>
+   float tmp_buf_min[8] = {0.0};<br>
+   float tmp_buf_max[8] = {0.0};<br>
+<br>
+   __m256 subMin = _mm256_set1_ps(FLT_MAX);<br>
+   __m256 subMax = _mm256_set1_ps(-FLT_MAX);<br>
+<br>
+   /* Run both the subboxes for vertex count */<br>
+   for(unsigned int iter = 0; iter < vertCount; iter++){<br>
+      /* Calculate the vertex offset of first subbox */<br>
+      unsigned short index1 = indices[first+iter];<br>
+      /* Fetching the vertices for the first subbox */<br>
+      float* vertex1 = (float *)((char *)(vertices) + stride*index1);<br>
+      memcpy(tmpVertexBuf,vertex1, 3 * sizeof(float));<br>
+<br>
+      /* Calculate the vertex offset of second subbox */<br>
+      unsigned short index2 = indices[second+iter];<br>
+<br>
+      /* Fetching the vertices for the second subbox */<br>
+      float* vertex2 = (float *)((char *)(vertices) + stride*index2);<br>
+      memcpy(tmpVertexBuf+4,vertex2, 3 * sizeof(float));<br>
+<br>
+      __m256 tmp = _mm256_loadu_ps(tmpVertexBuf);<br>
+      subMin = _mm256_min_ps(subMin, tmp);<br>
+      subMax = _mm256_max_ps(subMax, tmp);<br>
+   }<br>
+<br>
+   /* compare full box values */<br>
+   fullMin = _mm256_min_ps(fullMin, subMin);<br>
+   fullMax = _mm256_max_ps(fullMax, subMax);<br>
+<br>
+   /* store results */<br>
+   _mm256_storeu_ps(tmp_buf_min, subMin);<br>
+   _mm256_storeu_ps(tmp_buf_max, subMax);<br>
+<br>
+<br>
+   /* Update the min and max values in sub box coordinates for both<br>
+    * the subboxes<br>
+    */<br>
+   vbo_bbox_create_bounding_box(tmp_buf_min, tmp_buf_max,<br>
+              &(data->sub_bbox_array[first_idx].bounding_volume.vert_vec4[0]));<br>
+   vbo_bbox_create_bounding_box(tmp_buf_min+4, tmp_buf_max+4,<br>
+             &(data->sub_bbox_array[second_idx].bounding_volume.vert_vec4[0]));<br>
+}<br>
+#else<br>
+static void<br>
+vbo_bbox_calc_subbox_coordinates(<br>
+            unsigned int vertSubBox,<br>
+            unsigned int vertCount,<br>
+            unsigned int idx,<br>
+            unsigned short *indices,<br>
+            float *vertices,<br>
+            unsigned int stride,<br>
+            struct vbo_bbox_cache_data *data)<br>
+{<br>
+   /* Retrieve the starting offset of the subbox in the index array */<br>
+   unsigned int first = idx * vertSubBox;<br>
+<br>
+   float tmpVertexBuf[4] = {0.0};<br>
+   float tmp_buf_min[4] = {0.0};<br>
+   float tmp_buf_max[4] = {0.0};<br>
+<br>
+   __m128 subMin = _mm_set1_ps(FLT_MAX);<br>
+   __m128 subMax = _mm_set1_ps(-FLT_MAX);<br>
+<br>
+   /* Iterate over all vertices of the subbox */<br>
+   for(unsigned int iter = 0; iter < vertCount; iter++){<br>
+<br>
+      /* Calculate the vertex offset of first subbox */<br>
+      unsigned short index = indices[first+iter];<br>
+      /* Fetching the vertices for the first subbox */<br>
+      float* vertex = (float *)((char *)(vertices) + stride*index);<br>
+      memcpy(tmpVertexBuf,vertex, 3 * sizeof(float));<br>
+<br>
+      __m128 tmp = _mm_loadu_ps(tmpVertexBuf);<br>
+      subMin = _mm_min_ps(subMin, tmp);<br>
+      subMax = _mm_max_ps(subMax, tmp);<br>
+   }<br>
+<br>
+   /* compare full box values */<br>
+   fullMin = _mm_min_ps(fullMin, subMin);<br>
+   fullMax = _mm_max_ps(fullMax, subMax);<br>
+<br>
+   /* store results */<br>
+   _mm_storeu_ps(tmp_buf_min, subMin);<br>
+   _mm_storeu_ps(tmp_buf_max, subMax);<br>
+<br>
+   /* Store the min and max values as the bounding-box vertices of this<br>
+    * subbox<br>
+    */<br>
+   vbo_bbox_create_bounding_box(tmp_buf_min, tmp_buf_max,<br>
+                    &(data->sub_bbox_array[idx].bounding_volume.vert_vec4[0]));<br>
+}<br>
+#endif<br>
+<br>
+/**<br>
+ *  Get pointer to VBO data.<br>
+ *  Pointer should be suitable for fast data reading, not data change.<br>
+ */<br>
+static<br>
+bool vbo_bbox_get_vbo_ptr(struct gl_context* gc, struct gl_buffer_object* vbo,<br>
+                          int offset, void** data, int* dataSize)<br>
+{<br>
+   assert(gc);<br>
+   assert(vbo);<br>
+   GLubyte* vboDataPtr = NULL;<br>
+<br>
+   if (offset >= vbo->Size) {<br>
+      return false;<br>
+   }<br>
+   vboDataPtr = _mesa_MapNamedBuffer(vbo->Name,GL_WRITE_ONLY_ARB);<br>
+   if (vboDataPtr == NULL) {<br>
+      return false;<br>
+   }<br>
+   *data = vboDataPtr + offset;<br>
+   *dataSize = vbo->Size - offset;<br>
+<br>
+   return true;<br>
+}<br>
+<br>
+/**<br>
+ * Unlock VBO<br>
+ */<br>
+static inline<br>
+void vbo_bbox_release_vbo_ptr(struct gl_context* gc,<br>
+                              struct gl_buffer_object* vbo)<br>
+{<br>
+    assert(gc);<br>
+    assert(vbo);<br>
+    _mesa_UnmapNamedBuffer_no_error(vbo->Name);<br>
+}<br>
+<br>
+/**<br>
+  * Check if given range of indices contains only degenerate triangles.<br>
+  */<br>
+static<br>
+bool vbo_bbox_is_degenerate(GLvoid *indices, GLuint count_in)<br>
+{<br>
+   assert(indices);<br>
+   assert(count_in % 3 == 0);<br>
+<br>
+   GLuint triangle_count = count_in / 3;<br>
+   GLuint input_idx = 0;<br>
+<br>
+   GLushort* ptr = (GLushort*)indices;<br>
+   for (GLuint i = 0; i < triangle_count; i++) {<br>
+      GLushort a = ptr[input_idx++];<br>
+      GLushort b = ptr[input_idx++];<br>
+      GLushort c = ptr[input_idx++];<br>
+      if (!(a == b || a == c || b == c)) {<br>
+          return false;<br>
+      }<br>
+   }<br>
+   return true;<br>
+}<br>
+<br>
+<br>
+/**<br>
+ * Calculate bounding boxes for given geometry.<br>
+ */<br>
+static<br>
+bool vbo_bbox_calculate_bounding_boxes_with_indices(struct gl_context *const gc,<br>
+                                             const vbo_bbox_cache_key *key,<br>
+                                             struct vbo_bbox_cache_data *data,<br>
+                                             void* indexData,<br>
+                                             int   indexDataSize)<br>
+{<br>
+   assert(gc);<br>
+<br>
+   void* vertex_data = NULL;<br>
+   int vertex_datasize = 0;<br>
+   int vert_per_subBbox = mesa_bbox_env_variables.bbox_split_size;<br>
+   int sub_bbox_cnt = (key->count + vert_per_subBbox -1)/vert_per_subBbox;<br>
+   int *subbox_array;<br>
+   int idx =0;<br>
+   int non_degen_count = 0;<br>
+<br>
+   assert(sub_bbox_cnt);<br>
+<br>
+   struct gl_buffer_object* element_buffer = _mesa_lookup_bufferobj(gc,<br>
+                                                key->element_buf_name);<br>
+<br>
+   struct gl_buffer_object * vertexattrib_buffer = _mesa_lookup_bufferobj(gc,<br>
+                                                       key->vertex_buf_name);<br>
+<br>
+   if (element_buffer == NULL || vertexattrib_buffer == NULL) {<br>
+   return false;<br>
+   }<br>
+<br>
+   if (!vbo_bbox_get_vbo_ptr(gc, vertexattrib_buffer,(int) key->offset,<br>
+                           &vertex_data, &vertex_datasize)) {<br>
+   return false;<br>
+   }<br>
+<br>
+   assert(vertex_data);<br>
+   assert(vertex_datasize > 0);<br>
+   assert(indexData);<br>
+   assert(indexDataSize > 0);<br>
+   assert(key->indices_type == GL_UNSIGNED_SHORT);<br>
+   assert(indexDataSize > key->count * (int)key->type_size);<br>
+<br>
+   /* Allocate memory for bounding boxes */<br>
+   if (!vbo_init_sub_bbox_array(sub_bbox_cnt,data)) {<br>
+   vbo_bbox_release_vbo_ptr(gc, vertexattrib_buffer);<br>
+   return false;<br>
+   }<br>
+   /* Initialize size of bounding boxes */<br>
+   for (int i = 0; i < sub_bbox_cnt; i++) {<br>
+   data->sub_bbox_array[i].vert_count = (i==sub_bbox_cnt-1)?<br>
+                            (key->count - i*vert_per_subBbox):vert_per_subBbox;<br>
+   data->sub_bbox_array[i].start_offset = i*vert_per_subBbox * key->type_size;<br>
+   }<br>
+<br>
+   subbox_array = malloc(sub_bbox_cnt * sizeof(int));<br>
+<br>
+   /* Check if all triangles within each sub-bbox are degenerate (i.e. triangles<br>
+      with zero area) */<br>
+   for (int i = 0; i < sub_bbox_cnt; i++) {<br>
+      GLubyte* ptr = (GLubyte *)indexData;<br>
+      data->sub_bbox_array[i].is_degenerate =<br>
+          vbo_bbox_is_degenerate((ptr + data->sub_bbox_array[i].start_offset),<br>
+                  data->sub_bbox_array[i].vert_count);<br>
+<br>
+       if(!data->sub_bbox_array[i].is_degenerate)<br>
+       {<br>
+          subbox_array[idx++] = i;<br>
+          non_degen_count++;<br>
+       }<br>
+   }<br>
+<br>
+   float tmp_buf_min[8] = {0.0};<br>
+   float tmp_buf_max[8] = {0.0};<br>
+<br>
+#ifdef __AVX__<br>
+   int odd = non_degen_count % 2;<br>
+   int num_iter = non_degen_count/2;<br>
+   int iter;<br>
+<br>
+   fullMin = _mm256_set1_ps(FLT_MAX);<br>
+   fullMax = _mm256_set1_ps(-FLT_MAX);<br>
+   idx = 0;<br>
+   for(iter = 0; iter < num_iter;iter++){<br>
+      idx = 2*iter;<br>
+      if(data->sub_bbox_array[subbox_array[idx]].vert_count ==<br>
+         data->sub_bbox_array[subbox_array[idx+1]].vert_count)<br>
+      {<br>
+         /* call the algorithm with the count */<br>
+         vbo_bbox_calc_subbox_coordinates(<br>
+                       vert_per_subBbox,<br>
+                       data->sub_bbox_array[subbox_array[idx]].vert_count,<br>
+                       subbox_array[idx],<br>
+                       subbox_array[idx+1],<br>
+                       (GLushort*)indexData,<br>
+                       (GLfloat*)vertex_data,<br>
+                       key->stride,<br>
+                       data<br>
+                     );<br>
+      }<br>
+      else<br>
+      {<br>
+         /* call the first one separately */<br>
+         vbo_bbox_calc_subbox_coordinates(vert_per_subBbox,<br>
+                            data->sub_bbox_array[subbox_array[idx]].vert_count,<br>
+                            subbox_array[idx],<br>
+                            subbox_array[idx],<br>
+                            (GLushort*)indexData,<br>
+                            (GLfloat*)vertex_data,<br>
+                            key->stride,<br>
+                            data);<br>
+<br>
+         /* call the second one separately */<br>
+         vbo_bbox_calc_subbox_coordinates(vert_per_subBbox,<br>
+                          data->sub_bbox_array[subbox_array[idx+1]].vert_count,<br>
+                          subbox_array[idx+1],<br>
+                          subbox_array[idx+1],<br>
+                          (GLushort*)indexData,<br>
+                          (GLfloat*)vertex_data,<br>
+                          key->stride,<br>
+                          data);<br>
+<br>
+      }<br>
+<br>
+   }<br>
+<br>
+   if(odd)<br>
+   {<br>
+      idx = 2*iter;<br>
+      /* call the last one separately */<br>
+      vbo_bbox_calc_subbox_coordinates(vert_per_subBbox,<br>
+                            data->sub_bbox_array[subbox_array[idx]].vert_count,<br>
+                            subbox_array[idx],<br>
+                            subbox_array[idx],<br>
+                            (GLushort*)indexData,<br>
+                            (GLfloat*)vertex_data,<br>
+                            key->stride,<br>
+                            data);<br>
+   }<br>
+<br>
+   /* Reduce the two 128-bit lanes of the 256-bit full-box minimum into one */<br>
+   __m128 firstlane   = _mm256_extractf128_ps(fullMin,0);<br>
+   __m128 secondlane = _mm256_extractf128_ps(fullMin,1);<br>
+   firstlane = _mm_min_ps(firstlane,secondlane);<br>
+   _mm_storeu_ps(tmp_buf_min,firstlane);<br>
+<br>
+   /* Reduce the two 128-bit lanes of the 256-bit full-box maximum into one */<br>
+    firstlane  = _mm256_extractf128_ps(fullMax,0);<br>
+    secondlane = _mm256_extractf128_ps(fullMax,1);<br>
+    firstlane = _mm_max_ps(firstlane,secondlane);<br>
+    _mm_storeu_ps(tmp_buf_max,firstlane);<br>
+<br>
+#else<br>
+    fullMin = _mm_set1_ps(FLT_MAX);<br>
+    fullMax = _mm_set1_ps(-FLT_MAX);<br>
+<br>
+    for(unsigned int i=0; i< non_degen_count; i++){<br>
+       //call the algorithm with the count<br>
+       vbo_bbox_calc_subbox_coordinates(vert_per_subBbox,<br>
+                              data->sub_bbox_array[subbox_array[i]].vert_count,<br>
+                              subbox_array[i],<br>
+                              (GLushort*)indexData,<br>
+                              (GLfloat*)vertex_data,<br>
+                              key->stride,<br>
+                              data);<br>
+       }<br>
+       _mm_storeu_ps(tmp_buf_min, fullMin);<br>
+       _mm_storeu_ps(tmp_buf_max, fullMax);<br>
+#endif<br>
+<br>
+    /* Set up bounding box as 8 vertices and store in bbox data */<br>
+    vbo_bbox_create_bounding_box(tmp_buf_min, tmp_buf_max,<br>
+                          &(data->full_box.bounding_volume.vert_vec4[0]));<br>
+<br>
+    if(subbox_array)<br>
+      free(subbox_array);<br>
+    vbo_bbox_release_vbo_ptr(gc, vertexattrib_buffer);<br>
+    data->valid = true;<br>
+<br>
+    return true;<br>
+}<br>
+<br>
+<br>
+/**<br>
+  * Calculate bounding boxes for given geometry.<br>
+  */<br>
+static inline<br>
+bool vbo_bbox_calculate_bounding_boxes(struct gl_context *const gc,<br>
+                                       const vbo_bbox_cache_key *key,<br>
+                                       struct vbo_bbox_cache_data* data)<br>
+{<br>
+    assert(gc);<br>
+    assert(key->indices_type == GL_UNSIGNED_SHORT);<br>
+<br>
+    void* pIndexData     = NULL;<br>
+    int   indexDataSize  = 0;<br>
+<br>
+    struct gl_buffer_object* element_buffer = _mesa_lookup_bufferobj(gc,<br>
+                                                        key->element_buf_name);<br>
+    if (element_buffer == NULL) {<br>
+      return false;<br>
+    }<br>
+    if (!vbo_bbox_get_vbo_ptr(gc, element_buffer, (int) key->indices,<br>
+                              &pIndexData, &indexDataSize)) {<br>
+      return false;<br>
+    }<br>
+<br>
+    bool ret = vbo_bbox_calculate_bounding_boxes_with_indices(gc, key, data,<br>
+                                                    pIndexData, indexDataSize);<br>
+<br>
+    vbo_bbox_release_vbo_ptr(gc, element_buffer);<br>
+<br>
+    return ret;<br>
+}<br>
+<br>
+<br>
+/**<br>
+  *  Create new bounding box cache entry<br>
+  */<br>
+static<br>
+struct vbo_bbox_cache_data* vbo_bbox_create_data(struct gl_context *const gc,<br>
+                                                 const vbo_bbox_cache_key *key)<br>
+{<br>
+   assert(gc);<br>
+<br>
+   struct gl_buffer_object* element_buffer = vbo_get_current_element_buffer(gc);<br>
+   struct gl_buffer_object* vertexattrib_buffer =<br>
+                                             vbo_get_current_vertex_buffer(gc);<br>
+<br>
+   if ((vertexattrib_buffer == NULL) ||<br>
+     (element_buffer == NULL)){<br>
+      return NULL;<br>
+   }<br>
+<br>
+   mesa_bbox_opt * BboxOpt = gc->Pipeline.BboxOpt;<br>
+   assert(BboxOpt);<br>
+<br>
+   struct vbo_bbox_cache_data* data = (struct vbo_bbox_cache_data *) malloc(<br>
+                                      sizeof (struct vbo_bbox_cache_data));<br>
+   data->full_box.is_degenerate = false;<br>
+<br>
+   if (data == NULL){<br>
+      return NULL;<br>
+   }<br>
+   /* Initialize the cache data and variables in cache data */<br>
+   data->valid = false;<br>
+   data->need_new_calculation = true;<br>
+   data->init_delaycnt = 0;<br>
+   data->init_delaylimit = 0;<br>
+   data->vertpos_vbo_changecnt = 0;<br>
+   data->indices_vbo_changecnt = 0;<br>
+   data->sub_bbox_cnt = 0;<br>
+   data->sub_bbox_array = NULL;<br>
+   data->drawcnt_bbox_helped = 0;<br>
+   data->last_use_frame = 0;<br>
+   data->vertpos_vbo_changecnt   = vertexattrib_buffer->data_change_counter;<br>
+   data->indices_vbo_changecnt   = element_buffer->data_change_counter;<br>
+<br>
+   /* This defines for how many cache hits we wait before actually creating the<br>
+    * data */<br>
+   data->init_delaylimit = vbo_bbox_get_delay(BboxOpt);<br>
+<br>
+   /* At this point data is not valid yet */<br>
+   _mesa_bbox_cache_insert(gc,gc->Pipeline.BboxOpt->cache,<br>
+                           key,sizeof(vbo_bbox_cache_key),data);<br>
+<br>
+   return data;<br>
+}<br>
+<br>
+<br>
+/**<br>
+ * Check if contents of the VBO buffers have changed since data entry<br>
+ * was created.<br>
+ */<br>
+static inline<br>
+bool vbo_bbox_validate_data(<br>
+    struct gl_context *const gc,<br>
+    const vbo_bbox_cache_key *key,<br>
+    struct vbo_bbox_cache_data* data)<br>
+{<br>
+   assert(gc);<br>
+<br>
+   struct gl_buffer_object* element_buffer = _mesa_lookup_bufferobj(gc,<br>
+                                               key->element_buf_name);<br>
+<br>
+   struct gl_buffer_object * vertexattrib_buffer = _mesa_lookup_bufferobj(gc,<br>
+                                                      key->vertex_buf_name);<br>
+<br>
+   if (element_buffer == NULL || vertexattrib_buffer == NULL) {<br>
+      return false;<br>
+   }<br>
+<br>
+   if ((element_buffer->data_change_counter != data->indices_vbo_changecnt) ||<br>
+     (vertexattrib_buffer->data_change_counter != data->vertpos_vbo_changecnt)<br>
+     ) {<br>
+      return false;<br>
+   }<br>
+   return true;<br>
+}<br>
+<br>
+/**<br>
+ *  Retrieve bounding box data from cache.<br>
+ */<br>
+static inline<br>
+struct vbo_bbox_cache_data* vbo_bbox_get_bounding_boxes(<br>
+                                                 struct gl_context *const gc,<br>
+                                                 const vbo_bbox_cache_key *key)<br>
+ {<br>
+<br>
+   assert(gc);<br>
+   mesa_bbox_opt * BboxOpt = gc->Pipeline.BboxOpt;<br>
+   assert(BboxOpt);<br>
+   struct vbo_bbox_cache_data* data = _mesa_search_bbox_cache(BboxOpt->cache,<br>
+                                              key, sizeof(vbo_bbox_cache_key));<br>
+   if (data) {<br>
+      if (data->need_new_calculation == false)<br>
+      {<br>
+         /* Data is initialized and valid */<br>
+         if (data->valid) {<br>
+             if (vbo_bbox_validate_data(gc, key, data)) {<br>
+                data->mvp_valid = true;<br>
+                return data;<br>
+             }<br>
+         }<br>
+         else {<br>
+             data->valid = false;<br>
+             return NULL;<br>
+         }<br>
+      }<br>
+   }<br>
+   else {<br>
+      /* Data does not exist, create it */<br>
+      data = vbo_bbox_create_data(gc, key);<br>
+      if (data == NULL)<br>
+      {<br>
+         return NULL;<br>
+      }<br>
+   }<br>
+   if ((data->need_new_calculation) &&<br>
+     (data->init_delaycnt++ >= data->init_delaylimit)) {<br>
+     data->valid = false;<br>
+     data->need_new_calculation = false;<br>
+     data->mvp_valid = false;<br>
+<br>
+     if (!vbo_bbox_validate_data(gc, key, data)) {<br>
+         struct gl_buffer_object * element_buffer =<br>
+                 _mesa_lookup_bufferobj(gc,key->element_buf_name);<br>
+<br>
+         struct gl_buffer_object * vertexattrib_buffer =<br>
+                 _mesa_lookup_bufferobj(gc,key->vertex_buf_name);<br>
+<br>
+         if ((vertexattrib_buffer == NULL) ||<br>
+             (element_buffer == NULL)) {<br>
+             return NULL;<br>
+         }<br>
+         data->vertpos_vbo_changecnt = vertexattrib_buffer->data_change_counter;<br>
+         data->indices_vbo_changecnt = element_buffer->data_change_counter;<br>
+      }<br>
+      if (gc->volume_type == BOUNDING_VOLUME_AABB) {<br>
+         /* Calculate bounding boxes */<br>
+         if (vbo_bbox_calculate_bounding_boxes(gc, key, data)) {<br>
+             return data;<br>
+         }<br>
+      }<br>
+   }<br>
+   return NULL;<br>
+}<br>
+<br>
+/**<br>
+ * This function is called when the element buffer is updated. Because the<br>
+ * element buffer has changed, we have to update the relevant bbox data:<br>
+ */<br>
+void vbo_bbox_element_buffer_update(struct gl_context *const gc,<br>
+                                    struct gl_buffer_object *buffer,<br>
+                                    const void* data,<br>
+                                    int offset,<br>
+                                    int size)<br>
+{<br>
+   mesa_bbox_opt * BboxOpt = gc->Pipeline.BboxOpt;<br>
+<br>
+   if (BboxOpt) {<br>
+      struct gl_segment updateSegment;<br>
+      updateSegment.Left = offset;<br>
+      updateSegment.Right = offset+size;<br>
+<br>
+      mesa_bbox_cache </blockquote></div></div>