[Mesa-dev] [PATCH 10/11] glsl: Improve precision of mod(x,y)
Eduardo Lima Mitev
elima at igalia.com
Mon Jan 19 03:32:16 PST 2015
From: Iago Toral Quiroga <itoral at igalia.com>
Currently, Mesa uses the lowering pass MOD_TO_FRACT to implement
mod(x,y) as y * fract(x/y). This implementation has a down side though:
it introduces precision errors due to the fract() operation. Even worse,
since the result of fract() is multiplied by y, the larger y gets the
larger the precision error we produce, so for large enough numbers the
precision loss is significant. Some examples on i965:
Operation Precision error
-----------------------------------------------------
mod(-1.951171875, 1.9980468750) 0.0000000447
mod(121.57, 13.29) 0.0000023842
mod(3769.12, 321.99) 0.0000762939
mod(3769.12, 1321.99) 0.0001220703
mod(-987654.125, 123456.984375) 0.0160663128
mod( 987654.125, 123456.984375) 0.0312500000
This patch replaces the current lowering pass with a different one
(MOD_TO_FLOOR) that follows the recommended implementation in the GLSL
man pages:
mod(x,y) = x - y * floor(x/y)
This implementation eliminates the precision errors at the expense of
an additional add instruction on some systems. On systems that can do
negate with multiply-add in a single operation this new implementation
would come at no additional cost.
Fixes the following 16 dEQP tests:
dEQP-GLES3.functional.shaders.builtin_functions.precision.mod.mediump_*
dEQP-GLES3.functional.shaders.builtin_functions.precision.mod.highp_*
---
src/glsl/README | 2 +-
src/glsl/ir_optimization.h | 2 +-
src/glsl/lower_instructions.cpp | 49 ++++++++++++--------------
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +-
src/mesa/drivers/dri/i965/brw_shader.cpp | 2 +-
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 +-
src/mesa/program/ir_to_mesa.cpp | 4 +--
src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +-
8 files changed, 31 insertions(+), 34 deletions(-)
diff --git a/src/glsl/README b/src/glsl/README
index 2f93f12..bfcf69f 100644
--- a/src/glsl/README
+++ b/src/glsl/README
@@ -187,7 +187,7 @@ You may also need to update the backends if they will see the new expr type:
You can then use the new expression from builtins (if all backends
would rather see it), or scan the IR and convert to use your new
-expression type (see ir_mod_to_fract, for example).
+expression type (see ir_mod_to_floor, for example).
Q: How is memory management handled in the compiler?
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 34e0b4b..912d910 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -34,7 +34,7 @@
#define EXP_TO_EXP2 0x04
#define POW_TO_EXP2 0x08
#define LOG_TO_LOG2 0x10
-#define MOD_TO_FRACT 0x20
+#define MOD_TO_FLOOR 0x20
#define INT_DIV_TO_MUL_RCP 0x40
#define BITFIELD_INSERT_TO_BFM_BFI 0x80
#define LDEXP_TO_ARITH 0x100
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index 6842853..b23c24d 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -36,7 +36,7 @@
* - EXP_TO_EXP2
* - POW_TO_EXP2
* - LOG_TO_LOG2
- * - MOD_TO_FRACT
+ * - MOD_TO_FLOOR
* - LDEXP_TO_ARITH
* - BITFIELD_INSERT_TO_BFM_BFI
* - CARRY_TO_ARITH
@@ -77,14 +77,17 @@
* Many older GPUs don't have an x**y instruction. For these GPUs, convert
* x**y to 2**(y * log2(x)).
*
- * MOD_TO_FRACT:
+ * MOD_TO_FLOOR:
* -------------
- * Breaks an ir_binop_mod expression down to (op1 * fract(op0 / op1))
+ * Breaks an ir_binop_mod expression down to (op0 - op1 * floor(op0 / op1))
*
* Many GPUs don't have a MOD instruction (945 and 965 included), and
* if we have to break it down like this anyway, it gives an
* opportunity to do things like constant fold the (1.0 / op1) easily.
*
+ * Note: before we used to implement this as op1 * fract(op / op1) but this
+ * implementation had significant precission errors.
+ *
* LDEXP_TO_ARITH:
* -------------
* Converts ir_binop_ldexp to arithmetic and bit operations.
@@ -136,7 +139,7 @@ private:
void sub_to_add_neg(ir_expression *);
void div_to_mul_rcp(ir_expression *);
void int_div_to_mul_rcp(ir_expression *);
- void mod_to_fract(ir_expression *);
+ void mod_to_floor(ir_expression *);
void exp_to_exp2(ir_expression *);
void pow_to_exp2(ir_expression *);
void log_to_log2(ir_expression *);
@@ -276,22 +279,15 @@ lower_instructions_visitor::log_to_log2(ir_expression *ir)
}
void
-lower_instructions_visitor::mod_to_fract(ir_expression *ir)
+lower_instructions_visitor::mod_to_floor(ir_expression *ir)
{
- ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b",
- ir_var_temporary);
- this->base_ir->insert_before(temp);
-
- ir_assignment *const assign =
- new(ir) ir_assignment(new(ir) ir_dereference_variable(temp),
- ir->operands[1], NULL);
-
- this->base_ir->insert_before(assign);
+ ir_rvalue *x = ir->operands[0];
+ ir_rvalue *y = ir->operands[1];
+ ir_rvalue *x_clone = x->clone(ir, NULL);
+ ir_rvalue *y_clone = y->clone(ir, NULL);
ir_expression *const div_expr =
- new(ir) ir_expression(ir_binop_div, ir->operands[0]->type,
- ir->operands[0],
- new(ir) ir_dereference_variable(temp));
+ new(ir) ir_expression(ir_binop_div, x->type, x, y);
/* Don't generate new IR that would need to be lowered in an additional
* pass.
@@ -299,14 +295,15 @@ lower_instructions_visitor::mod_to_fract(ir_expression *ir)
if (lowering(DIV_TO_MUL_RCP))
div_to_mul_rcp(div_expr);
- ir_rvalue *expr = new(ir) ir_expression(ir_unop_fract,
- ir->operands[0]->type,
- div_expr,
- NULL);
+ ir_expression *const floor_expr =
+ new(ir) ir_expression(ir_unop_floor, x->type, div_expr);
- ir->operation = ir_binop_mul;
- ir->operands[0] = new(ir) ir_dereference_variable(temp);
- ir->operands[1] = expr;
+ ir_expression *const mul_expr =
+ new(ir) ir_expression(ir_binop_mul, y_clone, floor_expr);
+
+ ir->operation = ir_binop_sub;
+ ir->operands[0] = x_clone;
+ ir->operands[1] = mul_expr;
this->progress = true;
}
@@ -535,8 +532,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
break;
case ir_binop_mod:
- if (lowering(MOD_TO_FRACT) && ir->type->is_float())
- mod_to_fract(ir);
+ if (lowering(MOD_TO_FLOOR) && ir->type->is_float())
+ mod_to_floor(ir);
break;
case ir_binop_pow:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 38cf29f..4a7e045 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -877,7 +877,7 @@ fs_visitor::visit(ir_expression *ir)
break;
}
case ir_binop_mod:
- /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
+ /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
assert(ir->type->is_integer());
emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index d76134b..eac9982 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -157,7 +157,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
? BITFIELD_INSERT_TO_BFM_BFI
: 0;
lower_instructions(shader->base.ir,
- MOD_TO_FRACT |
+ MOD_TO_FLOOR |
DIV_TO_MUL_RCP |
SUB_TO_ADD_NEG |
EXP_TO_EXP2 |
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 8b8b27f..8129118 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1522,7 +1522,7 @@ vec4_visitor::visit(ir_expression *ir)
break;
}
case ir_binop_mod:
- /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
+ /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
assert(ir->type->is_integer());
emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
break;
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index ce3af31..fc3dad7 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1152,7 +1152,7 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
assert(!"not reached: should be handled by ir_div_to_mul_rcp");
break;
case ir_binop_mod:
- /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
+ /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
assert(ir->type->is_integer());
emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
break;
@@ -2942,7 +2942,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
/* Lowering */
do_mat_op_to_vec(ir);
- lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
+ lower_instructions(ir, (MOD_TO_FLOOR | DIV_TO_MUL_RCP | EXP_TO_EXP2
| LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
| ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index c3d7793..c9903cd 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -5418,7 +5418,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
lower_offset_arrays(ir);
do_mat_op_to_vec(ir);
lower_instructions(ir,
- MOD_TO_FRACT |
+ MOD_TO_FLOOR |
DIV_TO_MUL_RCP |
EXP_TO_EXP2 |
LOG_TO_LOG2 |
--
2.1.3
More information about the mesa-dev
mailing list