[PATCH 02/28] drm/amd/display: Rework dsc to isolate FPU operations
Christian König
ckoenig.leichtzumerken at gmail.com
Mon Jun 8 09:37:01 UTC 2020
Am 08.06.20 um 06:59 schrieb Qingqing Zhuo:
> From: Rodrigo Siqueira <Rodrigo.Siqueira at amd.com>
>
> When we want to use float point operation on Linux
> we need to use within special kernel protection
> (`kernel_fpu_{begin,end}()`.), otherwise the kernel
> can clobber userspace FPU register state. For detecting
> these issues we use a tool named objtool (with -Ffa
> flags) to highlight the FPU problems, all warnings can
> be summed up as follows:
>
> ./tools/objtool/objtool check -Ffa
> drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.o
>
> [..] dc/dsc/rc_calc.o: warning: objtool: get_qp_set()+0x2f8:
> FPU instruction outside of kernel_fpu_{begin,end}()
> [..] dc/dsc/rc_calc.o: warning: objtool: dsc_roundf()+0x5:
> FPU instruction outside of kernel_fpu_{begin,end}()
> [..] dc/dsc/rc_calc.o: warning: objtool: dsc_ceil()+0x5:
> FPU instruction outside of kernel_fpu_{begin,end}()
> [..] dc/dsc/rc_calc.o: warning: objtool: get_ofs_set()+0x3eb:
> FPU instruction outside of kernel_fpu_{begin,end}()
> [..] dc/dsc/rc_calc.o: warning: objtool: calc_rc_params()+0x3c:
> FPU instruction outside of kernel_fpu_{begin,end}()
> [..] dc/dsc/dc_dsc.o: warning: objtool:
> get_dsc_bandwidth_range.isra.0()+0x8d:
> FPU instruction outside of kernel_fpu_{begin,end}()
> [..] dc/dsc/dc_dsc.o: warning: objtool: setup_dsc_config()+0x2ef:
> FPU instruction outside of kernel_fpu_{begin,end}()
> [..] dc/dsc/rc_calc_dpi.o: warning: objtool:copy_pps_fields()+0xbb:
> FPU instruction outside of kernel_fpu_{begin,end}()
> [..] dc/dsc/rc_calc_dpi.o: warning: objtool:
> dscc_compute_dsc_parameters()+0x7b:
> FPU instruction outside of kernel_fpu_{begin,end}()
>
> This commit fixes the above issues by rework DSC as described:
>
> 1. Isolate all FPU operations in a single file;
> 2. Use FPU flags only in the file that handles FPU operations;
> 3. Isolate all functions that require float point operation in static
> functions;
> 4. Add a mid-layer function that does not use any float point operation,
> and that could be safely invoked in other parts of the code.
> 5. Keep float point operation under DC_FP_{START/END} macro.
>
> CC: Christian König <christian.koenig at amd.com>
> CC: Alexander Deucher <Alexander.Deucher at amd.com>
> CC: Peter Zijlstra <peterz at infradead.org>
> CC: Tony Cheng <tony.cheng at amd.com>
> CC: Harry Wentland <hwentlan at amd.com>
> Signed-off-by: Rodrigo Siqueira <Rodrigo.Siqueira at amd.com>
> Reviewed-by: Mikita Lipski <Mikita.Lipski at amd.com>
> Acked-by: Qingqing Zhuo <qingqing.zhuo at amd.com>
> ---
> drivers/gpu/drm/amd/display/dc/dsc/Makefile | 2 -
> drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 18 +--
> drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c | 151 +++++++++++++++++-
> drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h | 5 +-
> .../gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c | 27 +---
> 5 files changed, 153 insertions(+), 50 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
> index 3f66868df171..ea29cf95d470 100644
> --- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
> @@ -28,8 +28,6 @@ endif
> endif
>
> CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc.o := $(dsc_ccflags)
> -CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc_dpi.o := $(dsc_ccflags)
> -CFLAGS_$(AMDDALPATH)/dc/dsc/dc_dsc.o := $(dsc_ccflags)
>
> DSC = dc_dsc.o rc_calc.o rc_calc_dpi.o
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
> index 0ea6662a1563..0c7f247bb7de 100644
> --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
> +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
> @@ -22,10 +22,12 @@
> * Author: AMD
> */
>
> +#include <drm/drm_dsc.h>
> #include "dc_hw_types.h"
> #include "dsc.h"
> #include <drm/drm_dp_helper.h>
> #include "dc.h"
> +#include "rc_calc.h"
>
> /* This module's internal functions */
>
> @@ -304,22 +306,6 @@ static inline uint32_t dsc_div_by_10_round_up(uint32_t value)
> return (value + 9) / 10;
> }
>
> -static inline uint32_t calc_dsc_bpp_x16(uint32_t stream_bandwidth_kbps, uint32_t pix_clk_100hz, uint32_t bpp_increment_div)
> -{
> - uint32_t dsc_target_bpp_x16;
> - float f_dsc_target_bpp;
> - float f_stream_bandwidth_100bps = stream_bandwidth_kbps * 10.0f;
> - uint32_t precision = bpp_increment_div; // bpp_increment_div is actually precision
> -
> - f_dsc_target_bpp = f_stream_bandwidth_100bps / pix_clk_100hz;
> -
> - // Round down to the nearest precision stop to bring it into DSC spec range
> - dsc_target_bpp_x16 = (uint32_t)(f_dsc_target_bpp * precision);
> - dsc_target_bpp_x16 = (dsc_target_bpp_x16 * 16) / precision;
> -
> - return dsc_target_bpp_x16;
> -}
> -
> /* Get DSC bandwidth range based on [min_bpp, max_bpp] target bitrate range, and timing's pixel clock
> * and uncompressed bandwidth.
> */
> diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c
> index 03ae15946c6d..667afbc260f9 100644
> --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c
> +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c
> @@ -23,6 +23,7 @@
> * Authors: AMD
> *
> */
> +#include <drm/drm_dsc.h>
>
> #include "os_types.h"
> #include "rc_calc.h"
> @@ -40,7 +41,8 @@
> break
>
>
> -void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, enum max_min max_min, float bpp)
> +static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc,
> + enum max_min max_min, float bpp)
> {
> int mode = MODE_SELECT(444, 422, 420);
> int sel = table_hash(mode, bpc, max_min);
> @@ -85,7 +87,7 @@ void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, enum ma
> memcpy(qps, table[index].qps, sizeof(qp_set));
> }
>
> -double dsc_roundf(double num)
> +static double dsc_roundf(double num)
> {
> if (num < 0.0)
> num = num - 0.5;
> @@ -95,7 +97,7 @@ double dsc_roundf(double num)
> return (int)(num);
> }
>
> -double dsc_ceil(double num)
> +static double dsc_ceil(double num)
> {
> double retval = (int)num;
>
> @@ -105,7 +107,7 @@ double dsc_ceil(double num)
> return (int)retval;
> }
>
> -void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
> +static void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
> {
> int *p = ofs;
>
> @@ -160,7 +162,7 @@ void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
> }
> }
>
> -int median3(int a, int b, int c)
> +static int median3(int a, int b, int c)
> {
> if (a > b)
> swap(a, b);
> @@ -172,13 +174,25 @@ int median3(int a, int b, int c)
> return b;
> }
>
> -void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_comp bpc, float bpp, int slice_width, int slice_height, int minor_version)
> +static void _do_calc_rc_params(struct rc_params *rc, enum colour_mode cm,
> + enum bits_per_comp bpc, u8 drm_bpp,
> + bool is_navite_422_or_420,
> + int slice_width, int slice_height,
> + int minor_version)
> {
> + float bpp;
> float bpp_group;
> float initial_xmit_delay_factor;
> int padding_pixels;
> int i;
>
> + bpp = ((float)drm_bpp / 16.0);
> + /* in native_422 or native_420 modes, the bits_per_pixel is double the
> + * target bpp (the latter is what calc_rc_params expects)
> + */
> + if (is_navite_422_or_420)
> + bpp /= 2.0;
> +
> rc->rc_quant_incr_limit0 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
> rc->rc_quant_incr_limit1 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
>
> @@ -251,3 +265,128 @@ void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_com
> rc->rc_buf_thresh[13] = 8064;
> }
>
> +static u32 _do_bytes_per_pixel_calc(int slice_width, u8 drm_bpp,
> + bool is_navite_422_or_420)
> +{
> + float bpp;
> + u32 bytes_per_pixel;
> + double d_bytes_per_pixel;
> +
> + bpp = ((float)drm_bpp / 16.0);
> + d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width;
> + // TODO: Make sure the formula for calculating this is precise (ceiling
> + // vs. floor, and at what point they should be applied)
> + if (is_navite_422_or_420)
> + d_bytes_per_pixel /= 2;
> +
> + bytes_per_pixel = (u32)dsc_ceil(d_bytes_per_pixel * 0x10000000);
> +
> + return bytes_per_pixel;
> +}
> +
> +static u32 _do_calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz,
> + u32 bpp_increment_div)
> +{
> + u32 dsc_target_bpp_x16;
> + float f_dsc_target_bpp;
> + float f_stream_bandwidth_100bps;
> + // bpp_increment_div is actually precision
> + u32 precision = bpp_increment_div;
> +
> + f_stream_bandwidth_100bps = stream_bandwidth_kbps * 10.0f;
> + f_dsc_target_bpp = f_stream_bandwidth_100bps / pix_clk_100hz;
> +
> + // Round down to the nearest precision stop to bring it into DSC spec
> + // range
> + dsc_target_bpp_x16 = (u32)(f_dsc_target_bpp * precision);
> + dsc_target_bpp_x16 = (dsc_target_bpp_x16 * 16) / precision;
> +
> + return dsc_target_bpp_x16;
> +}
> +
> +/**
> + * calc_rc_params - reads the user's cmdline mode
> + * @rc: DC internal DSC parameters
> + * @pps: DRM struct with all required DSC values
> + *
> + * This function expects a drm_dsc_config data struct with all the required DSC
> + * values previously filled out by our driver and based on this information it
> + * computes some of the DSC values.
> + *
> + * @note This calculation requires float point operation, most of it executes
> + * under kernel_fpu_{begin,end}.
> + */
> +void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps)
> +{
> + enum colour_mode mode;
> + enum bits_per_comp bpc;
> + bool is_navite_422_or_420;
> + u8 drm_bpp = pps->bits_per_pixel;
> + int slice_width = pps->slice_width;
> + int slice_height = pps->slice_height;
> +
> + mode = pps->convert_rgb ? CM_RGB : (pps->simple_422 ? CM_444 :
> + (pps->native_422 ? CM_422 :
> + pps->native_420 ? CM_420 : CM_444));
> + bpc = (pps->bits_per_component == 8) ? BPC_8 : (pps->bits_per_component == 10)
> + ? BPC_10 : BPC_12;
> +
> + is_navite_422_or_420 = pps->native_422 || pps->native_420;
> +
> + DC_FP_START();
> + _do_calc_rc_params(rc, mode, bpc, drm_bpp, is_navite_422_or_420,
> + slice_width, slice_height,
> + pps->dsc_version_minor);
> + DC_FP_END();
> +}
> +
> +/**
> + * calc_dsc_bytes_per_pixel - calculate bytes per pixel
> + * @pps: DRM struct with all required DSC values
> + *
> + * Based on the information inside drm_dsc_config, this function calculates the
> + * total of bytes per pixel.
> + *
> + * @note This calculation requires float point operation, most of it executes
> + * under kernel_fpu_{begin,end}.
> + *
> + * Return:
> + * Return the number of bytes per pixel
> + */
> +u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps)
> +
> +{
> + u32 ret;
> + u8 drm_bpp = pps->bits_per_pixel;
> + int slice_width = pps->slice_width;
> + bool is_navite_422_or_420 = pps->native_422 || pps->native_420;
> +
> + DC_FP_START();
> + ret = _do_bytes_per_pixel_calc(slice_width, drm_bpp,
> + is_navite_422_or_420);
> + DC_FP_END();
> + return ret;
> +}
> +
> +/**
> + * calc_dsc_bpp_x16 - retrieve the dsc bits per pixel
> + * @stream_bandwidth_kbps:
> + * @pix_clk_100hz:
> + * @bpp_increment_div:
> + *
> + * Calculate the total of bits per pixel for DSC configuration.
> + *
> + * @note This calculation requires float point operation, most of it executes
> + * under kernel_fpu_{begin,end}.
> + */
> +u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz,
> + u32 bpp_increment_div)
> +{
> + u32 dsc_bpp;
> +
> + DC_FP_START();
> + dsc_bpp = _do_calc_dsc_bpp_x16(stream_bandwidth_kbps, pix_clk_100hz,
> + bpp_increment_div);
> + DC_FP_END();
The calls to DC_FP_START(); and DC_FP_END() are still in the wrong file.
The compiler can potentially inline the functions and illegally optimize
stuff outside of the protected region.
Regards,
Christian.
> + return dsc_bpp;
> +}
> diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h
> index b6b1f09c2009..21723fa6561e 100644
> --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h
> +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h
> @@ -77,7 +77,10 @@ struct qp_entry {
>
> typedef struct qp_entry qp_table[];
>
> -void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_comp bpc, float bpp, int slice_width, int slice_height, int minor_version);
> +void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps);
> +u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps);
> +u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz,
> + u32 bpp_increment_div);
>
> #endif
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c
> index 1f6e63b71456..ef830aded5b1 100644
> --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c
> +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c
> @@ -27,8 +27,6 @@
> #include "dscc_types.h"
> #include "rc_calc.h"
>
> -double dsc_ceil(double num);
> -
> static void copy_pps_fields(struct drm_dsc_config *to, const struct drm_dsc_config *from)
> {
> to->line_buf_depth = from->line_buf_depth;
> @@ -100,34 +98,13 @@ static void copy_rc_to_cfg(struct drm_dsc_config *dsc_cfg, const struct rc_param
>
> int dscc_compute_dsc_parameters(const struct drm_dsc_config *pps, struct dsc_parameters *dsc_params)
> {
> - enum colour_mode mode = pps->convert_rgb ? CM_RGB :
> - (pps->simple_422 ? CM_444 :
> - (pps->native_422 ? CM_422 :
> - pps->native_420 ? CM_420 : CM_444));
> - enum bits_per_comp bpc = (pps->bits_per_component == 8) ? BPC_8 :
> - (pps->bits_per_component == 10) ? BPC_10 : BPC_12;
> - float bpp = ((float) pps->bits_per_pixel / 16.0);
> - int slice_width = pps->slice_width;
> - int slice_height = pps->slice_height;
> int ret;
> struct rc_params rc;
> struct drm_dsc_config dsc_cfg;
>
> - double d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width;
> -
> - // TODO: Make sure the formula for calculating this is precise (ceiling vs. floor, and at what point they should be applied)
> - if (pps->native_422 || pps->native_420)
> - d_bytes_per_pixel /= 2;
> -
> - dsc_params->bytes_per_pixel = (uint32_t)dsc_ceil(d_bytes_per_pixel * 0x10000000);
> -
> - /* in native_422 or native_420 modes, the bits_per_pixel is double the target bpp
> - * (the latter is what calc_rc_params expects)
> - */
> - if (pps->native_422 || pps->native_420)
> - bpp /= 2.0;
> + dsc_params->bytes_per_pixel = calc_dsc_bytes_per_pixel(pps);
>
> - calc_rc_params(&rc, mode, bpc, bpp, slice_width, slice_height, pps->dsc_version_minor);
> + calc_rc_params(&rc, pps);
> dsc_params->pps = *pps;
> dsc_params->pps.initial_scale_value = 8 * rc.rc_model_size / (rc.rc_model_size - rc.initial_fullness_offset);
>
More information about the amd-gfx
mailing list