[Nouveau] [PATCH] pmu/fuc: don't use movw directly anymore

Ilia Mirkin imirkin at alum.mit.edu
Wed Nov 1 17:14:44 UTC 2017


On Wed, Nov 1, 2017 at 12:51 PM, Karol Herbst <kherbst at redhat.com> wrote:
> Fixes compilation issues with recent envytools: movw was removed from
> fuc5 because it doesn't exist there anymore. The current code is most
> likely broken for fuc5 hardware as well and might have triggered all
> kinds of random memory reclocking failures.
>
> Changes in fuc3 binaries are due to opcode optimizations using shorter
> opcodes when possible.

Might I suggest the following wording for the commit:

---------------8<----------------
Fixes failure to compile with recent envyas as a result of the 'movw'
alias being removed for v5.

A bit of history:

v3 only has a 16-bit sign-extended immediate mov op. In order to set
the high bits, there's a separate 'sethi' op. envyas validates that
the value passed to mov(imm) is between -0x8000 and 0x7fff. In order
to simplify macros that load both the low and high word, a 'movw'
alias was added which takes an unsigned 16-bit immediate. However the
actual hardware op still sign extends.

v5 has a full 32-bit immediate mov op. The v3 16-bit immediate mov op
is gone (loads 0 into the dst reg). However due to a bug in envyas,
the movw alias still existed, and selected the no-longer-present v3
16-bit immediate mov op. As a result, usage of movw on v5 is the same
as mov with a 0x0 argument.

The proper fix throughout is to only ever use the 'movw' alias in
combination with 'sethi'. Anything else should get the sign-extended
validation to ensure that the intended value ends up in the
destination register.

Changes in fuc3 binaries are the result of a different encoding being
selected for a mov with an 8-bit value.
---------------8<----------------

>
> Signed-off-by: Karol Herbst <kherbst at redhat.com>
> ---
>  drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h |  746 +++++++++----------
>  drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h |  802 ++++++++++----------
>  drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h | 1006 +++++++++++++-------------
>  drm/nouveau/nvkm/subdev/pmu/fuc/memx.fuc     |   30 +-
>  4 files changed, 1292 insertions(+), 1292 deletions(-)
>

[...]

> diff --git a/drm/nouveau/nvkm/subdev/pmu/fuc/memx.fuc b/drm/nouveau/nvkm/subdev/pmu/fuc/memx.fuc
> index ec03f9a4..1663bf94 100644
> --- a/drm/nouveau/nvkm/subdev/pmu/fuc/memx.fuc
> +++ b/drm/nouveau/nvkm/subdev/pmu/fuc/memx.fuc
> @@ -82,15 +82,15 @@ memx_train_tail:
>  // $r0  - zero
>  memx_func_enter:
>  #if NVKM_PPWR_CHIPSET == GT215
> -       movw $r8 0x1610
> +       mov $r8 0x1610
>         nv_rd32($r7, $r8)
>         imm32($r6, 0xfffffffc)
>         and $r7 $r6
> -       movw $r6 0x2
> +       mov $r6 0x2
>         or $r7 $r6
>         nv_wr32($r8, $r7)
>  #else
> -       movw $r6 0x001620
> +       mov $r6 0x001620
>         imm32($r7, ~0x00000aa2);
>         nv_rd32($r8, $r6)
>         and $r8 $r7
> @@ -101,7 +101,7 @@ memx_func_enter:
>         and $r8 $r7
>         nv_wr32($r6, $r8)
>
> -       movw $r6 0x0026f0
> +       mov $r6 0x0026f0
>         nv_rd32($r8, $r6)
>         and $r8 $r7
>         nv_wr32($r6, $r8)
> @@ -136,19 +136,19 @@ memx_func_leave:
>                 bra nz #memx_func_leave_wait
>
>  #if NVKM_PPWR_CHIPSET == GT215
> -       movw $r8 0x1610
> +       mov $r8 0x1610
>         nv_rd32($r7, $r8)
>         imm32($r6, 0xffffffcc)
>         and $r7 $r6
>         nv_wr32($r8, $r7)
>  #else
> -       movw $r6 0x0026f0
> +       mov $r6 0x0026f0
>         imm32($r7, 0x00000001)
>         nv_rd32($r8, $r6)
>         or $r8 $r7
>         nv_wr32($r6, $r8)
>
> -       movw $r6 0x001620
> +       mov $r6 0x001620
>         nv_rd32($r8, $r6)
>         or $r8 $r7
>         nv_wr32($r6, $r8)
> @@ -177,11 +177,11 @@ memx_func_wait_vblank:
>         bra #memx_func_wait_vblank_fini
>
>         memx_func_wait_vblank_head1:
> -       movw $r7 0x20
> +       mov $r7 0x20
>         bra #memx_func_wait_vblank_0
>
>         memx_func_wait_vblank_head0:
> -       movw $r7 0x8
> +       mov $r7 0x8
>
>         memx_func_wait_vblank_0:
>                 nv_iord($r6, NV_PPWR_INPUT)
> @@ -273,13 +273,13 @@ memx_func_train:
>  // $r5 - outer loop counter
>  // $r6 - inner loop counter
>  // $r7 - entry counter (#memx_train_head + $r7)
> -       movw $r5 0x3
> -       movw $r7 0x0
> +       mov $r5 0x3
> +       mov $r7 0x0
>
>  // Read random memory to wake up... things
>         imm32($r9, 0x700000)
>         nv_rd32($r8,$r9)
> -       movw $r14 0x2710
> +       mov $r14 0x2710
>         call(nsec)
>
>         memx_func_train_loop_outer:
> @@ -289,9 +289,9 @@ memx_func_train:
>                 nv_wr32($r9, $r8)
>                 push $r5
>
> -               movw $r6 0x0
> +               mov $r6 0x0
>                 memx_func_train_loop_inner:
> -                       movw $r8 0x1111
> +                       mov $r8 0x1111
>                         mulu $r9 $r6 $r8
>                         shl b32 $r8 $r9 0x10
>                         or $r8 $r9
> @@ -315,7 +315,7 @@ memx_func_train:
>
>                         // $r5 - inner inner loop counter
>                         // $r9 - result
> -                       movw $r5 0
> +                       mov $r5 0
>                         imm32($r9, 0x8300ffff)
>                         memx_func_train_loop_4x:
>                                 imm32($r10, 0x100080)
> --
> 2.14.2
>
> _______________________________________________
> Nouveau mailing list
> Nouveau at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/nouveau


More information about the Nouveau mailing list