[Nouveau] [PATCH] pmu/fuc: don't use movw directly anymore
Ilia Mirkin
imirkin at alum.mit.edu
Wed Nov 1 17:14:44 UTC 2017
On Wed, Nov 1, 2017 at 12:51 PM, Karol Herbst <kherbst at redhat.com> wrote:
> fixes compilation issues with recent envytools, because movw was removed
> from fuc5, because it doesn't exist there anymore. The current code is
> most likely broken for fuc5 hardware as well and might have triggered all
> kinds of random memory reclocking fails.
>
> Changes in fuc3 binaries are due to opcode optimizations using shorter
> opcodes when possible.
Might I suggest the following wording for the commit:
---------------8<----------------
Fixes failure to compile with recent envyas as a result of the 'movw'
alias being removed for v5.
A bit of history:
v3 only has a 16-bit sign-extended immediate mov op. In order to set
the high bits, there's a separate 'sethi' op. envyas validates that
the value passed to mov(imm) is between -0x8000 and 0x7fff. In order
to simplify macros that load both the low and high word, a 'movw'
alias was added which takes an unsigned 16-bit immediate. However the
actual hardware op still sign extends.
v5 has a full 32-bit immediate mov op. The v3 16-bit immediate mov op
is gone (loads 0 into the dst reg). However due to a bug in envyas,
the movw alias still existed, and selected the no-longer-present v3
16-bit immediate mov op. As a result usage of movw on v5 is the same
as mov with a 0x0 argument.
The proper fix throughout is to only ever use the 'movw' alias in
combination with 'sethi'. Anything else should use plain 'mov', which
gets the sign-extended range validation, to ensure that the intended
value ends up in the destination register.
Changes in fuc3 binaries are the result of a different encoding being
selected for a mov with an 8-bit value.
---------------8<----------------
>
> Signed-off-by: Karol Herbst <kherbst at redhat.com>
> ---
> drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h | 746 +++++++++----------
> drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h | 802 ++++++++++----------
> drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h | 1006 +++++++++++++-------------
> drm/nouveau/nvkm/subdev/pmu/fuc/memx.fuc | 30 +-
> 4 files changed, 1292 insertions(+), 1292 deletions(-)
>
[...]
> diff --git a/drm/nouveau/nvkm/subdev/pmu/fuc/memx.fuc b/drm/nouveau/nvkm/subdev/pmu/fuc/memx.fuc
> index ec03f9a4..1663bf94 100644
> --- a/drm/nouveau/nvkm/subdev/pmu/fuc/memx.fuc
> +++ b/drm/nouveau/nvkm/subdev/pmu/fuc/memx.fuc
> @@ -82,15 +82,15 @@ memx_train_tail:
> // $r0 - zero
> memx_func_enter:
> #if NVKM_PPWR_CHIPSET == GT215
> - movw $r8 0x1610
> + mov $r8 0x1610
> nv_rd32($r7, $r8)
> imm32($r6, 0xfffffffc)
> and $r7 $r6
> - movw $r6 0x2
> + mov $r6 0x2
> or $r7 $r6
> nv_wr32($r8, $r7)
> #else
> - movw $r6 0x001620
> + mov $r6 0x001620
> imm32($r7, ~0x00000aa2);
> nv_rd32($r8, $r6)
> and $r8 $r7
> @@ -101,7 +101,7 @@ memx_func_enter:
> and $r8 $r7
> nv_wr32($r6, $r8)
>
> - movw $r6 0x0026f0
> + mov $r6 0x0026f0
> nv_rd32($r8, $r6)
> and $r8 $r7
> nv_wr32($r6, $r8)
> @@ -136,19 +136,19 @@ memx_func_leave:
> bra nz #memx_func_leave_wait
>
> #if NVKM_PPWR_CHIPSET == GT215
> - movw $r8 0x1610
> + mov $r8 0x1610
> nv_rd32($r7, $r8)
> imm32($r6, 0xffffffcc)
> and $r7 $r6
> nv_wr32($r8, $r7)
> #else
> - movw $r6 0x0026f0
> + mov $r6 0x0026f0
> imm32($r7, 0x00000001)
> nv_rd32($r8, $r6)
> or $r8 $r7
> nv_wr32($r6, $r8)
>
> - movw $r6 0x001620
> + mov $r6 0x001620
> nv_rd32($r8, $r6)
> or $r8 $r7
> nv_wr32($r6, $r8)
> @@ -177,11 +177,11 @@ memx_func_wait_vblank:
> bra #memx_func_wait_vblank_fini
>
> memx_func_wait_vblank_head1:
> - movw $r7 0x20
> + mov $r7 0x20
> bra #memx_func_wait_vblank_0
>
> memx_func_wait_vblank_head0:
> - movw $r7 0x8
> + mov $r7 0x8
>
> memx_func_wait_vblank_0:
> nv_iord($r6, NV_PPWR_INPUT)
> @@ -273,13 +273,13 @@ memx_func_train:
> // $r5 - outer loop counter
> // $r6 - inner loop counter
> // $r7 - entry counter (#memx_train_head + $r7)
> - movw $r5 0x3
> - movw $r7 0x0
> + mov $r5 0x3
> + mov $r7 0x0
>
> // Read random memory to wake up... things
> imm32($r9, 0x700000)
> nv_rd32($r8,$r9)
> - movw $r14 0x2710
> + mov $r14 0x2710
> call(nsec)
>
> memx_func_train_loop_outer:
> @@ -289,9 +289,9 @@ memx_func_train:
> nv_wr32($r9, $r8)
> push $r5
>
> - movw $r6 0x0
> + mov $r6 0x0
> memx_func_train_loop_inner:
> - movw $r8 0x1111
> + mov $r8 0x1111
> mulu $r9 $r6 $r8
> shl b32 $r8 $r9 0x10
> or $r8 $r9
> @@ -315,7 +315,7 @@ memx_func_train:
>
> // $r5 - inner inner loop counter
> // $r9 - result
> - movw $r5 0
> + mov $r5 0
> imm32($r9, 0x8300ffff)
> memx_func_train_loop_4x:
> imm32($r10, 0x100080)
> --
> 2.14.2
>
> _______________________________________________
> Nouveau mailing list
> Nouveau at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/nouveau
More information about the Nouveau
mailing list