[PATCH v3 13/19] gpu: nova-core: add falcon register definitions and base code
Alexandre Courbot
acourbot at nvidia.com
Fri May 16 12:19:45 UTC 2025
On Wed May 14, 2025 at 1:19 AM JST, Danilo Krummrich wrote:
<snip>
>> + util::wait_on(Duration::from_millis(20), || {
>> + let r = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE);
>> + if r.mem_scrubbing() {
>> + Some(())
>> + } else {
>> + None
>> + }
>> + })
>> + }
>> +
>> + /// Reset the falcon engine.
>> + fn reset_eng(&self, bar: &Bar0) -> Result<()> {
>> + let _ = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE);
>> +
>> + // According to OpenRM's `kflcnPreResetWait_GA102` documentation, HW sometimes does not set
>> + // RESET_READY so a non-failing timeout is used.
>
> Should we still warn about it?
OpenRM does not (as this is apparently a workaround to a HW bug?) so I
don't think we need to.
>
>> + let _ = util::wait_on(Duration::from_micros(150), || {
>
> Do we know for sure that if RESET_READY is not set after 150us, it won't ever be
> set? If the answer to that is yes, and we also do not want to warn about
> RESET_READY not being set, why even bother trying to read it in the first place?
My guess is because this would the expected behavior if the bug wasn't
there. My GPU (Ampere) does wait until the timeout, but we can expect
newer GPUs to not have this problem and return earlier.
>
>> + let r = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE);
>> + if r.reset_ready() {
>> + Some(())
>> + } else {
>> + None
>> + }
>> + });
>> +
>> + regs::NV_PFALCON_FALCON_ENGINE::alter(bar, E::BASE, |v| v.set_reset(true));
>> +
>> + let _: Result<()> = util::wait_on(Duration::from_micros(10), || None);
>
> Can we please get an abstraction for udelay() for this?
Should it be local to nova-core, or be generally available? I refrained
from doing this because there is work going on regarding timer and I
thought it would cover things like udelay() as well. I'll add a TODO
item for now but please let me know if you have something different in
mind.
<snip>
>> +fn get_signature_reg_fuse_version_ga102(
>> + dev: &device::Device,
>> + bar: &Bar0,
>> + engine_id_mask: u16,
>> + ucode_id: u8,
>> +) -> Result<u32> {
>> + // The ucode fuse versions are contained in the FUSE_OPT_FPF_<ENGINE>_UCODE<X>_VERSION
>> + // registers, which are an array. Our register definition macros do not allow us to manage them
>> + // properly, so we need to hardcode their addresses for now.
>> +
>> + // Each engine has 16 ucode version registers numbered from 1 to 16.
>> + if ucode_id == 0 || ucode_id > 16 {
>> + dev_warn!(dev, "invalid ucode id {:#x}", ucode_id);
>
> Given that this is an error condition, this should be dev_err!() I suppose.
>
>> + return Err(EINVAL);
>> + }
>> + let reg_fuse = if engine_id_mask & 0x0001 != 0 {
>> + // NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION
>> + 0x824140
>> + } else if engine_id_mask & 0x0004 != 0 {
>> + // NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION
>> + 0x824100
>> + } else if engine_id_mask & 0x0400 != 0 {
>> + // NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION
>> + 0x8241c0
>> + } else {
>> + dev_warn!(dev, "unexpected engine_id_mask {:#x}", engine_id_mask);
>
> s/dev_warn/dev_err/
>
>> + return Err(EINVAL);
>> + } + ((ucode_id - 1) as usize * core::mem::size_of::<u32>());
>
> I feel like this calculation deserves a comment.
Agreed, added one. Also reorganized the code to make the calculation a
bit more obvious.
>
>> +
>> + let reg_fuse_version = bar.read32(reg_fuse);
>
> I feel like the calculation of reg_fuse should be abstracted with a dedicated
> type in regs.rs. that takes the magic number derived from the engine_id_mask
> (which I assume is chip specific) and the ucode_id.
We would need proper support for register arrays to manage the ucode_id
offset, so I'm afraid this one will be hard to get rid of. What kind of
type did you have in mind?
One thing we can do though, is expose the offset of each register as a
register type constant, and use that instead of the hardcoded values
currently in this code - that part at least will be cleaner.
>
>> +
>> + // Equivalent of Find Last Set bit.
>> + Ok(u32::BITS - reg_fuse_version.leading_zeros())
>
> Maybe we should create a generic helper for that?
Good idea.
>
>> +}
>> +
>> +fn program_brom_ga102<E: FalconEngine>(bar: &Bar0, params: &FalconBromParams) -> Result<()> {
>> + regs::NV_PFALCON2_FALCON_BROM_PARAADDR::default()
>> + .set_value(params.pkc_data_offset)
>> + .write(bar, E::BASE);
>> + regs::NV_PFALCON2_FALCON_BROM_ENGIDMASK::default()
>> + .set_value(params.engine_id_mask as u32)
>> + .write(bar, E::BASE);
>> + regs::NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID::default()
>> + .set_ucode_id(params.ucode_id)
>> + .write(bar, E::BASE);
>> + regs::NV_PFALCON2_FALCON_MOD_SEL::default()
>> + .set_algo(FalconModSelAlgo::Rsa3k)
>> + .write(bar, E::BASE);
>> +
>> + Ok(())
>> +}
>> +
>> +pub(super) struct Ga102<E: FalconEngine>(PhantomData<E>);
>> +
>> +impl<E: FalconEngine> Ga102<E> {
>> + pub(super) fn new() -> Self {
>> + Self(PhantomData)
>> + }
>> +}
>> +
>> +impl<E: FalconEngine> FalconHal<E> for Ga102<E> {
>> + fn select_core(&self, _falcon: &Falcon<E>, bar: &Bar0) -> Result<()> {
>> + select_core_ga102::<E>(bar)
>> + }
>> +
>> + fn get_signature_reg_fuse_version(
>> + &self,
>> + falcon: &Falcon<E>,
>> + bar: &Bar0,
>> + engine_id_mask: u16,
>> + ucode_id: u8,
>> + ) -> Result<u32> {
>> + get_signature_reg_fuse_version_ga102(&falcon.dev, bar, engine_id_mask, ucode_id)
>> + }
>> +
>> + fn program_brom(
>> + &self,
>> + _falcon: &Falcon<E>,
>> + bar: &Bar0,
>> + params: &FalconBromParams,
>> + ) -> Result<()> {
>> + program_brom_ga102::<E>(bar, params)
>> + }
>> +}
>> diff --git a/drivers/gpu/nova-core/falcon/sec2.rs b/drivers/gpu/nova-core/falcon/sec2.rs
>> new file mode 100644
>> index 0000000000000000000000000000000000000000..c1efdaa7c4e1b8c04c4e041aae3b61a8b65f656b
>> --- /dev/null
>> +++ b/drivers/gpu/nova-core/falcon/sec2.rs
>> @@ -0,0 +1,8 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +
>> +use crate::falcon::FalconEngine;
>> +
>> +pub(crate) struct Sec2;
>> +impl FalconEngine for Sec2 {
>> + const BASE: usize = 0x00840000;
>> +}
>> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
>> index c338da69ecbc2200f1ef3061a4d62971b021e3eb..ece13594fba687f3f714e255b5436e72d80dece3 100644
>> --- a/drivers/gpu/nova-core/gpu.rs
>> +++ b/drivers/gpu/nova-core/gpu.rs
>> @@ -5,6 +5,7 @@
>> use crate::devinit;
>> use crate::dma::DmaObject;
>> use crate::driver::Bar0;
>> +use crate::falcon::{gsp::Gsp, sec2::Sec2, Falcon};
>> use crate::firmware::Firmware;
>> use crate::regs;
>> use crate::util;
>> @@ -227,6 +228,16 @@ pub(crate) fn new(
>> page
>> };
>>
>> + let gsp_falcon = Falcon::<Gsp>::new(
>> + pdev.as_ref(),
>> + spec.chipset,
>> + bar,
>> + spec.chipset > Chipset::GA100,
>> + )?;
>> + gsp_falcon.clear_swgen0_intr(bar);
>> +
>> + let _sec2_falcon = Falcon::<Sec2>::new(pdev.as_ref(), spec.chipset, bar, true)?;
>
> Just `_` instead? Also, please add a comment why it is important to create this
> instance even though it's never used.
It is not really important now, more a way to exercise the code until
we need to run Booter. The variable will be renamed to `sec2_falcon`
eventually, so I'd like to keep that name in the placeholder.
More information about the dri-devel
mailing list