diff options
Diffstat (limited to 'drivers/gpu/nova-core')
36 files changed, 5407 insertions, 981 deletions
diff --git a/drivers/gpu/nova-core/bitfield.rs b/drivers/gpu/nova-core/bitfield.rs new file mode 100644 index 000000000000..16e143658c51 --- /dev/null +++ b/drivers/gpu/nova-core/bitfield.rs @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Bitfield library for Rust structures +//! +//! Support for defining bitfields in Rust structures. Also used by the [`register!`] macro. + +/// Defines a struct with accessors to access bits within an inner unsigned integer. +/// +/// # Syntax +/// +/// ```rust +/// use nova_core::bitfield; +/// +/// #[derive(Debug, Clone, Copy, Default)] +/// enum Mode { +/// #[default] +/// Low = 0, +/// High = 1, +/// Auto = 2, +/// } +/// +/// impl TryFrom<u8> for Mode { +/// type Error = u8; +/// fn try_from(value: u8) -> Result<Self, Self::Error> { +/// match value { +/// 0 => Ok(Mode::Low), +/// 1 => Ok(Mode::High), +/// 2 => Ok(Mode::Auto), +/// _ => Err(value), +/// } +/// } +/// } +/// +/// impl From<Mode> for u8 { +/// fn from(mode: Mode) -> u8 { +/// mode as u8 +/// } +/// } +/// +/// #[derive(Debug, Clone, Copy, Default)] +/// enum State { +/// #[default] +/// Inactive = 0, +/// Active = 1, +/// } +/// +/// impl From<bool> for State { +/// fn from(value: bool) -> Self { +/// if value { State::Active } else { State::Inactive } +/// } +/// } +/// +/// impl From<State> for bool { +/// fn from(state: State) -> bool { +/// match state { +/// State::Inactive => false, +/// State::Active => true, +/// } +/// } +/// } +/// +/// bitfield! { +/// pub struct ControlReg(u32) { +/// 7:7 state as bool => State; +/// 3:0 mode as u8 ?=> Mode; +/// } +/// } +/// ``` +/// +/// This generates a struct with: +/// - Field accessors: `mode()`, `state()`, etc. +/// - Field setters: `set_mode()`, `set_state()`, etc. (supports chaining with builder pattern). +/// Note that the compiler will error out if the size of the setter's arg exceeds the +/// struct's storage size. +/// - Debug and Default implementations. 
+/// +/// Note: Field accessors and setters inherit the same visibility as the struct itself. +/// In the example above, both `mode()` and `set_mode()` methods will be `pub`. +/// +/// Fields are defined as follows: +/// +/// - `as <type>` simply returns the field value casted to <type>, typically `u32`, `u16`, `u8` or +/// `bool`. Note that `bool` fields must have a range of 1 bit. +/// - `as <type> => <into_type>` calls `<into_type>`'s `From::<<type>>` implementation and returns +/// the result. +/// - `as <type> ?=> <try_into_type>` calls `<try_into_type>`'s `TryFrom::<<type>>` implementation +/// and returns the result. This is useful with fields for which not all values are valid. +macro_rules! bitfield { + // Main entry point - defines the bitfield struct with fields + ($vis:vis struct $name:ident($storage:ty) $(, $comment:literal)? { $($fields:tt)* }) => { + bitfield!(@core $vis $name $storage $(, $comment)? { $($fields)* }); + }; + + // All rules below are helpers. + + // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, + // `Default`, and conversion to the value type) and field accessor methods. + (@core $vis:vis $name:ident $storage:ty $(, $comment:literal)? { $($fields:tt)* }) => { + $( + #[doc=$comment] + )? + #[repr(transparent)] + #[derive(Clone, Copy)] + $vis struct $name($storage); + + impl ::core::convert::From<$name> for $storage { + fn from(val: $name) -> $storage { + val.0 + } + } + + bitfield!(@fields_dispatcher $vis $name $storage { $($fields)* }); + }; + + // Captures the fields and passes them to all the implementers that require field information. + // + // Used to simplify the matching rules for implementers, so they don't need to match the entire + // complex fields rule even though they only make use of part of it. + (@fields_dispatcher $vis:vis $name:ident $storage:ty { + $($hi:tt:$lo:tt $field:ident as $type:tt + $(?=> $try_into_type:ty)? + $(=> $into_type:ty)? + $(, $comment:literal)? 
+ ; + )* + } + ) => { + bitfield!(@field_accessors $vis $name $storage { + $( + $hi:$lo $field as $type + $(?=> $try_into_type)? + $(=> $into_type)? + $(, $comment)? + ; + )* + }); + bitfield!(@debug $name { $($field;)* }); + bitfield!(@default $name { $($field;)* }); + }; + + // Defines all the field getter/setter methods for `$name`. + ( + @field_accessors $vis:vis $name:ident $storage:ty { + $($hi:tt:$lo:tt $field:ident as $type:tt + $(?=> $try_into_type:ty)? + $(=> $into_type:ty)? + $(, $comment:literal)? + ; + )* + } + ) => { + $( + bitfield!(@check_field_bounds $hi:$lo $field as $type); + )* + + #[allow(dead_code)] + impl $name { + $( + bitfield!(@field_accessor $vis $name $storage, $hi:$lo $field as $type + $(?=> $try_into_type)? + $(=> $into_type)? + $(, $comment)? + ; + ); + )* + } + }; + + // Boolean fields must have `$hi == $lo`. + (@check_field_bounds $hi:tt:$lo:tt $field:ident as bool) => { + #[allow(clippy::eq_op)] + const _: () = { + ::kernel::build_assert!( + $hi == $lo, + concat!("boolean field `", stringify!($field), "` covers more than one bit") + ); + }; + }; + + // Non-boolean fields must have `$hi >= $lo`. + (@check_field_bounds $hi:tt:$lo:tt $field:ident as $type:tt) => { + #[allow(clippy::eq_op)] + const _: () = { + ::kernel::build_assert!( + $hi >= $lo, + concat!("field `", stringify!($field), "`'s MSB is smaller than its LSB") + ); + }; + }; + + // Catches fields defined as `bool` and convert them into a boolean value. + ( + @field_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as bool + => $into_type:ty $(, $comment:literal)?; + ) => { + bitfield!( + @leaf_accessor $vis $name $storage, $hi:$lo $field + { |f| <$into_type>::from(f != 0) } + bool $into_type => $into_type $(, $comment)?; + ); + }; + + // Shortcut for fields defined as `bool` without the `=>` syntax. 
+ ( + @field_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as bool + $(, $comment:literal)?; + ) => { + bitfield!( + @field_accessor $vis $name $storage, $hi:$lo $field as bool => bool $(, $comment)?; + ); + }; + + // Catches the `?=>` syntax for non-boolean fields. + ( + @field_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as $type:tt + ?=> $try_into_type:ty $(, $comment:literal)?; + ) => { + bitfield!(@leaf_accessor $vis $name $storage, $hi:$lo $field + { |f| <$try_into_type>::try_from(f as $type) } $type $try_into_type => + ::core::result::Result< + $try_into_type, + <$try_into_type as ::core::convert::TryFrom<$type>>::Error + > + $(, $comment)?;); + }; + + // Catches the `=>` syntax for non-boolean fields. + ( + @field_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as $type:tt + => $into_type:ty $(, $comment:literal)?; + ) => { + bitfield!(@leaf_accessor $vis $name $storage, $hi:$lo $field + { |f| <$into_type>::from(f as $type) } $type $into_type => $into_type $(, $comment)?;); + }; + + // Shortcut for non-boolean fields defined without the `=>` or `?=>` syntax. + ( + @field_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as $type:tt + $(, $comment:literal)?; + ) => { + bitfield!( + @field_accessor $vis $name $storage, $hi:$lo $field as $type => $type $(, $comment)?; + ); + }; + + // Generates the accessor methods for a single field. 
+ ( + @leaf_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident + { $process:expr } $prim_type:tt $to_type:ty => $res_type:ty $(, $comment:literal)?; + ) => { + ::kernel::macros::paste!( + const [<$field:upper _RANGE>]: ::core::ops::RangeInclusive<u8> = $lo..=$hi; + const [<$field:upper _MASK>]: $storage = { + // Generate mask for shifting + match ::core::mem::size_of::<$storage>() { + 1 => ::kernel::bits::genmask_u8($lo..=$hi) as $storage, + 2 => ::kernel::bits::genmask_u16($lo..=$hi) as $storage, + 4 => ::kernel::bits::genmask_u32($lo..=$hi) as $storage, + 8 => ::kernel::bits::genmask_u64($lo..=$hi) as $storage, + _ => ::kernel::build_error!("Unsupported storage type size") + } + }; + const [<$field:upper _SHIFT>]: u32 = $lo; + ); + + $( + #[doc="Returns the value of this field:"] + #[doc=$comment] + )? + #[inline(always)] + $vis fn $field(self) -> $res_type { + ::kernel::macros::paste!( + const MASK: $storage = $name::[<$field:upper _MASK>]; + const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; + ); + let field = ((self.0 & MASK) >> SHIFT); + + $process(field) + } + + ::kernel::macros::paste!( + $( + #[doc="Sets the value of this field:"] + #[doc=$comment] + )? + #[inline(always)] + $vis fn [<set_ $field>](mut self, value: $to_type) -> Self { + const MASK: $storage = $name::[<$field:upper _MASK>]; + const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; + let value = ($storage::from($prim_type::from(value)) << SHIFT) & MASK; + self.0 = (self.0 & !MASK) | value; + + self + } + ); + }; + + // Generates the `Debug` implementation for `$name`. + (@debug $name:ident { $($field:ident;)* }) => { + impl ::kernel::fmt::Debug for $name { + fn fmt(&self, f: &mut ::kernel::fmt::Formatter<'_>) -> ::kernel::fmt::Result { + f.debug_struct(stringify!($name)) + .field("<raw>", &::kernel::prelude::fmt!("{:#x}", &self.0)) + $( + .field(stringify!($field), &self.$field()) + )* + .finish() + } + } + }; + + // Generates the `Default` implementation for `$name`. 
+ (@default $name:ident { $($field:ident;)* }) => { + /// Returns a value for the bitfield where all fields are set to their default value. + impl ::core::default::Default for $name { + fn default() -> Self { + #[allow(unused_mut)] + let mut value = Self(Default::default()); + + ::kernel::macros::paste!( + $( + value = value.[<set_ $field>](Default::default()); + )* + ); + + value + } + } + }; +} diff --git a/drivers/gpu/nova-core/dma.rs b/drivers/gpu/nova-core/dma.rs index 94f44bcfd748..7215398969da 100644 --- a/drivers/gpu/nova-core/dma.rs +++ b/drivers/gpu/nova-core/dma.rs @@ -2,12 +2,17 @@ //! Simple DMA object wrapper. -use core::ops::{Deref, DerefMut}; - -use kernel::device; -use kernel::dma::CoherentAllocation; -use kernel::page::PAGE_SIZE; -use kernel::prelude::*; +use core::ops::{ + Deref, + DerefMut, // +}; + +use kernel::{ + device, + dma::CoherentAllocation, + page::PAGE_SIZE, + prelude::*, // +}; pub(crate) struct DmaObject { dma: CoherentAllocation<u8>, @@ -25,20 +30,11 @@ impl DmaObject { } pub(crate) fn from_data(dev: &device::Device<device::Bound>, data: &[u8]) -> Result<Self> { - Self::new(dev, data.len()).map(|mut dma_obj| { - // TODO[COHA]: replace with `CoherentAllocation::write()` once available. - // SAFETY: - // - `dma_obj`'s size is at least `data.len()`. - // - We have just created this object and there is no other user at this stage. - unsafe { - core::ptr::copy_nonoverlapping( - data.as_ptr(), - dma_obj.dma.start_ptr_mut(), - data.len(), - ); - } - - dma_obj + Self::new(dev, data.len()).and_then(|mut dma_obj| { + // SAFETY: We have just allocated the DMA memory, we are the only users and + // we haven't made the device aware of the handle yet. + unsafe { dma_obj.write(data, 0)? 
+ } + Ok(dma_obj) }) } } diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs index edc72052e27a..d91bbc50cde7 100644 --- a/drivers/gpu/nova-core/driver.rs +++ b/drivers/gpu/nova-core/driver.rs @@ -1,13 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 use kernel::{ - auxiliary, c_str, + auxiliary, + c_str, device::Core, + dma::Device, + dma::DmaMask, pci, - pci::{Class, ClassMask, Vendor}, + pci::{ + Class, + ClassMask, + Vendor, // + }, prelude::*, sizes::SZ_16M, - sync::Arc, + sync::Arc, // }; use crate::gpu::Gpu; @@ -20,6 +27,15 @@ pub(crate) struct NovaCore { } const BAR0_SIZE: usize = SZ_16M; + +// For now we only support Ampere which can use up to 47-bit DMA addresses. +// +// TODO: Add an abstraction for this to support newer GPUs which may support +// larger DMA addresses. Limiting these GPUs to smaller address widths won't +// have any adverse effects, unless installed on systems which require larger +// DMA addresses. These systems should be quite rare. +const GPU_DMA_BITS: u32 = 47; + pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>; kernel::pci_device_table!( @@ -57,6 +73,11 @@ impl pci::Driver for NovaCore { pdev.enable_device_mem()?; pdev.set_master(); + // SAFETY: No concurrent DMA allocations or mappings can be made because + // the device is still being probed and therefore isn't being used by + // other threads of execution. + unsafe { pdev.dma_set_mask_and_coherent(DmaMask::new::<GPU_DMA_BITS>())? }; + let devres_bar = Arc::pin_init( pdev.iomap_region_sized::<BAR0_SIZE>(0, c_str!("nova-core/bar0")), GFP_KERNEL, diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 37e6298195e4..82c661aef594 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -3,30 +3,43 @@ //! 
Falcon microprocessor base support use core::ops::Deref; + use hal::FalconHal; -use kernel::device; -use kernel::dma::DmaAddress; -use kernel::prelude::*; -use kernel::sync::aref::ARef; -use kernel::time::Delta; - -use crate::dma::DmaObject; -use crate::driver::Bar0; -use crate::gpu::Chipset; -use crate::regs; -use crate::regs::macros::RegisterBase; -use crate::util; + +use kernel::{ + device, + dma::DmaAddress, + io::poll::read_poll_timeout, + prelude::*, + sync::aref::ARef, + time::{ + delay::fsleep, + Delta, // + }, +}; + +use crate::{ + dma::DmaObject, + driver::Bar0, + gpu::Chipset, + num::{ + FromSafeCast, + IntoSafeCast, // + }, + regs, + regs::macros::RegisterBase, // +}; pub(crate) mod gsp; mod hal; pub(crate) mod sec2; // TODO[FPRI]: Replace with `ToPrimitive`. -macro_rules! impl_from_enum_to_u32 { +macro_rules! impl_from_enum_to_u8 { ($enum_type:ty) => { - impl From<$enum_type> for u32 { + impl From<$enum_type> for u8 { fn from(value: $enum_type) -> Self { - value as u32 + value as u8 } } }; @@ -46,7 +59,7 @@ pub(crate) enum FalconCoreRev { Rev6 = 6, Rev7 = 7, } -impl_from_enum_to_u32!(FalconCoreRev); +impl_from_enum_to_u8!(FalconCoreRev); // TODO[FPRI]: replace with `FromPrimitive`. impl TryFrom<u8> for FalconCoreRev { @@ -81,7 +94,7 @@ pub(crate) enum FalconCoreRevSubversion { Subversion2 = 2, Subversion3 = 3, } -impl_from_enum_to_u32!(FalconCoreRevSubversion); +impl_from_enum_to_u8!(FalconCoreRevSubversion); // TODO[FPRI]: replace with `FromPrimitive`. impl TryFrom<u8> for FalconCoreRevSubversion { @@ -125,7 +138,7 @@ pub(crate) enum FalconSecurityModel { /// Also known as High-Secure, Privilege Level 3 or PL3. Heavy = 3, } -impl_from_enum_to_u32!(FalconSecurityModel); +impl_from_enum_to_u8!(FalconSecurityModel); // TODO[FPRI]: replace with `FromPrimitive`. 
impl TryFrom<u8> for FalconSecurityModel { @@ -157,7 +170,7 @@ pub(crate) enum FalconModSelAlgo { #[default] Rsa3k = 1, } -impl_from_enum_to_u32!(FalconModSelAlgo); +impl_from_enum_to_u8!(FalconModSelAlgo); // TODO[FPRI]: replace with `FromPrimitive`. impl TryFrom<u8> for FalconModSelAlgo { @@ -179,7 +192,7 @@ pub(crate) enum DmaTrfCmdSize { #[default] Size256B = 0x6, } -impl_from_enum_to_u32!(DmaTrfCmdSize); +impl_from_enum_to_u8!(DmaTrfCmdSize); // TODO[FPRI]: replace with `FromPrimitive`. impl TryFrom<u8> for DmaTrfCmdSize { @@ -202,7 +215,6 @@ pub(crate) enum PeregrineCoreSelect { /// RISC-V core is active. Riscv = 1, } -impl_from_enum_to_u32!(PeregrineCoreSelect); impl From<bool> for PeregrineCoreSelect { fn from(value: bool) -> Self { @@ -213,6 +225,15 @@ impl From<bool> for PeregrineCoreSelect { } } +impl From<PeregrineCoreSelect> for bool { + fn from(value: PeregrineCoreSelect) -> Self { + match value { + PeregrineCoreSelect::Falcon => false, + PeregrineCoreSelect::Riscv => true, + } + } +} + /// Different types of memory present in a falcon core. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(crate) enum FalconMem { @@ -236,7 +257,7 @@ pub(crate) enum FalconFbifTarget { /// Non-coherent system memory (System DRAM). NoncoherentSysmem = 2, } -impl_from_enum_to_u32!(FalconFbifTarget); +impl_from_enum_to_u8!(FalconFbifTarget); // TODO[FPRI]: replace with `FromPrimitive`. impl TryFrom<u8> for FalconFbifTarget { @@ -263,7 +284,6 @@ pub(crate) enum FalconFbifMemType { /// Physical memory addresses. Physical = 1, } -impl_from_enum_to_u32!(FalconFbifMemType); /// Conversion from a single-bit register field. 
impl From<bool> for FalconFbifMemType { @@ -275,6 +295,15 @@ impl From<bool> for FalconFbifMemType { } } +impl From<FalconFbifMemType> for bool { + fn from(value: FalconFbifMemType) -> Self { + match value { + FalconFbifMemType::Virtual => false, + FalconFbifMemType::Physical => true, + } + } +} + /// Type used to represent the `PFALCON` registers address base for a given falcon engine. pub(crate) struct PFalconBase(()); @@ -346,47 +375,29 @@ pub(crate) struct Falcon<E: FalconEngine> { impl<E: FalconEngine + 'static> Falcon<E> { /// Create a new falcon instance. - /// - /// `need_riscv` is set to `true` if the caller expects the falcon to be a dual falcon/riscv - /// controller. - pub(crate) fn new( - dev: &device::Device, - chipset: Chipset, - bar: &Bar0, - need_riscv: bool, - ) -> Result<Self> { - let hwcfg1 = regs::NV_PFALCON_FALCON_HWCFG1::read(bar, &E::ID); - // Check that the revision and security model contain valid values. - let _ = hwcfg1.core_rev()?; - let _ = hwcfg1.security_model()?; - - if need_riscv { - let hwcfg2 = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); - if !hwcfg2.riscv() { - dev_err!( - dev, - "riscv support requested on a controller that does not support it\n" - ); - return Err(EINVAL); - } - } - + pub(crate) fn new(dev: &device::Device, chipset: Chipset) -> Result<Self> { Ok(Self { hal: hal::falcon_hal(chipset)?, dev: dev.into(), }) } + /// Resets DMA-related registers. + pub(crate) fn dma_reset(&self, bar: &Bar0) { + regs::NV_PFALCON_FBIF_CTL::update(bar, &E::ID, |v| v.set_allow_phys_no_ctx(true)); + regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID); + } + /// Wait for memory scrubbing to complete. fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result { // TIMEOUT: memory scrubbing should complete in less than 20ms. 
- util::wait_on(Delta::from_millis(20), || { - if regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID).mem_scrubbing_done() { - Some(()) - } else { - None - } - }) + read_poll_timeout( + || Ok(regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID)), + |r| r.mem_scrubbing_done(), + Delta::ZERO, + Delta::from_millis(20), + ) + .map(|_| ()) } /// Reset the falcon engine. @@ -395,22 +406,19 @@ impl<E: FalconEngine + 'static> Falcon<E> { // According to OpenRM's `kflcnPreResetWait_GA102` documentation, HW sometimes does not set // RESET_READY so a non-failing timeout is used. - let _ = util::wait_on(Delta::from_micros(150), || { - let r = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); - if r.reset_ready() { - Some(()) - } else { - None - } - }); + let _ = read_poll_timeout( + || Ok(regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID)), + |r| r.reset_ready(), + Delta::ZERO, + Delta::from_micros(150), + ); - regs::NV_PFALCON_FALCON_ENGINE::alter(bar, &E::ID, |v| v.set_reset(true)); + regs::NV_PFALCON_FALCON_ENGINE::update(bar, &E::ID, |v| v.set_reset(true)); - // TODO[DLAY]: replace with udelay() or equivalent once available. // TIMEOUT: falcon engine should not take more than 10us to reset. 
- let _: Result = util::wait_on(Delta::from_micros(10), || None); + fsleep(Delta::from_micros(10)); - regs::NV_PFALCON_FALCON_ENGINE::alter(bar, &E::ID, |v| v.set_reset(false)); + regs::NV_PFALCON_FALCON_ENGINE::update(bar, &E::ID, |v| v.set_reset(false)); self.reset_wait_mem_scrubbing(bar)?; @@ -452,7 +460,7 @@ impl<E: FalconEngine + 'static> Falcon<E> { FalconMem::Imem => (load_offsets.src_start, fw.dma_handle()), FalconMem::Dmem => ( 0, - fw.dma_handle_with_offset(load_offsets.src_start as usize)?, + fw.dma_handle_with_offset(load_offsets.src_start.into_safe_cast())?, ), }; if dma_start % DmaAddress::from(DMA_LEN) > 0 { @@ -478,7 +486,7 @@ impl<E: FalconEngine + 'static> Falcon<E> { dev_err!(self.dev, "DMA transfer length overflow"); return Err(EOVERFLOW); } - Some(upper_bound) if upper_bound as usize > fw.size() => { + Some(upper_bound) if usize::from_safe_cast(upper_bound) > fw.size() => { dev_err!(self.dev, "DMA transfer goes beyond range of DMA object"); return Err(EINVAL); } @@ -488,9 +496,13 @@ impl<E: FalconEngine + 'static> Falcon<E> { // Set up the base source DMA address. regs::NV_PFALCON_FALCON_DMATRFBASE::default() + // CAST: `as u32` is used on purpose since we do want to strip the upper bits, which + // will be written to `NV_PFALCON_FALCON_DMATRFBASE1`. .set_base((dma_start >> 8) as u32) .write(bar, &E::ID); regs::NV_PFALCON_FALCON_DMATRFBASE1::default() + // CAST: `as u16` is used on purpose since the remaining bits are guaranteed to fit + // within a `u16`. .set_base((dma_start >> 40) as u16) .write(bar, &E::ID); @@ -512,14 +524,12 @@ impl<E: FalconEngine + 'static> Falcon<E> { // Wait for the transfer to complete. // TIMEOUT: arbitrarily large value, no DMA transfer to the falcon's small memories // should ever take that long. 
- util::wait_on(Delta::from_secs(2), || { - let r = regs::NV_PFALCON_FALCON_DMATRFCMD::read(bar, &E::ID); - if r.idle() { - Some(()) - } else { - None - } - })?; + read_poll_timeout( + || Ok(regs::NV_PFALCON_FALCON_DMATRFCMD::read(bar, &E::ID)), + |r| r.idle(), + Delta::ZERO, + Delta::from_secs(2), + )?; } Ok(()) @@ -527,9 +537,8 @@ impl<E: FalconEngine + 'static> Falcon<E> { /// Perform a DMA load into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. pub(crate) fn dma_load<F: FalconFirmware<Target = E>>(&self, bar: &Bar0, fw: &F) -> Result { - regs::NV_PFALCON_FBIF_CTL::alter(bar, &E::ID, |v| v.set_allow_phys_no_ctx(true)); - regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID); - regs::NV_PFALCON_FBIF_TRANSCFG::alter(bar, &E::ID, 0, |v| { + self.dma_reset(bar); + regs::NV_PFALCON_FBIF_TRANSCFG::update(bar, &E::ID, 0, |v| { v.set_target(FalconFbifTarget::CoherentSysmem) .set_mem_type(FalconFbifMemType::Physical) }); @@ -547,19 +556,35 @@ impl<E: FalconEngine + 'static> Falcon<E> { Ok(()) } - /// Runs the loaded firmware and waits for its completion. - /// - /// `mbox0` and `mbox1` are optional parameters to write into the `MBOX0` and `MBOX1` registers - /// prior to running. - /// - /// Wait up to two seconds for the firmware to complete, and return its exit status read from - /// the `MBOX0` and `MBOX1` registers. - pub(crate) fn boot( - &self, - bar: &Bar0, - mbox0: Option<u32>, - mbox1: Option<u32>, - ) -> Result<(u32, u32)> { + /// Wait until the falcon CPU is halted. + pub(crate) fn wait_till_halted(&self, bar: &Bar0) -> Result<()> { + // TIMEOUT: arbitrarily large value, firmwares should complete in less than 2 seconds. + read_poll_timeout( + || Ok(regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID)), + |r| r.halted(), + Delta::ZERO, + Delta::from_secs(2), + )?; + + Ok(()) + } + + /// Start the falcon CPU. 
+ pub(crate) fn start(&self, bar: &Bar0) -> Result<()> { + match regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID).alias_en() { + true => regs::NV_PFALCON_FALCON_CPUCTL_ALIAS::default() + .set_startcpu(true) + .write(bar, &E::ID), + false => regs::NV_PFALCON_FALCON_CPUCTL::default() + .set_startcpu(true) + .write(bar, &E::ID), + } + + Ok(()) + } + + /// Writes values to the mailbox registers if provided. + pub(crate) fn write_mailboxes(&self, bar: &Bar0, mbox0: Option<u32>, mbox1: Option<u32>) { if let Some(mbox0) = mbox0 { regs::NV_PFALCON_FALCON_MAILBOX0::default() .set_value(mbox0) @@ -571,32 +596,43 @@ impl<E: FalconEngine + 'static> Falcon<E> { .set_value(mbox1) .write(bar, &E::ID); } + } - match regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID).alias_en() { - true => regs::NV_PFALCON_FALCON_CPUCTL_ALIAS::default() - .set_startcpu(true) - .write(bar, &E::ID), - false => regs::NV_PFALCON_FALCON_CPUCTL::default() - .set_startcpu(true) - .write(bar, &E::ID), - } + /// Reads the value from `mbox0` register. + pub(crate) fn read_mailbox0(&self, bar: &Bar0) -> u32 { + regs::NV_PFALCON_FALCON_MAILBOX0::read(bar, &E::ID).value() + } - // TIMEOUT: arbitrarily large value, firmwares should complete in less than 2 seconds. - util::wait_on(Delta::from_secs(2), || { - let r = regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID); - if r.halted() { - Some(()) - } else { - None - } - })?; + /// Reads the value from `mbox1` register. + pub(crate) fn read_mailbox1(&self, bar: &Bar0) -> u32 { + regs::NV_PFALCON_FALCON_MAILBOX1::read(bar, &E::ID).value() + } - let (mbox0, mbox1) = ( - regs::NV_PFALCON_FALCON_MAILBOX0::read(bar, &E::ID).value(), - regs::NV_PFALCON_FALCON_MAILBOX1::read(bar, &E::ID).value(), - ); + /// Reads values from both mailbox registers. + pub(crate) fn read_mailboxes(&self, bar: &Bar0) -> (u32, u32) { + let mbox0 = self.read_mailbox0(bar); + let mbox1 = self.read_mailbox1(bar); + + (mbox0, mbox1) + } - Ok((mbox0, mbox1)) + /// Start running the loaded firmware. 
+ /// + /// `mbox0` and `mbox1` are optional parameters to write into the `MBOX0` and `MBOX1` registers + /// prior to running. + /// + /// Wait up to two seconds for the firmware to complete, and return its exit status read from + /// the `MBOX0` and `MBOX1` registers. + pub(crate) fn boot( + &self, + bar: &Bar0, + mbox0: Option<u32>, + mbox1: Option<u32>, + ) -> Result<(u32, u32)> { + self.write_mailboxes(bar, mbox0, mbox1); + self.start(bar)?; + self.wait_till_halted(bar)?; + Ok(self.read_mailboxes(bar)) } /// Returns the fused version of the signature to use in order to run a HS firmware on this @@ -610,4 +646,19 @@ impl<E: FalconEngine + 'static> Falcon<E> { self.hal .signature_reg_fuse_version(self, bar, engine_id_mask, ucode_id) } + + /// Check if the RISC-V core is active. + /// + /// Returns `true` if the RISC-V core is active, `false` otherwise. + pub(crate) fn is_riscv_active(&self, bar: &Bar0) -> bool { + let cpuctl = regs::NV_PRISCV_RISCV_CPUCTL::read(bar, &E::ID); + cpuctl.active_stat() + } + + /// Write the application version to the OS register. + pub(crate) fn write_os_version(&self, bar: &Bar0, app_version: u32) { + regs::NV_PFALCON_FALCON_OS::default() + .set_value(app_version) + .write(bar, &E::ID); + } } diff --git a/drivers/gpu/nova-core/falcon/gsp.rs b/drivers/gpu/nova-core/falcon/gsp.rs index f17599cb49fa..67edef3636c1 100644 --- a/drivers/gpu/nova-core/falcon/gsp.rs +++ b/drivers/gpu/nova-core/falcon/gsp.rs @@ -1,9 +1,23 @@ // SPDX-License-Identifier: GPL-2.0 +use kernel::{ + io::poll::read_poll_timeout, + prelude::*, + time::Delta, // +}; + use crate::{ driver::Bar0, - falcon::{Falcon, FalconEngine, PFalcon2Base, PFalconBase}, - regs::{self, macros::RegisterBase}, + falcon::{ + Falcon, + FalconEngine, + PFalcon2Base, + PFalconBase, // + }, + regs::{ + self, + macros::RegisterBase, // + }, }; /// Type specifying the `Gsp` falcon engine. Cannot be instantiated. 
@@ -29,4 +43,15 @@ impl Falcon<Gsp> { .set_swgen0(true) .write(bar, &Gsp::ID); } + + /// Checks if GSP reload/resume has completed during the boot process. + pub(crate) fn check_reload_completed(&self, bar: &Bar0, timeout: Delta) -> Result<bool> { + read_poll_timeout( + || Ok(regs::NV_PGC6_BSI_SECURE_SCRATCH_14::read(bar)), + |val| val.boot_stage_3_handoff(), + Delta::ZERO, + timeout, + ) + .map(|_| true) + } } diff --git a/drivers/gpu/nova-core/falcon/hal.rs b/drivers/gpu/nova-core/falcon/hal.rs index bba288455617..8dc56a28ad65 100644 --- a/drivers/gpu/nova-core/falcon/hal.rs +++ b/drivers/gpu/nova-core/falcon/hal.rs @@ -2,9 +2,15 @@ use kernel::prelude::*; -use crate::driver::Bar0; -use crate::falcon::{Falcon, FalconBromParams, FalconEngine}; -use crate::gpu::Chipset; +use crate::{ + driver::Bar0, + falcon::{ + Falcon, + FalconBromParams, + FalconEngine, // + }, + gpu::Chipset, +}; mod ga102; @@ -44,7 +50,7 @@ pub(super) fn falcon_hal<E: FalconEngine + 'static>( use Chipset::*; let hal = match chipset { - GA102 | GA103 | GA104 | GA106 | GA107 => { + GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 => { KBox::new(ga102::Ga102::<E>::new(), GFP_KERNEL)? 
as KBox<dyn FalconHal<E>> } _ => return Err(ENOTSUPP), diff --git a/drivers/gpu/nova-core/falcon/hal/ga102.rs b/drivers/gpu/nova-core/falcon/hal/ga102.rs index 0b1cbe7853b3..69a7a95cac16 100644 --- a/drivers/gpu/nova-core/falcon/hal/ga102.rs +++ b/drivers/gpu/nova-core/falcon/hal/ga102.rs @@ -2,16 +2,24 @@ use core::marker::PhantomData; -use kernel::device; -use kernel::prelude::*; -use kernel::time::Delta; +use kernel::{ + device, + io::poll::read_poll_timeout, + prelude::*, + time::Delta, // +}; -use crate::driver::Bar0; -use crate::falcon::{ - Falcon, FalconBromParams, FalconEngine, FalconModSelAlgo, PeregrineCoreSelect, +use crate::{ + driver::Bar0, + falcon::{ + Falcon, + FalconBromParams, + FalconEngine, + FalconModSelAlgo, + PeregrineCoreSelect, // + }, + regs, }; -use crate::regs; -use crate::util; use super::FalconHal; @@ -23,14 +31,12 @@ fn select_core_ga102<E: FalconEngine>(bar: &Bar0) -> Result { .write(bar, &E::ID); // TIMEOUT: falcon core should take less than 10ms to report being enabled. - util::wait_on(Delta::from_millis(10), || { - let r = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, &E::ID); - if r.valid() { - Some(()) - } else { - None - } - })?; + read_poll_timeout( + || Ok(regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, &E::ID)), + |r| r.valid(), + Delta::ZERO, + Delta::from_millis(10), + )?; } Ok(()) @@ -42,11 +48,9 @@ fn signature_reg_fuse_version_ga102( engine_id_mask: u16, ucode_id: u8, ) -> Result<u32> { - const NV_FUSE_OPT_FPF_SIZE: u8 = regs::NV_FUSE_OPT_FPF_SIZE as u8; - // Each engine has 16 ucode version registers numbered from 1 to 16. 
- let ucode_idx = match ucode_id { - 1..=NV_FUSE_OPT_FPF_SIZE => (ucode_id - 1) as usize, + let ucode_idx = match usize::from(ucode_id) { + ucode_id @ 1..=regs::NV_FUSE_OPT_FPF_SIZE => ucode_id - 1, _ => { dev_err!(dev, "invalid ucode id {:#x}", ucode_id); return Err(EINVAL); diff --git a/drivers/gpu/nova-core/falcon/sec2.rs b/drivers/gpu/nova-core/falcon/sec2.rs index 815786c8480d..b57d362e576a 100644 --- a/drivers/gpu/nova-core/falcon/sec2.rs +++ b/drivers/gpu/nova-core/falcon/sec2.rs @@ -1,7 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 -use crate::falcon::{FalconEngine, PFalcon2Base, PFalconBase}; -use crate::regs::macros::RegisterBase; +use crate::{ + falcon::{ + FalconEngine, + PFalcon2Base, + PFalconBase, // + }, + regs::macros::RegisterBase, +}; /// Type specifying the `Sec2` falcon engine. Cannot be instantiated. pub(crate) struct Sec2(()); diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs index 27d9edab8347..3c9cf151786c 100644 --- a/drivers/gpu/nova-core/fb.rs +++ b/drivers/gpu/nova-core/fb.rs @@ -2,16 +2,29 @@ use core::ops::Range; -use kernel::prelude::*; -use kernel::ptr::{Alignable, Alignment}; -use kernel::sizes::*; -use kernel::sync::aref::ARef; -use kernel::{dev_warn, device}; - -use crate::dma::DmaObject; -use crate::driver::Bar0; -use crate::gpu::Chipset; -use crate::regs; +use kernel::{ + device, + prelude::*, + ptr::{ + Alignable, + Alignment, // + }, + sizes::*, + sync::aref::ARef, // +}; + +use crate::{ + dma::DmaObject, + driver::Bar0, + firmware::gsp::GspFirmware, + gpu::Chipset, + gsp, + num::{ + usize_as_u64, + FromSafeCast, // + }, + regs, +}; mod hal; @@ -85,16 +98,28 @@ impl SysmemFlush { /// /// Contains ranges of GPU memory reserved for a given purpose during the GSP boot process. #[derive(Debug)] -#[expect(dead_code)] pub(crate) struct FbLayout { + /// Range of the framebuffer. Starts at `0`. pub(crate) fb: Range<u64>, + /// VGA workspace, small area of reserved memory at the end of the framebuffer. 
pub(crate) vga_workspace: Range<u64>, + /// FRTS range. pub(crate) frts: Range<u64>, + /// Memory area containing the GSP bootloader image. + pub(crate) boot: Range<u64>, + /// Memory area containing the GSP firmware image. + pub(crate) elf: Range<u64>, + /// WPR2 heap. + pub(crate) wpr2_heap: Range<u64>, + /// WPR2 region range, starting with an instance of `GspFwWprMeta`. + pub(crate) wpr2: Range<u64>, + pub(crate) heap: Range<u64>, + pub(crate) vf_partition_count: u8, } impl FbLayout { - /// Computes the FB layout. - pub(crate) fn new(chipset: Chipset, bar: &Bar0) -> Result<Self> { + /// Computes the FB layout for `chipset` required to run the `gsp_fw` GSP firmware. + pub(crate) fn new(chipset: Chipset, bar: &Bar0, gsp_fw: &GspFirmware) -> Result<Self> { let hal = hal::fb_hal(chipset); let fb = { @@ -105,14 +130,14 @@ impl FbLayout { let vga_workspace = { let vga_base = { - const NV_PRAMIN_SIZE: u64 = SZ_1M as u64; + const NV_PRAMIN_SIZE: u64 = usize_as_u64(SZ_1M); let base = fb.end - NV_PRAMIN_SIZE; if hal.supports_display(bar) { match regs::NV_PDISP_VGA_WORKSPACE_BASE::read(bar).vga_workspace_addr() { Some(addr) => { if addr < base { - const VBIOS_WORKSPACE_SIZE: u64 = SZ_128K as u64; + const VBIOS_WORKSPACE_SIZE: u64 = usize_as_u64(SZ_128K); // Point workspace address to end of framebuffer. 
fb.end - VBIOS_WORKSPACE_SIZE @@ -132,16 +157,61 @@ impl FbLayout { let frts = { const FRTS_DOWN_ALIGN: Alignment = Alignment::new::<SZ_128K>(); - const FRTS_SIZE: u64 = SZ_1M as u64; + const FRTS_SIZE: u64 = usize_as_u64(SZ_1M); let frts_base = vga_workspace.start.align_down(FRTS_DOWN_ALIGN) - FRTS_SIZE; frts_base..frts_base + FRTS_SIZE }; + let boot = { + const BOOTLOADER_DOWN_ALIGN: Alignment = Alignment::new::<SZ_4K>(); + let bootloader_size = u64::from_safe_cast(gsp_fw.bootloader.ucode.size()); + let bootloader_base = (frts.start - bootloader_size).align_down(BOOTLOADER_DOWN_ALIGN); + + bootloader_base..bootloader_base + bootloader_size + }; + + let elf = { + const ELF_DOWN_ALIGN: Alignment = Alignment::new::<SZ_64K>(); + let elf_size = u64::from_safe_cast(gsp_fw.size); + let elf_addr = (boot.start - elf_size).align_down(ELF_DOWN_ALIGN); + + elf_addr..elf_addr + elf_size + }; + + let wpr2_heap = { + const WPR2_HEAP_DOWN_ALIGN: Alignment = Alignment::new::<SZ_1M>(); + let wpr2_heap_size = + gsp::LibosParams::from_chipset(chipset).wpr_heap_size(chipset, fb.end); + let wpr2_heap_addr = (elf.start - wpr2_heap_size).align_down(WPR2_HEAP_DOWN_ALIGN); + + wpr2_heap_addr..(elf.start).align_down(WPR2_HEAP_DOWN_ALIGN) + }; + + let wpr2 = { + const WPR2_DOWN_ALIGN: Alignment = Alignment::new::<SZ_1M>(); + let wpr2_addr = (wpr2_heap.start - u64::from_safe_cast(size_of::<gsp::GspFwWprMeta>())) + .align_down(WPR2_DOWN_ALIGN); + + wpr2_addr..frts.end + }; + + let heap = { + const HEAP_SIZE: u64 = usize_as_u64(SZ_1M); + + wpr2.start - HEAP_SIZE..wpr2.start + }; + Ok(Self { fb, vga_workspace, frts, + boot, + elf, + wpr2_heap, + wpr2, + heap, + vf_partition_count: 0, }) } } diff --git a/drivers/gpu/nova-core/fb/hal.rs b/drivers/gpu/nova-core/fb/hal.rs index 2f914948bb9a..aba0abd8ee00 100644 --- a/drivers/gpu/nova-core/fb/hal.rs +++ b/drivers/gpu/nova-core/fb/hal.rs @@ -2,8 +2,10 @@ use kernel::prelude::*; -use crate::driver::Bar0; -use crate::gpu::Chipset; +use crate::{ + 
driver::Bar0, + gpu::Chipset, // +}; mod ga100; mod ga102; diff --git a/drivers/gpu/nova-core/fb/hal/ga100.rs b/drivers/gpu/nova-core/fb/hal/ga100.rs index 871c42bf033a..e0acc41aa7cd 100644 --- a/drivers/gpu/nova-core/fb/hal/ga100.rs +++ b/drivers/gpu/nova-core/fb/hal/ga100.rs @@ -1,15 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 -struct Ga100; - use kernel::prelude::*; -use crate::driver::Bar0; -use crate::fb::hal::FbHal; -use crate::regs; +use crate::{ + driver::Bar0, + fb::hal::FbHal, + regs, // +}; use super::tu102::FLUSH_SYSMEM_ADDR_SHIFT; +struct Ga100; + pub(super) fn read_sysmem_flush_page_ga100(bar: &Bar0) -> u64 { u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::read(bar).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT | u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::read(bar).adr_63_40()) @@ -18,9 +20,13 @@ pub(super) fn read_sysmem_flush_page_ga100(bar: &Bar0) -> u64 { pub(super) fn write_sysmem_flush_page_ga100(bar: &Bar0, addr: u64) { regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::default() + // CAST: `as u32` is used on purpose since the remaining bits are guaranteed to fit within + // a `u32`. .set_adr_63_40((addr >> FLUSH_SYSMEM_ADDR_SHIFT_HI) as u32) .write(bar); regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::default() + // CAST: `as u32` is used on purpose since we want to strip the upper bits that have been + // written to `NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI`. 
.set_adr_39_08((addr >> FLUSH_SYSMEM_ADDR_SHIFT) as u32) .write(bar); } diff --git a/drivers/gpu/nova-core/fb/hal/ga102.rs b/drivers/gpu/nova-core/fb/hal/ga102.rs index a73b77e39715..734605905031 100644 --- a/drivers/gpu/nova-core/fb/hal/ga102.rs +++ b/drivers/gpu/nova-core/fb/hal/ga102.rs @@ -2,9 +2,11 @@ use kernel::prelude::*; -use crate::driver::Bar0; -use crate::fb::hal::FbHal; -use crate::regs; +use crate::{ + driver::Bar0, + fb::hal::FbHal, + regs, // +}; fn vidmem_size_ga102(bar: &Bar0) -> u64 { regs::NV_USABLE_FB_SIZE_IN_MB::read(bar).usable_fb_size() diff --git a/drivers/gpu/nova-core/fb/hal/tu102.rs b/drivers/gpu/nova-core/fb/hal/tu102.rs index b022c781caf4..eec984f4e816 100644 --- a/drivers/gpu/nova-core/fb/hal/tu102.rs +++ b/drivers/gpu/nova-core/fb/hal/tu102.rs @@ -1,10 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 -use crate::driver::Bar0; -use crate::fb::hal::FbHal; -use crate::regs; use kernel::prelude::*; +use crate::{ + driver::Bar0, + fb::hal::FbHal, + regs, // +}; + /// Shift applied to the sysmem address before it is written into `NV_PFB_NISO_FLUSH_SYSMEM_ADDR`, /// to be used by HALs. pub(super) const FLUSH_SYSMEM_ADDR_SHIFT: u32 = 8; @@ -15,15 +18,13 @@ pub(super) fn read_sysmem_flush_page_gm107(bar: &Bar0) -> u64 { pub(super) fn write_sysmem_flush_page_gm107(bar: &Bar0, addr: u64) -> Result { // Check that the address doesn't overflow the receiving 32-bit register. 
- if addr >> (u32::BITS + FLUSH_SYSMEM_ADDR_SHIFT) == 0 { - regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::default() - .set_adr_39_08((addr >> FLUSH_SYSMEM_ADDR_SHIFT) as u32) - .write(bar); - - Ok(()) - } else { - Err(EINVAL) - } + u32::try_from(addr >> FLUSH_SYSMEM_ADDR_SHIFT) + .map_err(|_| EINVAL) + .map(|addr| { + regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::default() + .set_adr_39_08(addr) + .write(bar) + }) } pub(super) fn display_enabled_gm107(bar: &Bar0) -> bool { diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 4179a74a2342..2d2008b33fb4 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -4,17 +4,24 @@ //! to be loaded into a given execution unit. use core::marker::PhantomData; -use core::mem::size_of; -use kernel::device; -use kernel::firmware; -use kernel::prelude::*; -use kernel::str::CString; -use kernel::transmute::FromBytes; - -use crate::dma::DmaObject; -use crate::falcon::FalconFirmware; -use crate::gpu; +use kernel::{ + device, + firmware, + prelude::*, + str::CString, + transmute::FromBytes, // +}; + +use crate::{ + dma::DmaObject, + falcon::FalconFirmware, + gpu, + num::{ + FromSafeCast, + IntoSafeCast, // + }, +}; pub(crate) mod booter; pub(crate) mod fwsec; @@ -75,7 +82,7 @@ impl FalconUCodeDescV3 { const HDR_SIZE_SHIFT: u32 = 16; const HDR_SIZE_MASK: u32 = 0xffff0000; - ((self.hdr & HDR_SIZE_MASK) >> HDR_SIZE_SHIFT) as usize + ((self.hdr & HDR_SIZE_MASK) >> HDR_SIZE_SHIFT).into_safe_cast() } } @@ -190,8 +197,8 @@ impl<'a> BinFirmware<'a> { /// Returns the data payload of the firmware, or `None` if the data range is out of bounds of /// the firmware image. 
fn data(&self) -> Option<&[u8]> { - let fw_start = self.hdr.data_offset as usize; - let fw_size = self.hdr.data_size as usize; + let fw_start = usize::from_safe_cast(self.hdr.data_offset); + let fw_size = usize::from_safe_cast(self.hdr.data_size); self.fw.get(fw_start..fw_start + fw_size) } diff --git a/drivers/gpu/nova-core/firmware/booter.rs b/drivers/gpu/nova-core/firmware/booter.rs index b4ff1b17e4a0..f107f753214a 100644 --- a/drivers/gpu/nova-core/firmware/booter.rs +++ b/drivers/gpu/nova-core/firmware/booter.rs @@ -4,20 +4,41 @@ //! running on [`Sec2`], that is used on Turing/Ampere to load the GSP firmware into the GSP falcon //! (and optionally unload it through a separate firmware image). -use core::marker::PhantomData; -use core::mem::size_of; -use core::ops::Deref; - -use kernel::device; -use kernel::prelude::*; -use kernel::transmute::FromBytes; - -use crate::dma::DmaObject; -use crate::driver::Bar0; -use crate::falcon::sec2::Sec2; -use crate::falcon::{Falcon, FalconBromParams, FalconFirmware, FalconLoadParams, FalconLoadTarget}; -use crate::firmware::{BinFirmware, FirmwareDmaObject, FirmwareSignature, Signed, Unsigned}; -use crate::gpu::Chipset; +use core::{ + marker::PhantomData, + ops::Deref, // +}; + +use kernel::{ + device, + prelude::*, + transmute::FromBytes, // +}; + +use crate::{ + dma::DmaObject, + driver::Bar0, + falcon::{ + sec2::Sec2, + Falcon, + FalconBromParams, + FalconFirmware, + FalconLoadParams, + FalconLoadTarget, // + }, + firmware::{ + BinFirmware, + FirmwareDmaObject, + FirmwareSignature, + Signed, + Unsigned, // + }, + gpu::Chipset, + num::{ + FromSafeCast, + IntoSafeCast, // + }, +}; /// Local convenience function to return a copy of `S` by reinterpreting the bytes starting at /// `offset` in `slice`. @@ -74,7 +95,7 @@ impl<'a> HsFirmwareV2<'a> { /// /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image. 
fn new(bin_fw: &BinFirmware<'a>) -> Result<Self> { - frombytes_at::<HsHeaderV2>(bin_fw.fw, bin_fw.hdr.header_offset as usize) + frombytes_at::<HsHeaderV2>(bin_fw.fw, bin_fw.hdr.header_offset.into_safe_cast()) .map(|hdr| Self { hdr, fw: bin_fw.fw }) } @@ -83,7 +104,7 @@ impl<'a> HsFirmwareV2<'a> { /// Fails if the offset of the patch location is outside the bounds of the firmware /// image. fn patch_location(&self) -> Result<u32> { - frombytes_at::<u32>(self.fw, self.hdr.patch_loc_offset as usize) + frombytes_at::<u32>(self.fw, self.hdr.patch_loc_offset.into_safe_cast()) } /// Returns an iterator to the signatures of the firmware. The iterator can be empty if the @@ -91,19 +112,23 @@ impl<'a> HsFirmwareV2<'a> { /// /// Fails if the pointed signatures are outside the bounds of the firmware image. fn signatures_iter(&'a self) -> Result<impl Iterator<Item = BooterSignature<'a>>> { - let num_sig = frombytes_at::<u32>(self.fw, self.hdr.num_sig_offset as usize)?; + let num_sig = frombytes_at::<u32>(self.fw, self.hdr.num_sig_offset.into_safe_cast())?; let iter = match self.hdr.sig_prod_size.checked_div(num_sig) { // If there are no signatures, return an iterator that will yield zero elements. None => (&[] as &[u8]).chunks_exact(1), Some(sig_size) => { - let patch_sig = frombytes_at::<u32>(self.fw, self.hdr.patch_sig_offset as usize)?; - let signatures_start = (self.hdr.sig_prod_offset + patch_sig) as usize; + let patch_sig = + frombytes_at::<u32>(self.fw, self.hdr.patch_sig_offset.into_safe_cast())?; + let signatures_start = usize::from_safe_cast(self.hdr.sig_prod_offset + patch_sig); self.fw // Get signatures range. - .get(signatures_start..signatures_start + self.hdr.sig_prod_size as usize) + .get( + signatures_start + ..signatures_start + usize::from_safe_cast(self.hdr.sig_prod_size), + ) .ok_or(EINVAL)? 
- .chunks_exact(sig_size as usize) + .chunks_exact(sig_size.into_safe_cast()) } }; @@ -132,9 +157,9 @@ impl HsSignatureParams { /// Fails if the meta data parameter of `hs_fw` is outside the bounds of the firmware image, or /// if its size doesn't match that of [`HsSignatureParams`]. fn new(hs_fw: &HsFirmwareV2<'_>) -> Result<Self> { - let start = hs_fw.hdr.meta_data_offset as usize; + let start = usize::from_safe_cast(hs_fw.hdr.meta_data_offset); let end = start - .checked_add(hs_fw.hdr.meta_data_size as usize) + .checked_add(hs_fw.hdr.meta_data_size.into_safe_cast()) .ok_or(EINVAL)?; hs_fw @@ -169,7 +194,7 @@ impl HsLoadHeaderV2 { /// /// Fails if the header pointed at by `hs_fw` is not within the bounds of the firmware image. fn new(hs_fw: &HsFirmwareV2<'_>) -> Result<Self> { - frombytes_at::<Self>(hs_fw.fw, hs_fw.hdr.header_offset as usize) + frombytes_at::<Self>(hs_fw.fw, hs_fw.hdr.header_offset.into_safe_cast()) } } @@ -198,12 +223,13 @@ impl HsLoadHeaderV2App { } else { frombytes_at::<Self>( hs_fw.fw, - (hs_fw.hdr.header_offset as usize) + usize::from_safe_cast(hs_fw.hdr.header_offset) // Skip the load header... .checked_add(size_of::<HsLoadHeaderV2>()) // ... and jump to app header `idx`. .and_then(|offset| { - offset.checked_add((idx as usize).checked_mul(size_of::<Self>())?) + offset + .checked_add(usize::from_safe_cast(idx).checked_mul(size_of::<Self>())?) }) .ok_or(EINVAL)?, ) @@ -318,12 +344,12 @@ impl BooterFirmware { dev_err!(dev, "invalid fuse version for Booter firmware\n"); return Err(EINVAL); }; - signatures.nth(idx as usize) + signatures.nth(idx.into_safe_cast()) } } .ok_or(EINVAL)?; - ucode.patch_signature(&signature, patch_loc as usize)? + ucode.patch_signature(&signature, patch_loc.into_safe_cast())? 
} }; diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index 8edbb5c0572c..b28e34d279f4 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -10,20 +10,48 @@ //! - The command to be run, as this firmware can perform several tasks ; //! - The ucode signature, so the GSP falcon can run FWSEC in HS mode. -use core::marker::PhantomData; -use core::mem::{align_of, size_of}; -use core::ops::Deref; - -use kernel::device::{self, Device}; -use kernel::prelude::*; -use kernel::transmute::FromBytes; - -use crate::dma::DmaObject; -use crate::driver::Bar0; -use crate::falcon::gsp::Gsp; -use crate::falcon::{Falcon, FalconBromParams, FalconFirmware, FalconLoadParams, FalconLoadTarget}; -use crate::firmware::{FalconUCodeDescV3, FirmwareDmaObject, FirmwareSignature, Signed, Unsigned}; -use crate::vbios::Vbios; +use core::{ + marker::PhantomData, + mem::size_of, + ops::Deref, // +}; + +use kernel::{ + device::{ + self, + Device, // + }, + prelude::*, + transmute::{ + AsBytes, + FromBytes, // + }, +}; + +use crate::{ + dma::DmaObject, + driver::Bar0, + falcon::{ + gsp::Gsp, + Falcon, + FalconBromParams, + FalconFirmware, + FalconLoadParams, + FalconLoadTarget, // + }, + firmware::{ + FalconUCodeDescV3, + FirmwareDmaObject, + FirmwareSignature, + Signed, + Unsigned, // + }, + num::{ + FromSafeCast, + IntoSafeCast, // + }, + vbios::Vbios, +}; const NVFW_FALCON_APPIF_ID_DMEMMAPPER: u32 = 0x4; @@ -35,7 +63,7 @@ struct FalconAppifHdrV1 { entry_size: u8, entry_count: u8, } -// SAFETY: any byte sequence is valid for this struct. +// SAFETY: Any byte sequence is valid for this struct. unsafe impl FromBytes for FalconAppifHdrV1 {} #[repr(C, packed)] @@ -44,7 +72,7 @@ struct FalconAppifV1 { id: u32, dmem_base: u32, } -// SAFETY: any byte sequence is valid for this struct. +// SAFETY: Any byte sequence is valid for this struct. 
unsafe impl FromBytes for FalconAppifV1 {} #[derive(Debug)] @@ -68,8 +96,10 @@ struct FalconAppifDmemmapperV3 { ucode_cmd_mask1: u32, multi_tgt_tbl: u32, } -// SAFETY: any byte sequence is valid for this struct. +// SAFETY: Any byte sequence is valid for this struct. unsafe impl FromBytes for FalconAppifDmemmapperV3 {} +// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. +unsafe impl AsBytes for FalconAppifDmemmapperV3 {} #[derive(Debug)] #[repr(C, packed)] @@ -80,8 +110,10 @@ struct ReadVbios { size: u32, flags: u32, } -// SAFETY: any byte sequence is valid for this struct. +// SAFETY: Any byte sequence is valid for this struct. unsafe impl FromBytes for ReadVbios {} +// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. +unsafe impl AsBytes for ReadVbios {} #[derive(Debug)] #[repr(C, packed)] @@ -92,8 +124,10 @@ struct FrtsRegion { size: u32, ftype: u32, } -// SAFETY: any byte sequence is valid for this struct. +// SAFETY: Any byte sequence is valid for this struct. unsafe impl FromBytes for FrtsRegion {} +// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. +unsafe impl AsBytes for FrtsRegion {} const NVFW_FRTS_CMD_REGION_TYPE_FB: u32 = 2; @@ -102,8 +136,10 @@ struct FrtsCmd { read_vbios: ReadVbios, frts_region: FrtsRegion, } -// SAFETY: any byte sequence is valid for this struct. +// SAFETY: Any byte sequence is valid for this struct. unsafe impl FromBytes for FrtsCmd {} +// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. 
+unsafe impl AsBytes for FrtsCmd {} const NVFW_FALCON_APPIF_DMEMMAPPER_CMD_FRTS: u32 = 0x15; const NVFW_FALCON_APPIF_DMEMMAPPER_CMD_SB: u32 = 0x19; @@ -147,26 +183,15 @@ impl FirmwareSignature<FwsecFirmware> for Bcrt30Rsa3kSignature {} /// /// # Safety /// -/// Callers must ensure that the region of memory returned is not written for as long as the -/// returned reference is alive. -/// -/// TODO[TRSM][COHA]: Remove this and `transmute_mut` once `CoherentAllocation::as_slice` is -/// available and we have a way to transmute objects implementing FromBytes, e.g.: -/// https://lore.kernel.org/lkml/20250330234039.29814-1-christiansantoslima21@gmail.com/ -unsafe fn transmute<'a, 'b, T: Sized + FromBytes>( - fw: &'a DmaObject, - offset: usize, -) -> Result<&'b T> { - if offset + size_of::<T>() > fw.size() { - return Err(EINVAL); - } - if (fw.start_ptr() as usize + offset) % align_of::<T>() != 0 { - return Err(EINVAL); - } - - // SAFETY: we have checked that the pointer is properly aligned that its pointed memory is - // large enough the contains an instance of `T`, which implements `FromBytes`. - Ok(unsafe { &*(fw.start_ptr().add(offset).cast::<T>()) }) +/// * Callers must ensure that the device does not read/write to/from memory while the returned +/// reference is live. +/// * Callers must ensure that this call does not race with a write to the same region while +/// the returned reference is live. +unsafe fn transmute<T: Sized + FromBytes>(fw: &DmaObject, offset: usize) -> Result<&T> { + // SAFETY: The safety requirements of the function guarantee the device won't read + // or write to memory while the reference is alive and that this call won't race + // with writes to the same memory region. + T::from_bytes(unsafe { fw.as_slice(offset, size_of::<T>())? 
}).ok_or(EINVAL) } /// Reinterpret the area starting from `offset` in `fw` as a mutable instance of `T` (which must @@ -174,22 +199,18 @@ unsafe fn transmute<'a, 'b, T: Sized + FromBytes>( /// /// # Safety /// -/// Callers must ensure that the region of memory returned is not read or written for as long as -/// the returned reference is alive. -unsafe fn transmute_mut<'a, 'b, T: Sized + FromBytes>( - fw: &'a mut DmaObject, +/// * Callers must ensure that the device does not read/write to/from memory while the returned +/// slice is live. +/// * Callers must ensure that this call does not race with a read or write to the same region +/// while the returned slice is live. +unsafe fn transmute_mut<T: Sized + FromBytes + AsBytes>( + fw: &mut DmaObject, offset: usize, -) -> Result<&'b mut T> { - if offset + size_of::<T>() > fw.size() { - return Err(EINVAL); - } - if (fw.start_ptr_mut() as usize + offset) % align_of::<T>() != 0 { - return Err(EINVAL); - } - - // SAFETY: we have checked that the pointer is properly aligned that its pointed memory is - // large enough the contains an instance of `T`, which implements `FromBytes`. - Ok(unsafe { &mut *(fw.start_ptr_mut().add(offset).cast::<T>()) }) +) -> Result<&mut T> { + // SAFETY: The safety requirements of the function guarantee the device won't read + // or write to memory while the reference is alive and that this call won't race + // with writes or reads to the same memory region. + T::from_bytes_mut(unsafe { fw.as_slice_mut(offset, size_of::<T>())? }).ok_or(EINVAL) } /// The FWSEC microcode, extracted from the BIOS and to be run on the GSP falcon. 
@@ -250,7 +271,7 @@ impl FirmwareDmaObject<FwsecFirmware, Unsigned> { let ucode = bios.fwsec_image().ucode(desc)?; let mut dma_object = DmaObject::from_data(dev, ucode)?; - let hdr_offset = (desc.imem_load_size + desc.interface_offset) as usize; + let hdr_offset = usize::from_safe_cast(desc.imem_load_size + desc.interface_offset); // SAFETY: we have exclusive access to `dma_object`. let hdr: &FalconAppifHdrV1 = unsafe { transmute(&dma_object, hdr_offset) }?; @@ -259,61 +280,62 @@ impl FirmwareDmaObject<FwsecFirmware, Unsigned> { } // Find the DMEM mapper section in the firmware. - for i in 0..hdr.entry_count as usize { - let app: &FalconAppifV1 = + for i in 0..usize::from(hdr.entry_count) { // SAFETY: we have exclusive access to `dma_object`. - unsafe { + let app: &FalconAppifV1 = unsafe { transmute( &dma_object, - hdr_offset + hdr.header_size as usize + i * hdr.entry_size as usize + hdr_offset + usize::from(hdr.header_size) + i * usize::from(hdr.entry_size), ) }?; if app.id != NVFW_FALCON_APPIF_ID_DMEMMAPPER { continue; } + let dmem_base = app.dmem_base; // SAFETY: we have exclusive access to `dma_object`. let dmem_mapper: &mut FalconAppifDmemmapperV3 = unsafe { transmute_mut( &mut dma_object, - (desc.imem_load_size + app.dmem_base) as usize, + (desc.imem_load_size + dmem_base).into_safe_cast(), ) }?; + dmem_mapper.init_cmd = match cmd { + FwsecCommand::Frts { .. } => NVFW_FALCON_APPIF_DMEMMAPPER_CMD_FRTS, + FwsecCommand::Sb => NVFW_FALCON_APPIF_DMEMMAPPER_CMD_SB, + }; + let cmd_in_buffer_offset = dmem_mapper.cmd_in_buffer_offset; + // SAFETY: we have exclusive access to `dma_object`. 
let frts_cmd: &mut FrtsCmd = unsafe { transmute_mut( &mut dma_object, - (desc.imem_load_size + dmem_mapper.cmd_in_buffer_offset) as usize, + (desc.imem_load_size + cmd_in_buffer_offset).into_safe_cast(), ) }?; frts_cmd.read_vbios = ReadVbios { ver: 1, - hdr: size_of::<ReadVbios>() as u32, + hdr: u32::try_from(size_of::<ReadVbios>())?, addr: 0, size: 0, flags: 2, }; - - dmem_mapper.init_cmd = match cmd { - FwsecCommand::Frts { - frts_addr, - frts_size, - } => { - frts_cmd.frts_region = FrtsRegion { - ver: 1, - hdr: size_of::<FrtsRegion>() as u32, - addr: (frts_addr >> 12) as u32, - size: (frts_size >> 12) as u32, - ftype: NVFW_FRTS_CMD_REGION_TYPE_FB, - }; - - NVFW_FALCON_APPIF_DMEMMAPPER_CMD_FRTS - } - FwsecCommand::Sb => NVFW_FALCON_APPIF_DMEMMAPPER_CMD_SB, - }; + if let FwsecCommand::Frts { + frts_addr, + frts_size, + } = cmd + { + frts_cmd.frts_region = FrtsRegion { + ver: 1, + hdr: u32::try_from(size_of::<FrtsRegion>())?, + addr: u32::try_from(frts_addr >> 12)?, + size: u32::try_from(frts_size >> 12)?, + ftype: NVFW_FRTS_CMD_REGION_TYPE_FB, + }; + } // Return early as we found and patched the DMEMMAPPER region. return Ok(Self(dma_object, PhantomData)); @@ -338,7 +360,7 @@ impl FwsecFirmware { // Patch signature if needed. let desc = bios.fwsec_image().header()?; let ucode_signed = if desc.signature_count != 0 { - let sig_base_img = (desc.imem_load_size + desc.pkc_data_offset) as usize; + let sig_base_img = usize::from_safe_cast(desc.imem_load_size + desc.pkc_data_offset); let desc_sig_versions = u32::from(desc.signature_versions); let reg_fuse_version = falcon.signature_reg_fuse_version(bar, desc.engine_id_mask, desc.ucode_id)?; @@ -369,7 +391,7 @@ impl FwsecFirmware { // Mask of the bits of `desc_sig_versions` to preserve. 
let reg_fuse_version_mask = reg_fuse_version_bit.wrapping_sub(1); - (desc_sig_versions & reg_fuse_version_mask).count_ones() as usize + usize::from_safe_cast((desc_sig_versions & reg_fuse_version_mask).count_ones()) }; dev_dbg!(dev, "patching signature with index {}\n", signature_idx); diff --git a/drivers/gpu/nova-core/firmware/gsp.rs b/drivers/gpu/nova-core/firmware/gsp.rs index 9b70095434c6..0549805282ab 100644 --- a/drivers/gpu/nova-core/firmware/gsp.rs +++ b/drivers/gpu/nova-core/firmware/gsp.rs @@ -2,16 +2,30 @@ use core::mem::size_of_val; -use kernel::device; -use kernel::dma::{DataDirection, DmaAddress}; -use kernel::kvec; -use kernel::prelude::*; -use kernel::scatterlist::{Owned, SGTable}; +use kernel::{ + device, + dma::{ + DataDirection, + DmaAddress, // + }, + kvec, + prelude::*, + scatterlist::{ + Owned, + SGTable, // + }, +}; -use crate::dma::DmaObject; -use crate::firmware::riscv::RiscvFirmware; -use crate::gpu::{Architecture, Chipset}; -use crate::gsp::GSP_PAGE_SIZE; +use crate::{ + dma::DmaObject, + firmware::riscv::RiscvFirmware, + gpu::{ + Architecture, + Chipset, // + }, + gsp::GSP_PAGE_SIZE, + num::FromSafeCast, +}; /// Ad-hoc and temporary module to extract sections from ELF images. /// @@ -129,11 +143,11 @@ pub(crate) struct GspFirmware { /// Level 0 page table (single 4KB page) with one entry: DMA address of first level 1 page. level0: DmaObject, /// Size in bytes of the firmware contained in [`Self::fw`]. - size: usize, + pub(crate) size: usize, /// Device-mapped GSP signatures matching the GPU's [`Chipset`]. - signatures: DmaObject, + pub(crate) signatures: DmaObject, /// GSP bootloader, verifies the GSP firmware before loading and running it. 
- bootloader: RiscvFirmware, + pub(crate) bootloader: RiscvFirmware, } impl GspFirmware { @@ -150,6 +164,7 @@ impl GspFirmware { let sigs_section = match chipset.arch() { Architecture::Ampere => ".fwsignature_ga10x", + Architecture::Ada => ".fwsignature_ad10x", _ => return Err(ENOTSUPP), }; let signatures = elf::elf64_section(fw.data(), sigs_section) @@ -202,10 +217,10 @@ impl GspFirmware { let mut level0_data = kvec![0u8; GSP_PAGE_SIZE]?; // Fill level 1 page entry. - #[allow(clippy::useless_conversion)] - let level1_entry = u64::from(level1.iter().next().unwrap().dma_address()); - let dst = &mut level0_data[..size_of_val(&level1_entry)]; - dst.copy_from_slice(&level1_entry.to_le_bytes()); + let level1_entry = level1.iter().next().ok_or(EINVAL)?; + let level1_entry_addr = level1_entry.dma_address(); + let dst = &mut level0_data[..size_of_val(&level1_entry_addr)]; + dst.copy_from_slice(&level1_entry_addr.to_le_bytes()); // Turn the level0 page table into a [`DmaObject`]. DmaObject::from_data(dev, &level0_data)? @@ -216,7 +231,6 @@ impl GspFirmware { })) } - #[expect(unused)] /// Returns the DMA handle of the radix3 level 0 page table. pub(crate) fn radix3_dma_handle(&self) -> DmaAddress { self.level0.dma_handle() @@ -231,10 +245,11 @@ impl GspFirmware { fn map_into_lvl(sg_table: &SGTable<Owned<VVec<u8>>>, mut dst: VVec<u8>) -> Result<VVec<u8>> { for sg_entry in sg_table.iter() { // Number of pages we need to map. 
- let num_pages = (sg_entry.dma_len() as usize).div_ceil(GSP_PAGE_SIZE); + let num_pages = usize::from_safe_cast(sg_entry.dma_len()).div_ceil(GSP_PAGE_SIZE); for i in 0..num_pages { - let entry = sg_entry.dma_address() + (i as u64 * GSP_PAGE_SIZE as u64); + let entry = sg_entry.dma_address() + + (u64::from_safe_cast(i) * u64::from_safe_cast(GSP_PAGE_SIZE)); dst.extend_from_slice(&entry.to_le_bytes(), GFP_KERNEL)?; } } diff --git a/drivers/gpu/nova-core/firmware/riscv.rs b/drivers/gpu/nova-core/firmware/riscv.rs index afb08f5bc4ba..28dfef63657a 100644 --- a/drivers/gpu/nova-core/firmware/riscv.rs +++ b/drivers/gpu/nova-core/firmware/riscv.rs @@ -5,13 +5,18 @@ use core::mem::size_of; -use kernel::device; -use kernel::firmware::Firmware; -use kernel::prelude::*; -use kernel::transmute::FromBytes; +use kernel::{ + device, + firmware::Firmware, + prelude::*, + transmute::FromBytes, // +}; -use crate::dma::DmaObject; -use crate::firmware::BinFirmware; +use crate::{ + dma::DmaObject, + firmware::BinFirmware, + num::FromSafeCast, // +}; /// Descriptor for microcode running on a RISC-V core. #[repr(C)] @@ -41,7 +46,7 @@ impl RmRiscvUCodeDesc { /// /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image. fn new(bin_fw: &BinFirmware<'_>) -> Result<Self> { - let offset = bin_fw.hdr.header_offset as usize; + let offset = usize::from_safe_cast(bin_fw.hdr.header_offset); bin_fw .fw @@ -52,18 +57,17 @@ impl RmRiscvUCodeDesc { } /// A parsed firmware for a RISC-V core, ready to be loaded and run. -#[expect(unused)] pub(crate) struct RiscvFirmware { /// Offset at which the code starts in the firmware image. - code_offset: u32, + pub(crate) code_offset: u32, /// Offset at which the data starts in the firmware image. - data_offset: u32, + pub(crate) data_offset: u32, /// Offset at which the manifest starts in the firmware image. - manifest_offset: u32, + pub(crate) manifest_offset: u32, /// Application version. 
- app_version: u32, + pub(crate) app_version: u32, /// Device-mapped firmware image. - ucode: DmaObject, + pub(crate) ucode: DmaObject, } impl RiscvFirmware { @@ -74,8 +78,8 @@ impl RiscvFirmware { let riscv_desc = RmRiscvUCodeDesc::new(&bin_fw)?; let ucode = { - let start = bin_fw.hdr.data_offset as usize; - let len = bin_fw.hdr.data_size as usize; + let start = usize::from_safe_cast(bin_fw.hdr.data_offset); + let len = usize::from_safe_cast(bin_fw.hdr.data_size); DmaObject::from_data(dev, fw.data().get(start..start + len).ok_or(EINVAL)?)? }; diff --git a/drivers/gpu/nova-core/gfw.rs b/drivers/gpu/nova-core/gfw.rs index 8ac1ed187199..9121f400046d 100644 --- a/drivers/gpu/nova-core/gfw.rs +++ b/drivers/gpu/nova-core/gfw.rs @@ -18,13 +18,16 @@ //! //! Note that the devinit sequence also needs to run during suspend/resume. -use kernel::bindings; -use kernel::prelude::*; -use kernel::time::Delta; +use kernel::{ + io::poll::read_poll_timeout, + prelude::*, + time::Delta, // +}; -use crate::driver::Bar0; -use crate::regs; -use crate::util; +use crate::{ + driver::Bar0, + regs, // +}; /// Wait for the `GFW` (GPU firmware) boot completion signal (`GFW_BOOT`), or a 4 seconds timeout. /// @@ -50,22 +53,19 @@ pub(crate) fn wait_gfw_boot_completion(bar: &Bar0) -> Result { // // TIMEOUT: arbitrarily large value. GFW starts running immediately after the GPU is put out of // reset, and should complete in less time than that. - util::wait_on(Delta::from_secs(4), || { - // Check that FWSEC has lowered its protection level before reading the GFW_BOOT status. - let gfw_booted = regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK::read(bar) - .read_protection_level0() - && regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT::read(bar).completed(); - - if gfw_booted { - Some(()) - } else { - // TODO[DLAY]: replace with [1] once it merges. 
- // [1] https://lore.kernel.org/rust-for-linux/20250423192857.199712-6-fujita.tomonori@gmail.com/ - // - // SAFETY: `msleep()` is safe to call with any parameter. - unsafe { bindings::msleep(1) }; - - None - } - }) + read_poll_timeout( + || { + Ok( + // Check that FWSEC has lowered its protection level before reading the GFW_BOOT + // status. + regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK::read(bar) + .read_protection_level0() + && regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT::read(bar).completed(), + ) + }, + |&gfw_booted| gfw_booted, + Delta::from_millis(1), + Delta::from_secs(4), + ) + .map(|_| ()) } diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index af20e2daea24..629c9d2dc994 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -1,13 +1,26 @@ // SPDX-License-Identifier: GPL-2.0 -use kernel::{device, devres::Devres, error::code::*, fmt, pci, prelude::*, sync::Arc}; +use kernel::{ + device, + devres::Devres, + fmt, + pci, + prelude::*, + sync::Arc, // +}; -use crate::driver::Bar0; -use crate::falcon::{gsp::Gsp as GspFalcon, sec2::Sec2 as Sec2Falcon, Falcon}; -use crate::fb::SysmemFlush; -use crate::gfw; -use crate::gsp::Gsp; -use crate::regs; +use crate::{ + driver::Bar0, + falcon::{ + gsp::Gsp as GspFalcon, + sec2::Sec2 as Sec2Falcon, + Falcon, // + }, + fb::SysmemFlush, + gfw, + gsp::Gsp, + regs, +}; macro_rules! define_chipset { ({ $($variant:ident = $value:expr),* $(,)* }) => @@ -109,8 +122,14 @@ impl fmt::Display for Chipset { } /// Enum representation of the GPU generation. -#[derive(fmt::Debug)] +/// +/// TODO: remove the `Default` trait implementation, and the `#[default]` +/// attribute, once the register!() macro (which creates Architecture items) no +/// longer requires it for read-only fields. 
+#[derive(fmt::Debug, Default, Copy, Clone)] +#[repr(u8)] pub(crate) enum Architecture { + #[default] Turing = 0x16, Ampere = 0x17, Ada = 0x19, @@ -129,13 +148,20 @@ impl TryFrom<u8> for Architecture { } } +impl From<Architecture> for u8 { + fn from(value: Architecture) -> Self { + // CAST: `Architecture` is `repr(u8)`, so this cast is always lossless. + value as u8 + } +} + pub(crate) struct Revision { major: u8, minor: u8, } -impl Revision { - fn from_boot0(boot0: regs::NV_PMC_BOOT_0) -> Self { +impl From<regs::NV_PMC_BOOT_42> for Revision { + fn from(boot0: regs::NV_PMC_BOOT_42) -> Self { Self { major: boot0.major_revision(), minor: boot0.minor_revision(), @@ -149,24 +175,67 @@ impl fmt::Display for Revision { } } -/// Structure holding the metadata of the GPU. +/// Structure holding a basic description of the GPU: `Chipset` and `Revision`. pub(crate) struct Spec { chipset: Chipset, - /// The revision of the chipset. revision: Revision, } impl Spec { - fn new(bar: &Bar0) -> Result<Spec> { + fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> { + // Some brief notes about boot0 and boot42, in chronological order: + // + // NV04 through NV50: + // + // Not supported by Nova. boot0 is necessary and sufficient to identify these GPUs. + // boot42 may not even exist on some of these GPUs. + // + // Fermi through Volta: + // + // Not supported by Nova. boot0 is still sufficient to identify these GPUs, but boot42 + // is also guaranteed to be both present and accurate. + // + // Turing and later: + // + // Supported by Nova. Identified by first checking boot0 to ensure that the GPU is not + // from an earlier (pre-Fermi) era, and then using boot42 to precisely identify the GPU. + // Somewhere in the Rubin timeframe, boot0 will no longer have space to add new GPU IDs. 
+ let boot0 = regs::NV_PMC_BOOT_0::read(bar); + if boot0.is_older_than_fermi() { + return Err(ENODEV); + } + + let boot42 = regs::NV_PMC_BOOT_42::read(bar); + Spec::try_from(boot42).inspect_err(|_| { + dev_err!(dev, "Unsupported chipset: {}\n", boot42); + }) + } +} + +impl TryFrom<regs::NV_PMC_BOOT_42> for Spec { + type Error = Error; + + fn try_from(boot42: regs::NV_PMC_BOOT_42) -> Result<Self> { Ok(Self { - chipset: boot0.chipset()?, - revision: Revision::from_boot0(boot0), + chipset: boot42.chipset()?, + revision: boot42.into(), }) } } +impl fmt::Display for Spec { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_fmt(fmt!( + "Chipset: {}, Architecture: {:?}, Revision: {}", + self.chipset, + self.chipset.arch(), + self.revision + )) + } +} + /// Structure holding the resources required to operate the GPU. #[pin_data] pub(crate) struct Gpu { @@ -192,14 +261,8 @@ impl Gpu { bar: &'a Bar0, ) -> impl PinInit<Self, Error> + 'a { try_pin_init!(Self { - spec: Spec::new(bar).inspect(|spec| { - dev_info!( - pdev.as_ref(), - "NVIDIA (Chipset: {}, Architecture: {:?}, Revision: {})\n", - spec.chipset, - spec.chipset.arch(), - spec.revision - ); + spec: Spec::new(pdev.as_ref(), bar).inspect(|spec| { + dev_info!(pdev.as_ref(),"NVIDIA ({})\n", spec); })?, // We must wait for GFW_BOOT completion before doing any significant setup on the GPU. @@ -213,14 +276,12 @@ impl Gpu { gsp_falcon: Falcon::new( pdev.as_ref(), spec.chipset, - bar, - spec.chipset > Chipset::GA100, ) .inspect(|falcon| falcon.clear_swgen0_intr(bar))?, - sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset, bar, true)?, + sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset)?, - gsp <- Gsp::new(), + gsp <- Gsp::new(pdev)?, _: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? 
}, diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index 64e472e7a9d3..fb6f74797178 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -2,21 +2,160 @@ mod boot; -use kernel::prelude::*; +use kernel::{ + device, + dma::{ + CoherentAllocation, + DmaAddress, // + }, + dma_write, + pci, + prelude::*, + transmute::AsBytes, // +}; +pub(crate) mod cmdq; +pub(crate) mod commands; mod fw; +mod sequencer; + +pub(crate) use fw::{ + GspFwWprMeta, + LibosParams, // +}; + +use crate::{ + gsp::cmdq::Cmdq, + gsp::fw::{ + GspArgumentsCached, + LibosMemoryRegionInitArgument, // + }, + num, +}; pub(crate) const GSP_PAGE_SHIFT: usize = 12; pub(crate) const GSP_PAGE_SIZE: usize = 1 << GSP_PAGE_SHIFT; -/// GSP runtime data. +/// Number of GSP pages to use in a RM log buffer. +const RM_LOG_BUFFER_NUM_PAGES: usize = 0x10; + +/// Array of page table entries, as understood by the GSP bootloader. +#[repr(C)] +struct PteArray<const NUM_ENTRIES: usize>([u64; NUM_ENTRIES]); + +/// SAFETY: arrays of `u64` implement `AsBytes` and we are but a wrapper around one. +unsafe impl<const NUM_ENTRIES: usize> AsBytes for PteArray<NUM_ENTRIES> {} + +impl<const NUM_PAGES: usize> PteArray<NUM_PAGES> { + /// Creates a new page table array mapping `NUM_PAGES` GSP pages starting at address `start`. + fn new(start: DmaAddress) -> Result<Self> { + let mut ptes = [0u64; NUM_PAGES]; + for (i, pte) in ptes.iter_mut().enumerate() { + *pte = start + .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) + .ok_or(EOVERFLOW)?; + } + + Ok(Self(ptes)) + } +} + +/// The logging buffers are byte queues that contain encoded printf-like +/// messages from GSP-RM. They need to be decoded by a special application +/// that can parse the buffers. /// -/// This is an empty pinned placeholder for now. +/// The 'loginit' buffer contains logs from early GSP-RM init and +/// exception dumps. The 'logrm' buffer contains the subsequent logs. 
Both are +/// written to directly by GSP-RM and can be any multiple of GSP_PAGE_SIZE. +/// +/// The physical address map for the log buffer is stored in the buffer +/// itself, starting with offset 1. Offset 0 contains the "put" pointer (pp). +/// Initially, pp is equal to 0. If the buffer has valid logging data in it, +/// then pp points to index into the buffer where the next logging entry will +/// be written. Therefore, the logging data is valid if: +/// 1 <= pp < sizeof(buffer)/sizeof(u64) +struct LogBuffer(CoherentAllocation<u8>); + +impl LogBuffer { + /// Creates a new `LogBuffer` mapped on `dev`. + fn new(dev: &device::Device<device::Bound>) -> Result<Self> { + const NUM_PAGES: usize = RM_LOG_BUFFER_NUM_PAGES; + + let mut obj = Self(CoherentAllocation::<u8>::alloc_coherent( + dev, + NUM_PAGES * GSP_PAGE_SIZE, + GFP_KERNEL | __GFP_ZERO, + )?); + let ptes = PteArray::<NUM_PAGES>::new(obj.0.dma_handle())?; + + // SAFETY: `obj` has just been created and we are its sole user. + unsafe { + // Copy the self-mapping PTE at the expected location. + obj.0 + .as_slice_mut(size_of::<u64>(), size_of_val(&ptes))? + .copy_from_slice(ptes.as_bytes()) + }; + + Ok(obj) + } +} + +/// GSP runtime data. #[pin_data] -pub(crate) struct Gsp {} +pub(crate) struct Gsp { + /// Libos arguments. + pub(crate) libos: CoherentAllocation<LibosMemoryRegionInitArgument>, + /// Init log buffer. + loginit: LogBuffer, + /// Interrupts log buffer. + logintr: LogBuffer, + /// RM log buffer. + logrm: LogBuffer, + /// Command queue. + pub(crate) cmdq: Cmdq, + /// RM arguments. + rmargs: CoherentAllocation<GspArgumentsCached>, +} impl Gsp { - pub(crate) fn new() -> impl PinInit<Self> { - pin_init!(Self {}) + // Creates an in-place initializer for a `Gsp` manager for `pdev`. 
+ pub(crate) fn new(pdev: &pci::Device<device::Bound>) -> Result<impl PinInit<Self, Error>> { + let dev = pdev.as_ref(); + let libos = CoherentAllocation::<LibosMemoryRegionInitArgument>::alloc_coherent( + dev, + GSP_PAGE_SIZE / size_of::<LibosMemoryRegionInitArgument>(), + GFP_KERNEL | __GFP_ZERO, + )?; + + // Initialise the logging structures. The OpenRM equivalents are in: + // _kgspInitLibosLoggingStructures (allocates memory for buffers) + // kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array) + let loginit = LogBuffer::new(dev)?; + dma_write!(libos[0] = LibosMemoryRegionInitArgument::new("LOGINIT", &loginit.0))?; + + let logintr = LogBuffer::new(dev)?; + dma_write!(libos[1] = LibosMemoryRegionInitArgument::new("LOGINTR", &logintr.0))?; + + let logrm = LogBuffer::new(dev)?; + dma_write!(libos[2] = LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0))?; + + let cmdq = Cmdq::new(dev)?; + + let rmargs = CoherentAllocation::<GspArgumentsCached>::alloc_coherent( + dev, + 1, + GFP_KERNEL | __GFP_ZERO, + )?; + dma_write!(rmargs[0] = fw::GspArgumentsCached::new(&cmdq))?; + dma_write!(libos[3] = LibosMemoryRegionInitArgument::new("RMARGS", &rmargs))?; + + Ok(try_pin_init!(Self { + libos, + loginit, + logintr, + logrm, + rmargs, + cmdq, + })) } } diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index 2800f3aee37d..54937606b5b0 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -1,21 +1,47 @@ // SPDX-License-Identifier: GPL-2.0 -use kernel::device; -use kernel::pci; -use kernel::prelude::*; - -use crate::driver::Bar0; -use crate::falcon::{gsp::Gsp, sec2::Sec2, Falcon}; -use crate::fb::FbLayout; -use crate::firmware::{ - booter::{BooterFirmware, BooterKind}, - fwsec::{FwsecCommand, FwsecFirmware}, - gsp::GspFirmware, - FIRMWARE_VERSION, +use kernel::{ + device, + dma::CoherentAllocation, + dma_write, + io::poll::read_poll_timeout, + pci, + prelude::*, + time::Delta, // +}; + +use crate::{ + 
driver::Bar0, + falcon::{ + gsp::Gsp, + sec2::Sec2, + Falcon, // + }, + fb::FbLayout, + firmware::{ + booter::{ + BooterFirmware, + BooterKind, // + }, + fwsec::{ + FwsecCommand, + FwsecFirmware, // + }, + gsp::GspFirmware, + FIRMWARE_VERSION, // + }, + gpu::Chipset, + gsp::{ + commands, + sequencer::{ + GspSequencer, + GspSequencerParams, // + }, + GspFwWprMeta, // + }, + regs, + vbios::Vbios, }; -use crate::gpu::Chipset; -use crate::regs; -use crate::vbios::Vbios; impl super::Gsp { /// Helper function to load and run the FWSEC-FRTS firmware and confirm that it has properly @@ -102,7 +128,7 @@ impl super::Gsp { /// /// Upon return, the GSP is up and running, and its runtime object given as return value. pub(crate) fn boot( - self: Pin<&mut Self>, + mut self: Pin<&mut Self>, pdev: &pci::Device<device::Bound>, bar: &Bar0, chipset: Chipset, @@ -113,17 +139,17 @@ impl super::Gsp { let bios = Vbios::new(dev, bar)?; - let _gsp_fw = KBox::pin_init( + let gsp_fw = KBox::pin_init( GspFirmware::new(dev, chipset, FIRMWARE_VERSION)?, GFP_KERNEL, )?; - let fb_layout = FbLayout::new(chipset, bar)?; + let fb_layout = FbLayout::new(chipset, bar, &gsp_fw)?; dev_dbg!(dev, "{:#x?}\n", fb_layout); Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?; - let _booter_loader = BooterFirmware::new( + let booter_loader = BooterFirmware::new( dev, BooterKind::Loader, chipset, @@ -132,6 +158,95 @@ impl super::Gsp { bar, )?; + let wpr_meta = + CoherentAllocation::<GspFwWprMeta>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?; + dma_write!(wpr_meta[0] = GspFwWprMeta::new(&gsp_fw, &fb_layout))?; + + self.cmdq + .send_command(bar, commands::SetSystemInfo::new(pdev))?; + self.cmdq.send_command(bar, commands::SetRegistry::new())?; + + gsp_falcon.reset(bar)?; + let libos_handle = self.libos.dma_handle(); + let (mbox0, mbox1) = gsp_falcon.boot( + bar, + Some(libos_handle as u32), + Some((libos_handle >> 32) as u32), + )?; + dev_dbg!( + pdev.as_ref(), + "GSP MBOX0: {:#x}, MBOX1: {:#x}\n", 
+ mbox0, + mbox1 + ); + + dev_dbg!( + pdev.as_ref(), + "Using SEC2 to load and run the booter_load firmware...\n" + ); + + sec2_falcon.reset(bar)?; + sec2_falcon.dma_load(bar, &booter_loader)?; + let wpr_handle = wpr_meta.dma_handle(); + let (mbox0, mbox1) = sec2_falcon.boot( + bar, + Some(wpr_handle as u32), + Some((wpr_handle >> 32) as u32), + )?; + dev_dbg!( + pdev.as_ref(), + "SEC2 MBOX0: {:#x}, MBOX1: {:#x}\n", + mbox0, + mbox1 + ); + + if mbox0 != 0 { + dev_err!( + pdev.as_ref(), + "Booter-load failed with error {:#x}\n", + mbox0 + ); + return Err(ENODEV); + } + + gsp_falcon.write_os_version(bar, gsp_fw.bootloader.app_version); + + // Poll for RISC-V to become active before running sequencer + read_poll_timeout( + || Ok(gsp_falcon.is_riscv_active(bar)), + |val: &bool| *val, + Delta::from_millis(10), + Delta::from_secs(5), + )?; + + dev_dbg!( + pdev.as_ref(), + "RISC-V active? {}\n", + gsp_falcon.is_riscv_active(bar), + ); + + // Create and run the GSP sequencer. + let seq_params = GspSequencerParams { + bootloader_app_version: gsp_fw.bootloader.app_version, + libos_dma_handle: libos_handle, + gsp_falcon, + sec2_falcon, + dev: pdev.as_ref().into(), + bar, + }; + GspSequencer::run(&mut self.cmdq, seq_params)?; + + // Wait until GSP is fully initialized. + commands::wait_gsp_init_done(&mut self.cmdq)?; + + // Obtain and display basic GPU information.
+ let info = commands::get_gsp_info(&mut self.cmdq, bar)?; + dev_info!( + pdev.as_ref(), + "GPU name: {}\n", + info.gpu_name().unwrap_or("invalid GPU name") + ); + Ok(()) } } diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs new file mode 100644 index 000000000000..6f946d14868a --- /dev/null +++ b/drivers/gpu/nova-core/gsp/cmdq.rs @@ -0,0 +1,679 @@ +// SPDX-License-Identifier: GPL-2.0 + +use core::{ + cmp, + mem, + sync::atomic::{ + fence, + Ordering, // + }, // +}; + +use kernel::{ + device, + dma::{ + CoherentAllocation, + DmaAddress, // + }, + dma_write, + io::poll::read_poll_timeout, + prelude::*, + sync::aref::ARef, + time::Delta, + transmute::{ + AsBytes, + FromBytes, // + }, +}; + +use crate::{ + driver::Bar0, + gsp::{ + fw::{ + GspMsgElement, + MsgFunction, + MsgqRxHeader, + MsgqTxHeader, // + }, + PteArray, + GSP_PAGE_SHIFT, + GSP_PAGE_SIZE, // + }, + num, + regs, + sbuffer::SBufferIter, // +}; + +/// Trait implemented by types representing a command to send to the GSP. +/// +/// The main purpose of this trait is to provide [`Cmdq::send_command`] with the information it +/// needs to send a given command. +/// +/// [`CommandToGsp::init`] in particular is responsible for initializing the command directly +/// into the space reserved for it in the command queue buffer. +/// +/// Some commands may be followed by a variable-length payload. For these, the +/// [`CommandToGsp::variable_payload_len`] and [`CommandToGsp::init_variable_payload`] need to be +/// defined as well. +pub(crate) trait CommandToGsp { + /// Function identifying this command to the GSP. + const FUNCTION: MsgFunction; + + /// Type generated by [`CommandToGsp::init`], to be written into the command queue buffer. + type Command: FromBytes + AsBytes; + + /// Error type returned by [`CommandToGsp::init`]. + type InitError; + + /// In-place command initializer responsible for filling the command in the command queue + /// buffer. 
+ fn init(&self) -> impl Init<Self::Command, Self::InitError>; + + /// Size of the variable-length payload following the command structure generated by + /// [`CommandToGsp::init`]. + /// + /// Most commands don't have a variable-length payload, so this is zero by default. + fn variable_payload_len(&self) -> usize { + 0 + } + + /// Method initializing the variable-length payload. + /// + /// The command buffer is circular, which means that we may need to jump back to its beginning + /// while in the middle of a command. For this reason, the variable-length payload is + /// initialized using a [`SBufferIter`]. + /// + /// This method will receive a buffer of the length returned by + /// [`CommandToGsp::variable_payload_len`], and must write every single byte of it. Leaving + /// unwritten space will lead to an error. + /// + /// Most commands don't have a variable-length payload, so this does nothing by default. + fn init_variable_payload( + &self, + _dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>, + ) -> Result { + Ok(()) + } +} + +/// Trait representing messages received from the GSP. +/// +/// This trait tells [`Cmdq::receive_msg`] how it can receive a given type of message. +pub(crate) trait MessageFromGsp: Sized { + /// Function identifying this message from the GSP. + const FUNCTION: MsgFunction; + + /// Error type returned by [`MessageFromGsp::read`]. + type InitError; + + /// Type containing the raw message to be read from the message queue. + type Message: FromBytes; + + /// Method reading the message from the message queue and returning it. + /// + /// From a `Self::Message` and a [`SBufferIter`], constructs an instance of `Self` and returns + /// it. + fn read( + msg: &Self::Message, + sbuffer: &mut SBufferIter<core::array::IntoIter<&[u8], 2>>, + ) -> Result<Self, Self::InitError>; +} + +/// Number of GSP pages making the [`Msgq`]. +pub(crate) const MSGQ_NUM_PAGES: u32 = 0x3f; + +/// Circular buffer of a [`Msgq`]. 
+/// +/// This area of memory is to be shared between the driver and the GSP to exchange commands or +/// messages. +#[repr(C, align(0x1000))] +#[derive(Debug)] +struct MsgqData { + data: [[u8; GSP_PAGE_SIZE]; num::u32_as_usize(MSGQ_NUM_PAGES)], +} + +// Annoyingly we are forced to use a literal to specify the alignment of +// `MsgqData`, so check that it corresponds to the actual GSP page size here. +static_assert!(align_of::<MsgqData>() == GSP_PAGE_SIZE); + +/// Unidirectional message queue. +/// +/// Contains the data for a message queue, that either the driver or GSP writes to. +/// +/// Note that while the write pointer of `tx` corresponds to the `msgq` of the same instance, the +/// read pointer of `rx` actually refers to the `Msgq` owned by the other side. +/// This design ensures that only the driver or GSP ever writes to a given instance of this struct. +#[repr(C)] +// There is no struct defined for this in the open-gpu-kernel-source headers. +// Instead it is defined by code in `GspMsgQueuesInit()`. +struct Msgq { + /// Header for sending messages, including the write pointer. + tx: MsgqTxHeader, + /// Header for receiving messages, including the read pointer. + rx: MsgqRxHeader, + /// The message queue proper. + msgq: MsgqData, +} + +/// Structure shared between the driver and the GSP and containing the command and message queues. +#[repr(C)] +struct GspMem { + /// Self-mapping page table entries. + ptes: PteArray<{ GSP_PAGE_SIZE / size_of::<u64>() }>, + /// CPU queue: the driver writes commands here, and the GSP reads them. It also contains the + /// write and read pointers that the CPU updates. + /// + /// This member is read-only for the GSP. + cpuq: Msgq, + /// GSP queue: the GSP writes messages here, and the driver reads them. It also contains the + /// write and read pointers that the GSP updates. + /// + /// This member is read-only for the driver. 
+ gspq: Msgq, +} + +// SAFETY: These structs don't meet the no-padding requirements of AsBytes but +// that is not a problem because they are not used outside the kernel. +unsafe impl AsBytes for GspMem {} + +// SAFETY: These structs don't meet the no-padding requirements of FromBytes but +// that is not a problem because they are not used outside the kernel. +unsafe impl FromBytes for GspMem {} + +/// Wrapper around [`GspMem`] to share it with the GPU using a [`CoherentAllocation`]. +/// +/// This provides the low-level functionality to communicate with the GSP, including allocation of +/// queue space to write messages to and management of read/write pointers. +/// +/// This is shared with the GSP, with clear ownership rules regarding the command queues: +/// +/// * The driver owns (i.e. can write to) the part of the CPU message queue between the CPU write +/// pointer and the GSP read pointer. This region is returned by [`Self::driver_write_area`]. +/// * The driver owns (i.e. can read from) the part of the GSP message queue between the CPU read +/// pointer and the GSP write pointer. This region is returned by [`Self::driver_read_area`]. +struct DmaGspMem(CoherentAllocation<GspMem>); + +impl DmaGspMem { + /// Allocate a new instance and map it for `dev`. + fn new(dev: &device::Device<device::Bound>) -> Result<Self> { + const MSGQ_SIZE: u32 = num::usize_into_u32::<{ size_of::<Msgq>() }>(); + const RX_HDR_OFF: u32 = num::usize_into_u32::<{ mem::offset_of!(Msgq, rx) }>(); + + let gsp_mem = + CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?; + dma_write!(gsp_mem[0].ptes = PteArray::new(gsp_mem.dma_handle())?)?; + dma_write!(gsp_mem[0].cpuq.tx = MsgqTxHeader::new(MSGQ_SIZE, RX_HDR_OFF, MSGQ_NUM_PAGES))?; + dma_write!(gsp_mem[0].cpuq.rx = MsgqRxHeader::new())?; + + Ok(Self(gsp_mem)) + } + + /// Returns the region of the CPU message queue that the driver is currently allowed to write + /// to. 
+ /// + /// As the message queue is a circular buffer, the region may be discontiguous in memory. In + /// that case the second slice will have a non-zero length. + fn driver_write_area(&mut self) -> (&mut [[u8; GSP_PAGE_SIZE]], &mut [[u8; GSP_PAGE_SIZE]]) { + let tx = self.cpu_write_ptr() as usize; + let rx = self.gsp_read_ptr() as usize; + + // SAFETY: + // - The `CoherentAllocation` contains exactly one object. + // - We will only access the driver-owned part of the shared memory. + // - Per the safety statement of the function, no concurrent access will be performed. + let gsp_mem = &mut unsafe { self.0.as_slice_mut(0, 1) }.unwrap()[0]; + // PANIC: per the invariant of `cpu_write_ptr`, `tx` is `<= MSGQ_NUM_PAGES`. + let (before_tx, after_tx) = gsp_mem.cpuq.msgq.data.split_at_mut(tx); + + if rx <= tx { + // The area from `tx` up to the end of the ring, and from the beginning of the ring up + // to `rx`, minus one unit, belongs to the driver. + if rx == 0 { + let last = after_tx.len() - 1; + (&mut after_tx[..last], &mut before_tx[0..0]) + } else { + (after_tx, &mut before_tx[..rx - 1]) + } + } else { + // The area from `tx` to `rx`, minus one unit, belongs to the driver. + // + // PANIC: per the invariants of `cpu_write_ptr` and `gsp_read_ptr`, `rx` and `tx` are + // `<= MSGQ_NUM_PAGES`, and the test above ensured that `rx > tx`. + (after_tx.split_at_mut(rx - tx - 1).0, &mut before_tx[0..0]) + } + } + + /// Returns the region of the GSP message queue that the driver is currently allowed to read + /// from. + /// + /// As the message queue is a circular buffer, the region may be discontiguous in memory. In + /// that case the second slice will have a non-zero length. + fn driver_read_area(&self) -> (&[[u8; GSP_PAGE_SIZE]], &[[u8; GSP_PAGE_SIZE]]) { + let tx = self.gsp_write_ptr() as usize; + let rx = self.cpu_read_ptr() as usize; + + // SAFETY: + // - The `CoherentAllocation` contains exactly one object.
+ // - We will only access the driver-owned part of the shared memory. + // - Per the safety statement of the function, no concurrent access will be performed. + let gsp_mem = &unsafe { self.0.as_slice(0, 1) }.unwrap()[0]; + // PANIC: per the invariant of `cpu_read_ptr`, `rx` is `<= MSGQ_NUM_PAGES`. + let (before_rx, after_rx) = gsp_mem.gspq.msgq.data.split_at(rx); + + match tx.cmp(&rx) { + cmp::Ordering::Equal => (&after_rx[0..0], &after_rx[0..0]), + cmp::Ordering::Greater => (&after_rx[..tx - rx], &before_rx[0..0]), + cmp::Ordering::Less => (after_rx, &before_rx[..tx]), + } + } + + /// Allocates a region on the command queue that is large enough to send a command of `size` + /// bytes. + /// + /// This returns a [`GspCommand`] ready to be written to by the caller. + /// + /// # Errors + /// + /// - `EAGAIN` if the driver area is too small to hold the requested command. + /// - `EIO` if the command header is not properly aligned. + fn allocate_command(&mut self, size: usize) -> Result<GspCommand<'_>> { + // Get the current writable area as an array of bytes. + let (slice_1, slice_2) = { + let (slice_1, slice_2) = self.driver_write_area(); + + #[allow(clippy::incompatible_msrv)] + (slice_1.as_flattened_mut(), slice_2.as_flattened_mut()) + }; + + // If the GSP is still processing previous messages the shared region + // may be full in which case we will have to retry once the GSP has + // processed the existing commands. + if size_of::<GspMsgElement>() + size > slice_1.len() + slice_2.len() { + return Err(EAGAIN); + } + + // Extract area for the `GspMsgElement`. + let (header, slice_1) = GspMsgElement::from_bytes_mut_prefix(slice_1).ok_or(EIO)?; + + // Create the contents area. + let (slice_1, slice_2) = if slice_1.len() > size { + // Contents fits entirely in `slice_1`. + (&mut slice_1[..size], &mut slice_2[0..0]) + } else { + // Need all of `slice_1` and some of `slice_2`.
+ let slice_2_len = size - slice_1.len(); + (slice_1, &mut slice_2[..slice_2_len]) + }; + + Ok(GspCommand { + header, + contents: (slice_1, slice_2), + }) + } + + // Returns the index of the memory page the GSP will write the next message to. + // + // # Invariants + // + // - The returned value is between `0` and `MSGQ_NUM_PAGES`. + fn gsp_write_ptr(&self) -> u32 { + let gsp_mem = self.0.start_ptr(); + + // SAFETY: + // - The 'CoherentAllocation' contains at least one object. + // - By the invariants of `CoherentAllocation` the pointer is valid. + (unsafe { (*gsp_mem).gspq.tx.write_ptr() } % MSGQ_NUM_PAGES) + } + + // Returns the index of the memory page the GSP will read the next command from. + // + // # Invariants + // + // - The returned value is between `0` and `MSGQ_NUM_PAGES`. + fn gsp_read_ptr(&self) -> u32 { + let gsp_mem = self.0.start_ptr(); + + // SAFETY: + // - The 'CoherentAllocation' contains at least one object. + // - By the invariants of `CoherentAllocation` the pointer is valid. + (unsafe { (*gsp_mem).gspq.rx.read_ptr() } % MSGQ_NUM_PAGES) + } + + // Returns the index of the memory page the CPU can read the next message from. + // + // # Invariants + // + // - The returned value is between `0` and `MSGQ_NUM_PAGES`. + fn cpu_read_ptr(&self) -> u32 { + let gsp_mem = self.0.start_ptr(); + + // SAFETY: + // - The ['CoherentAllocation'] contains at least one object. + // - By the invariants of CoherentAllocation the pointer is valid. + (unsafe { (*gsp_mem).cpuq.rx.read_ptr() } % MSGQ_NUM_PAGES) + } + + // Informs the GSP that it can send `elem_count` new pages into the message queue. + fn advance_cpu_read_ptr(&mut self, elem_count: u32) { + let rptr = self.cpu_read_ptr().wrapping_add(elem_count) % MSGQ_NUM_PAGES; + + // Ensure read pointer is properly ordered. + fence(Ordering::SeqCst); + + let gsp_mem = self.0.start_ptr_mut(); + + // SAFETY: + // - The 'CoherentAllocation' contains at least one object. 
+ // - By the invariants of `CoherentAllocation` the pointer is valid. + unsafe { (*gsp_mem).cpuq.rx.set_read_ptr(rptr) }; + } + + // Returns the index of the memory page the CPU can write the next command to. + // + // # Invariants + // + // - The returned value is between `0` and `MSGQ_NUM_PAGES`. + fn cpu_write_ptr(&self) -> u32 { + let gsp_mem = self.0.start_ptr(); + + // SAFETY: + // - The 'CoherentAllocation' contains at least one object. + // - By the invariants of `CoherentAllocation` the pointer is valid. + (unsafe { (*gsp_mem).cpuq.tx.write_ptr() } % MSGQ_NUM_PAGES) + } + + // Informs the GSP that it can process `elem_count` new pages from the command queue. + fn advance_cpu_write_ptr(&mut self, elem_count: u32) { + let wptr = self.cpu_write_ptr().wrapping_add(elem_count) % MSGQ_NUM_PAGES; + let gsp_mem = self.0.start_ptr_mut(); + + // SAFETY: + // - The 'CoherentAllocation' contains at least one object. + // - By the invariants of `CoherentAllocation` the pointer is valid. + unsafe { (*gsp_mem).cpuq.tx.set_write_ptr(wptr) }; + + // Ensure all command data is visible before triggering the GSP read. + fence(Ordering::SeqCst); + } +} + +/// A command ready to be sent on the command queue. +/// +/// This is the type returned by [`DmaGspMem::allocate_command`]. +struct GspCommand<'a> { + // Writable reference to the header of the command. + header: &'a mut GspMsgElement, + // Writable slices to the contents of the command. The second slice is zero unless the command + // loops over the command queue. + contents: (&'a mut [u8], &'a mut [u8]), +} + +/// A message ready to be processed from the message queue. +/// +/// This is the type returned by [`Cmdq::wait_for_msg`]. +struct GspMessage<'a> { + // Reference to the header of the message. + header: &'a GspMsgElement, + // Slices to the contents of the message. The second slice is zero unless the message loops + // over the message queue. + contents: (&'a [u8], &'a [u8]), +} + +/// GSP command queue.
+/// +/// Provides the ability to send commands and receive messages from the GSP using a shared memory +/// area. +pub(crate) struct Cmdq { + /// Device this command queue belongs to. + dev: ARef<device::Device>, + /// Current command sequence number. + seq: u32, + /// Memory area shared with the GSP for communicating commands and messages. + gsp_mem: DmaGspMem, +} + +impl Cmdq { + /// Offset of the data after the PTEs. + const POST_PTE_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq); + + /// Offset of command queue ring buffer. + pub(crate) const CMDQ_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq) + + core::mem::offset_of!(Msgq, msgq) + - Self::POST_PTE_OFFSET; + + /// Offset of message queue ring buffer. + pub(crate) const STATQ_OFFSET: usize = core::mem::offset_of!(GspMem, gspq) + + core::mem::offset_of!(Msgq, msgq) + - Self::POST_PTE_OFFSET; + + /// Number of page table entries for the GSP shared region. + pub(crate) const NUM_PTES: usize = size_of::<GspMem>() >> GSP_PAGE_SHIFT; + + /// Creates a new command queue for `dev`. + pub(crate) fn new(dev: &device::Device<device::Bound>) -> Result<Cmdq> { + let gsp_mem = DmaGspMem::new(dev)?; + + Ok(Cmdq { + dev: dev.into(), + seq: 0, + gsp_mem, + }) + } + + /// Computes the checksum for the message pointed to by `it`. + /// + /// A message is made of several parts, so `it` is an iterator over byte slices representing + /// these parts. + fn calculate_checksum<T: Iterator<Item = u8>>(it: T) -> u32 { + let sum64 = it + .enumerate() + .map(|(idx, byte)| (((idx % 8) * 8) as u32, byte)) + .fold(0, |acc, (rol, byte)| acc ^ u64::from(byte).rotate_left(rol)); + + ((sum64 >> 32) as u32) ^ (sum64 as u32) + } + + /// Notifies the GSP that we have updated the command queue pointers. + fn notify_gsp(bar: &Bar0) { + regs::NV_PGSP_QUEUE_HEAD::default() + .set_address(0) + .write(bar); + } + + /// Sends `command` to the GSP. 
+ /// + /// # Errors + /// + /// - `EAGAIN` if there was not enough space in the command queue to send the command. + /// - `EIO` if the variable payload requested by the command has not been entirely + /// written to by its [`CommandToGsp::init_variable_payload`] method. + /// + /// Error codes returned by the command initializers are propagated as-is. + pub(crate) fn send_command<M>(&mut self, bar: &Bar0, command: M) -> Result + where + M: CommandToGsp, + // This allows all error types, including `Infallible`, to be used for `M::InitError`. + Error: From<M::InitError>, + { + let command_size = size_of::<M::Command>() + command.variable_payload_len(); + let dst = self.gsp_mem.allocate_command(command_size)?; + + // Extract area for the command itself. + let (cmd, payload_1) = M::Command::from_bytes_mut_prefix(dst.contents.0).ok_or(EIO)?; + + // Fill the header and command in-place. + let msg_element = GspMsgElement::init(self.seq, command_size, M::FUNCTION); + // SAFETY: `msg_header` and `cmd` are valid references, and not touched if the initializer + // fails. + unsafe { + msg_element.__init(core::ptr::from_mut(dst.header))?; + command.init().__init(core::ptr::from_mut(cmd))?; + } + + // Fill the variable-length payload. + if command_size > size_of::<M::Command>() { + let mut sbuffer = + SBufferIter::new_writer([&mut payload_1[..], &mut dst.contents.1[..]]); + command.init_variable_payload(&mut sbuffer)?; + + if !sbuffer.is_empty() { + return Err(EIO); + } + } + + // Compute checksum now that the whole message is ready. + dst.header + .set_checksum(Cmdq::calculate_checksum(SBufferIter::new_reader([ + dst.header.as_bytes(), + dst.contents.0, + dst.contents.1, + ]))); + + dev_dbg!( + &self.dev, + "GSP RPC: send: seq# {}, function={}, length=0x{:x}\n", + self.seq, + M::FUNCTION, + dst.header.length(), + ); + + // All set - update the write pointer and inform the GSP of the new command. 
+ let elem_count = dst.header.element_count(); + self.seq += 1; + self.gsp_mem.advance_cpu_write_ptr(elem_count); + Cmdq::notify_gsp(bar); + + Ok(()) + } + + /// Wait for a message to become available on the message queue. + /// + /// This works purely at the transport layer and does not interpret or validate the message + /// beyond the advertised length in its [`GspMsgElement`]. + /// + /// This method returns: + /// + /// - A reference to the [`GspMsgElement`] of the message, + /// - Two byte slices with the contents of the message. The second slice is empty unless the + /// message loops across the message queue. + /// + /// # Errors + /// + /// - `ETIMEDOUT` if `timeout` has elapsed before any message becomes available. + /// - `EIO` if there was some inconsistency (e.g. message shorter than advertised) on the + /// message queue. + /// + /// Error codes returned by the message constructor are propagated as-is. + fn wait_for_msg(&self, timeout: Delta) -> Result<GspMessage<'_>> { + // Wait for a message to arrive from the GSP. + let (slice_1, slice_2) = read_poll_timeout( + || Ok(self.gsp_mem.driver_read_area()), + |driver_area| !driver_area.0.is_empty(), + Delta::from_millis(1), + timeout, + ) + .map(|(slice_1, slice_2)| { + #[allow(clippy::incompatible_msrv)] + (slice_1.as_flattened(), slice_2.as_flattened()) + })?; + + // Extract the `GspMsgElement`. + let (header, slice_1) = GspMsgElement::from_bytes_prefix(slice_1).ok_or(EIO)?; + + dev_dbg!( + self.dev, + "GSP RPC: receive: seq# {}, function={:?}, length=0x{:x}\n", + header.sequence(), + header.function(), + header.length(), + ); + + // Check that the driver read area is large enough for the message. + if slice_1.len() + slice_2.len() < header.length() { + return Err(EIO); + } + + // Cut the message slices down to the actual length of the message. + let (slice_1, slice_2) = if slice_1.len() > header.length() { + // PANIC: we checked above that `slice_1` is at least as long as `msg_header.length()`. 
+ (slice_1.split_at(header.length()).0, &slice_2[0..0]) + } else { + ( + slice_1, + // PANIC: we checked above that `slice_1.len() + slice_2.len()` is at least as + // large as `msg_header.length()`. + slice_2.split_at(header.length() - slice_1.len()).0, + ) + }; + + // Validate checksum. + if Cmdq::calculate_checksum(SBufferIter::new_reader([ + header.as_bytes(), + slice_1, + slice_2, + ])) != 0 + { + dev_err!( + self.dev, + "GSP RPC: receive: Call {} - bad checksum", + header.sequence() + ); + return Err(EIO); + } + + Ok(GspMessage { + header, + contents: (slice_1, slice_2), + }) + } + + /// Receive a message from the GSP. + /// + /// `init` is a closure tasked with processing the message. It receives a reference to the + /// message in the message queue, and a [`SBufferIter`] pointing to its variable-length + /// payload, if any. + /// + /// The expected message is specified using the `M` generic parameter. If the pending message + /// is different, `EAGAIN` is returned and the unexpected message is dropped. + /// + /// This design is by no means final, but it is simple and will let us go through GSP + /// initialization. + /// + /// # Errors + /// + /// - `ETIMEDOUT` if `timeout` has elapsed before any message becomes available. + /// - `EIO` if there was some inconsistency (e.g. message shorter than advertised) on the + /// message queue. + /// - `EINVAL` if the function of the message was unrecognized. + pub(crate) fn receive_msg<M: MessageFromGsp>(&mut self, timeout: Delta) -> Result<M> + where + // This allows all error types, including `Infallible`, to be used for `M::InitError`. + Error: From<M::InitError>, + { + let message = self.wait_for_msg(timeout)?; + let function = message.header.function().map_err(|_| EINVAL)?; + + // Extract the message. Store the result as we want to advance the read pointer even in + // case of failure. 
+ let result = if function == M::FUNCTION { + let (cmd, contents_1) = M::Message::from_bytes_prefix(message.contents.0).ok_or(EIO)?; + let mut sbuffer = SBufferIter::new_reader([contents_1, message.contents.1]); + + M::read(cmd, &mut sbuffer).map_err(|e| e.into()) + } else { + Err(ERANGE) + }; + + // Advance the read pointer past this message. + self.gsp_mem.advance_cpu_read_ptr(u32::try_from( + message.header.length().div_ceil(GSP_PAGE_SIZE), + )?); + + result + } + + /// Returns the DMA handle of the command queue's shared memory region. + pub(crate) fn dma_handle(&self) -> DmaAddress { + self.gsp_mem.0.dma_handle() + } +} diff --git a/drivers/gpu/nova-core/gsp/commands.rs b/drivers/gpu/nova-core/gsp/commands.rs new file mode 100644 index 000000000000..0425c65b5d6f --- /dev/null +++ b/drivers/gpu/nova-core/gsp/commands.rs @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0 + +use core::{ + array, + convert::Infallible, // +}; + +use kernel::{ + device, + pci, + prelude::*, + time::Delta, + transmute::{ + AsBytes, + FromBytes, // + }, // +}; + +use crate::{ + driver::Bar0, + gsp::{ + cmdq::{ + Cmdq, + CommandToGsp, + MessageFromGsp, // + }, + fw::{ + commands::*, + MsgFunction, // + }, + }, + sbuffer::SBufferIter, + util, +}; + +/// The `GspSetSystemInfo` command. +pub(crate) struct SetSystemInfo<'a> { + pdev: &'a pci::Device<device::Bound>, +} + +impl<'a> SetSystemInfo<'a> { + /// Creates a new `GspSetSystemInfo` command using the parameters of `pdev`. + pub(crate) fn new(pdev: &'a pci::Device<device::Bound>) -> Self { + Self { pdev } + } +} + +impl<'a> CommandToGsp for SetSystemInfo<'a> { + const FUNCTION: MsgFunction = MsgFunction::GspSetSystemInfo; + type Command = GspSetSystemInfo; + type InitError = Error; + + fn init(&self) -> impl Init<Self::Command, Self::InitError> { + GspSetSystemInfo::init(self.pdev) + } +} + +struct RegistryEntry { + key: &'static str, + value: u32, +} + +/// The `SetRegistry` command. 
+pub(crate) struct SetRegistry { + entries: [RegistryEntry; Self::NUM_ENTRIES], +} + +impl SetRegistry { + // For now we hard-code the registry entries. Future work will allow others to + // be added as module parameters. + const NUM_ENTRIES: usize = 3; + + /// Creates a new `SetRegistry` command, using a set of hardcoded entries. + pub(crate) fn new() -> Self { + Self { + entries: [ + // RMSecBusResetEnable - enables PCI secondary bus reset + RegistryEntry { + key: "RMSecBusResetEnable", + value: 1, + }, + // RMForcePcieConfigSave - forces GSP-RM to preserve PCI configuration registers on + // any PCI reset. + RegistryEntry { + key: "RMForcePcieConfigSave", + value: 1, + }, + // RMDevidCheckIgnore - allows GSP-RM to boot even if the PCI dev ID is not found + // in the internal product name database. + RegistryEntry { + key: "RMDevidCheckIgnore", + value: 1, + }, + ], + } + } +} + +impl CommandToGsp for SetRegistry { + const FUNCTION: MsgFunction = MsgFunction::SetRegistry; + type Command = PackedRegistryTable; + type InitError = Infallible; + + fn init(&self) -> impl Init<Self::Command, Self::InitError> { + PackedRegistryTable::init(Self::NUM_ENTRIES as u32, self.variable_payload_len() as u32) + } + + fn variable_payload_len(&self) -> usize { + let mut key_size = 0; + for i in 0..Self::NUM_ENTRIES { + key_size += self.entries[i].key.len() + 1; // +1 for NULL terminator + } + Self::NUM_ENTRIES * size_of::<PackedRegistryEntry>() + key_size + } + + fn init_variable_payload( + &self, + dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>, + ) -> Result { + let string_data_start_offset = + size_of::<PackedRegistryTable>() + Self::NUM_ENTRIES * size_of::<PackedRegistryEntry>(); + + // Array for string data. 
+ let mut string_data = KVec::new(); + + for entry in self.entries.iter().take(Self::NUM_ENTRIES) { + dst.write_all( + PackedRegistryEntry::new( + (string_data_start_offset + string_data.len()) as u32, + entry.value, + ) + .as_bytes(), + )?; + + let key_bytes = entry.key.as_bytes(); + string_data.extend_from_slice(key_bytes, GFP_KERNEL)?; + string_data.push(0, GFP_KERNEL)?; + } + + dst.write_all(string_data.as_slice()) + } +} + +/// Message type for GSP initialization done notification. +struct GspInitDone {} + +// SAFETY: `GspInitDone` is a zero-sized type with no bytes, therefore it +// trivially has no uninitialized bytes. +unsafe impl FromBytes for GspInitDone {} + +impl MessageFromGsp for GspInitDone { + const FUNCTION: MsgFunction = MsgFunction::GspInitDone; + type InitError = Infallible; + type Message = GspInitDone; + + fn read( + _msg: &Self::Message, + _sbuffer: &mut SBufferIter<array::IntoIter<&[u8], 2>>, + ) -> Result<Self, Self::InitError> { + Ok(GspInitDone {}) + } +} + +/// Waits for GSP initialization to complete. +pub(crate) fn wait_gsp_init_done(cmdq: &mut Cmdq) -> Result { + loop { + match cmdq.receive_msg::<GspInitDone>(Delta::from_secs(10)) { + Ok(_) => break Ok(()), + Err(ERANGE) => continue, + Err(e) => break Err(e), + } + } +} + +/// The `GetGspStaticInfo` command. +struct GetGspStaticInfo; + +impl CommandToGsp for GetGspStaticInfo { + const FUNCTION: MsgFunction = MsgFunction::GetGspStaticInfo; + type Command = GspStaticConfigInfo; + type InitError = Infallible; + + fn init(&self) -> impl Init<Self::Command, Self::InitError> { + GspStaticConfigInfo::init_zeroed() + } +} + +/// The reply from the GSP to the [`GetGspInfo`] command. 
+pub(crate) struct GetGspStaticInfoReply { + gpu_name: [u8; 64], +} + +impl MessageFromGsp for GetGspStaticInfoReply { + const FUNCTION: MsgFunction = MsgFunction::GetGspStaticInfo; + type Message = GspStaticConfigInfo; + type InitError = Infallible; + + fn read( + msg: &Self::Message, + _sbuffer: &mut SBufferIter<array::IntoIter<&[u8], 2>>, + ) -> Result<Self, Self::InitError> { + Ok(GetGspStaticInfoReply { + gpu_name: msg.gpu_name_str(), + }) + } +} + +impl GetGspStaticInfoReply { + /// Returns the name of the GPU as a string, or `None` if the string given by the GSP was + /// invalid. + pub(crate) fn gpu_name(&self) -> Option<&str> { + util::str_from_null_terminated(&self.gpu_name) + } +} + +/// Send the [`GetGspInfo`] command and awaits for its reply. +pub(crate) fn get_gsp_info(cmdq: &mut Cmdq, bar: &Bar0) -> Result<GetGspStaticInfoReply> { + cmdq.send_command(bar, GetGspStaticInfo)?; + + loop { + match cmdq.receive_msg::<GetGspStaticInfoReply>(Delta::from_secs(5)) { + Ok(info) => return Ok(info), + Err(ERANGE) => continue, + Err(e) => return Err(e), + } + } +} diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs index 34226dd00982..abffd6beec65 100644 --- a/drivers/gpu/nova-core/gsp/fw.rs +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -1,7 +1,928 @@ // SPDX-License-Identifier: GPL-2.0 +pub(crate) mod commands; mod r570_144; // Alias to avoid repeating the version number with every use. -#[expect(unused)] use r570_144 as bindings; + +use core::ops::Range; + +use kernel::{ + dma::CoherentAllocation, + fmt, + prelude::*, + ptr::{ + Alignable, + Alignment, // + }, + sizes::{ + SZ_128K, + SZ_1M, // + }, + transmute::{ + AsBytes, + FromBytes, // + }, +}; + +use crate::{ + fb::FbLayout, + firmware::gsp::GspFirmware, + gpu::Chipset, + gsp::{ + cmdq::Cmdq, // + GSP_PAGE_SIZE, + }, + num::{ + self, + FromSafeCast, // + }, +}; + +/// Empty type to group methods related to heap parameters for running the GSP firmware. 
+enum GspFwHeapParams {} + +/// Minimum required alignment for the GSP heap. +const GSP_HEAP_ALIGNMENT: Alignment = Alignment::new::<{ 1 << 20 }>(); + +impl GspFwHeapParams { + /// Returns the amount of GSP-RM heap memory used during GSP-RM boot and initialization (up to + /// and including the first client subdevice allocation). + fn base_rm_size(_chipset: Chipset) -> u64 { + // TODO: this needs to be updated to return the correct value for Hopper+ once support for + // them is added: + // u64::from(bindings::GSP_FW_HEAP_PARAM_BASE_RM_SIZE_GH100) + u64::from(bindings::GSP_FW_HEAP_PARAM_BASE_RM_SIZE_TU10X) + } + + /// Returns the amount of heap memory required to support a single channel allocation. + fn client_alloc_size() -> u64 { + u64::from(bindings::GSP_FW_HEAP_PARAM_CLIENT_ALLOC_SIZE) + .align_up(GSP_HEAP_ALIGNMENT) + .unwrap_or(u64::MAX) + } + + /// Returns the amount of memory to reserve for management purposes for a framebuffer of size + /// `fb_size`. + fn management_overhead(fb_size: u64) -> u64 { + let fb_size_gb = fb_size.div_ceil(u64::from_safe_cast(kernel::sizes::SZ_1G)); + + u64::from(bindings::GSP_FW_HEAP_PARAM_SIZE_PER_GB_FB) + .saturating_mul(fb_size_gb) + .align_up(GSP_HEAP_ALIGNMENT) + .unwrap_or(u64::MAX) + } +} + +/// Heap memory requirements and constraints for a given version of the GSP LIBOS. +pub(crate) struct LibosParams { + /// The base amount of heap required by the GSP operating system, in bytes. + carveout_size: u64, + /// The minimum and maximum sizes allowed for the GSP FW heap, in bytes. 
+ allowed_heap_size: Range<u64>, +} + +impl LibosParams { + /// Version 2 of the GSP LIBOS (Turing and GA100) + const LIBOS2: LibosParams = LibosParams { + carveout_size: num::u32_as_u64(bindings::GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS2), + allowed_heap_size: num::u32_as_u64(bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MIN_MB) + * num::usize_as_u64(SZ_1M) + ..num::u32_as_u64(bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MAX_MB) + * num::usize_as_u64(SZ_1M), + }; + + /// Version 3 of the GSP LIBOS (GA102+) + const LIBOS3: LibosParams = LibosParams { + carveout_size: num::u32_as_u64(bindings::GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS3_BAREMETAL), + allowed_heap_size: num::u32_as_u64( + bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MIN_MB, + ) * num::usize_as_u64(SZ_1M) + ..num::u32_as_u64(bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MAX_MB) + * num::usize_as_u64(SZ_1M), + }; + + /// Returns the libos parameters corresponding to `chipset`. + pub(crate) fn from_chipset(chipset: Chipset) -> &'static LibosParams { + if chipset < Chipset::GA102 { + &Self::LIBOS2 + } else { + &Self::LIBOS3 + } + } + + /// Returns the amount of memory (in bytes) to allocate for the WPR heap for a framebuffer size + /// of `fb_size` (in bytes) for `chipset`. + pub(crate) fn wpr_heap_size(&self, chipset: Chipset, fb_size: u64) -> u64 { + // The WPR heap will contain the following: + // LIBOS carveout, + self.carveout_size + // RM boot working memory, + .saturating_add(GspFwHeapParams::base_rm_size(chipset)) + // One RM client, + .saturating_add(GspFwHeapParams::client_alloc_size()) + // Overhead for memory management. + .saturating_add(GspFwHeapParams::management_overhead(fb_size)) + // Clamp to the supported heap sizes. + .clamp(self.allowed_heap_size.start, self.allowed_heap_size.end - 1) + } +} + +/// Structure passed to the GSP bootloader, containing the framebuffer layout as well as the DMA +/// addresses of the GSP bootloader and firmware. 
+#[repr(transparent)] +pub(crate) struct GspFwWprMeta(bindings::GspFwWprMeta); + +// SAFETY: Padding is explicit and does not contain uninitialized data. +unsafe impl AsBytes for GspFwWprMeta {} + +// SAFETY: This struct only contains integer types for which all bit patterns +// are valid. +unsafe impl FromBytes for GspFwWprMeta {} + +type GspFwWprMetaBootResumeInfo = r570_144::GspFwWprMeta__bindgen_ty_1; +type GspFwWprMetaBootInfo = r570_144::GspFwWprMeta__bindgen_ty_1__bindgen_ty_1; + +impl GspFwWprMeta { + /// Fill in and return a `GspFwWprMeta` suitable for booting `gsp_firmware` using the + /// `fb_layout` layout. + pub(crate) fn new(gsp_firmware: &GspFirmware, fb_layout: &FbLayout) -> Self { + Self(bindings::GspFwWprMeta { + // CAST: we want to store the bits of `GSP_FW_WPR_META_MAGIC` unmodified. + magic: r570_144::GSP_FW_WPR_META_MAGIC as u64, + revision: u64::from(r570_144::GSP_FW_WPR_META_REVISION), + sysmemAddrOfRadix3Elf: gsp_firmware.radix3_dma_handle(), + sizeOfRadix3Elf: u64::from_safe_cast(gsp_firmware.size), + sysmemAddrOfBootloader: gsp_firmware.bootloader.ucode.dma_handle(), + sizeOfBootloader: u64::from_safe_cast(gsp_firmware.bootloader.ucode.size()), + bootloaderCodeOffset: u64::from(gsp_firmware.bootloader.code_offset), + bootloaderDataOffset: u64::from(gsp_firmware.bootloader.data_offset), + bootloaderManifestOffset: u64::from(gsp_firmware.bootloader.manifest_offset), + __bindgen_anon_1: GspFwWprMetaBootResumeInfo { + __bindgen_anon_1: GspFwWprMetaBootInfo { + sysmemAddrOfSignature: gsp_firmware.signatures.dma_handle(), + sizeOfSignature: u64::from_safe_cast(gsp_firmware.signatures.size()), + }, + }, + gspFwRsvdStart: fb_layout.heap.start, + nonWprHeapOffset: fb_layout.heap.start, + nonWprHeapSize: fb_layout.heap.end - fb_layout.heap.start, + gspFwWprStart: fb_layout.wpr2.start, + gspFwHeapOffset: fb_layout.wpr2_heap.start, + gspFwHeapSize: fb_layout.wpr2_heap.end - fb_layout.wpr2_heap.start, + gspFwOffset: fb_layout.elf.start, + 
bootBinOffset: fb_layout.boot.start, + frtsOffset: fb_layout.frts.start, + frtsSize: fb_layout.frts.end - fb_layout.frts.start, + gspFwWprEnd: fb_layout + .vga_workspace + .start + .align_down(Alignment::new::<SZ_128K>()), + gspFwHeapVfPartitionCount: fb_layout.vf_partition_count, + fbSize: fb_layout.fb.end - fb_layout.fb.start, + vgaWorkspaceOffset: fb_layout.vga_workspace.start, + vgaWorkspaceSize: fb_layout.vga_workspace.end - fb_layout.vga_workspace.start, + ..Default::default() + }) + } +} + +#[derive(Copy, Clone, Debug, PartialEq)] +#[repr(u32)] +pub(crate) enum MsgFunction { + // Common function codes + Nop = bindings::NV_VGPU_MSG_FUNCTION_NOP, + SetGuestSystemInfo = bindings::NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO, + AllocRoot = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_ROOT, + AllocDevice = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_DEVICE, + AllocMemory = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_MEMORY, + AllocCtxDma = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CTX_DMA, + AllocChannelDma = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CHANNEL_DMA, + MapMemory = bindings::NV_VGPU_MSG_FUNCTION_MAP_MEMORY, + BindCtxDma = bindings::NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA, + AllocObject = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_OBJECT, + Free = bindings::NV_VGPU_MSG_FUNCTION_FREE, + Log = bindings::NV_VGPU_MSG_FUNCTION_LOG, + GetGspStaticInfo = bindings::NV_VGPU_MSG_FUNCTION_GET_GSP_STATIC_INFO, + SetRegistry = bindings::NV_VGPU_MSG_FUNCTION_SET_REGISTRY, + GspSetSystemInfo = bindings::NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO, + GspInitPostObjGpu = bindings::NV_VGPU_MSG_FUNCTION_GSP_INIT_POST_OBJGPU, + GspRmControl = bindings::NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL, + GetStaticInfo = bindings::NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO, + + // Event codes + GspInitDone = bindings::NV_VGPU_MSG_EVENT_GSP_INIT_DONE, + GspRunCpuSequencer = bindings::NV_VGPU_MSG_EVENT_GSP_RUN_CPU_SEQUENCER, + PostEvent = bindings::NV_VGPU_MSG_EVENT_POST_EVENT, + RcTriggered = bindings::NV_VGPU_MSG_EVENT_RC_TRIGGERED, + 
MmuFaultQueued = bindings::NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED, + OsErrorLog = bindings::NV_VGPU_MSG_EVENT_OS_ERROR_LOG, + GspPostNoCat = bindings::NV_VGPU_MSG_EVENT_GSP_POST_NOCAT_RECORD, + GspLockdownNotice = bindings::NV_VGPU_MSG_EVENT_GSP_LOCKDOWN_NOTICE, + UcodeLibOsPrint = bindings::NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT, +} + +impl fmt::Display for MsgFunction { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + // Common function codes + MsgFunction::Nop => write!(f, "NOP"), + MsgFunction::SetGuestSystemInfo => write!(f, "SET_GUEST_SYSTEM_INFO"), + MsgFunction::AllocRoot => write!(f, "ALLOC_ROOT"), + MsgFunction::AllocDevice => write!(f, "ALLOC_DEVICE"), + MsgFunction::AllocMemory => write!(f, "ALLOC_MEMORY"), + MsgFunction::AllocCtxDma => write!(f, "ALLOC_CTX_DMA"), + MsgFunction::AllocChannelDma => write!(f, "ALLOC_CHANNEL_DMA"), + MsgFunction::MapMemory => write!(f, "MAP_MEMORY"), + MsgFunction::BindCtxDma => write!(f, "BIND_CTX_DMA"), + MsgFunction::AllocObject => write!(f, "ALLOC_OBJECT"), + MsgFunction::Free => write!(f, "FREE"), + MsgFunction::Log => write!(f, "LOG"), + MsgFunction::GetGspStaticInfo => write!(f, "GET_GSP_STATIC_INFO"), + MsgFunction::SetRegistry => write!(f, "SET_REGISTRY"), + MsgFunction::GspSetSystemInfo => write!(f, "GSP_SET_SYSTEM_INFO"), + MsgFunction::GspInitPostObjGpu => write!(f, "GSP_INIT_POST_OBJGPU"), + MsgFunction::GspRmControl => write!(f, "GSP_RM_CONTROL"), + MsgFunction::GetStaticInfo => write!(f, "GET_STATIC_INFO"), + + // Event codes + MsgFunction::GspInitDone => write!(f, "INIT_DONE"), + MsgFunction::GspRunCpuSequencer => write!(f, "RUN_CPU_SEQUENCER"), + MsgFunction::PostEvent => write!(f, "POST_EVENT"), + MsgFunction::RcTriggered => write!(f, "RC_TRIGGERED"), + MsgFunction::MmuFaultQueued => write!(f, "MMU_FAULT_QUEUED"), + MsgFunction::OsErrorLog => write!(f, "OS_ERROR_LOG"), + MsgFunction::GspPostNoCat => write!(f, "NOCAT"), + MsgFunction::GspLockdownNotice => write!(f, "LOCKDOWN_NOTICE"), 
+ MsgFunction::UcodeLibOsPrint => write!(f, "LIBOS_PRINT"), + } + } +} + +impl TryFrom<u32> for MsgFunction { + type Error = kernel::error::Error; + + fn try_from(value: u32) -> Result<MsgFunction> { + match value { + bindings::NV_VGPU_MSG_FUNCTION_NOP => Ok(MsgFunction::Nop), + bindings::NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO => { + Ok(MsgFunction::SetGuestSystemInfo) + } + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_ROOT => Ok(MsgFunction::AllocRoot), + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_DEVICE => Ok(MsgFunction::AllocDevice), + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_MEMORY => Ok(MsgFunction::AllocMemory), + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CTX_DMA => Ok(MsgFunction::AllocCtxDma), + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CHANNEL_DMA => Ok(MsgFunction::AllocChannelDma), + bindings::NV_VGPU_MSG_FUNCTION_MAP_MEMORY => Ok(MsgFunction::MapMemory), + bindings::NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA => Ok(MsgFunction::BindCtxDma), + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_OBJECT => Ok(MsgFunction::AllocObject), + bindings::NV_VGPU_MSG_FUNCTION_FREE => Ok(MsgFunction::Free), + bindings::NV_VGPU_MSG_FUNCTION_LOG => Ok(MsgFunction::Log), + bindings::NV_VGPU_MSG_FUNCTION_GET_GSP_STATIC_INFO => Ok(MsgFunction::GetGspStaticInfo), + bindings::NV_VGPU_MSG_FUNCTION_SET_REGISTRY => Ok(MsgFunction::SetRegistry), + bindings::NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO => Ok(MsgFunction::GspSetSystemInfo), + bindings::NV_VGPU_MSG_FUNCTION_GSP_INIT_POST_OBJGPU => { + Ok(MsgFunction::GspInitPostObjGpu) + } + bindings::NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL => Ok(MsgFunction::GspRmControl), + bindings::NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO => Ok(MsgFunction::GetStaticInfo), + bindings::NV_VGPU_MSG_EVENT_GSP_INIT_DONE => Ok(MsgFunction::GspInitDone), + bindings::NV_VGPU_MSG_EVENT_GSP_RUN_CPU_SEQUENCER => { + Ok(MsgFunction::GspRunCpuSequencer) + } + bindings::NV_VGPU_MSG_EVENT_POST_EVENT => Ok(MsgFunction::PostEvent), + bindings::NV_VGPU_MSG_EVENT_RC_TRIGGERED => Ok(MsgFunction::RcTriggered), + 
bindings::NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED => Ok(MsgFunction::MmuFaultQueued), + bindings::NV_VGPU_MSG_EVENT_OS_ERROR_LOG => Ok(MsgFunction::OsErrorLog), + bindings::NV_VGPU_MSG_EVENT_GSP_POST_NOCAT_RECORD => Ok(MsgFunction::GspPostNoCat), + bindings::NV_VGPU_MSG_EVENT_GSP_LOCKDOWN_NOTICE => Ok(MsgFunction::GspLockdownNotice), + bindings::NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT => Ok(MsgFunction::UcodeLibOsPrint), + _ => Err(EINVAL), + } + } +} + +impl From<MsgFunction> for u32 { + fn from(value: MsgFunction) -> Self { + // CAST: `MsgFunction` is `repr(u32)` and can thus be cast losslessly. + value as u32 + } +} + +/// Sequencer buffer opcode for GSP sequencer commands. +#[derive(Copy, Clone, Debug, PartialEq)] +#[repr(u32)] +pub(crate) enum SeqBufOpcode { + // Core operation opcodes + CoreReset = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESET, + CoreResume = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESUME, + CoreStart = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_START, + CoreWaitForHalt = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT, + + // Delay opcode + DelayUs = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_DELAY_US, + + // Register operation opcodes + RegModify = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_MODIFY, + RegPoll = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_POLL, + RegStore = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_STORE, + RegWrite = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_WRITE, +} + +impl fmt::Display for SeqBufOpcode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SeqBufOpcode::CoreReset => write!(f, "CORE_RESET"), + SeqBufOpcode::CoreResume => write!(f, "CORE_RESUME"), + SeqBufOpcode::CoreStart => write!(f, "CORE_START"), + SeqBufOpcode::CoreWaitForHalt => write!(f, "CORE_WAIT_FOR_HALT"), + SeqBufOpcode::DelayUs => write!(f, "DELAY_US"), + SeqBufOpcode::RegModify => write!(f, "REG_MODIFY"), + SeqBufOpcode::RegPoll => 
write!(f, "REG_POLL"), + SeqBufOpcode::RegStore => write!(f, "REG_STORE"), + SeqBufOpcode::RegWrite => write!(f, "REG_WRITE"), + } + } +} + +impl TryFrom<u32> for SeqBufOpcode { + type Error = kernel::error::Error; + + fn try_from(value: u32) -> Result<SeqBufOpcode> { + match value { + r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESET => { + Ok(SeqBufOpcode::CoreReset) + } + r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESUME => { + Ok(SeqBufOpcode::CoreResume) + } + r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_START => { + Ok(SeqBufOpcode::CoreStart) + } + r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT => { + Ok(SeqBufOpcode::CoreWaitForHalt) + } + r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_DELAY_US => Ok(SeqBufOpcode::DelayUs), + r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_MODIFY => { + Ok(SeqBufOpcode::RegModify) + } + r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_POLL => Ok(SeqBufOpcode::RegPoll), + r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_STORE => Ok(SeqBufOpcode::RegStore), + r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_WRITE => Ok(SeqBufOpcode::RegWrite), + _ => Err(EINVAL), + } + } +} + +impl From<SeqBufOpcode> for u32 { + fn from(value: SeqBufOpcode) -> Self { + // CAST: `SeqBufOpcode` is `repr(u32)` and can thus be cast losslessly. + value as u32 + } +} + +/// Wrapper for GSP sequencer register write payload. +#[repr(transparent)] +#[derive(Copy, Clone)] +pub(crate) struct RegWritePayload(r570_144::GSP_SEQ_BUF_PAYLOAD_REG_WRITE); + +impl RegWritePayload { + /// Returns the register address. + pub(crate) fn addr(&self) -> u32 { + self.0.addr + } + + /// Returns the value to write. + pub(crate) fn val(&self) -> u32 { + self.0.val + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. +unsafe impl FromBytes for RegWritePayload {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. 
+unsafe impl AsBytes for RegWritePayload {} + +/// Wrapper for GSP sequencer register modify payload. +#[repr(transparent)] +#[derive(Copy, Clone)] +pub(crate) struct RegModifyPayload(r570_144::GSP_SEQ_BUF_PAYLOAD_REG_MODIFY); + +impl RegModifyPayload { + /// Returns the register address. + pub(crate) fn addr(&self) -> u32 { + self.0.addr + } + + /// Returns the mask to apply. + pub(crate) fn mask(&self) -> u32 { + self.0.mask + } + + /// Returns the value to write. + pub(crate) fn val(&self) -> u32 { + self.0.val + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. +unsafe impl FromBytes for RegModifyPayload {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for RegModifyPayload {} + +/// Wrapper for GSP sequencer register poll payload. +#[repr(transparent)] +#[derive(Copy, Clone)] +pub(crate) struct RegPollPayload(r570_144::GSP_SEQ_BUF_PAYLOAD_REG_POLL); + +impl RegPollPayload { + /// Returns the register address. + pub(crate) fn addr(&self) -> u32 { + self.0.addr + } + + /// Returns the mask to apply. + pub(crate) fn mask(&self) -> u32 { + self.0.mask + } + + /// Returns the expected value. + pub(crate) fn val(&self) -> u32 { + self.0.val + } + + /// Returns the timeout in microseconds. + pub(crate) fn timeout(&self) -> u32 { + self.0.timeout + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. +unsafe impl FromBytes for RegPollPayload {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for RegPollPayload {} + +/// Wrapper for GSP sequencer delay payload. +#[repr(transparent)] +#[derive(Copy, Clone)] +pub(crate) struct DelayUsPayload(r570_144::GSP_SEQ_BUF_PAYLOAD_DELAY_US); + +impl DelayUsPayload { + /// Returns the delay value in microseconds. 
+ pub(crate) fn val(&self) -> u32 { + self.0.val + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. +unsafe impl FromBytes for DelayUsPayload {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for DelayUsPayload {} + +/// Wrapper for GSP sequencer register store payload. +#[repr(transparent)] +#[derive(Copy, Clone)] +pub(crate) struct RegStorePayload(r570_144::GSP_SEQ_BUF_PAYLOAD_REG_STORE); + +impl RegStorePayload { + /// Returns the register address. + pub(crate) fn addr(&self) -> u32 { + self.0.addr + } + + /// Returns the storage index. + #[allow(unused)] + pub(crate) fn index(&self) -> u32 { + self.0.index + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. +unsafe impl FromBytes for RegStorePayload {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for RegStorePayload {} + +/// Wrapper for GSP sequencer buffer command. +#[repr(transparent)] +pub(crate) struct SequencerBufferCmd(r570_144::GSP_SEQUENCER_BUFFER_CMD); + +impl SequencerBufferCmd { + /// Returns the opcode as a `SeqBufOpcode` enum, or error if invalid. + pub(crate) fn opcode(&self) -> Result<SeqBufOpcode> { + self.0.opCode.try_into() + } + + /// Returns the register write payload by value. + /// + /// Returns an error if the opcode is not `SeqBufOpcode::RegWrite`. + pub(crate) fn reg_write_payload(&self) -> Result<RegWritePayload> { + if self.opcode()? != SeqBufOpcode::RegWrite { + return Err(EINVAL); + } + // SAFETY: Opcode is verified to be `RegWrite`, so union contains valid `RegWritePayload`. + let payload_bytes = unsafe { + core::slice::from_raw_parts( + core::ptr::addr_of!(self.0.payload.regWrite).cast::<u8>(), + core::mem::size_of::<RegWritePayload>(), + ) + }; + Ok(*RegWritePayload::from_bytes(payload_bytes).ok_or(EINVAL)?) + } + + /// Returns the register modify payload by value. 
+ /// + /// Returns an error if the opcode is not `SeqBufOpcode::RegModify`. + pub(crate) fn reg_modify_payload(&self) -> Result<RegModifyPayload> { + if self.opcode()? != SeqBufOpcode::RegModify { + return Err(EINVAL); + } + // SAFETY: Opcode is verified to be `RegModify`, so union contains valid `RegModifyPayload`. + let payload_bytes = unsafe { + core::slice::from_raw_parts( + core::ptr::addr_of!(self.0.payload.regModify).cast::<u8>(), + core::mem::size_of::<RegModifyPayload>(), + ) + }; + Ok(*RegModifyPayload::from_bytes(payload_bytes).ok_or(EINVAL)?) + } + + /// Returns the register poll payload by value. + /// + /// Returns an error if the opcode is not `SeqBufOpcode::RegPoll`. + pub(crate) fn reg_poll_payload(&self) -> Result<RegPollPayload> { + if self.opcode()? != SeqBufOpcode::RegPoll { + return Err(EINVAL); + } + // SAFETY: Opcode is verified to be `RegPoll`, so union contains valid `RegPollPayload`. + let payload_bytes = unsafe { + core::slice::from_raw_parts( + core::ptr::addr_of!(self.0.payload.regPoll).cast::<u8>(), + core::mem::size_of::<RegPollPayload>(), + ) + }; + Ok(*RegPollPayload::from_bytes(payload_bytes).ok_or(EINVAL)?) + } + + /// Returns the delay payload by value. + /// + /// Returns an error if the opcode is not `SeqBufOpcode::DelayUs`. + pub(crate) fn delay_us_payload(&self) -> Result<DelayUsPayload> { + if self.opcode()? != SeqBufOpcode::DelayUs { + return Err(EINVAL); + } + // SAFETY: Opcode is verified to be `DelayUs`, so union contains valid `DelayUsPayload`. + let payload_bytes = unsafe { + core::slice::from_raw_parts( + core::ptr::addr_of!(self.0.payload.delayUs).cast::<u8>(), + core::mem::size_of::<DelayUsPayload>(), + ) + }; + Ok(*DelayUsPayload::from_bytes(payload_bytes).ok_or(EINVAL)?) + } + + /// Returns the register store payload by value. + /// + /// Returns an error if the opcode is not `SeqBufOpcode::RegStore`. + pub(crate) fn reg_store_payload(&self) -> Result<RegStorePayload> { + if self.opcode()? 
!= SeqBufOpcode::RegStore { + return Err(EINVAL); + } + // SAFETY: Opcode is verified to be `RegStore`, so union contains valid `RegStorePayload`. + let payload_bytes = unsafe { + core::slice::from_raw_parts( + core::ptr::addr_of!(self.0.payload.regStore).cast::<u8>(), + core::mem::size_of::<RegStorePayload>(), + ) + }; + Ok(*RegStorePayload::from_bytes(payload_bytes).ok_or(EINVAL)?) + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. +unsafe impl FromBytes for SequencerBufferCmd {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for SequencerBufferCmd {} + +/// Wrapper for GSP run CPU sequencer RPC. +#[repr(transparent)] +pub(crate) struct RunCpuSequencer(r570_144::rpc_run_cpu_sequencer_v17_00); + +impl RunCpuSequencer { + /// Returns the command index. + pub(crate) fn cmd_index(&self) -> u32 { + self.0.cmdIndex + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. +unsafe impl FromBytes for RunCpuSequencer {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for RunCpuSequencer {} + +/// Struct containing the arguments required to pass a memory buffer to the GSP +/// for use during initialisation. +/// +/// The GSP only understands 4K pages (GSP_PAGE_SIZE), so even if the kernel is +/// configured for a larger page size (e.g. 64K pages), we need to give +/// the GSP an array of 4K pages. Since we only create physically contiguous +/// buffers the math to calculate the addresses is simple. +/// +/// The buffers must be a multiple of GSP_PAGE_SIZE. GSP-RM also currently +/// ignores the @kind field for LOGINIT, LOGINTR, and LOGRM, but expects the +/// buffers to be physically contiguous anyway. +/// +/// The memory allocated for the arguments must remain until the GSP sends the +/// init_done RPC. 
#[repr(transparent)]
pub(crate) struct LibosMemoryRegionInitArgument(bindings::LibosMemoryRegionInitArgument);

// SAFETY: Padding is explicit and does not contain uninitialized data.
unsafe impl AsBytes for LibosMemoryRegionInitArgument {}

// SAFETY: This struct only contains integer types for which all bit patterns
// are valid.
unsafe impl FromBytes for LibosMemoryRegionInitArgument {}

impl LibosMemoryRegionInitArgument {
    /// Creates the init argument describing the coherent allocation `obj`.
    ///
    /// # Arguments
    ///
    /// * `name` - Name of the region; it is converted into the `id8` field of the argument.
    /// * `obj` - Allocation whose DMA handle and size are stored in the `pa` and `size` fields.
    ///
    /// The region is always described as contiguous (`kind`) and located in system memory
    /// (`loc`); all remaining fields take their `Default` value.
    pub(crate) fn new<A: AsBytes + FromBytes>(
        name: &'static str,
        obj: &CoherentAllocation<A>,
    ) -> Self {
        /// Generates the `ID8` identifier required for some GSP objects.
        fn id8(name: &str) -> u64 {
            let mut bytes = [0u8; core::mem::size_of::<u64>()];

            // The name is copied back-to-front. `zip` stops at the shorter of the two sides, so
            // at most 8 bytes are written (the last 8 bytes of a longer name) and a shorter name
            // leaves the trailing bytes of the buffer zeroed.
            for (c, b) in name.bytes().rev().zip(&mut bytes) {
                *b = c;
            }

            // Native-endian conversion: the in-memory byte order of the result is exactly
            // `bytes`.
            u64::from_ne_bytes(bytes)
        }

        Self(bindings::LibosMemoryRegionInitArgument {
            id8: id8(name),
            pa: obj.dma_handle(),
            size: num::usize_as_u64(obj.size()),
            kind: num::u32_into_u8::<
                { bindings::LibosMemoryRegionKind_LIBOS_MEMORY_REGION_CONTIGUOUS },
            >(),
            loc: num::u32_into_u8::<
                { bindings::LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_SYSMEM },
            >(),
            ..Default::default()
        })
    }
}

/// TX header for setting up a message queue with the GSP.
#[repr(transparent)]
pub(crate) struct MsgqTxHeader(bindings::msgqTxHeader);

impl MsgqTxHeader {
    /// Create a new TX queue header.
    ///
    /// # Arguments
    ///
    /// * `msgq_size` - Total size of the message queue structure, in bytes.
    /// * `rx_hdr_offset` - Offset, in bytes, of the start of the RX header in the message queue
    ///   structure.
    /// * `msg_count` - Number of messages that can be sent, i.e. the number of memory pages
    ///   allocated for the message queue in the message queue structure.
    pub(crate) fn new(msgq_size: u32, rx_hdr_offset: u32, msg_count: u32) -> Self {
        Self(bindings::msgqTxHeader {
            version: 0,
            size: msgq_size,
            // Each message slot is one GSP page...
            msgSize: num::usize_into_u32::<GSP_PAGE_SIZE>(),
            msgCount: msg_count,
            writePtr: 0,
            // NOTE(review): the meaning of flag value 1 is not visible here — confirm against
            // the GSP message queue definition.
            flags: 1,
            rxHdrOff: rx_hdr_offset,
            // ...and the first entry starts one GSP page into the queue structure.
            entryOff: num::usize_into_u32::<GSP_PAGE_SIZE>(),
        })
    }

    /// Returns the value of the write pointer for this queue.
    ///
    /// The field is read with a volatile load.
    pub(crate) fn write_ptr(&self) -> u32 {
        let ptr = core::ptr::from_ref(&self.0.writePtr);

        // SAFETY: `ptr` is a valid pointer to a `u32`.
        unsafe { ptr.read_volatile() }
    }

    /// Sets the value of the write pointer for this queue.
    ///
    /// The field is written with a volatile store.
    pub(crate) fn set_write_ptr(&mut self, val: u32) {
        let ptr = core::ptr::from_mut(&mut self.0.writePtr);

        // SAFETY: `ptr` is a valid pointer to a `u32`.
        unsafe { ptr.write_volatile(val) }
    }
}

// SAFETY: Padding is explicit and does not contain uninitialized data.
unsafe impl AsBytes for MsgqTxHeader {}

/// RX header for setting up a message queue with the GSP.
#[repr(transparent)]
pub(crate) struct MsgqRxHeader(bindings::msgqRxHeader);

/// Header for the message RX queue.
impl MsgqRxHeader {
    /// Creates a new RX queue header.
    ///
    /// Every field takes the `Default` value of the underlying binding.
    pub(crate) fn new() -> Self {
        Self(Default::default())
    }

    /// Returns the value of the read pointer for this queue.
    ///
    /// The field is read with a volatile load.
    pub(crate) fn read_ptr(&self) -> u32 {
        let ptr = core::ptr::from_ref(&self.0.readPtr);

        // SAFETY: `ptr` is a valid pointer to a `u32`.
        unsafe { ptr.read_volatile() }
    }

    /// Sets the value of the read pointer for this queue.
    ///
    /// The field is written with a volatile store.
    pub(crate) fn set_read_ptr(&mut self, val: u32) {
        let ptr = core::ptr::from_mut(&mut self.0.readPtr);

        // SAFETY: `ptr` is a valid pointer to a `u32`.
        unsafe { ptr.write_volatile(val) }
    }
}

// SAFETY: Padding is explicit and does not contain uninitialized data.
unsafe impl AsBytes for MsgqRxHeader {}

// Version word stored in the `header_version` field of the RPC message header: the major
// number occupies bits 31:24 and the minor number bits 23:16.
bitfield! {
    struct MsgHeaderVersion(u32) {
        31:24 major as u8;
        23:16 minor as u8;
    }
}

impl MsgHeaderVersion {
    // Major/minor version numbers written by `new()`.
    const MAJOR_TOT: u8 = 3;
    const MINOR_TOT: u8 = 0;

    /// Returns a version whose major field is `MAJOR_TOT` and whose minor field is `MINOR_TOT`.
    ///
    /// All other bits keep the value they have in `Self::default()`.
    fn new() -> Self {
        Self::default()
            .set_major(Self::MAJOR_TOT)
            .set_minor(Self::MINOR_TOT)
    }
}

impl bindings::rpc_message_header_v {
    /// Returns an in-place initializer for the RPC header of a message invoking `function`.
    ///
    /// `cmd_size` is the size of the command following the header; the `length` field is set to
    /// `size_of::<Self>() + cmd_size`. `rpc_result` and `rpc_result_private` are set to
    /// `0xffffffff` and every field not listed explicitly is zeroed.
    ///
    /// # Errors
    ///
    /// * `EOVERFLOW` if adding `cmd_size` to the header size overflows `usize`.
    /// * `EINVAL` if the resulting length does not fit the type of the `length` field.
    fn init(cmd_size: usize, function: MsgFunction) -> impl Init<Self, Error> {
        type RpcMessageHeader = bindings::rpc_message_header_v;

        try_init!(RpcMessageHeader {
            header_version: MsgHeaderVersion::new().into(),
            signature: bindings::NV_VGPU_MSG_SIGNATURE_VALID,
            function: function.into(),
            length: size_of::<Self>()
                .checked_add(cmd_size)
                .ok_or(EOVERFLOW)
                .and_then(|v| v.try_into().map_err(|_| EINVAL))?,
            rpc_result: 0xffffffff,
            rpc_result_private: 0xffffffff,
            ..Zeroable::init_zeroed()
        })
    }
}

// SAFETY: We can't derive the Zeroable trait for this binding because the
// procedural macro doesn't support the syntax used by bindgen to create the
// __IncompleteArrayField types. So instead we implement it here, which is safe
// because these are explicitly padded structures only containing types for
// which any bit pattern, including all zeros, is valid.
unsafe impl Zeroable for bindings::rpc_message_header_v {}

/// GSP Message Element.
///
/// This is essentially a message header expected to be followed by the message data.
#[repr(transparent)]
pub(crate) struct GspMsgElement {
    inner: bindings::GSP_MSG_QUEUE_ELEMENT,
}

impl GspMsgElement {
    /// Creates a new message element.
    ///
    /// # Arguments
    ///
    /// * `sequence` - Sequence number of the message.
    /// * `cmd_size` - Size of the command (not including the message element), in bytes.
    /// * `function` - Function of the message.
    ///
    /// # Errors
    ///
    /// Returns `EOVERFLOW` if `size_of::<Self>() + cmd_size` overflows, or if the resulting
    /// number of GSP pages does not fit the `elemCount` field. Errors from the initializer of
    /// the embedded RPC header are propagated.
    // NOTE(review): `non_snake_case` is presumably allowed because the initializer macros
    // introduce bindings named after the camelCase fields — confirm.
    #[allow(non_snake_case)]
    pub(crate) fn init(
        sequence: u32,
        cmd_size: usize,
        function: MsgFunction,
    ) -> impl Init<Self, Error> {
        type RpcMessageHeader = bindings::rpc_message_header_v;
        type InnerGspMsgElement = bindings::GSP_MSG_QUEUE_ELEMENT;
        let init_inner = try_init!(InnerGspMsgElement {
            seqNum: sequence,
            // Number of GSP pages needed to hold the element header plus the command,
            // rounded up.
            elemCount: size_of::<Self>()
                .checked_add(cmd_size)
                .ok_or(EOVERFLOW)?
                .div_ceil(GSP_PAGE_SIZE)
                .try_into()
                .map_err(|_| EOVERFLOW)?,
            rpc <- RpcMessageHeader::init(cmd_size, function),
            ..Zeroable::init_zeroed()
        });

        try_init!(GspMsgElement {
            inner <- init_inner,
        })
    }

    /// Sets the checksum of this message.
    ///
    /// Since the header is also part of the checksum, this is usually called after the whole
    /// message has been written to the shared memory area.
    pub(crate) fn set_checksum(&mut self, checksum: u32) {
        self.inner.checkSum = checksum;
    }

    /// Returns the total length of the message.
    ///
    /// Computed as the size of the element header that precedes the RPC header, plus the
    /// `rpc.length` field.
    pub(crate) fn length(&self) -> usize {
        // `rpc.length` includes the length of the GspRpcHeader but not the message header.
        size_of::<Self>() - size_of::<bindings::rpc_message_header_v>()
            + num::u32_as_usize(self.inner.rpc.length)
    }

    /// Returns the sequence number of the message.
    ///
    /// Note that this reads the `sequence` field of the embedded RPC header, not the
    /// element-level `seqNum` field that `init` writes.
    pub(crate) fn sequence(&self) -> u32 {
        self.inner.rpc.sequence
    }

    /// Returns the function of the message, if it is valid, or the invalid function number as
    /// an error.
    pub(crate) fn function(&self) -> Result<MsgFunction, u32> {
        self.inner
            .rpc
            .function
            .try_into()
            .map_err(|_| self.inner.rpc.function)
    }

    /// Returns the number of elements (i.e. memory pages) used by this message.
    pub(crate) fn element_count(&self) -> u32 {
        self.inner.elemCount
    }
}

// SAFETY: Padding is explicit and does not contain uninitialized data.
unsafe impl AsBytes for GspMsgElement {}

// SAFETY: This struct only contains integer types for which all bit patterns
// are valid.
unsafe impl FromBytes for GspMsgElement {}

/// Arguments for GSP startup.
///
/// Transparent newtype over the `GSP_ARGUMENTS_CACHED` binding.
#[repr(transparent)]
pub(crate) struct GspArgumentsCached(bindings::GSP_ARGUMENTS_CACHED);

impl GspArgumentsCached {
    /// Creates the arguments for starting the GSP up using `cmdq` as its command queue.
    ///
    /// Only the message queue init arguments and `bDmemStack` (set to 1) are filled in
    /// explicitly; every other field takes its `Default` value.
    pub(crate) fn new(cmdq: &Cmdq) -> Self {
        Self(bindings::GSP_ARGUMENTS_CACHED {
            messageQueueInitArguments: MessageQueueInitArguments::new(cmdq).0,
            bDmemStack: 1,
            ..Default::default()
        })
    }
}

// SAFETY: Padding is explicit and will not contain uninitialized data.
unsafe impl AsBytes for GspArgumentsCached {}

// SAFETY: This struct only contains integer types for which all bit patterns
// are valid.
unsafe impl FromBytes for GspArgumentsCached {}

/// Init arguments for the message queue.
///
/// Transparent newtype over the `MESSAGE_QUEUE_INIT_ARGUMENTS` binding.
#[repr(transparent)]
struct MessageQueueInitArguments(bindings::MESSAGE_QUEUE_INIT_ARGUMENTS);

impl MessageQueueInitArguments {
    /// Creates a new init arguments structure for `cmdq`.
    ///
    /// The shared memory address is the DMA handle of `cmdq`; the page table entry count and
    /// the command/status queue offsets come from the associated constants of `Cmdq`. Every
    /// other field takes its `Default` value.
    fn new(cmdq: &Cmdq) -> Self {
        Self(bindings::MESSAGE_QUEUE_INIT_ARGUMENTS {
            sharedMemPhysAddr: cmdq.dma_handle(),
            pageTableEntryCount: num::usize_into_u32::<{ Cmdq::NUM_PTES }>(),
            cmdQueueOffset: num::usize_as_u64(Cmdq::CMDQ_OFFSET),
            statQueueOffset: num::usize_as_u64(Cmdq::STATQ_OFFSET),
            ..Default::default()
        })
    }
}
diff --git a/drivers/gpu/nova-core/gsp/fw/commands.rs b/drivers/gpu/nova-core/gsp/fw/commands.rs
new file mode 100644
index 000000000000..21be44199693
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp/fw/commands.rs
@@ -0,0 +1,128 @@
// SPDX-License-Identifier: GPL-2.0

use kernel::prelude::*;
use kernel::transmute::{AsBytes, FromBytes};
use kernel::{device, pci};

use crate::gsp::GSP_PAGE_SIZE;

use super::bindings;

/// Payload of the `GspSetSystemInfo` command.
#[repr(transparent)]
pub(crate) struct GspSetSystemInfo {
    inner: bindings::GspSystemInfo,
}
// The payload must be strictly smaller than one GSP page.
static_assert!(size_of::<GspSetSystemInfo>() < GSP_PAGE_SIZE);

impl GspSetSystemInfo {
    /// Returns an in-place initializer for the `GspSetSystemInfo` command.
    ///
    /// The physical addresses are taken from PCI resources 0, 1 and 3 of `dev`, and the PCI
    /// identification fields from the IDs reported by `dev`. Every field not listed explicitly
    /// is zeroed.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `dev.resource_start()`.
    // NOTE(review): `non_snake_case` is presumably allowed because the initializer macros
    // introduce bindings named after the camelCase fields — confirm.
    #[allow(non_snake_case)]
    pub(crate) fn init<'a>(dev: &'a pci::Device<device::Bound>) -> impl Init<Self, Error> + 'a {
        type InnerGspSystemInfo = bindings::GspSystemInfo;
        let init_inner = try_init!(InnerGspSystemInfo {
            gpuPhysAddr: dev.resource_start(0)?,
            gpuPhysFbAddr: dev.resource_start(1)?,
            gpuPhysInstAddr: dev.resource_start(3)?,
            nvDomainBusDeviceFunc: u64::from(dev.dev_id()),

            // Using TASK_SIZE in r535_gsp_rpc_set_system_info() seems wrong because
            // TASK_SIZE is per-task. That's probably a design issue in GSP-RM though.
            maxUserVa: (1 << 47) - 4096,
            pciConfigMirrorBase: 0x088000,
            pciConfigMirrorSize: 0x001000,

            // Both ID words pack the device ID into the upper 16 bits and the vendor ID into
            // the lower 16 bits.
            PCIDeviceID: (u32::from(dev.device_id()) << 16) | u32::from(dev.vendor_id().as_raw()),
            PCISubDeviceID: (u32::from(dev.subsystem_device_id()) << 16)
                | u32::from(dev.subsystem_vendor_id()),
            PCIRevisionID: u32::from(dev.revision_id()),
            bIsPrimary: 0,
            bPreserveVideoMemoryAllocations: 0,
            ..Zeroable::init_zeroed()
        });

        try_init!(GspSetSystemInfo {
            inner <- init_inner,
        })
    }
}

// SAFETY: These structs don't meet the no-padding requirements of AsBytes but
// that is not a problem because they are not used outside the kernel.
unsafe impl AsBytes for GspSetSystemInfo {}

// SAFETY: These structs don't meet the no-padding requirements of FromBytes but
// that is not a problem because they are not used outside the kernel.
unsafe impl FromBytes for GspSetSystemInfo {}

/// Transparent newtype over the `PACKED_REGISTRY_ENTRY` binding.
#[repr(transparent)]
pub(crate) struct PackedRegistryEntry(bindings::PACKED_REGISTRY_ENTRY);

impl PackedRegistryEntry {
    /// Creates a DWORD-typed registry entry.
    ///
    /// `offset` is stored in the `nameOffset` field and `value` in the `data` field; `length`
    /// is set to 0.
    // NOTE(review): what `offset` is relative to is not visible here — presumably the start of
    // the registry table; confirm against the caller.
    pub(crate) fn new(offset: u32, value: u32) -> Self {
        Self({
            bindings::PACKED_REGISTRY_ENTRY {
                nameOffset: offset,

                // We only support DWORD types for now. Support for other types
                // will come later if required.
                type_: bindings::REGISTRY_TABLE_ENTRY_TYPE_DWORD as u8,
                __bindgen_padding_0: Default::default(),
                data: value,
                length: 0,
            }
        })
    }
}

// SAFETY: Padding is explicit and will not contain uninitialized data.
unsafe impl AsBytes for PackedRegistryEntry {}

/// Payload of the `SetRegistry` command.
#[repr(transparent)]
pub(crate) struct PackedRegistryTable {
    inner: bindings::PACKED_REGISTRY_TABLE,
}

impl PackedRegistryTable {
    /// Returns an in-place initializer for the table header.
    ///
    /// `num_entries` is stored in the `numEntries` field and `size` in the `size` field; the
    /// `entries` field takes its `Default` value.
    // NOTE(review): `non_snake_case` is presumably allowed because the initializer macros
    // introduce bindings named after the camelCase fields — confirm.
    #[allow(non_snake_case)]
    pub(crate) fn init(num_entries: u32, size: u32) -> impl Init<Self> {
        type InnerPackedRegistryTable = bindings::PACKED_REGISTRY_TABLE;
        let init_inner = init!(InnerPackedRegistryTable {
            numEntries: num_entries,
            size,
            entries: Default::default()
        });

        init!(PackedRegistryTable { inner <- init_inner })
    }
}

// SAFETY: Padding is explicit and will not contain uninitialized data.
unsafe impl AsBytes for PackedRegistryTable {}

// SAFETY: This struct only contains integer types for which all bit patterns
// are valid.
unsafe impl FromBytes for PackedRegistryTable {}

/// Payload of the `GetGspStaticInfo` command and message.
#[repr(transparent)]
pub(crate) struct GspStaticConfigInfo(bindings::GspStaticConfigInfo_t);

impl GspStaticConfigInfo {
    /// Returns a bytes array containing the (hopefully) zero-terminated name of this GPU.
    ///
    /// The 64-byte `gpuNameString` field is copied out by value; no check for a terminating
    /// zero byte is made here.
    pub(crate) fn gpu_name_str(&self) -> [u8; 64] {
        self.0.gpuNameString
    }
}

// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for GspStaticConfigInfo {} + +// SAFETY: This struct only contains integer types for which all bit patterns +// are valid. +unsafe impl FromBytes for GspStaticConfigInfo {} + +// SAFETY: This struct only contains integer types and fixed-size arrays for which +// all bit patterns are valid. +unsafe impl Zeroable for GspStaticConfigInfo {} diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144.rs b/drivers/gpu/nova-core/gsp/fw/r570_144.rs index 35cb0370a7c9..048234d1a9d1 100644 --- a/drivers/gpu/nova-core/gsp/fw/r570_144.rs +++ b/drivers/gpu/nova-core/gsp/fw/r570_144.rs @@ -12,7 +12,6 @@ #![cfg_attr(test, allow(unsafe_op_in_unsafe_fn))] #![allow( dead_code, - unused_imports, clippy::all, clippy::undocumented_unsafe_blocks, clippy::ptr_as_ptr, @@ -25,5 +24,8 @@ unreachable_pub, unsafe_op_in_unsafe_fn )] -use kernel::ffi; +use kernel::{ + ffi, + prelude::Zeroable, // +}; include!("r570_144/bindings.rs"); diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs index cec594032515..5bcfbcd1ad22 100644 --- a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs +++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs @@ -1 +1,951 @@ // SPDX-License-Identifier: GPL-2.0 + +#[repr(C)] +#[derive(Default)] +pub struct __IncompleteArrayField<T>(::core::marker::PhantomData<T>, [T; 0]); +impl<T> __IncompleteArrayField<T> { + #[inline] + pub const fn new() -> Self { + __IncompleteArrayField(::core::marker::PhantomData, []) + } + #[inline] + pub fn as_ptr(&self) -> *const T { + self as *const _ as *const T + } + #[inline] + pub fn as_mut_ptr(&mut self) -> *mut T { + self as *mut _ as *mut T + } + #[inline] + pub unsafe fn as_slice(&self, len: usize) -> &[T] { + ::core::slice::from_raw_parts(self.as_ptr(), len) + } + #[inline] + pub unsafe fn as_mut_slice(&mut self, len: usize) -> &mut [T] { + ::core::slice::from_raw_parts_mut(self.as_mut_ptr(), len) + } +} +impl<T> ::core::fmt::Debug for __IncompleteArrayField<T> 
{ + fn fmt(&self, fmt: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result { + fmt.write_str("__IncompleteArrayField") + } +} +pub const NV_VGPU_MSG_SIGNATURE_VALID: u32 = 1129337430; +pub const GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS2: u32 = 0; +pub const GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS3_BAREMETAL: u32 = 23068672; +pub const GSP_FW_HEAP_PARAM_BASE_RM_SIZE_TU10X: u32 = 8388608; +pub const GSP_FW_HEAP_PARAM_SIZE_PER_GB_FB: u32 = 98304; +pub const GSP_FW_HEAP_PARAM_CLIENT_ALLOC_SIZE: u32 = 100663296; +pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MIN_MB: u32 = 64; +pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MAX_MB: u32 = 256; +pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MIN_MB: u32 = 88; +pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MAX_MB: u32 = 280; +pub const GSP_FW_WPR_META_REVISION: u32 = 1; +pub const GSP_FW_WPR_META_MAGIC: i64 = -2577556379034558285; +pub const REGISTRY_TABLE_ENTRY_TYPE_DWORD: u32 = 1; +pub type __u8 = ffi::c_uchar; +pub type __u16 = ffi::c_ushort; +pub type __u32 = ffi::c_uint; +pub type __u64 = ffi::c_ulonglong; +pub type u8_ = __u8; +pub type u16_ = __u16; +pub type u32_ = __u32; +pub type u64_ = __u64; +pub const NV_VGPU_MSG_FUNCTION_NOP: _bindgen_ty_2 = 0; +pub const NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO: _bindgen_ty_2 = 1; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_ROOT: _bindgen_ty_2 = 2; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_DEVICE: _bindgen_ty_2 = 3; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_MEMORY: _bindgen_ty_2 = 4; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_CTX_DMA: _bindgen_ty_2 = 5; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_CHANNEL_DMA: _bindgen_ty_2 = 6; +pub const NV_VGPU_MSG_FUNCTION_MAP_MEMORY: _bindgen_ty_2 = 7; +pub const NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA: _bindgen_ty_2 = 8; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_OBJECT: _bindgen_ty_2 = 9; +pub const NV_VGPU_MSG_FUNCTION_FREE: _bindgen_ty_2 = 10; +pub const NV_VGPU_MSG_FUNCTION_LOG: _bindgen_ty_2 = 11; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_VIDMEM: _bindgen_ty_2 = 12; +pub const 
NV_VGPU_MSG_FUNCTION_UNMAP_MEMORY: _bindgen_ty_2 = 13; +pub const NV_VGPU_MSG_FUNCTION_MAP_MEMORY_DMA: _bindgen_ty_2 = 14; +pub const NV_VGPU_MSG_FUNCTION_UNMAP_MEMORY_DMA: _bindgen_ty_2 = 15; +pub const NV_VGPU_MSG_FUNCTION_GET_EDID: _bindgen_ty_2 = 16; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_DISP_CHANNEL: _bindgen_ty_2 = 17; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_DISP_OBJECT: _bindgen_ty_2 = 18; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_SUBDEVICE: _bindgen_ty_2 = 19; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_DYNAMIC_MEMORY: _bindgen_ty_2 = 20; +pub const NV_VGPU_MSG_FUNCTION_DUP_OBJECT: _bindgen_ty_2 = 21; +pub const NV_VGPU_MSG_FUNCTION_IDLE_CHANNELS: _bindgen_ty_2 = 22; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_EVENT: _bindgen_ty_2 = 23; +pub const NV_VGPU_MSG_FUNCTION_SEND_EVENT: _bindgen_ty_2 = 24; +pub const NV_VGPU_MSG_FUNCTION_REMAPPER_CONTROL: _bindgen_ty_2 = 25; +pub const NV_VGPU_MSG_FUNCTION_DMA_CONTROL: _bindgen_ty_2 = 26; +pub const NV_VGPU_MSG_FUNCTION_DMA_FILL_PTE_MEM: _bindgen_ty_2 = 27; +pub const NV_VGPU_MSG_FUNCTION_MANAGE_HW_RESOURCE: _bindgen_ty_2 = 28; +pub const NV_VGPU_MSG_FUNCTION_BIND_ARBITRARY_CTX_DMA: _bindgen_ty_2 = 29; +pub const NV_VGPU_MSG_FUNCTION_CREATE_FB_SEGMENT: _bindgen_ty_2 = 30; +pub const NV_VGPU_MSG_FUNCTION_DESTROY_FB_SEGMENT: _bindgen_ty_2 = 31; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_SHARE_DEVICE: _bindgen_ty_2 = 32; +pub const NV_VGPU_MSG_FUNCTION_DEFERRED_API_CONTROL: _bindgen_ty_2 = 33; +pub const NV_VGPU_MSG_FUNCTION_REMOVE_DEFERRED_API: _bindgen_ty_2 = 34; +pub const NV_VGPU_MSG_FUNCTION_SIM_ESCAPE_READ: _bindgen_ty_2 = 35; +pub const NV_VGPU_MSG_FUNCTION_SIM_ESCAPE_WRITE: _bindgen_ty_2 = 36; +pub const NV_VGPU_MSG_FUNCTION_SIM_MANAGE_DISPLAY_CONTEXT_DMA: _bindgen_ty_2 = 37; +pub const NV_VGPU_MSG_FUNCTION_FREE_VIDMEM_VIRT: _bindgen_ty_2 = 38; +pub const NV_VGPU_MSG_FUNCTION_PERF_GET_PSTATE_INFO: _bindgen_ty_2 = 39; +pub const NV_VGPU_MSG_FUNCTION_PERF_GET_PERFMON_SAMPLE: _bindgen_ty_2 = 40; +pub const 
NV_VGPU_MSG_FUNCTION_PERF_GET_VIRTUAL_PSTATE_INFO: _bindgen_ty_2 = 41; +pub const NV_VGPU_MSG_FUNCTION_PERF_GET_LEVEL_INFO: _bindgen_ty_2 = 42; +pub const NV_VGPU_MSG_FUNCTION_MAP_SEMA_MEMORY: _bindgen_ty_2 = 43; +pub const NV_VGPU_MSG_FUNCTION_UNMAP_SEMA_MEMORY: _bindgen_ty_2 = 44; +pub const NV_VGPU_MSG_FUNCTION_SET_SURFACE_PROPERTIES: _bindgen_ty_2 = 45; +pub const NV_VGPU_MSG_FUNCTION_CLEANUP_SURFACE: _bindgen_ty_2 = 46; +pub const NV_VGPU_MSG_FUNCTION_UNLOADING_GUEST_DRIVER: _bindgen_ty_2 = 47; +pub const NV_VGPU_MSG_FUNCTION_TDR_SET_TIMEOUT_STATE: _bindgen_ty_2 = 48; +pub const NV_VGPU_MSG_FUNCTION_SWITCH_TO_VGA: _bindgen_ty_2 = 49; +pub const NV_VGPU_MSG_FUNCTION_GPU_EXEC_REG_OPS: _bindgen_ty_2 = 50; +pub const NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO: _bindgen_ty_2 = 51; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_VIRTMEM: _bindgen_ty_2 = 52; +pub const NV_VGPU_MSG_FUNCTION_UPDATE_PDE_2: _bindgen_ty_2 = 53; +pub const NV_VGPU_MSG_FUNCTION_SET_PAGE_DIRECTORY: _bindgen_ty_2 = 54; +pub const NV_VGPU_MSG_FUNCTION_GET_STATIC_PSTATE_INFO: _bindgen_ty_2 = 55; +pub const NV_VGPU_MSG_FUNCTION_TRANSLATE_GUEST_GPU_PTES: _bindgen_ty_2 = 56; +pub const NV_VGPU_MSG_FUNCTION_RESERVED_57: _bindgen_ty_2 = 57; +pub const NV_VGPU_MSG_FUNCTION_RESET_CURRENT_GR_CONTEXT: _bindgen_ty_2 = 58; +pub const NV_VGPU_MSG_FUNCTION_SET_SEMA_MEM_VALIDATION_STATE: _bindgen_ty_2 = 59; +pub const NV_VGPU_MSG_FUNCTION_GET_ENGINE_UTILIZATION: _bindgen_ty_2 = 60; +pub const NV_VGPU_MSG_FUNCTION_UPDATE_GPU_PDES: _bindgen_ty_2 = 61; +pub const NV_VGPU_MSG_FUNCTION_GET_ENCODER_CAPACITY: _bindgen_ty_2 = 62; +pub const NV_VGPU_MSG_FUNCTION_VGPU_PF_REG_READ32: _bindgen_ty_2 = 63; +pub const NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO_EXT: _bindgen_ty_2 = 64; +pub const NV_VGPU_MSG_FUNCTION_GET_GSP_STATIC_INFO: _bindgen_ty_2 = 65; +pub const NV_VGPU_MSG_FUNCTION_RMFS_INIT: _bindgen_ty_2 = 66; +pub const NV_VGPU_MSG_FUNCTION_RMFS_CLOSE_QUEUE: _bindgen_ty_2 = 67; +pub const NV_VGPU_MSG_FUNCTION_RMFS_CLEANUP: 
_bindgen_ty_2 = 68; +pub const NV_VGPU_MSG_FUNCTION_RMFS_TEST: _bindgen_ty_2 = 69; +pub const NV_VGPU_MSG_FUNCTION_UPDATE_BAR_PDE: _bindgen_ty_2 = 70; +pub const NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD: _bindgen_ty_2 = 71; +pub const NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO: _bindgen_ty_2 = 72; +pub const NV_VGPU_MSG_FUNCTION_SET_REGISTRY: _bindgen_ty_2 = 73; +pub const NV_VGPU_MSG_FUNCTION_GSP_INIT_POST_OBJGPU: _bindgen_ty_2 = 74; +pub const NV_VGPU_MSG_FUNCTION_SUBDEV_EVENT_SET_NOTIFICATION: _bindgen_ty_2 = 75; +pub const NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL: _bindgen_ty_2 = 76; +pub const NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO2: _bindgen_ty_2 = 77; +pub const NV_VGPU_MSG_FUNCTION_DUMP_PROTOBUF_COMPONENT: _bindgen_ty_2 = 78; +pub const NV_VGPU_MSG_FUNCTION_UNSET_PAGE_DIRECTORY: _bindgen_ty_2 = 79; +pub const NV_VGPU_MSG_FUNCTION_GET_CONSOLIDATED_STATIC_INFO: _bindgen_ty_2 = 80; +pub const NV_VGPU_MSG_FUNCTION_GMMU_REGISTER_FAULT_BUFFER: _bindgen_ty_2 = 81; +pub const NV_VGPU_MSG_FUNCTION_GMMU_UNREGISTER_FAULT_BUFFER: _bindgen_ty_2 = 82; +pub const NV_VGPU_MSG_FUNCTION_GMMU_REGISTER_CLIENT_SHADOW_FAULT_BUFFER: _bindgen_ty_2 = 83; +pub const NV_VGPU_MSG_FUNCTION_GMMU_UNREGISTER_CLIENT_SHADOW_FAULT_BUFFER: _bindgen_ty_2 = 84; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_VGPU_FB_USAGE: _bindgen_ty_2 = 85; +pub const NV_VGPU_MSG_FUNCTION_CTRL_NVFBC_SW_SESSION_UPDATE_INFO: _bindgen_ty_2 = 86; +pub const NV_VGPU_MSG_FUNCTION_CTRL_NVENC_SW_SESSION_UPDATE_INFO: _bindgen_ty_2 = 87; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RESET_CHANNEL: _bindgen_ty_2 = 88; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RESET_ISOLATED_CHANNEL: _bindgen_ty_2 = 89; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_HANDLE_VF_PRI_FAULT: _bindgen_ty_2 = 90; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CLK_GET_EXTENDED_INFO: _bindgen_ty_2 = 91; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PERF_BOOST: _bindgen_ty_2 = 92; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PERF_VPSTATES_GET_CONTROL: _bindgen_ty_2 = 93; +pub const 
NV_VGPU_MSG_FUNCTION_CTRL_GET_ZBC_CLEAR_TABLE: _bindgen_ty_2 = 94; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_ZBC_COLOR_CLEAR: _bindgen_ty_2 = 95; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_ZBC_DEPTH_CLEAR: _bindgen_ty_2 = 96; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPFIFO_SCHEDULE: _bindgen_ty_2 = 97; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_TIMESLICE: _bindgen_ty_2 = 98; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PREEMPT: _bindgen_ty_2 = 99; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FIFO_DISABLE_CHANNELS: _bindgen_ty_2 = 100; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_TSG_INTERLEAVE_LEVEL: _bindgen_ty_2 = 101; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_CHANNEL_INTERLEAVE_LEVEL: _bindgen_ty_2 = 102; +pub const NV_VGPU_MSG_FUNCTION_GSP_RM_ALLOC: _bindgen_ty_2 = 103; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_P2P_CAPS_V2: _bindgen_ty_2 = 104; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CIPHER_AES_ENCRYPT: _bindgen_ty_2 = 105; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CIPHER_SESSION_KEY: _bindgen_ty_2 = 106; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CIPHER_SESSION_KEY_STATUS: _bindgen_ty_2 = 107; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_CLEAR_ALL_SM_ERROR_STATES: _bindgen_ty_2 = 108; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_READ_ALL_SM_ERROR_STATES: _bindgen_ty_2 = 109; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_EXCEPTION_MASK: _bindgen_ty_2 = 110; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_PROMOTE_CTX: _bindgen_ty_2 = 111; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_CTXSW_PREEMPTION_BIND: _bindgen_ty_2 = 112; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_SET_CTXSW_PREEMPTION_MODE: _bindgen_ty_2 = 113; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_CTXSW_ZCULL_BIND: _bindgen_ty_2 = 114; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_INITIALIZE_CTX: _bindgen_ty_2 = 115; +pub const NV_VGPU_MSG_FUNCTION_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES: _bindgen_ty_2 = 116; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FIFO_CLEAR_FAULTED_BIT: _bindgen_ty_2 = 117; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_LATEST_ECC_ADDRESSES: _bindgen_ty_2 = 118; +pub 
const NV_VGPU_MSG_FUNCTION_CTRL_MC_SERVICE_INTERRUPTS: _bindgen_ty_2 = 119; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DMA_SET_DEFAULT_VASPACE: _bindgen_ty_2 = 120; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_CE_PCE_MASK: _bindgen_ty_2 = 121; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_ZBC_CLEAR_TABLE_ENTRY: _bindgen_ty_2 = 122; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_NVLINK_PEER_ID_MASK: _bindgen_ty_2 = 123; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_NVLINK_STATUS: _bindgen_ty_2 = 124; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_P2P_CAPS: _bindgen_ty_2 = 125; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_P2P_CAPS_MATRIX: _bindgen_ty_2 = 126; +pub const NV_VGPU_MSG_FUNCTION_RESERVED_0: _bindgen_ty_2 = 127; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RESERVE_PM_AREA_SMPC: _bindgen_ty_2 = 128; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RESERVE_HWPM_LEGACY: _bindgen_ty_2 = 129; +pub const NV_VGPU_MSG_FUNCTION_CTRL_B0CC_EXEC_REG_OPS: _bindgen_ty_2 = 130; +pub const NV_VGPU_MSG_FUNCTION_CTRL_BIND_PM_RESOURCES: _bindgen_ty_2 = 131; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SUSPEND_CONTEXT: _bindgen_ty_2 = 132; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_RESUME_CONTEXT: _bindgen_ty_2 = 133; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_EXEC_REG_OPS: _bindgen_ty_2 = 134; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_MODE_MMU_DEBUG: _bindgen_ty_2 = 135; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_READ_SINGLE_SM_ERROR_STATE: _bindgen_ty_2 = 136; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_CLEAR_SINGLE_SM_ERROR_STATE: _bindgen_ty_2 = 137; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_MODE_ERRBAR_DEBUG: _bindgen_ty_2 = 138; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_NEXT_STOP_TRIGGER_TYPE: _bindgen_ty_2 = 139; +pub const NV_VGPU_MSG_FUNCTION_CTRL_ALLOC_PMA_STREAM: _bindgen_ty_2 = 140; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PMA_STREAM_UPDATE_GET_PUT: _bindgen_ty_2 = 141; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FB_GET_INFO_V2: _bindgen_ty_2 = 142; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FIFO_SET_CHANNEL_PROPERTIES: _bindgen_ty_2 = 
143; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_GET_CTX_BUFFER_INFO: _bindgen_ty_2 = 144; +pub const NV_VGPU_MSG_FUNCTION_CTRL_KGR_GET_CTX_BUFFER_PTES: _bindgen_ty_2 = 145; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_EVICT_CTX: _bindgen_ty_2 = 146; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FB_GET_FS_INFO: _bindgen_ty_2 = 147; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GRMGR_GET_GR_FS_INFO: _bindgen_ty_2 = 148; +pub const NV_VGPU_MSG_FUNCTION_CTRL_STOP_CHANNEL: _bindgen_ty_2 = 149; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_PC_SAMPLING_MODE: _bindgen_ty_2 = 150; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PERF_RATED_TDP_GET_STATUS: _bindgen_ty_2 = 151; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PERF_RATED_TDP_SET_CONTROL: _bindgen_ty_2 = 152; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FREE_PMA_STREAM: _bindgen_ty_2 = 153; +pub const NV_VGPU_MSG_FUNCTION_CTRL_TIMER_SET_GR_TICK_FREQ: _bindgen_ty_2 = 154; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FIFO_SETUP_VF_ZOMBIE_SUBCTX_PDB: _bindgen_ty_2 = 155; +pub const NV_VGPU_MSG_FUNCTION_GET_CONSOLIDATED_GR_STATIC_INFO: _bindgen_ty_2 = 156; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_SINGLE_SM_SINGLE_STEP: _bindgen_ty_2 = 157; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_GET_TPC_PARTITION_MODE: _bindgen_ty_2 = 158; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_SET_TPC_PARTITION_MODE: _bindgen_ty_2 = 159; +pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_ALLOCATE: _bindgen_ty_2 = 160; +pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_DESTROY: _bindgen_ty_2 = 161; +pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_MAP: _bindgen_ty_2 = 162; +pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_UNMAP: _bindgen_ty_2 = 163; +pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_PUSH_STREAM: _bindgen_ty_2 = 164; +pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_SET_HANDLES: _bindgen_ty_2 = 165; +pub const NV_VGPU_MSG_FUNCTION_UVM_METHOD_STREAM_GUEST_PAGES_OPERATION: _bindgen_ty_2 = 166; +pub const NV_VGPU_MSG_FUNCTION_CTRL_INTERNAL_QUIESCE_PMA_CHANNEL: _bindgen_ty_2 = 167; +pub const 
NV_VGPU_MSG_FUNCTION_DCE_RM_INIT: _bindgen_ty_2 = 168; +pub const NV_VGPU_MSG_FUNCTION_REGISTER_VIRTUAL_EVENT_BUFFER: _bindgen_ty_2 = 169; +pub const NV_VGPU_MSG_FUNCTION_CTRL_EVENT_BUFFER_UPDATE_GET: _bindgen_ty_2 = 170; +pub const NV_VGPU_MSG_FUNCTION_GET_PLCABLE_ADDRESS_KIND: _bindgen_ty_2 = 171; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PERF_LIMITS_SET_STATUS_V2: _bindgen_ty_2 = 172; +pub const NV_VGPU_MSG_FUNCTION_CTRL_INTERNAL_SRIOV_PROMOTE_PMA_STREAM: _bindgen_ty_2 = 173; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_MMU_DEBUG_MODE: _bindgen_ty_2 = 174; +pub const NV_VGPU_MSG_FUNCTION_CTRL_INTERNAL_PROMOTE_FAULT_METHOD_BUFFERS: _bindgen_ty_2 = 175; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FLCN_GET_CTX_BUFFER_SIZE: _bindgen_ty_2 = 176; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FLCN_GET_CTX_BUFFER_INFO: _bindgen_ty_2 = 177; +pub const NV_VGPU_MSG_FUNCTION_DISABLE_CHANNELS: _bindgen_ty_2 = 178; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FABRIC_MEMORY_DESCRIBE: _bindgen_ty_2 = 179; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FABRIC_MEM_STATS: _bindgen_ty_2 = 180; +pub const NV_VGPU_MSG_FUNCTION_SAVE_HIBERNATION_DATA: _bindgen_ty_2 = 181; +pub const NV_VGPU_MSG_FUNCTION_RESTORE_HIBERNATION_DATA: _bindgen_ty_2 = 182; +pub const NV_VGPU_MSG_FUNCTION_CTRL_INTERNAL_MEMSYS_SET_ZBC_REFERENCED: _bindgen_ty_2 = 183; +pub const NV_VGPU_MSG_FUNCTION_CTRL_EXEC_PARTITIONS_CREATE: _bindgen_ty_2 = 184; +pub const NV_VGPU_MSG_FUNCTION_CTRL_EXEC_PARTITIONS_DELETE: _bindgen_ty_2 = 185; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPFIFO_GET_WORK_SUBMIT_TOKEN: _bindgen_ty_2 = 186; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPFIFO_SET_WORK_SUBMIT_TOKEN_NOTIF_INDEX: _bindgen_ty_2 = 187; +pub const NV_VGPU_MSG_FUNCTION_PMA_SCRUBBER_SHARED_BUFFER_GUEST_PAGES_OPERATION: _bindgen_ty_2 = + 188; +pub const NV_VGPU_MSG_FUNCTION_CTRL_MASTER_GET_VIRTUAL_FUNCTION_ERROR_CONT_INTR_MASK: + _bindgen_ty_2 = 189; +pub const NV_VGPU_MSG_FUNCTION_SET_SYSMEM_DIRTY_PAGE_TRACKING_BUFFER: _bindgen_ty_2 = 190; +pub const 
NV_VGPU_MSG_FUNCTION_CTRL_SUBDEVICE_GET_P2P_CAPS: _bindgen_ty_2 = 191; +pub const NV_VGPU_MSG_FUNCTION_CTRL_BUS_SET_P2P_MAPPING: _bindgen_ty_2 = 192; +pub const NV_VGPU_MSG_FUNCTION_CTRL_BUS_UNSET_P2P_MAPPING: _bindgen_ty_2 = 193; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FLA_SETUP_INSTANCE_MEM_BLOCK: _bindgen_ty_2 = 194; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_MIGRATABLE_OPS: _bindgen_ty_2 = 195; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_TOTAL_HS_CREDITS: _bindgen_ty_2 = 196; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_HS_CREDITS: _bindgen_ty_2 = 197; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_HS_CREDITS: _bindgen_ty_2 = 198; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PM_AREA_PC_SAMPLER: _bindgen_ty_2 = 199; +pub const NV_VGPU_MSG_FUNCTION_INVALIDATE_TLB: _bindgen_ty_2 = 200; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_QUERY_ECC_STATUS: _bindgen_ty_2 = 201; +pub const NV_VGPU_MSG_FUNCTION_ECC_NOTIFIER_WRITE_ACK: _bindgen_ty_2 = 202; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_GET_MODE_MMU_DEBUG: _bindgen_ty_2 = 203; +pub const NV_VGPU_MSG_FUNCTION_RM_API_CONTROL: _bindgen_ty_2 = 204; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CMD_INTERNAL_GPU_START_FABRIC_PROBE: _bindgen_ty_2 = 205; +pub const NV_VGPU_MSG_FUNCTION_CTRL_NVLINK_GET_INBAND_RECEIVED_DATA: _bindgen_ty_2 = 206; +pub const NV_VGPU_MSG_FUNCTION_GET_STATIC_DATA: _bindgen_ty_2 = 207; +pub const NV_VGPU_MSG_FUNCTION_RESERVED_208: _bindgen_ty_2 = 208; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_GET_INFO_V2: _bindgen_ty_2 = 209; +pub const NV_VGPU_MSG_FUNCTION_GET_BRAND_CAPS: _bindgen_ty_2 = 210; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CMD_NVLINK_INBAND_SEND_DATA: _bindgen_ty_2 = 211; +pub const NV_VGPU_MSG_FUNCTION_UPDATE_GPM_GUEST_BUFFER_INFO: _bindgen_ty_2 = 212; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CMD_INTERNAL_CONTROL_GSP_TRACE: _bindgen_ty_2 = 213; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_ZBC_STENCIL_CLEAR: _bindgen_ty_2 = 214; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SUBDEVICE_GET_VGPU_HEAP_STATS: _bindgen_ty_2 = 215; +pub const 
NV_VGPU_MSG_FUNCTION_CTRL_SUBDEVICE_GET_LIBOS_HEAP_STATS: _bindgen_ty_2 = 216; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_MODE_MMU_GCC_DEBUG: _bindgen_ty_2 = 217; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_GET_MODE_MMU_GCC_DEBUG: _bindgen_ty_2 = 218; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RESERVE_HES: _bindgen_ty_2 = 219; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RELEASE_HES: _bindgen_ty_2 = 220; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RESERVE_CCU_PROF: _bindgen_ty_2 = 221; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RELEASE_CCU_PROF: _bindgen_ty_2 = 222; +pub const NV_VGPU_MSG_FUNCTION_RESERVED: _bindgen_ty_2 = 223; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CMD_GET_CHIPLET_HS_CREDIT_POOL: _bindgen_ty_2 = 224; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CMD_GET_HS_CREDITS_MAPPING: _bindgen_ty_2 = 225; +pub const NV_VGPU_MSG_FUNCTION_CTRL_EXEC_PARTITIONS_EXPORT: _bindgen_ty_2 = 226; +pub const NV_VGPU_MSG_FUNCTION_NUM_FUNCTIONS: _bindgen_ty_2 = 227; +pub type _bindgen_ty_2 = ffi::c_uint; +pub const NV_VGPU_MSG_EVENT_FIRST_EVENT: _bindgen_ty_3 = 4096; +pub const NV_VGPU_MSG_EVENT_GSP_INIT_DONE: _bindgen_ty_3 = 4097; +pub const NV_VGPU_MSG_EVENT_GSP_RUN_CPU_SEQUENCER: _bindgen_ty_3 = 4098; +pub const NV_VGPU_MSG_EVENT_POST_EVENT: _bindgen_ty_3 = 4099; +pub const NV_VGPU_MSG_EVENT_RC_TRIGGERED: _bindgen_ty_3 = 4100; +pub const NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED: _bindgen_ty_3 = 4101; +pub const NV_VGPU_MSG_EVENT_OS_ERROR_LOG: _bindgen_ty_3 = 4102; +pub const NV_VGPU_MSG_EVENT_RG_LINE_INTR: _bindgen_ty_3 = 4103; +pub const NV_VGPU_MSG_EVENT_GPUACCT_PERFMON_UTIL_SAMPLES: _bindgen_ty_3 = 4104; +pub const NV_VGPU_MSG_EVENT_SIM_READ: _bindgen_ty_3 = 4105; +pub const NV_VGPU_MSG_EVENT_SIM_WRITE: _bindgen_ty_3 = 4106; +pub const NV_VGPU_MSG_EVENT_SEMAPHORE_SCHEDULE_CALLBACK: _bindgen_ty_3 = 4107; +pub const NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT: _bindgen_ty_3 = 4108; +pub const NV_VGPU_MSG_EVENT_VGPU_GSP_PLUGIN_TRIGGERED: _bindgen_ty_3 = 4109; +pub const 
NV_VGPU_MSG_EVENT_PERF_GPU_BOOST_SYNC_LIMITS_CALLBACK: _bindgen_ty_3 = 4110; +pub const NV_VGPU_MSG_EVENT_PERF_BRIDGELESS_INFO_UPDATE: _bindgen_ty_3 = 4111; +pub const NV_VGPU_MSG_EVENT_VGPU_CONFIG: _bindgen_ty_3 = 4112; +pub const NV_VGPU_MSG_EVENT_DISPLAY_MODESET: _bindgen_ty_3 = 4113; +pub const NV_VGPU_MSG_EVENT_EXTDEV_INTR_SERVICE: _bindgen_ty_3 = 4114; +pub const NV_VGPU_MSG_EVENT_NVLINK_INBAND_RECEIVED_DATA_256: _bindgen_ty_3 = 4115; +pub const NV_VGPU_MSG_EVENT_NVLINK_INBAND_RECEIVED_DATA_512: _bindgen_ty_3 = 4116; +pub const NV_VGPU_MSG_EVENT_NVLINK_INBAND_RECEIVED_DATA_1024: _bindgen_ty_3 = 4117; +pub const NV_VGPU_MSG_EVENT_NVLINK_INBAND_RECEIVED_DATA_2048: _bindgen_ty_3 = 4118; +pub const NV_VGPU_MSG_EVENT_NVLINK_INBAND_RECEIVED_DATA_4096: _bindgen_ty_3 = 4119; +pub const NV_VGPU_MSG_EVENT_TIMED_SEMAPHORE_RELEASE: _bindgen_ty_3 = 4120; +pub const NV_VGPU_MSG_EVENT_NVLINK_IS_GPU_DEGRADED: _bindgen_ty_3 = 4121; +pub const NV_VGPU_MSG_EVENT_PFM_REQ_HNDLR_STATE_SYNC_CALLBACK: _bindgen_ty_3 = 4122; +pub const NV_VGPU_MSG_EVENT_NVLINK_FAULT_UP: _bindgen_ty_3 = 4123; +pub const NV_VGPU_MSG_EVENT_GSP_LOCKDOWN_NOTICE: _bindgen_ty_3 = 4124; +pub const NV_VGPU_MSG_EVENT_MIG_CI_CONFIG_UPDATE: _bindgen_ty_3 = 4125; +pub const NV_VGPU_MSG_EVENT_UPDATE_GSP_TRACE: _bindgen_ty_3 = 4126; +pub const NV_VGPU_MSG_EVENT_NVLINK_FATAL_ERROR_RECOVERY: _bindgen_ty_3 = 4127; +pub const NV_VGPU_MSG_EVENT_GSP_POST_NOCAT_RECORD: _bindgen_ty_3 = 4128; +pub const NV_VGPU_MSG_EVENT_FECS_ERROR: _bindgen_ty_3 = 4129; +pub const NV_VGPU_MSG_EVENT_RECOVERY_ACTION: _bindgen_ty_3 = 4130; +pub const NV_VGPU_MSG_EVENT_NUM_EVENTS: _bindgen_ty_3 = 4131; +pub type _bindgen_ty_3 = ffi::c_uint; +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct NV0080_CTRL_GPU_GET_SRIOV_CAPS_PARAMS { + pub totalVFs: u32_, + pub firstVfOffset: u32_, + pub vfFeatureMask: u32_, + pub FirstVFBar0Address: u64_, + pub FirstVFBar1Address: u64_, + pub FirstVFBar2Address: u64_, + pub bar0Size: u64_, + pub 
bar1Size: u64_, + pub bar2Size: u64_, + pub b64bitBar0: u8_, + pub b64bitBar1: u8_, + pub b64bitBar2: u8_, + pub bSriovEnabled: u8_, + pub bSriovHeavyEnabled: u8_, + pub bEmulateVFBar0TlbInvalidationRegister: u8_, + pub bClientRmAllocatedCtxBuffer: u8_, + pub bNonPowerOf2ChannelCountSupported: u8_, + pub bVfResizableBAR1Supported: u8_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct NV2080_CTRL_BIOS_GET_SKU_INFO_PARAMS { + pub BoardID: u32_, + pub chipSKU: [ffi::c_char; 9usize], + pub chipSKUMod: [ffi::c_char; 5usize], + pub skuConfigVersion: u32_, + pub project: [ffi::c_char; 5usize], + pub projectSKU: [ffi::c_char; 5usize], + pub CDP: [ffi::c_char; 6usize], + pub projectSKUMod: [ffi::c_char; 2usize], + pub businessCycle: u32_, +} +pub type NV2080_CTRL_CMD_FB_GET_FB_REGION_SURFACE_MEM_TYPE_FLAG = [u8_; 17usize]; +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct NV2080_CTRL_CMD_FB_GET_FB_REGION_FB_REGION_INFO { + pub base: u64_, + pub limit: u64_, + pub reserved: u64_, + pub performance: u32_, + pub supportCompressed: u8_, + pub supportISO: u8_, + pub bProtected: u8_, + pub blackList: NV2080_CTRL_CMD_FB_GET_FB_REGION_SURFACE_MEM_TYPE_FLAG, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct NV2080_CTRL_CMD_FB_GET_FB_REGION_INFO_PARAMS { + pub numFBRegions: u32_, + pub fbRegion: [NV2080_CTRL_CMD_FB_GET_FB_REGION_FB_REGION_INFO; 16usize], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct NV2080_CTRL_GPU_GET_GID_INFO_PARAMS { + pub index: u32_, + pub flags: u32_, + pub length: u32_, + pub data: [u8_; 256usize], +} +impl Default for NV2080_CTRL_GPU_GET_GID_INFO_PARAMS { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct DOD_METHOD_DATA { + pub status: u32_, + pub acpiIdListLen: u32_, + pub acpiIdList: [u32_; 16usize], +} 
+#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct JT_METHOD_DATA { + pub status: u32_, + pub jtCaps: u32_, + pub jtRevId: u16_, + pub bSBIOSCaps: u8_, + pub __bindgen_padding_0: u8, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct MUX_METHOD_DATA_ELEMENT { + pub acpiId: u32_, + pub mode: u32_, + pub status: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct MUX_METHOD_DATA { + pub tableLen: u32_, + pub acpiIdMuxModeTable: [MUX_METHOD_DATA_ELEMENT; 16usize], + pub acpiIdMuxPartTable: [MUX_METHOD_DATA_ELEMENT; 16usize], + pub acpiIdMuxStateTable: [MUX_METHOD_DATA_ELEMENT; 16usize], +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct CAPS_METHOD_DATA { + pub status: u32_, + pub optimusCaps: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct ACPI_METHOD_DATA { + pub bValid: u8_, + pub __bindgen_padding_0: [u8; 3usize], + pub dodMethodData: DOD_METHOD_DATA, + pub jtMethodData: JT_METHOD_DATA, + pub muxMethodData: MUX_METHOD_DATA, + pub capsMethodData: CAPS_METHOD_DATA, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct VIRTUAL_DISPLAY_GET_MAX_RESOLUTION_PARAMS { + pub headIndex: u32_, + pub maxHResolution: u32_, + pub maxVResolution: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct VIRTUAL_DISPLAY_GET_NUM_HEADS_PARAMS { + pub numHeads: u32_, + pub maxNumHeads: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct BUSINFO { + pub deviceID: u16_, + pub vendorID: u16_, + pub subdeviceID: u16_, + pub subvendorID: u16_, + pub revisionID: u8_, + pub __bindgen_padding_0: u8, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct GSP_VF_INFO { + pub totalVFs: u32_, + pub firstVFOffset: u32_, + pub FirstVFBar0Address: u64_, + pub FirstVFBar1Address: u64_, + pub FirstVFBar2Address: u64_, + pub b64bitBar0: u8_, + pub b64bitBar1: u8_, + pub b64bitBar2: 
u8_, + pub __bindgen_padding_0: [u8; 5usize], +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct GSP_PCIE_CONFIG_REG { + pub linkCap: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct EcidManufacturingInfo { + pub ecidLow: u32_, + pub ecidHigh: u32_, + pub ecidExtended: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FW_WPR_LAYOUT_OFFSET { + pub nonWprHeapOffset: u64_, + pub frtsOffset: u64_, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct GspStaticConfigInfo_t { + pub grCapsBits: [u8_; 23usize], + pub gidInfo: NV2080_CTRL_GPU_GET_GID_INFO_PARAMS, + pub SKUInfo: NV2080_CTRL_BIOS_GET_SKU_INFO_PARAMS, + pub fbRegionInfoParams: NV2080_CTRL_CMD_FB_GET_FB_REGION_INFO_PARAMS, + pub sriovCaps: NV0080_CTRL_GPU_GET_SRIOV_CAPS_PARAMS, + pub sriovMaxGfid: u32_, + pub engineCaps: [u32_; 3usize], + pub poisonFuseEnabled: u8_, + pub fb_length: u64_, + pub fbio_mask: u64_, + pub fb_bus_width: u32_, + pub fb_ram_type: u32_, + pub fbp_mask: u64_, + pub l2_cache_size: u32_, + pub gpuNameString: [u8_; 64usize], + pub gpuShortNameString: [u8_; 64usize], + pub gpuNameString_Unicode: [u16_; 64usize], + pub bGpuInternalSku: u8_, + pub bIsQuadroGeneric: u8_, + pub bIsQuadroAd: u8_, + pub bIsNvidiaNvs: u8_, + pub bIsVgx: u8_, + pub bGeforceSmb: u8_, + pub bIsTitan: u8_, + pub bIsTesla: u8_, + pub bIsMobile: u8_, + pub bIsGc6Rtd3Allowed: u8_, + pub bIsGc8Rtd3Allowed: u8_, + pub bIsGcOffRtd3Allowed: u8_, + pub bIsGcoffLegacyAllowed: u8_, + pub bIsMigSupported: u8_, + pub RTD3GC6TotalBoardPower: u16_, + pub RTD3GC6PerstDelay: u16_, + pub bar1PdeBase: u64_, + pub bar2PdeBase: u64_, + pub bVbiosValid: u8_, + pub vbiosSubVendor: u32_, + pub vbiosSubDevice: u32_, + pub bPageRetirementSupported: u8_, + pub bSplitVasBetweenServerClientRm: u8_, + pub bClRootportNeedsNosnoopWAR: u8_, + pub displaylessMaxHeads: VIRTUAL_DISPLAY_GET_NUM_HEADS_PARAMS, + pub displaylessMaxResolution: 
VIRTUAL_DISPLAY_GET_MAX_RESOLUTION_PARAMS, + pub displaylessMaxPixels: u64_, + pub hInternalClient: u32_, + pub hInternalDevice: u32_, + pub hInternalSubdevice: u32_, + pub bSelfHostedMode: u8_, + pub bAtsSupported: u8_, + pub bIsGpuUefi: u8_, + pub bIsEfiInit: u8_, + pub ecidInfo: [EcidManufacturingInfo; 2usize], + pub fwWprLayoutOffset: FW_WPR_LAYOUT_OFFSET, +} +impl Default for GspStaticConfigInfo_t { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct GspSystemInfo { + pub gpuPhysAddr: u64_, + pub gpuPhysFbAddr: u64_, + pub gpuPhysInstAddr: u64_, + pub gpuPhysIoAddr: u64_, + pub nvDomainBusDeviceFunc: u64_, + pub simAccessBufPhysAddr: u64_, + pub notifyOpSharedSurfacePhysAddr: u64_, + pub pcieAtomicsOpMask: u64_, + pub consoleMemSize: u64_, + pub maxUserVa: u64_, + pub pciConfigMirrorBase: u32_, + pub pciConfigMirrorSize: u32_, + pub PCIDeviceID: u32_, + pub PCISubDeviceID: u32_, + pub PCIRevisionID: u32_, + pub pcieAtomicsCplDeviceCapMask: u32_, + pub oorArch: u8_, + pub __bindgen_padding_0: [u8; 7usize], + pub clPdbProperties: u64_, + pub Chipset: u32_, + pub bGpuBehindBridge: u8_, + pub bFlrSupported: u8_, + pub b64bBar0Supported: u8_, + pub bMnocAvailable: u8_, + pub chipsetL1ssEnable: u32_, + pub bUpstreamL0sUnsupported: u8_, + pub bUpstreamL1Unsupported: u8_, + pub bUpstreamL1PorSupported: u8_, + pub bUpstreamL1PorMobileOnly: u8_, + pub bSystemHasMux: u8_, + pub upstreamAddressValid: u8_, + pub FHBBusInfo: BUSINFO, + pub chipsetIDInfo: BUSINFO, + pub __bindgen_padding_1: [u8; 2usize], + pub acpiMethodData: ACPI_METHOD_DATA, + pub hypervisorType: u32_, + pub bIsPassthru: u8_, + pub __bindgen_padding_2: [u8; 7usize], + pub sysTimerOffsetNs: u64_, + pub gspVFInfo: GSP_VF_INFO, + pub bIsPrimary: u8_, + pub isGridBuild: u8_, + pub __bindgen_padding_3: [u8; 2usize], + 
pub pcieConfigReg: GSP_PCIE_CONFIG_REG, + pub gridBuildCsp: u32_, + pub bPreserveVideoMemoryAllocations: u8_, + pub bTdrEventSupported: u8_, + pub bFeatureStretchVblankCapable: u8_, + pub bEnableDynamicGranularityPageArrays: u8_, + pub bClockBoostSupported: u8_, + pub bRouteDispIntrsToCPU: u8_, + pub __bindgen_padding_4: [u8; 6usize], + pub hostPageSize: u64_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct MESSAGE_QUEUE_INIT_ARGUMENTS { + pub sharedMemPhysAddr: u64_, + pub pageTableEntryCount: u32_, + pub __bindgen_padding_0: [u8; 4usize], + pub cmdQueueOffset: u64_, + pub statQueueOffset: u64_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct GSP_SR_INIT_ARGUMENTS { + pub oldLevel: u32_, + pub flags: u32_, + pub bInPMTransition: u8_, + pub __bindgen_padding_0: [u8; 3usize], +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct GSP_ARGUMENTS_CACHED { + pub messageQueueInitArguments: MESSAGE_QUEUE_INIT_ARGUMENTS, + pub srInitArguments: GSP_SR_INIT_ARGUMENTS, + pub gpuInstance: u32_, + pub bDmemStack: u8_, + pub __bindgen_padding_0: [u8; 7usize], + pub profilerArgs: GSP_ARGUMENTS_CACHED__bindgen_ty_1, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct GSP_ARGUMENTS_CACHED__bindgen_ty_1 { + pub pa: u64_, + pub size: u64_, +} +#[repr(C)] +#[derive(Copy, Clone, Zeroable)] +pub union rpc_message_rpc_union_field_v03_00 { + pub spare: u32_, + pub cpuRmGfid: u32_, +} +impl Default for rpc_message_rpc_union_field_v03_00 { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +pub type rpc_message_rpc_union_field_v = rpc_message_rpc_union_field_v03_00; +#[repr(C)] +pub struct rpc_message_header_v03_00 { + pub header_version: u32_, + pub signature: u32_, + pub length: u32_, + pub function: u32_, + pub rpc_result: u32_, + pub rpc_result_private: u32_, + 
pub sequence: u32_, + pub u: rpc_message_rpc_union_field_v, + pub rpc_message_data: __IncompleteArrayField<u8_>, +} +impl Default for rpc_message_header_v03_00 { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +pub type rpc_message_header_v = rpc_message_header_v03_00; +#[repr(C)] +#[derive(Copy, Clone, Zeroable)] +pub struct GspFwWprMeta { + pub magic: u64_, + pub revision: u64_, + pub sysmemAddrOfRadix3Elf: u64_, + pub sizeOfRadix3Elf: u64_, + pub sysmemAddrOfBootloader: u64_, + pub sizeOfBootloader: u64_, + pub bootloaderCodeOffset: u64_, + pub bootloaderDataOffset: u64_, + pub bootloaderManifestOffset: u64_, + pub __bindgen_anon_1: GspFwWprMeta__bindgen_ty_1, + pub gspFwRsvdStart: u64_, + pub nonWprHeapOffset: u64_, + pub nonWprHeapSize: u64_, + pub gspFwWprStart: u64_, + pub gspFwHeapOffset: u64_, + pub gspFwHeapSize: u64_, + pub gspFwOffset: u64_, + pub bootBinOffset: u64_, + pub frtsOffset: u64_, + pub frtsSize: u64_, + pub gspFwWprEnd: u64_, + pub fbSize: u64_, + pub vgaWorkspaceOffset: u64_, + pub vgaWorkspaceSize: u64_, + pub bootCount: u64_, + pub __bindgen_anon_2: GspFwWprMeta__bindgen_ty_2, + pub gspFwHeapVfPartitionCount: u8_, + pub flags: u8_, + pub padding: [u8_; 2usize], + pub pmuReservedSize: u32_, + pub verified: u64_, +} +#[repr(C)] +#[derive(Copy, Clone, Zeroable)] +pub union GspFwWprMeta__bindgen_ty_1 { + pub __bindgen_anon_1: GspFwWprMeta__bindgen_ty_1__bindgen_ty_1, + pub __bindgen_anon_2: GspFwWprMeta__bindgen_ty_1__bindgen_ty_2, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct GspFwWprMeta__bindgen_ty_1__bindgen_ty_1 { + pub sysmemAddrOfSignature: u64_, + pub sizeOfSignature: u64_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct GspFwWprMeta__bindgen_ty_1__bindgen_ty_2 { + pub gspFwHeapFreeListWprOffset: u32_, + pub unused0: u32_, + pub unused1: u64_, +} +impl 
Default for GspFwWprMeta__bindgen_ty_1 { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +#[repr(C)] +#[derive(Copy, Clone, Zeroable)] +pub union GspFwWprMeta__bindgen_ty_2 { + pub __bindgen_anon_1: GspFwWprMeta__bindgen_ty_2__bindgen_ty_1, + pub __bindgen_anon_2: GspFwWprMeta__bindgen_ty_2__bindgen_ty_2, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct GspFwWprMeta__bindgen_ty_2__bindgen_ty_1 { + pub partitionRpcAddr: u64_, + pub partitionRpcRequestOffset: u16_, + pub partitionRpcReplyOffset: u16_, + pub elfCodeOffset: u32_, + pub elfDataOffset: u32_, + pub elfCodeSize: u32_, + pub elfDataSize: u32_, + pub lsUcodeVersion: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct GspFwWprMeta__bindgen_ty_2__bindgen_ty_2 { + pub partitionRpcPadding: [u32_; 4usize], + pub sysmemAddrOfCrashReportQueue: u64_, + pub sizeOfCrashReportQueue: u32_, + pub lsUcodeVersionPadding: [u32_; 1usize], +} +impl Default for GspFwWprMeta__bindgen_ty_2 { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +impl Default for GspFwWprMeta { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +pub type LibosAddress = u64_; +pub const LibosMemoryRegionKind_LIBOS_MEMORY_REGION_NONE: LibosMemoryRegionKind = 0; +pub const LibosMemoryRegionKind_LIBOS_MEMORY_REGION_CONTIGUOUS: LibosMemoryRegionKind = 1; +pub const LibosMemoryRegionKind_LIBOS_MEMORY_REGION_RADIX3: LibosMemoryRegionKind = 2; +pub type LibosMemoryRegionKind = ffi::c_uint; +pub const LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_NONE: LibosMemoryRegionLoc = 0; +pub const 
LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_SYSMEM: LibosMemoryRegionLoc = 1; +pub const LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_FB: LibosMemoryRegionLoc = 2; +pub type LibosMemoryRegionLoc = ffi::c_uint; +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct LibosMemoryRegionInitArgument { + pub id8: LibosAddress, + pub pa: LibosAddress, + pub size: LibosAddress, + pub kind: u8_, + pub loc: u8_, + pub __bindgen_padding_0: [u8; 6usize], +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct PACKED_REGISTRY_ENTRY { + pub nameOffset: u32_, + pub type_: u8_, + pub __bindgen_padding_0: [u8; 3usize], + pub data: u32_, + pub length: u32_, +} +#[repr(C)] +#[derive(Debug, Default)] +pub struct PACKED_REGISTRY_TABLE { + pub size: u32_, + pub numEntries: u32_, + pub entries: __IncompleteArrayField<PACKED_REGISTRY_ENTRY>, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct msgqTxHeader { + pub version: u32_, + pub size: u32_, + pub msgSize: u32_, + pub msgCount: u32_, + pub writePtr: u32_, + pub flags: u32_, + pub rxHdrOff: u32_, + pub entryOff: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct msgqRxHeader { + pub readPtr: u32_, +} +#[repr(C)] +#[repr(align(8))] +#[derive(Zeroable)] +pub struct GSP_MSG_QUEUE_ELEMENT { + pub authTagBuffer: [u8_; 16usize], + pub aadBuffer: [u8_; 16usize], + pub checkSum: u32_, + pub seqNum: u32_, + pub elemCount: u32_, + pub __bindgen_padding_0: [u8; 4usize], + pub rpc: rpc_message_header_v, +} +impl Default for GSP_MSG_QUEUE_ELEMENT { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +#[repr(C)] +#[derive(Debug, Default)] +pub struct rpc_run_cpu_sequencer_v17_00 { + pub bufferSizeDWord: u32_, + pub cmdIndex: u32_, + pub regSaveArea: [u32_; 8usize], + pub commandBuffer: __IncompleteArrayField<u32_>, +} +pub const 
GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_WRITE: GSP_SEQ_BUF_OPCODE = 0; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_MODIFY: GSP_SEQ_BUF_OPCODE = 1; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_POLL: GSP_SEQ_BUF_OPCODE = 2; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_DELAY_US: GSP_SEQ_BUF_OPCODE = 3; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_STORE: GSP_SEQ_BUF_OPCODE = 4; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESET: GSP_SEQ_BUF_OPCODE = 5; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_START: GSP_SEQ_BUF_OPCODE = 6; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT: GSP_SEQ_BUF_OPCODE = 7; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESUME: GSP_SEQ_BUF_OPCODE = 8; +pub type GSP_SEQ_BUF_OPCODE = ffi::c_uint; +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct GSP_SEQ_BUF_PAYLOAD_REG_WRITE { + pub addr: u32_, + pub val: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct GSP_SEQ_BUF_PAYLOAD_REG_MODIFY { + pub addr: u32_, + pub mask: u32_, + pub val: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct GSP_SEQ_BUF_PAYLOAD_REG_POLL { + pub addr: u32_, + pub mask: u32_, + pub val: u32_, + pub timeout: u32_, + pub error: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct GSP_SEQ_BUF_PAYLOAD_DELAY_US { + pub val: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct GSP_SEQ_BUF_PAYLOAD_REG_STORE { + pub addr: u32_, + pub index: u32_, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct GSP_SEQUENCER_BUFFER_CMD { + pub opCode: GSP_SEQ_BUF_OPCODE, + pub payload: GSP_SEQUENCER_BUFFER_CMD__bindgen_ty_1, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union GSP_SEQUENCER_BUFFER_CMD__bindgen_ty_1 { + pub regWrite: GSP_SEQ_BUF_PAYLOAD_REG_WRITE, + pub regModify: GSP_SEQ_BUF_PAYLOAD_REG_MODIFY, + pub regPoll: GSP_SEQ_BUF_PAYLOAD_REG_POLL, + pub delayUs: GSP_SEQ_BUF_PAYLOAD_DELAY_US, + pub regStore: 
GSP_SEQ_BUF_PAYLOAD_REG_STORE, +} +impl Default for GSP_SEQUENCER_BUFFER_CMD__bindgen_ty_1 { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +impl Default for GSP_SEQUENCER_BUFFER_CMD { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} diff --git a/drivers/gpu/nova-core/gsp/sequencer.rs b/drivers/gpu/nova-core/gsp/sequencer.rs new file mode 100644 index 000000000000..2d0369c49092 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/sequencer.rs @@ -0,0 +1,407 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! GSP Sequencer implementation for Pre-hopper GSP boot sequence. + +use core::{ + array, + mem::{ + size_of, + size_of_val, // + }, +}; + +use kernel::{ + device, + io::poll::read_poll_timeout, + prelude::*, + time::{ + delay::fsleep, + Delta, // + }, + transmute::FromBytes, + types::ARef, // +}; + +use crate::{ + driver::Bar0, + falcon::{ + gsp::Gsp, + sec2::Sec2, + Falcon, // + }, + gsp::{ + cmdq::{ + Cmdq, + MessageFromGsp, // + }, + fw, + }, + num::FromSafeCast, + sbuffer::SBufferIter, +}; + +/// GSP Sequencer information containing the command sequence and data. +struct GspSequence { + /// Current command index for error reporting. + cmd_index: u32, + /// Command data buffer containing the sequence of commands. 
cmd_data: KVec<u8>,
}

impl MessageFromGsp for GspSequence {
    const FUNCTION: fw::MsgFunction = fw::MsgFunction::GspRunCpuSequencer;
    type InitError = Error;
    type Message = fw::RunCpuSequencer;

    /// Builds a [`GspSequence`] from a received `RunCpuSequencer` message.
    ///
    /// The command index is taken from the message header and everything left in `sbuffer` is
    /// copied into a freshly allocated buffer holding the raw command stream.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `flush_into_kvec` (e.g. on allocation failure).
    fn read(
        msg: &Self::Message,
        sbuffer: &mut SBufferIter<array::IntoIter<&[u8], 2>>,
    ) -> Result<Self, Self::InitError> {
        let cmd_data = sbuffer.flush_into_kvec(GFP_KERNEL)?;
        Ok(GspSequence {
            cmd_index: msg.cmd_index(),
            cmd_data,
        })
    }
}

/// Size in bytes of a full-size firmware sequencer command (opcode plus largest payload).
const CMD_SIZE: usize = size_of::<fw::SequencerBufferCmd>();

/// GSP Sequencer Command types with payload data.
/// Commands have an opcode and an opcode-dependent struct.
#[allow(clippy::enum_variant_names)]
pub(crate) enum GspSeqCmd {
    RegWrite(fw::RegWritePayload),
    RegModify(fw::RegModifyPayload),
    RegPoll(fw::RegPollPayload),
    DelayUs(fw::DelayUsPayload),
    RegStore(fw::RegStorePayload),
    CoreReset,
    CoreStart,
    CoreWaitForHalt,
    CoreResume,
}

impl GspSeqCmd {
    /// Creates a new `GspSeqCmd` from raw data returning the command and its size in bytes.
    ///
    /// The returned size is the size of the opcode plus the size of the opcode-specific payload;
    /// commands without a payload (`Core*`) only occupy the opcode.
    ///
    /// # Errors
    ///
    /// Returns `EINVAL` if `data` cannot be interpreted as a [`fw::SequencerBufferCmd`] or if it
    /// is shorter than the decoded command. Errors from decoding the opcode or its payload are
    /// propagated unchanged.
    pub(crate) fn new(data: &[u8], dev: &device::Device) -> Result<(Self, usize)> {
        let fw_cmd = fw::SequencerBufferCmd::from_bytes(data).ok_or(EINVAL)?;
        let opcode_size = size_of::<u32>();

        let (cmd, size) = match fw_cmd.opcode()? {
            fw::SeqBufOpcode::RegWrite => {
                let payload = fw_cmd.reg_write_payload()?;
                let size = opcode_size + size_of_val(&payload);
                (GspSeqCmd::RegWrite(payload), size)
            }
            fw::SeqBufOpcode::RegModify => {
                let payload = fw_cmd.reg_modify_payload()?;
                let size = opcode_size + size_of_val(&payload);
                (GspSeqCmd::RegModify(payload), size)
            }
            fw::SeqBufOpcode::RegPoll => {
                let payload = fw_cmd.reg_poll_payload()?;
                let size = opcode_size + size_of_val(&payload);
                (GspSeqCmd::RegPoll(payload), size)
            }
            fw::SeqBufOpcode::DelayUs => {
                let payload = fw_cmd.delay_us_payload()?;
                let size = opcode_size + size_of_val(&payload);
                (GspSeqCmd::DelayUs(payload), size)
            }
            fw::SeqBufOpcode::RegStore => {
                let payload = fw_cmd.reg_store_payload()?;
                let size = opcode_size + size_of_val(&payload);
                (GspSeqCmd::RegStore(payload), size)
            }
            // The core commands carry no payload, only the opcode.
            fw::SeqBufOpcode::CoreReset => (GspSeqCmd::CoreReset, opcode_size),
            fw::SeqBufOpcode::CoreStart => (GspSeqCmd::CoreStart, opcode_size),
            fw::SeqBufOpcode::CoreWaitForHalt => (GspSeqCmd::CoreWaitForHalt, opcode_size),
            fw::SeqBufOpcode::CoreResume => (GspSeqCmd::CoreResume, opcode_size),
        };

        if data.len() < size {
            // Kernel log messages are newline-terminated, like the other messages in this file.
            dev_err!(dev, "Data is not enough for command\n");
            return Err(EINVAL);
        }

        Ok((cmd, size))
    }
}

/// GSP Sequencer for executing firmware commands during boot.
pub(crate) struct GspSequencer<'a> {
    /// Sequencer information with command data.
    seq_info: GspSequence,
    /// `Bar0` for register access.
    bar: &'a Bar0,
    /// SEC2 falcon for core operations.
    sec2_falcon: &'a Falcon<Sec2>,
    /// GSP falcon for core operations.
    gsp_falcon: &'a Falcon<Gsp>,
    /// LibOS DMA handle address.
    libos_dma_handle: u64,
    /// Bootloader application version.
    bootloader_app_version: u32,
    /// Device for logging.
    dev: ARef<device::Device>,
}

/// Trait for running sequencer commands.
pub(crate) trait GspSeqCmdRunner {
    /// Executes this command using the resources held by `sequencer`.
    fn run(&self, sequencer: &GspSequencer<'_>) -> Result;
}

impl GspSeqCmdRunner for fw::RegWritePayload {
    /// Writes the payload's value to the register at the payload's address.
    fn run(&self, sequencer: &GspSequencer<'_>) -> Result {
        let addr = usize::from_safe_cast(self.addr());

        sequencer.bar.try_write32(self.val(), addr)
    }
}

impl GspSeqCmdRunner for fw::RegModifyPayload {
    /// Read-modify-write: clears the bits in `mask`, ORs in `val`, and writes the result back.
    fn run(&self, sequencer: &GspSequencer<'_>) -> Result {
        let addr = usize::from_safe_cast(self.addr());

        sequencer.bar.try_read32(addr).and_then(|val| {
            sequencer
                .bar
                .try_write32((val & !self.mask()) | self.val(), addr)
        })
    }
}

impl GspSeqCmdRunner for fw::RegPollPayload {
    /// Polls the register until `(value & mask) == val` or the timeout expires.
    ///
    /// A timeout of `0` in the payload selects the default of 4 seconds.
    fn run(&self, sequencer: &GspSequencer<'_>) -> Result {
        let addr = usize::from_safe_cast(self.addr());

        // Default timeout to 4 seconds.
        let timeout_us = if self.timeout() == 0 {
            4_000_000
        } else {
            i64::from(self.timeout())
        };

        // First read; fails early if the register cannot be read at all.
        sequencer.bar.try_read32(addr)?;

        // Poll the requested register with requested timeout.
        read_poll_timeout(
            || sequencer.bar.try_read32(addr),
            |current| (current & self.mask()) == self.val(),
            Delta::ZERO,
            Delta::from_micros(timeout_us),
        )
        .map(|_| ())
    }
}

impl GspSeqCmdRunner for fw::DelayUsPayload {
    /// Sleeps for the number of microseconds given by the payload.
    fn run(&self, _sequencer: &GspSequencer<'_>) -> Result {
        fsleep(Delta::from_micros(i64::from(self.val())));
        Ok(())
    }
}

impl GspSeqCmdRunner for fw::RegStorePayload {
    /// Reads the register at the payload's address and discards the value.
    // NOTE(review): the payload also carries an index which is not used here; presumably the
    // read value does not need to be saved by the driver — confirm.
    fn run(&self, sequencer: &GspSequencer<'_>) -> Result {
        let addr = usize::from_safe_cast(self.addr());

        sequencer.bar.try_read32(addr).map(|_| ())
    }
}

impl GspSeqCmdRunner for GspSeqCmd {
    /// Dispatches to the payload-specific runner, or performs the core operation directly.
    fn run(&self, seq: &GspSequencer<'_>) -> Result {
        match self {
            GspSeqCmd::RegWrite(cmd) => cmd.run(seq),
            GspSeqCmd::RegModify(cmd) => cmd.run(seq),
            GspSeqCmd::RegPoll(cmd) => cmd.run(seq),
            GspSeqCmd::DelayUs(cmd) => cmd.run(seq),
            GspSeqCmd::RegStore(cmd) => cmd.run(seq),
            GspSeqCmd::CoreReset => {
                seq.gsp_falcon.reset(seq.bar)?;
                seq.gsp_falcon.dma_reset(seq.bar);
                Ok(())
            }
            GspSeqCmd::CoreStart => {
                seq.gsp_falcon.start(seq.bar)?;
                Ok(())
            }
            GspSeqCmd::CoreWaitForHalt => {
                seq.gsp_falcon.wait_till_halted(seq.bar)?;
                Ok(())
            }
            GspSeqCmd::CoreResume => {
                // At this point, 'SEC2-RTOS' has been loaded into SEC2 by the sequencer
                // but neither SEC2-RTOS nor GSP-RM is running yet. This part of the
                // sequencer will start both.

                // Reset the GSP to prepare it for resuming.
                seq.gsp_falcon.reset(seq.bar)?;

                // Write the libOS DMA handle to GSP mailboxes (low 32 bits, then high 32 bits;
                // the truncating casts are intentional).
                seq.gsp_falcon.write_mailboxes(
                    seq.bar,
                    Some(seq.libos_dma_handle as u32),
                    Some((seq.libos_dma_handle >> 32) as u32),
                );

                // Start the SEC2 falcon which will trigger GSP-RM to resume on the GSP.
                seq.sec2_falcon.start(seq.bar)?;

                // Poll until GSP-RM reload/resume has completed (up to 2 seconds).
                seq.gsp_falcon
                    .check_reload_completed(seq.bar, Delta::from_secs(2))?;

                // Verify SEC2 completed successfully by checking its mailbox for errors.
                let mbox0 = seq.sec2_falcon.read_mailbox0(seq.bar);
                if mbox0 != 0 {
                    dev_err!(seq.dev, "Sequencer: sec2 errors: {:?}\n", mbox0);
                    return Err(EIO);
                }

                // Configure GSP with the bootloader version.
                seq.gsp_falcon
                    .write_os_version(seq.bar, seq.bootloader_app_version);

                // Verify the GSP's RISC-V core is active indicating successful GSP boot.
                if !seq.gsp_falcon.is_riscv_active(seq.bar) {
                    dev_err!(seq.dev, "Sequencer: RISC-V core is not active\n");
                    return Err(EIO);
                }
                Ok(())
            }
        }
    }
}

/// Iterator over GSP sequencer commands.
pub(crate) struct GspSeqIter<'a> {
    /// Command data buffer.
    cmd_data: &'a [u8],
    /// Current position in the buffer.
    current_offset: usize,
    /// Total number of commands to process.
    total_cmds: u32,
    /// Number of commands processed so far.
    cmds_processed: u32,
    /// Device for logging.
    dev: ARef<device::Device>,
}

impl<'a> Iterator for GspSeqIter<'a> {
    type Item = Result<GspSeqCmd>;

    /// Decodes the next command from the buffer.
    ///
    /// Returns `None` once `total_cmds` commands have been produced or the buffer is exhausted.
    /// A malformed or truncated command is reported as `Some(Err(_))` so the caller can abort
    /// instead of mistaking the failure for a normal end of sequence. After an error the
    /// iterator is exhausted and every further call returns `None`.
    fn next(&mut self) -> Option<Self::Item> {
        let remaining = self.cmd_data.len().saturating_sub(self.current_offset);

        // Stop if we've processed all commands or reached the end of data.
        if self.cmds_processed >= self.total_cmds || remaining == 0 {
            return None;
        }

        let offset = self.current_offset;

        // Check if we have enough data for opcode.
        if remaining < size_of::<u32>() {
            // Exhaust the iterator so the same error is not reported forever.
            self.current_offset = self.cmd_data.len();
            return Some(Err(EIO));
        }

        // The last command may be shorter than a full-size firmware command, so parse from a
        // zero-padded, full-size scratch buffer.
        let mut buffer = [0u8; CMD_SIZE];
        let copy_len = remaining.min(CMD_SIZE);
        buffer[..copy_len].copy_from_slice(&self.cmd_data[offset..offset + copy_len]);

        match GspSeqCmd::new(&buffer, &self.dev) {
            // The decoded command must fit in the bytes that were really available; otherwise
            // part of its payload would come from the zero padding above.
            Ok((cmd, size)) if size <= copy_len => {
                self.current_offset += size;
                self.cmds_processed += 1;
                Some(Ok(cmd))
            }
            Ok(_) => {
                dev_err!(self.dev, "Truncated command at offset {}\n", offset);
                self.current_offset = self.cmd_data.len();
                Some(Err(EINVAL))
            }
            Err(e) => {
                dev_err!(self.dev, "Error parsing command at offset {}\n", offset);
                self.current_offset = self.cmd_data.len();
                Some(Err(e))
            }
        }
    }
}

impl<'a> GspSequencer<'a> {
    /// Returns an iterator decoding the commands of the received sequence, starting at the
    /// beginning of the command buffer.
    fn iter(&self) -> GspSeqIter<'_> {
        let cmd_data = &self.seq_info.cmd_data[..];

        GspSeqIter {
            cmd_data,
            current_offset: 0,
            total_cmds: self.seq_info.cmd_index,
            cmds_processed: 0,
            dev: self.dev.clone(),
        }
    }
}

/// Parameters for running the GSP sequencer.
pub(crate) struct GspSequencerParams<'a> {
    /// Bootloader application version.
    pub(crate) bootloader_app_version: u32,
    /// LibOS DMA handle address.
    pub(crate) libos_dma_handle: u64,
    /// GSP falcon for core operations.
    pub(crate) gsp_falcon: &'a Falcon<Gsp>,
    /// SEC2 falcon for core operations.
    pub(crate) sec2_falcon: &'a Falcon<Sec2>,
    /// Device for logging.
    pub(crate) dev: ARef<device::Device>,
    /// BAR0 for register access.
+ pub(crate) bar: &'a Bar0, +} + +impl<'a> GspSequencer<'a> { + pub(crate) fn run(cmdq: &mut Cmdq, params: GspSequencerParams<'a>) -> Result { + let seq_info = loop { + match cmdq.receive_msg::<GspSequence>(Delta::from_secs(10)) { + Ok(seq_info) => break seq_info, + Err(ERANGE) => continue, + Err(e) => return Err(e), + } + }; + + let sequencer = GspSequencer { + seq_info, + bar: params.bar, + sec2_falcon: params.sec2_falcon, + gsp_falcon: params.gsp_falcon, + libos_dma_handle: params.libos_dma_handle, + bootloader_app_version: params.bootloader_app_version, + dev: params.dev, + }; + + dev_dbg!(sequencer.dev, "Running CPU Sequencer commands"); + + for cmd_result in sequencer.iter() { + match cmd_result { + Ok(cmd) => cmd.run(&sequencer)?, + Err(e) => { + dev_err!( + sequencer.dev, + "Error running command at index {}", + sequencer.seq_info.cmd_index + ); + return Err(e); + } + } + } + + dev_dbg!( + sequencer.dev, + "CPU Sequencer commands completed successfully" + ); + Ok(()) + } +} diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs index fffcaee2249f..b98a1c03f13d 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -2,6 +2,9 @@ //! Nova Core GPU Driver +#[macro_use] +mod bitfield; + mod dma; mod driver; mod falcon; @@ -10,7 +13,9 @@ mod firmware; mod gfw; mod gpu; mod gsp; +mod num; mod regs; +mod sbuffer; mod util; mod vbios; diff --git a/drivers/gpu/nova-core/num.rs b/drivers/gpu/nova-core/num.rs new file mode 100644 index 000000000000..c952a834e662 --- /dev/null +++ b/drivers/gpu/nova-core/num.rs @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Numerical helpers functions and traits. +//! +//! This is essentially a staging module for code to mature until it can be moved to the `kernel` +//! crate. + +use kernel::{ + macros::paste, + prelude::*, // +}; + +/// Implements safe `as` conversion functions from a given type into a series of target types. 
+/// +/// These functions can be used in place of `as`, with the guarantee that they will be lossless. +macro_rules! impl_safe_as { + ($from:ty as { $($into:ty),* }) => { + $( + paste! { + #[doc = ::core::concat!( + "Losslessly converts a [`", + ::core::stringify!($from), + "`] into a [`", + ::core::stringify!($into), + "`].")] + /// + /// This conversion is allowed as it is always lossless. Prefer this over the `as` + /// keyword to ensure no lossy casts are performed. + /// + /// This is for use from a `const` context. For non `const` use, prefer the + /// [`FromSafeCast`] and [`IntoSafeCast`] traits. + /// + /// # Examples + /// + /// ``` + /// use crate::num; + /// + #[doc = ::core::concat!( + "assert_eq!(num::", + ::core::stringify!($from), + "_as_", + ::core::stringify!($into), + "(1", + ::core::stringify!($from), + "), 1", + ::core::stringify!($into), + ");")] + /// ``` + #[allow(unused)] + #[inline(always)] + pub(crate) const fn [<$from _as_ $into>](value: $from) -> $into { + kernel::static_assert!(size_of::<$into>() >= size_of::<$from>()); + + value as $into + } + } + )* + }; +} + +impl_safe_as!(u8 as { u16, u32, u64, usize }); +impl_safe_as!(u16 as { u32, u64, usize }); +impl_safe_as!(u32 as { u64, usize } ); +// `u64` and `usize` have the same size on 64-bit platforms. +#[cfg(CONFIG_64BIT)] +impl_safe_as!(u64 as { usize } ); + +// A `usize` fits into a `u64` on 32 and 64-bit platforms. +#[cfg(any(CONFIG_32BIT, CONFIG_64BIT))] +impl_safe_as!(usize as { u64 }); + +// A `usize` fits into a `u32` on 32-bit platforms. +#[cfg(CONFIG_32BIT)] +impl_safe_as!(usize as { u32 }); + +/// Extension trait providing guaranteed lossless cast to `Self` from `T`. +/// +/// The standard library's `From` implementations do not cover conversions that are not portable or +/// future-proof. For instance, even though it is safe today, `From<usize>` is not implemented for +/// [`u64`] because of the possibility to support larger-than-64bit architectures in the future. 
+/// +/// The workaround is to either deal with the error handling of [`TryFrom`] for an operation that +/// technically cannot fail, or to use the `as` keyword, which can silently strip data if the +/// destination type is smaller than the source. +/// +/// Both options are hardly acceptable for the kernel. It is also a much more architecture +/// dependent environment, supporting only 32 and 64 bit architectures, with some modules +/// explicitly depending on a specific bus width that could greatly benefit from infallible +/// conversion operations. +/// +/// Thus this extension trait that provides, for the architecture the kernel is built for, safe +/// conversion between types for which such cast is lossless. +/// +/// In other words, this trait is implemented if, for the current build target and with `t: T`, the +/// `t as Self` operation is completely lossless. +/// +/// Prefer this over the `as` keyword to ensure no lossy casts are performed. +/// +/// If you need to perform a conversion in `const` context, use [`u64_as_usize`], [`u32_as_usize`], +/// [`usize_as_u64`], etc. +/// +/// # Examples +/// +/// ``` +/// use crate::num::FromSafeCast; +/// +/// assert_eq!(usize::from_safe_cast(0xf00u32), 0xf00u32 as usize); +/// ``` +pub(crate) trait FromSafeCast<T> { + /// Create a `Self` from `value`. This operation is guaranteed to be lossless. + fn from_safe_cast(value: T) -> Self; +} + +impl FromSafeCast<usize> for u64 { + fn from_safe_cast(value: usize) -> Self { + usize_as_u64(value) + } +} + +#[cfg(CONFIG_32BIT)] +impl FromSafeCast<usize> for u32 { + fn from_safe_cast(value: usize) -> Self { + usize_as_u32(value) + } +} + +impl FromSafeCast<u32> for usize { + fn from_safe_cast(value: u32) -> Self { + u32_as_usize(value) + } +} + +#[cfg(CONFIG_64BIT)] +impl FromSafeCast<u64> for usize { + fn from_safe_cast(value: u64) -> Self { + u64_as_usize(value) + } +} + +/// Counterpart to the [`FromSafeCast`] trait, i.e. 
this trait is to [`FromSafeCast`] what [`Into`] +/// is to [`From`]. +/// +/// See the documentation of [`FromSafeCast`] for the motivation. +/// +/// # Examples +/// +/// ``` +/// use crate::num::IntoSafeCast; +/// +/// assert_eq!(0xf00u32.into_safe_cast(), 0xf00u32 as usize); +/// ``` +pub(crate) trait IntoSafeCast<T> { + /// Convert `self` into a `T`. This operation is guaranteed to be lossless. + fn into_safe_cast(self) -> T; +} + +/// Reverse operation for types implementing [`FromSafeCast`]. +impl<S, T> IntoSafeCast<T> for S +where + T: FromSafeCast<S>, +{ + fn into_safe_cast(self) -> T { + T::from_safe_cast(self) + } +} + +/// Implements lossless conversion of a constant from a larger type into a smaller one. +macro_rules! impl_const_into { + ($from:ty => { $($into:ty),* }) => { + $( + paste! { + #[doc = ::core::concat!( + "Performs a build-time safe conversion of a [`", + ::core::stringify!($from), + "`] constant value into a [`", + ::core::stringify!($into), + "`].")] + /// + /// This checks at compile-time that the conversion is lossless, and triggers a build + /// error if it isn't. + /// + /// # Examples + /// + /// ``` + /// use crate::num; + /// + /// // Succeeds because the value of the source fits into the destination's type. + #[doc = ::core::concat!( + "assert_eq!(num::", + ::core::stringify!($from), + "_into_", + ::core::stringify!($into), + "::<1", + ::core::stringify!($from), + ">(), 1", + ::core::stringify!($into), + ");")] + /// ``` + #[allow(unused)] + pub(crate) const fn [<$from _into_ $into>]<const N: $from>() -> $into { + // Make sure that the target type is smaller than the source one. + static_assert!($from::BITS >= $into::BITS); + // CAST: we statically enforced above that `$from` is larger than `$into`, so the + // `as` conversion will be lossless. 
+ build_assert!(N >= $into::MIN as $from && N <= $into::MAX as $from); + + N as $into + } + } + )* + }; +} + +impl_const_into!(usize => { u8, u16, u32 }); +impl_const_into!(u64 => { u8, u16, u32 }); +impl_const_into!(u32 => { u8, u16 }); +impl_const_into!(u16 => { u8 }); diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 206dab2e1335..82cc6c0790e5 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -7,13 +7,28 @@ #[macro_use] pub(crate) mod macros; -use crate::falcon::{ - DmaTrfCmdSize, FalconCoreRev, FalconCoreRevSubversion, FalconFbifMemType, FalconFbifTarget, - FalconModSelAlgo, FalconSecurityModel, PFalcon2Base, PFalconBase, PeregrineCoreSelect, -}; -use crate::gpu::{Architecture, Chipset}; use kernel::prelude::*; +use crate::{ + falcon::{ + DmaTrfCmdSize, + FalconCoreRev, + FalconCoreRevSubversion, + FalconFbifMemType, + FalconFbifTarget, + FalconModSelAlgo, + FalconSecurityModel, + PFalcon2Base, + PFalconBase, + PeregrineCoreSelect, // + }, + gpu::{ + Architecture, + Chipset, // + }, + num::FromSafeCast, +}; + // PMC register!(NV_PMC_BOOT_0 @ 0x00000000, "Basic revision information about the GPU" { @@ -25,13 +40,24 @@ register!(NV_PMC_BOOT_0 @ 0x00000000, "Basic revision information about the GPU" }); impl NV_PMC_BOOT_0 { - /// Combines `architecture_0` and `architecture_1` to obtain the architecture of the chip. - pub(crate) fn architecture(self) -> Result<Architecture> { - Architecture::try_from( - self.architecture_0() | (self.architecture_1() << Self::ARCHITECTURE_0_RANGE.len()), - ) + pub(crate) fn is_older_than_fermi(self) -> bool { + // From https://github.com/NVIDIA/open-gpu-doc/tree/master/manuals : + const NV_PMC_BOOT_0_ARCHITECTURE_GF100: u8 = 0xc; + + // Older chips left arch1 zeroed out. That, combined with an arch0 value that is less than + // GF100, means "older than Fermi". 
+ self.architecture_1() == 0 && self.architecture_0() < NV_PMC_BOOT_0_ARCHITECTURE_GF100 } +} +register!(NV_PMC_BOOT_42 @ 0x00000a00, "Extended architecture information" { + 15:12 minor_revision as u8, "Minor revision of the chip"; + 19:16 major_revision as u8, "Major revision of the chip"; + 23:20 implementation as u8, "Implementation version of the architecture"; + 29:24 architecture as u8 ?=> Architecture, "Architecture value"; +}); + +impl NV_PMC_BOOT_42 { /// Combines `architecture` and `implementation` to obtain a code unique to the chipset. pub(crate) fn chipset(self) -> Result<Chipset> { self.architecture() @@ -41,6 +67,24 @@ impl NV_PMC_BOOT_0 { }) .and_then(Chipset::try_from) } + + /// Returns the raw architecture value from the register. + fn architecture_raw(self) -> u8 { + ((self.0 >> Self::ARCHITECTURE_RANGE.start()) & ((1 << Self::ARCHITECTURE_RANGE.len()) - 1)) + as u8 + } +} + +impl kernel::fmt::Display for NV_PMC_BOOT_42 { + fn fmt(&self, f: &mut kernel::fmt::Formatter<'_>) -> kernel::fmt::Result { + write!( + f, + "boot42 = 0x{:08x} (architecture 0x{:x}, implementation 0x{:x})", + self.0, + self.architecture_raw(), + self.implementation() + ) + } } // PBUS @@ -71,11 +115,15 @@ register!(NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE @ 0x00100ce0 { 30:30 ecc_mode_enabled as bool; }); +register!(NV_PGSP_QUEUE_HEAD @ 0x00110c00 { + 31:0 address as u32; +}); + impl NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE { /// Returns the usable framebuffer size, in bytes. pub(crate) fn usable_fb_size(self) -> u64 { let size = (u64::from(self.lower_mag()) << u64::from(self.lower_scale())) - * kernel::sizes::SZ_1M as u64; + * u64::from_safe_cast(kernel::sizes::SZ_1M); if self.ecc_mode_enabled() { // Remove the amount of memory reserved for ECC (one per 16 units). @@ -119,6 +167,12 @@ impl NV_PFB_PRI_MMU_WPR2_ADDR_HI { // These scratch registers remain powered on even in a low-power state and have a designated group // number. 
+// Boot Sequence Interface (BSI) register used to determine +// if GSP reload/resume has completed during the boot process. +register!(NV_PGC6_BSI_SECURE_SCRATCH_14 @ 0x001180f8 { + 26:26 boot_stage_3_handoff as bool; +}); + // Privilege level mask register. It dictates whether the host CPU has privilege to access the // `PGC6_AON_SECURE_SCRATCH_GROUP_05` register (which it needs to read GFW_BOOT). register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK @ 0x00118128, @@ -158,7 +212,7 @@ register!( impl NV_USABLE_FB_SIZE_IN_MB { /// Returns the usable framebuffer size, in bytes. pub(crate) fn usable_fb_size(self) -> u64 { - u64::from(self.value()) * kernel::sizes::SZ_1M as u64 + u64::from(self.value()) * u64::from_safe_cast(kernel::sizes::SZ_1M) } } @@ -211,6 +265,12 @@ register!(NV_PFALCON_FALCON_MAILBOX1 @ PFalconBase[0x00000044] { 31:0 value as u32; }); +// Used to store version information about the firmware running +// on the Falcon processor. +register!(NV_PFALCON_FALCON_OS @ PFalconBase[0x00000080] { + 31:0 value as u32; +}); + register!(NV_PFALCON_FALCON_RM @ PFalconBase[0x00000084] { 31:0 value as u32; }); @@ -320,7 +380,12 @@ register!(NV_PFALCON2_FALCON_BROM_PARAADDR @ PFalcon2Base[0x00000210[1]] { // PRISCV -register!(NV_PRISCV_RISCV_BCR_CTRL @ PFalconBase[0x00001668] { +register!(NV_PRISCV_RISCV_CPUCTL @ PFalcon2Base[0x00000388] { + 0:0 halted as bool; + 7:7 active_stat as bool; +}); + +register!(NV_PRISCV_RISCV_BCR_CTRL @ PFalcon2Base[0x00000668] { 0:0 valid as bool; 4:4 core_select as bool => PeregrineCoreSelect; 8:8 br_fetch as bool; diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 8058e1696df9..fd1a815fa57d 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -8,7 +8,8 @@ //! //! The `register!` macro in this module provides an intuitive and readable syntax for defining a //! dedicated type for each register. 
Each such type comes with its own field accessors that can -//! return an error if a field's value is invalid. +//! return an error if a field's value is invalid. Please look at the [`bitfield`] macro for the +//! complete syntax of fields definitions. /// Trait providing a base address to be added to the offset of a relative register to obtain /// its actual offset. @@ -51,18 +52,9 @@ pub(crate) trait RegisterBase<T> { /// boot0.set_major_revision(3).set_minor_revision(10).write(&bar); /// /// // Or, just read and update the register in a single step: -/// BOOT_0::alter(&bar, |r| r.set_major_revision(3).set_minor_revision(10)); +/// BOOT_0::update(&bar, |r| r.set_major_revision(3).set_minor_revision(10)); /// ``` /// -/// Fields are defined as follows: -/// -/// - `as <type>` simply returns the field value casted to <type>, typically `u32`, `u16`, `u8` or -/// `bool`. Note that `bool` fields must have a range of 1 bit. -/// - `as <type> => <into_type>` calls `<into_type>`'s `From::<<type>>` implementation and returns -/// the result. -/// - `as <type> ?=> <try_into_type>` calls `<try_into_type>`'s `TryFrom::<<type>>` implementation -/// and returns the result. This is useful with fields for which not all values are valid. -/// /// The documentation strings are optional. If present, they will be added to the type's /// definition, or the field getter and setter methods they are attached to. /// @@ -144,15 +136,15 @@ pub(crate) trait RegisterBase<T> { /// 0:0 start as bool, "Start the CPU core"; /// }); /// -/// // The `read`, `write` and `alter` methods of relative registers take an extra `base` argument +/// // The `read`, `write` and `update` methods of relative registers take an extra `base` argument /// // that is used to resolve its final address by adding its `BASE` to the offset of the /// // register. /// /// // Start `CPU0`. 
-/// CPU_CTL::alter(bar, &CPU0, |r| r.set_start(true)); +/// CPU_CTL::update(bar, &CPU0, |r| r.set_start(true)); /// /// // Start `CPU1`. -/// CPU_CTL::alter(bar, &CPU1, |r| r.set_start(true)); +/// CPU_CTL::update(bar, &CPU1, |r| r.set_start(true)); /// /// // Aliases can also be defined for relative register. /// register!(CPU_CTL_ALIAS => CpuCtlBase[CPU_CTL], "Alias to CPU core control" { @@ -160,7 +152,7 @@ pub(crate) trait RegisterBase<T> { /// }); /// /// // Start the aliased `CPU0`. -/// CPU_CTL_ALIAS::alter(bar, &CPU0, |r| r.set_alias_start(true)); +/// CPU_CTL_ALIAS::update(bar, &CPU0, |r| r.set_alias_start(true)); /// ``` /// /// ## Arrays of registers @@ -168,7 +160,7 @@ pub(crate) trait RegisterBase<T> { /// Some I/O areas contain consecutive values that can be interpreted in the same way. These areas /// can be defined as an array of identical registers, allowing them to be accessed by index with /// compile-time or runtime bound checking. Simply define their address as `Address[Size]`, and add -/// an `idx` parameter to their `read`, `write` and `alter` methods: +/// an `idx` parameter to their `read`, `write` and `update` methods: /// /// ```no_run /// # fn no_run() -> Result<(), Error> { @@ -284,25 +276,25 @@ pub(crate) trait RegisterBase<T> { macro_rules! register { // Creates a register at a fixed offset of the MMIO space. ($name:ident @ $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@core $name $(, $comment)? { $($fields)* } ); + bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_fixed $name @ $offset); }; // Creates an alias register of fixed offset register `alias` with its own fields. ($name:ident => $alias:ident $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@core $name $(, $comment)? { $($fields)* } ); + bitfield!(pub(crate) struct $name(u32) $(, $comment)? 
{ $($fields)* } ); register!(@io_fixed $name @ $alias::OFFSET); }; // Creates a register at a relative offset from a base address provider. ($name:ident @ $base:ty [ $offset:literal ] $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@core $name $(, $comment)? { $($fields)* } ); + bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_relative $name @ $base [ $offset ]); }; // Creates an alias register of relative offset register `alias` with its own fields. ($name:ident => $base:ty [ $alias:ident ] $(, $comment:literal)? { $($fields:tt)* }) => { - register!(@core $name $(, $comment)? { $($fields)* } ); + bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_relative $name @ $base [ $alias::OFFSET ]); }; @@ -313,7 +305,7 @@ macro_rules! register { } ) => { static_assert!(::core::mem::size_of::<u32>() <= $stride); - register!(@core $name $(, $comment)? { $($fields)* } ); + bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_array $name @ $offset [ $size ; $stride ]); }; @@ -334,7 +326,7 @@ macro_rules! register { $(, $comment:literal)? { $($fields:tt)* } ) => { static_assert!(::core::mem::size_of::<u32>() <= $stride); - register!(@core $name $(, $comment)? { $($fields)* } ); + bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_relative_array $name @ $base [ $offset [ $size ; $stride ] ]); }; @@ -356,7 +348,7 @@ macro_rules! register { } ) => { static_assert!($idx < $alias::SIZE); - register!(@core $name $(, $comment)? { $($fields)* } ); + bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_relative $name @ $base [ $alias::OFFSET + $idx * $alias::STRIDE ] ); }; @@ -365,241 +357,10 @@ macro_rules! register { // to avoid it being interpreted in place of the relative register array alias rule. ($name:ident => $alias:ident [ $idx:expr ] $(, $comment:literal)? 
{ $($fields:tt)* }) => { static_assert!($idx < $alias::SIZE); - register!(@core $name $(, $comment)? { $($fields)* } ); + bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_fixed $name @ $alias::OFFSET + $idx * $alias::STRIDE ); }; - // All rules below are helpers. - - // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, - // `Default`, `BitOr`, and conversion to the value type) and field accessor methods. - (@core $name:ident $(, $comment:literal)? { $($fields:tt)* }) => { - $( - #[doc=$comment] - )? - #[repr(transparent)] - #[derive(Clone, Copy)] - pub(crate) struct $name(u32); - - impl ::core::ops::BitOr for $name { - type Output = Self; - - fn bitor(self, rhs: Self) -> Self::Output { - Self(self.0 | rhs.0) - } - } - - impl ::core::convert::From<$name> for u32 { - fn from(reg: $name) -> u32 { - reg.0 - } - } - - register!(@fields_dispatcher $name { $($fields)* }); - }; - - // Captures the fields and passes them to all the implementers that require field information. - // - // Used to simplify the matching rules for implementers, so they don't need to match the entire - // complex fields rule even though they only make use of part of it. - (@fields_dispatcher $name:ident { - $($hi:tt:$lo:tt $field:ident as $type:tt - $(?=> $try_into_type:ty)? - $(=> $into_type:ty)? - $(, $comment:literal)? - ; - )* - } - ) => { - register!(@field_accessors $name { - $( - $hi:$lo $field as $type - $(?=> $try_into_type)? - $(=> $into_type)? - $(, $comment)? - ; - )* - }); - register!(@debug $name { $($field;)* }); - register!(@default $name { $($field;)* }); - }; - - // Defines all the field getter/methods methods for `$name`. - ( - @field_accessors $name:ident { - $($hi:tt:$lo:tt $field:ident as $type:tt - $(?=> $try_into_type:ty)? - $(=> $into_type:ty)? - $(, $comment:literal)? 
- ; - )* - } - ) => { - $( - register!(@check_field_bounds $hi:$lo $field as $type); - )* - - #[allow(dead_code)] - impl $name { - $( - register!(@field_accessor $name $hi:$lo $field as $type - $(?=> $try_into_type)? - $(=> $into_type)? - $(, $comment)? - ; - ); - )* - } - }; - - // Boolean fields must have `$hi == $lo`. - (@check_field_bounds $hi:tt:$lo:tt $field:ident as bool) => { - #[allow(clippy::eq_op)] - const _: () = { - ::kernel::build_assert!( - $hi == $lo, - concat!("boolean field `", stringify!($field), "` covers more than one bit") - ); - }; - }; - - // Non-boolean fields must have `$hi >= $lo`. - (@check_field_bounds $hi:tt:$lo:tt $field:ident as $type:tt) => { - #[allow(clippy::eq_op)] - const _: () = { - ::kernel::build_assert!( - $hi >= $lo, - concat!("field `", stringify!($field), "`'s MSB is smaller than its LSB") - ); - }; - }; - - // Catches fields defined as `bool` and convert them into a boolean value. - ( - @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as bool => $into_type:ty - $(, $comment:literal)?; - ) => { - register!( - @leaf_accessor $name $hi:$lo $field - { |f| <$into_type>::from(if f != 0 { true } else { false }) } - $into_type => $into_type $(, $comment)?; - ); - }; - - // Shortcut for fields defined as `bool` without the `=>` syntax. - ( - @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as bool $(, $comment:literal)?; - ) => { - register!(@field_accessor $name $hi:$lo $field as bool => bool $(, $comment)?;); - }; - - // Catches the `?=>` syntax for non-boolean fields. - ( - @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt ?=> $try_into_type:ty - $(, $comment:literal)?; - ) => { - register!(@leaf_accessor $name $hi:$lo $field - { |f| <$try_into_type>::try_from(f as $type) } $try_into_type => - ::core::result::Result< - $try_into_type, - <$try_into_type as ::core::convert::TryFrom<$type>>::Error - > - $(, $comment)?;); - }; - - // Catches the `=>` syntax for non-boolean fields. 
- ( - @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt => $into_type:ty - $(, $comment:literal)?; - ) => { - register!(@leaf_accessor $name $hi:$lo $field - { |f| <$into_type>::from(f as $type) } $into_type => $into_type $(, $comment)?;); - }; - - // Shortcut for non-boolean fields defined without the `=>` or `?=>` syntax. - ( - @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt - $(, $comment:literal)?; - ) => { - register!(@field_accessor $name $hi:$lo $field as $type => $type $(, $comment)?;); - }; - - // Generates the accessor methods for a single field. - ( - @leaf_accessor $name:ident $hi:tt:$lo:tt $field:ident - { $process:expr } $to_type:ty => $res_type:ty $(, $comment:literal)?; - ) => { - ::kernel::macros::paste!( - const [<$field:upper _RANGE>]: ::core::ops::RangeInclusive<u8> = $lo..=$hi; - const [<$field:upper _MASK>]: u32 = ((((1 << $hi) - 1) << 1) + 1) - ((1 << $lo) - 1); - const [<$field:upper _SHIFT>]: u32 = Self::[<$field:upper _MASK>].trailing_zeros(); - ); - - $( - #[doc="Returns the value of this field:"] - #[doc=$comment] - )? - #[inline(always)] - pub(crate) fn $field(self) -> $res_type { - ::kernel::macros::paste!( - const MASK: u32 = $name::[<$field:upper _MASK>]; - const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; - ); - let field = ((self.0 & MASK) >> SHIFT); - - $process(field) - } - - ::kernel::macros::paste!( - $( - #[doc="Sets the value of this field:"] - #[doc=$comment] - )? - #[inline(always)] - pub(crate) fn [<set_ $field>](mut self, value: $to_type) -> Self { - const MASK: u32 = $name::[<$field:upper _MASK>]; - const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; - let value = (u32::from(value) << SHIFT) & MASK; - self.0 = (self.0 & !MASK) | value; - - self - } - ); - }; - - // Generates the `Debug` implementation for `$name`. 
- (@debug $name:ident { $($field:ident;)* }) => { - impl ::kernel::fmt::Debug for $name { - fn fmt(&self, f: &mut ::kernel::fmt::Formatter<'_>) -> ::kernel::fmt::Result { - f.debug_struct(stringify!($name)) - .field("<raw>", &::kernel::prelude::fmt!("{:#x}", &self.0)) - $( - .field(stringify!($field), &self.$field()) - )* - .finish() - } - } - }; - - // Generates the `Default` implementation for `$name`. - (@default $name:ident { $($field:ident;)* }) => { - /// Returns a value for the register where all fields are set to their default value. - impl ::core::default::Default for $name { - fn default() -> Self { - #[allow(unused_mut)] - let mut value = Self(Default::default()); - - ::kernel::macros::paste!( - $( - value.[<set_ $field>](Default::default()); - )* - ); - - value - } - } - }; - // Generates the IO accessors for a fixed offset register. (@io_fixed $name:ident @ $offset:expr) => { #[allow(dead_code)] @@ -625,7 +386,7 @@ macro_rules! register { /// Read the register from its address in `io` and run `f` on its value to obtain a new /// value to write back. #[inline(always)] - pub(crate) fn alter<const SIZE: usize, T, F>( + pub(crate) fn update<const SIZE: usize, T, F>( io: &T, f: F, ) where @@ -688,7 +449,7 @@ macro_rules! register { /// the register's offset to it, then run `f` on its value to obtain a new value to /// write back. #[inline(always)] - pub(crate) fn alter<const SIZE: usize, T, B, F>( + pub(crate) fn update<const SIZE: usize, T, B, F>( io: &T, base: &B, f: F, @@ -746,7 +507,7 @@ macro_rules! register { /// Read the array register at index `idx` in `io` and run `f` on its value to obtain a /// new value to write back. #[inline(always)] - pub(crate) fn alter<const SIZE: usize, T, F>( + pub(crate) fn update<const SIZE: usize, T, F>( io: &T, idx: usize, f: F, @@ -801,7 +562,7 @@ macro_rules! register { /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the /// access was out-of-bounds. 
#[inline(always)] - pub(crate) fn try_alter<const SIZE: usize, T, F>( + pub(crate) fn try_update<const SIZE: usize, T, F>( io: &T, idx: usize, f: F, @@ -810,7 +571,7 @@ macro_rules! register { F: ::core::ops::FnOnce(Self) -> Self, { if idx < Self::SIZE { - Ok(Self::alter(io, idx, f)) + Ok(Self::update(io, idx, f)) } else { Err(EINVAL) } @@ -875,7 +636,7 @@ macro_rules! register { /// by `base` and adding the register's offset to it, then run `f` on its value to /// obtain a new value to write back. #[inline(always)] - pub(crate) fn alter<const SIZE: usize, T, B, F>( + pub(crate) fn update<const SIZE: usize, T, B, F>( io: &T, base: &B, idx: usize, @@ -939,7 +700,7 @@ macro_rules! register { /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the /// access was out-of-bounds. #[inline(always)] - pub(crate) fn try_alter<const SIZE: usize, T, B, F>( + pub(crate) fn try_update<const SIZE: usize, T, B, F>( io: &T, base: &B, idx: usize, @@ -950,7 +711,7 @@ macro_rules! register { F: ::core::ops::FnOnce(Self) -> Self, { if idx < Self::SIZE { - Ok(Self::alter(io, base, idx, f)) + Ok(Self::update(io, base, idx, f)) } else { Err(EINVAL) } diff --git a/drivers/gpu/nova-core/sbuffer.rs b/drivers/gpu/nova-core/sbuffer.rs new file mode 100644 index 000000000000..64758b7fae56 --- /dev/null +++ b/drivers/gpu/nova-core/sbuffer.rs @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0 + +use core::ops::Deref; + +use kernel::{ + alloc::KVec, + prelude::*, // +}; + +/// A buffer abstraction for discontiguous byte slices. +/// +/// This allows you to treat multiple non-contiguous `&mut [u8]` slices +/// of the same length as a single stream-like read/write buffer. 
/// A buffer abstraction for discontiguous byte slices.
///
/// This allows you to treat multiple non-contiguous byte slices as a single stream-like
/// read/write buffer.
///
/// # Examples
///
/// ```
/// let mut buf1 = [0u8; 5];
/// let mut buf2 = [0u8; 5];
/// let mut sbuffer = SBufferIter::new_writer([&mut buf1[..], &mut buf2[..]]);
///
/// let data = b"hi world!";
/// sbuffer.write_all(data)?;
/// drop(sbuffer);
///
/// assert_eq!(buf1, *b"hi wo");
/// assert_eq!(buf2, *b"rld!\0");
///
/// # Ok::<(), Error>(())
/// ```
pub(crate) struct SBufferIter<I: Iterator> {
    // [`Some`] if we are not at the end of the data yet. Never holds an empty slice.
    cur_slice: Option<I::Item>,
    // All the slices remaining after `cur_slice`.
    slices: I,
}

impl<'a, I> SBufferIter<I>
where
    I: Iterator,
{
    /// Creates a reader buffer for a discontiguous set of byte slices.
    ///
    /// # Examples
    ///
    /// ```
    /// let buf1: [u8; 5] = [0, 1, 2, 3, 4];
    /// let buf2: [u8; 5] = [5, 6, 7, 8, 9];
    /// let sbuffer = SBufferIter::new_reader([&buf1[..], &buf2[..]]);
    /// let sum: u8 = sbuffer.sum();
    /// assert_eq!(sum, 45);
    /// ```
    pub(crate) fn new_reader(slices: impl IntoIterator<IntoIter = I>) -> Self
    where
        I: Iterator<Item = &'a [u8]>,
    {
        Self::new(slices)
    }

    /// Creates a writeable buffer for a discontiguous set of byte slices.
    ///
    /// # Examples
    ///
    /// ```
    /// let mut buf1 = [0u8; 5];
    /// let mut buf2 = [0u8; 5];
    /// let mut sbuffer = SBufferIter::new_writer([&mut buf1[..], &mut buf2[..]]);
    /// sbuffer.write_all(&[0u8, 1, 2, 3, 4, 5, 6, 7, 8, 9][..])?;
    /// drop(sbuffer);
    /// assert_eq!(buf1, [0, 1, 2, 3, 4]);
    /// assert_eq!(buf2, [5, 6, 7, 8, 9]);
    ///
    /// # Ok::<(), Error>(())
    /// ```
    pub(crate) fn new_writer(slices: impl IntoIterator<IntoIter = I>) -> Self
    where
        I: Iterator<Item = &'a mut [u8]>,
    {
        Self::new(slices)
    }

    /// Common constructor: positions the buffer on the first non-empty slice, if any.
    fn new(slices: impl IntoIterator<IntoIter = I>) -> Self
    where
        I::Item: Deref<Target = [u8]>,
    {
        let mut slices = slices.into_iter();
        // Skip empty slices.
        let cur_slice = slices.find(|s| !s.deref().is_empty());

        Self { cur_slice, slices }
    }

    /// Returns a slice of at most `len` bytes, or [`None`] if we are at the end of the data.
    ///
    /// If a slice shorter than `len` bytes has been returned, the caller can call this method
    /// again until it returns [`None`] to try and obtain the remainder of the data.
    ///
    /// The closure `f` should split the slice received in its first parameter
    /// at the position given in the second parameter. It is called at most once, and only when
    /// the current slice is strictly longer than `len`.
    fn get_slice_internal(
        &mut self,
        len: usize,
        f: impl FnOnce(I::Item, usize) -> (I::Item, I::Item),
    ) -> Option<I::Item>
    where
        I::Item: Deref<Target = [u8]>,
    {
        let cur_slice = self.cur_slice.take()?;

        if len >= cur_slice.len() {
            // Caller requested at least as much data as is in the current slice: return it
            // entirely and prepare the following slice for being used. Skip empty slices so
            // that `cur_slice` never holds one.
            self.cur_slice = self.slices.find(|s| !s.is_empty());

            Some(cur_slice)
        } else {
            // The current slice can satisfy the request, split it and return a slice of the
            // requested size. The remainder is non-empty because `len < cur_slice.len()`.
            let (ret, next) = f(cur_slice, len);
            self.cur_slice = Some(next);

            Some(ret)
        }
    }

    /// Returns whether this buffer still has data available.
    pub(crate) fn is_empty(&self) -> bool {
        self.cur_slice.is_none()
    }
}

/// Provides a way to get non-mutable slices of data to read from.
impl<'a, I> SBufferIter<I>
where
    I: Iterator<Item = &'a [u8]>,
{
    /// Returns a slice of at most `len` bytes, or [`None`] if we are at the end of the data.
    ///
    /// If a slice shorter than `len` bytes has been returned, the caller can call this method
    /// again until it returns [`None`] to try and obtain the remainder of the data.
    fn get_slice(&mut self, len: usize) -> Option<&'a [u8]> {
        self.get_slice_internal(len, |s, pos| s.split_at(pos))
    }
}
+ /// So mimic `std::io::Read::read_exact`. + #[expect(unused)] + pub(crate) fn read_exact(&mut self, mut dst: &mut [u8]) -> Result { + while !dst.is_empty() { + match self.get_slice(dst.len()) { + None => return Err(EINVAL), + Some(src) => { + let dst_slice; + (dst_slice, dst) = dst.split_at_mut(src.len()); + dst_slice.copy_from_slice(src); + } + } + } + + Ok(()) + } + + /// Read all the remaining data into a [`KVec`]. + /// + /// `self` will be empty after this operation. + pub(crate) fn flush_into_kvec(&mut self, flags: kernel::alloc::Flags) -> Result<KVec<u8>> { + let mut buf = KVec::<u8>::new(); + + if let Some(slice) = core::mem::take(&mut self.cur_slice) { + buf.extend_from_slice(slice, flags)?; + } + for slice in &mut self.slices { + buf.extend_from_slice(slice, flags)?; + } + + Ok(buf) + } +} + +/// Provides a way to get mutable slices of data to write into. +impl<'a, I> SBufferIter<I> +where + I: Iterator<Item = &'a mut [u8]>, +{ + /// Returns a mutable slice of at most `len` bytes, or [`None`] if we are at the end of the + /// data. + /// + /// If a slice shorter than `len` bytes has been returned, the caller can call this method + /// again until it returns `None` to try and obtain the remainder of the data. + fn get_slice_mut(&mut self, len: usize) -> Option<&'a mut [u8]> { + self.get_slice_internal(len, |s, pos| s.split_at_mut(pos)) + } + + /// Ideally we would implement [`Write`], but it is not available in `core`. + /// So mimic `std::io::Write::write_all`. 
+ pub(crate) fn write_all(&mut self, mut src: &[u8]) -> Result { + while !src.is_empty() { + match self.get_slice_mut(src.len()) { + None => return Err(ETOOSMALL), + Some(dst) => { + let src_slice; + (src_slice, src) = src.split_at(dst.len()); + dst.copy_from_slice(src_slice); + } + } + } + + Ok(()) + } +} + +impl<'a, I> Iterator for SBufferIter<I> +where + I: Iterator<Item = &'a [u8]>, +{ + type Item = u8; + + fn next(&mut self) -> Option<Self::Item> { + // Returned slices are guaranteed to not be empty so we can safely index the first entry. + self.get_slice(1).map(|s| s[0]) + } +} diff --git a/drivers/gpu/nova-core/util.rs b/drivers/gpu/nova-core/util.rs index bf35f00cb732..4b503249a3ef 100644 --- a/drivers/gpu/nova-core/util.rs +++ b/drivers/gpu/nova-core/util.rs @@ -1,27 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 -use kernel::prelude::*; -use kernel::time::{Delta, Instant, Monotonic}; - -/// Wait until `cond` is true or `timeout` elapsed. -/// -/// When `cond` evaluates to `Some`, its return value is returned. +/// Converts a null-terminated byte slice to a string, or `None` if the array does not +/// contain any null byte or contains invalid characters. /// -/// `Err(ETIMEDOUT)` is returned if `timeout` has been reached without `cond` evaluating to -/// `Some`. -/// -/// TODO[DLAY]: replace with `read_poll_timeout` once it is available. -/// (https://lore.kernel.org/lkml/20250220070611.214262-8-fujita.tomonori@gmail.com/) -pub(crate) fn wait_on<R, F: Fn() -> Option<R>>(timeout: Delta, cond: F) -> Result<R> { - let start_time = Instant::<Monotonic>::now(); - - loop { - if let Some(ret) = cond() { - return Ok(ret); - } +/// Contrary to [`kernel::str::CStr::from_bytes_with_nul`], the null byte can be anywhere in the +/// slice, and not only in the last position. 
+pub(crate) fn str_from_null_terminated(bytes: &[u8]) -> Option<&str> { + use kernel::str::CStr; - if start_time.elapsed().as_nanos() > timeout.as_nanos() { - return Err(ETIMEDOUT); - } - } + bytes + .iter() + .position(|&b| b == 0) + .and_then(|null_pos| CStr::from_bytes_with_nul(&bytes[..=null_pos]).ok()) + .and_then(|cstr| cstr.to_str().ok()) } diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index 71fbe71b84db..abf423560ff4 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -2,15 +2,27 @@ //! VBIOS extraction and parsing. -use crate::driver::Bar0; -use crate::firmware::fwsec::Bcrt30Rsa3kSignature; -use crate::firmware::FalconUCodeDescV3; use core::convert::TryFrom; -use kernel::device; -use kernel::error::Result; -use kernel::prelude::*; -use kernel::ptr::{Alignable, Alignment}; -use kernel::types::ARef; + +use kernel::{ + device, + prelude::*, + ptr::{ + Alignable, + Alignment, // + }, + transmute::FromBytes, + types::ARef, +}; + +use crate::{ + driver::Bar0, + firmware::{ + fwsec::Bcrt30Rsa3kSignature, + FalconUCodeDescV3, // + }, + num::FromSafeCast, +}; /// The offset of the VBIOS ROM in the BAR0 space. const ROM_OFFSET: usize = 0x300000; @@ -22,6 +34,34 @@ const BIOS_READ_AHEAD_SIZE: usize = 1024; /// indicates the last image. Bit 0-6 are reserved, bit 7 is last image bit. const LAST_IMAGE_BIT_MASK: u8 = 0x80; +/// BIOS Image Type from PCI Data Structure code_type field. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u8)] +enum BiosImageType { + /// PC-AT compatible BIOS image (x86 legacy) + PciAt = 0x00, + /// EFI (Extensible Firmware Interface) BIOS image + Efi = 0x03, + /// NBSI (Notebook System Information) BIOS image + Nbsi = 0x70, + /// FwSec (Firmware Security) BIOS image + FwSec = 0xE0, +} + +impl TryFrom<u8> for BiosImageType { + type Error = Error; + + fn try_from(code: u8) -> Result<Self> { + match code { + 0x00 => Ok(Self::PciAt), + 0x03 => Ok(Self::Efi), + 0x70 => Ok(Self::Nbsi), + 0xE0 => Ok(Self::FwSec), + _ => Err(EINVAL), + } + } +} + // PMU lookup table entry types. Used to locate PMU table entries // in the Fwsec image, corresponding to falcon ucodes. #[expect(dead_code)] @@ -197,32 +237,37 @@ impl Vbios { // Parse all VBIOS images in the ROM for image_result in VbiosIterator::new(dev, bar0)? { - let full_image = image_result?; + let image = image_result?; dev_dbg!( dev, - "Found BIOS image: size: {:#x}, type: {}, last: {}\n", - full_image.image_size_bytes(), - full_image.image_type_str(), - full_image.is_last() + "Found BIOS image: size: {:#x}, type: {:?}, last: {}\n", + image.image_size_bytes(), + image.image_type(), + image.is_last() ); - // Get references to images we will need after the loop, in order to - // setup the falcon data offset. 
- match full_image { - BiosImage::PciAt(image) => { - pci_at_image = Some(image); + // Convert to a specific image type + match BiosImageType::try_from(image.pcir.code_type) { + Ok(BiosImageType::PciAt) => { + pci_at_image = Some(PciAtBiosImage::try_from(image)?); } - BiosImage::FwSec(image) => { + Ok(BiosImageType::FwSec) => { + let fwsec = FwSecBiosBuilder { + base: image, + falcon_data_offset: None, + pmu_lookup_table: None, + falcon_ucode_offset: None, + }; if first_fwsec_image.is_none() { - first_fwsec_image = Some(image); + first_fwsec_image = Some(fwsec); } else { - second_fwsec_image = Some(image); + second_fwsec_image = Some(fwsec); } } - // For now we don't need to handle these - BiosImage::Efi(_image) => {} - BiosImage::Nbsi(_image) => {} + _ => { + // Ignore other image types or unknown types + } } } @@ -280,45 +325,29 @@ struct PcirStruct { max_runtime_image_len: u16, } +// SAFETY: all bit patterns are valid for `PcirStruct`. +unsafe impl FromBytes for PcirStruct {} + impl PcirStruct { fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { - if data.len() < core::mem::size_of::<PcirStruct>() { - dev_err!(dev, "Not enough data for PcirStruct\n"); - return Err(EINVAL); - } - - let mut signature = [0u8; 4]; - signature.copy_from_slice(&data[0..4]); + let (pcir, _) = PcirStruct::from_bytes_copy_prefix(data).ok_or(EINVAL)?; // Signature should be "PCIR" (0x52494350) or "NPDS" (0x5344504e). 
- if &signature != b"PCIR" && &signature != b"NPDS" { - dev_err!(dev, "Invalid signature for PcirStruct: {:?}\n", signature); + if &pcir.signature != b"PCIR" && &pcir.signature != b"NPDS" { + dev_err!( + dev, + "Invalid signature for PcirStruct: {:?}\n", + pcir.signature + ); return Err(EINVAL); } - let mut class_code = [0u8; 3]; - class_code.copy_from_slice(&data[13..16]); - - let image_len = u16::from_le_bytes([data[16], data[17]]); - if image_len == 0 { + if pcir.image_len == 0 { dev_err!(dev, "Invalid image length: 0\n"); return Err(EINVAL); } - Ok(PcirStruct { - signature, - vendor_id: u16::from_le_bytes([data[4], data[5]]), - device_id: u16::from_le_bytes([data[6], data[7]]), - device_list_ptr: u16::from_le_bytes([data[8], data[9]]), - pci_data_struct_len: u16::from_le_bytes([data[10], data[11]]), - pci_data_struct_rev: data[12], - class_code, - image_len, - vendor_rom_rev: u16::from_le_bytes([data[18], data[19]]), - code_type: data[20], - last_image: data[21], - max_runtime_image_len: u16::from_le_bytes([data[22], data[23]]), - }) + Ok(pcir) } /// Check if this is the last image in the ROM. @@ -328,7 +357,7 @@ impl PcirStruct { /// Calculate image size in bytes from 512-byte blocks. fn image_size_bytes(&self) -> usize { - self.image_len as usize * 512 + usize::from(self.image_len) * 512 } } @@ -356,30 +385,19 @@ struct BitHeader { checksum: u8, } +// SAFETY: all bit patterns are valid for `BitHeader`. 
+unsafe impl FromBytes for BitHeader {} + impl BitHeader { fn new(data: &[u8]) -> Result<Self> { - if data.len() < core::mem::size_of::<Self>() { - return Err(EINVAL); - } - - let mut signature = [0u8; 4]; - signature.copy_from_slice(&data[2..6]); + let (header, _) = BitHeader::from_bytes_copy_prefix(data).ok_or(EINVAL)?; // Check header ID and signature - let id = u16::from_le_bytes([data[0], data[1]]); - if id != 0xB8FF || &signature != b"BIT\0" { + if header.id != 0xB8FF || &header.signature != b"BIT\0" { return Err(EINVAL); } - Ok(BitHeader { - id, - signature, - bcd_version: u16::from_le_bytes([data[6], data[7]]), - header_size: data[8], - token_size: data[9], - token_entries: data[10], - checksum: data[11], - }) + Ok(header) } } @@ -406,13 +424,13 @@ impl BitToken { let header = &image.bit_header; // Offset to the first token entry - let tokens_start = image.bit_offset + header.header_size as usize; + let tokens_start = image.bit_offset + usize::from(header.header_size); - for i in 0..header.token_entries as usize { - let entry_offset = tokens_start + (i * header.token_size as usize); + for i in 0..usize::from(header.token_entries) { + let entry_offset = tokens_start + (i * usize::from(header.token_size)); // Make sure we don't go out of bounds - if entry_offset + header.token_size as usize > image.base.data.len() { + if entry_offset + usize::from(header.token_size) > image.base.data.len() { return Err(EINVAL); } @@ -530,35 +548,29 @@ struct NpdeStruct { last_image: u8, } +// SAFETY: all bit patterns are valid for `NpdeStruct`. +unsafe impl FromBytes for NpdeStruct {} + impl NpdeStruct { fn new(dev: &device::Device, data: &[u8]) -> Option<Self> { - if data.len() < core::mem::size_of::<Self>() { - dev_dbg!(dev, "Not enough data for NpdeStruct\n"); - return None; - } - - let mut signature = [0u8; 4]; - signature.copy_from_slice(&data[0..4]); + let (npde, _) = NpdeStruct::from_bytes_copy_prefix(data)?; // Signature should be "NPDE" (0x4544504E). 
- if &signature != b"NPDE" { - dev_dbg!(dev, "Invalid signature for NpdeStruct: {:?}\n", signature); + if &npde.signature != b"NPDE" { + dev_dbg!( + dev, + "Invalid signature for NpdeStruct: {:?}\n", + npde.signature + ); return None; } - let subimage_len = u16::from_le_bytes([data[8], data[9]]); - if subimage_len == 0 { + if npde.subimage_len == 0 { dev_dbg!(dev, "Invalid subimage length: 0\n"); return None; } - Some(NpdeStruct { - signature, - npci_data_ext_rev: u16::from_le_bytes([data[4], data[5]]), - npci_data_ext_len: u16::from_le_bytes([data[6], data[7]]), - subimage_len, - last_image: data[10], - }) + Some(npde) } /// Check if this is the last image in the ROM. @@ -568,7 +580,7 @@ impl NpdeStruct { /// Calculate image size in bytes from 512-byte blocks. fn image_size_bytes(&self) -> usize { - self.subimage_len as usize * 512 + usize::from(self.subimage_len) * 512 } /// Try to find NPDE in the data, the NPDE is right after the PCIR. @@ -580,8 +592,8 @@ impl NpdeStruct { ) -> Option<Self> { // Calculate the offset where NPDE might be located // NPDE should be right after the PCIR structure, aligned to 16 bytes - let pcir_offset = rom_header.pci_data_struct_offset as usize; - let npde_start = (pcir_offset + pcir.pci_data_struct_len as usize + 0x0F) & !0x0F; + let pcir_offset = usize::from(rom_header.pci_data_struct_offset); + let npde_start = (pcir_offset + usize::from(pcir.pci_data_struct_len) + 0x0F) & !0x0F; // Check if we have enough data if npde_start + core::mem::size_of::<Self>() > data.len() { @@ -594,108 +606,29 @@ impl NpdeStruct { } } -// Use a macro to implement BiosImage enum and methods. This avoids having to -// repeat each enum type when implementing functions like base() in BiosImage. -macro_rules! bios_image { - ( - $($variant:ident: $class:ident),* $(,)? 
- ) => { - // BiosImage enum with variants for each image type - enum BiosImage { - $($variant($class)),* - } - - impl BiosImage { - /// Get a reference to the common BIOS image data regardless of type - fn base(&self) -> &BiosImageBase { - match self { - $(Self::$variant(img) => &img.base),* - } - } - - /// Returns a string representing the type of BIOS image - fn image_type_str(&self) -> &'static str { - match self { - $(Self::$variant(_) => stringify!($variant)),* - } - } - } - } -} - -impl BiosImage { - /// Check if this is the last image. - fn is_last(&self) -> bool { - let base = self.base(); - - // For NBSI images (type == 0x70), return true as they're - // considered the last image - if matches!(self, Self::Nbsi(_)) { - return true; - } - - // For other image types, check the NPDE first if available - if let Some(ref npde) = base.npde { - return npde.is_last(); - } - - // Otherwise, fall back to checking the PCIR last_image flag - base.pcir.is_last() - } - - /// Get the image size in bytes. - fn image_size_bytes(&self) -> usize { - let base = self.base(); - - // Prefer NPDE image size if available - if let Some(ref npde) = base.npde { - return npde.image_size_bytes(); - } - - // Otherwise, fall back to the PCIR image size - base.pcir.image_size_bytes() - } - - /// Create a [`BiosImageBase`] from a byte slice and convert it to a [`BiosImage`] which - /// triggers the constructor of the specific BiosImage enum variant. - fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { - let base = BiosImageBase::new(dev, data)?; - let image = base.into_image().inspect_err(|e| { - dev_err!(dev, "Failed to create BiosImage: {:?}\n", e); - })?; - - Ok(image) - } -} - -bios_image! 
{ - PciAt: PciAtBiosImage, // PCI-AT compatible BIOS image - Efi: EfiBiosImage, // EFI (Extensible Firmware Interface) - Nbsi: NbsiBiosImage, // NBSI (Nvidia Bios System Interface) - FwSec: FwSecBiosBuilder, // FWSEC (Firmware Security) -} - /// The PciAt BIOS image is typically the first BIOS image type found in the BIOS image chain. /// /// It contains the BIT header and the BIT tokens. struct PciAtBiosImage { - base: BiosImageBase, + base: BiosImage, bit_header: BitHeader, bit_offset: usize, } +#[expect(dead_code)] struct EfiBiosImage { - base: BiosImageBase, + base: BiosImage, // EFI-specific fields can be added here in the future. } +#[expect(dead_code)] struct NbsiBiosImage { - base: BiosImageBase, + base: BiosImage, // NBSI-specific fields can be added here in the future. } struct FwSecBiosBuilder { - base: BiosImageBase, + base: BiosImage, /// These are temporary fields that are used during the construction of the /// [`FwSecBiosBuilder`]. /// @@ -714,37 +647,16 @@ struct FwSecBiosBuilder { /// /// The PMU table contains voltage/frequency tables as well as a pointer to the Falcon Ucode. pub(crate) struct FwSecBiosImage { - base: BiosImageBase, + base: BiosImage, /// The offset of the Falcon ucode. falcon_ucode_offset: usize, } -// Convert from BiosImageBase to BiosImage -impl TryFrom<BiosImageBase> for BiosImage { - type Error = Error; - - fn try_from(base: BiosImageBase) -> Result<Self> { - match base.pcir.code_type { - 0x00 => Ok(BiosImage::PciAt(base.try_into()?)), - 0x03 => Ok(BiosImage::Efi(EfiBiosImage { base })), - 0x70 => Ok(BiosImage::Nbsi(NbsiBiosImage { base })), - 0xE0 => Ok(BiosImage::FwSec(FwSecBiosBuilder { - base, - falcon_data_offset: None, - pmu_lookup_table: None, - falcon_ucode_offset: None, - })), - _ => Err(EINVAL), - } - } -} - /// BIOS Image structure containing various headers and reference fields to all BIOS images. /// -/// Each BiosImage type has a BiosImageBase type along with other image-specific fields. 
Note that -/// Rust favors composition of types over inheritance. +/// A BiosImage struct is embedded into all image types and implements common operations. #[expect(dead_code)] -struct BiosImageBase { +struct BiosImage { /// Used for logging. dev: ARef<device::Device>, /// PCI ROM Expansion Header @@ -757,12 +669,40 @@ struct BiosImageBase { data: KVec<u8>, } -impl BiosImageBase { - fn into_image(self) -> Result<BiosImage> { - BiosImage::try_from(self) +impl BiosImage { + /// Get the image size in bytes. + fn image_size_bytes(&self) -> usize { + // Prefer NPDE image size if available + if let Some(ref npde) = self.npde { + npde.image_size_bytes() + } else { + // Otherwise, fall back to the PCIR image size + self.pcir.image_size_bytes() + } + } + + /// Get the BIOS image type. + fn image_type(&self) -> Result<BiosImageType> { + BiosImageType::try_from(self.pcir.code_type) + } + + /// Check if this is the last image. + fn is_last(&self) -> bool { + // For NBSI images, return true as they're considered the last image. + if self.image_type() == Ok(BiosImageType::Nbsi) { + return true; + } + + // For other image types, check the NPDE first if available + if let Some(ref npde) = self.npde { + return npde.is_last(); + } + + // Otherwise, fall back to checking the PCIR last_image flag + self.pcir.is_last() } - /// Creates a new BiosImageBase from raw byte data. + /// Creates a new BiosImage from raw byte data. fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { // Ensure we have enough data for the ROM header. if data.len() < 26 { @@ -775,7 +715,7 @@ impl BiosImageBase { .inspect_err(|e| dev_err!(dev, "Failed to create PciRomHeader: {:?}\n", e))?; // Get the PCI Data Structure using the pointer from the ROM header. 
- let pcir_offset = rom_header.pci_data_struct_offset as usize; + let pcir_offset = usize::from(rom_header.pci_data_struct_offset); let pcir_data = data .get(pcir_offset..pcir_offset + core::mem::size_of::<PcirStruct>()) .ok_or(EINVAL) @@ -802,7 +742,7 @@ impl BiosImageBase { let mut data_copy = KVec::new(); data_copy.extend_from_slice(data, GFP_KERNEL)?; - Ok(BiosImageBase { + Ok(BiosImage { dev: dev.into(), rom_header, pcir, @@ -843,12 +783,12 @@ impl PciAtBiosImage { let token = self.get_bit_token(BIT_TOKEN_ID_FALCON_DATA)?; // Make sure we don't go out of bounds - if token.data_offset as usize + 4 > self.base.data.len() { + if usize::from(token.data_offset) + 4 > self.base.data.len() { return Err(EINVAL); } // read the 4 bytes at the offset specified in the token - let offset = token.data_offset as usize; + let offset = usize::from(token.data_offset); let bytes: [u8; 4] = self.base.data[offset..offset + 4].try_into().map_err(|_| { dev_err!(self.base.dev, "Failed to convert data slice to array"); EINVAL @@ -856,7 +796,7 @@ impl PciAtBiosImage { let data_ptr = u32::from_le_bytes(bytes); - if (data_ptr as usize) < self.base.data.len() { + if (usize::from_safe_cast(data_ptr)) < self.base.data.len() { dev_err!(self.base.dev, "Falcon data pointer out of bounds\n"); return Err(EINVAL); } @@ -865,10 +805,10 @@ impl PciAtBiosImage { } } -impl TryFrom<BiosImageBase> for PciAtBiosImage { +impl TryFrom<BiosImage> for PciAtBiosImage { type Error = Error; - fn try_from(base: BiosImageBase) -> Result<Self> { + fn try_from(base: BiosImage) -> Result<Self> { let data_slice = &base.data; let (bit_header, bit_offset) = PciAtBiosImage::find_bit_header(data_slice)?; @@ -904,29 +844,34 @@ impl PmuLookupTableEntry { } } +#[repr(C)] +struct PmuLookupTableHeader { + version: u8, + header_len: u8, + entry_len: u8, + entry_count: u8, +} + +// SAFETY: all bit patterns are valid for `PmuLookupTableHeader`. 
+unsafe impl FromBytes for PmuLookupTableHeader {} + /// The [`PmuLookupTableEntry`] structure is used to find the [`PmuLookupTableEntry`] for a given /// application ID. /// /// The table of entries is pointed to by the falcon data pointer in the BIT table, and is used to /// locate the Falcon Ucode. -#[expect(dead_code)] struct PmuLookupTable { - version: u8, - header_len: u8, - entry_len: u8, - entry_count: u8, + header: PmuLookupTableHeader, table_data: KVec<u8>, } impl PmuLookupTable { fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { - if data.len() < 4 { - return Err(EINVAL); - } + let (header, _) = PmuLookupTableHeader::from_bytes_copy_prefix(data).ok_or(EINVAL)?; - let header_len = data[1] as usize; - let entry_len = data[2] as usize; - let entry_count = data[3] as usize; + let header_len = usize::from(header.header_len); + let entry_len = usize::from(header.entry_len); + let entry_count = usize::from(header.entry_count); let required_bytes = header_len + (entry_count * entry_len); @@ -947,27 +892,21 @@ impl PmuLookupTable { dev_dbg!(dev, "PMU entry: {:02x?}\n", &data[i..][..entry_len]); } - Ok(PmuLookupTable { - version: data[0], - header_len: header_len as u8, - entry_len: entry_len as u8, - entry_count: entry_count as u8, - table_data, - }) + Ok(PmuLookupTable { header, table_data }) } fn lookup_index(&self, idx: u8) -> Result<PmuLookupTableEntry> { - if idx >= self.entry_count { + if idx >= self.header.entry_count { return Err(EINVAL); } - let index = (idx as usize) * self.entry_len as usize; + let index = (usize::from(idx)) * usize::from(self.header.entry_len); PmuLookupTableEntry::new(&self.table_data[index..]) } // find entry by type value fn find_entry_by_type(&self, entry_type: u8) -> Result<PmuLookupTableEntry> { - for i in 0..self.entry_count { + for i in 0..self.header.entry_count { let entry = self.lookup_index(i)?; if entry.application_id == entry_type { return Ok(entry); @@ -984,7 +923,7 @@ impl FwSecBiosBuilder { pci_at_image: 
&PciAtBiosImage, first_fwsec: &FwSecBiosBuilder, ) -> Result { - let mut offset = pci_at_image.falcon_data_ptr()? as usize; + let mut offset = usize::from_safe_cast(pci_at_image.falcon_data_ptr()?); let mut pmu_in_first_fwsec = false; // The falcon data pointer assumes that the PciAt and FWSEC images @@ -1025,7 +964,7 @@ impl FwSecBiosBuilder { .find_entry_by_type(FALCON_UCODE_ENTRY_APPID_FWSEC_PROD) { Ok(entry) => { - let mut ucode_offset = entry.data as usize; + let mut ucode_offset = usize::from_safe_cast(entry.data); ucode_offset -= pci_at_image.base.data.len(); if ucode_offset < first_fwsec.base.data.len() { dev_err!(self.base.dev, "Falcon Ucode offset not in second Fwsec.\n"); @@ -1111,7 +1050,7 @@ impl FwSecBiosImage { // The ucode data follows the descriptor. let ucode_data_offset = falcon_ucode_offset + desc.size(); - let size = (desc.imem_load_size + desc.dmem_load_size) as usize; + let size = usize::from_safe_cast(desc.imem_load_size + desc.dmem_load_size); // Get the data slice, checking bounds in a single operation. self.base @@ -1130,8 +1069,8 @@ impl FwSecBiosImage { pub(crate) fn sigs(&self, desc: &FalconUCodeDescV3) -> Result<&[Bcrt30Rsa3kSignature]> { // The signatures data follows the descriptor. let sigs_data_offset = self.falcon_ucode_offset + core::mem::size_of::<FalconUCodeDescV3>(); - let sigs_size = - desc.signature_count as usize * core::mem::size_of::<Bcrt30Rsa3kSignature>(); + let sigs_count = usize::from(desc.signature_count); + let sigs_size = sigs_count * core::mem::size_of::<Bcrt30Rsa3kSignature>(); // Make sure the data is within bounds. if sigs_data_offset + sigs_size > self.base.data.len() { @@ -1151,7 +1090,7 @@ impl FwSecBiosImage { .as_ptr() .add(sigs_data_offset) .cast::<Bcrt30Rsa3kSignature>(), - desc.signature_count as usize, + sigs_count, ) }) } |
