diff options
Diffstat (limited to 'rust')
-rw-r--r-- | rust/bindings/bindings_helper.h | 4 | ||||
-rw-r--r-- | rust/helpers/clk.c | 66 | ||||
-rw-r--r-- | rust/helpers/cpufreq.c | 10 | ||||
-rw-r--r-- | rust/helpers/cpumask.c | 25 | ||||
-rw-r--r-- | rust/helpers/helpers.c | 3 | ||||
-rw-r--r-- | rust/helpers/mm.c | 50 | ||||
-rw-r--r-- | rust/kernel/clk.rs | 334 | ||||
-rw-r--r-- | rust/kernel/cpu.rs | 30 | ||||
-rw-r--r-- | rust/kernel/cpufreq.rs | 1321 | ||||
-rw-r--r-- | rust/kernel/cpumask.rs | 330 | ||||
-rw-r--r-- | rust/kernel/lib.rs | 8 | ||||
-rw-r--r-- | rust/kernel/miscdevice.rs | 45 | ||||
-rw-r--r-- | rust/kernel/mm.rs | 344 | ||||
-rw-r--r-- | rust/kernel/mm/virt.rs | 471 | ||||
-rw-r--r-- | rust/kernel/opp.rs | 1146 | ||||
-rw-r--r-- | rust/kernel/task.rs | 247 | ||||
-rw-r--r-- | rust/macros/module.rs | 20 |
17 files changed, 4328 insertions, 126 deletions
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index f600a1f945f4..a5a6fb45d405 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -16,7 +16,10 @@ #include <linux/blk-mq.h> #include <linux/blk_types.h> #include <linux/blkdev.h> +#include <linux/clk.h> #include <linux/configfs.h> +#include <linux/cpu.h> +#include <linux/cpufreq.h> #include <linux/cpumask.h> #include <linux/cred.h> #include <linux/device/faux.h> @@ -35,6 +38,7 @@ #include <linux/phy.h> #include <linux/pid_namespace.h> #include <linux/platform_device.h> +#include <linux/pm_opp.h> #include <linux/poll.h> #include <linux/property.h> #include <linux/refcount.h> diff --git a/rust/helpers/clk.c b/rust/helpers/clk.c new file mode 100644 index 000000000000..6d04372c9f3b --- /dev/null +++ b/rust/helpers/clk.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/clk.h> + +/* + * The "inline" implementation of below helpers are only available when + * CONFIG_HAVE_CLK or CONFIG_HAVE_CLK_PREPARE aren't set. + */ +#ifndef CONFIG_HAVE_CLK +struct clk *rust_helper_clk_get(struct device *dev, const char *id) +{ + return clk_get(dev, id); +} + +void rust_helper_clk_put(struct clk *clk) +{ + clk_put(clk); +} + +int rust_helper_clk_enable(struct clk *clk) +{ + return clk_enable(clk); +} + +void rust_helper_clk_disable(struct clk *clk) +{ + clk_disable(clk); +} + +unsigned long rust_helper_clk_get_rate(struct clk *clk) +{ + return clk_get_rate(clk); +} + +int rust_helper_clk_set_rate(struct clk *clk, unsigned long rate) +{ + return clk_set_rate(clk, rate); +} +#endif + +#ifndef CONFIG_HAVE_CLK_PREPARE +int rust_helper_clk_prepare(struct clk *clk) +{ + return clk_prepare(clk); +} + +void rust_helper_clk_unprepare(struct clk *clk) +{ + clk_unprepare(clk); +} +#endif + +struct clk *rust_helper_clk_get_optional(struct device *dev, const char *id) +{ + return clk_get_optional(dev, id); +} + +int rust_helper_clk_prepare_enable(struct clk *clk) +{ + return clk_prepare_enable(clk); +} + +void rust_helper_clk_disable_unprepare(struct clk *clk) +{ + clk_disable_unprepare(clk); +} diff --git a/rust/helpers/cpufreq.c b/rust/helpers/cpufreq.c new file mode 100644 index 000000000000..7c1343c4d65e --- /dev/null +++ b/rust/helpers/cpufreq.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/cpufreq.h> + +#ifdef CONFIG_CPU_FREQ +void rust_helper_cpufreq_register_em_with_opp(struct cpufreq_policy *policy) +{ + cpufreq_register_em_with_opp(policy); +} +#endif diff --git a/rust/helpers/cpumask.c b/rust/helpers/cpumask.c index 2d380a86c34a..eb10598a0242 100644 --- a/rust/helpers/cpumask.c +++ b/rust/helpers/cpumask.c @@ -7,16 +7,41 @@ void rust_helper_cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) cpumask_set_cpu(cpu, dstp); } +void rust_helper___cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) +{ + __cpumask_set_cpu(cpu, dstp); +} + void rust_helper_cpumask_clear_cpu(int cpu, struct cpumask *dstp) { cpumask_clear_cpu(cpu, dstp); } +void rust_helper___cpumask_clear_cpu(int cpu, struct cpumask *dstp) +{ + __cpumask_clear_cpu(cpu, dstp); +} + +bool rust_helper_cpumask_test_cpu(int cpu, struct cpumask *srcp) +{ + return cpumask_test_cpu(cpu, srcp); +} + void rust_helper_cpumask_setall(struct cpumask *dstp) { cpumask_setall(dstp); } +bool rust_helper_cpumask_empty(struct cpumask *srcp) +{ + return cpumask_empty(srcp); +} + +bool rust_helper_cpumask_full(struct cpumask *srcp) +{ + return cpumask_full(srcp); +} + unsigned int rust_helper_cpumask_weight(struct cpumask *srcp) { return cpumask_weight(srcp); diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index c228a5363212..805307018f0e 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -12,6 +12,8 @@ #include "bug.c" #include "build_assert.c" #include "build_bug.c" +#include "clk.c" +#include "cpufreq.c" #include "cpumask.c" #include "cred.c" #include "device.c" @@ -22,6 +24,7 @@ #include "io.c" #include "jump_label.c" #include "kunit.c" +#include "mm.c" #include "mutex.c" #include "page.c" #include "platform.c" diff --git a/rust/helpers/mm.c b/rust/helpers/mm.c new file mode 100644 index 000000000000..81b510c96fd2 --- /dev/null +++ b/rust/helpers/mm.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/mm.h> +#include <linux/sched/mm.h> + +void rust_helper_mmgrab(struct mm_struct *mm) +{ + mmgrab(mm); +} + +void rust_helper_mmdrop(struct mm_struct *mm) +{ + mmdrop(mm); +} + +void rust_helper_mmget(struct mm_struct *mm) +{ + mmget(mm); +} + +bool rust_helper_mmget_not_zero(struct mm_struct *mm) +{ + return mmget_not_zero(mm); +} + +void rust_helper_mmap_read_lock(struct mm_struct *mm) +{ + mmap_read_lock(mm); +} + +bool rust_helper_mmap_read_trylock(struct mm_struct *mm) +{ + return mmap_read_trylock(mm); +} + +void rust_helper_mmap_read_unlock(struct mm_struct *mm) +{ + mmap_read_unlock(mm); +} + +struct vm_area_struct *rust_helper_vma_lookup(struct mm_struct *mm, + unsigned long addr) +{ + return vma_lookup(mm, addr); +} + +void rust_helper_vma_end_read(struct vm_area_struct *vma) +{ + vma_end_read(vma); +} diff --git a/rust/kernel/clk.rs b/rust/kernel/clk.rs new file mode 100644 index 000000000000..6041c6d07527 --- /dev/null +++ b/rust/kernel/clk.rs @@ -0,0 +1,334 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Clock abstractions. +//! +//! C header: [`include/linux/clk.h`](srctree/include/linux/clk.h) +//! +//! Reference: <https://docs.kernel.org/driver-api/clk.html> + +use crate::ffi::c_ulong; + +/// The frequency unit. +/// +/// Represents a frequency in hertz, wrapping a [`c_ulong`] value. +/// +/// ## Examples +/// +/// ``` +/// use kernel::clk::Hertz; +/// +/// let hz = 1_000_000_000; +/// let rate = Hertz(hz); +/// +/// assert_eq!(rate.as_hz(), hz); +/// assert_eq!(rate, Hertz(hz)); +/// assert_eq!(rate, Hertz::from_khz(hz / 1_000)); +/// assert_eq!(rate, Hertz::from_mhz(hz / 1_000_000)); +/// assert_eq!(rate, Hertz::from_ghz(hz / 1_000_000_000)); +/// ``` +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct Hertz(pub c_ulong); + +impl Hertz { + /// Create a new instance from kilohertz (kHz) + pub fn from_khz(khz: c_ulong) -> Self { + Self(khz * 1_000) + } + + /// Create a new instance from megahertz (MHz) + pub fn from_mhz(mhz: c_ulong) -> Self { + Self(mhz * 1_000_000) + } + + /// Create a new instance from gigahertz (GHz) + pub fn from_ghz(ghz: c_ulong) -> Self { + Self(ghz * 1_000_000_000) + } + + /// Get the frequency in hertz + pub fn as_hz(&self) -> c_ulong { + self.0 + } + + /// Get the frequency in kilohertz + pub fn as_khz(&self) -> c_ulong { + self.0 / 1_000 + } + + /// Get the frequency in megahertz + pub fn as_mhz(&self) -> c_ulong { + self.0 / 1_000_000 + } + + /// Get the frequency in gigahertz + pub fn as_ghz(&self) -> c_ulong { + self.0 / 1_000_000_000 + } +} + +impl From<Hertz> for c_ulong { + fn from(freq: Hertz) -> Self { + freq.0 + } +} + +#[cfg(CONFIG_COMMON_CLK)] +mod common_clk { + use super::Hertz; + use crate::{ + device::Device, + error::{from_err_ptr, to_result, Result}, + prelude::*, + }; + + use core::{ops::Deref, ptr}; + + /// A reference-counted clock. + /// + /// Rust abstraction for the C [`struct clk`]. + /// + /// # Invariants + /// + /// A [`Clk`] instance holds either a pointer to a valid [`struct clk`] created by the C + /// portion of the kernel or a NULL pointer. + /// + /// Instances of this type are reference-counted. Calling [`Clk::get`] ensures that the + /// allocation remains valid for the lifetime of the [`Clk`]. + /// + /// ## Examples + /// + /// The following example demonstrates how to obtain and configure a clock for a device. + /// + /// ``` + /// use kernel::c_str; + /// use kernel::clk::{Clk, Hertz}; + /// use kernel::device::Device; + /// use kernel::error::Result; + /// + /// fn configure_clk(dev: &Device) -> Result { + /// let clk = Clk::get(dev, Some(c_str!("apb_clk")))?; + /// + /// clk.prepare_enable()?; + /// + /// let expected_rate = Hertz::from_ghz(1); + /// + /// if clk.rate() != expected_rate { + /// clk.set_rate(expected_rate)?; + /// } + /// + /// clk.disable_unprepare(); + /// Ok(()) + /// } + /// ``` + /// + /// [`struct clk`]: https://docs.kernel.org/driver-api/clk.html + #[repr(transparent)] + pub struct Clk(*mut bindings::clk); + + impl Clk { + /// Gets [`Clk`] corresponding to a [`Device`] and a connection id. + /// + /// Equivalent to the kernel's [`clk_get`] API. + /// + /// [`clk_get`]: https://docs.kernel.org/core-api/kernel-api.html#c.clk_get + pub fn get(dev: &Device, name: Option<&CStr>) -> Result<Self> { + let con_id = if let Some(name) = name { + name.as_ptr() + } else { + ptr::null() + }; + + // SAFETY: It is safe to call [`clk_get`] for a valid device pointer. + // + // INVARIANT: The reference-count is decremented when [`Clk`] goes out of scope. + Ok(Self(from_err_ptr(unsafe { + bindings::clk_get(dev.as_raw(), con_id) + })?)) + } + + /// Obtain the raw [`struct clk`] pointer. + #[inline] + pub fn as_raw(&self) -> *mut bindings::clk { + self.0 + } + + /// Enable the clock. + /// + /// Equivalent to the kernel's [`clk_enable`] API. + /// + /// [`clk_enable`]: https://docs.kernel.org/core-api/kernel-api.html#c.clk_enable + #[inline] + pub fn enable(&self) -> Result { + // SAFETY: By the type invariants, self.as_raw() is a valid argument for + // [`clk_enable`]. + to_result(unsafe { bindings::clk_enable(self.as_raw()) }) + } + + /// Disable the clock. + /// + /// Equivalent to the kernel's [`clk_disable`] API. + /// + /// [`clk_disable`]: https://docs.kernel.org/core-api/kernel-api.html#c.clk_disable + #[inline] + pub fn disable(&self) { + // SAFETY: By the type invariants, self.as_raw() is a valid argument for + // [`clk_disable`]. + unsafe { bindings::clk_disable(self.as_raw()) }; + } + + /// Prepare the clock. + /// + /// Equivalent to the kernel's [`clk_prepare`] API. + /// + /// [`clk_prepare`]: https://docs.kernel.org/core-api/kernel-api.html#c.clk_prepare + #[inline] + pub fn prepare(&self) -> Result { + // SAFETY: By the type invariants, self.as_raw() is a valid argument for + // [`clk_prepare`]. + to_result(unsafe { bindings::clk_prepare(self.as_raw()) }) + } + + /// Unprepare the clock. + /// + /// Equivalent to the kernel's [`clk_unprepare`] API. + /// + /// [`clk_unprepare`]: https://docs.kernel.org/core-api/kernel-api.html#c.clk_unprepare + #[inline] + pub fn unprepare(&self) { + // SAFETY: By the type invariants, self.as_raw() is a valid argument for + // [`clk_unprepare`]. + unsafe { bindings::clk_unprepare(self.as_raw()) }; + } + + /// Prepare and enable the clock. + /// + /// Equivalent to calling [`Clk::prepare`] followed by [`Clk::enable`]. + #[inline] + pub fn prepare_enable(&self) -> Result { + // SAFETY: By the type invariants, self.as_raw() is a valid argument for + // [`clk_prepare_enable`]. + to_result(unsafe { bindings::clk_prepare_enable(self.as_raw()) }) + } + + /// Disable and unprepare the clock. + /// + /// Equivalent to calling [`Clk::disable`] followed by [`Clk::unprepare`]. + #[inline] + pub fn disable_unprepare(&self) { + // SAFETY: By the type invariants, self.as_raw() is a valid argument for + // [`clk_disable_unprepare`]. + unsafe { bindings::clk_disable_unprepare(self.as_raw()) }; + } + + /// Get clock's rate. + /// + /// Equivalent to the kernel's [`clk_get_rate`] API. + /// + /// [`clk_get_rate`]: https://docs.kernel.org/core-api/kernel-api.html#c.clk_get_rate + #[inline] + pub fn rate(&self) -> Hertz { + // SAFETY: By the type invariants, self.as_raw() is a valid argument for + // [`clk_get_rate`]. + Hertz(unsafe { bindings::clk_get_rate(self.as_raw()) }) + } + + /// Set clock's rate. + /// + /// Equivalent to the kernel's [`clk_set_rate`] API. + /// + /// [`clk_set_rate`]: https://docs.kernel.org/core-api/kernel-api.html#c.clk_set_rate + #[inline] + pub fn set_rate(&self, rate: Hertz) -> Result { + // SAFETY: By the type invariants, self.as_raw() is a valid argument for + // [`clk_set_rate`]. + to_result(unsafe { bindings::clk_set_rate(self.as_raw(), rate.as_hz()) }) + } + } + + impl Drop for Clk { + fn drop(&mut self) { + // SAFETY: By the type invariants, self.as_raw() is a valid argument for [`clk_put`]. + unsafe { bindings::clk_put(self.as_raw()) }; + } + } + + /// A reference-counted optional clock. + /// + /// A lightweight wrapper around an optional [`Clk`]. An [`OptionalClk`] represents a [`Clk`] + /// that a driver can function without but may improve performance or enable additional + /// features when available. + /// + /// # Invariants + /// + /// An [`OptionalClk`] instance encapsulates a [`Clk`] with either a valid [`struct clk`] or + /// `NULL` pointer. + /// + /// Instances of this type are reference-counted. Calling [`OptionalClk::get`] ensures that the + /// allocation remains valid for the lifetime of the [`OptionalClk`]. + /// + /// ## Examples + /// + /// The following example demonstrates how to obtain and configure an optional clock for a + /// device. The code functions correctly whether or not the clock is available. + /// + /// ``` + /// use kernel::c_str; + /// use kernel::clk::{OptionalClk, Hertz}; + /// use kernel::device::Device; + /// use kernel::error::Result; + /// + /// fn configure_clk(dev: &Device) -> Result { + /// let clk = OptionalClk::get(dev, Some(c_str!("apb_clk")))?; + /// + /// clk.prepare_enable()?; + /// + /// let expected_rate = Hertz::from_ghz(1); + /// + /// if clk.rate() != expected_rate { + /// clk.set_rate(expected_rate)?; + /// } + /// + /// clk.disable_unprepare(); + /// Ok(()) + /// } + /// ``` + /// + /// [`struct clk`]: https://docs.kernel.org/driver-api/clk.html + pub struct OptionalClk(Clk); + + impl OptionalClk { + /// Gets [`OptionalClk`] corresponding to a [`Device`] and a connection id. + /// + /// Equivalent to the kernel's [`clk_get_optional`] API. + /// + /// [`clk_get_optional`]: + /// https://docs.kernel.org/core-api/kernel-api.html#c.clk_get_optional + pub fn get(dev: &Device, name: Option<&CStr>) -> Result<Self> { + let con_id = if let Some(name) = name { + name.as_ptr() + } else { + ptr::null() + }; + + // SAFETY: It is safe to call [`clk_get_optional`] for a valid device pointer. + // + // INVARIANT: The reference-count is decremented when [`OptionalClk`] goes out of + // scope. + Ok(Self(Clk(from_err_ptr(unsafe { + bindings::clk_get_optional(dev.as_raw(), con_id) + })?))) + } + } + + // Make [`OptionalClk`] behave like [`Clk`]. + impl Deref for OptionalClk { + type Target = Clk; + + fn deref(&self) -> &Clk { + &self.0 + } + } +} + +#[cfg(CONFIG_COMMON_CLK)] +pub use common_clk::*; diff --git a/rust/kernel/cpu.rs b/rust/kernel/cpu.rs new file mode 100644 index 000000000000..10c5c3b25873 --- /dev/null +++ b/rust/kernel/cpu.rs @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Generic CPU definitions. +//! +//! C header: [`include/linux/cpu.h`](srctree/include/linux/cpu.h) + +use crate::{bindings, device::Device, error::Result, prelude::ENODEV}; + +/// Creates a new instance of CPU's device. +/// +/// # Safety +/// +/// Reference counting is not implemented for the CPU device in the C code. When a CPU is +/// hot-unplugged, the corresponding CPU device is unregistered, but its associated memory +/// is not freed. +/// +/// Callers must ensure that the CPU device is not used after it has been unregistered. +/// This can be achieved, for example, by registering a CPU hotplug notifier and removing +/// any references to the CPU device within the notifier's callback. +pub unsafe fn from_cpu(cpu: u32) -> Result<&'static Device> { + // SAFETY: It is safe to call `get_cpu_device()` for any CPU. + let ptr = unsafe { bindings::get_cpu_device(cpu) }; + if ptr.is_null() { + return Err(ENODEV); + } + + // SAFETY: The pointer returned by `get_cpu_device()`, if not `NULL`, is a valid pointer to + // a `struct device` and is never freed by the C code. + Ok(unsafe { Device::as_ref(ptr) }) +} diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs new file mode 100644 index 000000000000..09b856bb297b --- /dev/null +++ b/rust/kernel/cpufreq.rs @@ -0,0 +1,1321 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! CPU frequency scaling. +//! +//! This module provides rust abstractions for interacting with the cpufreq subsystem. +//! +//! C header: [`include/linux/cpufreq.h`](srctree/include/linux/cpufreq.h) +//! +//! Reference: <https://docs.kernel.org/admin-guide/pm/cpufreq.html> + +use crate::{ + clk::Hertz, + cpumask, + device::{Bound, Device}, + devres::Devres, + error::{code::*, from_err_ptr, from_result, to_result, Result, VTABLE_DEFAULT_ERROR}, + ffi::{c_char, c_ulong}, + prelude::*, + types::ForeignOwnable, + types::Opaque, +}; + +#[cfg(CONFIG_COMMON_CLK)] +use crate::clk::Clk; + +use core::{ + cell::UnsafeCell, + marker::PhantomData, + mem::MaybeUninit, + ops::{Deref, DerefMut}, + pin::Pin, + ptr, +}; + +use macros::vtable; + +/// Maximum length of CPU frequency driver's name. +const CPUFREQ_NAME_LEN: usize = bindings::CPUFREQ_NAME_LEN as usize; + +/// Default transition latency value in nanoseconds. +pub const ETERNAL_LATENCY_NS: u32 = bindings::CPUFREQ_ETERNAL as u32; + +/// CPU frequency driver flags. +pub mod flags { + /// Driver needs to update internal limits even if frequency remains unchanged. + pub const NEED_UPDATE_LIMITS: u16 = 1 << 0; + + /// Platform where constants like `loops_per_jiffy` are unaffected by frequency changes. + pub const CONST_LOOPS: u16 = 1 << 1; + + /// Register driver as a thermal cooling device automatically. + pub const IS_COOLING_DEV: u16 = 1 << 2; + + /// Supports multiple clock domains with per-policy governors in `cpu/cpuN/cpufreq/`. + pub const HAVE_GOVERNOR_PER_POLICY: u16 = 1 << 3; + + /// Allows post-change notifications outside of the `target()` routine. + pub const ASYNC_NOTIFICATION: u16 = 1 << 4; + + /// Ensure CPU starts at a valid frequency from the driver's freq-table. + pub const NEED_INITIAL_FREQ_CHECK: u16 = 1 << 5; + + /// Disallow governors with `dynamic_switching` capability. + pub const NO_AUTO_DYNAMIC_SWITCHING: u16 = 1 << 6; +} + +/// Relations from the C code. +const CPUFREQ_RELATION_L: u32 = 0; +const CPUFREQ_RELATION_H: u32 = 1; +const CPUFREQ_RELATION_C: u32 = 2; + +/// Can be used with any of the above values. +const CPUFREQ_RELATION_E: u32 = 1 << 2; + +/// CPU frequency selection relations. +/// +/// CPU frequency selection relations, each optionally marked as "efficient". +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum Relation { + /// Select the lowest frequency at or above target. + Low(bool), + /// Select the highest frequency below or at target. + High(bool), + /// Select the closest frequency to the target. + Close(bool), +} + +impl Relation { + // Construct from a C-compatible `u32` value. + fn new(val: u32) -> Result<Self> { + let efficient = val & CPUFREQ_RELATION_E != 0; + + Ok(match val & !CPUFREQ_RELATION_E { + CPUFREQ_RELATION_L => Self::Low(efficient), + CPUFREQ_RELATION_H => Self::High(efficient), + CPUFREQ_RELATION_C => Self::Close(efficient), + _ => return Err(EINVAL), + }) + } +} + +impl From<Relation> for u32 { + // Convert to a C-compatible `u32` value. + fn from(rel: Relation) -> Self { + let (mut val, efficient) = match rel { + Relation::Low(e) => (CPUFREQ_RELATION_L, e), + Relation::High(e) => (CPUFREQ_RELATION_H, e), + Relation::Close(e) => (CPUFREQ_RELATION_C, e), + }; + + if efficient { + val |= CPUFREQ_RELATION_E; + } + + val + } +} + +/// Policy data. +/// +/// Rust abstraction for the C `struct cpufreq_policy_data`. +/// +/// # Invariants +/// +/// A [`PolicyData`] instance always corresponds to a valid C `struct cpufreq_policy_data`. +/// +/// The callers must ensure that the `struct cpufreq_policy_data` is valid for access and remains +/// valid for the lifetime of the returned reference. +#[repr(transparent)] +pub struct PolicyData(Opaque<bindings::cpufreq_policy_data>); + +impl PolicyData { + /// Creates a mutable reference to an existing `struct cpufreq_policy_data` pointer. + /// + /// # Safety + /// + /// The caller must ensure that `ptr` is valid for writing and remains valid for the lifetime + /// of the returned reference. + #[inline] + pub unsafe fn from_raw_mut<'a>(ptr: *mut bindings::cpufreq_policy_data) -> &'a mut Self { + // SAFETY: Guaranteed by the safety requirements of the function. + // + // INVARIANT: The caller ensures that `ptr` is valid for writing and remains valid for the + // lifetime of the returned reference. + unsafe { &mut *ptr.cast() } + } + + /// Returns a raw pointer to the underlying C `cpufreq_policy_data`. + #[inline] + pub fn as_raw(&self) -> *mut bindings::cpufreq_policy_data { + let this: *const Self = self; + this.cast_mut().cast() + } + + /// Wrapper for `cpufreq_generic_frequency_table_verify`. + #[inline] + pub fn generic_verify(&self) -> Result { + // SAFETY: By the type invariant, the pointer stored in `self` is valid. + to_result(unsafe { bindings::cpufreq_generic_frequency_table_verify(self.as_raw()) }) + } +} + +/// The frequency table index. +/// +/// Represents index with a frequency table. +/// +/// # Invariants +/// +/// The index must correspond to a valid entry in the [`Table`] it is used for. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct TableIndex(usize); + +impl TableIndex { + /// Creates an instance of [`TableIndex`]. + /// + /// # Safety + /// + /// The caller must ensure that `index` correspond to a valid entry in the [`Table`] it is used + /// for. + pub unsafe fn new(index: usize) -> Self { + // INVARIANT: The caller ensures that `index` correspond to a valid entry in the [`Table`]. + Self(index) + } +} + +impl From<TableIndex> for usize { + #[inline] + fn from(index: TableIndex) -> Self { + index.0 + } +} + +/// CPU frequency table. +/// +/// Rust abstraction for the C `struct cpufreq_frequency_table`. +/// +/// # Invariants +/// +/// A [`Table`] instance always corresponds to a valid C `struct cpufreq_frequency_table`. +/// +/// The callers must ensure that the `struct cpufreq_frequency_table` is valid for access and +/// remains valid for the lifetime of the returned reference. +/// +/// ## Examples +/// +/// The following example demonstrates how to read a frequency value from [`Table`]. +/// +/// ``` +/// use kernel::cpufreq::{Policy, TableIndex}; +/// +/// fn show_freq(policy: &Policy) -> Result { +/// let table = policy.freq_table()?; +/// +/// // SAFETY: Index is a valid entry in the table. +/// let index = unsafe { TableIndex::new(0) }; +/// +/// pr_info!("The frequency at index 0 is: {:?}\n", table.freq(index)?); +/// pr_info!("The flags at index 0 is: {}\n", table.flags(index)); +/// pr_info!("The data at index 0 is: {}\n", table.data(index)); +/// Ok(()) +/// } +/// ``` +#[repr(transparent)] +pub struct Table(Opaque<bindings::cpufreq_frequency_table>); + +impl Table { + /// Creates a reference to an existing C `struct cpufreq_frequency_table` pointer. + /// + /// # Safety + /// + /// The caller must ensure that `ptr` is valid for reading and remains valid for the lifetime + /// of the returned reference. + #[inline] + pub unsafe fn from_raw<'a>(ptr: *const bindings::cpufreq_frequency_table) -> &'a Self { + // SAFETY: Guaranteed by the safety requirements of the function. + // + // INVARIANT: The caller ensures that `ptr` is valid for reading and remains valid for the + // lifetime of the returned reference. + unsafe { &*ptr.cast() } + } + + /// Returns the raw mutable pointer to the C `struct cpufreq_frequency_table`. + #[inline] + pub fn as_raw(&self) -> *mut bindings::cpufreq_frequency_table { + let this: *const Self = self; + this.cast_mut().cast() + } + + /// Returns frequency at `index` in the [`Table`]. + #[inline] + pub fn freq(&self, index: TableIndex) -> Result<Hertz> { + // SAFETY: By the type invariant, the pointer stored in `self` is valid and `index` is + // guaranteed to be valid by its safety requirements. + Ok(Hertz::from_khz(unsafe { + (*self.as_raw().add(index.into())).frequency.try_into()? + })) + } + + /// Returns flags at `index` in the [`Table`]. + #[inline] + pub fn flags(&self, index: TableIndex) -> u32 { + // SAFETY: By the type invariant, the pointer stored in `self` is valid and `index` is + // guaranteed to be valid by its safety requirements. + unsafe { (*self.as_raw().add(index.into())).flags } + } + + /// Returns data at `index` in the [`Table`]. + #[inline] + pub fn data(&self, index: TableIndex) -> u32 { + // SAFETY: By the type invariant, the pointer stored in `self` is valid and `index` is + // guaranteed to be valid by its safety requirements. + unsafe { (*self.as_raw().add(index.into())).driver_data } + } +} + +/// CPU frequency table owned and pinned in memory, created from a [`TableBuilder`]. +pub struct TableBox { + entries: Pin<KVec<bindings::cpufreq_frequency_table>>, +} + +impl TableBox { + /// Constructs a new [`TableBox`] from a [`KVec`] of entries. + /// + /// # Errors + /// + /// Returns `EINVAL` if the entries list is empty. + #[inline] + fn new(entries: KVec<bindings::cpufreq_frequency_table>) -> Result<Self> { + if entries.is_empty() { + return Err(EINVAL); + } + + Ok(Self { + // Pin the entries to memory, since we are passing its pointer to the C code. + entries: Pin::new(entries), + }) + } + + /// Returns a raw pointer to the underlying C `cpufreq_frequency_table`. + #[inline] + fn as_raw(&self) -> *const bindings::cpufreq_frequency_table { + // The pointer is valid until the table gets dropped. + self.entries.as_ptr() + } +} + +impl Deref for TableBox { + type Target = Table; + + fn deref(&self) -> &Self::Target { + // SAFETY: The caller owns TableBox, it is safe to deref. + unsafe { Self::Target::from_raw(self.as_raw()) } + } +} + +/// CPU frequency table builder. +/// +/// This is used by the CPU frequency drivers to build a frequency table dynamically. +/// +/// ## Examples +/// +/// The following example demonstrates how to create a CPU frequency table. +/// +/// ``` +/// use kernel::cpufreq::{TableBuilder, TableIndex}; +/// use kernel::clk::Hertz; +/// +/// let mut builder = TableBuilder::new(); +/// +/// // Adds few entries to the table. +/// builder.add(Hertz::from_mhz(700), 0, 1).unwrap(); +/// builder.add(Hertz::from_mhz(800), 2, 3).unwrap(); +/// builder.add(Hertz::from_mhz(900), 4, 5).unwrap(); +/// builder.add(Hertz::from_ghz(1), 6, 7).unwrap(); +/// +/// let table = builder.to_table().unwrap(); +/// +/// // SAFETY: Index values correspond to valid entries in the table. +/// let (index0, index2) = unsafe { (TableIndex::new(0), TableIndex::new(2)) }; +/// +/// assert_eq!(table.freq(index0), Ok(Hertz::from_mhz(700))); +/// assert_eq!(table.flags(index0), 0); +/// assert_eq!(table.data(index0), 1); +/// +/// assert_eq!(table.freq(index2), Ok(Hertz::from_mhz(900))); +/// assert_eq!(table.flags(index2), 4); +/// assert_eq!(table.data(index2), 5); +/// ``` +#[derive(Default)] +#[repr(transparent)] +pub struct TableBuilder { + entries: KVec<bindings::cpufreq_frequency_table>, +} + +impl TableBuilder { + /// Creates a new instance of [`TableBuilder`]. + #[inline] + pub fn new() -> Self { + Self { + entries: KVec::new(), + } + } + + /// Adds a new entry to the table. + pub fn add(&mut self, freq: Hertz, flags: u32, driver_data: u32) -> Result { + // Adds the new entry at the end of the vector. + Ok(self.entries.push( + bindings::cpufreq_frequency_table { + flags, + driver_data, + frequency: freq.as_khz() as u32, + }, + GFP_KERNEL, + )?) + } + + /// Consumes the [`TableBuilder`] and returns [`TableBox`]. + pub fn to_table(mut self) -> Result<TableBox> { + // Add last entry to the table. + self.add(Hertz(c_ulong::MAX), 0, 0)?; + + TableBox::new(self.entries) + } +} + +/// CPU frequency policy. +/// +/// Rust abstraction for the C `struct cpufreq_policy`. +/// +/// # Invariants +/// +/// A [`Policy`] instance always corresponds to a valid C `struct cpufreq_policy`. +/// +/// The callers must ensure that the `struct cpufreq_policy` is valid for access and remains valid +/// for the lifetime of the returned reference. +/// +/// ## Examples +/// +/// The following example demonstrates how to create a CPU frequency table. +/// +/// ``` +/// use kernel::cpufreq::{ETERNAL_LATENCY_NS, Policy}; +/// +/// fn update_policy(policy: &mut Policy) { +/// policy +/// .set_dvfs_possible_from_any_cpu(true) +/// .set_fast_switch_possible(true) +/// .set_transition_latency_ns(ETERNAL_LATENCY_NS); +/// +/// pr_info!("The policy details are: {:?}\n", (policy.cpu(), policy.cur())); +/// } +/// ``` +#[repr(transparent)] +pub struct Policy(Opaque<bindings::cpufreq_policy>); + +impl Policy { + /// Creates a reference to an existing `struct cpufreq_policy` pointer. + /// + /// # Safety + /// + /// The caller must ensure that `ptr` is valid for reading and remains valid for the lifetime + /// of the returned reference. + #[inline] + pub unsafe fn from_raw<'a>(ptr: *const bindings::cpufreq_policy) -> &'a Self { + // SAFETY: Guaranteed by the safety requirements of the function. + // + // INVARIANT: The caller ensures that `ptr` is valid for reading and remains valid for the + // lifetime of the returned reference. + unsafe { &*ptr.cast() } + } + + /// Creates a mutable reference to an existing `struct cpufreq_policy` pointer. + /// + /// # Safety + /// + /// The caller must ensure that `ptr` is valid for writing and remains valid for the lifetime + /// of the returned reference. + #[inline] + pub unsafe fn from_raw_mut<'a>(ptr: *mut bindings::cpufreq_policy) -> &'a mut Self { + // SAFETY: Guaranteed by the safety requirements of the function. + // + // INVARIANT: The caller ensures that `ptr` is valid for writing and remains valid for the + // lifetime of the returned reference. + unsafe { &mut *ptr.cast() } + } + + /// Returns a raw mutable pointer to the C `struct cpufreq_policy`. + #[inline] + fn as_raw(&self) -> *mut bindings::cpufreq_policy { + let this: *const Self = self; + this.cast_mut().cast() + } + + #[inline] + fn as_ref(&self) -> &bindings::cpufreq_policy { + // SAFETY: By the type invariant, the pointer stored in `self` is valid. + unsafe { &*self.as_raw() } + } + + #[inline] + fn as_mut_ref(&mut self) -> &mut bindings::cpufreq_policy { + // SAFETY: By the type invariant, the pointer stored in `self` is valid. + unsafe { &mut *self.as_raw() } + } + + /// Returns the primary CPU for the [`Policy`]. + #[inline] + pub fn cpu(&self) -> u32 { + self.as_ref().cpu + } + + /// Returns the minimum frequency for the [`Policy`]. + #[inline] + pub fn min(&self) -> Hertz { + Hertz::from_khz(self.as_ref().min as usize) + } + + /// Set the minimum frequency for the [`Policy`]. + #[inline] + pub fn set_min(&mut self, min: Hertz) -> &mut Self { + self.as_mut_ref().min = min.as_khz() as u32; + self + } + + /// Returns the maximum frequency for the [`Policy`]. + #[inline] + pub fn max(&self) -> Hertz { + Hertz::from_khz(self.as_ref().max as usize) + } + + /// Set the maximum frequency for the [`Policy`]. + #[inline] + pub fn set_max(&mut self, max: Hertz) -> &mut Self { + self.as_mut_ref().max = max.as_khz() as u32; + self + } + + /// Returns the current frequency for the [`Policy`]. + #[inline] + pub fn cur(&self) -> Hertz { + Hertz::from_khz(self.as_ref().cur as usize) + } + + /// Returns the suspend frequency for the [`Policy`]. + #[inline] + pub fn suspend_freq(&self) -> Hertz { + Hertz::from_khz(self.as_ref().suspend_freq as usize) + } + + /// Sets the suspend frequency for the [`Policy`]. + #[inline] + pub fn set_suspend_freq(&mut self, freq: Hertz) -> &mut Self { + self.as_mut_ref().suspend_freq = freq.as_khz() as u32; + self + } + + /// Provides a wrapper to the generic suspend routine. + #[inline] + pub fn generic_suspend(&mut self) -> Result { + // SAFETY: By the type invariant, the pointer stored in `self` is valid. + to_result(unsafe { bindings::cpufreq_generic_suspend(self.as_mut_ref()) }) + } + + /// Provides a wrapper to the generic get routine. + #[inline] + pub fn generic_get(&self) -> Result<u32> { + // SAFETY: By the type invariant, the pointer stored in `self` is valid. + Ok(unsafe { bindings::cpufreq_generic_get(self.cpu()) }) + } + + /// Provides a wrapper to the register with energy model using the OPP core. + #[cfg(CONFIG_PM_OPP)] + #[inline] + pub fn register_em_opp(&mut self) { + // SAFETY: By the type invariant, the pointer stored in `self` is valid. + unsafe { bindings::cpufreq_register_em_with_opp(self.as_mut_ref()) }; + } + + /// Gets [`cpumask::Cpumask`] for a cpufreq [`Policy`]. + #[inline] + pub fn cpus(&mut self) -> &mut cpumask::Cpumask { + // SAFETY: The pointer to `cpus` is valid for writing and remains valid for the lifetime of + // the returned reference. + unsafe { cpumask::CpumaskVar::as_mut_ref(&mut self.as_mut_ref().cpus) } + } + + /// Sets clock for the [`Policy`]. + /// + /// # Safety + /// + /// The caller must guarantee that the returned [`Clk`] is not dropped while it is getting used + /// by the C code. + #[cfg(CONFIG_COMMON_CLK)] + pub unsafe fn set_clk(&mut self, dev: &Device, name: Option<&CStr>) -> Result<Clk> { + let clk = Clk::get(dev, name)?; + self.as_mut_ref().clk = clk.as_raw(); + Ok(clk) + } + + /// Allows / disallows frequency switching code to run on any CPU. + #[inline] + pub fn set_dvfs_possible_from_any_cpu(&mut self, val: bool) -> &mut Self { + self.as_mut_ref().dvfs_possible_from_any_cpu = val; + self + } + + /// Returns if fast switching of frequencies is possible or not. + #[inline] + pub fn fast_switch_possible(&self) -> bool { + self.as_ref().fast_switch_possible + } + + /// Enables / disables fast frequency switching. + #[inline] + pub fn set_fast_switch_possible(&mut self, val: bool) -> &mut Self { + self.as_mut_ref().fast_switch_possible = val; + self + } + + /// Sets transition latency (in nanoseconds) for the [`Policy`]. + #[inline] + pub fn set_transition_latency_ns(&mut self, latency_ns: u32) -> &mut Self { + self.as_mut_ref().cpuinfo.transition_latency = latency_ns; + self + } + + /// Sets cpuinfo `min_freq`. + #[inline] + pub fn set_cpuinfo_min_freq(&mut self, min_freq: Hertz) -> &mut Self { + self.as_mut_ref().cpuinfo.min_freq = min_freq.as_khz() as u32; + self + } + + /// Sets cpuinfo `max_freq`. + #[inline] + pub fn set_cpuinfo_max_freq(&mut self, max_freq: Hertz) -> &mut Self { + self.as_mut_ref().cpuinfo.max_freq = max_freq.as_khz() as u32; + self + } + + /// Set `transition_delay_us`, i.e. the minimum time between successive frequency change + /// requests. + #[inline] + pub fn set_transition_delay_us(&mut self, transition_delay_us: u32) -> &mut Self { + self.as_mut_ref().transition_delay_us = transition_delay_us; + self + } + + /// Returns reference to the CPU frequency [`Table`] for the [`Policy`]. + pub fn freq_table(&self) -> Result<&Table> { + if self.as_ref().freq_table.is_null() { + return Err(EINVAL); + } + + // SAFETY: The `freq_table` is guaranteed to be valid for reading and remains valid for the + // lifetime of the returned reference. + Ok(unsafe { Table::from_raw(self.as_ref().freq_table) }) + } + + /// Sets the CPU frequency [`Table`] for the [`Policy`]. + /// + /// # Safety + /// + /// The caller must guarantee that the [`Table`] is not dropped while it is getting used by the + /// C code. + #[inline] + pub unsafe fn set_freq_table(&mut self, table: &Table) -> &mut Self { + self.as_mut_ref().freq_table = table.as_raw(); + self + } + + /// Returns the [`Policy`]'s private data. + pub fn data<T: ForeignOwnable>(&mut self) -> Option<<T>::Borrowed<'_>> { + if self.as_ref().driver_data.is_null() { + None + } else { + // SAFETY: The data is earlier set from [`set_data`]. + Some(unsafe { T::borrow(self.as_ref().driver_data) }) + } + } + + /// Sets the private data of the [`Policy`] using a foreign-ownable wrapper. + /// + /// # Errors + /// + /// Returns `EBUSY` if private data is already set. + fn set_data<T: ForeignOwnable>(&mut self, data: T) -> Result { + if self.as_ref().driver_data.is_null() { + // Transfer the ownership of the data to the foreign interface. + self.as_mut_ref().driver_data = <T as ForeignOwnable>::into_foreign(data) as _; + Ok(()) + } else { + Err(EBUSY) + } + } + + /// Clears and returns ownership of the private data. + fn clear_data<T: ForeignOwnable>(&mut self) -> Option<T> { + if self.as_ref().driver_data.is_null() { + None + } else { + let data = Some( + // SAFETY: The data is earlier set by us from [`set_data`]. It is safe to take + // back the ownership of the data from the foreign interface. + unsafe { <T as ForeignOwnable>::from_foreign(self.as_ref().driver_data) }, + ); + self.as_mut_ref().driver_data = ptr::null_mut(); + data + } + } +} + +/// CPU frequency policy created from a CPU number. +/// +/// This struct represents the CPU frequency policy obtained for a specific CPU, providing safe +/// access to the underlying `cpufreq_policy` and ensuring proper cleanup when the `PolicyCpu` is +/// dropped. +struct PolicyCpu<'a>(&'a mut Policy); + +impl<'a> PolicyCpu<'a> { + fn from_cpu(cpu: u32) -> Result<Self> { + // SAFETY: It is safe to call `cpufreq_cpu_get` for any valid CPU. + let ptr = from_err_ptr(unsafe { bindings::cpufreq_cpu_get(cpu) })?; + + Ok(Self( + // SAFETY: The `ptr` is guaranteed to be valid and remains valid for the lifetime of + // the returned reference. + unsafe { Policy::from_raw_mut(ptr) }, + )) + } +} + +impl<'a> Deref for PolicyCpu<'a> { + type Target = Policy; + + fn deref(&self) -> &Self::Target { + self.0 + } +} + +impl<'a> DerefMut for PolicyCpu<'a> { + fn deref_mut(&mut self) -> &mut Policy { + self.0 + } +} + +impl<'a> Drop for PolicyCpu<'a> { + fn drop(&mut self) { + // SAFETY: The underlying pointer is guaranteed to be valid for the lifetime of `self`. + unsafe { bindings::cpufreq_cpu_put(self.0.as_raw()) }; + } +} + +/// CPU frequency driver. +/// +/// Implement this trait to provide a CPU frequency driver and its callbacks. +/// +/// Reference: <https://docs.kernel.org/cpu-freq/cpu-drivers.html> +#[vtable] +pub trait Driver { + /// Driver's name. + const NAME: &'static CStr; + + /// Driver's flags. + const FLAGS: u16; + + /// Boost support. + const BOOST_ENABLED: bool; + + /// Policy specific data. + /// + /// Require that `PData` implements `ForeignOwnable`. We guarantee to never move the underlying + /// wrapped data structure. + type PData: ForeignOwnable; + + /// Driver's `init` callback. + fn init(policy: &mut Policy) -> Result<Self::PData>; + + /// Driver's `exit` callback. + fn exit(_policy: &mut Policy, _data: Option<Self::PData>) -> Result { + build_error!(VTABLE_DEFAULT_ERROR) + } + + /// Driver's `online` callback. + fn online(_policy: &mut Policy) -> Result { + build_error!(VTABLE_DEFAULT_ERROR) + } + + /// Driver's `offline` callback. + fn offline(_policy: &mut Policy) -> Result { + build_error!(VTABLE_DEFAULT_ERROR) + } + + /// Driver's `suspend` callback. + fn suspend(_policy: &mut Policy) -> Result { + build_error!(VTABLE_DEFAULT_ERROR) + } + + /// Driver's `resume` callback. + fn resume(_policy: &mut Policy) -> Result { + build_error!(VTABLE_DEFAULT_ERROR) + } + + /// Driver's `ready` callback. + fn ready(_policy: &mut Policy) { + build_error!(VTABLE_DEFAULT_ERROR) + } + + /// Driver's `verify` callback. + fn verify(data: &mut PolicyData) -> Result; + + /// Driver's `setpolicy` callback. + fn setpolicy(_policy: &mut Policy) -> Result { + build_error!(VTABLE_DEFAULT_ERROR) + } + + /// Driver's `target` callback. + fn target(_policy: &mut Policy, _target_freq: u32, _relation: Relation) -> Result { + build_error!(VTABLE_DEFAULT_ERROR) + } + + /// Driver's `target_index` callback. + fn target_index(_policy: &mut Policy, _index: TableIndex) -> Result { + build_error!(VTABLE_DEFAULT_ERROR) + } + + /// Driver's `fast_switch` callback. + fn fast_switch(_policy: &mut Policy, _target_freq: u32) -> u32 { + build_error!(VTABLE_DEFAULT_ERROR) + } + + /// Driver's `adjust_perf` callback. + fn adjust_perf(_policy: &mut Policy, _min_perf: usize, _target_perf: usize, _capacity: usize) { + build_error!(VTABLE_DEFAULT_ERROR) + } + + /// Driver's `get_intermediate` callback. + fn get_intermediate(_policy: &mut Policy, _index: TableIndex) -> u32 { + build_error!(VTABLE_DEFAULT_ERROR) + } + + /// Driver's `target_intermediate` callback. + fn target_intermediate(_policy: &mut Policy, _index: TableIndex) -> Result { + build_error!(VTABLE_DEFAULT_ERROR) + } + + /// Driver's `get` callback. + fn get(_policy: &mut Policy) -> Result<u32> { + build_error!(VTABLE_DEFAULT_ERROR) + } + + /// Driver's `update_limits` callback. + fn update_limits(_policy: &mut Policy) { + build_error!(VTABLE_DEFAULT_ERROR) + } + + /// Driver's `bios_limit` callback. + fn bios_limit(_policy: &mut Policy, _limit: &mut u32) -> Result { + build_error!(VTABLE_DEFAULT_ERROR) + } + + /// Driver's `set_boost` callback. + fn set_boost(_policy: &mut Policy, _state: i32) -> Result { + build_error!(VTABLE_DEFAULT_ERROR) + } + + /// Driver's `register_em` callback. + fn register_em(_policy: &mut Policy) { + build_error!(VTABLE_DEFAULT_ERROR) + } +} + +/// CPU frequency driver Registration. +/// +/// ## Examples +/// +/// The following example demonstrates how to register a cpufreq driver. +/// +/// ``` +/// use kernel::{ +/// cpufreq, +/// c_str, +/// device::{Core, Device}, +/// macros::vtable, +/// of, platform, +/// sync::Arc, +/// }; +/// struct SampleDevice; +/// +/// #[derive(Default)] +/// struct SampleDriver; +/// +/// #[vtable] +/// impl cpufreq::Driver for SampleDriver { +/// const NAME: &'static CStr = c_str!("cpufreq-sample"); +/// const FLAGS: u16 = cpufreq::flags::NEED_INITIAL_FREQ_CHECK | cpufreq::flags::IS_COOLING_DEV; +/// const BOOST_ENABLED: bool = true; +/// +/// type PData = Arc<SampleDevice>; +/// +/// fn init(policy: &mut cpufreq::Policy) -> Result<Self::PData> { +/// // Initialize here +/// Ok(Arc::new(SampleDevice, GFP_KERNEL)?) +/// } +/// +/// fn exit(_policy: &mut cpufreq::Policy, _data: Option<Self::PData>) -> Result { +/// Ok(()) +/// } +/// +/// fn suspend(policy: &mut cpufreq::Policy) -> Result { +/// policy.generic_suspend() +/// } +/// +/// fn verify(data: &mut cpufreq::PolicyData) -> Result { +/// data.generic_verify() +/// } +/// +/// fn target_index(policy: &mut cpufreq::Policy, index: cpufreq::TableIndex) -> Result { +/// // Update CPU frequency +/// Ok(()) +/// } +/// +/// fn get(policy: &mut cpufreq::Policy) -> Result<u32> { +/// policy.generic_get() +/// } +/// } +/// +/// impl platform::Driver for SampleDriver { +/// type IdInfo = (); +/// const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = None; +/// +/// fn probe( +/// pdev: &platform::Device<Core>, +/// _id_info: Option<&Self::IdInfo>, +/// ) -> Result<Pin<KBox<Self>>> { +/// cpufreq::Registration::<SampleDriver>::new_foreign_owned(pdev.as_ref())?; +/// Ok(KBox::new(Self {}, GFP_KERNEL)?.into()) +/// } +/// } +/// ``` +#[repr(transparent)] +pub struct Registration<T: Driver>(KBox<UnsafeCell<bindings::cpufreq_driver>>, PhantomData<T>); + +/// SAFETY: `Registration` doesn't offer any methods or access to fields when shared between threads +/// or CPUs, so it is safe to share it. +unsafe impl<T: Driver> Sync for Registration<T> {} + +#[allow(clippy::non_send_fields_in_send_ty)] +/// SAFETY: Registration with and unregistration from the cpufreq subsystem can happen from any +/// thread. +unsafe impl<T: Driver> Send for Registration<T> {} + +impl<T: Driver> Registration<T> { + const VTABLE: bindings::cpufreq_driver = bindings::cpufreq_driver { + name: Self::copy_name(T::NAME), + boost_enabled: T::BOOST_ENABLED, + flags: T::FLAGS, + + // Initialize mandatory callbacks. + init: Some(Self::init_callback), + verify: Some(Self::verify_callback), + + // Initialize optional callbacks based on the traits of `T`. + setpolicy: if T::HAS_SETPOLICY { + Some(Self::setpolicy_callback) + } else { + None + }, + target: if T::HAS_TARGET { + Some(Self::target_callback) + } else { + None + }, + target_index: if T::HAS_TARGET_INDEX { + Some(Self::target_index_callback) + } else { + None + }, + fast_switch: if T::HAS_FAST_SWITCH { + Some(Self::fast_switch_callback) + } else { + None + }, + adjust_perf: if T::HAS_ADJUST_PERF { + Some(Self::adjust_perf_callback) + } else { + None + }, + get_intermediate: if T::HAS_GET_INTERMEDIATE { + Some(Self::get_intermediate_callback) + } else { + None + }, + target_intermediate: if T::HAS_TARGET_INTERMEDIATE { + Some(Self::target_intermediate_callback) + } else { + None + }, + get: if T::HAS_GET { + Some(Self::get_callback) + } else { + None + }, + update_limits: if T::HAS_UPDATE_LIMITS { + Some(Self::update_limits_callback) + } else { + None + }, + bios_limit: if T::HAS_BIOS_LIMIT { + Some(Self::bios_limit_callback) + } else { + None + }, + online: if T::HAS_ONLINE { + Some(Self::online_callback) + } else { + None + }, + offline: if T::HAS_OFFLINE { + Some(Self::offline_callback) + } else { + None + }, + exit: if T::HAS_EXIT { + Some(Self::exit_callback) + } else { + None + }, + suspend: if T::HAS_SUSPEND { + Some(Self::suspend_callback) + } else { + None + }, + resume: if T::HAS_RESUME { + Some(Self::resume_callback) + } else { + None + }, + ready: if T::HAS_READY { + Some(Self::ready_callback) + } else { + None + }, + set_boost: if T::HAS_SET_BOOST { + Some(Self::set_boost_callback) + } else { + None + }, + register_em: if T::HAS_REGISTER_EM { + Some(Self::register_em_callback) + } else { + None + }, + // SAFETY: All zeros is a valid value for `bindings::cpufreq_driver`. + ..unsafe { MaybeUninit::zeroed().assume_init() } + }; + + const fn copy_name(name: &'static CStr) -> [c_char; CPUFREQ_NAME_LEN] { + let src = name.as_bytes_with_nul(); + let mut dst = [0; CPUFREQ_NAME_LEN]; + + build_assert!(src.len() <= CPUFREQ_NAME_LEN); + + let mut i = 0; + while i < src.len() { + dst[i] = src[i]; + i += 1; + } + + dst + } + + /// Registers a CPU frequency driver with the cpufreq core. + pub fn new() -> Result<Self> { + // We can't use `&Self::VTABLE` directly because the cpufreq core modifies some fields in + // the C `struct cpufreq_driver`, which requires a mutable reference. + let mut drv = KBox::new(UnsafeCell::new(Self::VTABLE), GFP_KERNEL)?; + + // SAFETY: `drv` is guaranteed to be valid for the lifetime of `Registration`. + to_result(unsafe { bindings::cpufreq_register_driver(drv.get_mut()) })?; + + Ok(Self(drv, PhantomData)) + } + + /// Same as [`Registration::new`], but does not return a [`Registration`] instance. + /// + /// Instead the [`Registration`] is owned by [`Devres`] and will be revoked / dropped, once the + /// device is detached. + pub fn new_foreign_owned(dev: &Device<Bound>) -> Result { + Devres::new_foreign_owned(dev, Self::new()?, GFP_KERNEL) + } +} + +/// CPU frequency driver callbacks. +impl<T: Driver> Registration<T> { + /// Driver's `init` callback. + /// + /// SAFETY: Called from C. Inputs must be valid pointers. + extern "C" fn init_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + from_result(|| { + // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the + // lifetime of `policy`. + let policy = unsafe { Policy::from_raw_mut(ptr) }; + + let data = T::init(policy)?; + policy.set_data(data)?; + Ok(0) + }) + } + + /// Driver's `exit` callback. + /// + /// SAFETY: Called from C. Inputs must be valid pointers. + extern "C" fn exit_callback(ptr: *mut bindings::cpufreq_policy) { + // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the + // lifetime of `policy`. + let policy = unsafe { Policy::from_raw_mut(ptr) }; + + let data = policy.clear_data(); + let _ = T::exit(policy, data); + } + + /// Driver's `online` callback. + /// + /// SAFETY: Called from C. Inputs must be valid pointers. + extern "C" fn online_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + from_result(|| { + // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the + // lifetime of `policy`. + let policy = unsafe { Policy::from_raw_mut(ptr) }; + T::online(policy).map(|()| 0) + }) + } + + /// Driver's `offline` callback. + /// + /// SAFETY: Called from C. Inputs must be valid pointers. + extern "C" fn offline_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + from_result(|| { + // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the + // lifetime of `policy`. + let policy = unsafe { Policy::from_raw_mut(ptr) }; + T::offline(policy).map(|()| 0) + }) + } + + /// Driver's `suspend` callback. + /// + /// SAFETY: Called from C. Inputs must be valid pointers. + extern "C" fn suspend_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + from_result(|| { + // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the + // lifetime of `policy`. + let policy = unsafe { Policy::from_raw_mut(ptr) }; + T::suspend(policy).map(|()| 0) + }) + } + + /// Driver's `resume` callback. + /// + /// SAFETY: Called from C. Inputs must be valid pointers. + extern "C" fn resume_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + from_result(|| { + // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the + // lifetime of `policy`. + let policy = unsafe { Policy::from_raw_mut(ptr) }; + T::resume(policy).map(|()| 0) + }) + } + + /// Driver's `ready` callback. + /// + /// SAFETY: Called from C. Inputs must be valid pointers. + extern "C" fn ready_callback(ptr: *mut bindings::cpufreq_policy) { + // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the + // lifetime of `policy`. + let policy = unsafe { Policy::from_raw_mut(ptr) }; + T::ready(policy); + } + + /// Driver's `verify` callback. + /// + /// SAFETY: Called from C. Inputs must be valid pointers. + extern "C" fn verify_callback(ptr: *mut bindings::cpufreq_policy_data) -> kernel::ffi::c_int { + from_result(|| { + // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the + // lifetime of `policy`. + let data = unsafe { PolicyData::from_raw_mut(ptr) }; + T::verify(data).map(|()| 0) + }) + } + + /// Driver's `setpolicy` callback. + /// + /// SAFETY: Called from C. Inputs must be valid pointers. + extern "C" fn setpolicy_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + from_result(|| { + // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the + // lifetime of `policy`. + let policy = unsafe { Policy::from_raw_mut(ptr) }; + T::setpolicy(policy).map(|()| 0) + }) + } + + /// Driver's `target` callback. + /// + /// SAFETY: Called from C. Inputs must be valid pointers. + extern "C" fn target_callback( + ptr: *mut bindings::cpufreq_policy, + target_freq: u32, + relation: u32, + ) -> kernel::ffi::c_int { + from_result(|| { + // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the + // lifetime of `policy`. + let policy = unsafe { Policy::from_raw_mut(ptr) }; + T::target(policy, target_freq, Relation::new(relation)?).map(|()| 0) + }) + } + + /// Driver's `target_index` callback. + /// + /// SAFETY: Called from C. Inputs must be valid pointers. + extern "C" fn target_index_callback( + ptr: *mut bindings::cpufreq_policy, + index: u32, + ) -> kernel::ffi::c_int { + from_result(|| { + // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the + // lifetime of `policy`. + let policy = unsafe { Policy::from_raw_mut(ptr) }; + + // SAFETY: The C code guarantees that `index` corresponds to a valid entry in the + // frequency table. + let index = unsafe { TableIndex::new(index as usize) }; + + T::target_index(policy, index).map(|()| 0) + }) + } + + /// Driver's `fast_switch` callback. + /// + /// SAFETY: Called from C. Inputs must be valid pointers. + extern "C" fn fast_switch_callback( + ptr: *mut bindings::cpufreq_policy, + target_freq: u32, + ) -> kernel::ffi::c_uint { + // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the + // lifetime of `policy`. + let policy = unsafe { Policy::from_raw_mut(ptr) }; + T::fast_switch(policy, target_freq) + } + + /// Driver's `adjust_perf` callback. + extern "C" fn adjust_perf_callback( + cpu: u32, + min_perf: usize, + target_perf: usize, + capacity: usize, + ) { + if let Ok(mut policy) = PolicyCpu::from_cpu(cpu) { + T::adjust_perf(&mut policy, min_perf, target_perf, capacity); + } + } + + /// Driver's `get_intermediate` callback. + /// + /// SAFETY: Called from C. Inputs must be valid pointers. + extern "C" fn get_intermediate_callback( + ptr: *mut bindings::cpufreq_policy, + index: u32, + ) -> kernel::ffi::c_uint { + // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the + // lifetime of `policy`. + let policy = unsafe { Policy::from_raw_mut(ptr) }; + + // SAFETY: The C code guarantees that `index` corresponds to a valid entry in the + // frequency table. + let index = unsafe { TableIndex::new(index as usize) }; + + T::get_intermediate(policy, index) + } + + /// Driver's `target_intermediate` callback. + /// + /// SAFETY: Called from C. Inputs must be valid pointers. + extern "C" fn target_intermediate_callback( + ptr: *mut bindings::cpufreq_policy, + index: u32, + ) -> kernel::ffi::c_int { + from_result(|| { + // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the + // lifetime of `policy`. + let policy = unsafe { Policy::from_raw_mut(ptr) }; + + // SAFETY: The C code guarantees that `index` corresponds to a valid entry in the + // frequency table. + let index = unsafe { TableIndex::new(index as usize) }; + + T::target_intermediate(policy, index).map(|()| 0) + }) + } + + /// Driver's `get` callback. + extern "C" fn get_callback(cpu: u32) -> kernel::ffi::c_uint { + PolicyCpu::from_cpu(cpu).map_or(0, |mut policy| T::get(&mut policy).map_or(0, |f| f)) + } + + /// Driver's `update_limit` callback. + extern "C" fn update_limits_callback(ptr: *mut bindings::cpufreq_policy) { + // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the + // lifetime of `policy`. + let policy = unsafe { Policy::from_raw_mut(ptr) }; + T::update_limits(policy); + } + + /// Driver's `bios_limit` callback. + /// + /// SAFETY: Called from C. Inputs must be valid pointers. + extern "C" fn bios_limit_callback(cpu: i32, limit: *mut u32) -> kernel::ffi::c_int { + from_result(|| { + let mut policy = PolicyCpu::from_cpu(cpu as u32)?; + + // SAFETY: `limit` is guaranteed by the C code to be valid. + T::bios_limit(&mut policy, &mut (unsafe { *limit })).map(|()| 0) + }) + } + + /// Driver's `set_boost` callback. + /// + /// SAFETY: Called from C. Inputs must be valid pointers. + extern "C" fn set_boost_callback( + ptr: *mut bindings::cpufreq_policy, + state: i32, + ) -> kernel::ffi::c_int { + from_result(|| { + // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the + // lifetime of `policy`. + let policy = unsafe { Policy::from_raw_mut(ptr) }; + T::set_boost(policy, state).map(|()| 0) + }) + } + + /// Driver's `register_em` callback. + /// + /// SAFETY: Called from C. Inputs must be valid pointers. + extern "C" fn register_em_callback(ptr: *mut bindings::cpufreq_policy) { + // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the + // lifetime of `policy`. + let policy = unsafe { Policy::from_raw_mut(ptr) }; + T::register_em(policy); + } +} + +impl<T: Driver> Drop for Registration<T> { + /// Unregisters with the cpufreq core. + fn drop(&mut self) { + // SAFETY: `self.0` is guaranteed to be valid for the lifetime of `Registration`. + unsafe { bindings::cpufreq_unregister_driver(self.0.get_mut()) }; + } +} diff --git a/rust/kernel/cpumask.rs b/rust/kernel/cpumask.rs new file mode 100644 index 000000000000..c90bfac9346a --- /dev/null +++ b/rust/kernel/cpumask.rs @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! CPU Mask abstractions. +//! +//! C header: [`include/linux/cpumask.h`](srctree/include/linux/cpumask.h) + +use crate::{ + alloc::{AllocError, Flags}, + prelude::*, + types::Opaque, +}; + +#[cfg(CONFIG_CPUMASK_OFFSTACK)] +use core::ptr::{self, NonNull}; + +#[cfg(not(CONFIG_CPUMASK_OFFSTACK))] +use core::mem::MaybeUninit; + +use core::ops::{Deref, DerefMut}; + +/// A CPU Mask. +/// +/// Rust abstraction for the C `struct cpumask`. +/// +/// # Invariants +/// +/// A [`Cpumask`] instance always corresponds to a valid C `struct cpumask`. +/// +/// The callers must ensure that the `struct cpumask` is valid for access and +/// remains valid for the lifetime of the returned reference. +/// +/// ## Examples +/// +/// The following example demonstrates how to update a [`Cpumask`]. +/// +/// ``` +/// use kernel::bindings; +/// use kernel::cpumask::Cpumask; +/// +/// fn set_clear_cpu(ptr: *mut bindings::cpumask, set_cpu: u32, clear_cpu: i32) { +/// // SAFETY: The `ptr` is valid for writing and remains valid for the lifetime of the +/// // returned reference. +/// let mask = unsafe { Cpumask::as_mut_ref(ptr) }; +/// +/// mask.set(set_cpu); +/// mask.clear(clear_cpu); +/// } +/// ``` +#[repr(transparent)] +pub struct Cpumask(Opaque<bindings::cpumask>); + +impl Cpumask { + /// Creates a mutable reference to an existing `struct cpumask` pointer. + /// + /// # Safety + /// + /// The caller must ensure that `ptr` is valid for writing and remains valid for the lifetime + /// of the returned reference. + pub unsafe fn as_mut_ref<'a>(ptr: *mut bindings::cpumask) -> &'a mut Self { + // SAFETY: Guaranteed by the safety requirements of the function. + // + // INVARIANT: The caller ensures that `ptr` is valid for writing and remains valid for the + // lifetime of the returned reference. + unsafe { &mut *ptr.cast() } + } + + /// Creates a reference to an existing `struct cpumask` pointer. + /// + /// # Safety + /// + /// The caller must ensure that `ptr` is valid for reading and remains valid for the lifetime + /// of the returned reference. + pub unsafe fn as_ref<'a>(ptr: *const bindings::cpumask) -> &'a Self { + // SAFETY: Guaranteed by the safety requirements of the function. + // + // INVARIANT: The caller ensures that `ptr` is valid for reading and remains valid for the + // lifetime of the returned reference. + unsafe { &*ptr.cast() } + } + + /// Obtain the raw `struct cpumask` pointer. + pub fn as_raw(&self) -> *mut bindings::cpumask { + let this: *const Self = self; + this.cast_mut().cast() + } + + /// Set `cpu` in the cpumask. + /// + /// ATTENTION: Contrary to C, this Rust `set()` method is non-atomic. + /// This mismatches kernel naming convention and corresponds to the C + /// function `__cpumask_set_cpu()`. + #[inline] + pub fn set(&mut self, cpu: u32) { + // SAFETY: By the type invariant, `self.as_raw` is a valid argument to `__cpumask_set_cpu`. + unsafe { bindings::__cpumask_set_cpu(cpu, self.as_raw()) }; + } + + /// Clear `cpu` in the cpumask. + /// + /// ATTENTION: Contrary to C, this Rust `clear()` method is non-atomic. + /// This mismatches kernel naming convention and corresponds to the C + /// function `__cpumask_clear_cpu()`. + #[inline] + pub fn clear(&mut self, cpu: i32) { + // SAFETY: By the type invariant, `self.as_raw` is a valid argument to + // `__cpumask_clear_cpu`. + unsafe { bindings::__cpumask_clear_cpu(cpu, self.as_raw()) }; + } + + /// Test `cpu` in the cpumask. + /// + /// Equivalent to the kernel's `cpumask_test_cpu` API. + #[inline] + pub fn test(&self, cpu: i32) -> bool { + // SAFETY: By the type invariant, `self.as_raw` is a valid argument to `cpumask_test_cpu`. + unsafe { bindings::cpumask_test_cpu(cpu, self.as_raw()) } + } + + /// Set all CPUs in the cpumask. + /// + /// Equivalent to the kernel's `cpumask_setall` API. + #[inline] + pub fn setall(&mut self) { + // SAFETY: By the type invariant, `self.as_raw` is a valid argument to `cpumask_setall`. + unsafe { bindings::cpumask_setall(self.as_raw()) }; + } + + /// Checks if cpumask is empty. + /// + /// Equivalent to the kernel's `cpumask_empty` API. + #[inline] + pub fn empty(&self) -> bool { + // SAFETY: By the type invariant, `self.as_raw` is a valid argument to `cpumask_empty`. + unsafe { bindings::cpumask_empty(self.as_raw()) } + } + + /// Checks if cpumask is full. + /// + /// Equivalent to the kernel's `cpumask_full` API. + #[inline] + pub fn full(&self) -> bool { + // SAFETY: By the type invariant, `self.as_raw` is a valid argument to `cpumask_full`. + unsafe { bindings::cpumask_full(self.as_raw()) } + } + + /// Get weight of the cpumask. + /// + /// Equivalent to the kernel's `cpumask_weight` API. + #[inline] + pub fn weight(&self) -> u32 { + // SAFETY: By the type invariant, `self.as_raw` is a valid argument to `cpumask_weight`. + unsafe { bindings::cpumask_weight(self.as_raw()) } + } + + /// Copy cpumask. + /// + /// Equivalent to the kernel's `cpumask_copy` API. + #[inline] + pub fn copy(&self, dstp: &mut Self) { + // SAFETY: By the type invariant, `Self::as_raw` is a valid argument to `cpumask_copy`. + unsafe { bindings::cpumask_copy(dstp.as_raw(), self.as_raw()) }; + } +} + +/// A CPU Mask pointer. +/// +/// Rust abstraction for the C `struct cpumask_var_t`. +/// +/// # Invariants +/// +/// A [`CpumaskVar`] instance always corresponds to a valid C `struct cpumask_var_t`. +/// +/// The callers must ensure that the `struct cpumask_var_t` is valid for access and remains valid +/// for the lifetime of [`CpumaskVar`]. +/// +/// ## Examples +/// +/// The following example demonstrates how to create and update a [`CpumaskVar`]. +/// +/// ``` +/// use kernel::cpumask::CpumaskVar; +/// +/// let mut mask = CpumaskVar::new_zero(GFP_KERNEL).unwrap(); +/// +/// assert!(mask.empty()); +/// mask.set(2); +/// assert!(mask.test(2)); +/// mask.set(3); +/// assert!(mask.test(3)); +/// assert_eq!(mask.weight(), 2); +/// +/// let mask2 = CpumaskVar::try_clone(&mask).unwrap(); +/// assert!(mask2.test(2)); +/// assert!(mask2.test(3)); +/// assert_eq!(mask2.weight(), 2); +/// ``` +pub struct CpumaskVar { + #[cfg(CONFIG_CPUMASK_OFFSTACK)] + ptr: NonNull<Cpumask>, + #[cfg(not(CONFIG_CPUMASK_OFFSTACK))] + mask: Cpumask, +} + +impl CpumaskVar { + /// Creates a zero-initialized instance of the [`CpumaskVar`]. + pub fn new_zero(_flags: Flags) -> Result<Self, AllocError> { + Ok(Self { + #[cfg(CONFIG_CPUMASK_OFFSTACK)] + ptr: { + let mut ptr: *mut bindings::cpumask = ptr::null_mut(); + + // SAFETY: It is safe to call this method as the reference to `ptr` is valid. + // + // INVARIANT: The associated memory is freed when the `CpumaskVar` goes out of + // scope. + unsafe { bindings::zalloc_cpumask_var(&mut ptr, _flags.as_raw()) }; + NonNull::new(ptr.cast()).ok_or(AllocError)? + }, + + #[cfg(not(CONFIG_CPUMASK_OFFSTACK))] + // SAFETY: FFI type is valid to be zero-initialized. + // + // INVARIANT: The associated memory is freed when the `CpumaskVar` goes out of scope. + mask: unsafe { core::mem::zeroed() }, + }) + } + + /// Creates an instance of the [`CpumaskVar`]. + /// + /// # Safety + /// + /// The caller must ensure that the returned [`CpumaskVar`] is properly initialized before + /// getting used. + pub unsafe fn new(_flags: Flags) -> Result<Self, AllocError> { + Ok(Self { + #[cfg(CONFIG_CPUMASK_OFFSTACK)] + ptr: { + let mut ptr: *mut bindings::cpumask = ptr::null_mut(); + + // SAFETY: It is safe to call this method as the reference to `ptr` is valid. + // + // INVARIANT: The associated memory is freed when the `CpumaskVar` goes out of + // scope. + unsafe { bindings::alloc_cpumask_var(&mut ptr, _flags.as_raw()) }; + NonNull::new(ptr.cast()).ok_or(AllocError)? + }, + #[cfg(not(CONFIG_CPUMASK_OFFSTACK))] + // SAFETY: Guaranteed by the safety requirements of the function. + // + // INVARIANT: The associated memory is freed when the `CpumaskVar` goes out of scope. + mask: unsafe { MaybeUninit::uninit().assume_init() }, + }) + } + + /// Creates a mutable reference to an existing `struct cpumask_var_t` pointer. + /// + /// # Safety + /// + /// The caller must ensure that `ptr` is valid for writing and remains valid for the lifetime + /// of the returned reference. + pub unsafe fn as_mut_ref<'a>(ptr: *mut bindings::cpumask_var_t) -> &'a mut Self { + // SAFETY: Guaranteed by the safety requirements of the function. + // + // INVARIANT: The caller ensures that `ptr` is valid for writing and remains valid for the + // lifetime of the returned reference. + unsafe { &mut *ptr.cast() } + } + + /// Creates a reference to an existing `struct cpumask_var_t` pointer. + /// + /// # Safety + /// + /// The caller must ensure that `ptr` is valid for reading and remains valid for the lifetime + /// of the returned reference. + pub unsafe fn as_ref<'a>(ptr: *const bindings::cpumask_var_t) -> &'a Self { + // SAFETY: Guaranteed by the safety requirements of the function. + // + // INVARIANT: The caller ensures that `ptr` is valid for reading and remains valid for the + // lifetime of the returned reference. + unsafe { &*ptr.cast() } + } + + /// Clones cpumask. + pub fn try_clone(cpumask: &Cpumask) -> Result<Self> { + // SAFETY: The returned cpumask_var is initialized right after this call. + let mut cpumask_var = unsafe { Self::new(GFP_KERNEL) }?; + + cpumask.copy(&mut cpumask_var); + Ok(cpumask_var) + } +} + +// Make [`CpumaskVar`] behave like a pointer to [`Cpumask`]. +impl Deref for CpumaskVar { + type Target = Cpumask; + + #[cfg(CONFIG_CPUMASK_OFFSTACK)] + fn deref(&self) -> &Self::Target { + // SAFETY: The caller owns CpumaskVar, so it is safe to deref the cpumask. + unsafe { &*self.ptr.as_ptr() } + } + + #[cfg(not(CONFIG_CPUMASK_OFFSTACK))] + fn deref(&self) -> &Self::Target { + &self.mask + } +} + +impl DerefMut for CpumaskVar { + #[cfg(CONFIG_CPUMASK_OFFSTACK)] + fn deref_mut(&mut self) -> &mut Cpumask { + // SAFETY: The caller owns CpumaskVar, so it is safe to deref the cpumask. + unsafe { self.ptr.as_mut() } + } + + #[cfg(not(CONFIG_CPUMASK_OFFSTACK))] + fn deref_mut(&mut self) -> &mut Cpumask { + &mut self.mask + } +} + +impl Drop for CpumaskVar { + fn drop(&mut self) { + #[cfg(CONFIG_CPUMASK_OFFSTACK)] + // SAFETY: By the type invariant, `self.as_raw` is a valid argument to `free_cpumask_var`. + unsafe { + bindings::free_cpumask_var(self.as_raw()) + }; + } +} diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 1cf1bd5b73fc..911c72a0fc21 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -44,8 +44,13 @@ pub mod auxiliary; pub mod block; #[doc(hidden)] pub mod build_assert; +pub mod clk; #[cfg(CONFIG_CONFIGFS_FS)] pub mod configfs; +pub mod cpu; +#[cfg(CONFIG_CPU_FREQ)] +pub mod cpufreq; +pub mod cpumask; pub mod cred; pub mod device; pub mod device_id; @@ -67,9 +72,12 @@ pub mod jump_label; pub mod kunit; pub mod list; pub mod miscdevice; +pub mod mm; #[cfg(CONFIG_NET)] pub mod net; pub mod of; +#[cfg(CONFIG_PM_OPP)] +pub mod opp; pub mod page; #[cfg(CONFIG_PCI)] pub mod pci; diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs index fa9ecc42602a..9d9771247c38 100644 --- a/rust/kernel/miscdevice.rs +++ b/rust/kernel/miscdevice.rs @@ -14,6 +14,7 @@ use crate::{ error::{to_result, Error, Result, VTABLE_DEFAULT_ERROR}, ffi::{c_int, c_long, c_uint, c_ulong}, fs::File, + mm::virt::VmaNew, prelude::*, seq_file::SeqFile, str::CStr, @@ -119,6 +120,22 @@ pub trait MiscDevice: Sized { drop(device); } + /// Handle for mmap. + /// + /// This function is invoked when a user space process invokes the `mmap` system call on + /// `file`. The function is a callback that is part of the VMA initializer. The kernel will do + /// initial setup of the VMA before calling this function. The function can then interact with + /// the VMA initialization by calling methods of `vma`. If the function does not return an + /// error, the kernel will complete initialization of the VMA according to the properties of + /// `vma`. + fn mmap( + _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>, + _file: &File, + _vma: &VmaNew, + ) -> Result { + build_error!(VTABLE_DEFAULT_ERROR) + } + /// Handler for ioctls. /// /// The `cmd` argument is usually manipulated using the utilties in [`kernel::ioctl`]. @@ -226,6 +243,33 @@ impl<T: MiscDevice> MiscdeviceVTable<T> { /// # Safety /// /// `file` must be a valid file that is associated with a `MiscDeviceRegistration<T>`. + /// `vma` must be a vma that is currently being mmap'ed with this file. + unsafe extern "C" fn mmap( + file: *mut bindings::file, + vma: *mut bindings::vm_area_struct, + ) -> c_int { + // SAFETY: The mmap call of a file can access the private data. + let private = unsafe { (*file).private_data }; + // SAFETY: This is a Rust Miscdevice, so we call `into_foreign` in `open` and + // `from_foreign` in `release`, and `fops_mmap` is guaranteed to be called between those + // two operations. + let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) }; + // SAFETY: The caller provides a vma that is undergoing initial VMA setup. + let area = unsafe { VmaNew::from_raw(vma) }; + // SAFETY: + // * The file is valid for the duration of this call. + // * There is no active fdget_pos region on the file on this thread. + let file = unsafe { File::from_raw_file(file) }; + + match T::mmap(device, file, area) { + Ok(()) => 0, + Err(err) => err.to_errno(), + } + } + + /// # Safety + /// + /// `file` must be a valid file that is associated with a `MiscDeviceRegistration<T>`. unsafe extern "C" fn ioctl(file: *mut bindings::file, cmd: c_uint, arg: c_ulong) -> c_long { // SAFETY: The ioctl call of a file can access the private data. let private = unsafe { (*file).private_data }; @@ -291,6 +335,7 @@ impl<T: MiscDevice> MiscdeviceVTable<T> { const VTABLE: bindings::file_operations = bindings::file_operations { open: Some(Self::open), release: Some(Self::release), + mmap: if T::HAS_MMAP { Some(Self::mmap) } else { None }, unlocked_ioctl: if T::HAS_IOCTL { Some(Self::ioctl) } else { diff --git a/rust/kernel/mm.rs b/rust/kernel/mm.rs new file mode 100644 index 000000000000..615907a0f3b4 --- /dev/null +++ b/rust/kernel/mm.rs @@ -0,0 +1,344 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Memory management. +//! +//! This module deals with managing the address space of userspace processes. Each process has an +//! instance of [`Mm`], which keeps track of multiple VMAs (virtual memory areas). Each VMA +//! corresponds to a region of memory that the userspace process can access, and the VMA lets you +//! control what happens when userspace reads or writes to that region of memory. +//! +//! C header: [`include/linux/mm.h`](srctree/include/linux/mm.h) +#![cfg(CONFIG_MMU)] + +use crate::{ + bindings, + types::{ARef, AlwaysRefCounted, NotThreadSafe, Opaque}, +}; +use core::{ops::Deref, ptr::NonNull}; + +pub mod virt; +use virt::VmaRef; + +/// A wrapper for the kernel's `struct mm_struct`. +/// +/// This represents the address space of a userspace process, so each process has one `Mm` +/// instance. It may hold many VMAs internally. +/// +/// There is a counter called `mm_users` that counts the users of the address space; this includes +/// the userspace process itself, but can also include kernel threads accessing the address space. +/// Once `mm_users` reaches zero, this indicates that the address space can be destroyed. To access +/// the address space, you must prevent `mm_users` from reaching zero while you are accessing it. +/// The [`MmWithUser`] type represents an address space where this is guaranteed, and you can +/// create one using [`mmget_not_zero`]. +/// +/// The `ARef<Mm>` smart pointer holds an `mmgrab` refcount. Its destructor may sleep. +/// +/// # Invariants +/// +/// Values of this type are always refcounted using `mmgrab`. +/// +/// [`mmget_not_zero`]: Mm::mmget_not_zero +#[repr(transparent)] +pub struct Mm { + mm: Opaque<bindings::mm_struct>, +} + +// SAFETY: It is safe to call `mmdrop` on another thread than where `mmgrab` was called. +unsafe impl Send for Mm {} +// SAFETY: All methods on `Mm` can be called in parallel from several threads. +unsafe impl Sync for Mm {} + +// SAFETY: By the type invariants, this type is always refcounted. +unsafe impl AlwaysRefCounted for Mm { + #[inline] + fn inc_ref(&self) { + // SAFETY: The pointer is valid since self is a reference. + unsafe { bindings::mmgrab(self.as_raw()) }; + } + + #[inline] + unsafe fn dec_ref(obj: NonNull<Self>) { + // SAFETY: The caller is giving up their refcount. + unsafe { bindings::mmdrop(obj.cast().as_ptr()) }; + } +} + +/// A wrapper for the kernel's `struct mm_struct`. +/// +/// This type is like [`Mm`], but with non-zero `mm_users`. It can only be used when `mm_users` can +/// be proven to be non-zero at compile-time, usually because the relevant code holds an `mmget` +/// refcount. It can be used to access the associated address space. +/// +/// The `ARef<MmWithUser>` smart pointer holds an `mmget` refcount. Its destructor may sleep. +/// +/// # Invariants +/// +/// Values of this type are always refcounted using `mmget`. The value of `mm_users` is non-zero. +#[repr(transparent)] +pub struct MmWithUser { + mm: Mm, +} + +// SAFETY: It is safe to call `mmput` on another thread than where `mmget` was called. +unsafe impl Send for MmWithUser {} +// SAFETY: All methods on `MmWithUser` can be called in parallel from several threads. +unsafe impl Sync for MmWithUser {} + +// SAFETY: By the type invariants, this type is always refcounted. +unsafe impl AlwaysRefCounted for MmWithUser { + #[inline] + fn inc_ref(&self) { + // SAFETY: The pointer is valid since self is a reference. + unsafe { bindings::mmget(self.as_raw()) }; + } + + #[inline] + unsafe fn dec_ref(obj: NonNull<Self>) { + // SAFETY: The caller is giving up their refcount. + unsafe { bindings::mmput(obj.cast().as_ptr()) }; + } +} + +// Make all `Mm` methods available on `MmWithUser`. +impl Deref for MmWithUser { + type Target = Mm; + + #[inline] + fn deref(&self) -> &Mm { + &self.mm + } +} + +/// A wrapper for the kernel's `struct mm_struct`. +/// +/// This type is identical to `MmWithUser` except that it uses `mmput_async` when dropping a +/// refcount. This means that the destructor of `ARef<MmWithUserAsync>` is safe to call in atomic +/// context. +/// +/// # Invariants +/// +/// Values of this type are always refcounted using `mmget`. The value of `mm_users` is non-zero. +#[repr(transparent)] +pub struct MmWithUserAsync { + mm: MmWithUser, +} + +// SAFETY: It is safe to call `mmput_async` on another thread than where `mmget` was called. +unsafe impl Send for MmWithUserAsync {} +// SAFETY: All methods on `MmWithUserAsync` can be called in parallel from several threads. +unsafe impl Sync for MmWithUserAsync {} + +// SAFETY: By the type invariants, this type is always refcounted. +unsafe impl AlwaysRefCounted for MmWithUserAsync { + #[inline] + fn inc_ref(&self) { + // SAFETY: The pointer is valid since self is a reference. + unsafe { bindings::mmget(self.as_raw()) }; + } + + #[inline] + unsafe fn dec_ref(obj: NonNull<Self>) { + // SAFETY: The caller is giving up their refcount. + unsafe { bindings::mmput_async(obj.cast().as_ptr()) }; + } +} + +// Make all `MmWithUser` methods available on `MmWithUserAsync`. +impl Deref for MmWithUserAsync { + type Target = MmWithUser; + + #[inline] + fn deref(&self) -> &MmWithUser { + &self.mm + } +} + +// These methods are safe to call even if `mm_users` is zero. +impl Mm { + /// Returns a raw pointer to the inner `mm_struct`. + #[inline] + pub fn as_raw(&self) -> *mut bindings::mm_struct { + self.mm.get() + } + + /// Obtain a reference from a raw pointer. + /// + /// # Safety + /// + /// The caller must ensure that `ptr` points at an `mm_struct`, and that it is not deallocated + /// during the lifetime 'a. + #[inline] + pub unsafe fn from_raw<'a>(ptr: *const bindings::mm_struct) -> &'a Mm { + // SAFETY: Caller promises that the pointer is valid for 'a. Layouts are compatible due to + // repr(transparent). + unsafe { &*ptr.cast() } + } + + /// Calls `mmget_not_zero` and returns a handle if it succeeds. + #[inline] + pub fn mmget_not_zero(&self) -> Option<ARef<MmWithUser>> { + // SAFETY: The pointer is valid since self is a reference. + let success = unsafe { bindings::mmget_not_zero(self.as_raw()) }; + + if success { + // SAFETY: We just created an `mmget` refcount. + Some(unsafe { ARef::from_raw(NonNull::new_unchecked(self.as_raw().cast())) }) + } else { + None + } + } +} + +// These methods require `mm_users` to be non-zero. +impl MmWithUser { + /// Obtain a reference from a raw pointer. + /// + /// # Safety + /// + /// The caller must ensure that `ptr` points at an `mm_struct`, and that `mm_users` remains + /// non-zero for the duration of the lifetime 'a. + #[inline] + pub unsafe fn from_raw<'a>(ptr: *const bindings::mm_struct) -> &'a MmWithUser { + // SAFETY: Caller promises that the pointer is valid for 'a. The layout is compatible due + // to repr(transparent). + unsafe { &*ptr.cast() } + } + + /// Use `mmput_async` when dropping this refcount. + #[inline] + pub fn into_mmput_async(me: ARef<MmWithUser>) -> ARef<MmWithUserAsync> { + // SAFETY: The layouts and invariants are compatible. + unsafe { ARef::from_raw(ARef::into_raw(me).cast()) } + } + + /// Attempt to access a vma using the vma read lock. + /// + /// This is an optimistic trylock operation, so it may fail if there is contention. In that + /// case, you should fall back to taking the mmap read lock. + /// + /// When per-vma locks are disabled, this always returns `None`. + #[inline] + pub fn lock_vma_under_rcu(&self, vma_addr: usize) -> Option<VmaReadGuard<'_>> { + #[cfg(CONFIG_PER_VMA_LOCK)] + { + // SAFETY: Calling `bindings::lock_vma_under_rcu` is always okay given an mm where + // `mm_users` is non-zero. + let vma = unsafe { bindings::lock_vma_under_rcu(self.as_raw(), vma_addr) }; + if !vma.is_null() { + return Some(VmaReadGuard { + // SAFETY: If `lock_vma_under_rcu` returns a non-null ptr, then it points at a + // valid vma. The vma is stable for as long as the vma read lock is held. + vma: unsafe { VmaRef::from_raw(vma) }, + _nts: NotThreadSafe, + }); + } + } + + // Silence warnings about unused variables. + #[cfg(not(CONFIG_PER_VMA_LOCK))] + let _ = vma_addr; + + None + } + + /// Lock the mmap read lock. + #[inline] + pub fn mmap_read_lock(&self) -> MmapReadGuard<'_> { + // SAFETY: The pointer is valid since self is a reference. + unsafe { bindings::mmap_read_lock(self.as_raw()) }; + + // INVARIANT: We just acquired the read lock. + MmapReadGuard { + mm: self, + _nts: NotThreadSafe, + } + } + + /// Try to lock the mmap read lock. + #[inline] + pub fn mmap_read_trylock(&self) -> Option<MmapReadGuard<'_>> { + // SAFETY: The pointer is valid since self is a reference. + let success = unsafe { bindings::mmap_read_trylock(self.as_raw()) }; + + if success { + // INVARIANT: We just acquired the read lock. + Some(MmapReadGuard { + mm: self, + _nts: NotThreadSafe, + }) + } else { + None + } + } +} + +/// A guard for the mmap read lock. +/// +/// # Invariants +/// +/// This `MmapReadGuard` guard owns the mmap read lock. +pub struct MmapReadGuard<'a> { + mm: &'a MmWithUser, + // `mmap_read_lock` and `mmap_read_unlock` must be called on the same thread + _nts: NotThreadSafe, +} + +impl<'a> MmapReadGuard<'a> { + /// Look up a vma at the given address. + #[inline] + pub fn vma_lookup(&self, vma_addr: usize) -> Option<&virt::VmaRef> { + // SAFETY: By the type invariants we hold the mmap read guard, so we can safely call this + // method. Any value is okay for `vma_addr`. + let vma = unsafe { bindings::vma_lookup(self.mm.as_raw(), vma_addr) }; + + if vma.is_null() { + None + } else { + // SAFETY: We just checked that a vma was found, so the pointer references a valid vma. + // + // Furthermore, the returned vma is still under the protection of the read lock guard + // and can be used while the mmap read lock is still held. That the vma is not used + // after the MmapReadGuard gets dropped is enforced by the borrow-checker. + unsafe { Some(virt::VmaRef::from_raw(vma)) } + } + } +} + +impl Drop for MmapReadGuard<'_> { + #[inline] + fn drop(&mut self) { + // SAFETY: We hold the read lock by the type invariants. + unsafe { bindings::mmap_read_unlock(self.mm.as_raw()) }; + } +} + +/// A guard for the vma read lock. +/// +/// # Invariants +/// +/// This `VmaReadGuard` guard owns the vma read lock. +pub struct VmaReadGuard<'a> { + vma: &'a VmaRef, + // `vma_end_read` must be called on the same thread as where the lock was taken + _nts: NotThreadSafe, +} + +// Make all `VmaRef` methods available on `VmaReadGuard`. +impl Deref for VmaReadGuard<'_> { + type Target = VmaRef; + + #[inline] + fn deref(&self) -> &VmaRef { + self.vma + } +} + +impl Drop for VmaReadGuard<'_> { + #[inline] + fn drop(&mut self) { + // SAFETY: We hold the read lock by the type invariants. + unsafe { bindings::vma_end_read(self.vma.as_ptr()) }; + } +} diff --git a/rust/kernel/mm/virt.rs b/rust/kernel/mm/virt.rs new file mode 100644 index 000000000000..31803674aecc --- /dev/null +++ b/rust/kernel/mm/virt.rs @@ -0,0 +1,471 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Virtual memory. +//! +//! This module deals with managing a single VMA in the address space of a userspace process. Each +//! VMA corresponds to a region of memory that the userspace process can access, and the VMA lets +//! you control what happens when userspace reads or writes to that region of memory. +//! +//! The module has several different Rust types that all correspond to the C type called +//! `vm_area_struct`. The different structs represent what kind of access you have to the VMA, e.g. +//! [`VmaRef`] is used when you hold the mmap or vma read lock. Using the appropriate struct +//! ensures that you can't, for example, accidentally call a function that requires holding the +//! write lock when you only hold the read lock. + +use crate::{ + bindings, + error::{code::EINVAL, to_result, Result}, + mm::MmWithUser, + page::Page, + types::Opaque, +}; + +use core::ops::Deref; + +/// A wrapper for the kernel's `struct vm_area_struct` with read access. +/// +/// It represents an area of virtual memory. +/// +/// # Invariants +/// +/// The caller must hold the mmap read lock or the vma read lock. +#[repr(transparent)] +pub struct VmaRef { + vma: Opaque<bindings::vm_area_struct>, +} + +// Methods you can call when holding the mmap or vma read lock (or stronger). They must be usable +// no matter what the vma flags are. +impl VmaRef { + /// Access a virtual memory area given a raw pointer. + /// + /// # Safety + /// + /// Callers must ensure that `vma` is valid for the duration of 'a, and that the mmap or vma + /// read lock (or stronger) is held for at least the duration of 'a. + #[inline] + pub unsafe fn from_raw<'a>(vma: *const bindings::vm_area_struct) -> &'a Self { + // SAFETY: The caller ensures that the invariants are satisfied for the duration of 'a. + unsafe { &*vma.cast() } + } + + /// Returns a raw pointer to this area. + #[inline] + pub fn as_ptr(&self) -> *mut bindings::vm_area_struct { + self.vma.get() + } + + /// Access the underlying `mm_struct`. + #[inline] + pub fn mm(&self) -> &MmWithUser { + // SAFETY: By the type invariants, this `vm_area_struct` is valid and we hold the mmap/vma + // read lock or stronger. This implies that the underlying mm has a non-zero value of + // `mm_users`. + unsafe { MmWithUser::from_raw((*self.as_ptr()).vm_mm) } + } + + /// Returns the flags associated with the virtual memory area. + /// + /// The possible flags are a combination of the constants in [`flags`]. + #[inline] + pub fn flags(&self) -> vm_flags_t { + // SAFETY: By the type invariants, the caller holds at least the mmap read lock, so this + // access is not a data race. + unsafe { (*self.as_ptr()).__bindgen_anon_2.vm_flags } + } + + /// Returns the (inclusive) start address of the virtual memory area. + #[inline] + pub fn start(&self) -> usize { + // SAFETY: By the type invariants, the caller holds at least the mmap read lock, so this + // access is not a data race. + unsafe { (*self.as_ptr()).__bindgen_anon_1.__bindgen_anon_1.vm_start } + } + + /// Returns the (exclusive) end address of the virtual memory area. + #[inline] + pub fn end(&self) -> usize { + // SAFETY: By the type invariants, the caller holds at least the mmap read lock, so this + // access is not a data race. + unsafe { (*self.as_ptr()).__bindgen_anon_1.__bindgen_anon_1.vm_end } + } + + /// Zap pages in the given page range. + /// + /// This clears page table mappings for the range at the leaf level, leaving all other page + /// tables intact, and freeing any memory referenced by the VMA in this range. That is, + /// anonymous memory is completely freed, file-backed memory has its reference count on page + /// cache folio's dropped, any dirty data will still be written back to disk as usual. + /// + /// It may seem odd that we clear at the leaf level, this is however a product of the page + /// table structure used to map physical memory into a virtual address space - each virtual + /// address actually consists of a bitmap of array indices into page tables, which form a + /// hierarchical page table level structure. + /// + /// As a result, each page table level maps a multiple of page table levels below, and thus + /// span ever increasing ranges of pages. At the leaf or PTE level, we map the actual physical + /// memory. + /// + /// It is here where a zap operates, as it the only place we can be certain of clearing without + /// impacting any other virtual mappings. It is an implementation detail as to whether the + /// kernel goes further in freeing unused page tables, but for the purposes of this operation + /// we must only assume that the leaf level is cleared. + #[inline] + pub fn zap_page_range_single(&self, address: usize, size: usize) { + let (end, did_overflow) = address.overflowing_add(size); + if did_overflow || address < self.start() || self.end() < end { + // TODO: call WARN_ONCE once Rust version of it is added + return; + } + + // SAFETY: By the type invariants, the caller has read access to this VMA, which is + // sufficient for this method call. This method has no requirements on the vma flags. The + // address range is checked to be within the vma. + unsafe { + bindings::zap_page_range_single(self.as_ptr(), address, size, core::ptr::null_mut()) + }; + } + + /// If the [`VM_MIXEDMAP`] flag is set, returns a [`VmaMixedMap`] to this VMA, otherwise + /// returns `None`. + /// + /// This can be used to access methods that require [`VM_MIXEDMAP`] to be set. + /// + /// [`VM_MIXEDMAP`]: flags::MIXEDMAP + #[inline] + pub fn as_mixedmap_vma(&self) -> Option<&VmaMixedMap> { + if self.flags() & flags::MIXEDMAP != 0 { + // SAFETY: We just checked that `VM_MIXEDMAP` is set. All other requirements are + // satisfied by the type invariants of `VmaRef`. + Some(unsafe { VmaMixedMap::from_raw(self.as_ptr()) }) + } else { + None + } + } +} + +/// A wrapper for the kernel's `struct vm_area_struct` with read access and [`VM_MIXEDMAP`] set. +/// +/// It represents an area of virtual memory. +/// +/// This struct is identical to [`VmaRef`] except that it must only be used when the +/// [`VM_MIXEDMAP`] flag is set on the vma. +/// +/// # Invariants +/// +/// The caller must hold the mmap read lock or the vma read lock. The `VM_MIXEDMAP` flag must be +/// set. +/// +/// [`VM_MIXEDMAP`]: flags::MIXEDMAP +#[repr(transparent)] +pub struct VmaMixedMap { + vma: VmaRef, +} + +// Make all `VmaRef` methods available on `VmaMixedMap`. +impl Deref for VmaMixedMap { + type Target = VmaRef; + + #[inline] + fn deref(&self) -> &VmaRef { + &self.vma + } +} + +impl VmaMixedMap { + /// Access a virtual memory area given a raw pointer. + /// + /// # Safety + /// + /// Callers must ensure that `vma` is valid for the duration of 'a, and that the mmap read lock + /// (or stronger) is held for at least the duration of 'a. The `VM_MIXEDMAP` flag must be set. + #[inline] + pub unsafe fn from_raw<'a>(vma: *const bindings::vm_area_struct) -> &'a Self { + // SAFETY: The caller ensures that the invariants are satisfied for the duration of 'a. + unsafe { &*vma.cast() } + } + + /// Maps a single page at the given address within the virtual memory area. + /// + /// This operation does not take ownership of the page. + #[inline] + pub fn vm_insert_page(&self, address: usize, page: &Page) -> Result { + // SAFETY: By the type invariant of `Self` caller has read access and has verified that + // `VM_MIXEDMAP` is set. By invariant on `Page` the page has order 0. + to_result(unsafe { bindings::vm_insert_page(self.as_ptr(), address, page.as_ptr()) }) + } +} + +/// A configuration object for setting up a VMA in an `f_ops->mmap()` hook. +/// +/// The `f_ops->mmap()` hook is called when a new VMA is being created, and the hook is able to +/// configure the VMA in various ways to fit the driver that owns it. Using `VmaNew` indicates that +/// you are allowed to perform operations on the VMA that can only be performed before the VMA is +/// fully initialized. +/// +/// # Invariants +/// +/// For the duration of 'a, the referenced vma must be undergoing initialization in an +/// `f_ops->mmap()` hook. +pub struct VmaNew { + vma: VmaRef, +} + +// Make all `VmaRef` methods available on `VmaNew`. +impl Deref for VmaNew { + type Target = VmaRef; + + #[inline] + fn deref(&self) -> &VmaRef { + &self.vma + } +} + +impl VmaNew { + /// Access a virtual memory area given a raw pointer. + /// + /// # Safety + /// + /// Callers must ensure that `vma` is undergoing initial vma setup for the duration of 'a. + #[inline] + pub unsafe fn from_raw<'a>(vma: *mut bindings::vm_area_struct) -> &'a Self { + // SAFETY: The caller ensures that the invariants are satisfied for the duration of 'a. + unsafe { &*vma.cast() } + } + + /// Internal method for updating the vma flags. + /// + /// # Safety + /// + /// This must not be used to set the flags to an invalid value. + #[inline] + unsafe fn update_flags(&self, set: vm_flags_t, unset: vm_flags_t) { + let mut flags = self.flags(); + flags |= set; + flags &= !unset; + + // SAFETY: This is not a data race: the vma is undergoing initial setup, so it's not yet + // shared. Additionally, `VmaNew` is `!Sync`, so it cannot be used to write in parallel. + // The caller promises that this does not set the flags to an invalid value. + unsafe { (*self.as_ptr()).__bindgen_anon_2.__vm_flags = flags }; + } + + /// Set the `VM_MIXEDMAP` flag on this vma. + /// + /// This enables the vma to contain both `struct page` and pure PFN pages. Returns a reference + /// that can be used to call `vm_insert_page` on the vma. + #[inline] + pub fn set_mixedmap(&self) -> &VmaMixedMap { + // SAFETY: We don't yet provide a way to set VM_PFNMAP, so this cannot put the flags in an + // invalid state. + unsafe { self.update_flags(flags::MIXEDMAP, 0) }; + + // SAFETY: We just set `VM_MIXEDMAP` on the vma. + unsafe { VmaMixedMap::from_raw(self.vma.as_ptr()) } + } + + /// Set the `VM_IO` flag on this vma. + /// + /// This is used for memory mapped IO and similar. The flag tells other parts of the kernel to + /// avoid looking at the pages. For memory mapped IO this is useful as accesses to the pages + /// could have side effects. + #[inline] + pub fn set_io(&self) { + // SAFETY: Setting the VM_IO flag is always okay. + unsafe { self.update_flags(flags::IO, 0) }; + } + + /// Set the `VM_DONTEXPAND` flag on this vma. + /// + /// This prevents the vma from being expanded with `mremap()`. + #[inline] + pub fn set_dontexpand(&self) { + // SAFETY: Setting the VM_DONTEXPAND flag is always okay. + unsafe { self.update_flags(flags::DONTEXPAND, 0) }; + } + + /// Set the `VM_DONTCOPY` flag on this vma. + /// + /// This prevents the vma from being copied on fork. This option is only permanent if `VM_IO` + /// is set. + #[inline] + pub fn set_dontcopy(&self) { + // SAFETY: Setting the VM_DONTCOPY flag is always okay. + unsafe { self.update_flags(flags::DONTCOPY, 0) }; + } + + /// Set the `VM_DONTDUMP` flag on this vma. + /// + /// This prevents the vma from being included in core dumps. This option is only permanent if + /// `VM_IO` is set. + #[inline] + pub fn set_dontdump(&self) { + // SAFETY: Setting the VM_DONTDUMP flag is always okay. + unsafe { self.update_flags(flags::DONTDUMP, 0) }; + } + + /// Returns whether `VM_READ` is set. + /// + /// This flag indicates whether userspace is mapping this vma as readable. + #[inline] + pub fn readable(&self) -> bool { + (self.flags() & flags::READ) != 0 + } + + /// Try to clear the `VM_MAYREAD` flag, failing if `VM_READ` is set. + /// + /// This flag indicates whether userspace is allowed to make this vma readable with + /// `mprotect()`. + /// + /// Note that this operation is irreversible. Once `VM_MAYREAD` has been cleared, it can never + /// be set again. + #[inline] + pub fn try_clear_mayread(&self) -> Result { + if self.readable() { + return Err(EINVAL); + } + // SAFETY: Clearing `VM_MAYREAD` is okay when `VM_READ` is not set. + unsafe { self.update_flags(0, flags::MAYREAD) }; + Ok(()) + } + + /// Returns whether `VM_WRITE` is set. + /// + /// This flag indicates whether userspace is mapping this vma as writable. + #[inline] + pub fn writable(&self) -> bool { + (self.flags() & flags::WRITE) != 0 + } + + /// Try to clear the `VM_MAYWRITE` flag, failing if `VM_WRITE` is set. + /// + /// This flag indicates whether userspace is allowed to make this vma writable with + /// `mprotect()`. + /// + /// Note that this operation is irreversible. Once `VM_MAYWRITE` has been cleared, it can never + /// be set again. + #[inline] + pub fn try_clear_maywrite(&self) -> Result { + if self.writable() { + return Err(EINVAL); + } + // SAFETY: Clearing `VM_MAYWRITE` is okay when `VM_WRITE` is not set. + unsafe { self.update_flags(0, flags::MAYWRITE) }; + Ok(()) + } + + /// Returns whether `VM_EXEC` is set. + /// + /// This flag indicates whether userspace is mapping this vma as executable. + #[inline] + pub fn executable(&self) -> bool { + (self.flags() & flags::EXEC) != 0 + } + + /// Try to clear the `VM_MAYEXEC` flag, failing if `VM_EXEC` is set. + /// + /// This flag indicates whether userspace is allowed to make this vma executable with + /// `mprotect()`. + /// + /// Note that this operation is irreversible. Once `VM_MAYEXEC` has been cleared, it can never + /// be set again. + #[inline] + pub fn try_clear_mayexec(&self) -> Result { + if self.executable() { + return Err(EINVAL); + } + // SAFETY: Clearing `VM_MAYEXEC` is okay when `VM_EXEC` is not set. + unsafe { self.update_flags(0, flags::MAYEXEC) }; + Ok(()) + } +} + +/// The integer type used for vma flags. +#[doc(inline)] +pub use bindings::vm_flags_t; + +/// All possible flags for [`VmaRef`]. +pub mod flags { + use super::vm_flags_t; + use crate::bindings; + + /// No flags are set. + pub const NONE: vm_flags_t = bindings::VM_NONE as _; + + /// Mapping allows reads. + pub const READ: vm_flags_t = bindings::VM_READ as _; + + /// Mapping allows writes. + pub const WRITE: vm_flags_t = bindings::VM_WRITE as _; + + /// Mapping allows execution. + pub const EXEC: vm_flags_t = bindings::VM_EXEC as _; + + /// Mapping is shared. + pub const SHARED: vm_flags_t = bindings::VM_SHARED as _; + + /// Mapping may be updated to allow reads. + pub const MAYREAD: vm_flags_t = bindings::VM_MAYREAD as _; + + /// Mapping may be updated to allow writes. + pub const MAYWRITE: vm_flags_t = bindings::VM_MAYWRITE as _; + + /// Mapping may be updated to allow execution. + pub const MAYEXEC: vm_flags_t = bindings::VM_MAYEXEC as _; + + /// Mapping may be updated to be shared. + pub const MAYSHARE: vm_flags_t = bindings::VM_MAYSHARE as _; + + /// Page-ranges managed without `struct page`, just pure PFN. + pub const PFNMAP: vm_flags_t = bindings::VM_PFNMAP as _; + + /// Memory mapped I/O or similar. + pub const IO: vm_flags_t = bindings::VM_IO as _; + + /// Do not copy this vma on fork. + pub const DONTCOPY: vm_flags_t = bindings::VM_DONTCOPY as _; + + /// Cannot expand with mremap(). + pub const DONTEXPAND: vm_flags_t = bindings::VM_DONTEXPAND as _; + + /// Lock the pages covered when they are faulted in. + pub const LOCKONFAULT: vm_flags_t = bindings::VM_LOCKONFAULT as _; + + /// Is a VM accounted object. + pub const ACCOUNT: vm_flags_t = bindings::VM_ACCOUNT as _; + + /// Should the VM suppress accounting. + pub const NORESERVE: vm_flags_t = bindings::VM_NORESERVE as _; + + /// Huge TLB Page VM. + pub const HUGETLB: vm_flags_t = bindings::VM_HUGETLB as _; + + /// Synchronous page faults. (DAX-specific) + pub const SYNC: vm_flags_t = bindings::VM_SYNC as _; + + /// Architecture-specific flag. + pub const ARCH_1: vm_flags_t = bindings::VM_ARCH_1 as _; + + /// Wipe VMA contents in child on fork. + pub const WIPEONFORK: vm_flags_t = bindings::VM_WIPEONFORK as _; + + /// Do not include in the core dump. + pub const DONTDUMP: vm_flags_t = bindings::VM_DONTDUMP as _; + + /// Not soft dirty clean area. + pub const SOFTDIRTY: vm_flags_t = bindings::VM_SOFTDIRTY as _; + + /// Can contain `struct page` and pure PFN pages. + pub const MIXEDMAP: vm_flags_t = bindings::VM_MIXEDMAP as _; + + /// MADV_HUGEPAGE marked this vma. + pub const HUGEPAGE: vm_flags_t = bindings::VM_HUGEPAGE as _; + + /// MADV_NOHUGEPAGE marked this vma. + pub const NOHUGEPAGE: vm_flags_t = bindings::VM_NOHUGEPAGE as _; + + /// KSM may merge identical pages. + pub const MERGEABLE: vm_flags_t = bindings::VM_MERGEABLE as _; +} diff --git a/rust/kernel/opp.rs b/rust/kernel/opp.rs new file mode 100644 index 000000000000..a566fc3e7dcb --- /dev/null +++ b/rust/kernel/opp.rs @@ -0,0 +1,1146 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Operating performance points. +//! +//! This module provides rust abstractions for interacting with the OPP subsystem. +//! +//! C header: [`include/linux/pm_opp.h`](srctree/include/linux/pm_opp.h) +//! +//! Reference: <https://docs.kernel.org/power/opp.html> + +use crate::{ + clk::Hertz, + cpumask::{Cpumask, CpumaskVar}, + device::Device, + error::{code::*, from_err_ptr, from_result, to_result, Error, Result, VTABLE_DEFAULT_ERROR}, + ffi::c_ulong, + prelude::*, + str::CString, + types::{ARef, AlwaysRefCounted, Opaque}, +}; + +#[cfg(CONFIG_CPU_FREQ)] +/// Frequency table implementation. +mod freq { + use super::*; + use crate::cpufreq; + use core::ops::Deref; + + /// OPP frequency table. + /// + /// A [`cpufreq::Table`] created from [`Table`]. + pub struct FreqTable { + dev: ARef<Device>, + ptr: *mut bindings::cpufreq_frequency_table, + } + + impl FreqTable { + /// Creates a new instance of [`FreqTable`] from [`Table`]. + pub(crate) fn new(table: &Table) -> Result<Self> { + let mut ptr: *mut bindings::cpufreq_frequency_table = ptr::null_mut(); + + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. + to_result(unsafe { + bindings::dev_pm_opp_init_cpufreq_table(table.dev.as_raw(), &mut ptr) + })?; + + Ok(Self { + dev: table.dev.clone(), + ptr, + }) + } + + /// Returns a reference to the underlying [`cpufreq::Table`]. + #[inline] + fn table(&self) -> &cpufreq::Table { + // SAFETY: The `ptr` is guaranteed by the C code to be valid. + unsafe { cpufreq::Table::from_raw(self.ptr) } + } + } + + impl Deref for FreqTable { + type Target = cpufreq::Table; + + #[inline] + fn deref(&self) -> &Self::Target { + self.table() + } + } + + impl Drop for FreqTable { + fn drop(&mut self) { + // SAFETY: The pointer was created via `dev_pm_opp_init_cpufreq_table`, and is only + // freed here. + unsafe { + bindings::dev_pm_opp_free_cpufreq_table(self.dev.as_raw(), &mut self.as_raw()) + }; + } + } +} + +#[cfg(CONFIG_CPU_FREQ)] +pub use freq::FreqTable; + +use core::{marker::PhantomData, ptr}; + +use macros::vtable; + +/// Creates a null-terminated slice of pointers to [`Cstring`]s. +fn to_c_str_array(names: &[CString]) -> Result<KVec<*const u8>> { + // Allocated a null-terminated vector of pointers. + let mut list = KVec::with_capacity(names.len() + 1, GFP_KERNEL)?; + + for name in names.iter() { + list.push(name.as_ptr() as _, GFP_KERNEL)?; + } + + list.push(ptr::null(), GFP_KERNEL)?; + Ok(list) +} + +/// The voltage unit. +/// +/// Represents voltage in microvolts, wrapping a [`c_ulong`] value. +/// +/// ## Examples +/// +/// ``` +/// use kernel::opp::MicroVolt; +/// +/// let raw = 90500; +/// let volt = MicroVolt(raw); +/// +/// assert_eq!(usize::from(volt), raw); +/// assert_eq!(volt, MicroVolt(raw)); +/// ``` +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct MicroVolt(pub c_ulong); + +impl From<MicroVolt> for c_ulong { + #[inline] + fn from(volt: MicroVolt) -> Self { + volt.0 + } +} + +/// The power unit. +/// +/// Represents power in microwatts, wrapping a [`c_ulong`] value. +/// +/// ## Examples +/// +/// ``` +/// use kernel::opp::MicroWatt; +/// +/// let raw = 1000000; +/// let power = MicroWatt(raw); +/// +/// assert_eq!(usize::from(power), raw); +/// assert_eq!(power, MicroWatt(raw)); +/// ``` +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct MicroWatt(pub c_ulong); + +impl From<MicroWatt> for c_ulong { + #[inline] + fn from(power: MicroWatt) -> Self { + power.0 + } +} + +/// Handle for a dynamically created [`OPP`]. +/// +/// The associated [`OPP`] is automatically removed when the [`Token`] is dropped. +/// +/// ## Examples +/// +/// The following example demonstrates how to create an [`OPP`] dynamically. +/// +/// ``` +/// use kernel::clk::Hertz; +/// use kernel::device::Device; +/// use kernel::error::Result; +/// use kernel::opp::{Data, MicroVolt, Token}; +/// use kernel::types::ARef; +/// +/// fn create_opp(dev: &ARef<Device>, freq: Hertz, volt: MicroVolt, level: u32) -> Result<Token> { +/// let data = Data::new(freq, volt, level, false); +/// +/// // OPP is removed once token goes out of scope. +/// data.add_opp(dev) +/// } +/// ``` +pub struct Token { + dev: ARef<Device>, + freq: Hertz, +} + +impl Token { + /// Dynamically adds an [`OPP`] and returns a [`Token`] that removes it on drop. + fn new(dev: &ARef<Device>, mut data: Data) -> Result<Self> { + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. + to_result(unsafe { bindings::dev_pm_opp_add_dynamic(dev.as_raw(), &mut data.0) })?; + Ok(Self { + dev: dev.clone(), + freq: data.freq(), + }) + } +} + +impl Drop for Token { + fn drop(&mut self) { + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. + unsafe { bindings::dev_pm_opp_remove(self.dev.as_raw(), self.freq.into()) }; + } +} + +/// OPP data. +/// +/// Rust abstraction for the C `struct dev_pm_opp_data`, used to define operating performance +/// points (OPPs) dynamically. +/// +/// ## Examples +/// +/// The following example demonstrates how to create an [`OPP`] with [`Data`]. +/// +/// ``` +/// use kernel::clk::Hertz; +/// use kernel::device::Device; +/// use kernel::error::Result; +/// use kernel::opp::{Data, MicroVolt, Token}; +/// use kernel::types::ARef; +/// +/// fn create_opp(dev: &ARef<Device>, freq: Hertz, volt: MicroVolt, level: u32) -> Result<Token> { +/// let data = Data::new(freq, volt, level, false); +/// +/// // OPP is removed once token goes out of scope. +/// data.add_opp(dev) +/// } +/// ``` +#[repr(transparent)] +pub struct Data(bindings::dev_pm_opp_data); + +impl Data { + /// Creates a new instance of [`Data`]. + /// + /// This can be used to define a dynamic OPP to be added to a device. + pub fn new(freq: Hertz, volt: MicroVolt, level: u32, turbo: bool) -> Self { + Self(bindings::dev_pm_opp_data { + turbo, + freq: freq.into(), + u_volt: volt.into(), + level, + }) + } + + /// Adds an [`OPP`] dynamically. + /// + /// Returns a [`Token`] that ensures the OPP is automatically removed + /// when it goes out of scope. + #[inline] + pub fn add_opp(self, dev: &ARef<Device>) -> Result<Token> { + Token::new(dev, self) + } + + /// Returns the frequency associated with this OPP data. + #[inline] + fn freq(&self) -> Hertz { + Hertz(self.0.freq) + } +} + +/// [`OPP`] search options. +/// +/// ## Examples +/// +/// Defines how to search for an [`OPP`] in a [`Table`] relative to a frequency. +/// +/// ``` +/// use kernel::clk::Hertz; +/// use kernel::error::Result; +/// use kernel::opp::{OPP, SearchType, Table}; +/// use kernel::types::ARef; +/// +/// fn find_opp(table: &Table, freq: Hertz) -> Result<ARef<OPP>> { +/// let opp = table.opp_from_freq(freq, Some(true), None, SearchType::Exact)?; +/// +/// pr_info!("OPP frequency is: {:?}\n", opp.freq(None)); +/// pr_info!("OPP voltage is: {:?}\n", opp.voltage()); +/// pr_info!("OPP level is: {}\n", opp.level()); +/// pr_info!("OPP power is: {:?}\n", opp.power()); +/// +/// Ok(opp) +/// } +/// ``` +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum SearchType { + /// Match the exact frequency. + Exact, + /// Find the highest frequency less than or equal to the given value. + Floor, + /// Find the lowest frequency greater than or equal to the given value. + Ceil, +} + +/// OPP configuration callbacks. +/// +/// Implement this trait to customize OPP clock and regulator setup for your device. +#[vtable] +pub trait ConfigOps { + /// This is typically used to scale clocks when transitioning between OPPs. + #[inline] + fn config_clks(_dev: &Device, _table: &Table, _opp: &OPP, _scaling_down: bool) -> Result { + build_error!(VTABLE_DEFAULT_ERROR) + } + + /// This provides access to the old and new OPPs, allowing for safe regulator adjustments. + #[inline] + fn config_regulators( + _dev: &Device, + _opp_old: &OPP, + _opp_new: &OPP, + _data: *mut *mut bindings::regulator, + _count: u32, + ) -> Result { + build_error!(VTABLE_DEFAULT_ERROR) + } +} + +/// OPP configuration token. +/// +/// Returned by the OPP core when configuration is applied to a [`Device`]. The associated +/// configuration is automatically cleared when the token is dropped. +pub struct ConfigToken(i32); + +impl Drop for ConfigToken { + fn drop(&mut self) { + // SAFETY: This is the same token value returned by the C code via `dev_pm_opp_set_config`. + unsafe { bindings::dev_pm_opp_clear_config(self.0) }; + } +} + +/// OPP configurations. +/// +/// Rust abstraction for the C `struct dev_pm_opp_config`. +/// +/// ## Examples +/// +/// The following example demonstrates how to set OPP property-name configuration for a [`Device`]. +/// +/// ``` +/// use kernel::device::Device; +/// use kernel::error::Result; +/// use kernel::opp::{Config, ConfigOps, ConfigToken}; +/// use kernel::str::CString; +/// use kernel::types::ARef; +/// use kernel::macros::vtable; +/// +/// #[derive(Default)] +/// struct Driver; +/// +/// #[vtable] +/// impl ConfigOps for Driver {} +/// +/// fn configure(dev: &ARef<Device>) -> Result<ConfigToken> { +/// let name = CString::try_from_fmt(fmt!("{}", "slow"))?; +/// +/// // The OPP configuration is cleared once the [`ConfigToken`] goes out of scope. +/// Config::<Driver>::new() +/// .set_prop_name(name)? +/// .set(dev) +/// } +/// ``` +#[derive(Default)] +pub struct Config<T: ConfigOps> +where + T: Default, +{ + clk_names: Option<KVec<CString>>, + prop_name: Option<CString>, + regulator_names: Option<KVec<CString>>, + supported_hw: Option<KVec<u32>>, + + // Tuple containing (required device, index) + required_dev: Option<(ARef<Device>, u32)>, + _data: PhantomData<T>, +} + +impl<T: ConfigOps + Default> Config<T> { + /// Creates a new instance of [`Config`]. + #[inline] + pub fn new() -> Self { + Self::default() + } + + /// Initializes clock names. + pub fn set_clk_names(mut self, names: KVec<CString>) -> Result<Self> { + if self.clk_names.is_some() { + return Err(EBUSY); + } + + if names.is_empty() { + return Err(EINVAL); + } + + self.clk_names = Some(names); + Ok(self) + } + + /// Initializes property name. + pub fn set_prop_name(mut self, name: CString) -> Result<Self> { + if self.prop_name.is_some() { + return Err(EBUSY); + } + + self.prop_name = Some(name); + Ok(self) + } + + /// Initializes regulator names. + pub fn set_regulator_names(mut self, names: KVec<CString>) -> Result<Self> { + if self.regulator_names.is_some() { + return Err(EBUSY); + } + + if names.is_empty() { + return Err(EINVAL); + } + + self.regulator_names = Some(names); + + Ok(self) + } + + /// Initializes required devices. + pub fn set_required_dev(mut self, dev: ARef<Device>, index: u32) -> Result<Self> { + if self.required_dev.is_some() { + return Err(EBUSY); + } + + self.required_dev = Some((dev, index)); + Ok(self) + } + + /// Initializes supported hardware. + pub fn set_supported_hw(mut self, hw: KVec<u32>) -> Result<Self> { + if self.supported_hw.is_some() { + return Err(EBUSY); + } + + if hw.is_empty() { + return Err(EINVAL); + } + + self.supported_hw = Some(hw); + Ok(self) + } + + /// Sets the configuration with the OPP core. + /// + /// The returned [`ConfigToken`] will remove the configuration when dropped. + pub fn set(self, dev: &Device) -> Result<ConfigToken> { + let (_clk_list, clk_names) = match &self.clk_names { + Some(x) => { + let list = to_c_str_array(x)?; + let ptr = list.as_ptr(); + (Some(list), ptr) + } + None => (None, ptr::null()), + }; + + let (_regulator_list, regulator_names) = match &self.regulator_names { + Some(x) => { + let list = to_c_str_array(x)?; + let ptr = list.as_ptr(); + (Some(list), ptr) + } + None => (None, ptr::null()), + }; + + let prop_name = self + .prop_name + .as_ref() + .map_or(ptr::null(), |p| p.as_char_ptr()); + + let (supported_hw, supported_hw_count) = self + .supported_hw + .as_ref() + .map_or((ptr::null(), 0), |hw| (hw.as_ptr(), hw.len() as u32)); + + let (required_dev, required_dev_index) = self + .required_dev + .as_ref() + .map_or((ptr::null_mut(), 0), |(dev, idx)| (dev.as_raw(), *idx)); + + let mut config = bindings::dev_pm_opp_config { + clk_names, + config_clks: if T::HAS_CONFIG_CLKS { + Some(Self::config_clks) + } else { + None + }, + prop_name, + regulator_names, + config_regulators: if T::HAS_CONFIG_REGULATORS { + Some(Self::config_regulators) + } else { + None + }, + supported_hw, + supported_hw_count, + + required_dev, + required_dev_index, + }; + + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. The OPP core guarantees not to access fields of [`Config`] after this call + // and so we don't need to save a copy of them for future use. + let ret = unsafe { bindings::dev_pm_opp_set_config(dev.as_raw(), &mut config) }; + if ret < 0 { + Err(Error::from_errno(ret)) + } else { + Ok(ConfigToken(ret)) + } + } + + /// Config's clk callback. + /// + /// SAFETY: Called from C. Inputs must be valid pointers. + extern "C" fn config_clks( + dev: *mut bindings::device, + opp_table: *mut bindings::opp_table, + opp: *mut bindings::dev_pm_opp, + _data: *mut kernel::ffi::c_void, + scaling_down: bool, + ) -> kernel::ffi::c_int { + from_result(|| { + // SAFETY: 'dev' is guaranteed by the C code to be valid. + let dev = unsafe { Device::get_device(dev) }; + T::config_clks( + &dev, + // SAFETY: 'opp_table' is guaranteed by the C code to be valid. + &unsafe { Table::from_raw_table(opp_table, &dev) }, + // SAFETY: 'opp' is guaranteed by the C code to be valid. + unsafe { OPP::from_raw_opp(opp)? }, + scaling_down, + ) + .map(|()| 0) + }) + } + + /// Config's regulator callback. + /// + /// SAFETY: Called from C. Inputs must be valid pointers. + extern "C" fn config_regulators( + dev: *mut bindings::device, + old_opp: *mut bindings::dev_pm_opp, + new_opp: *mut bindings::dev_pm_opp, + regulators: *mut *mut bindings::regulator, + count: kernel::ffi::c_uint, + ) -> kernel::ffi::c_int { + from_result(|| { + // SAFETY: 'dev' is guaranteed by the C code to be valid. + let dev = unsafe { Device::get_device(dev) }; + T::config_regulators( + &dev, + // SAFETY: 'old_opp' is guaranteed by the C code to be valid. + unsafe { OPP::from_raw_opp(old_opp)? }, + // SAFETY: 'new_opp' is guaranteed by the C code to be valid. + unsafe { OPP::from_raw_opp(new_opp)? }, + regulators, + count, + ) + .map(|()| 0) + }) + } +} + +/// A reference-counted OPP table. +/// +/// Rust abstraction for the C `struct opp_table`. +/// +/// # Invariants +/// +/// The pointer stored in `Self` is non-null and valid for the lifetime of the [`Table`]. +/// +/// Instances of this type are reference-counted. +/// +/// ## Examples +/// +/// The following example demonstrates how to get OPP [`Table`] for a [`Cpumask`] and set its +/// frequency. +/// +/// ``` +/// # #![cfg(CONFIG_OF)] +/// use kernel::clk::Hertz; +/// use kernel::cpumask::Cpumask; +/// use kernel::device::Device; +/// use kernel::error::Result; +/// use kernel::opp::Table; +/// use kernel::types::ARef; +/// +/// fn get_table(dev: &ARef<Device>, mask: &mut Cpumask, freq: Hertz) -> Result<Table> { +/// let mut opp_table = Table::from_of_cpumask(dev, mask)?; +/// +/// if opp_table.opp_count()? == 0 { +/// return Err(EINVAL); +/// } +/// +/// pr_info!("Max transition latency is: {} ns\n", opp_table.max_transition_latency_ns()); +/// pr_info!("Suspend frequency is: {:?}\n", opp_table.suspend_freq()); +/// +/// opp_table.set_rate(freq)?; +/// Ok(opp_table) +/// } +/// ``` +pub struct Table { + ptr: *mut bindings::opp_table, + dev: ARef<Device>, + #[allow(dead_code)] + em: bool, + #[allow(dead_code)] + of: bool, + cpus: Option<CpumaskVar>, +} + +/// SAFETY: It is okay to send ownership of [`Table`] across thread boundaries. +unsafe impl Send for Table {} + +/// SAFETY: It is okay to access [`Table`] through shared references from other threads because +/// we're either accessing properties that don't change or that are properly synchronised by C code. +unsafe impl Sync for Table {} + +impl Table { + /// Creates a new reference-counted [`Table`] from a raw pointer. + /// + /// # Safety + /// + /// Callers must ensure that `ptr` is valid and non-null. + unsafe fn from_raw_table(ptr: *mut bindings::opp_table, dev: &ARef<Device>) -> Self { + // SAFETY: By the safety requirements, ptr is valid and its refcount will be incremented. + // + // INVARIANT: The reference-count is decremented when [`Table`] goes out of scope. + unsafe { bindings::dev_pm_opp_get_opp_table_ref(ptr) }; + + Self { + ptr, + dev: dev.clone(), + em: false, + of: false, + cpus: None, + } + } + + /// Creates a new reference-counted [`Table`] instance for a [`Device`]. + pub fn from_dev(dev: &Device) -> Result<Self> { + // SAFETY: The requirements are satisfied by the existence of the [`Device`] and its safety + // requirements. + // + // INVARIANT: The reference-count is incremented by the C code and is decremented when + // [`Table`] goes out of scope. + let ptr = from_err_ptr(unsafe { bindings::dev_pm_opp_get_opp_table(dev.as_raw()) })?; + + Ok(Self { + ptr, + dev: dev.into(), + em: false, + of: false, + cpus: None, + }) + } + + /// Creates a new reference-counted [`Table`] instance for a [`Device`] based on device tree + /// entries. + #[cfg(CONFIG_OF)] + pub fn from_of(dev: &ARef<Device>, index: i32) -> Result<Self> { + // SAFETY: The requirements are satisfied by the existence of the [`Device`] and its safety + // requirements. + // + // INVARIANT: The reference-count is incremented by the C code and is decremented when + // [`Table`] goes out of scope. + to_result(unsafe { bindings::dev_pm_opp_of_add_table_indexed(dev.as_raw(), index) })?; + + // Get the newly created [`Table`]. + let mut table = Self::from_dev(dev)?; + table.of = true; + + Ok(table) + } + + /// Remove device tree based [`Table`]. + #[cfg(CONFIG_OF)] + #[inline] + fn remove_of(&self) { + // SAFETY: The requirements are satisfied by the existence of the [`Device`] and its safety + // requirements. We took the reference from [`from_of`] earlier, it is safe to drop the + // same now. + unsafe { bindings::dev_pm_opp_of_remove_table(self.dev.as_raw()) }; + } + + /// Creates a new reference-counted [`Table`] instance for a [`Cpumask`] based on device tree + /// entries. + #[cfg(CONFIG_OF)] + pub fn from_of_cpumask(dev: &Device, cpumask: &mut Cpumask) -> Result<Self> { + // SAFETY: The cpumask is valid and the returned pointer will be owned by the [`Table`] + // instance. + // + // INVARIANT: The reference-count is incremented by the C code and is decremented when + // [`Table`] goes out of scope. + to_result(unsafe { bindings::dev_pm_opp_of_cpumask_add_table(cpumask.as_raw()) })?; + + // Fetch the newly created table. + let mut table = Self::from_dev(dev)?; + table.cpus = Some(CpumaskVar::try_clone(cpumask)?); + + Ok(table) + } + + /// Remove device tree based [`Table`] for a [`Cpumask`]. + #[cfg(CONFIG_OF)] + #[inline] + fn remove_of_cpumask(&self, cpumask: &Cpumask) { + // SAFETY: The cpumask is valid and we took the reference from [`from_of_cpumask`] earlier, + // it is safe to drop the same now. + unsafe { bindings::dev_pm_opp_of_cpumask_remove_table(cpumask.as_raw()) }; + } + + /// Returns the number of [`OPP`]s in the [`Table`]. + pub fn opp_count(&self) -> Result<u32> { + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. + let ret = unsafe { bindings::dev_pm_opp_get_opp_count(self.dev.as_raw()) }; + if ret < 0 { + Err(Error::from_errno(ret)) + } else { + Ok(ret as u32) + } + } + + /// Returns max clock latency (in nanoseconds) of the [`OPP`]s in the [`Table`]. + #[inline] + pub fn max_clock_latency_ns(&self) -> usize { + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. + unsafe { bindings::dev_pm_opp_get_max_clock_latency(self.dev.as_raw()) } + } + + /// Returns max volt latency (in nanoseconds) of the [`OPP`]s in the [`Table`]. + #[inline] + pub fn max_volt_latency_ns(&self) -> usize { + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. + unsafe { bindings::dev_pm_opp_get_max_volt_latency(self.dev.as_raw()) } + } + + /// Returns max transition latency (in nanoseconds) of the [`OPP`]s in the [`Table`]. + #[inline] + pub fn max_transition_latency_ns(&self) -> usize { + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. + unsafe { bindings::dev_pm_opp_get_max_transition_latency(self.dev.as_raw()) } + } + + /// Returns the suspend [`OPP`]'s frequency. + #[inline] + pub fn suspend_freq(&self) -> Hertz { + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. + Hertz(unsafe { bindings::dev_pm_opp_get_suspend_opp_freq(self.dev.as_raw()) }) + } + + /// Synchronizes regulators used by the [`Table`]. + #[inline] + pub fn sync_regulators(&self) -> Result { + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. + to_result(unsafe { bindings::dev_pm_opp_sync_regulators(self.dev.as_raw()) }) + } + + /// Gets sharing CPUs. + #[inline] + pub fn sharing_cpus(dev: &Device, cpumask: &mut Cpumask) -> Result { + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. + to_result(unsafe { bindings::dev_pm_opp_get_sharing_cpus(dev.as_raw(), cpumask.as_raw()) }) + } + + /// Sets sharing CPUs. + pub fn set_sharing_cpus(&mut self, cpumask: &mut Cpumask) -> Result { + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. + to_result(unsafe { + bindings::dev_pm_opp_set_sharing_cpus(self.dev.as_raw(), cpumask.as_raw()) + })?; + + if let Some(mask) = self.cpus.as_mut() { + // Update the cpumask as this will be used while removing the table. + cpumask.copy(mask); + } + + Ok(()) + } + + /// Gets sharing CPUs from device tree. + #[cfg(CONFIG_OF)] + #[inline] + pub fn of_sharing_cpus(dev: &Device, cpumask: &mut Cpumask) -> Result { + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. + to_result(unsafe { + bindings::dev_pm_opp_of_get_sharing_cpus(dev.as_raw(), cpumask.as_raw()) + }) + } + + /// Updates the voltage value for an [`OPP`]. + #[inline] + pub fn adjust_voltage( + &self, + freq: Hertz, + volt: MicroVolt, + volt_min: MicroVolt, + volt_max: MicroVolt, + ) -> Result { + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. + to_result(unsafe { + bindings::dev_pm_opp_adjust_voltage( + self.dev.as_raw(), + freq.into(), + volt.into(), + volt_min.into(), + volt_max.into(), + ) + }) + } + + /// Creates [`FreqTable`] from [`Table`]. + #[cfg(CONFIG_CPU_FREQ)] + #[inline] + pub fn cpufreq_table(&mut self) -> Result<FreqTable> { + FreqTable::new(self) + } + + /// Configures device with [`OPP`] matching the frequency value. + #[inline] + pub fn set_rate(&self, freq: Hertz) -> Result { + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. + to_result(unsafe { bindings::dev_pm_opp_set_rate(self.dev.as_raw(), freq.into()) }) + } + + /// Configures device with [`OPP`]. + #[inline] + pub fn set_opp(&self, opp: &OPP) -> Result { + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. + to_result(unsafe { bindings::dev_pm_opp_set_opp(self.dev.as_raw(), opp.as_raw()) }) + } + + /// Finds [`OPP`] based on frequency. + pub fn opp_from_freq( + &self, + freq: Hertz, + available: Option<bool>, + index: Option<u32>, + stype: SearchType, + ) -> Result<ARef<OPP>> { + let raw_dev = self.dev.as_raw(); + let index = index.unwrap_or(0); + let mut rate = freq.into(); + + let ptr = from_err_ptr(match stype { + SearchType::Exact => { + if let Some(available) = available { + // SAFETY: The requirements are satisfied by the existence of [`Device`] and + // its safety requirements. The returned pointer will be owned by the new + // [`OPP`] instance. + unsafe { + bindings::dev_pm_opp_find_freq_exact_indexed( + raw_dev, rate, index, available, + ) + } + } else { + return Err(EINVAL); + } + } + + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. The returned pointer will be owned by the new [`OPP`] instance. + SearchType::Ceil => unsafe { + bindings::dev_pm_opp_find_freq_ceil_indexed(raw_dev, &mut rate, index) + }, + + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. The returned pointer will be owned by the new [`OPP`] instance. + SearchType::Floor => unsafe { + bindings::dev_pm_opp_find_freq_floor_indexed(raw_dev, &mut rate, index) + }, + })?; + + // SAFETY: The `ptr` is guaranteed by the C code to be valid. + unsafe { OPP::from_raw_opp_owned(ptr) } + } + + /// Finds [`OPP`] based on level. + pub fn opp_from_level(&self, mut level: u32, stype: SearchType) -> Result<ARef<OPP>> { + let raw_dev = self.dev.as_raw(); + + let ptr = from_err_ptr(match stype { + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. The returned pointer will be owned by the new [`OPP`] instance. + SearchType::Exact => unsafe { bindings::dev_pm_opp_find_level_exact(raw_dev, level) }, + + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. The returned pointer will be owned by the new [`OPP`] instance. + SearchType::Ceil => unsafe { + bindings::dev_pm_opp_find_level_ceil(raw_dev, &mut level) + }, + + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. The returned pointer will be owned by the new [`OPP`] instance. + SearchType::Floor => unsafe { + bindings::dev_pm_opp_find_level_floor(raw_dev, &mut level) + }, + })?; + + // SAFETY: The `ptr` is guaranteed by the C code to be valid. + unsafe { OPP::from_raw_opp_owned(ptr) } + } + + /// Finds [`OPP`] based on bandwidth. + pub fn opp_from_bw(&self, mut bw: u32, index: i32, stype: SearchType) -> Result<ARef<OPP>> { + let raw_dev = self.dev.as_raw(); + + let ptr = from_err_ptr(match stype { + // The OPP core doesn't support this yet. + SearchType::Exact => return Err(EINVAL), + + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. The returned pointer will be owned by the new [`OPP`] instance. + SearchType::Ceil => unsafe { + bindings::dev_pm_opp_find_bw_ceil(raw_dev, &mut bw, index) + }, + + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. The returned pointer will be owned by the new [`OPP`] instance. + SearchType::Floor => unsafe { + bindings::dev_pm_opp_find_bw_floor(raw_dev, &mut bw, index) + }, + })?; + + // SAFETY: The `ptr` is guaranteed by the C code to be valid. + unsafe { OPP::from_raw_opp_owned(ptr) } + } + + /// Enables the [`OPP`]. + #[inline] + pub fn enable_opp(&self, freq: Hertz) -> Result { + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. + to_result(unsafe { bindings::dev_pm_opp_enable(self.dev.as_raw(), freq.into()) }) + } + + /// Disables the [`OPP`]. + #[inline] + pub fn disable_opp(&self, freq: Hertz) -> Result { + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. + to_result(unsafe { bindings::dev_pm_opp_disable(self.dev.as_raw(), freq.into()) }) + } + + /// Registers with the Energy model. + #[cfg(CONFIG_OF)] + pub fn of_register_em(&mut self, cpumask: &mut Cpumask) -> Result { + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. + to_result(unsafe { + bindings::dev_pm_opp_of_register_em(self.dev.as_raw(), cpumask.as_raw()) + })?; + + self.em = true; + Ok(()) + } + + /// Unregisters with the Energy model. + #[cfg(all(CONFIG_OF, CONFIG_ENERGY_MODEL))] + #[inline] + fn of_unregister_em(&self) { + // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety + // requirements. We registered with the EM framework earlier, it is safe to unregister now. + unsafe { bindings::em_dev_unregister_perf_domain(self.dev.as_raw()) }; + } +} + +impl Drop for Table { + fn drop(&mut self) { + // SAFETY: By the type invariants, we know that `self` owns a reference, so it is safe + // to relinquish it now. + unsafe { bindings::dev_pm_opp_put_opp_table(self.ptr) }; + + #[cfg(CONFIG_OF)] + { + #[cfg(CONFIG_ENERGY_MODEL)] + if self.em { + self.of_unregister_em(); + } + + if self.of { + self.remove_of(); + } else if let Some(cpumask) = self.cpus.take() { + self.remove_of_cpumask(&cpumask); + } + } + } +} + +/// A reference-counted Operating performance point (OPP). +/// +/// Rust abstraction for the C `struct dev_pm_opp`. +/// +/// # Invariants +/// +/// The pointer stored in `Self` is non-null and valid for the lifetime of the [`OPP`]. +/// +/// Instances of this type are reference-counted. The reference count is incremented by the +/// `dev_pm_opp_get` function and decremented by `dev_pm_opp_put`. The Rust type `ARef<OPP>` +/// represents a pointer that owns a reference count on the [`OPP`]. +/// +/// A reference to the [`OPP`], &[`OPP`], isn't refcounted by the Rust code. +/// +/// ## Examples +/// +/// The following example demonstrates how to get [`OPP`] corresponding to a frequency value and +/// configure the device with it. +/// +/// ``` +/// use kernel::clk::Hertz; +/// use kernel::error::Result; +/// use kernel::opp::{SearchType, Table}; +/// +/// fn configure_opp(table: &Table, freq: Hertz) -> Result { +/// let opp = table.opp_from_freq(freq, Some(true), None, SearchType::Exact)?; +/// +/// if opp.freq(None) != freq { +/// return Err(EINVAL); +/// } +/// +/// table.set_opp(&opp) +/// } +/// ``` +#[repr(transparent)] +pub struct OPP(Opaque<bindings::dev_pm_opp>); + +/// SAFETY: It is okay to send the ownership of [`OPP`] across thread boundaries. +unsafe impl Send for OPP {} + +/// SAFETY: It is okay to access [`OPP`] through shared references from other threads because we're +/// either accessing properties that don't change or that are properly synchronised by C code. +unsafe impl Sync for OPP {} + +/// SAFETY: The type invariants guarantee that [`OPP`] is always refcounted. +unsafe impl AlwaysRefCounted for OPP { + fn inc_ref(&self) { + // SAFETY: The existence of a shared reference means that the refcount is nonzero. + unsafe { bindings::dev_pm_opp_get(self.0.get()) }; + } + + unsafe fn dec_ref(obj: ptr::NonNull<Self>) { + // SAFETY: The safety requirements guarantee that the refcount is nonzero. + unsafe { bindings::dev_pm_opp_put(obj.cast().as_ptr()) } + } +} + +impl OPP { + /// Creates an owned reference to a [`OPP`] from a valid pointer. + /// + /// The refcount is incremented by the C code and will be decremented by `dec_ref` when the + /// [`ARef`] object is dropped. + /// + /// # Safety + /// + /// The caller must ensure that `ptr` is valid and the refcount of the [`OPP`] is incremented. + /// The caller must also ensure that it doesn't explicitly drop the refcount of the [`OPP`], as + /// the returned [`ARef`] object takes over the refcount increment on the underlying object and + /// the same will be dropped along with it. + pub unsafe fn from_raw_opp_owned(ptr: *mut bindings::dev_pm_opp) -> Result<ARef<Self>> { + let ptr = ptr::NonNull::new(ptr).ok_or(ENODEV)?; + + // SAFETY: The safety requirements guarantee the validity of the pointer. + // + // INVARIANT: The reference-count is decremented when [`OPP`] goes out of scope. + Ok(unsafe { ARef::from_raw(ptr.cast()) }) + } + + /// Creates a reference to a [`OPP`] from a valid pointer. + /// + /// The refcount is not updated by the Rust API unless the returned reference is converted to + /// an [`ARef`] object. + /// + /// # Safety + /// + /// The caller must ensure that `ptr` is valid and remains valid for the duration of `'a`. + #[inline] + pub unsafe fn from_raw_opp<'a>(ptr: *mut bindings::dev_pm_opp) -> Result<&'a Self> { + // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the + // duration of 'a. The cast is okay because [`OPP`] is `repr(transparent)`. + Ok(unsafe { &*ptr.cast() }) + } + + #[inline] + fn as_raw(&self) -> *mut bindings::dev_pm_opp { + self.0.get() + } + + /// Returns the frequency of an [`OPP`]. + pub fn freq(&self, index: Option<u32>) -> Hertz { + let index = index.unwrap_or(0); + + // SAFETY: By the type invariants, we know that `self` owns a reference, so it is safe to + // use it. + Hertz(unsafe { bindings::dev_pm_opp_get_freq_indexed(self.as_raw(), index) }) + } + + /// Returns the voltage of an [`OPP`]. + #[inline] + pub fn voltage(&self) -> MicroVolt { + // SAFETY: By the type invariants, we know that `self` owns a reference, so it is safe to + // use it. + MicroVolt(unsafe { bindings::dev_pm_opp_get_voltage(self.as_raw()) }) + } + + /// Returns the level of an [`OPP`]. + #[inline] + pub fn level(&self) -> u32 { + // SAFETY: By the type invariants, we know that `self` owns a reference, so it is safe to + // use it. + unsafe { bindings::dev_pm_opp_get_level(self.as_raw()) } + } + + /// Returns the power of an [`OPP`]. + #[inline] + pub fn power(&self) -> MicroWatt { + // SAFETY: By the type invariants, we know that `self` owns a reference, so it is safe to + // use it. + MicroWatt(unsafe { bindings::dev_pm_opp_get_power(self.as_raw()) }) + } + + /// Returns the required pstate of an [`OPP`]. + #[inline] + pub fn required_pstate(&self, index: u32) -> u32 { + // SAFETY: By the type invariants, we know that `self` owns a reference, so it is safe to + // use it. + unsafe { bindings::dev_pm_opp_get_required_pstate(self.as_raw(), index) } + } + + /// Returns true if the [`OPP`] is turbo. + #[inline] + pub fn is_turbo(&self) -> bool { + // SAFETY: By the type invariants, we know that `self` owns a reference, so it is safe to + // use it. + unsafe { bindings::dev_pm_opp_is_turbo(self.as_raw()) } + } +} diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs index 9e6f6854948d..927413d85484 100644 --- a/rust/kernel/task.rs +++ b/rust/kernel/task.rs @@ -7,6 +7,7 @@ use crate::{ bindings, ffi::{c_int, c_long, c_uint}, + mm::MmWithUser, pid_namespace::PidNamespace, types::{ARef, NotThreadSafe, Opaque}, }; @@ -33,22 +34,20 @@ pub const TASK_NORMAL: c_uint = bindings::TASK_NORMAL as c_uint; #[macro_export] macro_rules! current { () => { - // SAFETY: Deref + addr-of below create a temporary `TaskRef` that cannot outlive the - // caller. + // SAFETY: This expression creates a temporary value that is dropped at the end of the + // caller's scope. The following mechanisms ensure that the resulting `&CurrentTask` cannot + // leave current task context: + // + // * To return to userspace, the caller must leave the current scope. + // * Operations such as `begin_new_exec()` are necessarily unsafe and the caller of + // `begin_new_exec()` is responsible for safety. + // * Rust abstractions for things such as a `kthread_use_mm()` scope must require the + // closure to be `Send`, so the `NotThreadSafe` field of `CurrentTask` ensures that the + // `&CurrentTask` cannot cross the scope in either direction. unsafe { &*$crate::task::Task::current() } }; } -/// Returns the currently running task's pid namespace. -#[macro_export] -macro_rules! current_pid_ns { - () => { - // SAFETY: Deref + addr-of below create a temporary `PidNamespaceRef` that cannot outlive - // the caller. - unsafe { &*$crate::task::Task::current_pid_ns() } - }; -} - /// Wraps the kernel's `struct task_struct`. /// /// # Invariants @@ -87,7 +86,7 @@ macro_rules! current_pid_ns { /// impl State { /// fn new() -> Self { /// Self { -/// creator: current!().into(), +/// creator: ARef::from(&**current!()), /// index: 0, /// } /// } @@ -107,6 +106,44 @@ unsafe impl Send for Task {} // synchronised by C code (e.g., `signal_pending`). unsafe impl Sync for Task {} +/// Represents the [`Task`] in the `current` global. +/// +/// This type exists to provide more efficient operations that are only valid on the current task. +/// For example, to retrieve the pid-namespace of a task, you must use rcu protection unless it is +/// the current task. +/// +/// # Invariants +/// +/// Each value of this type must only be accessed from the task context it was created within. +/// +/// Of course, every thread is in a different task context, but for the purposes of this invariant, +/// these operations also permanently leave the task context: +/// +/// * Returning to userspace from system call context. +/// * Calling `release_task()`. +/// * Calling `begin_new_exec()` in a binary format loader. +/// +/// Other operations temporarily create a new sub-context: +/// +/// * Calling `kthread_use_mm()` creates a new context, and `kthread_unuse_mm()` returns to the +/// old context. +/// +/// This means that a `CurrentTask` obtained before a `kthread_use_mm()` call may be used again +/// once `kthread_unuse_mm()` is called, but it must not be used between these two calls. +/// Conversely, a `CurrentTask` obtained between a `kthread_use_mm()`/`kthread_unuse_mm()` pair +/// must not be used after `kthread_unuse_mm()`. +#[repr(transparent)] +pub struct CurrentTask(Task, NotThreadSafe); + +// Make all `Task` methods available on `CurrentTask`. +impl Deref for CurrentTask { + type Target = Task; + #[inline] + fn deref(&self) -> &Task { + &self.0 + } +} + /// The type of process identifiers (PIDs). pub type Pid = bindings::pid_t; @@ -133,119 +170,29 @@ impl Task { /// /// # Safety /// - /// Callers must ensure that the returned object doesn't outlive the current task/thread. - pub unsafe fn current() -> impl Deref<Target = Task> { - struct TaskRef<'a> { - task: &'a Task, - _not_send: NotThreadSafe, + /// Callers must ensure that the returned object is only used to access a [`CurrentTask`] + /// within the task context that was active when this function was called. For more details, + /// see the invariants section for [`CurrentTask`]. + pub unsafe fn current() -> impl Deref<Target = CurrentTask> { + struct TaskRef { + task: *const CurrentTask, } - impl Deref for TaskRef<'_> { - type Target = Task; + impl Deref for TaskRef { + type Target = CurrentTask; fn deref(&self) -> &Self::Target { - self.task + // SAFETY: The returned reference borrows from this `TaskRef`, so it cannot outlive + // the `TaskRef`, which the caller of `Task::current()` has promised will not + // outlive the task/thread for which `self.task` is the `current` pointer. Thus, it + // is okay to return a `CurrentTask` reference here. + unsafe { &*self.task } } } - let current = Task::current_raw(); TaskRef { - // SAFETY: If the current thread is still running, the current task is valid. Given - // that `TaskRef` is not `Send`, we know it cannot be transferred to another thread - // (where it could potentially outlive the caller). - task: unsafe { &*current.cast() }, - _not_send: NotThreadSafe, - } - } - - /// Returns a PidNamespace reference for the currently executing task's/thread's pid namespace. - /// - /// This function can be used to create an unbounded lifetime by e.g., storing the returned - /// PidNamespace in a global variable which would be a bug. So the recommended way to get the - /// current task's/thread's pid namespace is to use the [`current_pid_ns`] macro because it is - /// safe. - /// - /// # Safety - /// - /// Callers must ensure that the returned object doesn't outlive the current task/thread. - pub unsafe fn current_pid_ns() -> impl Deref<Target = PidNamespace> { - struct PidNamespaceRef<'a> { - task: &'a PidNamespace, - _not_send: NotThreadSafe, - } - - impl Deref for PidNamespaceRef<'_> { - type Target = PidNamespace; - - fn deref(&self) -> &Self::Target { - self.task - } - } - - // The lifetime of `PidNamespace` is bound to `Task` and `struct pid`. - // - // The `PidNamespace` of a `Task` doesn't ever change once the `Task` is alive. A - // `unshare(CLONE_NEWPID)` or `setns(fd_pidns/pidfd, CLONE_NEWPID)` will not have an effect - // on the calling `Task`'s pid namespace. It will only effect the pid namespace of children - // created by the calling `Task`. This invariant guarantees that after having acquired a - // reference to a `Task`'s pid namespace it will remain unchanged. - // - // When a task has exited and been reaped `release_task()` will be called. This will set - // the `PidNamespace` of the task to `NULL`. So retrieving the `PidNamespace` of a task - // that is dead will return `NULL`. Note, that neither holding the RCU lock nor holding a - // referencing count to - // the `Task` will prevent `release_task()` being called. - // - // In order to retrieve the `PidNamespace` of a `Task` the `task_active_pid_ns()` function - // can be used. There are two cases to consider: - // - // (1) retrieving the `PidNamespace` of the `current` task - // (2) retrieving the `PidNamespace` of a non-`current` task - // - // From system call context retrieving the `PidNamespace` for case (1) is always safe and - // requires neither RCU locking nor a reference count to be held. Retrieving the - // `PidNamespace` after `release_task()` for current will return `NULL` but no codepath - // like that is exposed to Rust. - // - // Retrieving the `PidNamespace` from system call context for (2) requires RCU protection. - // Accessing `PidNamespace` outside of RCU protection requires a reference count that - // must've been acquired while holding the RCU lock. Note that accessing a non-`current` - // task means `NULL` can be returned as the non-`current` task could have already passed - // through `release_task()`. - // - // To retrieve (1) the `current_pid_ns!()` macro should be used which ensure that the - // returned `PidNamespace` cannot outlive the calling scope. The associated - // `current_pid_ns()` function should not be called directly as it could be abused to - // created an unbounded lifetime for `PidNamespace`. The `current_pid_ns!()` macro allows - // Rust to handle the common case of accessing `current`'s `PidNamespace` without RCU - // protection and without having to acquire a reference count. - // - // For (2) the `task_get_pid_ns()` method must be used. This will always acquire a - // reference on `PidNamespace` and will return an `Option` to force the caller to - // explicitly handle the case where `PidNamespace` is `None`, something that tends to be - // forgotten when doing the equivalent operation in `C`. Missing RCU primitives make it - // difficult to perform operations that are otherwise safe without holding a reference - // count as long as RCU protection is guaranteed. But it is not important currently. But we - // do want it in the future. - // - // Note for (2) the required RCU protection around calling `task_active_pid_ns()` - // synchronizes against putting the last reference of the associated `struct pid` of - // `task->thread_pid`. The `struct pid` stored in that field is used to retrieve the - // `PidNamespace` of the caller. When `release_task()` is called `task->thread_pid` will be - // `NULL`ed and `put_pid()` on said `struct pid` will be delayed in `free_pid()` via - // `call_rcu()` allowing everyone with an RCU protected access to the `struct pid` acquired - // from `task->thread_pid` to finish. - // - // SAFETY: The current task's pid namespace is valid as long as the current task is running. - let pidns = unsafe { bindings::task_active_pid_ns(Task::current_raw()) }; - PidNamespaceRef { - // SAFETY: If the current thread is still running, the current task and its associated - // pid namespace are valid. `PidNamespaceRef` is not `Send`, so we know it cannot be - // transferred to another thread (where it could potentially outlive the current - // `Task`). The caller needs to ensure that the PidNamespaceRef doesn't outlive the - // current task/thread. - task: unsafe { PidNamespace::from_ptr(pidns) }, - _not_send: NotThreadSafe, + // CAST: The layout of `struct task_struct` and `CurrentTask` is identical. + task: Task::current_raw().cast(), } } @@ -328,6 +275,70 @@ impl Task { } } +impl CurrentTask { + /// Access the address space of the current task. + /// + /// This function does not touch the refcount of the mm. + #[inline] + pub fn mm(&self) -> Option<&MmWithUser> { + // SAFETY: The `mm` field of `current` is not modified from other threads, so reading it is + // not a data race. + let mm = unsafe { (*self.as_ptr()).mm }; + + if mm.is_null() { + return None; + } + + // SAFETY: If `current->mm` is non-null, then it references a valid mm with a non-zero + // value of `mm_users`. Furthermore, the returned `&MmWithUser` borrows from this + // `CurrentTask`, so it cannot escape the scope in which the current pointer was obtained. + // + // This is safe even if `kthread_use_mm()`/`kthread_unuse_mm()` are used. There are two + // relevant cases: + // * If the `&CurrentTask` was created before `kthread_use_mm()`, then it cannot be + // accessed during the `kthread_use_mm()`/`kthread_unuse_mm()` scope due to the + // `NotThreadSafe` field of `CurrentTask`. + // * If the `&CurrentTask` was created within a `kthread_use_mm()`/`kthread_unuse_mm()` + // scope, then the `&CurrentTask` cannot escape that scope, so the returned `&MmWithUser` + // also cannot escape that scope. + // In either case, it's not possible to read `current->mm` and keep using it after the + // scope is ended with `kthread_unuse_mm()`. + Some(unsafe { MmWithUser::from_raw(mm) }) + } + + /// Access the pid namespace of the current task. + /// + /// This function does not touch the refcount of the namespace or use RCU protection. + /// + /// To access the pid namespace of another task, see [`Task::get_pid_ns`]. + #[doc(alias = "task_active_pid_ns")] + #[inline] + pub fn active_pid_ns(&self) -> Option<&PidNamespace> { + // SAFETY: It is safe to call `task_active_pid_ns` without RCU protection when calling it + // on the current task. + let active_ns = unsafe { bindings::task_active_pid_ns(self.as_ptr()) }; + + if active_ns.is_null() { + return None; + } + + // The lifetime of `PidNamespace` is bound to `Task` and `struct pid`. + // + // The `PidNamespace` of a `Task` doesn't ever change once the `Task` is alive. + // + // From system call context retrieving the `PidNamespace` for the current task is always + // safe and requires neither RCU locking nor a reference count to be held. Retrieving the + // `PidNamespace` after `release_task()` for current will return `NULL` but no codepath + // like that is exposed to Rust. + // + // SAFETY: If `current`'s pid ns is non-null, then it references a valid pid ns. + // Furthermore, the returned `&PidNamespace` borrows from this `CurrentTask`, so it cannot + // escape the scope in which the current pointer was obtained, e.g. it cannot live past a + // `release_task()` call. + Some(unsafe { PidNamespace::from_ptr(active_ns) }) + } +} + // SAFETY: The type invariants guarantee that `Task` is always refcounted. unsafe impl crate::types::AlwaysRefCounted for Task { fn inc_ref(&self) { diff --git a/rust/macros/module.rs b/rust/macros/module.rs index 2f66107847f7..c4afdd69e490 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -176,7 +176,9 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { let info = ModuleInfo::parse(&mut it); - let mut modinfo = ModInfoBuilder::new(info.name.as_ref()); + // Rust does not allow hyphens in identifiers, use underscore instead. + let ident = info.name.replace('-', "_"); + let mut modinfo = ModInfoBuilder::new(ident.as_ref()); if let Some(author) = info.author { modinfo.emit("author", &author); } @@ -301,14 +303,15 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { #[doc(hidden)] #[link_section = \"{initcall_section}\"] #[used] - pub static __{name}_initcall: extern \"C\" fn() -> kernel::ffi::c_int = __{name}_init; + pub static __{ident}_initcall: extern \"C\" fn() -> + kernel::ffi::c_int = __{ident}_init; #[cfg(not(MODULE))] #[cfg(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)] core::arch::global_asm!( r#\".section \"{initcall_section}\", \"a\" - __{name}_initcall: - .long __{name}_init - . + __{ident}_initcall: + .long __{ident}_init - . .previous \"# ); @@ -316,7 +319,7 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { #[cfg(not(MODULE))] #[doc(hidden)] #[no_mangle] - pub extern \"C\" fn __{name}_init() -> kernel::ffi::c_int {{ + pub extern \"C\" fn __{ident}_init() -> kernel::ffi::c_int {{ // SAFETY: This function is inaccessible to the outside due to the double // module wrapping it. It is called exactly once by the C side via its // placement above in the initcall section. @@ -326,13 +329,13 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { #[cfg(not(MODULE))] #[doc(hidden)] #[no_mangle] - pub extern \"C\" fn __{name}_exit() {{ + pub extern \"C\" fn __{ident}_exit() {{ // SAFETY: // - This function is inaccessible to the outside due to the double // module wrapping it. It is called exactly once by the C side via its // unique name, - // - furthermore it is only called after `__{name}_init` has returned `0` - // (which delegates to `__init`). + // - furthermore it is only called after `__{ident}_init` has + // returned `0` (which delegates to `__init`). unsafe {{ __exit() }} }} @@ -372,6 +375,7 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { ", type_ = info.type_, name = info.name, + ident = ident, modinfo = modinfo.buffer, initcall_section = ".initcall6.init" ) |