Diffstat (limited to 'rust/kernel')
-rw-r--r--  rust/kernel/lib.rs          1
-rw-r--r--  rust/kernel/miscdevice.rs  45
-rw-r--r--  rust/kernel/mm.rs         344
-rw-r--r--  rust/kernel/mm/virt.rs    471
-rw-r--r--  rust/kernel/task.rs       247
5 files changed, 990 insertions(+), 118 deletions(-)
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index 24440751ca26..911c72a0fc21 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -72,6 +72,7 @@ pub mod jump_label;
pub mod kunit;
pub mod list;
pub mod miscdevice;
+pub mod mm;
#[cfg(CONFIG_NET)]
pub mod net;
pub mod of;
diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs
index fa9ecc42602a..9d9771247c38 100644
--- a/rust/kernel/miscdevice.rs
+++ b/rust/kernel/miscdevice.rs
@@ -14,6 +14,7 @@ use crate::{
error::{to_result, Error, Result, VTABLE_DEFAULT_ERROR},
ffi::{c_int, c_long, c_uint, c_ulong},
fs::File,
+ mm::virt::VmaNew,
prelude::*,
seq_file::SeqFile,
str::CStr,
@@ -119,6 +120,22 @@ pub trait MiscDevice: Sized {
drop(device);
}
+ /// Handler for mmap.
+ ///
+ /// This function is invoked when a user space process calls the `mmap` system call on
+ /// `file`. The function is a callback that is part of the VMA initializer. The kernel will do
+ /// initial setup of the VMA before calling this function. The function can then interact with
+ /// the VMA initialization by calling methods of `vma`. If the function does not return an
+ /// error, the kernel will complete initialization of the VMA according to the properties of
+ /// `vma`.
+ fn mmap(
+ _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>,
+ _file: &File,
+ _vma: &VmaNew,
+ ) -> Result {
+ build_error!(VTABLE_DEFAULT_ERROR)
+ }
+
/// Handler for ioctls.
///
/// The `cmd` argument is usually manipulated using the utilities in [`kernel::ioctl`].
@@ -226,6 +243,33 @@ impl<T: MiscDevice> MiscdeviceVTable<T> {
/// # Safety
///
/// `file` must be a valid file that is associated with a `MiscDeviceRegistration<T>`.
+ /// `vma` must be a vma that is currently being mmap'ed with this file.
+ unsafe extern "C" fn mmap(
+ file: *mut bindings::file,
+ vma: *mut bindings::vm_area_struct,
+ ) -> c_int {
+ // SAFETY: The mmap call of a file can access the private data.
+ let private = unsafe { (*file).private_data };
+ // SAFETY: This is a Rust Miscdevice, so we call `into_foreign` in `open` and
+ // `from_foreign` in `release`, and `fops_mmap` is guaranteed to be called between those
+ // two operations.
+ let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) };
+ // SAFETY: The caller provides a vma that is undergoing initial VMA setup.
+ let area = unsafe { VmaNew::from_raw(vma) };
+ // SAFETY:
+ // * The file is valid for the duration of this call.
+ // * There is no active fdget_pos region on the file on this thread.
+ let file = unsafe { File::from_raw_file(file) };
+
+ match T::mmap(device, file, area) {
+ Ok(()) => 0,
+ Err(err) => err.to_errno(),
+ }
+ }
+
+ /// # Safety
+ ///
+ /// `file` must be a valid file that is associated with a `MiscDeviceRegistration<T>`.
unsafe extern "C" fn ioctl(file: *mut bindings::file, cmd: c_uint, arg: c_ulong) -> c_long {
// SAFETY: The ioctl call of a file can access the private data.
let private = unsafe { (*file).private_data };
@@ -291,6 +335,7 @@ impl<T: MiscDevice> MiscdeviceVTable<T> {
const VTABLE: bindings::file_operations = bindings::file_operations {
open: Some(Self::open),
release: Some(Self::release),
+ mmap: if T::HAS_MMAP { Some(Self::mmap) } else { None },
unlocked_ioctl: if T::HAS_IOCTL {
Some(Self::ioctl)
} else {
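
For illustration only (not part of this patch): a minimal sketch of a driver hooking the new `mmap` callback. The `NullDev` type is hypothetical, and the `open()` signature and `KBox` pointer type are assumed to follow the in-tree `MiscDevice` trait.

```rust
use kernel::{
    fs::File,
    miscdevice::{MiscDevice, MiscDeviceRegistration},
    mm::virt::VmaNew,
    prelude::*,
    types::ForeignOwnable,
};

/// Hypothetical misc device with no per-open state.
struct NullDev;

#[vtable]
impl MiscDevice for NullDev {
    type Ptr = KBox<Self>;

    fn open(_file: &File, _misc: &MiscDeviceRegistration<Self>) -> Result<KBox<Self>> {
        Ok(KBox::new(NullDev, GFP_KERNEL)?)
    }

    fn mmap(
        _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>,
        _file: &File,
        vma: &VmaNew,
    ) -> Result {
        // Only accept mappings that can never become writable or executable.
        vma.try_clear_maywrite()?;
        vma.try_clear_mayexec()?;
        Ok(())
    }
}
```
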
diff --git a/rust/kernel/mm.rs b/rust/kernel/mm.rs
new file mode 100644
index 000000000000..615907a0f3b4
--- /dev/null
+++ b/rust/kernel/mm.rs
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2024 Google LLC.
+
+//! Memory management.
+//!
+//! This module deals with managing the address space of userspace processes. Each process has an
+//! instance of [`Mm`], which keeps track of multiple VMAs (virtual memory areas). Each VMA
+//! corresponds to a region of memory that the userspace process can access, and the VMA lets you
+//! control what happens when userspace reads or writes to that region of memory.
+//!
+//! C header: [`include/linux/mm.h`](srctree/include/linux/mm.h)
+#![cfg(CONFIG_MMU)]
+
+use crate::{
+ bindings,
+ types::{ARef, AlwaysRefCounted, NotThreadSafe, Opaque},
+};
+use core::{ops::Deref, ptr::NonNull};
+
+pub mod virt;
+use virt::VmaRef;
+
+/// A wrapper for the kernel's `struct mm_struct`.
+///
+/// This represents the address space of a userspace process, so each process has one `Mm`
+/// instance. It may hold many VMAs internally.
+///
+/// There is a counter called `mm_users` that counts the users of the address space; this includes
+/// the userspace process itself, but can also include kernel threads accessing the address space.
+/// Once `mm_users` reaches zero, this indicates that the address space can be destroyed. To access
+/// the address space, you must prevent `mm_users` from reaching zero while you are accessing it.
+/// The [`MmWithUser`] type represents an address space where this is guaranteed, and you can
+/// create one using [`mmget_not_zero`].
+///
+/// The `ARef<Mm>` smart pointer holds an `mmgrab` refcount. Its destructor may sleep.
+///
+/// # Invariants
+///
+/// Values of this type are always refcounted using `mmgrab`.
+///
+/// [`mmget_not_zero`]: Mm::mmget_not_zero
+#[repr(transparent)]
+pub struct Mm {
+ mm: Opaque<bindings::mm_struct>,
+}
+
+// SAFETY: It is safe to call `mmdrop` on another thread than where `mmgrab` was called.
+unsafe impl Send for Mm {}
+// SAFETY: All methods on `Mm` can be called in parallel from several threads.
+unsafe impl Sync for Mm {}
+
+// SAFETY: By the type invariants, this type is always refcounted.
+unsafe impl AlwaysRefCounted for Mm {
+ #[inline]
+ fn inc_ref(&self) {
+ // SAFETY: The pointer is valid since self is a reference.
+ unsafe { bindings::mmgrab(self.as_raw()) };
+ }
+
+ #[inline]
+ unsafe fn dec_ref(obj: NonNull<Self>) {
+ // SAFETY: The caller is giving up their refcount.
+ unsafe { bindings::mmdrop(obj.cast().as_ptr()) };
+ }
+}
+
+/// A wrapper for the kernel's `struct mm_struct`.
+///
+/// This type is like [`Mm`], but with non-zero `mm_users`. It can only be used when `mm_users` can
+/// be proven to be non-zero at compile-time, usually because the relevant code holds an `mmget`
+/// refcount. It can be used to access the associated address space.
+///
+/// The `ARef<MmWithUser>` smart pointer holds an `mmget` refcount. Its destructor may sleep.
+///
+/// # Invariants
+///
+/// Values of this type are always refcounted using `mmget`. The value of `mm_users` is non-zero.
+#[repr(transparent)]
+pub struct MmWithUser {
+ mm: Mm,
+}
+
+// SAFETY: It is safe to call `mmput` on another thread than where `mmget` was called.
+unsafe impl Send for MmWithUser {}
+// SAFETY: All methods on `MmWithUser` can be called in parallel from several threads.
+unsafe impl Sync for MmWithUser {}
+
+// SAFETY: By the type invariants, this type is always refcounted.
+unsafe impl AlwaysRefCounted for MmWithUser {
+ #[inline]
+ fn inc_ref(&self) {
+ // SAFETY: The pointer is valid since self is a reference.
+ unsafe { bindings::mmget(self.as_raw()) };
+ }
+
+ #[inline]
+ unsafe fn dec_ref(obj: NonNull<Self>) {
+ // SAFETY: The caller is giving up their refcount.
+ unsafe { bindings::mmput(obj.cast().as_ptr()) };
+ }
+}
+
+// Make all `Mm` methods available on `MmWithUser`.
+impl Deref for MmWithUser {
+ type Target = Mm;
+
+ #[inline]
+ fn deref(&self) -> &Mm {
+ &self.mm
+ }
+}
+
+/// A wrapper for the kernel's `struct mm_struct`.
+///
+/// This type is identical to `MmWithUser` except that it uses `mmput_async` when dropping a
+/// refcount. This means that the destructor of `ARef<MmWithUserAsync>` is safe to call in atomic
+/// context.
+///
+/// # Invariants
+///
+/// Values of this type are always refcounted using `mmget`. The value of `mm_users` is non-zero.
+#[repr(transparent)]
+pub struct MmWithUserAsync {
+ mm: MmWithUser,
+}
+
+// SAFETY: It is safe to call `mmput_async` on another thread than where `mmget` was called.
+unsafe impl Send for MmWithUserAsync {}
+// SAFETY: All methods on `MmWithUserAsync` can be called in parallel from several threads.
+unsafe impl Sync for MmWithUserAsync {}
+
+// SAFETY: By the type invariants, this type is always refcounted.
+unsafe impl AlwaysRefCounted for MmWithUserAsync {
+ #[inline]
+ fn inc_ref(&self) {
+ // SAFETY: The pointer is valid since self is a reference.
+ unsafe { bindings::mmget(self.as_raw()) };
+ }
+
+ #[inline]
+ unsafe fn dec_ref(obj: NonNull<Self>) {
+ // SAFETY: The caller is giving up their refcount.
+ unsafe { bindings::mmput_async(obj.cast().as_ptr()) };
+ }
+}
+
+// Make all `MmWithUser` methods available on `MmWithUserAsync`.
+impl Deref for MmWithUserAsync {
+ type Target = MmWithUser;
+
+ #[inline]
+ fn deref(&self) -> &MmWithUser {
+ &self.mm
+ }
+}
+
+// These methods are safe to call even if `mm_users` is zero.
+impl Mm {
+ /// Returns a raw pointer to the inner `mm_struct`.
+ #[inline]
+ pub fn as_raw(&self) -> *mut bindings::mm_struct {
+ self.mm.get()
+ }
+
+ /// Obtain a reference from a raw pointer.
+ ///
+ /// # Safety
+ ///
+ /// The caller must ensure that `ptr` points at an `mm_struct`, and that it is not deallocated
+ /// during the lifetime 'a.
+ #[inline]
+ pub unsafe fn from_raw<'a>(ptr: *const bindings::mm_struct) -> &'a Mm {
+ // SAFETY: Caller promises that the pointer is valid for 'a. Layouts are compatible due to
+ // repr(transparent).
+ unsafe { &*ptr.cast() }
+ }
+
+ /// Calls `mmget_not_zero` and returns a handle if it succeeds.
+ #[inline]
+ pub fn mmget_not_zero(&self) -> Option<ARef<MmWithUser>> {
+ // SAFETY: The pointer is valid since self is a reference.
+ let success = unsafe { bindings::mmget_not_zero(self.as_raw()) };
+
+ if success {
+ // SAFETY: We just created an `mmget` refcount.
+ Some(unsafe { ARef::from_raw(NonNull::new_unchecked(self.as_raw().cast())) })
+ } else {
+ None
+ }
+ }
+}
+
+// These methods require `mm_users` to be non-zero.
+impl MmWithUser {
+ /// Obtain a reference from a raw pointer.
+ ///
+ /// # Safety
+ ///
+ /// The caller must ensure that `ptr` points at an `mm_struct`, and that `mm_users` remains
+ /// non-zero for the duration of the lifetime 'a.
+ #[inline]
+ pub unsafe fn from_raw<'a>(ptr: *const bindings::mm_struct) -> &'a MmWithUser {
+ // SAFETY: Caller promises that the pointer is valid for 'a. The layout is compatible due
+ // to repr(transparent).
+ unsafe { &*ptr.cast() }
+ }
+
+ /// Use `mmput_async` when dropping this refcount.
+ #[inline]
+ pub fn into_mmput_async(me: ARef<MmWithUser>) -> ARef<MmWithUserAsync> {
+ // SAFETY: The layouts and invariants are compatible.
+ unsafe { ARef::from_raw(ARef::into_raw(me).cast()) }
+ }
+
+ /// Attempt to access a vma using the vma read lock.
+ ///
+ /// This is an optimistic trylock operation, so it may fail if there is contention. In that
+ /// case, you should fall back to taking the mmap read lock.
+ ///
+ /// When per-vma locks are disabled, this always returns `None`.
+ #[inline]
+ pub fn lock_vma_under_rcu(&self, vma_addr: usize) -> Option<VmaReadGuard<'_>> {
+ #[cfg(CONFIG_PER_VMA_LOCK)]
+ {
+ // SAFETY: Calling `bindings::lock_vma_under_rcu` is always okay given an mm where
+ // `mm_users` is non-zero.
+ let vma = unsafe { bindings::lock_vma_under_rcu(self.as_raw(), vma_addr) };
+ if !vma.is_null() {
+ return Some(VmaReadGuard {
+ // SAFETY: If `lock_vma_under_rcu` returns a non-null ptr, then it points at a
+ // valid vma. The vma is stable for as long as the vma read lock is held.
+ vma: unsafe { VmaRef::from_raw(vma) },
+ _nts: NotThreadSafe,
+ });
+ }
+ }
+
+ // Silence warnings about unused variables.
+ #[cfg(not(CONFIG_PER_VMA_LOCK))]
+ let _ = vma_addr;
+
+ None
+ }
+
+ /// Lock the mmap read lock.
+ #[inline]
+ pub fn mmap_read_lock(&self) -> MmapReadGuard<'_> {
+ // SAFETY: The pointer is valid since self is a reference.
+ unsafe { bindings::mmap_read_lock(self.as_raw()) };
+
+ // INVARIANT: We just acquired the read lock.
+ MmapReadGuard {
+ mm: self,
+ _nts: NotThreadSafe,
+ }
+ }
+
+ /// Try to lock the mmap read lock.
+ #[inline]
+ pub fn mmap_read_trylock(&self) -> Option<MmapReadGuard<'_>> {
+ // SAFETY: The pointer is valid since self is a reference.
+ let success = unsafe { bindings::mmap_read_trylock(self.as_raw()) };
+
+ if success {
+ // INVARIANT: We just acquired the read lock.
+ Some(MmapReadGuard {
+ mm: self,
+ _nts: NotThreadSafe,
+ })
+ } else {
+ None
+ }
+ }
+}
+
+/// A guard for the mmap read lock.
+///
+/// # Invariants
+///
+/// This `MmapReadGuard` guard owns the mmap read lock.
+pub struct MmapReadGuard<'a> {
+ mm: &'a MmWithUser,
+ // `mmap_read_lock` and `mmap_read_unlock` must be called on the same thread
+ _nts: NotThreadSafe,
+}
+
+impl<'a> MmapReadGuard<'a> {
+ /// Look up a vma at the given address.
+ #[inline]
+ pub fn vma_lookup(&self, vma_addr: usize) -> Option<&virt::VmaRef> {
+ // SAFETY: By the type invariants we hold the mmap read guard, so we can safely call this
+ // method. Any value is okay for `vma_addr`.
+ let vma = unsafe { bindings::vma_lookup(self.mm.as_raw(), vma_addr) };
+
+ if vma.is_null() {
+ None
+ } else {
+ // SAFETY: We just checked that a vma was found, so the pointer references a valid vma.
+ //
+ // Furthermore, the returned vma is still under the protection of the read lock guard
+ // and can be used while the mmap read lock is still held. That the vma is not used
+ // after the MmapReadGuard gets dropped is enforced by the borrow-checker.
+ unsafe { Some(virt::VmaRef::from_raw(vma)) }
+ }
+ }
+}
+
+impl Drop for MmapReadGuard<'_> {
+ #[inline]
+ fn drop(&mut self) {
+ // SAFETY: We hold the read lock by the type invariants.
+ unsafe { bindings::mmap_read_unlock(self.mm.as_raw()) };
+ }
+}
+
+/// A guard for the vma read lock.
+///
+/// # Invariants
+///
+/// This `VmaReadGuard` guard owns the vma read lock.
+pub struct VmaReadGuard<'a> {
+ vma: &'a VmaRef,
+ // `vma_end_read` must be called on the same thread as where the lock was taken
+ _nts: NotThreadSafe,
+}
+
+// Make all `VmaRef` methods available on `VmaReadGuard`.
+impl Deref for VmaReadGuard<'_> {
+ type Target = VmaRef;
+
+ #[inline]
+ fn deref(&self) -> &VmaRef {
+ self.vma
+ }
+}
+
+impl Drop for VmaReadGuard<'_> {
+ #[inline]
+ fn drop(&mut self) {
+ // SAFETY: We hold the read lock by the type invariants.
+ unsafe { bindings::vma_end_read(self.vma.as_ptr()) };
+ }
+}
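
For illustration only (not part of this patch): a sketch of how the two read-lock paths above compose, trying the optimistic per-vma lock first and falling back to the mmap read lock. The `vma_bounds` helper is hypothetical.

```rust
use kernel::mm::MmWithUser;

/// Return the start and end of the vma containing `addr`, preferring the
/// optimistic per-vma read lock and falling back to the mmap read lock.
fn vma_bounds(mm: &MmWithUser, addr: usize) -> Option<(usize, usize)> {
    // Fast path: per-vma read lock. Returns `None` under contention or when
    // CONFIG_PER_VMA_LOCK is disabled.
    if let Some(vma) = mm.lock_vma_under_rcu(addr) {
        return Some((vma.start(), vma.end()));
    }

    // Slow path: take the mmap read lock; the returned `&VmaRef` is tied to
    // the guard, so it cannot be used once the lock is released.
    let mmap = mm.mmap_read_lock();
    let vma = mmap.vma_lookup(addr)?;
    Some((vma.start(), vma.end()))
}
```
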
diff --git a/rust/kernel/mm/virt.rs b/rust/kernel/mm/virt.rs
new file mode 100644
index 000000000000..31803674aecc
--- /dev/null
+++ b/rust/kernel/mm/virt.rs
@@ -0,0 +1,471 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2024 Google LLC.
+
+//! Virtual memory.
+//!
+//! This module deals with managing a single VMA in the address space of a userspace process. Each
+//! VMA corresponds to a region of memory that the userspace process can access, and the VMA lets
+//! you control what happens when userspace reads or writes to that region of memory.
+//!
+//! The module has several different Rust types that all correspond to the C type called
+//! `vm_area_struct`. The different structs represent what kind of access you have to the VMA, e.g.
+//! [`VmaRef`] is used when you hold the mmap or vma read lock. Using the appropriate struct
+//! ensures that you can't, for example, accidentally call a function that requires holding the
+//! write lock when you only hold the read lock.
+
+use crate::{
+ bindings,
+ error::{code::EINVAL, to_result, Result},
+ mm::MmWithUser,
+ page::Page,
+ types::Opaque,
+};
+
+use core::ops::Deref;
+
+/// A wrapper for the kernel's `struct vm_area_struct` with read access.
+///
+/// It represents an area of virtual memory.
+///
+/// # Invariants
+///
+/// The caller must hold the mmap read lock or the vma read lock.
+#[repr(transparent)]
+pub struct VmaRef {
+ vma: Opaque<bindings::vm_area_struct>,
+}
+
+// Methods you can call when holding the mmap or vma read lock (or stronger). They must be usable
+// no matter what the vma flags are.
+impl VmaRef {
+ /// Access a virtual memory area given a raw pointer.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that `vma` is valid for the duration of 'a, and that the mmap or vma
+ /// read lock (or stronger) is held for at least the duration of 'a.
+ #[inline]
+ pub unsafe fn from_raw<'a>(vma: *const bindings::vm_area_struct) -> &'a Self {
+ // SAFETY: The caller ensures that the invariants are satisfied for the duration of 'a.
+ unsafe { &*vma.cast() }
+ }
+
+ /// Returns a raw pointer to this area.
+ #[inline]
+ pub fn as_ptr(&self) -> *mut bindings::vm_area_struct {
+ self.vma.get()
+ }
+
+ /// Access the underlying `mm_struct`.
+ #[inline]
+ pub fn mm(&self) -> &MmWithUser {
+ // SAFETY: By the type invariants, this `vm_area_struct` is valid and we hold the mmap/vma
+ // read lock or stronger. This implies that the underlying mm has a non-zero value of
+ // `mm_users`.
+ unsafe { MmWithUser::from_raw((*self.as_ptr()).vm_mm) }
+ }
+
+ /// Returns the flags associated with the virtual memory area.
+ ///
+ /// The possible flags are a combination of the constants in [`flags`].
+ #[inline]
+ pub fn flags(&self) -> vm_flags_t {
+ // SAFETY: By the type invariants, the caller holds at least the mmap read lock, so this
+ // access is not a data race.
+ unsafe { (*self.as_ptr()).__bindgen_anon_2.vm_flags }
+ }
+
+ /// Returns the (inclusive) start address of the virtual memory area.
+ #[inline]
+ pub fn start(&self) -> usize {
+ // SAFETY: By the type invariants, the caller holds at least the mmap read lock, so this
+ // access is not a data race.
+ unsafe { (*self.as_ptr()).__bindgen_anon_1.__bindgen_anon_1.vm_start }
+ }
+
+ /// Returns the (exclusive) end address of the virtual memory area.
+ #[inline]
+ pub fn end(&self) -> usize {
+ // SAFETY: By the type invariants, the caller holds at least the mmap read lock, so this
+ // access is not a data race.
+ unsafe { (*self.as_ptr()).__bindgen_anon_1.__bindgen_anon_1.vm_end }
+ }
+
+ /// Zap pages in the given page range.
+ ///
+ /// This clears page table mappings for the range at the leaf level, leaving all other page
+ /// tables intact, and freeing any memory referenced by the VMA in this range. That is,
+ /// anonymous memory is completely freed, file-backed memory has its reference count on page
+ /// cache folios dropped, and any dirty data will still be written back to disk as usual.
+ ///
+ /// It may seem odd that we clear at the leaf level; this is, however, a product of the page
+ /// table structure used to map physical memory into a virtual address space - each virtual
+ /// address is effectively a series of array indices into page tables, which form a
+ /// hierarchical page table structure.
+ ///
+ /// As a result, each page table level maps a multiple of the range mapped by the level below
+ /// it, and thus spans ever increasing ranges of pages. At the leaf or PTE level, we map the
+ /// actual physical memory.
+ ///
+ /// It is here where a zap operates, as it is the only place we can be certain of clearing without
+ /// impacting any other virtual mappings. It is an implementation detail as to whether the
+ /// kernel goes further in freeing unused page tables, but for the purposes of this operation
+ /// we must only assume that the leaf level is cleared.
+ #[inline]
+ pub fn zap_page_range_single(&self, address: usize, size: usize) {
+ let (end, did_overflow) = address.overflowing_add(size);
+ if did_overflow || address < self.start() || self.end() < end {
+ // TODO: call WARN_ONCE once Rust version of it is added
+ return;
+ }
+
+ // SAFETY: By the type invariants, the caller has read access to this VMA, which is
+ // sufficient for this method call. This method has no requirements on the vma flags. The
+ // address range is checked to be within the vma.
+ unsafe {
+ bindings::zap_page_range_single(self.as_ptr(), address, size, core::ptr::null_mut())
+ };
+ }
+
+ /// If the [`VM_MIXEDMAP`] flag is set, returns a [`VmaMixedMap`] to this VMA, otherwise
+ /// returns `None`.
+ ///
+ /// This can be used to access methods that require [`VM_MIXEDMAP`] to be set.
+ ///
+ /// [`VM_MIXEDMAP`]: flags::MIXEDMAP
+ #[inline]
+ pub fn as_mixedmap_vma(&self) -> Option<&VmaMixedMap> {
+ if self.flags() & flags::MIXEDMAP != 0 {
+ // SAFETY: We just checked that `VM_MIXEDMAP` is set. All other requirements are
+ // satisfied by the type invariants of `VmaRef`.
+ Some(unsafe { VmaMixedMap::from_raw(self.as_ptr()) })
+ } else {
+ None
+ }
+ }
+}
+
+/// A wrapper for the kernel's `struct vm_area_struct` with read access and [`VM_MIXEDMAP`] set.
+///
+/// It represents an area of virtual memory.
+///
+/// This struct is identical to [`VmaRef`] except that it must only be used when the
+/// [`VM_MIXEDMAP`] flag is set on the vma.
+///
+/// # Invariants
+///
+/// The caller must hold the mmap read lock or the vma read lock. The `VM_MIXEDMAP` flag must be
+/// set.
+///
+/// [`VM_MIXEDMAP`]: flags::MIXEDMAP
+#[repr(transparent)]
+pub struct VmaMixedMap {
+ vma: VmaRef,
+}
+
+// Make all `VmaRef` methods available on `VmaMixedMap`.
+impl Deref for VmaMixedMap {
+ type Target = VmaRef;
+
+ #[inline]
+ fn deref(&self) -> &VmaRef {
+ &self.vma
+ }
+}
+
+impl VmaMixedMap {
+ /// Access a virtual memory area given a raw pointer.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that `vma` is valid for the duration of 'a, and that the mmap read lock
+ /// (or stronger) is held for at least the duration of 'a. The `VM_MIXEDMAP` flag must be set.
+ #[inline]
+ pub unsafe fn from_raw<'a>(vma: *const bindings::vm_area_struct) -> &'a Self {
+ // SAFETY: The caller ensures that the invariants are satisfied for the duration of 'a.
+ unsafe { &*vma.cast() }
+ }
+
+ /// Maps a single page at the given address within the virtual memory area.
+ ///
+ /// This operation does not take ownership of the page.
+ #[inline]
+ pub fn vm_insert_page(&self, address: usize, page: &Page) -> Result {
+ // SAFETY: By the type invariant of `Self` caller has read access and has verified that
+ // `VM_MIXEDMAP` is set. By invariant on `Page` the page has order 0.
+ to_result(unsafe { bindings::vm_insert_page(self.as_ptr(), address, page.as_ptr()) })
+ }
+}
+
+/// A configuration object for setting up a VMA in an `f_ops->mmap()` hook.
+///
+/// The `f_ops->mmap()` hook is called when a new VMA is being created, and the hook is able to
+/// configure the VMA in various ways to fit the driver that owns it. Using `VmaNew` indicates that
+/// you are allowed to perform operations on the VMA that can only be performed before the VMA is
+/// fully initialized.
+///
+/// # Invariants
+///
+/// For the duration of 'a, the referenced vma must be undergoing initialization in an
+/// `f_ops->mmap()` hook.
+pub struct VmaNew {
+ vma: VmaRef,
+}
+
+// Make all `VmaRef` methods available on `VmaNew`.
+impl Deref for VmaNew {
+ type Target = VmaRef;
+
+ #[inline]
+ fn deref(&self) -> &VmaRef {
+ &self.vma
+ }
+}
+
+impl VmaNew {
+ /// Access a virtual memory area given a raw pointer.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that `vma` is undergoing initial vma setup for the duration of 'a.
+ #[inline]
+ pub unsafe fn from_raw<'a>(vma: *mut bindings::vm_area_struct) -> &'a Self {
+ // SAFETY: The caller ensures that the invariants are satisfied for the duration of 'a.
+ unsafe { &*vma.cast() }
+ }
+
+ /// Internal method for updating the vma flags.
+ ///
+ /// # Safety
+ ///
+ /// This must not be used to set the flags to an invalid value.
+ #[inline]
+ unsafe fn update_flags(&self, set: vm_flags_t, unset: vm_flags_t) {
+ let mut flags = self.flags();
+ flags |= set;
+ flags &= !unset;
+
+ // SAFETY: This is not a data race: the vma is undergoing initial setup, so it's not yet
+ // shared. Additionally, `VmaNew` is `!Sync`, so it cannot be used to write in parallel.
+ // The caller promises that this does not set the flags to an invalid value.
+ unsafe { (*self.as_ptr()).__bindgen_anon_2.__vm_flags = flags };
+ }
+
+ /// Set the `VM_MIXEDMAP` flag on this vma.
+ ///
+ /// This enables the vma to contain both `struct page` and pure PFN pages. Returns a reference
+ /// that can be used to call `vm_insert_page` on the vma.
+ #[inline]
+ pub fn set_mixedmap(&self) -> &VmaMixedMap {
+ // SAFETY: We don't yet provide a way to set VM_PFNMAP, so this cannot put the flags in an
+ // invalid state.
+ unsafe { self.update_flags(flags::MIXEDMAP, 0) };
+
+ // SAFETY: We just set `VM_MIXEDMAP` on the vma.
+ unsafe { VmaMixedMap::from_raw(self.vma.as_ptr()) }
+ }
+
+ /// Set the `VM_IO` flag on this vma.
+ ///
+ /// This is used for memory mapped IO and similar. The flag tells other parts of the kernel to
+ /// avoid looking at the pages. For memory mapped IO this is useful as accesses to the pages
+ /// could have side effects.
+ #[inline]
+ pub fn set_io(&self) {
+ // SAFETY: Setting the VM_IO flag is always okay.
+ unsafe { self.update_flags(flags::IO, 0) };
+ }
+
+ /// Set the `VM_DONTEXPAND` flag on this vma.
+ ///
+ /// This prevents the vma from being expanded with `mremap()`.
+ #[inline]
+ pub fn set_dontexpand(&self) {
+ // SAFETY: Setting the VM_DONTEXPAND flag is always okay.
+ unsafe { self.update_flags(flags::DONTEXPAND, 0) };
+ }
+
+ /// Set the `VM_DONTCOPY` flag on this vma.
+ ///
+ /// This prevents the vma from being copied on fork. This option is only permanent if `VM_IO`
+ /// is set.
+ #[inline]
+ pub fn set_dontcopy(&self) {
+ // SAFETY: Setting the VM_DONTCOPY flag is always okay.
+ unsafe { self.update_flags(flags::DONTCOPY, 0) };
+ }
+
+ /// Set the `VM_DONTDUMP` flag on this vma.
+ ///
+ /// This prevents the vma from being included in core dumps. This option is only permanent if
+ /// `VM_IO` is set.
+ #[inline]
+ pub fn set_dontdump(&self) {
+ // SAFETY: Setting the VM_DONTDUMP flag is always okay.
+ unsafe { self.update_flags(flags::DONTDUMP, 0) };
+ }
+
+ /// Returns whether `VM_READ` is set.
+ ///
+ /// This flag indicates whether userspace is mapping this vma as readable.
+ #[inline]
+ pub fn readable(&self) -> bool {
+ (self.flags() & flags::READ) != 0
+ }
+
+ /// Try to clear the `VM_MAYREAD` flag, failing if `VM_READ` is set.
+ ///
+ /// This flag indicates whether userspace is allowed to make this vma readable with
+ /// `mprotect()`.
+ ///
+ /// Note that this operation is irreversible. Once `VM_MAYREAD` has been cleared, it can never
+ /// be set again.
+ #[inline]
+ pub fn try_clear_mayread(&self) -> Result {
+ if self.readable() {
+ return Err(EINVAL);
+ }
+ // SAFETY: Clearing `VM_MAYREAD` is okay when `VM_READ` is not set.
+ unsafe { self.update_flags(0, flags::MAYREAD) };
+ Ok(())
+ }
+
+ /// Returns whether `VM_WRITE` is set.
+ ///
+ /// This flag indicates whether userspace is mapping this vma as writable.
+ #[inline]
+ pub fn writable(&self) -> bool {
+ (self.flags() & flags::WRITE) != 0
+ }
+
+ /// Try to clear the `VM_MAYWRITE` flag, failing if `VM_WRITE` is set.
+ ///
+ /// This flag indicates whether userspace is allowed to make this vma writable with
+ /// `mprotect()`.
+ ///
+ /// Note that this operation is irreversible. Once `VM_MAYWRITE` has been cleared, it can never
+ /// be set again.
+ #[inline]
+ pub fn try_clear_maywrite(&self) -> Result {
+ if self.writable() {
+ return Err(EINVAL);
+ }
+ // SAFETY: Clearing `VM_MAYWRITE` is okay when `VM_WRITE` is not set.
+ unsafe { self.update_flags(0, flags::MAYWRITE) };
+ Ok(())
+ }
+
+ /// Returns whether `VM_EXEC` is set.
+ ///
+ /// This flag indicates whether userspace is mapping this vma as executable.
+ #[inline]
+ pub fn executable(&self) -> bool {
+ (self.flags() & flags::EXEC) != 0
+ }
+
+ /// Try to clear the `VM_MAYEXEC` flag, failing if `VM_EXEC` is set.
+ ///
+ /// This flag indicates whether userspace is allowed to make this vma executable with
+ /// `mprotect()`.
+ ///
+ /// Note that this operation is irreversible. Once `VM_MAYEXEC` has been cleared, it can never
+ /// be set again.
+ #[inline]
+ pub fn try_clear_mayexec(&self) -> Result {
+ if self.executable() {
+ return Err(EINVAL);
+ }
+ // SAFETY: Clearing `VM_MAYEXEC` is okay when `VM_EXEC` is not set.
+ unsafe { self.update_flags(0, flags::MAYEXEC) };
+ Ok(())
+ }
+}
+
+/// The integer type used for vma flags.
+#[doc(inline)]
+pub use bindings::vm_flags_t;
+
+/// All possible flags for [`VmaRef`].
+pub mod flags {
+ use super::vm_flags_t;
+ use crate::bindings;
+
+ /// No flags are set.
+ pub const NONE: vm_flags_t = bindings::VM_NONE as _;
+
+ /// Mapping allows reads.
+ pub const READ: vm_flags_t = bindings::VM_READ as _;
+
+ /// Mapping allows writes.
+ pub const WRITE: vm_flags_t = bindings::VM_WRITE as _;
+
+ /// Mapping allows execution.
+ pub const EXEC: vm_flags_t = bindings::VM_EXEC as _;
+
+ /// Mapping is shared.
+ pub const SHARED: vm_flags_t = bindings::VM_SHARED as _;
+
+ /// Mapping may be updated to allow reads.
+ pub const MAYREAD: vm_flags_t = bindings::VM_MAYREAD as _;
+
+ /// Mapping may be updated to allow writes.
+ pub const MAYWRITE: vm_flags_t = bindings::VM_MAYWRITE as _;
+
+ /// Mapping may be updated to allow execution.
+ pub const MAYEXEC: vm_flags_t = bindings::VM_MAYEXEC as _;
+
+ /// Mapping may be updated to be shared.
+ pub const MAYSHARE: vm_flags_t = bindings::VM_MAYSHARE as _;
+
+ /// Page-ranges managed without `struct page`, just pure PFN.
+ pub const PFNMAP: vm_flags_t = bindings::VM_PFNMAP as _;
+
+ /// Memory mapped I/O or similar.
+ pub const IO: vm_flags_t = bindings::VM_IO as _;
+
+ /// Do not copy this vma on fork.
+ pub const DONTCOPY: vm_flags_t = bindings::VM_DONTCOPY as _;
+
+ /// Cannot expand with mremap().
+ pub const DONTEXPAND: vm_flags_t = bindings::VM_DONTEXPAND as _;
+
+ /// Lock the pages covered when they are faulted in.
+ pub const LOCKONFAULT: vm_flags_t = bindings::VM_LOCKONFAULT as _;
+
+ /// Is a VM accounted object.
+ pub const ACCOUNT: vm_flags_t = bindings::VM_ACCOUNT as _;
+
+ /// Should the VM suppress accounting.
+ pub const NORESERVE: vm_flags_t = bindings::VM_NORESERVE as _;
+
+ /// Huge TLB Page VM.
+ pub const HUGETLB: vm_flags_t = bindings::VM_HUGETLB as _;
+
+ /// Synchronous page faults. (DAX-specific)
+ pub const SYNC: vm_flags_t = bindings::VM_SYNC as _;
+
+ /// Architecture-specific flag.
+ pub const ARCH_1: vm_flags_t = bindings::VM_ARCH_1 as _;
+
+ /// Wipe VMA contents in child on fork.
+ pub const WIPEONFORK: vm_flags_t = bindings::VM_WIPEONFORK as _;
+
+ /// Do not include in the core dump.
+ pub const DONTDUMP: vm_flags_t = bindings::VM_DONTDUMP as _;
+
+ /// Not soft dirty clean area.
+ pub const SOFTDIRTY: vm_flags_t = bindings::VM_SOFTDIRTY as _;
+
+ /// Can contain `struct page` and pure PFN pages.
+ pub const MIXEDMAP: vm_flags_t = bindings::VM_MIXEDMAP as _;
+
+ /// MADV_HUGEPAGE marked this vma.
+ pub const HUGEPAGE: vm_flags_t = bindings::VM_HUGEPAGE as _;
+
+ /// MADV_NOHUGEPAGE marked this vma.
+ pub const NOHUGEPAGE: vm_flags_t = bindings::VM_NOHUGEPAGE as _;
+
+ /// KSM may merge identical pages.
+ pub const MERGEABLE: vm_flags_t = bindings::VM_MERGEABLE as _;
+}
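
For illustration only (not part of this patch): a sketch of an `f_ops->mmap()` hook body using `VmaNew`. The `Device` struct, its `page` field, and the single-page size check are assumptions for the example.

```rust
use kernel::{
    mm::virt::VmaNew,
    page::{Page, PAGE_SIZE},
    prelude::*,
};

/// Hypothetical driver state owning one page that userspace may map read-only.
struct Device {
    page: Page,
}

impl Device {
    /// Body of an `f_ops->mmap()` hook for this device.
    fn mmap(&self, vma: &VmaNew) -> Result {
        // Only accept a mapping of exactly one page.
        if vma.end() - vma.start() != PAGE_SIZE {
            return Err(EINVAL);
        }
        // The mapping must never become writable or executable via mprotect().
        vma.try_clear_maywrite()?;
        vma.try_clear_mayexec()?;
        // The mapping must not be expanded by mremap().
        vma.set_dontexpand();
        // `vm_insert_page()` requires VM_MIXEDMAP; setting it returns a handle
        // whose type proves the flag is set.
        let vma = vma.set_mixedmap();
        // Map the driver-owned page; this does not transfer ownership of it.
        vma.vm_insert_page(vma.start(), &self.page)
    }
}
```
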
diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs
index 9e6f6854948d..927413d85484 100644
--- a/rust/kernel/task.rs
+++ b/rust/kernel/task.rs
@@ -7,6 +7,7 @@
use crate::{
bindings,
ffi::{c_int, c_long, c_uint},
+ mm::MmWithUser,
pid_namespace::PidNamespace,
types::{ARef, NotThreadSafe, Opaque},
};
@@ -33,22 +34,20 @@ pub const TASK_NORMAL: c_uint = bindings::TASK_NORMAL as c_uint;
#[macro_export]
macro_rules! current {
() => {
- // SAFETY: Deref + addr-of below create a temporary `TaskRef` that cannot outlive the
- // caller.
+ // SAFETY: This expression creates a temporary value that is dropped at the end of the
+ // caller's scope. The following mechanisms ensure that the resulting `&CurrentTask` cannot
+ // leave current task context:
+ //
+ // * To return to userspace, the caller must leave the current scope.
+ // * Operations such as `begin_new_exec()` are necessarily unsafe and the caller of
+ // `begin_new_exec()` is responsible for safety.
+ // * Rust abstractions for things such as a `kthread_use_mm()` scope must require the
+ // closure to be `Send`, so the `NotThreadSafe` field of `CurrentTask` ensures that the
+ // `&CurrentTask` cannot cross the scope in either direction.
unsafe { &*$crate::task::Task::current() }
};
}
-/// Returns the currently running task's pid namespace.
-#[macro_export]
-macro_rules! current_pid_ns {
- () => {
- // SAFETY: Deref + addr-of below create a temporary `PidNamespaceRef` that cannot outlive
- // the caller.
- unsafe { &*$crate::task::Task::current_pid_ns() }
- };
-}
-
/// Wraps the kernel's `struct task_struct`.
///
/// # Invariants
@@ -87,7 +86,7 @@ macro_rules! current_pid_ns {
/// impl State {
/// fn new() -> Self {
/// Self {
-/// creator: current!().into(),
+/// creator: ARef::from(&**current!()),
/// index: 0,
/// }
/// }
@@ -107,6 +106,44 @@ unsafe impl Send for Task {}
// synchronised by C code (e.g., `signal_pending`).
unsafe impl Sync for Task {}
+/// Represents the [`Task`] in the `current` global.
+///
+/// This type exists to provide more efficient operations that are only valid on the current task.
+/// For example, to retrieve the pid namespace of a task, you must use RCU protection unless it is
+/// the current task.
+///
+/// # Invariants
+///
+/// Each value of this type must only be accessed from the task context it was created within.
+///
+/// Of course, every thread is in a different task context, but for the purposes of this invariant,
+/// these operations also permanently leave the task context:
+///
+/// * Returning to userspace from system call context.
+/// * Calling `release_task()`.
+/// * Calling `begin_new_exec()` in a binary format loader.
+///
+/// Other operations temporarily create a new sub-context:
+///
+/// * Calling `kthread_use_mm()` creates a new context, and `kthread_unuse_mm()` returns to the
+/// old context.
+///
+/// This means that a `CurrentTask` obtained before a `kthread_use_mm()` call may be used again
+/// once `kthread_unuse_mm()` is called, but it must not be used between these two calls.
+/// Conversely, a `CurrentTask` obtained between a `kthread_use_mm()`/`kthread_unuse_mm()` pair
+/// must not be used after `kthread_unuse_mm()`.
+#[repr(transparent)]
+pub struct CurrentTask(Task, NotThreadSafe);
+
+// Make all `Task` methods available on `CurrentTask`.
+impl Deref for CurrentTask {
+ type Target = Task;
+ #[inline]
+ fn deref(&self) -> &Task {
+ &self.0
+ }
+}
+
/// The type of process identifiers (PIDs).
pub type Pid = bindings::pid_t;
@@ -133,119 +170,29 @@ impl Task {
///
/// # Safety
///
- /// Callers must ensure that the returned object doesn't outlive the current task/thread.
- pub unsafe fn current() -> impl Deref<Target = Task> {
- struct TaskRef<'a> {
- task: &'a Task,
- _not_send: NotThreadSafe,
+ /// Callers must ensure that the returned object is only used to access a [`CurrentTask`]
+ /// within the task context that was active when this function was called. For more details,
+ /// see the invariants section for [`CurrentTask`].
+ pub unsafe fn current() -> impl Deref<Target = CurrentTask> {
+ struct TaskRef {
+ task: *const CurrentTask,
}
- impl Deref for TaskRef<'_> {
- type Target = Task;
+ impl Deref for TaskRef {
+ type Target = CurrentTask;
fn deref(&self) -> &Self::Target {
- self.task
+ // SAFETY: The returned reference borrows from this `TaskRef`, so it cannot outlive
+ // the `TaskRef`, which the caller of `Task::current()` has promised will not
+ // outlive the task/thread for which `self.task` is the `current` pointer. Thus, it
+ // is okay to return a `CurrentTask` reference here.
+ unsafe { &*self.task }
}
}
- let current = Task::current_raw();
TaskRef {
- // SAFETY: If the current thread is still running, the current task is valid. Given
- // that `TaskRef` is not `Send`, we know it cannot be transferred to another thread
- // (where it could potentially outlive the caller).
- task: unsafe { &*current.cast() },
- _not_send: NotThreadSafe,
- }
- }
-
- /// Returns a PidNamespace reference for the currently executing task's/thread's pid namespace.
- ///
- /// This function can be used to create an unbounded lifetime by e.g., storing the returned
- /// PidNamespace in a global variable which would be a bug. So the recommended way to get the
- /// current task's/thread's pid namespace is to use the [`current_pid_ns`] macro because it is
- /// safe.
- ///
- /// # Safety
- ///
- /// Callers must ensure that the returned object doesn't outlive the current task/thread.
- pub unsafe fn current_pid_ns() -> impl Deref<Target = PidNamespace> {
- struct PidNamespaceRef<'a> {
- task: &'a PidNamespace,
- _not_send: NotThreadSafe,
- }
-
- impl Deref for PidNamespaceRef<'_> {
- type Target = PidNamespace;
-
- fn deref(&self) -> &Self::Target {
- self.task
- }
- }
-
- // The lifetime of `PidNamespace` is bound to `Task` and `struct pid`.
- //
- // The `PidNamespace` of a `Task` doesn't ever change once the `Task` is alive. A
- // `unshare(CLONE_NEWPID)` or `setns(fd_pidns/pidfd, CLONE_NEWPID)` will not have an effect
- // on the calling `Task`'s pid namespace. It will only effect the pid namespace of children
- // created by the calling `Task`. This invariant guarantees that after having acquired a
- // reference to a `Task`'s pid namespace it will remain unchanged.
- //
- // When a task has exited and been reaped `release_task()` will be called. This will set
- // the `PidNamespace` of the task to `NULL`. So retrieving the `PidNamespace` of a task
- // that is dead will return `NULL`. Note, that neither holding the RCU lock nor holding a
- // referencing count to
- // the `Task` will prevent `release_task()` being called.
- //
- // In order to retrieve the `PidNamespace` of a `Task` the `task_active_pid_ns()` function
- // can be used. There are two cases to consider:
- //
- // (1) retrieving the `PidNamespace` of the `current` task
- // (2) retrieving the `PidNamespace` of a non-`current` task
- //
- // From system call context retrieving the `PidNamespace` for case (1) is always safe and
- // requires neither RCU locking nor a reference count to be held. Retrieving the
- // `PidNamespace` after `release_task()` for current will return `NULL` but no codepath
- // like that is exposed to Rust.
- //
- // Retrieving the `PidNamespace` from system call context for (2) requires RCU protection.
- // Accessing `PidNamespace` outside of RCU protection requires a reference count that
- // must've been acquired while holding the RCU lock. Note that accessing a non-`current`
- // task means `NULL` can be returned as the non-`current` task could have already passed
- // through `release_task()`.
- //
- // To retrieve (1) the `current_pid_ns!()` macro should be used which ensure that the
- // returned `PidNamespace` cannot outlive the calling scope. The associated
- // `current_pid_ns()` function should not be called directly as it could be abused to
- // created an unbounded lifetime for `PidNamespace`. The `current_pid_ns!()` macro allows
- // Rust to handle the common case of accessing `current`'s `PidNamespace` without RCU
- // protection and without having to acquire a reference count.
- //
- // For (2) the `task_get_pid_ns()` method must be used. This will always acquire a
- // reference on `PidNamespace` and will return an `Option` to force the caller to
- // explicitly handle the case where `PidNamespace` is `None`, something that tends to be
- // forgotten when doing the equivalent operation in `C`. Missing RCU primitives make it
- // difficult to perform operations that are otherwise safe without holding a reference
- // count as long as RCU protection is guaranteed. But it is not important currently. But we
- // do want it in the future.
- //
- // Note for (2) the required RCU protection around calling `task_active_pid_ns()`
- // synchronizes against putting the last reference of the associated `struct pid` of
- // `task->thread_pid`. The `struct pid` stored in that field is used to retrieve the
- // `PidNamespace` of the caller. When `release_task()` is called `task->thread_pid` will be
- // `NULL`ed and `put_pid()` on said `struct pid` will be delayed in `free_pid()` via
- // `call_rcu()` allowing everyone with an RCU protected access to the `struct pid` acquired
- // from `task->thread_pid` to finish.
- //
- // SAFETY: The current task's pid namespace is valid as long as the current task is running.
- let pidns = unsafe { bindings::task_active_pid_ns(Task::current_raw()) };
- PidNamespaceRef {
- // SAFETY: If the current thread is still running, the current task and its associated
- // pid namespace are valid. `PidNamespaceRef` is not `Send`, so we know it cannot be
- // transferred to another thread (where it could potentially outlive the current
- // `Task`). The caller needs to ensure that the PidNamespaceRef doesn't outlive the
- // current task/thread.
- task: unsafe { PidNamespace::from_ptr(pidns) },
- _not_send: NotThreadSafe,
+ // CAST: The layout of `struct task_struct` and `CurrentTask` is identical.
+ task: Task::current_raw().cast(),
}
}
@@ -328,6 +275,70 @@ impl Task {
}
}
+impl CurrentTask {
+ /// Access the address space of the current task.
+ ///
+ /// This function does not touch the refcount of the mm.
+ #[inline]
+ pub fn mm(&self) -> Option<&MmWithUser> {
+ // SAFETY: The `mm` field of `current` is not modified from other threads, so reading it is
+ // not a data race.
+ let mm = unsafe { (*self.as_ptr()).mm };
+
+ if mm.is_null() {
+ return None;
+ }
+
+ // SAFETY: If `current->mm` is non-null, then it references a valid mm with a non-zero
+ // value of `mm_users`. Furthermore, the returned `&MmWithUser` borrows from this
+ // `CurrentTask`, so it cannot escape the scope in which the current pointer was obtained.
+ //
+ // This is safe even if `kthread_use_mm()`/`kthread_unuse_mm()` are used. There are two
+ // relevant cases:
+ // * If the `&CurrentTask` was created before `kthread_use_mm()`, then it cannot be
+ // accessed during the `kthread_use_mm()`/`kthread_unuse_mm()` scope due to the
+ // `NotThreadSafe` field of `CurrentTask`.
+ // * If the `&CurrentTask` was created within a `kthread_use_mm()`/`kthread_unuse_mm()`
+ // scope, then the `&CurrentTask` cannot escape that scope, so the returned `&MmWithUser`
+ // also cannot escape that scope.
+ // In either case, it's not possible to read `current->mm` and keep using it after the
+ // scope is ended with `kthread_unuse_mm()`.
+ Some(unsafe { MmWithUser::from_raw(mm) })
+ }
+
+ /// Access the pid namespace of the current task.
+ ///
+ /// This function does not touch the refcount of the namespace or use RCU protection.
+ ///
+ /// To access the pid namespace of another task, see [`Task::get_pid_ns`].
+ #[doc(alias = "task_active_pid_ns")]
+ #[inline]
+ pub fn active_pid_ns(&self) -> Option<&PidNamespace> {
+ // SAFETY: It is safe to call `task_active_pid_ns` without RCU protection when calling it
+ // on the current task.
+ let active_ns = unsafe { bindings::task_active_pid_ns(self.as_ptr()) };
+
+ if active_ns.is_null() {
+ return None;
+ }
+
+ // The lifetime of `PidNamespace` is bound to `Task` and `struct pid`.
+ //
+ // The `PidNamespace` of a `Task` doesn't ever change once the `Task` is alive.
+ //
+ // From system call context retrieving the `PidNamespace` for the current task is always
+ // safe and requires neither RCU locking nor a reference count to be held. Retrieving the
+ // `PidNamespace` after `release_task()` for current will return `NULL` but no codepath
+ // like that is exposed to Rust.
+ //
+ // SAFETY: If `current`'s pid ns is non-null, then it references a valid pid ns.
+ // Furthermore, the returned `&PidNamespace` borrows from this `CurrentTask`, so it cannot
+ // escape the scope in which the current pointer was obtained, e.g. it cannot live past a
+ // `release_task()` call.
+ Some(unsafe { PidNamespace::from_ptr(active_ns) })
+ }
+}
+
// SAFETY: The type invariants guarantee that `Task` is always refcounted.
unsafe impl crate::types::AlwaysRefCounted for Task {
fn inc_ref(&self) {
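
For illustration only (not part of this patch): a sketch of taking an `mmget` reference on the current task's address space via the new `CurrentTask::mm()` accessor. The `grab_current_mm` helper is hypothetical.

```rust
use kernel::{current, mm::MmWithUser, types::ARef};

/// Take an `mmget` reference on the current task's address space, suitable for
/// stashing in driver state and using from another context later.
fn grab_current_mm() -> Option<ARef<MmWithUser>> {
    // `current!()` yields a `&CurrentTask` that cannot leave this task context;
    // `mm()` borrows `current->mm` without touching its refcount, and
    // `mmget_not_zero()` then takes a refcount that may outlive this scope.
    current!().mm()?.mmget_not_zero()
}
```
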