diff options
110 files changed, 1568 insertions, 814 deletions
@@ -155,6 +155,9 @@ Brian King <brking@us.ibm.com> Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com> Bryan Tan <bryan-bt.tan@broadcom.com> <bryantan@vmware.com> Cai Huoqing <cai.huoqing@linux.dev> <caihuoqing@baidu.com> +Casey Connolly <casey.connolly@linaro.org> <caleb.connolly@linaro.org> +Casey Connolly <casey.connolly@linaro.org> <caleb@connolly.tech> +Casey Connolly <casey.connolly@linaro.org> <caleb@postmarketos.org> Can Guo <quic_cang@quicinc.com> <cang@codeaurora.org> Carl Huang <quic_cjhuang@quicinc.com> <cjhuang@codeaurora.org> Carlos Bilbao <carlos.bilbao@kernel.org> <carlos.bilbao@amd.com> diff --git a/Documentation/ABI/testing/sysfs-kernel-hardlockup_count b/Documentation/ABI/testing/sysfs-kernel-hardlockup_count new file mode 100644 index 000000000000..dfdd4078b077 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-hardlockup_count @@ -0,0 +1,7 @@ +What: /sys/kernel/hardlockup_count +Date: May 2025 +KernelVersion: 6.16 +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> +Description: + Shows how many times the system has detected a hard lockup since last boot. + Available only if CONFIG_HARDLOCKUP_DETECTOR is enabled. diff --git a/Documentation/ABI/testing/sysfs-kernel-rcu_stall_count b/Documentation/ABI/testing/sysfs-kernel-rcu_stall_count new file mode 100644 index 000000000000..a4a97a7f4a4d --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-rcu_stall_count @@ -0,0 +1,6 @@ +What: /sys/kernel/rcu_stall_count +Date: May 2025 +KernelVersion: 6.16 +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> +Description: + Shows how many times the system has detected an RCU stall since last boot. 
diff --git a/Documentation/ABI/testing/sysfs-kernel-softlockup_count b/Documentation/ABI/testing/sysfs-kernel-softlockup_count new file mode 100644 index 000000000000..337ff5531b5f --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-softlockup_count @@ -0,0 +1,7 @@ +What: /sys/kernel/softlockup_count +Date: May 2025 +KernelVersion: 6.16 +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> +Description: + Shows how many times the system has detected a soft lockup since last boot. + Available only if CONFIG_SOFTLOCKUP_DETECTOR is enabled. diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index 1f7f14c6e184..20fabdf6567e 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -547,6 +547,38 @@ from within add_taint() whenever the value set in this bitmask matches with the bit flag being set by add_taint(). This will cause a kdump to occur at the add_taint()->panic() call. +Write the dump file to encrypted disk volume +============================================ + +CONFIG_CRASH_DM_CRYPT can be enabled to support saving the dump file to an +encrypted disk volume (only x86_64 supported for now). User space can interact +with /sys/kernel/config/crash_dm_crypt_keys for setup, + +1. Tell the first kernel what logon keys are needed to unlock the disk volumes, + # Add key #1 + mkdir /sys/kernel/config/crash_dm_crypt_keys/7d26b7b4-e342-4d2d-b660-7426b0996720 + # Add key #1's description + echo cryptsetup:7d26b7b4-e342-4d2d-b660-7426b0996720 > /sys/kernel/config/crash_dm_crypt_keys/description + + # how many keys do we have now? + cat /sys/kernel/config/crash_dm_crypt_keys/count + 1 + + # Add key #2 in the same way + + # how many keys do we have now? + cat /sys/kernel/config/crash_dm_crypt_keys/count + 2 + + # To support CPU/memory hot-plugging, re-use keys already saved to reserved + # memory + echo true > /sys/kernel/config/crash_dm_crypt_keys/reuse + +2. 
Load the dump-capture kernel + +3. After the dump-capture kernel has booted, restore the keys to user keyring + echo yes > /sys/kernel/crash_dm_crypt_keys/restore + Contact ======= diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst index 0f714fc945ac..8cf4614385b7 100644 --- a/Documentation/admin-guide/kdump/vmcoreinfo.rst +++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst @@ -331,8 +331,8 @@ PG_lru|PG_private|PG_swapcache|PG_swapbacked|PG_slab|PG_hwpoision|PG_head_mask|P Page attributes. These flags are used to filter various unnecessary for dumping pages. -PAGE_BUDDY_MAPCOUNT_VALUE(~PG_buddy)|PAGE_OFFLINE_MAPCOUNT_VALUE(~PG_offline) ------------------------------------------------------------------------------ +PAGE_BUDDY_MAPCOUNT_VALUE(~PG_buddy)|PAGE_OFFLINE_MAPCOUNT_VALUE(~PG_offline)|PAGE_OFFLINE_MAPCOUNT_VALUE(~PG_unaccepted) +------------------------------------------------------------------------------------------------------------------------- More page attributes. These flags are used to filter various unnecessary for dumping pages. 
diff --git a/Documentation/devicetree/bindings/display/panel/lg,sw43408.yaml b/Documentation/devicetree/bindings/display/panel/lg,sw43408.yaml index bbaaa783d184..2219d3d4ac43 100644 --- a/Documentation/devicetree/bindings/display/panel/lg,sw43408.yaml +++ b/Documentation/devicetree/bindings/display/panel/lg,sw43408.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: LG SW43408 1080x2160 DSI panel maintainers: - - Caleb Connolly <caleb.connolly@linaro.org> + - Casey Connolly <casey.connolly@linaro.org> description: This panel is used on the Pixel 3, it is a 60hz OLED panel which diff --git a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-rradc.yaml b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-rradc.yaml index f39bc92c2b99..862e450da214 100644 --- a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-rradc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-rradc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm's SPMI PMIC Round Robin ADC maintainers: - - Caleb Connolly <caleb.connolly@linaro.org> + - Casey Connolly <casey.connolly@linaro.org> description: | The Qualcomm SPMI Round Robin ADC (RRADC) provides interface to clients to diff --git a/Documentation/devicetree/bindings/power/supply/qcom,pmi8998-charger.yaml b/Documentation/devicetree/bindings/power/supply/qcom,pmi8998-charger.yaml index a0f9d49ff8fb..90c7dc7632c5 100644 --- a/Documentation/devicetree/bindings/power/supply/qcom,pmi8998-charger.yaml +++ b/Documentation/devicetree/bindings/power/supply/qcom,pmi8998-charger.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm PMI8998/PM660 Switch-Mode Battery Charger "2" maintainers: - - Caleb Connolly <caleb.connolly@linaro.org> + - Casey Connolly <casey.connolly@linaro.org> properties: compatible: diff --git a/Documentation/filesystems/relay.rst b/Documentation/filesystems/relay.rst index 46447dbc75ad..301ff4c6e6c6 100644 
--- a/Documentation/filesystems/relay.rst +++ b/Documentation/filesystems/relay.rst @@ -301,16 +301,6 @@ user-defined data with a channel, and is immediately available (including in create_buf_file()) via chan->private_data or buf->chan->private_data. -Buffer-only channels --------------------- - -These channels have no files associated and can be created with -relay_open(NULL, NULL, ...). Such channels are useful in scenarios such -as when doing early tracing in the kernel, before the VFS is up. In these -cases, one may open a buffer-only channel and then call -relay_late_setup_files() when the kernel is ready to handle files, -to expose the buffered data to the userspace. - Channel 'modes' --------------- diff --git a/Documentation/process/debugging/gdb-kernel-debugging.rst b/Documentation/process/debugging/gdb-kernel-debugging.rst index 895285c037c7..9475c759c722 100644 --- a/Documentation/process/debugging/gdb-kernel-debugging.rst +++ b/Documentation/process/debugging/gdb-kernel-debugging.rst @@ -127,35 +127,31 @@ Examples of using the Linux-provided gdb helpers - Make use of the per-cpu function for the current or a specified CPU:: - (gdb) p $lx_per_cpu("runqueues").nr_running + (gdb) p $lx_per_cpu(runqueues).nr_running $3 = 1 - (gdb) p $lx_per_cpu("runqueues", 2).nr_running + (gdb) p $lx_per_cpu(runqueues, 2).nr_running $4 = 0 - Dig into hrtimers using the container_of helper:: - (gdb) set $next = $lx_per_cpu("hrtimer_bases").clock_base[0].active.next - (gdb) p *$container_of($next, "struct hrtimer", "node") + (gdb) set $leftmost = $lx_per_cpu(hrtimer_bases).clock_base[0].active.rb_root.rb_leftmost + (gdb) p *$container_of($leftmost, "struct hrtimer", "node") $5 = { node = { node = { - __rb_parent_color = 18446612133355256072, - rb_right = 0x0 <irq_stack_union>, - rb_left = 0x0 <irq_stack_union> + __rb_parent_color = 18446612686384860673, + rb_right = 0xffff888231da8b00, + rb_left = 0x0 }, - expires = { - tv64 = 1835268000000 - } + expires = 1228461000000 }, 
- _softexpires = { - tv64 = 1835268000000 - }, - function = 0xffffffff81078232 <tick_sched_timer>, - base = 0xffff88003fd0d6f0, - state = 1, - start_pid = 0, - start_site = 0xffffffff81055c1f <hrtimer_start_range_ns+20>, - start_comm = "swapper/2\000\000\000\000\000\000" + _softexpires = 1228461000000, + function = 0xffffffff8137ab20 <tick_nohz_handler>, + base = 0xffff888231d9b4c0, + state = 1 '\001', + is_rel = 0 '\000', + is_soft = 0 '\000', + is_hard = 1 '\001' } diff --git a/Documentation/translations/zh_CN/dev-tools/gdb-kernel-debugging.rst b/Documentation/translations/zh_CN/dev-tools/gdb-kernel-debugging.rst index 3c133a918f30..282aacd33442 100644 --- a/Documentation/translations/zh_CN/dev-tools/gdb-kernel-debugging.rst +++ b/Documentation/translations/zh_CN/dev-tools/gdb-kernel-debugging.rst @@ -120,35 +120,31 @@ Kgdb内核调试器、QEMU等虚拟机管理程序或基于JTAG的硬件接口 - 对当前或指定的CPU使用per-cpu函数:: - (gdb) p $lx_per_cpu("runqueues").nr_running + (gdb) p $lx_per_cpu(runqueues).nr_running $3 = 1 - (gdb) p $lx_per_cpu("runqueues", 2).nr_running + (gdb) p $lx_per_cpu(runqueues, 2).nr_running $4 = 0 - 使用container_of查看更多hrtimers信息:: - (gdb) set $next = $lx_per_cpu("hrtimer_bases").clock_base[0].active.next - (gdb) p *$container_of($next, "struct hrtimer", "node") + (gdb) set $leftmost = $lx_per_cpu(hrtimer_bases).clock_base[0].active.rb_root.rb_leftmost + (gdb) p *$container_of($leftmost, "struct hrtimer", "node") $5 = { node = { node = { - __rb_parent_color = 18446612133355256072, - rb_right = 0x0 <irq_stack_union>, - rb_left = 0x0 <irq_stack_union> + __rb_parent_color = 18446612686384860673, + rb_right = 0xffff888231da8b00, + rb_left = 0x0 }, - expires = { - tv64 = 1835268000000 - } + expires = 1228461000000 }, - _softexpires = { - tv64 = 1835268000000 - }, - function = 0xffffffff81078232 <tick_sched_timer>, - base = 0xffff88003fd0d6f0, - state = 1, - start_pid = 0, - start_site = 0xffffffff81055c1f <hrtimer_start_range_ns+20>, - start_comm = "swapper/2\000\000\000\000\000\000" + 
_softexpires = 1228461000000, + function = 0xffffffff8137ab20 <tick_nohz_handler>, + base = 0xffff888231d9b4c0, + state = 1 '\001', + is_rel = 0 '\000', + is_soft = 0 '\000', + is_hard = 1 '\001' } diff --git a/Documentation/translations/zh_TW/dev-tools/gdb-kernel-debugging.rst b/Documentation/translations/zh_TW/dev-tools/gdb-kernel-debugging.rst index c881e8872b19..b595af59ba78 100644 --- a/Documentation/translations/zh_TW/dev-tools/gdb-kernel-debugging.rst +++ b/Documentation/translations/zh_TW/dev-tools/gdb-kernel-debugging.rst @@ -116,35 +116,31 @@ Kgdb內核調試器、QEMU等虛擬機管理程序或基於JTAG的硬件接口 - 對當前或指定的CPU使用per-cpu函數:: - (gdb) p $lx_per_cpu("runqueues").nr_running + (gdb) p $lx_per_cpu(runqueues).nr_running $3 = 1 - (gdb) p $lx_per_cpu("runqueues", 2).nr_running + (gdb) p $lx_per_cpu(runqueues, 2).nr_running $4 = 0 - 使用container_of查看更多hrtimers信息:: - (gdb) set $next = $lx_per_cpu("hrtimer_bases").clock_base[0].active.next - (gdb) p *$container_of($next, "struct hrtimer", "node") + (gdb) set $leftmost = $lx_per_cpu(hrtimer_bases).clock_base[0].active.rb_root.rb_leftmost + (gdb) p *$container_of($leftmost, "struct hrtimer", "node") $5 = { node = { node = { - __rb_parent_color = 18446612133355256072, - rb_right = 0x0 <irq_stack_union>, - rb_left = 0x0 <irq_stack_union> + __rb_parent_color = 18446612686384860673, + rb_right = 0xffff888231da8b00, + rb_left = 0x0 }, - expires = { - tv64 = 1835268000000 - } + expires = 1228461000000 }, - _softexpires = { - tv64 = 1835268000000 - }, - function = 0xffffffff81078232 <tick_sched_timer>, - base = 0xffff88003fd0d6f0, - state = 1, - start_pid = 0, - start_site = 0xffffffff81055c1f <hrtimer_start_range_ns+20>, - start_comm = "swapper/2\000\000\000\000\000\000" + _softexpires = 1228461000000, + function = 0xffffffff8137ab20 <tick_nohz_handler>, + base = 0xffff888231d9b4c0, + state = 1 '\001', + is_rel = 0 '\000', + is_soft = 0 '\000', + is_hard = 1 '\001' } diff --git a/MAINTAINERS b/MAINTAINERS index 945384f869c3..75da23034e18 100644 --- 
a/MAINTAINERS +++ b/MAINTAINERS @@ -7538,7 +7538,7 @@ F: drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c DRM DRIVER FOR LG SW43408 PANELS M: Sumit Semwal <sumit.semwal@linaro.org> -M: Caleb Connolly <caleb.connolly@linaro.org> +M: Casey Connolly <casey.connolly@linaro.org> S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: Documentation/devicetree/bindings/display/panel/lg,sw43408.yaml diff --git a/arch/arm64/boot/dts/qcom/qcm6490-shift-otter.dts b/arch/arm64/boot/dts/qcom/qcm6490-shift-otter.dts index 712f29fbe85e..b9a0f7ac4d9c 100644 --- a/arch/arm64/boot/dts/qcom/qcm6490-shift-otter.dts +++ b/arch/arm64/boot/dts/qcom/qcm6490-shift-otter.dts @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause /* * Copyright (c) 2023, Luca Weiss <luca.weiss@fairphone.com> - * Copyright (c) 2024, Caleb Connolly <caleb@postmarketos.org> + * Copyright (c) 2024, Casey Connolly <casey.connolly@linaro.org> */ /dts-v1/; diff --git a/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts b/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts index e5da58d11064..2cf7b5e1243c 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2022, Alexander Martinz <amartinz@shiftphones.com> - * Copyright (c) 2022, Caleb Connolly <caleb@connolly.tech> + * Copyright (c) 2022, Casey Connolly <casey.connolly@linaro.org> * Copyright (c) 2022, Dylan Van Assche <me@dylanvanassche.be> */ diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 8d9f1c9aaa4c..61f56cdaccb5 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -4,6 +4,7 @@ #include <asm/page.h> #include <asm-generic/set_memory.h> +#include <asm/pgtable.h> #define set_memory_rox set_memory_rox int set_memory_rox(unsigned long addr, int numpages); @@ -37,6 +38,7 @@ int set_memory_rox(unsigned long addr, int numpages); * The 
caller is required to take care of these. */ +int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot); int _set_memory_uc(unsigned long addr, int numpages); int _set_memory_wc(unsigned long addr, int numpages); int _set_memory_wt(unsigned long addr, int numpages); diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 0be61c45400c..bcb534688dfe 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -278,6 +278,7 @@ static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem, unsigned long long mend) { unsigned long start, end; + int ret; cmem->ranges[0].start = mstart; cmem->ranges[0].end = mend; @@ -286,22 +287,43 @@ static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem, /* Exclude elf header region */ start = image->elf_load_addr; end = start + image->elf_headers_sz - 1; - return crash_exclude_mem_range(cmem, start, end); + ret = crash_exclude_mem_range(cmem, start, end); + + if (ret) + return ret; + + /* Exclude dm crypt keys region */ + if (image->dm_crypt_keys_addr) { + start = image->dm_crypt_keys_addr; + end = start + image->dm_crypt_keys_sz - 1; + return crash_exclude_mem_range(cmem, start, end); + } + + return ret; } /* Prepare memory map for crash dump kernel */ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params) { + unsigned int nr_ranges = 0; int i, ret = 0; unsigned long flags; struct e820_entry ei; struct crash_memmap_data cmd; struct crash_mem *cmem; - cmem = vzalloc(struct_size(cmem, ranges, 1)); + /* + * Using random kexec_buf for passing dm crypt keys may cause a range + * split. So use two slots here. 
+ */ + nr_ranges = 2; + cmem = vzalloc(struct_size(cmem, ranges, nr_ranges)); if (!cmem) return -ENOMEM; + cmem->max_nr_ranges = nr_ranges; + cmem->nr_ranges = 0; + memset(&cmd, 0, sizeof(struct crash_memmap_data)); cmd.params = params; diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index dad174e3bed0..24a41f0e0cf1 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -27,6 +27,8 @@ #include <asm/kexec-bzimage64.h> #define MAX_ELFCOREHDR_STR_LEN 30 /* elfcorehdr=0x<64bit-value> */ +#define MAX_DMCRYPTKEYS_STR_LEN 31 /* dmcryptkeys=0x<64bit-value> */ + /* * Defines lowest physical address for various segments. Not sure where @@ -76,6 +78,10 @@ static int setup_cmdline(struct kimage *image, struct boot_params *params, if (image->type == KEXEC_TYPE_CRASH) { len = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ", image->elf_load_addr); + + if (image->dm_crypt_keys_addr != 0) + len += sprintf(cmdline_ptr + len, + "dmcryptkeys=0x%lx ", image->dm_crypt_keys_addr); } memcpy(cmdline_ptr + len, cmdline, cmdline_len); cmdline_len += len; @@ -474,6 +480,19 @@ static void *bzImage64_load(struct kimage *image, char *kernel, ret = crash_load_segments(image); if (ret) return ERR_PTR(ret); + ret = crash_load_dm_crypt_keys(image); + if (ret == -ENOENT) { + kexec_dprintk("No dm crypt key to load\n"); + } else if (ret) { + pr_err("Failed to load dm crypt keys\n"); + return ERR_PTR(ret); + } + if (image->dm_crypt_keys_addr && + cmdline_len + MAX_ELFCOREHDR_STR_LEN + MAX_DMCRYPTKEYS_STR_LEN > + header->cmdline_size) { + pr_err("Appending dmcryptkeys=<addr> to command line exceeds maximum allowed length\n"); + return ERR_PTR(-EINVAL); + } } #endif @@ -501,6 +520,8 @@ static void *bzImage64_load(struct kimage *image, char *kernel, efi_map_sz = efi_get_runtime_map_size(); params_cmdline_sz = sizeof(struct boot_params) + cmdline_len + MAX_ELFCOREHDR_STR_LEN; + if (image->dm_crypt_keys_addr) + params_cmdline_sz += 
MAX_DMCRYPTKEYS_STR_LEN; params_cmdline_sz = ALIGN(params_cmdline_sz, 16); kbuf.bufsz = params_cmdline_sz + ALIGN(efi_map_sz, 16) + sizeof(struct setup_data) + diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 949c9e4bfad2..697fb99406e6 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -630,13 +630,35 @@ static void kexec_mark_crashkres(bool protect) kexec_mark_range(control, crashk_res.end, protect); } +/* make the memory storing dm crypt keys in/accessible */ +static void kexec_mark_dm_crypt_keys(bool protect) +{ + unsigned long start_paddr, end_paddr; + unsigned int nr_pages; + + if (kexec_crash_image->dm_crypt_keys_addr) { + start_paddr = kexec_crash_image->dm_crypt_keys_addr; + end_paddr = start_paddr + kexec_crash_image->dm_crypt_keys_sz - 1; + nr_pages = (PAGE_ALIGN(end_paddr) - PAGE_ALIGN_DOWN(start_paddr))/PAGE_SIZE; + if (protect) + set_memory_np((unsigned long)phys_to_virt(start_paddr), nr_pages); + else + __set_memory_prot( + (unsigned long)phys_to_virt(start_paddr), + nr_pages, + __pgprot(_PAGE_PRESENT | _PAGE_NX | _PAGE_RW)); + } +} + void arch_kexec_protect_crashkres(void) { kexec_mark_crashkres(true); + kexec_mark_dm_crypt_keys(true); } void arch_kexec_unprotect_crashkres(void) { + kexec_mark_dm_crypt_keys(false); kexec_mark_crashkres(false); } #endif diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 30ab4aced761..46edc11726b7 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -2148,6 +2148,19 @@ static inline int cpa_clear_pages_array(struct page **pages, int numpages, CPA_PAGES_ARRAY, pages); } +/* + * __set_memory_prot is an internal helper for callers that have been passed + * a pgprot_t value from upper layers and a reservation has already been taken. + * If you want to set the pgprot to a specific page protocol, use the + * set_memory_xx() functions. 
+ */ +int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot) +{ + return change_page_attr_set_clr(&addr, numpages, prot, + __pgprot(~pgprot_val(prot)), 0, 0, + NULL); +} + int _set_memory_uc(unsigned long addr, int numpages) { /* diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index 4e3ba6e68c32..f7512b4e923e 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -482,7 +482,7 @@ static void check_supported_cpu(void *_rc) cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) { - pr_info("Power state transitions not supported\n"); + pr_info_once("Power state transitions not supported\n"); return; } *rc = 0; diff --git a/drivers/gpu/drm/panel/panel-samsung-sofef00.c b/drivers/gpu/drm/panel/panel-samsung-sofef00.c index 210a25afe82b..d92ae6b6100f 100644 --- a/drivers/gpu/drm/panel/panel-samsung-sofef00.c +++ b/drivers/gpu/drm/panel/panel-samsung-sofef00.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -/* Copyright (c) 2020 Caleb Connolly <caleb@connolly.tech> +/* Copyright (c) 2020 Casey Connolly <casey.connolly@linaro.org> * Generated with linux-mdss-dsi-panel-driver-generator from vendor device tree: * Copyright (c) 2020, The Linux Foundation. All rights reserved. */ @@ -260,6 +260,6 @@ static struct mipi_dsi_driver sofef00_panel_driver = { module_mipi_dsi_driver(sofef00_panel_driver); -MODULE_AUTHOR("Caleb Connolly <caleb@connolly.tech>"); +MODULE_AUTHOR("Casey Connolly <casey.connolly@linaro.org>"); MODULE_DESCRIPTION("DRM driver for Samsung AMOLED DSI panels found in OnePlus 6/6T phones"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/iio/adc/qcom-spmi-rradc.c b/drivers/iio/adc/qcom-spmi-rradc.c index 63ebaf13ef19..f61ad0510f04 100644 --- a/drivers/iio/adc/qcom-spmi-rradc.c +++ b/drivers/iio/adc/qcom-spmi-rradc.c @@ -2,7 +2,7 @@ /* * Copyright (c) 2016-2017, 2019, The Linux Foundation. All rights reserved. 
* Copyright (c) 2022 Linaro Limited. - * Author: Caleb Connolly <caleb.connolly@linaro.org> + * Author: Casey Connolly <casey.connolly@linaro.org> * * This driver is for the Round Robin ADC found in the pmi8998 and pm660 PMICs. */ @@ -1016,5 +1016,5 @@ static struct platform_driver rradc_driver = { module_platform_driver(rradc_driver); MODULE_DESCRIPTION("QCOM SPMI PMIC RR ADC driver"); -MODULE_AUTHOR("Caleb Connolly <caleb.connolly@linaro.org>"); +MODULE_AUTHOR("Casey Connolly <casey.connolly@linaro.org>"); MODULE_LICENSE("GPL"); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index ed40d8600656..2cc2eb24dc8a 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -36,6 +36,7 @@ #include <linux/sched/clock.h> #include <linux/rculist.h> #include <linux/delay.h> +#include <linux/sort.h> #include <trace/events/bcache.h> /* @@ -559,8 +560,6 @@ static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp) } } -#define cmp_int(l, r) ((l > r) - (l < r)) - #ifdef CONFIG_PROVE_LOCKING static int btree_lock_cmp_fn(const struct lockdep_map *_a, const struct lockdep_map *_b) diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index 4c253b433bf7..4904097dfd49 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -3,7 +3,7 @@ config NVME_TARGET tristate "NVMe Target support" depends on BLOCK - depends on CONFIGFS_FS + select CONFIGFS_FS select NVME_KEYRING if NVME_TARGET_TCP_TLS select KEYS if NVME_TARGET_TCP_TLS select SGL_ALLOC diff --git a/drivers/power/supply/qcom_pmi8998_charger.c b/drivers/power/supply/qcom_pmi8998_charger.c index 74a8d8ed8d9f..c2f8f2e24398 100644 --- a/drivers/power/supply/qcom_pmi8998_charger.c +++ b/drivers/power/supply/qcom_pmi8998_charger.c @@ -2,7 +2,7 @@ /* * Copyright (c) 2016-2019 The Linux Foundation. All rights reserved. * Copyright (c) 2023, Linaro Ltd. 
- * Author: Caleb Connolly <caleb.connolly@linaro.org> + * Author: Casey Connolly <casey.connolly@linaro.org> * * This driver is for the switch-mode battery charger and boost * hardware found in pmi8998 and related PMICs. @@ -1045,6 +1045,6 @@ static struct platform_driver qcom_spmi_smb2 = { module_platform_driver(qcom_spmi_smb2); -MODULE_AUTHOR("Caleb Connolly <caleb.connolly@linaro.org>"); +MODULE_AUTHOR("Casey Connolly <casey.connolly@linaro.org>"); MODULE_DESCRIPTION("Qualcomm SMB2 Charger Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index cbf531d0ba68..995cfeca972b 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -98,18 +98,6 @@ MODULE_PARM_DESC(dbg_level, "Debugging output level (default 0 = none)"); #endif /* - * An internal DMA coherent buffer - */ -struct mport_dma_buf { - void *ib_base; - dma_addr_t ib_phys; - u32 ib_size; - u64 ib_rio_base; - bool ib_map; - struct file *filp; -}; - -/* * Internal memory mapping structure */ enum rio_mport_map_dir { @@ -131,14 +119,6 @@ struct rio_mport_mapping { struct file *filp; }; -struct rio_mport_dma_map { - int valid; - u64 length; - void *vaddr; - dma_addr_t paddr; -}; - -#define MPORT_MAX_DMA_BUFS 16 #define MPORT_EVENT_DEPTH 10 /* diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 9544b8ee0c96..46daf32ea13b 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -1775,19 +1775,6 @@ struct dma_chan *rio_request_mport_dma(struct rio_mport *mport) EXPORT_SYMBOL_GPL(rio_request_mport_dma); /** - * rio_request_dma - request RapidIO capable DMA channel that supports - * specified target RapidIO device. - * @rdev: RIO device associated with DMA transfer - * - * Returns pointer to allocated DMA channel or NULL if failed. 
- */ -struct dma_chan *rio_request_dma(struct rio_dev *rdev) -{ - return rio_request_mport_dma(rdev->net->hport); -} -EXPORT_SYMBOL_GPL(rio_request_dma); - -/** * rio_release_dma - release specified DMA channel * @dchan: DMA channel to release */ @@ -1834,57 +1821,9 @@ struct dma_async_tx_descriptor *rio_dma_prep_xfer(struct dma_chan *dchan, } EXPORT_SYMBOL_GPL(rio_dma_prep_xfer); -/** - * rio_dma_prep_slave_sg - RapidIO specific wrapper - * for device_prep_slave_sg callback defined by DMAENGINE. - * @rdev: RIO device control structure - * @dchan: DMA channel to configure - * @data: RIO specific data descriptor - * @direction: DMA data transfer direction (TO or FROM the device) - * @flags: dmaengine defined flags - * - * Initializes RapidIO capable DMA channel for the specified data transfer. - * Uses DMA channel private extension to pass information related to remote - * target RIO device. - * - * Returns: pointer to DMA transaction descriptor if successful, - * error-valued pointer or NULL if failed. - */ -struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev, - struct dma_chan *dchan, struct rio_dma_data *data, - enum dma_transfer_direction direction, unsigned long flags) -{ - return rio_dma_prep_xfer(dchan, rdev->destid, data, direction, flags); -} -EXPORT_SYMBOL_GPL(rio_dma_prep_slave_sg); - #endif /* CONFIG_RAPIDIO_DMA_ENGINE */ /** - * rio_find_mport - find RIO mport by its ID - * @mport_id: number (ID) of mport device - * - * Given a RIO mport number, the desired mport is located - * in the global list of mports. If the mport is found, a pointer to its - * data structure is returned. If no mport is found, %NULL is returned. 
- */ -struct rio_mport *rio_find_mport(int mport_id) -{ - struct rio_mport *port; - - mutex_lock(&rio_mport_list_lock); - list_for_each_entry(port, &rio_mports, node) { - if (port->id == mport_id) - goto found; - } - port = NULL; -found: - mutex_unlock(&rio_mport_list_lock); - - return port; -} - -/** * rio_register_scan - enumeration/discovery method registration interface * @mport_id: mport device ID for which fabric scan routine has to be set * (RIO_MPORT_ANY = set for all available mports) @@ -1962,48 +1901,6 @@ err_out: EXPORT_SYMBOL_GPL(rio_register_scan); /** - * rio_unregister_scan - removes enumeration/discovery method from mport - * @mport_id: mport device ID for which fabric scan routine has to be - * unregistered (RIO_MPORT_ANY = apply to all mports that use - * the specified scan_ops) - * @scan_ops: enumeration/discovery operations structure - * - * Removes enumeration or discovery method assigned to the specified mport - * device. If RIO_MPORT_ANY is specified, removes the specified operations from - * all mports that have them attached. 
- */ -int rio_unregister_scan(int mport_id, struct rio_scan *scan_ops) -{ - struct rio_mport *port; - struct rio_scan_node *scan; - - pr_debug("RIO: %s for mport_id=%d\n", __func__, mport_id); - - if (mport_id != RIO_MPORT_ANY && mport_id >= RIO_MAX_MPORTS) - return -EINVAL; - - mutex_lock(&rio_mport_list_lock); - - list_for_each_entry(port, &rio_mports, node) - if (port->id == mport_id || - (mport_id == RIO_MPORT_ANY && port->nscan == scan_ops)) - port->nscan = NULL; - - list_for_each_entry(scan, &rio_scans, node) { - if (scan->mport_id == mport_id) { - list_del(&scan->node); - kfree(scan); - break; - } - } - - mutex_unlock(&rio_mport_list_lock); - - return 0; -} -EXPORT_SYMBOL_GPL(rio_unregister_scan); - -/** * rio_mport_scan - execute enumeration/discovery on the specified mport * @mport_id: number (ID) of mport device */ diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h index f482de0d0370..a0e2a09ddb8e 100644 --- a/drivers/rapidio/rio.h +++ b/drivers/rapidio/rio.h @@ -41,9 +41,7 @@ extern void rio_del_device(struct rio_dev *rdev, enum rio_device_state state); extern int rio_enable_rx_tx_port(struct rio_mport *port, int local, u16 destid, u8 hopcount, u8 port_num); extern int rio_register_scan(int mport_id, struct rio_scan *scan_ops); -extern int rio_unregister_scan(int mport_id, struct rio_scan *scan_ops); extern void rio_attach_device(struct rio_dev *rdev); -extern struct rio_mport *rio_find_mport(int mport_id); extern int rio_mport_scan(int mport_id); /* Structures internal to the RIO core code */ diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c index 9135227301c8..97287e838ce1 100644 --- a/drivers/rapidio/rio_cm.c +++ b/drivers/rapidio/rio_cm.c @@ -198,12 +198,6 @@ struct cm_peer { struct rio_dev *rdev; }; -struct rio_cm_work { - struct work_struct work; - struct cm_dev *cm; - void *data; -}; - struct conn_req { struct list_head node; u32 destid; /* requester destID */ diff --git a/drivers/s390/char/vmlogrdr.c 
b/drivers/s390/char/vmlogrdr.c index dac85294d2f5..e284eea331d7 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -255,7 +255,7 @@ static int vmlogrdr_recording(struct vmlogrdr_priv_t * logptr, /* * The recording commands needs to be called with option QID - * for guests that have previlege classes A or B. + * for guests that have privilege classes A or B. * Purging has to be done as separate step, because recording * can't be switched on as long as records are on the queue. * Doing both at the same time doesn't work. @@ -557,7 +557,7 @@ static ssize_t vmlogrdr_purge_store(struct device * dev, /* * The recording command needs to be called with option QID - * for guests that have previlege classes A or B. + * for guests that have privilege classes A or B. * Other guests will not recognize the command and we have to * issue the same command without the QID parameter. */ diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 25cf61ebd40c..0a4b1d433621 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -17,6 +17,7 @@ #include <linux/random.h> #include <linux/ratelimit.h> #include <linux/slab.h> +#include <linux/sort.h> #include <linux/vmalloc.h> #include <linux/workqueue.h> @@ -672,8 +673,6 @@ static inline void percpu_memset(void __percpu *p, int c, size_t bytes) u64 *bch2_acc_percpu_u64s(u64 __percpu *, unsigned); -#define cmp_int(l, r) ((l > r) - (l < r)) - static inline int u8_cmp(u8 l, u8 r) { return cmp_int(l, r); diff --git a/fs/configfs/Kconfig b/fs/configfs/Kconfig index 272b64456999..1fcd761fe7be 100644 --- a/fs/configfs/Kconfig +++ b/fs/configfs/Kconfig @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only config CONFIGFS_FS tristate "Userspace-driven configuration filesystem" - select SYSFS help configfs is a RAM-based filesystem that provides the converse of sysfs's functionality. 
Where sysfs is a filesystem-based diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 0d8f7fb15c2e..dd0c8e560ef6 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -2102,11 +2102,13 @@ static int nilfs_btree_propagate(struct nilfs_bmap *btree, ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); if (ret < 0) { - if (unlikely(ret == -ENOENT)) + if (unlikely(ret == -ENOENT)) { nilfs_crit(btree->b_inode->i_sb, "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d", btree->b_inode->i_ino, (unsigned long long)key, level); + ret = -EINVAL; + } goto out; } diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index 893ab36824cc..2d8dc6b35b54 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -273,6 +273,9 @@ static int nilfs_direct_propagate(struct nilfs_bmap *bmap, dat = nilfs_bmap_get_dat(bmap); key = nilfs_bmap_data_get_key(bmap, bh); ptr = nilfs_direct_get_ptr(bmap, key); + if (ptr == NILFS_BMAP_INVALID_PTR) + return -EINVAL; + if (!buffer_nilfs_volatile(bh)) { oldreq.pr_entry_nr = ptr; newreq.pr_entry_nr = ptr; diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 2f850a18d6e7..946b0d3534a5 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -422,8 +422,6 @@ static int nilfs_mdt_write_folio(struct folio *folio, if (wbc->sync_mode == WB_SYNC_ALL) err = nilfs_construct_segment(sb); - else if (wbc->for_reclaim) - nilfs_flush_segment(sb, inode->i_ino); return err; } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 83970d97840b..61a4141f8d6b 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2221,22 +2221,6 @@ static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn) spin_unlock(&sci->sc_state_lock); } -/** - * nilfs_flush_segment - trigger a segment construction for resource control - * @sb: super block - * @ino: inode number of the file to be flushed out. 
- */ -void nilfs_flush_segment(struct super_block *sb, ino_t ino) -{ - struct the_nilfs *nilfs = sb->s_fs_info; - struct nilfs_sc_info *sci = nilfs->ns_writer; - - if (!sci || nilfs_doing_construction()) - return; - nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0); - /* assign bit 0 to data files */ -} - struct nilfs_segctor_wait_request { wait_queue_entry_t wq; __u32 seq; diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index f723f47ddc4e..4b39ed43ae72 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -226,7 +226,6 @@ extern void nilfs_relax_pressure_in_lock(struct super_block *); extern int nilfs_construct_segment(struct super_block *); extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *, loff_t, loff_t); -extern void nilfs_flush_segment(struct super_block *, ino_t); extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, void **); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index fce9beb214f0..43e652a2adaf 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1483,7 +1483,7 @@ static void o2net_sc_send_keep_req(struct work_struct *work) sc_put(sc); } -/* socket shutdown does a del_timer_sync against this as it tears down. +/* socket shutdown does a timer_delete_sync against this as it tears down. * we can't start this timer until we've got to the point in sc buildup * where shutdown is going to be involved */ static void o2net_idle_timer(struct timer_list *t) diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c index 1ad7106741f8..3ad7baf67658 100644 --- a/fs/ocfs2/filecheck.c +++ b/fs/ocfs2/filecheck.c @@ -505,5 +505,5 @@ static ssize_t ocfs2_filecheck_attr_store(struct kobject *kobj, ocfs2_filecheck_handle_entry(ent, entry); exit: - return (!ret ? 
count : ret); + return ret ?: count; } diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index e272429da3db..de7f12858729 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -674,7 +674,7 @@ out_put: break; } out: - kfree(rec); + ocfs2_free_quota_recovery(rec); return status; } diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index ddd761cf44c8..a28c127b9934 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -691,8 +691,7 @@ static void __exit ocfs2_stack_glue_exit(void) memset(&locking_max_version, 0, sizeof(struct ocfs2_protocol_version)); ocfs2_sysfs_exit(); - if (ocfs2_table_header) - unregister_sysctl_table(ocfs2_table_header); + unregister_sysctl_table(ocfs2_table_header); } MODULE_AUTHOR("Oracle"); diff --git a/fs/pipe.c b/fs/pipe.c index da45edd68c41..45077c37bad1 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -26,6 +26,7 @@ #include <linux/memcontrol.h> #include <linux/watch_queue.h> #include <linux/sysctl.h> +#include <linux/sort.h> #include <linux/uaccess.h> #include <asm/ioctls.h> @@ -76,8 +77,6 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR; * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09 */ -#define cmp_int(l, r) ((l > r) - (l < r)) - #ifdef CONFIG_PROVE_LOCKING static int pipe_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b) diff --git a/fs/proc/base.c b/fs/proc/base.c index fe33a5843fbd..c667702dc69b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -827,7 +827,13 @@ static const struct file_operations proc_single_file_operations = { .release = single_release, }; - +/* + * proc_mem_open() can return errno, NULL or mm_struct*. 
+ * + * - Returns NULL if the task has no mm (PF_KTHREAD or PF_EXITING) + * - Returns mm_struct* on success + * - Returns error code on failure + */ struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) { struct task_struct *task = get_proc_task(inode); @@ -854,8 +860,8 @@ static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) { struct mm_struct *mm = proc_mem_open(inode, mode); - if (IS_ERR(mm)) - return PTR_ERR(mm); + if (IS_ERR_OR_NULL(mm)) + return mm ? PTR_ERR(mm) : -ESRCH; file->private_data = mm; return 0; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index b9e4fbbdf6e6..27972c0749e7 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -212,8 +212,8 @@ static int proc_maps_open(struct inode *inode, struct file *file, priv->inode = inode; priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); - if (IS_ERR(priv->mm)) { - int err = PTR_ERR(priv->mm); + if (IS_ERR_OR_NULL(priv->mm)) { + int err = priv->mm ? PTR_ERR(priv->mm) : -ESRCH; seq_release_private(inode, file); return err; @@ -1325,8 +1325,8 @@ static int smaps_rollup_open(struct inode *inode, struct file *file) priv->inode = inode; priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); - if (IS_ERR(priv->mm)) { - ret = PTR_ERR(priv->mm); + if (IS_ERR_OR_NULL(priv->mm)) { + ret = priv->mm ? PTR_ERR(priv->mm) : -ESRCH; single_release(inode, file); goto out_free; @@ -2069,8 +2069,8 @@ static int pagemap_open(struct inode *inode, struct file *file) struct mm_struct *mm; mm = proc_mem_open(inode, PTRACE_MODE_READ); - if (IS_ERR(mm)) - return PTR_ERR(mm); + if (IS_ERR_OR_NULL(mm)) + return mm ? 
PTR_ERR(mm) : -ESRCH; file->private_data = mm; return 0; } diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index bce674533000..59bfd61d653a 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -260,8 +260,8 @@ static int maps_open(struct inode *inode, struct file *file, priv->inode = inode; priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); - if (IS_ERR(priv->mm)) { - int err = PTR_ERR(priv->mm); + if (IS_ERR_OR_NULL(priv->mm)) { + int err = priv->mm ? PTR_ERR(priv->mm) : -ESRCH; seq_release_private(inode, file); return err; diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index b1091e70434a..a9602aae21ef 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -149,6 +149,27 @@ config SQUASHFS_XATTR If unsure, say N. +config SQUASHFS_COMP_CACHE_FULL + bool "Enable full caching of compressed blocks" + depends on SQUASHFS + default n + help + This option enables caching of all compressed blocks. Without caching, + repeated reads of the same files trigger excessive disk I/O, significantly + reducing performance in workloads like fio-based benchmarks. + + For example, fio tests (iodepth=1, numjobs=1, ioengine=psync) show: + With caching: IOPS=2223, BW=278MiB/s (291MB/s) + Without caching: IOPS=815, BW=102MiB/s (107MB/s) + + Enabling this option restores performance to pre-regression levels by + caching all compressed blocks in the page cache, reducing disk I/O for + repeated reads. However, this increases memory usage, which may be a + concern in memory-constrained environments. + + Enable this option if your workload involves frequent repeated reads and + memory usage is not a limiting factor. If unsure, say N.
+ config SQUASHFS_ZLIB bool "Include support for ZLIB compressed file systems" depends on SQUASHFS diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 2dc730800f44..3061043e915c 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -88,6 +88,10 @@ static int squashfs_bio_read_cached(struct bio *fullbio, struct bio_vec *bv; int idx = 0; int err = 0; +#ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL + struct page **cache_pages = kmalloc_array(page_count, + sizeof(void *), GFP_KERNEL | __GFP_ZERO); +#endif bio_for_each_segment_all(bv, fullbio, iter_all) { struct page *page = bv->bv_page; @@ -110,6 +114,11 @@ static int squashfs_bio_read_cached(struct bio *fullbio, head_to_cache = page; else if (idx == page_count - 1 && index + length != read_end) tail_to_cache = page; +#ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL + /* Cache all pages in the BIO for repeated reads */ + else if (cache_pages) + cache_pages[idx] = page; +#endif if (!bio || idx != end_idx) { struct bio *new = bio_alloc_clone(bdev, fullbio, @@ -163,6 +172,25 @@ static int squashfs_bio_read_cached(struct bio *fullbio, } } +#ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL + if (!cache_pages) + goto out; + + for (idx = 0; idx < page_count; idx++) { + if (!cache_pages[idx]) + continue; + int ret = add_to_page_cache_lru(cache_pages[idx], cache_mapping, + (read_start >> PAGE_SHIFT) + idx, + GFP_NOIO); + + if (!ret) { + SetPageUptodate(cache_pages[idx]); + unlock_page(cache_pages[idx]); + } + } + kfree(cache_pages); +out: +#endif return 0; } diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 67c55fe32ce8..992ea0e37257 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -202,6 +202,11 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) msblk->panic_on_errors = (opts->errors == Opt_errors_panic); msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); + if (!msblk->devblksize) { + errorf(fc, "squashfs: unable to set blocksize\n"); + return -EINVAL; + } + 
msblk->devblksize_log2 = ffz(~msblk->devblksize); mutex_init(&msblk->meta_index_mutex); diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index d613a4094db6..9c00fc5baa30 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -290,8 +290,6 @@ xfs_zone_gc_query_cb( return 0; } -#define cmp_int(l, r) ((l > r) - (l < r)) - static int xfs_zone_gc_rmap_rec_cmp( const void *a, diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 20881cc761fa..2b77d12e07b2 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -530,6 +530,12 @@ struct ftrace_likely_data { sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) #ifdef __OPTIMIZE__ +/* + * #ifdef __OPTIMIZE__ is only a good approximation; for instance "make + * CFLAGS_foo.o=-Og" defines __OPTIMIZE__, does not elide the conditional code + * and can break compilation with wrong error message(s). Combine with + * -U__OPTIMIZE__ when needed. + */ # define __compiletime_assert(condition, msg, prefix, suffix) \ do { \ /* \ @@ -543,7 +549,7 @@ struct ftrace_likely_data { prefix ## suffix(); \ } while (0) #else -# define __compiletime_assert(condition, msg, prefix, suffix) do { } while (0) +# define __compiletime_assert(condition, msg, prefix, suffix) ((void)(condition)) #endif #define _compiletime_assert(condition, msg, prefix, suffix) \ diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 44305336314e..d35726d6a415 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -34,7 +34,12 @@ static inline void arch_kexec_protect_crashkres(void) { } static inline void arch_kexec_unprotect_crashkres(void) { } #endif - +#ifdef CONFIG_CRASH_DM_CRYPT +int crash_load_dm_crypt_keys(struct kimage *image); +ssize_t dm_crypt_keys_read(char *buf, size_t count, u64 *ppos); +#else +static inline int crash_load_dm_crypt_keys(struct kimage *image) {return 0; } +#endif #ifndef arch_crash_handle_hotplug_event static inline void 
arch_crash_handle_hotplug_event(struct kimage *image, void *arg) { } diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 2f2555e6407c..dd6fc3b2133b 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -15,6 +15,8 @@ extern unsigned long long elfcorehdr_addr; extern unsigned long long elfcorehdr_size; +extern unsigned long long dm_crypt_keys_addr; + #ifdef CONFIG_CRASH_DUMP extern int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size); extern void elfcorehdr_free(unsigned long long addr); diff --git a/include/linux/habanalabs/hl_boot_if.h b/include/linux/habanalabs/hl_boot_if.h index d2a9fc96424b..af5fb4ad77eb 100644 --- a/include/linux/habanalabs/hl_boot_if.h +++ b/include/linux/habanalabs/hl_boot_if.h @@ -295,7 +295,7 @@ enum cpu_boot_dev_sts { * Initialized in: linux * * CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN GIC access permission only from - * previleged entity. FW sets this status + * privileged entity. FW sets this status * bit for host. If this bit is set then * GIC can not be accessed from host. * Initialized in: linux diff --git a/include/linux/hung_task.h b/include/linux/hung_task.h new file mode 100644 index 000000000000..1bc2b3244613 --- /dev/null +++ b/include/linux/hung_task.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Detect Hung Task: detecting tasks stuck in D state + * + * Copyright (C) 2025 Tongcheng Travel (www.ly.com) + * Author: Lance Yang <mingzhe.yang@ly.com> + */ +#ifndef __LINUX_HUNG_TASK_H +#define __LINUX_HUNG_TASK_H + +#include <linux/bug.h> +#include <linux/sched.h> +#include <linux/compiler.h> + +/* + * @blocker: Combines lock address and blocking type. + * + * Lock pointers are at least 4-byte aligned (32-bit) or 8-byte + * aligned (64-bit). This leaves the 2 least significant bits (LSBs) of the + * pointer always zero, so we can use these bits to encode the specific + * blocking type.
+ * + * Type encoding: + * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX) + * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM) + * 10 - Blocked on rt-mutex (BLOCKER_TYPE_RTMUTEX) + * 11 - Blocked on rw-semaphore (BLOCKER_TYPE_RWSEM) + */ +#define BLOCKER_TYPE_MUTEX 0x00UL +#define BLOCKER_TYPE_SEM 0x01UL +#define BLOCKER_TYPE_RTMUTEX 0x02UL +#define BLOCKER_TYPE_RWSEM 0x03UL + +#define BLOCKER_TYPE_MASK 0x03UL + +#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER +static inline void hung_task_set_blocker(void *lock, unsigned long type) +{ + unsigned long lock_ptr = (unsigned long)lock; + + WARN_ON_ONCE(!lock_ptr); + WARN_ON_ONCE(READ_ONCE(current->blocker)); + + /* + * If the lock pointer matches the BLOCKER_TYPE_MASK, return + * without writing anything. + */ + if (WARN_ON_ONCE(lock_ptr & BLOCKER_TYPE_MASK)) + return; + + WRITE_ONCE(current->blocker, lock_ptr | type); +} + +static inline void hung_task_clear_blocker(void) +{ + WARN_ON_ONCE(!READ_ONCE(current->blocker)); + + WRITE_ONCE(current->blocker, 0UL); +} + +/* + * hung_task_get_blocker_type - Extracts blocker type from encoded blocker + * address. + * + * @blocker: Blocker pointer with encoded type (via LSB bits) + * + * Returns: BLOCKER_TYPE_MUTEX, BLOCKER_TYPE_SEM, etc. 
+ */ +static inline unsigned long hung_task_get_blocker_type(unsigned long blocker) +{ + WARN_ON_ONCE(!blocker); + + return blocker & BLOCKER_TYPE_MASK; +} + +static inline void *hung_task_blocker_to_lock(unsigned long blocker) +{ + WARN_ON_ONCE(!blocker); + + return (void *)(blocker & ~BLOCKER_TYPE_MASK); +} +#else +static inline void hung_task_set_blocker(void *lock, unsigned long type) +{ +} +static inline void hung_task_clear_blocker(void) +{ +} +static inline unsigned long hung_task_get_blocker_type(unsigned long blocker) +{ + return 0UL; +} +static inline void *hung_task_blocker_to_lock(unsigned long blocker) +{ + return NULL; +} +#endif + +#endif /* __LINUX_HUNG_TASK_H */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index be2e8c0a187e..1cce1f6410a9 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -33,6 +33,7 @@ #include <linux/sprintf.h> #include <linux/static_call_types.h> #include <linux/instruction_pointer.h> +#include <linux/util_macros.h> #include <linux/wordpart.h> #include <asm/byteorder.h> @@ -41,19 +42,6 @@ #define STACK_MAGIC 0xdeadbeef -/* generic data direction definitions */ -#define READ 0 -#define WRITE 1 - -#define PTR_IF(cond, ptr) ((cond) ? (ptr) : NULL) - -#define u64_to_user_ptr(x) ( \ -{ \ - typecheck(u64, (x)); \ - (void __user *)(uintptr_t)(x); \ -} \ -) - struct completion; struct user; diff --git a/include/linux/kexec.h b/include/linux/kexec.h index f03ee2b40816..03f85ad03025 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -25,6 +25,10 @@ extern note_buf_t __percpu *crash_notes; +#ifdef CONFIG_CRASH_DUMP +#include <linux/prandom.h> +#endif + #ifdef CONFIG_KEXEC_CORE #include <linux/list.h> #include <linux/compat.h> @@ -169,6 +173,7 @@ int kexec_image_post_load_cleanup_default(struct kimage *image); * @buf_min: The buffer can't be placed below this address. * @buf_max: The buffer can't be placed above this address. * @top_down: Allocate from top of memory. 
+ * @random: Place the buffer at a random position. */ struct kexec_buf { struct kimage *image; @@ -180,8 +185,33 @@ struct kexec_buf { unsigned long buf_min; unsigned long buf_max; bool top_down; +#ifdef CONFIG_CRASH_DUMP + bool random; +#endif }; + +#ifdef CONFIG_CRASH_DUMP +static inline void kexec_random_range_start(unsigned long start, + unsigned long end, + struct kexec_buf *kbuf, + unsigned long *temp_start) +{ + unsigned short i; + + if (kbuf->random) { + get_random_bytes(&i, sizeof(unsigned short)); + *temp_start = start + (end - start) / USHRT_MAX * i; + } +} +#else +static inline void kexec_random_range_start(unsigned long start, + unsigned long end, + struct kexec_buf *kbuf, + unsigned long *temp_start) +{} +#endif + int kexec_load_purgatory(struct kimage *image, struct kexec_buf *kbuf); int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name, void *buf, unsigned int size, @@ -383,6 +413,10 @@ struct kimage { void *elf_headers; unsigned long elf_headers_sz; unsigned long elf_load_addr; + + /* dm crypt keys buffer */ + unsigned long dm_crypt_keys_addr; + unsigned long dm_crypt_keys_sz; }; /* kexec interface functions */ diff --git a/include/linux/list.h b/include/linux/list.h index 29a375889fb8..e7e28afd28f8 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -50,9 +50,9 @@ static inline void INIT_LIST_HEAD(struct list_head *list) * Performs the full set of list corruption checks before __list_add(). * On list corruption reports a warning, and returns false. */ -extern bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new, - struct list_head *prev, - struct list_head *next); +bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new, + struct list_head *prev, + struct list_head *next); /* * Performs list corruption checks before __list_add(). 
Returns false if a @@ -93,7 +93,7 @@ static __always_inline bool __list_add_valid(struct list_head *new, * Performs the full set of list corruption checks before __list_del_entry(). * On list corruption reports a warning, and returns false. */ -extern bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry); +bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry); /* * Performs list corruption checks before __list_del_entry(). Returns false if a diff --git a/include/linux/llist.h b/include/linux/llist.h index 2c982ff7475a..27b17f64bcee 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -223,9 +223,26 @@ static inline struct llist_node *llist_next(struct llist_node *node) return node->next; } -extern bool llist_add_batch(struct llist_node *new_first, - struct llist_node *new_last, - struct llist_head *head); +/** + * llist_add_batch - add several linked entries in batch + * @new_first: first entry in batch to be added + * @new_last: last entry in batch to be added + * @head: the head for your lock-less list + * + * Return whether list is empty before adding. 
+ */ +static inline bool llist_add_batch(struct llist_node *new_first, + struct llist_node *new_last, + struct llist_head *head) +{ + struct llist_node *first = READ_ONCE(head->first); + + do { + new_last->next = first; + } while (!try_cmpxchg(&head->first, &first, new_first)); + + return !first; +} static inline bool __llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 6f9242259edc..6de479ebbe5d 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -151,6 +151,5 @@ enum OID { extern enum OID look_up_OID(const void *data, size_t datasize); extern int parse_OID(const void *data, size_t datasize, enum OID *oid); extern int sprint_oid(const void *, size_t, char *, size_t); -extern int sprint_OID(enum OID, char *, size_t); #endif /* _LINUX_OID_REGISTRY_H */ diff --git a/include/linux/relay.h b/include/linux/relay.h index 72b876dd5cb8..b3224111d074 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -159,9 +159,6 @@ struct rchan *relay_open(const char *base_filename, size_t n_subbufs, const struct rchan_callbacks *cb, void *private_data); -extern int relay_late_setup_files(struct rchan *chan, - const char *base_filename, - struct dentry *parent); extern void relay_close(struct rchan *chan); extern void relay_flush(struct rchan *chan); extern void relay_subbufs_consumed(struct rchan *chan, diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index e49c32b0f394..dd8afe511242 100644 --- a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -391,13 +391,8 @@ struct rio_dev *rio_dev_get(struct rio_dev *); void rio_dev_put(struct rio_dev *); #ifdef CONFIG_RAPIDIO_DMA_ENGINE -extern struct dma_chan *rio_request_dma(struct rio_dev *rdev); extern struct dma_chan *rio_request_mport_dma(struct rio_mport *mport); extern void rio_release_dma(struct dma_chan *dchan); -extern struct dma_async_tx_descriptor *rio_dma_prep_slave_sg( 
- struct rio_dev *rdev, struct dma_chan *dchan, - struct rio_dma_data *data, - enum dma_transfer_direction direction, unsigned long flags); extern struct dma_async_tx_descriptor *rio_dma_prep_xfer( struct dma_chan *dchan, u16 destid, struct rio_dma_data *data, diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 138e2f1bd08f..0cdbfc42f153 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -95,6 +95,28 @@ static inline bool sg_is_last(struct scatterlist *sg) } /** + * sg_next - return the next scatterlist entry in a list + * @sg: The current sg entry + * + * Description: + * Usually the next entry will be @sg@ + 1, but if this sg element is part + * of a chained scatterlist, it could jump to the start of a new + * scatterlist array. + * + **/ +static inline struct scatterlist *sg_next(struct scatterlist *sg) +{ + if (sg_is_last(sg)) + return NULL; + + sg++; + if (unlikely(sg_is_chain(sg))) + sg = sg_chain_ptr(sg); + + return sg; +} + +/** * sg_assign_page - Assign a given page to an SG entry * @sg: SG entry * @page: The page @@ -418,7 +440,6 @@ static inline void sg_init_marker(struct scatterlist *sgl, int sg_nents(struct scatterlist *sg); int sg_nents_for_len(struct scatterlist *sg, u64 len); -struct scatterlist *sg_next(struct scatterlist *); struct scatterlist *sg_last(struct scatterlist *s, unsigned int); void sg_init_table(struct scatterlist *, unsigned int); void sg_init_one(struct scatterlist *, const void *, unsigned int); diff --git a/include/linux/sched.h b/include/linux/sched.h index 1f054f1f11b5..aa9c5be7a632 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1240,7 +1240,11 @@ struct task_struct { #endif #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER - struct mutex *blocker_mutex; + /* + * Encoded lock address causing task block (lower 2 bits = type from + * <linux/hung_task.h>). Accessed via hung_task_*() helpers. 
+ */ + unsigned long blocker; #endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index cffad65bdc6a..85c5a6392e02 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -106,7 +106,6 @@ static inline unsigned long stack_not_used(struct task_struct *p) #endif extern void set_task_stack_end_magic(struct task_struct *tsk); -#ifndef __HAVE_ARCH_KSTACK_END static inline int kstack_end(void *addr) { /* Reliable end of stack detection: @@ -114,6 +113,5 @@ static inline int kstack_end(void *addr) */ return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*))); } -#endif #endif /* _LINUX_SCHED_TASK_STACK_H */ diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h index 04655faadc2d..89706157e622 100644 --- a/include/linux/semaphore.h +++ b/include/linux/semaphore.h @@ -16,13 +16,25 @@ struct semaphore { raw_spinlock_t lock; unsigned int count; struct list_head wait_list; + +#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER + unsigned long last_holder; +#endif }; +#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER +#define __LAST_HOLDER_SEMAPHORE_INITIALIZER \ + , .last_holder = 0UL +#else +#define __LAST_HOLDER_SEMAPHORE_INITIALIZER +#endif + #define __SEMAPHORE_INITIALIZER(name, n) \ { \ .lock = __RAW_SPIN_LOCK_UNLOCKED((name).lock), \ .count = n, \ - .wait_list = LIST_HEAD_INIT((name).wait_list), \ + .wait_list = LIST_HEAD_INIT((name).wait_list) \ + __LAST_HOLDER_SEMAPHORE_INITIALIZER \ } /* @@ -47,5 +59,6 @@ extern int __must_check down_killable(struct semaphore *sem); extern int __must_check down_trylock(struct semaphore *sem); extern int __must_check down_timeout(struct semaphore *sem, long jiffies); extern void up(struct semaphore *sem); +extern unsigned long sem_last_holder(struct semaphore *sem); #endif /* __LINUX_SEMAPHORE_H */ diff --git a/include/linux/sort.h b/include/linux/sort.h index 8e5603b10941..c01ef804a0eb 100644 --- a/include/linux/sort.h +++ 
b/include/linux/sort.h @@ -4,6 +4,16 @@ #include <linux/types.h> +/** + * cmp_int - perform a three-way comparison of the arguments + * @l: the left argument + * @r: the right argument + * + * Return: 1 if the left argument is greater than the right one; 0 if the + * arguments are equal; -1 if the left argument is less than the right one. + */ +#define cmp_int(l, r) (((l) > (r)) - ((l) < (r))) + void sort_r(void *base, size_t num, size_t size, cmp_r_func_t cmp_func, swap_r_func_t swap_func, diff --git a/include/linux/types.h b/include/linux/types.h index 49b79c8bb1a9..6dfdb8e8e4c3 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -136,6 +136,10 @@ typedef s64 ktime_t; typedef u64 sector_t; typedef u64 blkcnt_t; +/* generic data direction definitions */ +#define READ 0 +#define WRITE 1 + /* * The type of an index into the pagecache. */ diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h index 76ca2b83c13e..9373962aade9 100644 --- a/include/linux/util_macros.h +++ b/include/linux/util_macros.h @@ -83,6 +83,72 @@ }) /** + * PTR_IF - evaluate to @ptr if @cond is true, or to NULL otherwise. + * @cond: A conditional, usually in a form of IS_ENABLED(CONFIG_FOO) + * @ptr: A pointer to assign if @cond is true. + * + * PTR_IF(IS_ENABLED(CONFIG_FOO), ptr) evaluates to @ptr if CONFIG_FOO is set + * to 'y' or 'm', or to NULL otherwise. The @ptr argument must be a pointer. + * + * The macro can be very useful to help the compiler drop dead code. + * + * For instance, consider the following:: + * + * #ifdef CONFIG_FOO_SUSPEND + * static int foo_suspend(struct device *dev) + * { + * ... + * } + * #endif + * + * static struct pm_ops foo_ops = { + * #ifdef CONFIG_FOO_SUSPEND + * .suspend = foo_suspend, + * #endif + * }; + * + * While this works, the foo_suspend() function is compiled conditionally, + * only when CONFIG_FOO_SUSPEND is set.
This is problematic, as there could + * be a build bug in this function, we wouldn't have a way to know unless + * the configuration option is set. + * + * An alternative is to declare foo_suspend() always, but mark it + * as __maybe_unused. This works, but the __maybe_unused attribute + * is required to instruct the compiler that the function may not + * be referenced anywhere, and is safe to remove without making + * a fuss about it. This makes the programmer responsible for tagging + * the functions that can be garbage-collected. + * + * With the macro it is possible to write the following: + * + * static int foo_suspend(struct device *dev) + * { + * ... + * } + * + * static struct pm_ops foo_ops = { + * .suspend = PTR_IF(IS_ENABLED(CONFIG_FOO_SUSPEND), foo_suspend), + * }; + * + * The foo_suspend() function will now be automatically dropped by the + * compiler, and it does not require any specific attribute. + */ +#define PTR_IF(cond, ptr) ((cond) ? (ptr) : NULL) + +/** + * u64_to_user_ptr - cast a pointer passed as u64 from user space to void __user * + * @x: The u64 value from user space, usually via IOCTL + * + * u64_to_user_ptr() simply casts a pointer passed as u64 from user space to void + * __user * correctly. Using this lets us get rid of all the tiresome casts. + */ +#define u64_to_user_ptr(x) \ +({ \ + typecheck(u64, (x)); \ + (void __user *)(uintptr_t)(x); \ +}) + +/** * is_insidevar - check if the @ptr points inside the @var memory range. * @ptr: the pointer to a memory address. * @var: the variable which address and size identify the memory range. diff --git a/include/soc/qcom/qcom-spmi-pmic.h b/include/soc/qcom/qcom-spmi-pmic.h index a62d500a6fda..df3d3a0af98a 100644 --- a/include/soc/qcom/qcom-spmi-pmic.h +++ b/include/soc/qcom/qcom-spmi-pmic.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* Copyright (c) 2022 Linaro. All rights reserved.
- * Author: Caleb Connolly <caleb.connolly@linaro.org> + * Author: Casey Connolly <casey.connolly@linaro.org> */ #ifndef __QCOM_SPMI_PMIC_H__ diff --git a/init/main.c b/init/main.c index d2ffb1a10084..ed576c7f475d 100644 --- a/init/main.c +++ b/init/main.c @@ -1216,6 +1216,12 @@ trace_initcall_finish_cb(void *data, initcall_t fn, int ret) fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime)); } +static __init_or_module void +trace_initcall_level_cb(void *data, const char *level) +{ + printk(KERN_DEBUG "entering initcall level: %s\n", level); +} + static ktime_t initcall_calltime; #ifdef TRACEPOINTS_ENABLED @@ -1227,10 +1233,12 @@ static void __init initcall_debug_enable(void) &initcall_calltime); ret |= register_trace_initcall_finish(trace_initcall_finish_cb, &initcall_calltime); + ret |= register_trace_initcall_level(trace_initcall_level_cb, NULL); WARN(ret, "Failed to register initcall tracepoints\n"); } # define do_trace_initcall_start trace_initcall_start # define do_trace_initcall_finish trace_initcall_finish +# define do_trace_initcall_level trace_initcall_level #else static inline void do_trace_initcall_start(initcall_t fn) { @@ -1244,6 +1252,12 @@ static inline void do_trace_initcall_finish(initcall_t fn, int ret) return; trace_initcall_finish_cb(&initcall_calltime, fn, ret); } +static inline void do_trace_initcall_level(const char *level) +{ + if (!initcall_debug) + return; + trace_initcall_level_cb(NULL, level); +} #endif /* !TRACEPOINTS_ENABLED */ int __init_or_module do_one_initcall(initcall_t fn) @@ -1316,7 +1330,7 @@ static void __init do_initcall_level(int level, char *command_line) level, level, NULL, ignore_unknown_bootoption); - trace_initcall_level(initcall_level_names[level]); + do_trace_initcall_level(initcall_level_names[level]); for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++) do_one_initcall(initcall_from_entry(fn)); } @@ -1360,7 +1374,7 @@ static void __init do_pre_smp_initcalls(void) { initcall_entry_t *fn; 
- trace_initcall_level("early"); + do_trace_initcall_level("early"); for (fn = __initcall_start; fn < __initcall0_start; fn++) do_one_initcall(initcall_from_entry(fn)); } diff --git a/ipc/shm.c b/ipc/shm.c index 99564c870084..492fcc699985 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -431,8 +431,11 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) void shm_destroy_orphaned(struct ipc_namespace *ns) { down_write(&shm_ids(ns).rwsem); - if (shm_ids(ns).in_use) + if (shm_ids(ns).in_use) { + rcu_read_lock(); idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); + rcu_read_unlock(); + } up_write(&shm_ids(ns).rwsem); } diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index 4fa212909d69..e64ce21f9a80 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -38,8 +38,7 @@ config KEXEC config KEXEC_FILE bool "Enable kexec file based system call" depends on ARCH_SUPPORTS_KEXEC_FILE - select CRYPTO - select CRYPTO_SHA256 + select CRYPTO_LIB_SHA256 select KEXEC_CORE help This is new version of kexec system call. This system call is @@ -130,6 +129,23 @@ config CRASH_DUMP For s390, this option also enables zfcpdump. See also <file:Documentation/arch/s390/zfcpdump.rst> +config CRASH_DM_CRYPT + bool "Support saving crash dump to dm-crypt encrypted volume" + depends on KEXEC_FILE + depends on CRASH_DUMP + depends on DM_CRYPT + help + With this option enabled, user space can intereact with + /sys/kernel/config/crash_dm_crypt_keys to make the dm crypt keys + persistent for the dump-capture kernel. + +config CRASH_DM_CRYPT_CONFIGS + def_tristate CRASH_DM_CRYPT + select CONFIGFS_FS + help + CRASH_DM_CRYPT cannot directly select CONFIGFS_FS, because that + is required to be built-in. 
+ config CRASH_HOTPLUG bool "Update the crash elfcorehdr on system configuration changes" default y diff --git a/kernel/Makefile b/kernel/Makefile index 97c09847db42..32e80dd626af 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -77,6 +77,7 @@ obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o elfcorehdr.o obj-$(CONFIG_CRASH_RESERVE) += crash_reserve.o obj-$(CONFIG_KEXEC_CORE) += kexec_core.o obj-$(CONFIG_CRASH_DUMP) += crash_core.o +obj-$(CONFIG_CRASH_DM_CRYPT) += crash_dump_dm_crypt.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_KEXEC_FILE) += kexec_file.o obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o diff --git a/kernel/crash_dump_dm_crypt.c b/kernel/crash_dump_dm_crypt.c new file mode 100644 index 000000000000..401423ba477d --- /dev/null +++ b/kernel/crash_dump_dm_crypt.c @@ -0,0 +1,464 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/key.h> +#include <linux/keyctl.h> +#include <keys/user-type.h> +#include <linux/crash_dump.h> +#include <linux/cc_platform.h> +#include <linux/configfs.h> +#include <linux/module.h> + +#define KEY_NUM_MAX 128 /* maximum dm crypt keys */ +#define KEY_SIZE_MAX 256 /* maximum dm crypt key size */ +#define KEY_DESC_MAX_LEN 128 /* maximum dm crypt key description size */ + +static unsigned int key_count; + +struct dm_crypt_key { + unsigned int key_size; + char key_desc[KEY_DESC_MAX_LEN]; + u8 data[KEY_SIZE_MAX]; +}; + +static struct keys_header { + unsigned int total_keys; + struct dm_crypt_key keys[] __counted_by(total_keys); +} *keys_header; + +static size_t get_keys_header_size(size_t total_keys) +{ + return struct_size(keys_header, keys, total_keys); +} + +unsigned long long dm_crypt_keys_addr; +EXPORT_SYMBOL_GPL(dm_crypt_keys_addr); + +static int __init setup_dmcryptkeys(char *arg) +{ + char *end; + + if (!arg) + return -EINVAL; + dm_crypt_keys_addr = memparse(arg, &end); + if (end > arg) + return 0; + + dm_crypt_keys_addr = 0; + return -EINVAL; +} + +early_param("dmcryptkeys", setup_dmcryptkeys); + +/* + * Architectures may 
override this function to read dm crypt keys + */ +ssize_t __weak dm_crypt_keys_read(char *buf, size_t count, u64 *ppos) +{ + struct kvec kvec = { .iov_base = buf, .iov_len = count }; + struct iov_iter iter; + + iov_iter_kvec(&iter, READ, &kvec, 1, count); + return read_from_oldmem(&iter, count, ppos, cc_platform_has(CC_ATTR_MEM_ENCRYPT)); +} + +static int add_key_to_keyring(struct dm_crypt_key *dm_key, + key_ref_t keyring_ref) +{ + key_ref_t key_ref; + int r; + + /* create or update the requested key and add it to the target keyring */ + key_ref = key_create_or_update(keyring_ref, "user", dm_key->key_desc, + dm_key->data, dm_key->key_size, + KEY_USR_ALL, KEY_ALLOC_IN_QUOTA); + + if (!IS_ERR(key_ref)) { + r = key_ref_to_ptr(key_ref)->serial; + key_ref_put(key_ref); + kexec_dprintk("Success adding key %s", dm_key->key_desc); + } else { + r = PTR_ERR(key_ref); + kexec_dprintk("Error when adding key"); + } + + key_ref_put(keyring_ref); + return r; +} + +static void get_keys_from_kdump_reserved_memory(void) +{ + struct keys_header *keys_header_loaded; + + arch_kexec_unprotect_crashkres(); + + keys_header_loaded = kmap_local_page(pfn_to_page( + kexec_crash_image->dm_crypt_keys_addr >> PAGE_SHIFT)); + + memcpy(keys_header, keys_header_loaded, get_keys_header_size(key_count)); + kunmap_local(keys_header_loaded); + arch_kexec_protect_crashkres(); +} + +static int restore_dm_crypt_keys_to_thread_keyring(void) +{ + struct dm_crypt_key *key; + size_t keys_header_size; + key_ref_t keyring_ref; + u64 addr; + + /* find the target keyring (which must be writable) */ + keyring_ref = + lookup_user_key(KEY_SPEC_USER_KEYRING, 0x01, KEY_NEED_WRITE); + if (IS_ERR(keyring_ref)) { + kexec_dprintk("Failed to get the user keyring\n"); + return PTR_ERR(keyring_ref); + } + + addr = dm_crypt_keys_addr; + dm_crypt_keys_read((char *)&key_count, sizeof(key_count), &addr); + if (key_count < 0 || key_count > KEY_NUM_MAX) { + kexec_dprintk("Failed to read the number of dm-crypt keys\n"); + return 
-1; + } + + kexec_dprintk("There are %u keys\n", key_count); + addr = dm_crypt_keys_addr; + + keys_header_size = get_keys_header_size(key_count); + keys_header = kzalloc(keys_header_size, GFP_KERNEL); + if (!keys_header) + return -ENOMEM; + + dm_crypt_keys_read((char *)keys_header, keys_header_size, &addr); + + for (int i = 0; i < keys_header->total_keys; i++) { + key = &keys_header->keys[i]; + kexec_dprintk("Get key (size=%u)\n", key->key_size); + add_key_to_keyring(key, keyring_ref); + } + + return 0; +} + +static int read_key_from_user_keying(struct dm_crypt_key *dm_key) +{ + const struct user_key_payload *ukp; + struct key *key; + + kexec_dprintk("Requesting logon key %s", dm_key->key_desc); + key = request_key(&key_type_logon, dm_key->key_desc, NULL); + + if (IS_ERR(key)) { + pr_warn("No such logon key %s\n", dm_key->key_desc); + return PTR_ERR(key); + } + + ukp = user_key_payload_locked(key); + if (!ukp) + return -EKEYREVOKED; + + if (ukp->datalen > KEY_SIZE_MAX) { + pr_err("Key size %u exceeds maximum (%u)\n", ukp->datalen, KEY_SIZE_MAX); + return -EINVAL; + } + + memcpy(dm_key->data, ukp->data, ukp->datalen); + dm_key->key_size = ukp->datalen; + kexec_dprintk("Get dm crypt key (size=%u) %s: %8ph\n", dm_key->key_size, + dm_key->key_desc, dm_key->data); + return 0; +} + +struct config_key { + struct config_item item; + const char *description; +}; + +static inline struct config_key *to_config_key(struct config_item *item) +{ + return container_of(item, struct config_key, item); +} + +static ssize_t config_key_description_show(struct config_item *item, char *page) +{ + return sprintf(page, "%s\n", to_config_key(item)->description); +} + +static ssize_t config_key_description_store(struct config_item *item, + const char *page, size_t count) +{ + struct config_key *config_key = to_config_key(item); + size_t len; + int ret; + + ret = -EINVAL; + len = strcspn(page, "\n"); + + if (len > KEY_DESC_MAX_LEN) { + pr_err("The key description shouldn't exceed %u 
characters", KEY_DESC_MAX_LEN); + return ret; + } + + if (!len) + return ret; + + kfree(config_key->description); + ret = -ENOMEM; + config_key->description = kmemdup_nul(page, len, GFP_KERNEL); + if (!config_key->description) + return ret; + + return count; +} + +CONFIGFS_ATTR(config_key_, description); + +static struct configfs_attribute *config_key_attrs[] = { + &config_key_attr_description, + NULL, +}; + +static void config_key_release(struct config_item *item) +{ + kfree(to_config_key(item)); + key_count--; +} + +static struct configfs_item_operations config_key_item_ops = { + .release = config_key_release, +}; + +static const struct config_item_type config_key_type = { + .ct_item_ops = &config_key_item_ops, + .ct_attrs = config_key_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct config_item *config_keys_make_item(struct config_group *group, + const char *name) +{ + struct config_key *config_key; + + if (key_count > KEY_NUM_MAX) { + pr_err("Only %u keys at maximum to be created\n", KEY_NUM_MAX); + return ERR_PTR(-EINVAL); + } + + config_key = kzalloc(sizeof(struct config_key), GFP_KERNEL); + if (!config_key) + return ERR_PTR(-ENOMEM); + + config_item_init_type_name(&config_key->item, name, &config_key_type); + + key_count++; + + return &config_key->item; +} + +static ssize_t config_keys_count_show(struct config_item *item, char *page) +{ + return sprintf(page, "%d\n", key_count); +} + +CONFIGFS_ATTR_RO(config_keys_, count); + +static bool is_dm_key_reused; + +static ssize_t config_keys_reuse_show(struct config_item *item, char *page) +{ + return sprintf(page, "%d\n", is_dm_key_reused); +} + +static ssize_t config_keys_reuse_store(struct config_item *item, + const char *page, size_t count) +{ + if (!kexec_crash_image || !kexec_crash_image->dm_crypt_keys_addr) { + kexec_dprintk( + "dm-crypt keys haven't be saved to crash-reserved memory\n"); + return -EINVAL; + } + + if (kstrtobool(page, &is_dm_key_reused)) + return -EINVAL; + + if (is_dm_key_reused) + 
get_keys_from_kdump_reserved_memory(); + + return count; +} + +CONFIGFS_ATTR(config_keys_, reuse); + +static struct configfs_attribute *config_keys_attrs[] = { + &config_keys_attr_count, + &config_keys_attr_reuse, + NULL, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. + */ +static struct configfs_group_operations config_keys_group_ops = { + .make_item = config_keys_make_item, +}; + +static const struct config_item_type config_keys_type = { + .ct_group_ops = &config_keys_group_ops, + .ct_attrs = config_keys_attrs, + .ct_owner = THIS_MODULE, +}; + +static bool restore; + +static ssize_t config_keys_restore_show(struct config_item *item, char *page) +{ + return sprintf(page, "%d\n", restore); +} + +static ssize_t config_keys_restore_store(struct config_item *item, + const char *page, size_t count) +{ + if (!restore) + restore_dm_crypt_keys_to_thread_keyring(); + + if (kstrtobool(page, &restore)) + return -EINVAL; + + return count; +} + +CONFIGFS_ATTR(config_keys_, restore); + +static struct configfs_attribute *kdump_config_keys_attrs[] = { + &config_keys_attr_restore, + NULL, +}; + +static const struct config_item_type kdump_config_keys_type = { + .ct_attrs = kdump_config_keys_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem config_keys_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "crash_dm_crypt_keys", + .ci_type = &config_keys_type, + }, + }, +}; + +static int build_keys_header(void) +{ + struct config_item *item = NULL; + struct config_key *key; + int i, r; + + if (keys_header != NULL) + kvfree(keys_header); + + keys_header = kzalloc(get_keys_header_size(key_count), GFP_KERNEL); + if (!keys_header) + return -ENOMEM; + + keys_header->total_keys = key_count; + + i = 0; + list_for_each_entry(item, &config_keys_subsys.su_group.cg_children, + ci_entry) { + if (item->ci_type != &config_key_type) + continue; + + key = to_config_key(item); + + if (!key->description) { + 
pr_warn("No key description for key %s\n", item->ci_name); + return -EINVAL; + } + + strscpy(keys_header->keys[i].key_desc, key->description, + KEY_DESC_MAX_LEN); + r = read_key_from_user_keying(&keys_header->keys[i]); + if (r != 0) { + kexec_dprintk("Failed to read key %s\n", + keys_header->keys[i].key_desc); + return r; + } + i++; + kexec_dprintk("Found key: %s\n", item->ci_name); + } + + return 0; +} + +int crash_load_dm_crypt_keys(struct kimage *image) +{ + struct kexec_buf kbuf = { + .image = image, + .buf_min = 0, + .buf_max = ULONG_MAX, + .top_down = false, + .random = true, + }; + int r; + + + if (key_count <= 0) { + kexec_dprintk("No dm-crypt keys\n"); + return -ENOENT; + } + + if (!is_dm_key_reused) { + image->dm_crypt_keys_addr = 0; + r = build_keys_header(); + if (r) + return r; + } + + kbuf.buffer = keys_header; + kbuf.bufsz = get_keys_header_size(key_count); + + kbuf.memsz = kbuf.bufsz; + kbuf.buf_align = ELF_CORE_HEADER_ALIGN; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + r = kexec_add_buffer(&kbuf); + if (r) { + kvfree((void *)kbuf.buffer); + return r; + } + image->dm_crypt_keys_addr = kbuf.mem; + image->dm_crypt_keys_sz = kbuf.bufsz; + kexec_dprintk( + "Loaded dm crypt keys to kexec_buffer bufsz=0x%lx memsz=0x%lx\n", + kbuf.bufsz, kbuf.memsz); + + return r; +} + +static int __init configfs_dmcrypt_keys_init(void) +{ + int ret; + + if (is_kdump_kernel()) { + config_keys_subsys.su_group.cg_item.ci_type = + &kdump_config_keys_type; + } + + config_group_init(&config_keys_subsys.su_group); + mutex_init(&config_keys_subsys.su_mutex); + ret = configfs_register_subsystem(&config_keys_subsys); + if (ret) { + pr_err("Error %d while registering subsystem %s\n", ret, + config_keys_subsys.su_group.cg_item.ci_namebuf); + goto out_unregister; + } + + return 0; + +out_unregister: + configfs_unregister_subsystem(&config_keys_subsys); + + return ret; +} + +module_init(configfs_dmcrypt_keys_init); diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c index 
aff7c0fdbefa..acb6bf42e30d 100644 --- a/kernel/crash_reserve.c +++ b/kernel/crash_reserve.c @@ -131,7 +131,7 @@ static int __init parse_crashkernel_mem(char *cmdline, cur++; *crash_base = memparse(cur, &tmp); if (cur == tmp) { - pr_warn("crahskernel: Memory value expected after '@'\n"); + pr_warn("crashkernel: Memory value expected after '@'\n"); return -EINVAL; } } diff --git a/kernel/delayacct.c b/kernel/delayacct.c index eb63a021ac04..30e7912ebb0d 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -14,6 +14,15 @@ #include <linux/delayacct.h> #include <linux/module.h> +#define UPDATE_DELAY(type) \ +do { \ + d->type##_delay_max = tsk->delays->type##_delay_max; \ + d->type##_delay_min = tsk->delays->type##_delay_min; \ + tmp = d->type##_delay_total + tsk->delays->type##_delay; \ + d->type##_delay_total = (tmp < d->type##_delay_total) ? 0 : tmp; \ + d->type##_count += tsk->delays->type##_count; \ +} while (0) + DEFINE_STATIC_KEY_FALSE(delayacct_key); int delayacct_on __read_mostly; /* Delay accounting turned on/off */ struct kmem_cache *delayacct_cache; @@ -173,41 +182,13 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */ raw_spin_lock_irqsave(&tsk->delays->lock, flags); - d->blkio_delay_max = tsk->delays->blkio_delay_max; - d->blkio_delay_min = tsk->delays->blkio_delay_min; - tmp = d->blkio_delay_total + tsk->delays->blkio_delay; - d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp; - d->swapin_delay_max = tsk->delays->swapin_delay_max; - d->swapin_delay_min = tsk->delays->swapin_delay_min; - tmp = d->swapin_delay_total + tsk->delays->swapin_delay; - d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 
0 : tmp; - d->freepages_delay_max = tsk->delays->freepages_delay_max; - d->freepages_delay_min = tsk->delays->freepages_delay_min; - tmp = d->freepages_delay_total + tsk->delays->freepages_delay; - d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp; - d->thrashing_delay_max = tsk->delays->thrashing_delay_max; - d->thrashing_delay_min = tsk->delays->thrashing_delay_min; - tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay; - d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp; - d->compact_delay_max = tsk->delays->compact_delay_max; - d->compact_delay_min = tsk->delays->compact_delay_min; - tmp = d->compact_delay_total + tsk->delays->compact_delay; - d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp; - d->wpcopy_delay_max = tsk->delays->wpcopy_delay_max; - d->wpcopy_delay_min = tsk->delays->wpcopy_delay_min; - tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay; - d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp; - d->irq_delay_max = tsk->delays->irq_delay_max; - d->irq_delay_min = tsk->delays->irq_delay_min; - tmp = d->irq_delay_total + tsk->delays->irq_delay; - d->irq_delay_total = (tmp < d->irq_delay_total) ? 
0 : tmp; - d->blkio_count += tsk->delays->blkio_count; - d->swapin_count += tsk->delays->swapin_count; - d->freepages_count += tsk->delays->freepages_count; - d->thrashing_count += tsk->delays->thrashing_count; - d->compact_count += tsk->delays->compact_count; - d->wpcopy_count += tsk->delays->wpcopy_count; - d->irq_count += tsk->delays->irq_count; + UPDATE_DELAY(blkio); + UPDATE_DELAY(swapin); + UPDATE_DELAY(freepages); + UPDATE_DELAY(thrashing); + UPDATE_DELAY(compact); + UPDATE_DELAY(wpcopy); + UPDATE_DELAY(irq); raw_spin_unlock_irqrestore(&tsk->delays->lock, flags); return 0; diff --git a/kernel/exit.c b/kernel/exit.c index 38645039dd8f..bd743900354c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -421,44 +421,30 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent) } } -static void coredump_task_exit(struct task_struct *tsk) +static void coredump_task_exit(struct task_struct *tsk, + struct core_state *core_state) { - struct core_state *core_state; + struct core_thread self; + self.task = tsk; + if (self.task->flags & PF_SIGNALED) + self.next = xchg(&core_state->dumper.next, &self); + else + self.task = NULL; /* - * Serialize with any possible pending coredump. - * We must hold siglock around checking core_state - * and setting PF_POSTCOREDUMP. The core-inducing thread - * will increment ->nr_threads for each thread in the - * group without PF_POSTCOREDUMP set. + * Implies mb(), the result of xchg() must be visible + * to core_state->dumper. */ - spin_lock_irq(&tsk->sighand->siglock); - tsk->flags |= PF_POSTCOREDUMP; - core_state = tsk->signal->core_state; - spin_unlock_irq(&tsk->sighand->siglock); - if (core_state) { - struct core_thread self; - - self.task = current; - if (self.task->flags & PF_SIGNALED) - self.next = xchg(&core_state->dumper.next, &self); - else - self.task = NULL; - /* - * Implies mb(), the result of xchg() must be visible - * to core_state->dumper. 
- */ - if (atomic_dec_and_test(&core_state->nr_threads)) - complete(&core_state->startup); + if (atomic_dec_and_test(&core_state->nr_threads)) + complete(&core_state->startup); - for (;;) { - set_current_state(TASK_IDLE|TASK_FREEZABLE); - if (!self.task) /* see coredump_finish() */ - break; - schedule(); - } - __set_current_state(TASK_RUNNING); + for (;;) { + set_current_state(TASK_IDLE|TASK_FREEZABLE); + if (!self.task) /* see coredump_finish() */ + break; + schedule(); } + __set_current_state(TASK_RUNNING); } #ifdef CONFIG_MEMCG @@ -882,6 +868,7 @@ static void synchronize_group_exit(struct task_struct *tsk, long code) { struct sighand_struct *sighand = tsk->sighand; struct signal_struct *signal = tsk->signal; + struct core_state *core_state; spin_lock_irq(&sighand->siglock); signal->quick_threads--; @@ -891,7 +878,19 @@ static void synchronize_group_exit(struct task_struct *tsk, long code) signal->group_exit_code = code; signal->group_stop_count = 0; } + /* + * Serialize with any possible pending coredump. + * We must hold siglock around checking core_state + * and setting PF_POSTCOREDUMP. The core-inducing thread + * will increment ->nr_threads for each thread in the + * group without PF_POSTCOREDUMP set. 
+ */ + tsk->flags |= PF_POSTCOREDUMP; + core_state = signal->core_state; spin_unlock_irq(&sighand->siglock); + + if (unlikely(core_state)) + coredump_task_exit(tsk, core_state); } void __noreturn do_exit(long code) @@ -900,15 +899,12 @@ void __noreturn do_exit(long code) int group_dead; WARN_ON(irqs_disabled()); - - synchronize_group_exit(tsk, code); - WARN_ON(tsk->plug); kcov_task_exit(tsk); kmsan_task_exit(tsk); - coredump_task_exit(tsk); + synchronize_group_exit(tsk, code); ptrace_event(PTRACE_EVENT_EXIT, code); user_events_exit(tsk); diff --git a/kernel/hung_task.c b/kernel/hung_task.c index dc898ec93463..d2432df2b905 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -22,6 +22,7 @@ #include <linux/sched/signal.h> #include <linux/sched/debug.h> #include <linux/sched/sysctl.h> +#include <linux/hung_task.h> #include <trace/events/sched.h> @@ -98,30 +99,62 @@ static struct notifier_block panic_block = { static void debug_show_blocker(struct task_struct *task) { struct task_struct *g, *t; - unsigned long owner; - struct mutex *lock; + unsigned long owner, blocker, blocker_type; RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "No rcu lock held"); - lock = READ_ONCE(task->blocker_mutex); - if (!lock) + blocker = READ_ONCE(task->blocker); + if (!blocker) return; - owner = mutex_get_owner(lock); + blocker_type = hung_task_get_blocker_type(blocker); + + switch (blocker_type) { + case BLOCKER_TYPE_MUTEX: + owner = mutex_get_owner( + (struct mutex *)hung_task_blocker_to_lock(blocker)); + break; + case BLOCKER_TYPE_SEM: + owner = sem_last_holder( + (struct semaphore *)hung_task_blocker_to_lock(blocker)); + break; + default: + WARN_ON_ONCE(1); + return; + } + + if (unlikely(!owner)) { - pr_err("INFO: task %s:%d is blocked on a mutex, but the owner is not found.\n", - task->comm, task->pid); + switch (blocker_type) { + case BLOCKER_TYPE_MUTEX: + pr_err("INFO: task %s:%d is blocked on a mutex, but the owner is not found.\n", + task->comm, task->pid); + break; + case 
BLOCKER_TYPE_SEM: + pr_err("INFO: task %s:%d is blocked on a semaphore, but the last holder is not found.\n", + task->comm, task->pid); + break; + } return; } /* Ensure the owner information is correct. */ for_each_process_thread(g, t) { - if ((unsigned long)t == owner) { + if ((unsigned long)t != owner) + continue; + + switch (blocker_type) { + case BLOCKER_TYPE_MUTEX: pr_err("INFO: task %s:%d is blocked on a mutex likely owned by task %s:%d.\n", - task->comm, task->pid, t->comm, t->pid); - sched_show_task(t); - return; + task->comm, task->pid, t->comm, t->pid); + break; + case BLOCKER_TYPE_SEM: + pr_err("INFO: task %s:%d blocked on a semaphore likely last held by task %s:%d\n", + task->comm, task->pid, t->comm, t->pid); + break; } + sched_show_task(t); + return; } } #else diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 10a78358257b..69fe76fd9233 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -19,7 +19,6 @@ #include <linux/list.h> #include <linux/fs.h> #include <linux/ima.h> -#include <crypto/hash.h> #include <crypto/sha2.h> #include <linux/elf.h> #include <linux/elfcore.h> @@ -474,6 +473,7 @@ static int locate_mem_hole_top_down(unsigned long start, unsigned long end, temp_end = min(end, kbuf->buf_max); temp_start = temp_end - kbuf->memsz + 1; + kexec_random_range_start(temp_start, temp_end, kbuf, &temp_start); do { /* align down start */ @@ -518,6 +518,8 @@ static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end, temp_start = max(start, kbuf->buf_min); + kexec_random_range_start(temp_start, end, kbuf, &temp_start); + do { temp_start = ALIGN(temp_start, kbuf->buf_align); temp_end = temp_start + kbuf->memsz - 1; @@ -749,11 +751,10 @@ int kexec_add_buffer(struct kexec_buf *kbuf) /* Calculate and store the digest of segments */ static int kexec_calculate_store_digests(struct kimage *image) { - struct crypto_shash *tfm; - struct shash_desc *desc; + struct sha256_state state; int ret = 0, i, j, zero_buf_sz, sha_region_sz; 
- size_t desc_size, nullsz; - char *digest; + size_t nullsz; + u8 digest[SHA256_DIGEST_SIZE]; void *zero_buf; struct kexec_sha_region *sha_regions; struct purgatory_info *pi = &image->purgatory_info; @@ -764,37 +765,12 @@ static int kexec_calculate_store_digests(struct kimage *image) zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT); zero_buf_sz = PAGE_SIZE; - tfm = crypto_alloc_shash("sha256", 0, 0); - if (IS_ERR(tfm)) { - ret = PTR_ERR(tfm); - goto out; - } - - desc_size = crypto_shash_descsize(tfm) + sizeof(*desc); - desc = kzalloc(desc_size, GFP_KERNEL); - if (!desc) { - ret = -ENOMEM; - goto out_free_tfm; - } - sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region); sha_regions = vzalloc(sha_region_sz); - if (!sha_regions) { - ret = -ENOMEM; - goto out_free_desc; - } - - desc->tfm = tfm; - - ret = crypto_shash_init(desc); - if (ret < 0) - goto out_free_sha_regions; + if (!sha_regions) + return -ENOMEM; - digest = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); - if (!digest) { - ret = -ENOMEM; - goto out_free_sha_regions; - } + sha256_init(&state); for (j = i = 0; i < image->nr_segments; i++) { struct kexec_segment *ksegment; @@ -820,10 +796,7 @@ static int kexec_calculate_store_digests(struct kimage *image) if (check_ima_segment_index(image, i)) continue; - ret = crypto_shash_update(desc, ksegment->kbuf, - ksegment->bufsz); - if (ret) - break; + sha256_update(&state, ksegment->kbuf, ksegment->bufsz); /* * Assume rest of the buffer is filled with zero and @@ -835,44 +808,26 @@ static int kexec_calculate_store_digests(struct kimage *image) if (bytes > zero_buf_sz) bytes = zero_buf_sz; - ret = crypto_shash_update(desc, zero_buf, bytes); - if (ret) - break; + sha256_update(&state, zero_buf, bytes); nullsz -= bytes; } - if (ret) - break; - sha_regions[j].start = ksegment->mem; sha_regions[j].len = ksegment->memsz; j++; } - if (!ret) { - ret = crypto_shash_final(desc, digest); - if (ret) - goto out_free_digest; - ret = 
kexec_purgatory_get_set_symbol(image, "purgatory_sha_regions", - sha_regions, sha_region_sz, 0); - if (ret) - goto out_free_digest; + sha256_final(&state, digest); - ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha256_digest", - digest, SHA256_DIGEST_SIZE, 0); - if (ret) - goto out_free_digest; - } + ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha_regions", + sha_regions, sha_region_sz, 0); + if (ret) + goto out_free_sha_regions; -out_free_digest: - kfree(digest); + ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha256_digest", + digest, SHA256_DIGEST_SIZE, 0); out_free_sha_regions: vfree(sha_regions); -out_free_desc: - kfree(desc); -out_free_tfm: - kfree(tfm); -out: return ret; } diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 555e2b3a665a..61fa97da7989 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -29,6 +29,7 @@ #include <linux/interrupt.h> #include <linux/debug_locks.h> #include <linux/osq_lock.h> +#include <linux/hung_task.h> #define CREATE_TRACE_POINTS #include <trace/events/lock.h> @@ -191,7 +192,7 @@ __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, struct list_head *list) { #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER - WRITE_ONCE(current->blocker_mutex, lock); + hung_task_set_blocker(lock, BLOCKER_TYPE_MUTEX); #endif debug_mutex_add_waiter(lock, waiter, current); @@ -209,7 +210,7 @@ __mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter) debug_mutex_remove_waiter(lock, waiter, current); #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER - WRITE_ONCE(current->blocker_mutex, NULL); + hung_task_clear_blocker(); #endif } diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c index de9117c0e671..3ef032e22f7e 100644 --- a/kernel/locking/semaphore.c +++ b/kernel/locking/semaphore.c @@ -34,6 +34,7 @@ #include <linux/spinlock.h> #include <linux/ftrace.h> #include <trace/events/lock.h> +#include <linux/hung_task.h> static noinline void __down(struct semaphore *sem); 
static noinline int __down_interruptible(struct semaphore *sem); @@ -41,6 +42,41 @@ static noinline int __down_killable(struct semaphore *sem); static noinline int __down_timeout(struct semaphore *sem, long timeout); static noinline void __up(struct semaphore *sem, struct wake_q_head *wake_q); +#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER +static inline void hung_task_sem_set_holder(struct semaphore *sem) +{ + WRITE_ONCE((sem)->last_holder, (unsigned long)current); +} + +static inline void hung_task_sem_clear_if_holder(struct semaphore *sem) +{ + if (READ_ONCE((sem)->last_holder) == (unsigned long)current) + WRITE_ONCE((sem)->last_holder, 0UL); +} + +unsigned long sem_last_holder(struct semaphore *sem) +{ + return READ_ONCE(sem->last_holder); +} +#else +static inline void hung_task_sem_set_holder(struct semaphore *sem) +{ +} +static inline void hung_task_sem_clear_if_holder(struct semaphore *sem) +{ +} +unsigned long sem_last_holder(struct semaphore *sem) +{ + return 0UL; +} +#endif + +static inline void __sem_acquire(struct semaphore *sem) +{ + sem->count--; + hung_task_sem_set_holder(sem); +} + /** * down - acquire the semaphore * @sem: the semaphore to be acquired @@ -59,7 +95,7 @@ void __sched down(struct semaphore *sem) might_sleep(); raw_spin_lock_irqsave(&sem->lock, flags); if (likely(sem->count > 0)) - sem->count--; + __sem_acquire(sem); else __down(sem); raw_spin_unlock_irqrestore(&sem->lock, flags); @@ -83,7 +119,7 @@ int __sched down_interruptible(struct semaphore *sem) might_sleep(); raw_spin_lock_irqsave(&sem->lock, flags); if (likely(sem->count > 0)) - sem->count--; + __sem_acquire(sem); else result = __down_interruptible(sem); raw_spin_unlock_irqrestore(&sem->lock, flags); @@ -110,7 +146,7 @@ int __sched down_killable(struct semaphore *sem) might_sleep(); raw_spin_lock_irqsave(&sem->lock, flags); if (likely(sem->count > 0)) - sem->count--; + __sem_acquire(sem); else result = __down_killable(sem); raw_spin_unlock_irqrestore(&sem->lock, flags); @@ -140,7 
+176,7 @@ int __sched down_trylock(struct semaphore *sem) raw_spin_lock_irqsave(&sem->lock, flags); count = sem->count - 1; if (likely(count >= 0)) - sem->count = count; + __sem_acquire(sem); raw_spin_unlock_irqrestore(&sem->lock, flags); return (count < 0); @@ -165,7 +201,7 @@ int __sched down_timeout(struct semaphore *sem, long timeout) might_sleep(); raw_spin_lock_irqsave(&sem->lock, flags); if (likely(sem->count > 0)) - sem->count--; + __sem_acquire(sem); else result = __down_timeout(sem, timeout); raw_spin_unlock_irqrestore(&sem->lock, flags); @@ -187,6 +223,9 @@ void __sched up(struct semaphore *sem) DEFINE_WAKE_Q(wake_q); raw_spin_lock_irqsave(&sem->lock, flags); + + hung_task_sem_clear_if_holder(sem); + if (likely(list_empty(&sem->wait_list))) sem->count++; else @@ -228,8 +267,10 @@ static inline int __sched ___down_common(struct semaphore *sem, long state, raw_spin_unlock_irq(&sem->lock); timeout = schedule_timeout(timeout); raw_spin_lock_irq(&sem->lock); - if (waiter.up) + if (waiter.up) { + hung_task_sem_set_holder(sem); return 0; + } } timed_out: @@ -246,10 +287,14 @@ static inline int __sched __down_common(struct semaphore *sem, long state, { int ret; + hung_task_set_blocker(sem, BLOCKER_TYPE_SEM); + trace_contention_begin(sem, 0); ret = ___down_common(sem, state, timeout); trace_contention_end(sem, ret); + hung_task_clear_blocker(); + return ret; } diff --git a/kernel/panic.c b/kernel/panic.c index 047ea3215312..b0b9a8bf4560 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -307,12 +307,10 @@ static void panic_other_cpus_shutdown(bool crash_kexec) } /** - * panic - halt the system - * @fmt: The text string to print + * panic - halt the system + * @fmt: The text string to print * - * Display a message, then perform cleanups. - * - * This function never returns. + * Display a message, then perform cleanups. This function never returns. */ void panic(const char *fmt, ...) 
{ diff --git a/kernel/relay.c b/kernel/relay.c index 5ac7e711e4b6..c0c93a04d4ce 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -452,7 +452,7 @@ int relay_prepare_cpu(unsigned int cpu) /** * relay_open - create a new relay channel - * @base_filename: base name of files to create, %NULL for buffering only + * @base_filename: base name of files to create * @parent: dentry of parent directory, %NULL for root directory or buffer * @subbuf_size: size of sub-buffers * @n_subbufs: number of sub-buffers @@ -465,10 +465,6 @@ int relay_prepare_cpu(unsigned int cpu) * attributes specified. The created channel buffer files * will be named base_filename0...base_filenameN-1. File * permissions will be %S_IRUSR. - * - * If opening a buffer (@parent = NULL) that you later wish to register - * in a filesystem, call relay_late_setup_files() once the @parent dentry - * is available. */ struct rchan *relay_open(const char *base_filename, struct dentry *parent, @@ -540,111 +536,6 @@ struct rchan_percpu_buf_dispatcher { struct dentry *dentry; }; -/* Called in atomic context. */ -static void __relay_set_buf_dentry(void *info) -{ - struct rchan_percpu_buf_dispatcher *p = info; - - relay_set_buf_dentry(p->buf, p->dentry); -} - -/** - * relay_late_setup_files - triggers file creation - * @chan: channel to operate on - * @base_filename: base name of files to create - * @parent: dentry of parent directory, %NULL for root directory - * - * Returns 0 if successful, non-zero otherwise. - * - * Use to setup files for a previously buffer-only channel created - * by relay_open() with a NULL parent dentry. - * - * For example, this is useful for perfomring early tracing in kernel, - * before VFS is up and then exposing the early results once the dentry - * is available. 
- */ -int relay_late_setup_files(struct rchan *chan, - const char *base_filename, - struct dentry *parent) -{ - int err = 0; - unsigned int i, curr_cpu; - unsigned long flags; - struct dentry *dentry; - struct rchan_buf *buf; - struct rchan_percpu_buf_dispatcher disp; - - if (!chan || !base_filename) - return -EINVAL; - - strscpy(chan->base_filename, base_filename, NAME_MAX); - - mutex_lock(&relay_channels_mutex); - /* Is chan already set up? */ - if (unlikely(chan->has_base_filename)) { - mutex_unlock(&relay_channels_mutex); - return -EEXIST; - } - chan->has_base_filename = 1; - chan->parent = parent; - - if (chan->is_global) { - err = -EINVAL; - buf = *per_cpu_ptr(chan->buf, 0); - if (!WARN_ON_ONCE(!buf)) { - dentry = relay_create_buf_file(chan, buf, 0); - if (dentry && !WARN_ON_ONCE(!chan->is_global)) { - relay_set_buf_dentry(buf, dentry); - err = 0; - } - } - mutex_unlock(&relay_channels_mutex); - return err; - } - - curr_cpu = get_cpu(); - /* - * The CPU hotplug notifier ran before us and created buffers with - * no files associated. So it's safe to call relay_setup_buf_file() - * on all currently online CPUs. - */ - for_each_online_cpu(i) { - buf = *per_cpu_ptr(chan->buf, i); - if (unlikely(!buf)) { - WARN_ONCE(1, KERN_ERR "CPU has no buffer!\n"); - err = -EINVAL; - break; - } - - dentry = relay_create_buf_file(chan, buf, i); - if (unlikely(!dentry)) { - err = -EINVAL; - break; - } - - if (curr_cpu == i) { - local_irq_save(flags); - relay_set_buf_dentry(buf, dentry); - local_irq_restore(flags); - } else { - disp.buf = buf; - disp.dentry = dentry; - smp_mb(); - /* relay_channels_mutex must be held, so wait. 
*/ - err = smp_call_function_single(i, - __relay_set_buf_dentry, - &disp, 1); - } - if (unlikely(err)) - break; - } - put_cpu(); - mutex_unlock(&relay_channels_mutex); - - return err; -} -EXPORT_SYMBOL_GPL(relay_late_setup_files); - /** * relay_switch_subbuf - switch to a new sub-buffer * @buf: channel buffer diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c index 1fec61603ef3..e066d31d08f8 100644 --- a/kernel/vmcore_info.c +++ b/kernel/vmcore_info.c @@ -210,6 +210,10 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE); #define PAGE_OFFLINE_MAPCOUNT_VALUE (PGTY_offline << 24) VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE); +#ifdef CONFIG_UNACCEPTED_MEMORY +#define PAGE_UNACCEPTED_MAPCOUNT_VALUE (PGTY_unaccepted << 24) + VMCOREINFO_NUMBER(PAGE_UNACCEPTED_MAPCOUNT_VALUE); +#endif #ifdef CONFIG_KALLSYMS VMCOREINFO_SYMBOL(kallsyms_names); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9fa2af9dbf2c..80b56c002c7f 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -47,6 +47,7 @@ int __read_mostly watchdog_user_enabled = 1; static int __read_mostly watchdog_hardlockup_user_enabled = WATCHDOG_HARDLOCKUP_DEFAULT; static int __read_mostly watchdog_softlockup_user_enabled = 1; int __read_mostly watchdog_thresh = 10; +static int __read_mostly watchdog_thresh_next; static int __read_mostly watchdog_hardlockup_available; struct cpumask watchdog_cpumask __read_mostly; @@ -63,6 +64,29 @@ int __read_mostly sysctl_hardlockup_all_cpu_backtrace; */ unsigned int __read_mostly hardlockup_panic = IS_ENABLED(CONFIG_BOOTPARAM_HARDLOCKUP_PANIC); + +#ifdef CONFIG_SYSFS + +static unsigned int hardlockup_count; + +static ssize_t hardlockup_count_show(struct kobject *kobj, struct kobj_attribute *attr, + char *page) +{ + return sysfs_emit(page, "%u\n", hardlockup_count); +} + +static struct kobj_attribute hardlockup_count_attr = __ATTR_RO(hardlockup_count); + +static __init int kernel_hardlockup_sysfs_init(void) +{ + 
sysfs_add_file_to_group(kernel_kobj, &hardlockup_count_attr.attr, NULL); + return 0; +} + +late_initcall(kernel_hardlockup_sysfs_init); + +#endif // CONFIG_SYSFS + /* * We may not want to enable hard lockup detection by default in all cases, * for example when running the kernel as a guest on a hypervisor. In these @@ -169,6 +193,10 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) unsigned int this_cpu = smp_processor_id(); unsigned long flags; +#ifdef CONFIG_SYSFS + ++hardlockup_count; +#endif + /* Only print hardlockups once. */ if (per_cpu(watchdog_hardlockup_warned, cpu)) return; @@ -311,6 +339,28 @@ unsigned int __read_mostly softlockup_panic = static bool softlockup_initialized __read_mostly; static u64 __read_mostly sample_period; +#ifdef CONFIG_SYSFS + +static unsigned int softlockup_count; + +static ssize_t softlockup_count_show(struct kobject *kobj, struct kobj_attribute *attr, + char *page) +{ + return sysfs_emit(page, "%u\n", softlockup_count); +} + +static struct kobj_attribute softlockup_count_attr = __ATTR_RO(softlockup_count); + +static __init int kernel_softlockup_sysfs_init(void) +{ + sysfs_add_file_to_group(kernel_kobj, &softlockup_count_attr.attr, NULL); + return 0; +} + +late_initcall(kernel_softlockup_sysfs_init); + +#endif // CONFIG_SYSFS + /* Timestamp taken after the last successful reschedule. */ static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); /* Timestamp of the last softlockup report. */ @@ -742,6 +792,10 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) touch_ts = __this_cpu_read(watchdog_touch_ts); duration = is_softlockup(touch_ts, period_ts, now); if (unlikely(duration)) { +#ifdef CONFIG_SYSFS + ++softlockup_count; +#endif + /* * Prevent multiple soft-lockup reports if one cpu is already * engaged in dumping all cpu back traces. 
@@ -870,12 +924,20 @@ int lockup_detector_offline_cpu(unsigned int cpu) return 0; } -static void __lockup_detector_reconfigure(void) +static void __lockup_detector_reconfigure(bool thresh_changed) { cpus_read_lock(); watchdog_hardlockup_stop(); softlockup_stop_all(); + /* + * To prevent watchdog_timer_fn from using the old interval and + * the new watchdog_thresh at the same time, which could lead to + * false softlockup reports, it is necessary to update the + * watchdog_thresh after the softlockup is completed. + */ + if (thresh_changed) + watchdog_thresh = READ_ONCE(watchdog_thresh_next); set_sample_period(); lockup_detector_update_enable(); if (watchdog_enabled && watchdog_thresh) @@ -888,7 +950,7 @@ static void __lockup_detector_reconfigure(void) void lockup_detector_reconfigure(void) { mutex_lock(&watchdog_mutex); - __lockup_detector_reconfigure(); + __lockup_detector_reconfigure(false); mutex_unlock(&watchdog_mutex); } @@ -908,27 +970,29 @@ static __init void lockup_detector_setup(void) return; mutex_lock(&watchdog_mutex); - __lockup_detector_reconfigure(); + __lockup_detector_reconfigure(false); softlockup_initialized = true; mutex_unlock(&watchdog_mutex); } #else /* CONFIG_SOFTLOCKUP_DETECTOR */ -static void __lockup_detector_reconfigure(void) +static void __lockup_detector_reconfigure(bool thresh_changed) { cpus_read_lock(); watchdog_hardlockup_stop(); + if (thresh_changed) + watchdog_thresh = READ_ONCE(watchdog_thresh_next); lockup_detector_update_enable(); watchdog_hardlockup_start(); cpus_read_unlock(); } void lockup_detector_reconfigure(void) { - __lockup_detector_reconfigure(); + __lockup_detector_reconfigure(false); } static inline void lockup_detector_setup(void) { - __lockup_detector_reconfigure(); + __lockup_detector_reconfigure(false); } #endif /* !CONFIG_SOFTLOCKUP_DETECTOR */ @@ -946,11 +1010,11 @@ void lockup_detector_soft_poweroff(void) #ifdef CONFIG_SYSCTL /* Propagate any changes to the watchdog infrastructure */ -static void 
proc_watchdog_update(void) +static void proc_watchdog_update(bool thresh_changed) { /* Remove impossible cpus to keep sysctl output clean. */ cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask); - __lockup_detector_reconfigure(); + __lockup_detector_reconfigure(thresh_changed); } /* @@ -984,7 +1048,7 @@ static int proc_watchdog_common(int which, const struct ctl_table *table, int wr } else { err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!err && old != READ_ONCE(*param)) - proc_watchdog_update(); + proc_watchdog_update(false); } mutex_unlock(&watchdog_mutex); return err; @@ -1035,11 +1099,13 @@ static int proc_watchdog_thresh(const struct ctl_table *table, int write, mutex_lock(&watchdog_mutex); - old = READ_ONCE(watchdog_thresh); + watchdog_thresh_next = READ_ONCE(watchdog_thresh); + + old = watchdog_thresh_next; err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - if (!err && write && old != READ_ONCE(watchdog_thresh)) - proc_watchdog_update(); + if (!err && write && old != READ_ONCE(watchdog_thresh_next)) + proc_watchdog_update(true); mutex_unlock(&watchdog_mutex); return err; @@ -1060,7 +1126,7 @@ static int proc_watchdog_cpumask(const struct ctl_table *table, int write, err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); if (!err && write) - proc_watchdog_update(); + proc_watchdog_update(false); mutex_unlock(&watchdog_mutex); return err; @@ -1080,7 +1146,7 @@ static const struct ctl_table watchdog_sysctls[] = { }, { .procname = "watchdog_thresh", - .data = &watchdog_thresh, + .data = &watchdog_thresh_next, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_watchdog_thresh, diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 90edcc06e770..ebe33181b6e6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2982,13 +2982,7 @@ config TEST_DYNAMIC_DEBUG config TEST_KMOD tristate "kmod stress tester" depends on m - depends on NETDEVICES && NET_CORE && INET # for TUN - depends on BLOCK - depends 
on PAGE_SIZE_LESS_THAN_256KB # for BTRFS select TEST_LKM - select XFS_FS - select TUN - select BTRFS_FS help Test the kernel's module loading mechanism: kmod. kmod implements support to load modules using the Linux kernel's usermode helper. diff --git a/lib/errseq.c b/lib/errseq.c index 93e9b94358dc..13a2581c5a87 100644 --- a/lib/errseq.c +++ b/lib/errseq.c @@ -34,11 +34,14 @@ */ /* The low bits are designated for error code (max of MAX_ERRNO) */ -#define ERRSEQ_SHIFT ilog2(MAX_ERRNO + 1) +#define ERRSEQ_SHIFT (ilog2(MAX_ERRNO) + 1) /* This bit is used as a flag to indicate whether the value has been seen */ #define ERRSEQ_SEEN (1 << ERRSEQ_SHIFT) +/* Leverage macro ERRSEQ_SEEN to define errno mask macro here */ +#define ERRNO_MASK (ERRSEQ_SEEN - 1) + /* The lowest bit of the counter */ #define ERRSEQ_CTR_INC (1 << (ERRSEQ_SHIFT + 1)) @@ -60,8 +63,6 @@ errseq_t errseq_set(errseq_t *eseq, int err) { errseq_t cur, old; - /* MAX_ERRNO must be able to serve as a mask */ - BUILD_BUG_ON_NOT_POWER_OF_2(MAX_ERRNO + 1); /* * Ensure the error code actually fits where we want it to go. 
If it @@ -79,7 +80,7 @@ errseq_t errseq_set(errseq_t *eseq, int err) errseq_t new; /* Clear out error bits and set new error */ - new = (old & ~(MAX_ERRNO|ERRSEQ_SEEN)) | -err; + new = (old & ~(ERRNO_MASK | ERRSEQ_SEEN)) | -err; /* Only increment if someone has looked at it */ if (old & ERRSEQ_SEEN) @@ -148,7 +149,7 @@ int errseq_check(errseq_t *eseq, errseq_t since) if (likely(cur == since)) return 0; - return -(cur & MAX_ERRNO); + return -(cur & ERRNO_MASK); } EXPORT_SYMBOL(errseq_check); @@ -200,7 +201,7 @@ int errseq_check_and_advance(errseq_t *eseq, errseq_t *since) if (new != old) cmpxchg(eseq, old, new); *since = new; - err = -(new & MAX_ERRNO); + err = -(new & ERRNO_MASK); } return err; } diff --git a/lib/kstrtox.c b/lib/kstrtox.c index d586e6af5e5a..bdde40cd69d7 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -351,6 +351,8 @@ int kstrtobool(const char *s, bool *res) return -EINVAL; switch (s[0]) { + case 'e': + case 'E': case 'y': case 'Y': case 't': @@ -358,6 +360,8 @@ int kstrtobool(const char *s, bool *res) case '1': *res = true; return 0; + case 'd': + case 'D': case 'n': case 'N': case 'f': diff --git a/lib/llist.c b/lib/llist.c index f21d0cfbbaaa..f574c17a238e 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -14,28 +14,6 @@ #include <linux/export.h> #include <linux/llist.h> - -/** - * llist_add_batch - add several linked entries in batch - * @new_first: first entry in batch to be added - * @new_last: last entry in batch to be added - * @head: the head for your lock-less list - * - * Return whether list is empty before adding. 
- */ -bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, - struct llist_head *head) -{ - struct llist_node *first = READ_ONCE(head->first); - - do { - new_last->next = first; - } while (!try_cmpxchg(&head->first, &first, new_first)); - - return !first; -} -EXPORT_SYMBOL_GPL(llist_add_batch); - /** * llist_del_first - delete the first entry of lock-less list * @head: the head for your lock-less list diff --git a/lib/oid_registry.c b/lib/oid_registry.c index fe6705cfd780..9b757a117f09 100644 --- a/lib/oid_registry.c +++ b/lib/oid_registry.c @@ -117,7 +117,7 @@ int parse_OID(const void *data, size_t datasize, enum OID *oid) EXPORT_SYMBOL_GPL(parse_OID); /* - * sprint_OID - Print an Object Identifier into a buffer + * sprint_oid - Print an Object Identifier into a buffer * @data: The encoded OID to print * @datasize: The size of the encoded OID * @buffer: The buffer to render into @@ -173,26 +173,3 @@ bad: return -EBADMSG; } EXPORT_SYMBOL_GPL(sprint_oid); - -/** - * sprint_OID - Print an Object Identifier into a buffer - * @oid: The OID to print - * @buffer: The buffer to render into - * @bufsize: The size of the buffer - * - * The OID is rendered into the buffer in "a.b.c.d" format and the number of - * bytes is returned. 
- */ -int sprint_OID(enum OID oid, char *buffer, size_t bufsize) -{ - int ret; - - BUG_ON(oid >= OID__NR); - - ret = sprint_oid(oid_data + oid_index[oid], - oid_index[oid + 1] - oid_index[oid], - buffer, bufsize); - BUG_ON(ret == -EBADMSG); - return ret; -} -EXPORT_SYMBOL_GPL(sprint_OID); diff --git a/lib/rbtree.c b/lib/rbtree.c index 989c2d615f92..5114eda6309c 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -297,9 +297,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * / \ / \ * N S --> N sl * / \ \ - * sl sr S + * sl Sr S * \ - * sr + * Sr * * Note: p might be red, and then both * p and sl are red after rotation(which @@ -312,9 +312,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * / \ / \ * N sl --> P S * \ / \ - * S N sr + * S N Sr * \ - * sr + * Sr */ tmp1 = tmp2->rb_right; WRITE_ONCE(sibling->rb_left, tmp1); diff --git a/lib/scatterlist.c b/lib/scatterlist.c index b58d5ef1a34b..7582dfab7fe3 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -14,29 +14,6 @@ #include <linux/folio_queue.h> /** - * sg_next - return the next scatterlist entry in a list - * @sg: The current sg entry - * - * Description: - * Usually the next entry will be @sg@ + 1, but if this sg element is part - * of a chained scatterlist, it could jump to the start of a new - * scatterlist array. 
- * - **/ -struct scatterlist *sg_next(struct scatterlist *sg) -{ - if (sg_is_last(sg)) - return NULL; - - sg++; - if (unlikely(sg_is_chain(sg))) - sg = sg_chain_ptr(sg); - - return sg; -} -EXPORT_SYMBOL(sg_next); - -/** * sg_nents - return total count of entries in scatterlist * @sg: The scatterlist * diff --git a/lib/test_kmod.c b/lib/test_kmod.c index 064ed0fce75a..f0dd092860ea 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -28,14 +28,20 @@ #define TEST_START_NUM_THREADS 50 #define TEST_START_DRIVER "test_module" -#define TEST_START_TEST_FS "xfs" #define TEST_START_TEST_CASE TEST_KMOD_DRIVER - static bool force_init_test = false; -module_param(force_init_test, bool_enable_only, 0644); +module_param(force_init_test, bool_enable_only, 0444); MODULE_PARM_DESC(force_init_test, "Force kicking a test immediately after driver loads"); +static char *start_driver; +module_param(start_driver, charp, 0444); +MODULE_PARM_DESC(start_driver, + "Module/driver to use for the testing after driver loads"); +static char *start_test_fs; +module_param(start_test_fs, charp, 0444); +MODULE_PARM_DESC(start_test_fs, + "File system to use for the testing after driver loads"); /* * For device allocation / registration @@ -508,6 +514,11 @@ static int __trigger_config_run(struct kmod_test_device *test_dev) case TEST_KMOD_DRIVER: return run_test_driver(test_dev); case TEST_KMOD_FS_TYPE: + if (!config->test_fs) { + dev_warn(test_dev->dev, + "No fs type specified, can't run the test\n"); + return -EINVAL; + } return run_test_fs_type(test_dev); default: dev_warn(test_dev->dev, @@ -721,26 +732,20 @@ static ssize_t config_test_fs_show(struct device *dev, static DEVICE_ATTR_RW(config_test_fs); static int trigger_config_run_type(struct kmod_test_device *test_dev, - enum kmod_test_case test_case, - const char *test_str) + enum kmod_test_case test_case) { - int copied = 0; struct test_config *config = &test_dev->config; mutex_lock(&test_dev->config_mutex); switch (test_case) { case 
TEST_KMOD_DRIVER: - kfree_const(config->test_driver); - config->test_driver = NULL; - copied = config_copy_test_driver_name(config, test_str, - strlen(test_str)); break; case TEST_KMOD_FS_TYPE: - kfree_const(config->test_fs); - config->test_fs = NULL; - copied = config_copy_test_fs(config, test_str, - strlen(test_str)); + if (!config->test_fs) { + mutex_unlock(&test_dev->config_mutex); + return 0; + } break; default: mutex_unlock(&test_dev->config_mutex); @@ -751,11 +756,6 @@ static int trigger_config_run_type(struct kmod_test_device *test_dev, mutex_unlock(&test_dev->config_mutex); - if (copied <= 0 || copied != strlen(test_str)) { - test_dev->test_is_oom = true; - return -ENOMEM; - } - test_dev->test_is_oom = false; return trigger_config_run(test_dev); @@ -800,19 +800,24 @@ static unsigned int kmod_init_test_thread_limit(void) static int __kmod_config_init(struct kmod_test_device *test_dev) { struct test_config *config = &test_dev->config; + const char *test_start_driver = start_driver ? start_driver : + TEST_START_DRIVER; int ret = -ENOMEM, copied; __kmod_config_free(config); - copied = config_copy_test_driver_name(config, TEST_START_DRIVER, - strlen(TEST_START_DRIVER)); - if (copied != strlen(TEST_START_DRIVER)) + copied = config_copy_test_driver_name(config, test_start_driver, + strlen(test_start_driver)); + if (copied != strlen(test_start_driver)) goto err_out; - copied = config_copy_test_fs(config, TEST_START_TEST_FS, - strlen(TEST_START_TEST_FS)); - if (copied != strlen(TEST_START_TEST_FS)) - goto err_out; + + if (start_test_fs) { + copied = config_copy_test_fs(config, start_test_fs, + strlen(start_test_fs)); + if (copied != strlen(start_test_fs)) + goto err_out; + } config->num_threads = kmod_init_test_thread_limit(); config->test_result = 0; @@ -1178,12 +1183,11 @@ static int __init test_kmod_init(void) * lowering the init level for more fun. 
*/ if (force_init_test) { - ret = trigger_config_run_type(test_dev, - TEST_KMOD_DRIVER, "tun"); + ret = trigger_config_run_type(test_dev, TEST_KMOD_DRIVER); if (WARN_ON(ret)) return ret; - ret = trigger_config_run_type(test_dev, - TEST_KMOD_FS_TYPE, "btrfs"); + + ret = trigger_config_run_type(test_dev, TEST_KMOD_FS_TYPE); if (WARN_ON(ret)) return ret; } diff --git a/mm/maccess.c b/mm/maccess.c index 8f0906180a94..831b4dd7296c 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -196,7 +196,7 @@ long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, if (ret >= count) { ret = count; dst[ret - 1] = '\0'; - } else if (ret > 0) { + } else if (ret >= 0) { ret++; } diff --git a/samples/Kconfig b/samples/Kconfig index 6ade17cb16b4..ffef99950206 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -315,10 +315,11 @@ config SAMPLE_HUNG_TASK tristate "Hung task detector test code" depends on DETECT_HUNG_TASK && DEBUG_FS help - Build a module which provide a simple debugfs file. If user reads - the file, it will sleep long time (256 seconds) with holding a - mutex. Thus if there are 2 or more processes read this file, it - will be detected by the hung_task watchdog. + Build a module that provides debugfs files (e.g., mutex, semaphore, + etc.) under <debugfs>/hung_task. If user reads one of these files, + it will sleep long time (256 seconds) with holding a lock. Thus, + if 2 or more processes read the same file concurrently, it will + be detected by the hung_task watchdog. 
source "samples/rust/Kconfig" diff --git a/samples/hung_task/Makefile b/samples/hung_task/Makefile index f4d6ab563488..86036f1a204d 100644 --- a/samples/hung_task/Makefile +++ b/samples/hung_task/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_SAMPLE_HUNG_TASK) += hung_task_mutex.o +obj-$(CONFIG_SAMPLE_HUNG_TASK) += hung_task_tests.o diff --git a/samples/hung_task/hung_task_mutex.c b/samples/hung_task/hung_task_mutex.c deleted file mode 100644 index 47ed38239ea3..000000000000 --- a/samples/hung_task/hung_task_mutex.c +++ /dev/null @@ -1,66 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * hung_task_mutex.c - Sample code which causes hung task by mutex - * - * Usage: load this module and read `<debugfs>/hung_task/mutex` - * by 2 or more processes. - * - * This is for testing kernel hung_task error message. - * Note that this will make your system freeze and maybe - * cause panic. So do not use this except for the test. - */ - -#include <linux/debugfs.h> -#include <linux/delay.h> -#include <linux/fs.h> -#include <linux/module.h> -#include <linux/mutex.h> - -#define HUNG_TASK_DIR "hung_task" -#define HUNG_TASK_FILE "mutex" -#define SLEEP_SECOND 256 - -static const char dummy_string[] = "This is a dummy string."; -static DEFINE_MUTEX(dummy_mutex); -static struct dentry *hung_task_dir; - -static ssize_t read_dummy(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - /* If the second task waits on the lock, it is uninterruptible sleep. */ - guard(mutex)(&dummy_mutex); - - /* When the first task sleep here, it is interruptible. 
*/ - msleep_interruptible(SLEEP_SECOND * 1000); - - return simple_read_from_buffer(user_buf, count, ppos, - dummy_string, sizeof(dummy_string)); -} - -static const struct file_operations hung_task_fops = { - .read = read_dummy, -}; - -static int __init hung_task_sample_init(void) -{ - hung_task_dir = debugfs_create_dir(HUNG_TASK_DIR, NULL); - if (IS_ERR(hung_task_dir)) - return PTR_ERR(hung_task_dir); - - debugfs_create_file(HUNG_TASK_FILE, 0400, hung_task_dir, - NULL, &hung_task_fops); - - return 0; -} - -static void __exit hung_task_sample_exit(void) -{ - debugfs_remove_recursive(hung_task_dir); -} - -module_init(hung_task_sample_init); -module_exit(hung_task_sample_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Masami Hiramatsu"); -MODULE_DESCRIPTION("Simple sleep under mutex file for testing hung task"); diff --git a/samples/hung_task/hung_task_tests.c b/samples/hung_task/hung_task_tests.c new file mode 100644 index 000000000000..a5c09bd3a47d --- /dev/null +++ b/samples/hung_task/hung_task_tests.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * hung_task_tests.c - Sample code for testing hung tasks with mutex, + * semaphore, etc. + * + * Usage: Load this module and read `<debugfs>/hung_task/mutex`, + * `<debugfs>/hung_task/semaphore`, etc., with 2 or more processes. + * + * This is for testing kernel hung_task error messages with various locking + * mechanisms (e.g., mutex, semaphore, etc.). Note that this may freeze + * your system or cause a panic. Use only for testing purposes. 
+ */ + +#include <linux/debugfs.h> +#include <linux/delay.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/semaphore.h> + +#define HUNG_TASK_DIR "hung_task" +#define HUNG_TASK_MUTEX_FILE "mutex" +#define HUNG_TASK_SEM_FILE "semaphore" +#define SLEEP_SECOND 256 + +static const char dummy_string[] = "This is a dummy string."; +static DEFINE_MUTEX(dummy_mutex); +static DEFINE_SEMAPHORE(dummy_sem, 1); +static struct dentry *hung_task_dir; + +/* Mutex-based read function */ +static ssize_t read_dummy_mutex(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + /* Second task waits on mutex, entering uninterruptible sleep */ + guard(mutex)(&dummy_mutex); + + /* First task sleeps here, interruptible */ + msleep_interruptible(SLEEP_SECOND * 1000); + + return simple_read_from_buffer(user_buf, count, ppos, dummy_string, + sizeof(dummy_string)); +} + +/* Semaphore-based read function */ +static ssize_t read_dummy_semaphore(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + /* Second task waits on semaphore, entering uninterruptible sleep */ + down(&dummy_sem); + + /* First task sleeps here, interruptible */ + msleep_interruptible(SLEEP_SECOND * 1000); + + up(&dummy_sem); + + return simple_read_from_buffer(user_buf, count, ppos, dummy_string, + sizeof(dummy_string)); +} + +/* File operations for mutex */ +static const struct file_operations hung_task_mutex_fops = { + .read = read_dummy_mutex, +}; + +/* File operations for semaphore */ +static const struct file_operations hung_task_sem_fops = { + .read = read_dummy_semaphore, +}; + +static int __init hung_task_tests_init(void) +{ + hung_task_dir = debugfs_create_dir(HUNG_TASK_DIR, NULL); + if (IS_ERR(hung_task_dir)) + return PTR_ERR(hung_task_dir); + + /* Create debugfs files for mutex and semaphore tests */ + debugfs_create_file(HUNG_TASK_MUTEX_FILE, 0400, hung_task_dir, NULL, + &hung_task_mutex_fops); + 
debugfs_create_file(HUNG_TASK_SEM_FILE, 0400, hung_task_dir, NULL, + &hung_task_sem_fops); + + return 0; +} + +static void __exit hung_task_tests_exit(void) +{ + debugfs_remove_recursive(hung_task_dir); +} + +module_init(hung_task_tests_init); +module_exit(hung_task_tests_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Masami Hiramatsu <mhiramat@kernel.org>"); +MODULE_AUTHOR("Zi Li <amaindex@outlook.com>"); +MODULE_DESCRIPTION("Simple sleep under lock files for testing hung task"); diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 966813c2573c..664f7b7a622c 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -151,6 +151,24 @@ EOM exit($exitcode); } +my $DO_WHILE_0_ADVICE = q{ + do {} while (0) advice is over-stated in a few situations: + + The more obvious case is macros, like MODULE_PARM_DESC, invoked at + file-scope, where C disallows code (it must be in functions). See + $exceptions if you have one to add by name. + + More troublesome is declarative macros used at top of new scope, + like DECLARE_PER_CPU. These might just compile with a do-while-0 + wrapper, but would be incorrect. Most of these are handled by + detecting struct,union,etc declaration primitives in $exceptions. + + Theres also macros called inside an if (block), which "return" an + expression. These cannot do-while, and need a ({}) wrapper. + + Enjoy this qualification while we work to improve our heuristics. +}; + sub uniq { my %seen; return grep { !$seen{$_}++ } @_; @@ -5885,9 +5903,9 @@ sub process { } } -# multi-statement macros should be enclosed in a do while loop, grab the -# first statement and ensure its the whole macro if its not enclosed -# in a known good container +# Usually multi-statement macros should be enclosed in a do {} while +# (0) loop. 
Grab the first statement and ensure its the whole macro +# if its not enclosed in a known good container if ($realfile !~ m@/vmlinux.lds.h$@ && $line =~ /^.\s*\#\s*define\s*$Ident(\()?/) { my $ln = $linenr; @@ -5940,10 +5958,13 @@ sub process { my $exceptions = qr{ $Declare| + # named exceptions module_param_named| MODULE_PARM_DESC| DECLARE_PER_CPU| DEFINE_PER_CPU| + static_assert| + # declaration primitives __typeof__\(| union| struct| @@ -5978,11 +5999,11 @@ sub process { ERROR("MULTISTATEMENT_MACRO_USE_DO_WHILE", "Macros starting with if should be enclosed by a do - while loop to avoid possible if/else logic defects\n" . "$herectx"); } elsif ($dstat =~ /;/) { - ERROR("MULTISTATEMENT_MACRO_USE_DO_WHILE", - "Macros with multiple statements should be enclosed in a do - while loop\n" . "$herectx"); + WARN("MULTISTATEMENT_MACRO_USE_DO_WHILE", + "Non-declarative macros with multiple statements should be enclosed in a do - while loop\n" . "$herectx\nBUT SEE:\n$DO_WHILE_0_ADVICE"); } else { ERROR("COMPLEX_MACRO", - "Macros with complex values should be enclosed in parentheses\n" . "$herectx"); + "Macros with complex values should be enclosed in parentheses\n" . "$herectx\nBUT SEE:\n$DO_WHILE_0_ADVICE"); } } @@ -6026,7 +6047,7 @@ sub process { } # check if this is an unused argument - if ($define_stmt !~ /\b$arg\b/) { + if ($define_stmt !~ /\b$arg\b/ && $define_stmt) { WARN("MACRO_ARG_UNUSED", "Argument '$arg' is not used in function-like macro\n" . "$herectx"); } diff --git a/scripts/gdb/linux/cpus.py b/scripts/gdb/linux/cpus.py index f506965ea759..6edf4ef61636 100644 --- a/scripts/gdb/linux/cpus.py +++ b/scripts/gdb/linux/cpus.py @@ -141,7 +141,7 @@ LxCpus() class PerCpu(gdb.Function): """Return per-cpu variable. -$lx_per_cpu("VAR"[, CPU]): Return the per-cpu variable called VAR for the +$lx_per_cpu(VAR[, CPU]): Return the per-cpu variable called VAR for the given CPU number. If CPU is omitted, the CPU of the current context is used. 
Note that VAR has to be quoted as string.""" @@ -158,7 +158,7 @@ PerCpu() class PerCpuPtr(gdb.Function): """Return per-cpu pointer. -$lx_per_cpu_ptr("VAR"[, CPU]): Return the per-cpu pointer called VAR for the +$lx_per_cpu_ptr(VAR[, CPU]): Return the per-cpu pointer called VAR for the given CPU number. If CPU is omitted, the CPU of the current context is used. Note that VAR has to be quoted as string.""" diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py index b255177301e9..2332bd8eddf1 100644 --- a/scripts/gdb/linux/symbols.py +++ b/scripts/gdb/linux/symbols.py @@ -38,19 +38,13 @@ if hasattr(gdb, 'Breakpoint'): # Disable pagination while reporting symbol (re-)loading. # The console input is blocked in this context so that we would # get stuck waiting for the user to acknowledge paged output. - show_pagination = gdb.execute("show pagination", to_string=True) - pagination = show_pagination.endswith("on.\n") - gdb.execute("set pagination off") - - if module_name in cmd.loaded_modules: - gdb.write("refreshing all symbols to reload module " - "'{0}'\n".format(module_name)) - cmd.load_all_symbols() - else: - cmd.load_module_symbols(module) - - # restore pagination state - gdb.execute("set pagination %s" % ("on" if pagination else "off")) + with utils.pagination_off(): + if module_name in cmd.loaded_modules: + gdb.write("refreshing all symbols to reload module " + "'{0}'\n".format(module_name)) + cmd.load_all_symbols() + else: + cmd.load_module_symbols(module) return False @@ -60,6 +54,18 @@ def get_vmcore_s390(): vmcore_info = 0x0e0c paddr_vmcoreinfo_note = gdb.parse_and_eval("*(unsigned long long *)" + hex(vmcore_info)) + if paddr_vmcoreinfo_note == 0 or paddr_vmcoreinfo_note & 1: + # In the early boot case, extract vm_layout.kaslr_offset from the + # vmlinux image in physical memory. 
+ if paddr_vmcoreinfo_note == 0: + kaslr_offset_phys = 0 + else: + kaslr_offset_phys = paddr_vmcoreinfo_note - 1 + with utils.pagination_off(): + gdb.execute("symbol-file {0} -o {1}".format( + utils.get_vmlinux(), hex(kaslr_offset_phys))) + kaslr_offset = gdb.parse_and_eval("vm_layout.kaslr_offset") + return "KERNELOFFSET=" + hex(kaslr_offset)[2:] inferior = gdb.selected_inferior() elf_note = inferior.read_memory(paddr_vmcoreinfo_note, 12) n_namesz, n_descsz, n_type = struct.unpack(">III", elf_note) @@ -178,11 +184,7 @@ lx-symbols command.""" saved_states.append({'breakpoint': bp, 'enabled': bp.enabled}) # drop all current symbols and reload vmlinux - orig_vmlinux = 'vmlinux' - for obj in gdb.objfiles(): - if (obj.filename.endswith('vmlinux') or - obj.filename.endswith('vmlinux.debug')): - orig_vmlinux = obj.filename + orig_vmlinux = utils.get_vmlinux() gdb.execute("symbol-file", to_string=True) kerneloffset = get_kerneloffset() if kerneloffset is None: diff --git a/scripts/gdb/linux/utils.py b/scripts/gdb/linux/utils.py index 03ebdccf5f69..e11f6f67961a 100644 --- a/scripts/gdb/linux/utils.py +++ b/scripts/gdb/linux/utils.py @@ -200,7 +200,7 @@ def get_gdbserver_type(): def probe_kgdb(): try: - thread_info = gdb.execute("info thread 2", to_string=True) + thread_info = gdb.execute("info thread 1", to_string=True) return "shadowCPU" in thread_info except gdb.error: return False @@ -251,3 +251,23 @@ def parse_vmcore(s): else: kerneloffset = int(match.group(1), 16) return VmCore(kerneloffset=kerneloffset) + + +def get_vmlinux(): + vmlinux = 'vmlinux' + for obj in gdb.objfiles(): + if (obj.filename.endswith('vmlinux') or + obj.filename.endswith('vmlinux.debug')): + vmlinux = obj.filename + return vmlinux + + +@contextlib.contextmanager +def pagination_off(): + show_pagination = gdb.execute("show pagination", to_string=True) + pagination = show_pagination.endswith("on.\n") + gdb.execute("set pagination off") + try: + yield + finally: + gdb.execute("set pagination %s" % 
("on" if pagination else "off")) diff --git a/scripts/spelling.txt b/scripts/spelling.txt index a290db720b0f..ac94fa1c2415 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -1240,6 +1240,8 @@ prefered||preferred prefferably||preferably prefitler||prefilter preform||perform +previleged||privileged +previlege||privilege premption||preemption prepaired||prepared prepate||prepare diff --git a/tools/testing/selftests/filesystems/file_stressor.c b/tools/testing/selftests/filesystems/file_stressor.c index 1136f93a9977..01dd89f8e52f 100644 --- a/tools/testing/selftests/filesystems/file_stressor.c +++ b/tools/testing/selftests/filesystems/file_stressor.c @@ -156,7 +156,7 @@ TEST_F_TIMEOUT(file_stressor, slab_typesafe_by_rcu, 900 * 2) ssize_t nr_read; /* - * Concurrently read /proc/<pid>/fd/ which rougly does: + * Concurrently read /proc/<pid>/fd/ which roughly does: * * f = fget_task_next(p, &fd); * if (!f) diff --git a/tools/testing/selftests/kmod/config b/tools/testing/selftests/kmod/config index 259f4fd6b5e2..1f1e63494af9 100644 --- a/tools/testing/selftests/kmod/config +++ b/tools/testing/selftests/kmod/config @@ -1,7 +1,2 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_LKM=m -CONFIG_XFS_FS=m - -# For the module parameter force_init_test is used -CONFIG_TUN=m -CONFIG_BTRFS_FS=m diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index 21595b20bbc3..d50ada0c7dbf 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -158,7 +158,7 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* * R/O pinning or pinning in a private mapping is always * expected to work. Otherwise, we expect long-term R/W pinning - * to only succeed for special fielesystems. + * to only succeed for special filesystems. 
*/ should_work = !shared || !rw || fs_supports_writable_longterm_pinning(fs_type); diff --git a/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c b/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c index 0326b39a11b9..30cab5d425d2 100644 --- a/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c +++ b/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c @@ -56,7 +56,7 @@ int main(int argc, char **argv) } if (write(fd, "1\n", 2) < 0) { - perror("Can' enable power floor notifications\n"); + perror("Can't enable power floor notifications\n"); exit(1); } diff --git a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c index 217c3a641c53..a40097232967 100644 --- a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c +++ b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c @@ -37,7 +37,7 @@ void workload_hint_exit(int signum) } if (write(fd, "0\n", 2) < 0) { - perror("Can' disable workload hints\n"); + perror("Can't disable workload hints\n"); exit(1); } @@ -99,7 +99,7 @@ int main(int argc, char **argv) } if (write(fd, "1\n", 2) < 0) { - perror("Can' enable workload hints\n"); + perror("Can't enable workload hints\n"); exit(1); } |