From 2b089bf8d19c66f70ae3b2d2d101be1ae49bfe24 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 22 Jul 2019 11:20:08 +0200 Subject: kernel/configs: Replace GPL boilerplate code with SPDX identifier The FSF does not reside in "675 Mass Ave, Cambridge" anymore... let's replace the old GPL boilerplate code with a proper SPDX identifier instead. Signed-off-by: Thomas Huth Signed-off-by: Greg Kroah-Hartman --- kernel/configs.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/configs.c b/kernel/configs.c index b062425ccf8d..c09ea4c995e1 100644 --- a/kernel/configs.c +++ b/kernel/configs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * kernel/configs.c * Echo the kernel .config file used to build the kernel @@ -6,21 +7,6 @@ * Copyright (C) 2002 Randy Dunlap * Copyright (C) 2002 Al Stone * Copyright (C) 2002 Hewlett-Packard Company - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include -- cgit From f4904815f97a934258445a8f763f6b6c48f007e7 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Fri, 2 Aug 2019 15:59:43 +0100 Subject: sched/deadline: Fix double accounting of rq/running bw in push & pull {push,pull}_dl_task() always calls {de,}activate_task() with .flags=0 which sets p->on_rq=TASK_ON_RQ_MIGRATING. {push,pull}_dl_task()->{de,}activate_task()->{de,en}queue_task()-> {de,en}queue_task_dl() calls {sub,add}_{running,rq}_bw() since p->on_rq==TASK_ON_RQ_MIGRATING. So {sub,add}_{running,rq}_bw() in {push,pull}_dl_task() is double-accounting for that task. Fix it by removing rq/running bw accounting in [push/pull]_dl_task(). Fixes: 7dd778841164 ("sched/core: Unify p->on_rq updates") Signed-off-by: Dietmar Eggemann Signed-off-by: Peter Zijlstra (Intel) Cc: Valentin Schneider Cc: Ingo Molnar Cc: Luca Abeni Cc: Daniel Bristot de Oliveira Cc: Juri Lelli Cc: Qais Yousef Link: https://lkml.kernel.org/r/20190802145945.18702-2-dietmar.eggemann@arm.com --- kernel/sched/deadline.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index ef5b9f6b1d42..46122edd8552 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -2088,17 +2088,13 @@ retry: } deactivate_task(rq, next_task, 0); - sub_running_bw(&next_task->dl, &rq->dl); - sub_rq_bw(&next_task->dl, &rq->dl); set_task_cpu(next_task, later_rq->cpu); - add_rq_bw(&next_task->dl, &later_rq->dl); /* * Update the later_rq clock here, because the clock is used * by the cpufreq_update_util() inside __add_running_bw(). */ update_rq_clock(later_rq); - add_running_bw(&next_task->dl, &later_rq->dl); activate_task(later_rq, next_task, ENQUEUE_NOCLOCK); ret = 1; @@ -2186,11 +2182,7 @@ static void pull_dl_task(struct rq *this_rq) resched = true; deactivate_task(src_rq, p, 0); - sub_running_bw(&p->dl, &src_rq->dl); - sub_rq_bw(&p->dl, &src_rq->dl); set_task_cpu(p, this_cpu); - add_rq_bw(&p->dl, &this_rq->dl); - add_running_bw(&p->dl, &this_rq->dl); activate_task(this_rq, p, 0); dmin = p->dl.deadline; -- cgit From 14f5c7b46a41a595fc61db37f55721714729e59e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 1 Aug 2019 12:41:31 +0200 Subject: sched/psi: Reduce psimon FIFO priority PSI defaults to a FIFO-99 thread, reduce this to FIFO-1. FIFO-99 is the very highest priority available to SCHED_FIFO and it not a suitable default; it would indicate the psi work is the most important work on the machine. Since Real-Time tasks will have pre-allocated memory and locked it in place, Real-Time tasks do not care about PSI. All it needs is to be above OTHER. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Johannes Weiner Tested-by: Suren Baghdasaryan Cc: Thomas Gleixner --- kernel/sched/psi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 7acc632c3b82..7fe2c5fd26b5 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1051,7 +1051,7 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, if (!rcu_access_pointer(group->poll_kworker)) { struct sched_param param = { - .sched_priority = MAX_RT_PRIO - 1, + .sched_priority = 1, }; struct kthread_worker *kworker; -- cgit From 04e048cf09d7b5fc995817cdc5ae1acd4482429c Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 29 Jul 2019 18:33:10 -0700 Subject: sched/psi: Do not require setsched permission from the trigger creator When a process creates a new trigger by writing into /proc/pressure/* files, permissions to write such a file should be used to determine whether the process is allowed to do so or not. Current implementation would also require such a process to have setsched capability. Setting of psi trigger thread's scheduling policy is an implementation detail and should not be exposed to the user level. Remove the permission check by using _nocheck version of the function. Suggested-by: Nick Kralevich Signed-off-by: Suren Baghdasaryan Signed-off-by: Peter Zijlstra (Intel) Cc: lizefan@huawei.com Cc: mingo@redhat.com Cc: akpm@linux-foundation.org Cc: kernel-team@android.com Cc: dennisszhou@gmail.com Cc: dennis@kernel.org Cc: hannes@cmpxchg.org Cc: axboe@kernel.dk Link: https://lkml.kernel.org/r/20190730013310.162367-1-surenb@google.com --- kernel/sched/psi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 7fe2c5fd26b5..23fbbcc414d5 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1061,7 +1061,7 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, mutex_unlock(&group->trigger_lock); return ERR_CAST(kworker); } - sched_setscheduler(kworker->task, SCHED_FIFO, ¶m); + sched_setscheduler_nocheck(kworker->task, SCHED_FIFO, ¶m); kthread_init_delayed_work(&group->poll_work, psi_poll_work); rcu_assign_pointer(group->poll_kworker, kworker); -- cgit From 491beed3b102b6e6c0e7734200661242226e3933 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 5 Aug 2019 09:19:06 +0800 Subject: genirq/affinity: Create affinity mask for single vector Since commit c66d4bd110a1f8 ("genirq/affinity: Add new callback for (re)calculating interrupt sets"), irq_create_affinity_masks() returns NULL in case of single vector. This change has caused regression on some drivers, such as lpfc. The problem is that single vector requests can happen in some generic cases: 1) kdump kernel 2) irq vectors resource is close to exhaustion. If in that situation the affinity mask for a single vector is not created, every caller has to handle the special case. There is no reason why the mask cannot be created, so remove the check for a single vector and create the mask. Fixes: c66d4bd110a1f8 ("genirq/affinity: Add new callback for (re)calculating interrupt sets") Signed-off-by: Ming Lei Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190805011906.5020-1-ming.lei@redhat.com --- kernel/irq/affinity.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index 4352b08ae48d..6fef48033f96 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -251,11 +251,9 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd) * Determine the number of vectors which need interrupt affinities * assigned. If the pre/post request exhausts the available vectors * then nothing to do here except for invoking the calc_sets() - * callback so the device driver can adjust to the situation. If there - * is only a single vector, then managing the queue is pointless as - * well. + * callback so the device driver can adjust to the situation. */ - if (nvecs > 1 && nvecs > affd->pre_vectors + affd->post_vectors) + if (nvecs > affd->pre_vectors + affd->post_vectors) affvecs = nvecs - affd->pre_vectors - affd->post_vectors; else affvecs = 0; -- cgit From 600f5badb78c316146d062cfd7af4a2cfb655baa Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 7 Aug 2019 12:36:01 +0530 Subject: cpufreq: schedutil: Don't skip freq update when limits change To avoid reducing the frequency of a CPU prematurely, we skip reducing the frequency if the CPU had been busy recently. This should not be done when the limits of the policy are changed, for example due to thermal throttling. We should always get the frequency within the new limits as soon as possible. Trying to fix this by using only one flag, i.e. need_freq_update, can lead to a race condition where the flag gets cleared without forcing us to change the frequency at least once. And so this patch introduces another flag to avoid that race condition. Fixes: ecd288429126 ("cpufreq: schedutil: Don't set next_freq to UINT_MAX") Cc: v4.18+ # v4.18+ Reported-by: Doug Smythies Tested-by: Doug Smythies Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 636ca6f88c8e..867b4bb6d4be 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -40,6 +40,7 @@ struct sugov_policy { struct task_struct *thread; bool work_in_progress; + bool limits_changed; bool need_freq_update; }; @@ -89,8 +90,11 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) !cpufreq_this_cpu_can_update(sg_policy->policy)) return false; - if (unlikely(sg_policy->need_freq_update)) + if (unlikely(sg_policy->limits_changed)) { + sg_policy->limits_changed = false; + sg_policy->need_freq_update = true; return true; + } delta_ns = time - sg_policy->last_freq_update_time; @@ -437,7 +441,7 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy) { if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) - sg_policy->need_freq_update = true; + sg_policy->limits_changed = true; } static void sugov_update_single(struct update_util_data *hook, u64 time, @@ -457,7 +461,8 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, if (!sugov_should_update_freq(sg_policy, time)) return; - busy = sugov_cpu_is_busy(sg_cpu); + /* Limits may have changed, don't skip frequency update */ + busy = !sg_policy->need_freq_update && sugov_cpu_is_busy(sg_cpu); util = sugov_get_util(sg_cpu); max = sg_cpu->max; @@ -831,6 +836,7 @@ static int sugov_start(struct cpufreq_policy *policy) sg_policy->last_freq_update_time = 0; sg_policy->next_freq = 0; sg_policy->work_in_progress = false; + sg_policy->limits_changed = false; sg_policy->need_freq_update = false; sg_policy->cached_raw_freq = 0; @@ -879,7 +885,7 @@ static void sugov_limits(struct cpufreq_policy *policy) mutex_unlock(&sg_policy->work_lock); } - sg_policy->need_freq_update = true; + sg_policy->limits_changed = true; } struct cpufreq_governor schedutil_gov = { -- cgit From cf14be0b41c659ede89abef3f7ec0e98e6cfea5b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 Aug 2019 14:33:23 +0300 Subject: dma-direct: fix DMA_ATTR_NO_KERNEL_MAPPING The new DMA_ATTR_NO_KERNEL_MAPPING needs to actually assign a dma_addr to work. Also skip it if the architecture needs forced decryption handling, as that needs a kernel virtual address. Fixes: d98849aff879 (dma-direct: handle DMA_ATTR_NO_KERNEL_MAPPING in common code) Signed-off-by: Christoph Hellwig Reviewed-by: Lucas Stach --- kernel/dma/direct.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 59bdceea3737..974e96a1de44 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -130,10 +130,12 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, if (!page) return NULL; - if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) { + if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && + !force_dma_unencrypted(dev)) { /* remove any dirty cache lines on the kernel alias */ if (!PageHighMem(page)) arch_dma_prep_coherent(page, size); + *dma_handle = phys_to_dma(dev, page_to_phys(page)); /* return the page pointer as the opaque cookie */ return page; } @@ -178,7 +180,8 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, { unsigned int page_order = get_order(size); - if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) { + if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && + !force_dma_unencrypted(dev)) { /* cpu_addr is a struct page cookie, not a kernel address */ __dma_direct_free_pages(dev, size, cpu_addr); return; -- cgit From d8ad55538abe443919e20e0bb996561bca9cad84 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 5 Aug 2019 17:51:53 +0200 Subject: dma-direct: don't truncate dma_required_mask to bus addressing capabilities The dma required_mask needs to reflect the actual addressing capabilities needed to handle the whole system RAM. When truncated down to the bus addressing capabilities dma_addressing_limited() will incorrectly signal no limitations for devices which are restricted by the bus_dma_mask. Fixes: b4ebe6063204 (dma-direct: implement complete bus_dma_mask handling) Signed-off-by: Lucas Stach Tested-by: Atish Patra Signed-off-by: Christoph Hellwig --- kernel/dma/direct.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel') diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 974e96a1de44..795c9b095d75 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -47,9 +47,6 @@ u64 dma_direct_get_required_mask(struct device *dev) { u64 max_dma = phys_to_dma_direct(dev, (max_pfn - 1) << PAGE_SHIFT); - if (dev->bus_dma_mask && dev->bus_dma_mask < max_dma) - max_dma = dev->bus_dma_mask; - return (1ULL << (fls64(max_dma) - 1)) * 2 - 1; } -- cgit From 33dcb37cef741294b481f4d889a465b8091f11bf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 26 Jul 2019 09:26:40 +0200 Subject: dma-mapping: fix page attributes for dma_mmap_* All the way back to introducing dma_common_mmap we've defaulted to mark the pages as uncached. But this is wrong for DMA coherent devices. Later on DMA_ATTR_WRITE_COMBINE also got incorrect treatment as that flag is only treated special on the alloc side for non-coherent devices. Introduce a new dma_pgprot helper that deals with the check for coherent devices so that only the remapping cases ever reach arch_dma_mmap_pgprot and we thus ensure no aliasing of page attributes happens, which makes the powerpc version of arch_dma_mmap_pgprot obsolete and simplifies the remaining ones. Note that this means arch_dma_mmap_pgprot is a bit misnamed now, but we'll phase it out soon. Fixes: 64ccc9c033c6 ("common: dma-mapping: add support for generic dma_mmap_* calls") Reported-by: Shawn Anastasio Reported-by: Gavin Li Signed-off-by: Christoph Hellwig Acked-by: Catalin Marinas # arm64 --- kernel/dma/mapping.c | 19 ++++++++++++++++++- kernel/dma/remap.c | 2 +- 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index b945239621d8..b0038ca3aa92 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -150,6 +150,23 @@ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, } EXPORT_SYMBOL(dma_get_sgtable_attrs); +#ifdef CONFIG_MMU +/* + * Return the page attributes used for mapping dma_alloc_* memory, either in + * kernel space if remapping is needed, or to userspace through dma_mmap_*. + */ +pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs) +{ + if (dev_is_dma_coherent(dev) || + (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) && + (attrs & DMA_ATTR_NON_CONSISTENT))) + return prot; + if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_MMAP_PGPROT)) + return arch_dma_mmap_pgprot(dev, prot, attrs); + return pgprot_noncached(prot); +} +#endif /* CONFIG_MMU */ + /* * Create userspace mapping for the DMA-coherent memory. */ @@ -164,7 +181,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, unsigned long pfn; int ret = -ENXIO; - vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs); + vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs); if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c index a594aec07882..ffe78f0b2fe4 100644 --- a/kernel/dma/remap.c +++ b/kernel/dma/remap.c @@ -218,7 +218,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, /* create a coherent mapping */ ret = dma_common_contiguous_remap(page, size, VM_USERMAP, - arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs), + dma_pgprot(dev, PAGE_KERNEL, attrs), __builtin_return_address(0)); if (!ret) { __dma_direct_free_pages(dev, size, page); -- cgit