diff options
Diffstat (limited to 'kernel/sched/topology.c')
| -rw-r--r-- | kernel/sched/topology.c | 114 |
1 files changed, 92 insertions, 22 deletions
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 444bdfdab731..cf643a5ddedd 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1590,10 +1590,17 @@ static void claim_allocations(int cpu, struct sched_domain *sd) #ifdef CONFIG_NUMA enum numa_topology_type sched_numa_topology_type; +/* + * sched_domains_numa_distance is derived from sched_numa_node_distance + * and provides a simplified view of NUMA distances used specifically + * for building NUMA scheduling domains. + */ static int sched_domains_numa_levels; +static int sched_numa_node_levels; int sched_max_numa_distance; static int *sched_domains_numa_distance; +static int *sched_numa_node_distance; static struct cpumask ***sched_domains_numa_masks; #endif /* CONFIG_NUMA */ @@ -1662,6 +1669,12 @@ sd_init(struct sched_domain_topology_level *tl, .last_balance = jiffies, .balance_interval = sd_weight, + + /* 50% success rate */ + .newidle_call = 512, + .newidle_success = 256, + .newidle_ratio = 512, + .max_newidle_lb_cost = 0, .last_decay_max_lb_cost = jiffies, .child = child, @@ -1845,10 +1858,10 @@ bool find_numa_distance(int distance) return true; rcu_read_lock(); - distances = rcu_dereference(sched_domains_numa_distance); + distances = rcu_dereference(sched_numa_node_distance); if (!distances) goto unlock; - for (i = 0; i < sched_domains_numa_levels; i++) { + for (i = 0; i < sched_numa_node_levels; i++) { if (distances[i] == distance) { found = true; break; @@ -1924,14 +1937,34 @@ static void init_numa_topology_type(int offline_node) #define NR_DISTANCE_VALUES (1 << DISTANCE_BITS) -void sched_init_numa(int offline_node) +/* + * An architecture could modify its NUMA distance, to change + * grouping of NUMA nodes and number of NUMA levels when creating + * NUMA level sched domains. + * + * A NUMA level is created for each unique + * arch_sched_node_distance. + */ +static int numa_node_dist(int i, int j) { - struct sched_domain_topology_level *tl; - unsigned long *distance_map; + return node_distance(i, j); +} + +int arch_sched_node_distance(int from, int to) + __weak __alias(numa_node_dist); + +static bool modified_sched_node_distance(void) +{ + return numa_node_dist != arch_sched_node_distance; +} + +static int sched_record_numa_dist(int offline_node, int (*n_dist)(int, int), + int **dist, int *levels) +{ + unsigned long *distance_map __free(bitmap) = NULL; int nr_levels = 0; int i, j; int *distances; - struct cpumask ***masks; /* * O(nr_nodes^2) de-duplicating selection sort -- in order to find the @@ -1939,17 +1972,16 @@ void sched_init_numa(int offline_node) */ distance_map = bitmap_alloc(NR_DISTANCE_VALUES, GFP_KERNEL); if (!distance_map) - return; + return -ENOMEM; bitmap_zero(distance_map, NR_DISTANCE_VALUES); for_each_cpu_node_but(i, offline_node) { for_each_cpu_node_but(j, offline_node) { - int distance = node_distance(i, j); + int distance = n_dist(i, j); if (distance < LOCAL_DISTANCE || distance >= NR_DISTANCE_VALUES) { sched_numa_warn("Invalid distance value range"); - bitmap_free(distance_map); - return; + return -EINVAL; } bitmap_set(distance_map, distance, 1); @@ -1962,18 +1994,46 @@ void sched_init_numa(int offline_node) nr_levels = bitmap_weight(distance_map, NR_DISTANCE_VALUES); distances = kcalloc(nr_levels, sizeof(int), GFP_KERNEL); - if (!distances) { - bitmap_free(distance_map); - return; - } + if (!distances) + return -ENOMEM; for (i = 0, j = 0; i < nr_levels; i++, j++) { j = find_next_bit(distance_map, NR_DISTANCE_VALUES, j); distances[i] = j; } - rcu_assign_pointer(sched_domains_numa_distance, distances); + *dist = distances; + *levels = nr_levels; - bitmap_free(distance_map); + return 0; +} + +void sched_init_numa(int offline_node) +{ + struct sched_domain_topology_level *tl; + int nr_levels, nr_node_levels; + int i, j; + int *distances, *domain_distances; + struct cpumask ***masks; + + /* Record the NUMA distances from SLIT table */ + if (sched_record_numa_dist(offline_node, numa_node_dist, &distances, + &nr_node_levels)) + return; + + /* Record modified NUMA distances for building sched domains */ + if (modified_sched_node_distance()) { + if (sched_record_numa_dist(offline_node, arch_sched_node_distance, + &domain_distances, &nr_levels)) { + kfree(distances); + return; + } + } else { + domain_distances = distances; + nr_levels = nr_node_levels; + } + rcu_assign_pointer(sched_numa_node_distance, distances); + WRITE_ONCE(sched_max_numa_distance, distances[nr_node_levels - 1]); + WRITE_ONCE(sched_numa_node_levels, nr_node_levels); /* * 'nr_levels' contains the number of unique distances @@ -1991,6 +2051,8 @@ void sched_init_numa(int offline_node) * * We reset it to 'nr_levels' at the end of this function. */ + rcu_assign_pointer(sched_domains_numa_distance, domain_distances); + sched_domains_numa_levels = 0; masks = kzalloc(sizeof(void *) * nr_levels, GFP_KERNEL); @@ -2016,10 +2078,13 @@ void sched_init_numa(int offline_node) masks[i][j] = mask; for_each_cpu_node_but(k, offline_node) { - if (sched_debug() && (node_distance(j, k) != node_distance(k, j))) + if (sched_debug() && + (arch_sched_node_distance(j, k) != + arch_sched_node_distance(k, j))) sched_numa_warn("Node-distance not symmetric"); - if (node_distance(j, k) > sched_domains_numa_distance[i]) + if (arch_sched_node_distance(j, k) > + sched_domains_numa_distance[i]) continue; cpumask_or(mask, mask, cpumask_of_node(k)); @@ -2059,7 +2124,6 @@ void sched_init_numa(int offline_node) sched_domain_topology = tl; sched_domains_numa_levels = nr_levels; - WRITE_ONCE(sched_max_numa_distance, sched_domains_numa_distance[nr_levels - 1]); init_numa_topology_type(offline_node); } @@ -2067,14 +2131,18 @@ void sched_init_numa(int offline_node) static void sched_reset_numa(void) { - int nr_levels, *distances; + int nr_levels, *distances, *dom_distances = NULL; struct cpumask ***masks; nr_levels = sched_domains_numa_levels; + sched_numa_node_levels = 0; sched_domains_numa_levels = 0; sched_max_numa_distance = 0; sched_numa_topology_type = NUMA_DIRECT; - distances = sched_domains_numa_distance; + distances = sched_numa_node_distance; + if (sched_numa_node_distance != sched_domains_numa_distance) + dom_distances = sched_domains_numa_distance; + rcu_assign_pointer(sched_numa_node_distance, NULL); rcu_assign_pointer(sched_domains_numa_distance, NULL); masks = sched_domains_numa_masks; rcu_assign_pointer(sched_domains_numa_masks, NULL); @@ -2083,6 +2151,7 @@ static void sched_reset_numa(void) synchronize_rcu(); kfree(distances); + kfree(dom_distances); for (i = 0; i < nr_levels && masks; i++) { if (!masks[i]) continue; @@ -2129,7 +2198,8 @@ void sched_domains_numa_masks_set(unsigned int cpu) continue; /* Set ourselves in the remote node's masks */ - if (node_distance(j, node) <= sched_domains_numa_distance[i]) + if (arch_sched_node_distance(j, node) <= + sched_domains_numa_distance[i]) cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]); } } |
