/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _FS_RESCTRL_INTERNAL_H
#define _FS_RESCTRL_INTERNAL_H

#include <linux/resctrl.h>
#include <linux/kernfs.h>
#include <linux/fs_context.h>
#include <linux/tick.h>

#define CQM_LIMBOCHECK_INTERVAL	1000

/**
 * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that
 *			        aren't marked nohz_full
 * @mask:	The mask to pick a CPU from.
 * @exclude_cpu: The CPU to avoid picking.
 *
 * Returns a CPU from @mask, but not @exclude_cpu. If there are housekeeping
 * CPUs that don't use nohz_full, these are preferred. Pass
 * RESCTRL_PICK_ANY_CPU to avoid excluding any CPUs.
 *
 * When a CPU is excluded, returns >= nr_cpu_ids if no CPUs are available.
 */
static inline unsigned int
cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu)
{
	unsigned int cpu;

	/* Try to find a CPU that isn't nohz_full to use in preference */
	if (tick_nohz_full_enabled()) {
		cpu = cpumask_any_andnot_but(mask, tick_nohz_full_mask, exclude_cpu);
		if (cpu < nr_cpu_ids)
			return cpu;
	}

	return cpumask_any_but(mask, exclude_cpu);
}
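
/*
 * Usage sketch (illustrative only, not part of this header): pick a CPU
 * from a domain's CPU mask on which to run deferred work, skipping a CPU
 * that is going offline. The dom/work/delay names are placeholders; only
 * cpumask_any_housekeeping() and RESCTRL_PICK_ANY_CPU are resctrl's own.
 * Pass RESCTRL_PICK_ANY_CPU as @exclude_cpu when nothing needs excluding.
 *
 *	cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
 *	if (cpu >= nr_cpu_ids)
 *		return;		// mask contains no usable CPU
 *	schedule_delayed_work_on(cpu, &dom->work, delay);
 */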

struct rdt_fs_context {
	struct kernfs_fs_context	kfc;
	bool				enable_cdpl2;
	bool				enable_cdpl3;
	bool				enable_mba_mbps;
	bool				enable_debug;
};

static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
{
	struct kernfs_fs_context *kfc = fc->fs_private;

	return container_of(kfc, struct rdt_fs_context, kfc);
}
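
/*
 * Usage sketch (illustrative, hypothetical handler name): recover the
 * rdt_fs_context from a struct fs_context inside a mount-option handler.
 *
 *	static int example_parse_param(struct fs_context *fc,
 *				       struct fs_parameter *param)
 *	{
 *		struct rdt_fs_context *ctx = rdt_fc2context(fc);
 *
 *		// e.g. an option such as "cdp" could set
 *		// ctx->enable_cdpl3 = true here
 *		return 0;
 *	}
 */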

/**
 * struct mon_evt - Entry in the event list of a resource
 * @evtid:		event id
 * @name:		name of the event
 * @configurable:	true if the event is configurable
 * @list:		entry in &rdt_resource->evt_list
 */
struct mon_evt {
	enum resctrl_event_id	evtid;
	char			*name;
	bool			configurable;
	struct list_head	list;
};
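
/*
 * Sketch (illustrative): walk a resource's event list. Locking of
 * r->evt_list is assumed to be the caller's responsibility.
 *
 *	struct mon_evt *mevt;
 *
 *	list_for_each_entry(mevt, &r->evt_list, list)
 *		pr_debug("event %s (id %d)\n", mevt->name, mevt->evtid);
 */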

/**
 * struct mon_data - Monitoring details for each event file.
 * @list:            Member of the global @mon_data_kn_priv_list list.
 * @rid:             Resource id associated with the event file.
 * @evtid:           Event id associated with the event file.
 * @sum:             Set when event must be summed across multiple
 *                   domains.
 * @domid:           When @sum is zero this is the id of the domain to
 *                   which the event file belongs. When @sum is one this
 *                   is the id of the L3 cache shared by all the domains
 *                   being summed.
 *
 * Pointed to by the kernfs kn->priv field of monitoring event files.
 * Readers and writers must hold rdtgroup_mutex.
 */
struct mon_data {
	struct list_head	list;
	enum resctrl_res_level	rid;
	enum resctrl_event_id	evtid;
	int			domid;
	bool			sum;
};
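
/*
 * Sketch (illustrative, the helpers are hypothetical): consume the mon_data
 * hanging off a monitoring event file's kn->priv. As noted above,
 * rdtgroup_mutex must be held by readers and writers.
 *
 *	struct mon_data *md = kn->priv;
 *
 *	lockdep_assert_held(&rdtgroup_mutex);
 *	if (md->sum)
 *		example_sum_l3(md->rid, md->evtid, md->domid);	// all domains sharing L3 @domid
 *	else
 *		example_read_one(md->rid, md->evtid, md->domid);
 */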

/**
 * struct rmid_read - Data passed across smp_call*() to read event count.
 * @rgrp:  Resource group for which the counter is being read. If it is a parent
 *	   resource group then its event count is summed with the count from all
 *	   its child resource groups.
 * @r:	   Resource describing the properties of the event being read.
 * @d:	   Domain that the counter should be read from. If NULL then sum all
 *	   domains in @r sharing L3 @ci.id
 * @evtid: Which monitor event to read.
 * @first: Initialize MBM counter when true.
 * @ci:    Cacheinfo for L3. Only set when @d is NULL. Used when summing domains.
 * @err:   Error encountered when reading counter.
 * @val:   Returned value of event counter. If @rgrp is a parent resource group,
 *	   @val includes the sum of event counts from its child resource groups.
 *	   If @d is NULL, @val includes the sum over all domains in @r sharing @ci.id
 *	   (summed across child resource groups if @rgrp is a parent resource group).
 * @arch_mon_ctx: Hardware monitor allocated for this read request (MPAM only).
 */
struct rmid_read {
	struct rdtgroup		*rgrp;
	struct rdt_resource	*r;
	struct rdt_mon_domain	*d;
	enum resctrl_event_id	evtid;
	bool			first;
	struct cacheinfo	*ci;
	int			err;
	u64			val;
	void			*arch_mon_ctx;
};
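
/*
 * Sketch (illustrative, not the exact flow used by the fs code): read one
 * event via mon_event_read() (declared later in this header) and consume
 * the result, following the field descriptions above. The cpumask argument
 * shown is a placeholder.
 *
 *	struct rmid_read rr = { 0 };
 *
 *	mon_event_read(&rr, r, d, rdtgrp, &d->hdr.cpu_mask, evtid, false);
 *	if (rr.err)
 *		pr_debug("event read failed: %d\n", rr.err);
 *	else
 *		pr_debug("event count: %llu\n", rr.val);
 */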

extern struct list_head resctrl_schema_all;

extern bool resctrl_mounted;

enum rdt_group_type {
	RDTCTRL_GROUP = 0,
	RDTMON_GROUP,
	RDT_NUM_GROUP,
};

/**
 * enum rdtgrp_mode - Mode of a RDT resource group
 * @RDT_MODE_SHAREABLE: This resource group allows sharing of its allocations
 * @RDT_MODE_EXCLUSIVE: No sharing of this resource group's allocations allowed
 * @RDT_MODE_PSEUDO_LOCKSETUP: Resource group will be used for Pseudo-Locking
 * @RDT_MODE_PSEUDO_LOCKED: No sharing of this resource group's allocations
 *                          allowed AND the allocations are Cache Pseudo-Locked
 * @RDT_NUM_MODES: Total number of modes
 *
 * The mode of a resource group enables control over the allowed overlap
 * between allocations associated with different resource groups (classes
 * of service). The user can modify the mode of a resource group by
 * writing to the "mode" resctrl file associated with the resource group.
 *
 * The "shareable", "exclusive", and "pseudo-locksetup" modes are set by
 * writing the appropriate text to the "mode" file. A resource group enters
 * "pseudo-locked" mode after the schemata is written while the resource
 * group is in "pseudo-locksetup" mode.
 */
enum rdtgrp_mode {
	RDT_MODE_SHAREABLE = 0,
	RDT_MODE_EXCLUSIVE,
	RDT_MODE_PSEUDO_LOCKSETUP,
	RDT_MODE_PSEUDO_LOCKED,

	/* Must be last */
	RDT_NUM_MODES,
};
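
/*
 * Sketch (illustrative, hypothetical helper): the strings users write to the
 * "mode" file, as named in the comment above, map onto the enum like so:
 *
 *	static const char *example_mode_str(enum rdtgrp_mode mode)
 *	{
 *		switch (mode) {
 *		case RDT_MODE_SHAREABLE:	return "shareable";
 *		case RDT_MODE_EXCLUSIVE:	return "exclusive";
 *		case RDT_MODE_PSEUDO_LOCKSETUP:	return "pseudo-locksetup";
 *		case RDT_MODE_PSEUDO_LOCKED:	return "pseudo-locked";
 *		default:			return "unknown";
 *		}
 *	}
 */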

/**
 * struct mongroup - store a monitoring group's data in the resctrl file system.
 * @mon_data_kn:		kernfs node for the mon_data directory
 * @parent:			parent rdtgrp
 * @crdtgrp_list:		child rdtgroup node list
 * @rmid:			rmid for this rdtgroup
 */
struct mongroup {
	struct kernfs_node	*mon_data_kn;
	struct rdtgroup		*parent;
	struct list_head	crdtgrp_list;
	u32			rmid;
};

/**
 * struct rdtgroup - store an rdtgroup's data in the resctrl file system.
 * @kn:				kernfs node
 * @rdtgroup_list:		linked list for all rdtgroups
 * @closid:			closid for this rdtgroup
 * @cpu_mask:			CPUs assigned to this rdtgroup
 * @flags:			status bits
 * @waitcount:			how many cpus expect to find this
 *				group when they acquire rdtgroup_mutex
 * @type:			indicates type of this rdtgroup - either
 *				monitor only or ctrl_mon group
 * @mon:			mongroup related data
 * @mode:			mode of resource group
 * @mba_mbps_event:		input monitoring event id when mba_sc is enabled
 * @plr:			pseudo-locked region
 */
struct rdtgroup {
	struct kernfs_node		*kn;
	struct list_head		rdtgroup_list;
	u32				closid;
	struct cpumask			cpu_mask;
	int				flags;
	atomic_t			waitcount;
	enum rdt_group_type		type;
	struct mongroup			mon;
	enum rdtgrp_mode		mode;
	enum resctrl_event_id		mba_mbps_event;
	struct pseudo_lock_region	*plr;
};
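
/*
 * Sketch (illustrative): walk every resource group via rdt_all_groups and
 * the @rdtgroup_list member. rdt_all_groups, rdtgroup_mutex and
 * rdt_kn_name() are all declared later in this header; holding
 * rdtgroup_mutex is assumed to be required.
 *
 *	struct rdtgroup *rdtgrp;
 *
 *	lockdep_assert_held(&rdtgroup_mutex);
 *	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list)
 *		pr_debug("group %s: closid %u\n",
 *			 rdt_kn_name(rdtgrp->kn), rdtgrp->closid);
 */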

/* rdtgroup.flags */
#define	RDT_DELETED		1

/* rftype.flags */
#define RFTYPE_FLAGS_CPUS_LIST	1

/*
 * Define the file type flags for base and info directories.
 */
#define RFTYPE_INFO			BIT(0)

#define RFTYPE_BASE			BIT(1)

#define RFTYPE_CTRL			BIT(4)

#define RFTYPE_MON			BIT(5)

#define RFTYPE_TOP			BIT(6)

#define RFTYPE_RES_CACHE		BIT(8)

#define RFTYPE_RES_MB			BIT(9)

#define RFTYPE_DEBUG			BIT(10)

#define RFTYPE_CTRL_INFO		(RFTYPE_INFO | RFTYPE_CTRL)

#define RFTYPE_MON_INFO			(RFTYPE_INFO | RFTYPE_MON)

#define RFTYPE_TOP_INFO			(RFTYPE_INFO | RFTYPE_TOP)

#define RFTYPE_CTRL_BASE		(RFTYPE_BASE | RFTYPE_CTRL)

#define RFTYPE_MON_BASE			(RFTYPE_BASE | RFTYPE_MON)

/* List of all resource groups */
extern struct list_head rdt_all_groups;

extern int max_name_width;

/**
 * struct rftype - describe each file in the resctrl file system
 * @name:	File name
 * @mode:	Access mode
 * @kf_ops:	File operations
 * @flags:	File specific RFTYPE_FLAGS_* flags
 * @fflags:	File specific RFTYPE_* flags
 * @seq_show:	Show content of the file
 * @write:	Write to the file
 */
struct rftype {
	char			*name;
	umode_t			mode;
	const struct kernfs_ops	*kf_ops;
	unsigned long		flags;
	unsigned long		fflags;

	int (*seq_show)(struct kernfs_open_file *of,
			struct seq_file *sf, void *v);
	/*
	 * write() is the generic write callback which maps directly to
	 * kernfs write operation and overrides all other operations.
	 * Maximum write size is determined by ->max_write_len.
	 */
	ssize_t (*write)(struct kernfs_open_file *of,
			 char *buf, size_t nbytes, loff_t off);
};
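
/*
 * Sketch (hypothetical entry): how a resctrl file is typically described.
 * The kf_ops value and seq_show helper below are placeholders; the real
 * tables live in the fs code.
 *
 *	static struct rftype example_file = {
 *		.name		= "example",
 *		.mode		= 0444,
 *		.kf_ops		= &example_kf_single_ops,
 *		.fflags		= RFTYPE_CTRL_BASE | RFTYPE_DEBUG,
 *		.seq_show	= example_seq_show,
 *	};
 */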

/**
 * struct mbm_state - status for each MBM counter in each domain
 * @prev_bw_bytes: Previous bytes value read for bandwidth calculation
 * @prev_bw:	The most recent bandwidth in MBps
 */
struct mbm_state {
	u64	prev_bw_bytes;
	u32	prev_bw;
};
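
/*
 * Sketch (illustrative, not the exact fs/resctrl arithmetic): bandwidth is
 * derived from the byte delta between two counter reads. Assuming the reads
 * are roughly one second apart, the delta shifted down by 20 approximates
 * MBps:
 *
 *	u64 cur_bytes = rr->val;
 *
 *	m->prev_bw = (cur_bytes - m->prev_bw_bytes) >> 20;
 *	m->prev_bw_bytes = cur_bytes;
 */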

extern struct mutex rdtgroup_mutex;

static inline const char *rdt_kn_name(const struct kernfs_node *kn)
{
	return rcu_dereference_check(kn->name, lockdep_is_held(&rdtgroup_mutex));
}

extern struct rdtgroup rdtgroup_default;

extern struct dentry *debugfs_resctrl;

extern enum resctrl_event_id mba_mbps_default_event;

void rdt_last_cmd_clear(void);

void rdt_last_cmd_puts(const char *s);

__printf(1, 2)
void rdt_last_cmd_printf(const char *fmt, ...);

struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);

void rdtgroup_kn_unlock(struct kernfs_node *kn);
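
/*
 * Sketch (illustrative): the usual pairing of the two helpers above. A NULL
 * return from rdtgroup_kn_lock_live() is assumed to mean the group went away
 * while the caller slept; the unlock is still issued on that path.
 *
 *	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 *	if (!rdtgrp) {
 *		rdtgroup_kn_unlock(of->kn);
 *		return -ENOENT;
 *	}
 *	// ... operate on rdtgrp under rdtgroup_mutex ...
 *	rdtgroup_kn_unlock(of->kn);
 */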

int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name);

int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
			     umode_t mask);

ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
				char *buf, size_t nbytes, loff_t off);

int rdtgroup_schemata_show(struct kernfs_open_file *of,
			   struct seq_file *s, void *v);

ssize_t rdtgroup_mba_mbps_event_write(struct kernfs_open_file *of,
				      char *buf, size_t nbytes, loff_t off);

int rdtgroup_mba_mbps_event_show(struct kernfs_open_file *of,
				 struct seq_file *s, void *v);

bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
			   unsigned long cbm, int closid, bool exclusive);

unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_ctrl_domain *d,
				  unsigned long cbm);

enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);

int rdtgroup_tasks_assigned(struct rdtgroup *r);

int closids_supported(void);

void closid_free(int closid);

int alloc_rmid(u32 closid);

void free_rmid(u32 closid, u32 rmid);

void resctrl_mon_resource_exit(void);

void mon_event_count(void *info);

int rdtgroup_mondata_show(struct seq_file *m, void *arg);

void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
		    struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
		    cpumask_t *cpumask, int evtid, int first);

int resctrl_mon_resource_init(void);

void mbm_setup_overflow_handler(struct rdt_mon_domain *dom,
				unsigned long delay_ms,
				int exclude_cpu);

void mbm_handle_overflow(struct work_struct *work);

bool is_mba_sc(struct rdt_resource *r);

void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
			     int exclude_cpu);

void cqm_handle_limbo(struct work_struct *work);

bool has_busy_rmid(struct rdt_mon_domain *d);

void __check_limbo(struct rdt_mon_domain *d, bool force_free);

void resctrl_file_fflags_init(const char *config, unsigned long fflags);

void rdt_staged_configs_clear(void);

bool closid_allocated(unsigned int closid);

int resctrl_find_cleanest_closid(void);

#ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK
int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);

int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);

bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm);

bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d);

int rdt_pseudo_lock_init(void);

void rdt_pseudo_lock_release(void);

int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);

void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);

#else
static inline int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
{
	return -EOPNOTSUPP;
}

static inline int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
{
	return -EOPNOTSUPP;
}

static inline bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm)
{
	return false;
}

static inline bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d)
{
	return false;
}

static inline int rdt_pseudo_lock_init(void) { return 0; }
static inline void rdt_pseudo_lock_release(void) { }
static inline int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
{
	return -EOPNOTSUPP;
}

static inline void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp) { }
#endif /* CONFIG_RESCTRL_FS_PSEUDO_LOCK */

#endif /* _FS_RESCTRL_INTERNAL_H */