summaryrefslogtreecommitdiff
path: root/kernel/trace/trace.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace/trace.c')
-rw-r--r--kernel/trace/trace.c765
1 files changed, 666 insertions, 99 deletions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index fd3cb2b2ab82..465989585135 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -49,6 +49,9 @@
#include <linux/fsnotify.h>
#include <linux/irq_work.h>
#include <linux/workqueue.h>
+#include <linux/sort.h>
+#include <linux/io.h> /* vmap_page_range() */
+#include <linux/fs_context.h>
#include <asm/setup.h> /* COMMAND_LINE_SIZE */
@@ -87,6 +90,7 @@ void __init disable_tracing_selftest(const char *reason)
static struct trace_iterator *tracepoint_print_iter;
int tracepoint_printk;
static bool tracepoint_printk_stop_on_boot __initdata;
+static bool traceoff_after_boot __initdata;
static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
/* For tracers that don't implement custom flags */
@@ -117,6 +121,7 @@ static int tracing_disabled = 1;
cpumask_var_t __read_mostly tracing_buffer_mask;
+#define MAX_TRACER_SIZE 100
/*
* ftrace_dump_on_oops - variable to dump ftrace buffer on oops
*
@@ -139,7 +144,40 @@ cpumask_var_t __read_mostly tracing_buffer_mask;
char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
/* When set, tracing will stop when a WARN*() is hit */
-int __disable_trace_on_warning;
+static int __disable_trace_on_warning;
+
+/* Forward declaration of the handler referenced by the table below */
+int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
+			     void *buffer, size_t *lenp, loff_t *ppos);
+
+/* Tracing sysctl knobs; registered under "kernel" by init_trace_sysctls() */
+static const struct ctl_table trace_sysctl_table[] = {
+	{
+		/* String knob backed by the ftrace_dump_on_oops buffer */
+		.procname	= "ftrace_dump_on_oops",
+		.mode		= 0644,
+		.maxlen		= MAX_TRACER_SIZE,
+		.data		= &ftrace_dump_on_oops,
+		.proc_handler	= proc_dostring,
+	},
+	{
+		/* Integer knob: when set, tracing stops when a WARN*() is hit */
+		.procname	= "traceoff_on_warning",
+		.mode		= 0644,
+		.maxlen		= sizeof(__disable_trace_on_warning),
+		.data		= &__disable_trace_on_warning,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		/* Integer knob with its own handler, tracepoint_printk_sysctl() */
+		.procname	= "tracepoint_printk",
+		.mode		= 0644,
+		.maxlen		= sizeof(tracepoint_printk),
+		.data		= &tracepoint_printk,
+		.proc_handler	= tracepoint_printk_sysctl,
+	},
+};
+
+/* Initcall: hook trace_sysctl_table into the "kernel" sysctl directory */
+static int __init init_trace_sysctls(void)
+{
+	register_sysctl_init("kernel", trace_sysctl_table);
+
+	return 0;
+}
+subsys_initcall(init_trace_sysctls);
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/* Map of enums to their values, for "eval_map" file */
@@ -330,6 +368,13 @@ static int __init set_tracepoint_printk_stop(char *str)
}
__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
+/*
+ * Handler for the "traceoff_after_boot" kernel command line parameter.
+ * It only latches the request; late_trace_init() acts on it by calling
+ * tracing_off().
+ */
+static int __init set_traceoff_after_boot(char *str)
+{
+	/* The parameter takes no value: @str is intentionally ignored */
+	traceoff_after_boot = true;
+
+	return 1;
+}
+__setup("traceoff_after_boot", set_traceoff_after_boot);
+
unsigned long long ns2usecs(u64 nsec)
{
nsec += 500;
@@ -483,7 +528,8 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_export);
TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
- TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK)
+ TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK | \
+ TRACE_ITER_COPY_MARKER)
/* trace_options that are only supported by global_trace */
#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
@@ -491,7 +537,8 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_export);
/* trace_flags that are default zero for instances */
#define ZEROED_TRACE_FLAGS \
- (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK)
+ (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK | \
+ TRACE_ITER_COPY_MARKER)
/*
* The global_trace is the descriptor that holds the top-level tracing
@@ -503,6 +550,9 @@ static struct trace_array global_trace = {
static struct trace_array *printk_trace = &global_trace;
+/* List of trace_arrays interested in the top level trace_marker */
+static LIST_HEAD(marker_copies);
+
static __always_inline bool printk_binsafe(struct trace_array *tr)
{
/*
@@ -524,6 +574,28 @@ static void update_printk_trace(struct trace_array *tr)
tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
}
+/*
+ * Add @tr to, or remove it from, the marker_copies list and keep its
+ * TRACE_ITER_COPY_MARKER flag in sync with list membership.
+ * Must be called with event_mutex held.
+ *
+ * Returns true if the status of tr changed, false if @tr was already in
+ * the requested state.
+ *
+ * NOTE(review): marker_copies is walked with list_for_each_entry_rcu()
+ * by the marker write paths while removal here uses list_del_init();
+ * confirm a concurrent reader cannot be left on the removed entry.
+ */
+static bool update_marker_trace(struct trace_array *tr, int enabled)
+{
+	bool on_list;
+
+	lockdep_assert_held(&event_mutex);
+
+	/* An initialized but unlinked marker_list means "not copying" */
+	on_list = !list_empty(&tr->marker_list);
+
+	/* Already in the requested state: nothing changes */
+	if (!!enabled == on_list)
+		return false;
+
+	if (enabled) {
+		list_add_rcu(&tr->marker_list, &marker_copies);
+		tr->trace_flags |= TRACE_ITER_COPY_MARKER;
+	} else {
+		list_del_init(&tr->marker_list);
+		tr->trace_flags &= ~TRACE_ITER_COPY_MARKER;
+	}
+
+	return true;
+}
+
void trace_set_ring_buffer_expanded(struct trace_array *tr)
{
if (!tr)
@@ -1573,6 +1645,39 @@ void tracer_tracing_off(struct trace_array *tr)
}
/**
+ * tracer_tracing_disable() - temporarily disable the buffer from write
+ * @tr: The trace array to disable its buffer for
+ *
+ * Expects tracer_tracing_enable() to re-enable tracing.
+ * The difference between this and tracer_tracing_off() is that this
+ * is a counter and can nest, whereas, tracer_tracing_off() can
+ * be called multiple times and a single tracer_tracing_on() will
+ * enable it.
+ */
+void tracer_tracing_disable(struct trace_array *tr)
+{
+	struct trace_buffer *buffer = tr->array_buffer.buffer;
+
+	/* A trace array without a ring buffer is unexpected: warn once, bail */
+	if (WARN_ON_ONCE(!buffer))
+		return;
+
+	ring_buffer_record_disable(buffer);
+}
+
+/**
+ * tracer_tracing_enable() - counterpart of tracer_tracing_disable()
+ * @tr: The trace array that had tracer_tracing_disable() called on it
+ *
+ * Call this once it is safe to record again after an earlier
+ * tracer_tracing_disable() on @tr.
+ */
+void tracer_tracing_enable(struct trace_array *tr)
+{
+	struct trace_buffer *buffer = tr->array_buffer.buffer;
+
+	/* A trace array without a ring buffer is unexpected: warn once, bail */
+	if (WARN_ON_ONCE(!buffer))
+		return;
+
+	ring_buffer_record_enable(buffer);
+}
+
+/**
* tracing_off - turn off tracing buffers
*
* This function stops the tracing buffers from recording data.
@@ -2878,13 +2983,16 @@ trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
void
trace_function(struct trace_array *tr, unsigned long ip, unsigned long
- parent_ip, unsigned int trace_ctx)
+ parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
{
struct trace_buffer *buffer = tr->array_buffer.buffer;
struct ring_buffer_event *event;
struct ftrace_entry *entry;
+ int size = sizeof(*entry);
- event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
+ size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
+
+ event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
trace_ctx);
if (!event)
return;
@@ -2892,6 +3000,13 @@ trace_function(struct trace_array *tr, unsigned long ip, unsigned long
entry->ip = ip;
entry->parent_ip = parent_ip;
+#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
+ if (fregs) {
+ for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
+ entry->args[i] = ftrace_regs_get_argument(fregs, i);
+ }
+#endif
+
if (static_branch_unlikely(&trace_function_exports_enabled))
ftrace_exports(event, TRACE_EXPORT_FUNCTION);
__buffer_unlock_commit(buffer, event);
@@ -3322,10 +3437,9 @@ out_nobuffer:
}
EXPORT_SYMBOL_GPL(trace_vbprintk);
-__printf(3, 0)
-static int
-__trace_array_vprintk(struct trace_buffer *buffer,
- unsigned long ip, const char *fmt, va_list args)
+static __printf(3, 0)
+int __trace_array_vprintk(struct trace_buffer *buffer,
+ unsigned long ip, const char *fmt, va_list args)
{
struct ring_buffer_event *event;
int len = 0, size;
@@ -3375,7 +3489,6 @@ out_nobuffer:
return len;
}
-__printf(3, 0)
int trace_array_vprintk(struct trace_array *tr,
unsigned long ip, const char *fmt, va_list args)
{
@@ -3405,7 +3518,6 @@ int trace_array_vprintk(struct trace_array *tr,
* Note, trace_array_init_printk() must be called on @tr before this
* can be used.
*/
-__printf(3, 0)
int trace_array_printk(struct trace_array *tr,
unsigned long ip, const char *fmt, ...)
{
@@ -3450,7 +3562,6 @@ int trace_array_init_printk(struct trace_array *tr)
}
EXPORT_SYMBOL_GPL(trace_array_init_printk);
-__printf(3, 4)
int trace_array_printk_buf(struct trace_buffer *buffer,
unsigned long ip, const char *fmt, ...)
{
@@ -3466,7 +3577,6 @@ int trace_array_printk_buf(struct trace_buffer *buffer,
return ret;
}
-__printf(2, 0)
int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
{
return trace_array_vprintk(printk_trace, ip, fmt, args);
@@ -4188,7 +4298,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
* safe to use if the array has delta offsets
* Force printing via the fields.
*/
- if ((tr->text_delta || tr->data_delta) &&
+ if ((tr->text_delta) &&
event->type > __TRACE_LAST_TYPE)
return print_event_fields(iter, event);
@@ -5033,7 +5143,6 @@ int tracing_set_cpumask(struct trace_array *tr,
*/
if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
- atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_MAX_TRACE
ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
@@ -5041,7 +5150,6 @@ int tracing_set_cpumask(struct trace_array *tr,
}
if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
cpumask_test_cpu(cpu, tracing_cpumask_new)) {
- atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_MAX_TRACE
ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
@@ -5174,7 +5282,8 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
{
if ((mask == TRACE_ITER_RECORD_TGID) ||
(mask == TRACE_ITER_RECORD_CMD) ||
- (mask == TRACE_ITER_TRACE_PRINTK))
+ (mask == TRACE_ITER_TRACE_PRINTK) ||
+ (mask == TRACE_ITER_COPY_MARKER))
lockdep_assert_held(&event_mutex);
/* do nothing if flag is already set */
@@ -5205,6 +5314,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
}
}
+ if (mask == TRACE_ITER_COPY_MARKER)
+ update_marker_trace(tr, enabled);
+
if (enabled)
tr->trace_flags |= mask;
else
@@ -5983,11 +6095,137 @@ ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
return __tracing_resize_ring_buffer(tr, size, cpu_id);
}
+struct trace_mod_entry { /* one module recorded in the scratch area by save_mod() */
+ unsigned long mod_addr; /* mod->mem[MOD_TEXT].base at the time it was recorded */
+ char mod_name[MODULE_NAME_LEN]; /* copy of mod->name; validated as a printable string in setup_trace_scratch() */
+};
+
+struct trace_scratch { /* layout of tr->scratch; do not reorder (presumably persists across boots - confirm) */
+ unsigned int clock_id; /* trace clock index; checked against ARRAY_SIZE(trace_clocks) before use */
+ unsigned long text_addr; /* address of _text when the data was recorded */
+ unsigned long nr_entries; /* number of valid elements in entries[] */
+ struct trace_mod_entry entries[]; /* module list; sorted by mod_addr in setup_trace_scratch() */
+};
+
+static DEFINE_MUTEX(scratch_mutex);
+
+/*
+ * bsearch comparator used by trace_adjust_address().
+ *
+ * @key is not a pointer to an object: it is the address being looked up,
+ * cast to a pointer. @pivot points at a trace_mod_entry; the entry matches
+ * when the address lies in [ent[0].mod_addr, ent[1].mod_addr). Because
+ * ent[1] is read, the caller must never search the last array element
+ * (it searches nr_entries - 1 elements).
+ *
+ * Returns 0 on a match, -1 if the address is below this entry and 1 if it
+ * is at or beyond the next one. The result is computed by comparison, not
+ * by subtraction: an unsigned long difference truncated to int can change
+ * sign or collapse to 0 and misdirect the search.
+ */
+static int cmp_mod_entry(const void *key, const void *pivot)
+{
+	unsigned long addr = (unsigned long)key;
+	const struct trace_mod_entry *ent = pivot;
+
+	if (addr < ent[0].mod_addr)
+		return -1;
+
+	/* 0 when inside this entry's range, 1 when past it */
+	return addr >= ent[1].mod_addr;
+}
+
+/**
+ * trace_adjust_address() - Adjust prev boot address to current address.
+ * @tr: Persistent ring buffer's trace_array.
+ * @addr: Address in @tr which is adjusted.
+ *
+ * Return: @addr itself when @tr carries no last-boot data; otherwise
+ * @addr shifted either by the core kernel text delta or by the delta of
+ * the module whose recorded range contains it.
+ */
+unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
+{
+	struct trace_module_delta *module_delta;
+	struct trace_mod_entry *entries, *hit;
+	struct trace_scratch *tscratch;
+	unsigned long kaddr;
+	int count, last, idx;
+
+	/* If we don't have last boot delta, return the address */
+	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
+		return addr;
+
+	/* tr->module_delta must be protected by rcu. */
+	guard(rcu)();
+	tscratch = tr->scratch;
+	/* If there is no tscratch, module_delta must be NULL. */
+	module_delta = READ_ONCE(tr->module_delta);
+
+	/*
+	 * No module information, or the address is below the first recorded
+	 * module: treat it as a core kernel address. Use the shifted value
+	 * only if it lands in kernel text/data/rodata.
+	 */
+	if (!module_delta || !tscratch->nr_entries ||
+	    tscratch->entries[0].mod_addr > addr) {
+		kaddr = addr + tr->text_delta;
+		if (__is_kernel(kaddr) || is_kernel_core_data(kaddr) ||
+		    is_kernel_rodata(kaddr))
+			return kaddr;
+		return addr;
+	}
+
+	entries = tscratch->entries;
+	count = tscratch->nr_entries;
+	last = count - 1;
+
+	/* Note that entries must be sorted. */
+	if (count != 1 && entries[last].mod_addr >= addr) {
+		/*
+		 * The comparator looks at the element after the pivot, so
+		 * the last entry is excluded from the search.
+		 */
+		hit = __inline_bsearch((void *)addr, entries, last,
+				       sizeof(entries[0]), cmp_mod_entry);
+		idx = hit ? hit - entries : 0;
+	} else {
+		/* Single entry, or the address is beyond the last module base */
+		idx = last;
+	}
+
+	return addr + module_delta->delta[idx];
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Append @mod (text base address and name) to the scratch area of the
+ * trace_array passed in @data.
+ *
+ * Returns 0 on success, -1 if there is no scratch area or no room left
+ * for another entry.
+ */
+static int save_mod(struct module *mod, void *data)
+{
+	struct trace_array *tr = data;
+	struct trace_scratch *tscratch = tr->scratch;
+	struct trace_mod_entry *slot;
+	unsigned int limit;
+
+	if (!tscratch)
+		return -1;
+
+	/* Would one more entry overflow the scratch area? */
+	limit = tr->scratch_size;
+	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > limit)
+		return -1;
+
+	/* Claim the next free slot, then fill it in */
+	slot = &tscratch->entries[tscratch->nr_entries++];
+	slot->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
+	strscpy(slot->mod_name, mod->name);
+
+	return 0;
+}
+#else
+/* Without module support there is nothing to record */
+static int save_mod(struct module *mod, void *data)
+{
+	return 0;
+}
+#endif
+
static void update_last_data(struct trace_array *tr)
{
- if (!tr->text_delta && !tr->data_delta)
+ struct trace_module_delta *module_delta;
+ struct trace_scratch *tscratch;
+
+ if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
+ return;
+
+ if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
return;
+ /* Only if the buffer has previous boot data clear and update it. */
+ tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
+
+ /* Reset the module list and reload them */
+ if (tr->scratch) {
+ struct trace_scratch *tscratch = tr->scratch;
+
+ tscratch->clock_id = tr->clock_id;
+ memset(tscratch->entries, 0,
+ flex_array_size(tscratch, entries, tscratch->nr_entries));
+ tscratch->nr_entries = 0;
+
+ guard(mutex)(&scratch_mutex);
+ module_for_each_mod(save_mod, tr);
+ }
+
/*
* Need to clear all CPU buffers as there cannot be events
* from the previous boot mixed with events with this boot
@@ -5998,7 +6236,17 @@ static void update_last_data(struct trace_array *tr)
/* Using current data now */
tr->text_delta = 0;
- tr->data_delta = 0;
+
+ if (!tr->scratch)
+ return;
+
+ tscratch = tr->scratch;
+ module_delta = READ_ONCE(tr->module_delta);
+ WRITE_ONCE(tr->module_delta, NULL);
+ kfree_rcu(module_delta, rcu);
+
+ /* Set the persistent ring buffer meta data to this address */
+ tscratch->text_addr = (unsigned long)_text;
}
/**
@@ -6677,13 +6925,14 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
/* Copy the data into the page, so we can start over. */
ret = trace_seq_to_buffer(&iter->seq,
page_address(spd.pages[i]),
- trace_seq_used(&iter->seq));
+ min((size_t)trace_seq_used(&iter->seq),
+ (size_t)PAGE_SIZE));
if (ret < 0) {
__free_page(spd.pages[i]);
break;
}
spd.partial[i].offset = 0;
- spd.partial[i].len = trace_seq_used(&iter->seq);
+ spd.partial[i].len = ret;
trace_seq_init(&iter->seq);
}
@@ -6807,19 +7056,102 @@ tracing_total_entries_read(struct file *filp, char __user *ubuf,
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
-static ssize_t
-tracing_last_boot_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+#define LAST_BOOT_HEADER ((void *)1)
+
+static void *l_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct trace_array *tr = filp->private_data;
- struct seq_buf seq;
- char buf[64];
+ struct trace_array *tr = m->private;
+ struct trace_scratch *tscratch = tr->scratch;
+ unsigned int index = *pos;
+
+ (*pos)++;
- seq_buf_init(&seq, buf, 64);
+ if (*pos == 1)
+ return LAST_BOOT_HEADER;
+
+ /* Only show offsets of the last boot data */
+ if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
+ return NULL;
- seq_buf_printf(&seq, "text delta:\t%ld\n", tr->text_delta);
- seq_buf_printf(&seq, "data delta:\t%ld\n", tr->data_delta);
+ /* *pos 0 is for the header, 1 is for the first module */
+ index--;
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, seq_buf_used(&seq));
+ if (index >= tscratch->nr_entries)
+ return NULL;
+
+ return &tscratch->entries[index];
+}
+
+/*
+ * seq_file ->start: take scratch_mutex (dropped again in l_stop()) and
+ * position the iteration by delegating to l_next().
+ */
+static void *l_start(struct seq_file *m, loff_t *pos)
+{
+	void *first;
+
+	mutex_lock(&scratch_mutex);
+	first = l_next(m, NULL, pos);
+
+	return first;
+}
+
+/* seq_file ->stop: release the scratch_mutex taken in l_start() */
+static void l_stop(struct seq_file *m, void *p)
+{
+	mutex_unlock(&scratch_mutex);
+}
+
+/*
+ * Emit the first line of the last-boot file: the previous boot's kernel
+ * text address while @tr still holds last-boot data, "# Current" otherwise.
+ */
+static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
+{
+	struct trace_scratch *tscratch = tr->scratch;
+
+	/*
+	 * Do not leak KASLR address. Once the ring buffer is started the
+	 * LAST_BOOT flag is cleared, and from then on only "current" may be
+	 * reported.
+	 */
+	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
+		seq_puts(m, "# Current\n");
+		return;
+	}
+
+	/*
+	 * This is the KASLR address of the previous boot, which should not
+	 * be the same as the one of the current boot.
+	 */
+	seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
+}
+
+/*
+ * seq_file ->show: @v is either the LAST_BOOT_HEADER sentinel or a pointer
+ * to one saved module entry. Always returns 0.
+ */
+static int l_show(struct seq_file *m, void *v)
+{
+	struct trace_mod_entry *entry;
+
+	if (v != LAST_BOOT_HEADER) {
+		/* One line per module: "<text base>\t<name>" */
+		entry = v;
+		seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
+	} else {
+		show_last_boot_header(m, m->private);
+	}
+
+	return 0;
+}
+
+/* Iterator callbacks installed by tracing_last_boot_open() */
+static const struct seq_operations last_boot_seq_ops = {
+	.show		= l_show,
+	.start		= l_start,
+	.next		= l_next,
+	.stop		= l_stop,
+};
+
+/*
+ * Open handler for the last-boot file: take a reference on the trace
+ * array, set up the seq_file and hand the trace array to it via
+ * seq_file::private. The reference is dropped again if seq_open() fails.
+ *
+ * Returns 0 on success or the error from the failing step.
+ */
+static int tracing_last_boot_open(struct inode *inode, struct file *file)
+{
+	struct trace_array *tr = inode->i_private;
+	struct seq_file *m;
+	int err;
+
+	err = tracing_check_open_get_tr(tr);
+	if (err)
+		return err;
+
+	err = seq_open(file, &last_boot_seq_ops);
+	if (!err) {
+		/* seq_open() stored the seq_file in file->private_data */
+		m = file->private_data;
+		m->private = tr;
+		return 0;
+	}
+
+	trace_array_put(tr);
+	return err;
}
static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
@@ -6870,11 +7202,9 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
#define TRACE_MARKER_MAX_SIZE 4096
-static ssize_t
-tracing_mark_write(struct file *filp, const char __user *ubuf,
- size_t cnt, loff_t *fpos)
+static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user *ubuf,
+ size_t cnt, unsigned long ip)
{
- struct trace_array *tr = filp->private_data;
struct ring_buffer_event *event;
enum event_trigger_type tt = ETT_NONE;
struct trace_buffer *buffer;
@@ -6888,18 +7218,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
#define FAULTED_STR "<faulted>"
#define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
- if (tracing_disabled)
- return -EINVAL;
-
- if (!(tr->trace_flags & TRACE_ITER_MARKERS))
- return -EINVAL;
-
- if ((ssize_t)cnt < 0)
- return -EINVAL;
-
- if (cnt > TRACE_MARKER_MAX_SIZE)
- cnt = TRACE_MARKER_MAX_SIZE;
-
meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */
again:
size = cnt + meta_size;
@@ -6932,7 +7250,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
}
entry = ring_buffer_event_data(event);
- entry->ip = _THIS_IP_;
+ entry->ip = ip;
len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
if (len) {
@@ -6965,18 +7283,12 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
}
static ssize_t
-tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
+tracing_mark_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *fpos)
{
struct trace_array *tr = filp->private_data;
- struct ring_buffer_event *event;
- struct trace_buffer *buffer;
- struct raw_data_entry *entry;
- ssize_t written;
- int size;
- int len;
-
-#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
+ ssize_t written = -ENODEV;
+ unsigned long ip;
if (tracing_disabled)
return -EINVAL;
@@ -6984,10 +7296,42 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
if (!(tr->trace_flags & TRACE_ITER_MARKERS))
return -EINVAL;
- /* The marker must at least have a tag id */
- if (cnt < sizeof(unsigned int))
+ if ((ssize_t)cnt < 0)
return -EINVAL;
+ if (cnt > TRACE_MARKER_MAX_SIZE)
+ cnt = TRACE_MARKER_MAX_SIZE;
+
+ /* The selftests expect this function to be the IP address */
+ ip = _THIS_IP_;
+
+ /* The global trace_marker can go to multiple instances */
+ if (tr == &global_trace) {
+ guard(rcu)();
+ list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
+ written = write_marker_to_buffer(tr, ubuf, cnt, ip);
+ if (written < 0)
+ break;
+ }
+ } else {
+ written = write_marker_to_buffer(tr, ubuf, cnt, ip);
+ }
+
+ return written;
+}
+
+static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
+ const char __user *ubuf, size_t cnt)
+{
+ struct ring_buffer_event *event;
+ struct trace_buffer *buffer;
+ struct raw_data_entry *entry;
+ ssize_t written;
+ int size;
+ int len;
+
+#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
+
size = sizeof(*entry) + cnt;
if (cnt < FAULT_SIZE_ID)
size += FAULT_SIZE_ID - cnt;
@@ -7018,6 +7362,40 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
return written;
}
+/*
+ * Write handler for the raw trace marker file.
+ *
+ * Validates the request and forwards the user buffer to
+ * write_raw_marker_to_buffer(). A write through the top level (global)
+ * trace array is copied to every trace array on the marker_copies list;
+ * a write through an instance goes only to that instance.
+ *
+ * Returns the result of the last buffer write, -EINVAL if tracing is
+ * disabled, markers are turned off for @tr or @cnt is too small to hold
+ * a tag id, and -ENODEV if the global marker has no array to write to.
+ */
+static ssize_t
+tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
+		       size_t cnt, loff_t *fpos)
+{
+	struct trace_array *tr = filp->private_data;
+	ssize_t written = -ENODEV;
+
+	if (tracing_disabled)
+		return -EINVAL;
+
+	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
+		return -EINVAL;
+
+	/* The marker must at least have a tag id */
+	if (cnt < sizeof(unsigned int))
+		return -EINVAL;
+
+	/* The global trace_marker_raw can go to multiple instances */
+	if (tr == &global_trace) {
+		guard(rcu)();
+		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
+			written = write_raw_marker_to_buffer(tr, ubuf, cnt);
+			/* Stop at the first array that fails */
+			if (written < 0)
+				break;
+		}
+	} else {
+		written = write_raw_marker_to_buffer(tr, ubuf, cnt);
+	}
+
+	return written;
+}
+
static int tracing_clock_show(struct seq_file *m, void *v)
{
struct trace_array *tr = m->private;
@@ -7062,6 +7440,12 @@ int tracing_set_clock(struct trace_array *tr, const char *clockstr)
tracing_reset_online_cpus(&tr->max_buffer);
#endif
+ if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
+ struct trace_scratch *tscratch = tr->scratch;
+
+ tscratch->clock_id = i;
+ }
+
mutex_unlock(&trace_types_lock);
return 0;
@@ -7448,10 +7832,10 @@ static const struct file_operations trace_time_stamp_mode_fops = {
};
static const struct file_operations last_boot_fops = {
- .open = tracing_open_generic_tr,
- .read = tracing_last_boot_read,
- .llseek = generic_file_llseek,
- .release = tracing_release_generic_tr,
+ .open = tracing_last_boot_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = tracing_seq_release,
};
#ifdef CONFIG_TRACER_SNAPSHOT
@@ -8274,6 +8658,10 @@ static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
struct trace_iterator *iter = &info->iter;
int ret = 0;
+ /* A memmap'ed buffer is not supported for user space mmap */
+ if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
+ return -ENODEV;
+
/* Currently the boot mapped buffer is not supported for mmap */
if (iter->tr->flags & TRACE_ARRAY_FL_BOOT)
return -ENODEV;
@@ -9191,22 +9579,134 @@ static struct dentry *trace_instance_dir;
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
+#ifdef CONFIG_MODULES
+/*
+ * Per-module callback: find @mod by name among the module entries saved in
+ * the scratch area of the trace_array in @data and update the matching
+ * slot of tr->module_delta. A module that is going away gets a delta of 0;
+ * otherwise the delta is its current text base minus the saved address.
+ *
+ * Always returns 0.
+ */
+static int make_mod_delta(struct module *mod, void *data)
+{
+	struct trace_array *tr = data;
+	struct trace_scratch *tscratch = tr->scratch;
+	struct trace_module_delta *module_delta = READ_ONCE(tr->module_delta);
+	struct trace_mod_entry *entry;
+	int i;
+
+	for (i = 0; i < tscratch->nr_entries; i++) {
+		entry = &tscratch->entries[i];
+
+		if (strcmp(mod->name, entry->mod_name) == 0) {
+			module_delta->delta[i] =
+				mod->state == MODULE_STATE_GOING ? 0 :
+				(unsigned long)mod->mem[MOD_TEXT].base - entry->mod_addr;
+			/* Only the first entry with this name is updated */
+			break;
+		}
+	}
+
+	return 0;
+}
+#else
+/* Without module support there are no module deltas to compute */
+static int make_mod_delta(struct module *mod, void *data)
+{
+	return 0;
+}
+#endif
+
+/*
+ * sort_r() comparator ordering trace_mod_entry elements by ascending
+ * mod_addr. @data is unused.
+ *
+ * Returns a negative value, 0 or a positive value when @a sorts before,
+ * equal to or after @b. Equal addresses compare as 0 so that the result
+ * does not depend on argument order.
+ */
+static int mod_addr_comp(const void *a, const void *b, const void *data)
+{
+	const struct trace_mod_entry *e1 = a;
+	const struct trace_mod_entry *e2 = b;
+
+	if (e1->mod_addr == e2->mod_addr)
+		return 0;
+
+	return e1->mod_addr > e2->mod_addr ? 1 : -1;
+}
+
+/*
+ * Attach the scratch area @tscratch (of @size bytes) to @tr and validate
+ * what it contains.
+ *
+ * The recorded kernel text address is turned into tr->text_delta, the
+ * module entries are checked (the count must fit in @size and every name
+ * must be a terminated, printable string), sorted by address, and a
+ * module_delta table is allocated and filled from the currently loaded
+ * modules. Finally the recorded trace clock is applied to @tr if it
+ * differs from the current one.
+ *
+ * If any check fails the whole scratch area is zeroed. A NULL @tscratch
+ * is a no-op.
+ */
+static void setup_trace_scratch(struct trace_array *tr,
+				struct trace_scratch *tscratch, unsigned int size)
+{
+	struct trace_module_delta *module_delta;
+	struct trace_mod_entry *entry;
+	int i, nr_entries;
+
+	if (!tscratch)
+		return;
+
+	tr->scratch = tscratch;
+	tr->scratch_size = size;
+
+	if (tscratch->text_addr)
+		tr->text_delta = (unsigned long)_text - tscratch->text_addr;
+
+	/* The recorded entry count must fit inside the scratch area */
+	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
+		goto reset;
+
+	/* Check if each module name is a valid string */
+	for (i = 0; i < tscratch->nr_entries; i++) {
+		int n;
+
+		entry = &tscratch->entries[i];
+
+		for (n = 0; n < MODULE_NAME_LEN; n++) {
+			if (entry->mod_name[n] == '\0')
+				break;
+			if (!isprint(entry->mod_name[n]))
+				goto reset;
+		}
+		/* No terminating '\0' within the name field */
+		if (n == MODULE_NAME_LEN)
+			goto reset;
+	}
+
+	/* Sort the entries so that we can find appropriate module from address. */
+	nr_entries = tscratch->nr_entries;
+	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
+	       mod_addr_comp, NULL, NULL);
+
+	if (IS_ENABLED(CONFIG_MODULES)) {
+		module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
+		if (!module_delta) {
+			pr_info("module_delta allocation failed. Not able to decode module address.\n");
+			goto reset;
+		}
+		init_rcu_head(&module_delta->rcu);
+	} else
+		module_delta = NULL;
+	WRITE_ONCE(tr->module_delta, module_delta);
+
+	/* Scan modules to make text delta for modules. */
+	module_for_each_mod(make_mod_delta, tr);
+
+	/* Use the same trace_clock as the previous boot. */
+	if (tscratch->clock_id != tr->clock_id) {
+		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
+		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
+			pr_info("the previous trace_clock info is not valid.\n");
+			goto reset;
+		}
+	}
+	return;
+ reset:
+	/* Invalid trace modules */
+	memset(tscratch, 0, size);
+}
+
static int
allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
{
enum ring_buffer_flags rb_flags;
+ struct trace_scratch *tscratch;
+ unsigned int scratch_size = 0;
rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
buf->tr = tr;
if (tr->range_addr_start && tr->range_addr_size) {
+ /* Add scratch buffer to handle 128 modules */
buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
tr->range_addr_start,
- tr->range_addr_size);
+ tr->range_addr_size,
+ struct_size(tscratch, entries, 128));
+
+ tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
+ setup_trace_scratch(tr, tscratch, scratch_size);
- ring_buffer_last_boot_delta(buf->buffer,
- &tr->text_delta, &tr->data_delta);
/*
* This is basically the same as a mapped buffer,
* with the same restrictions.
@@ -9275,6 +9775,7 @@ static void free_trace_buffers(struct trace_array *tr)
return;
free_trace_buffer(&tr->array_buffer);
+ kfree(tr->module_delta);
#ifdef CONFIG_TRACER_MAX_TRACE
free_trace_buffer(&tr->max_buffer);
@@ -9403,6 +9904,7 @@ trace_array_create_systems(const char *name, const char *systems,
INIT_LIST_HEAD(&tr->events);
INIT_LIST_HEAD(&tr->hist_vars);
INIT_LIST_HEAD(&tr->err_log);
+ INIT_LIST_HEAD(&tr->marker_list);
#ifdef CONFIG_MODULES
INIT_LIST_HEAD(&tr->mod_events);
@@ -9440,6 +9942,7 @@ trace_array_create_systems(const char *name, const char *systems,
free_cpumask_var(tr->pipe_cpumask);
free_cpumask_var(tr->tracing_cpumask);
kfree_const(tr->system_names);
+ kfree(tr->range_name);
kfree(tr->name);
kfree(tr);
@@ -9470,30 +9973,35 @@ static int instance_mkdir(const char *name)
return ret;
}
-static u64 map_pages(u64 start, u64 size)
+#ifdef CONFIG_MMU
+static u64 map_pages(unsigned long start, unsigned long size)
{
- struct page **pages;
- phys_addr_t page_start;
- unsigned int page_count;
- unsigned int i;
- void *vaddr;
-
- page_count = DIV_ROUND_UP(size, PAGE_SIZE);
+ unsigned long vmap_start, vmap_end;
+ struct vm_struct *area;
+ int ret;
- page_start = start;
- pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
- if (!pages)
+ area = get_vm_area(size, VM_IOREMAP);
+ if (!area)
return 0;
- for (i = 0; i < page_count; i++) {
- phys_addr_t addr = page_start + i * PAGE_SIZE;
- pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
+ vmap_start = (unsigned long) area->addr;
+ vmap_end = vmap_start + size;
+
+ ret = vmap_page_range(vmap_start, vmap_end,
+ start, pgprot_nx(PAGE_KERNEL));
+ if (ret < 0) {
+ free_vm_area(area);
+ return 0;
}
- vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL);
- kfree(pages);
- return (u64)(unsigned long)vaddr;
+ return (u64)vmap_start;
}
+#else
+static inline u64 map_pages(unsigned long start, unsigned long size)
+{
+ return 0;
+}
+#endif
/**
* trace_array_get_by_name - Create/Lookup a trace array, given its name.
@@ -9556,6 +10064,9 @@ static int __remove_instance(struct trace_array *tr)
if (printk_trace == tr)
update_printk_trace(&global_trace);
+ if (update_marker_trace(tr, 0))
+ synchronize_rcu();
+
tracing_set_nop(tr);
clear_ftrace_function_probes(tr);
event_trace_del_tracer(tr);
@@ -9566,6 +10077,11 @@ static int __remove_instance(struct trace_array *tr)
free_trace_buffers(tr);
clear_tracing_err_log(tr);
+ if (tr->range_name) {
+ reserve_mem_release_by_name(tr->range_name);
+ kfree(tr->range_name);
+ }
+
for (i = 0; i < tr->nr_topts; i++) {
kfree(tr->topts[i].topts);
}
@@ -9726,6 +10242,8 @@ static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
{
struct vfsmount *mnt;
struct file_system_type *type;
+ struct fs_context *fc;
+ int ret;
/*
* To maintain backward compatibility for tools that mount
@@ -9735,12 +10253,20 @@ static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
type = get_fs_type("tracefs");
if (!type)
return NULL;
- mnt = vfs_submount(mntpt, type, "tracefs", NULL);
+
+ fc = fs_context_for_submount(type, mntpt);
put_filesystem(type);
- if (IS_ERR(mnt))
- return NULL;
- mntget(mnt);
+ if (IS_ERR(fc))
+ return ERR_CAST(fc);
+
+ ret = vfs_parse_fs_string(fc, "source",
+ "tracefs", strlen("tracefs"));
+ if (!ret)
+ mnt = fc_mount(fc);
+ else
+ mnt = ERR_PTR(ret);
+ put_fs_context(fc);
return mnt;
}
@@ -9887,6 +10413,24 @@ static void trace_module_remove_evals(struct module *mod)
static inline void trace_module_remove_evals(struct module *mod) { }
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
+/*
+ * Module notifier helper: walk every trace array and bring its persistent
+ * module information up to date for @mod. @add is true when the module is
+ * coming, false when it is going.
+ */
+static void trace_module_record(struct module *mod, bool add)
+{
+	const unsigned long boot_mask = TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
+	struct trace_array *tr;
+	unsigned long flags;
+
+	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+		flags = tr->flags & boot_mask;
+
+		if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
+			/* Update delta if the module loaded in previous boot */
+			make_mod_delta(mod, tr);
+			continue;
+		}
+
+		/* Update any persistent trace array that has already been started */
+		if (flags == TRACE_ARRAY_FL_BOOT && add) {
+			guard(mutex)(&scratch_mutex);
+			save_mod(mod, tr);
+		}
+	}
+}
+
static int trace_module_notify(struct notifier_block *self,
unsigned long val, void *data)
{
@@ -9895,9 +10439,11 @@ static int trace_module_notify(struct notifier_block *self,
switch (val) {
case MODULE_STATE_COMING:
trace_module_add_evals(mod);
+ trace_module_record(mod, true);
break;
case MODULE_STATE_GOING:
trace_module_remove_evals(mod);
+ trace_module_record(mod, false);
break;
}
@@ -10079,7 +10625,7 @@ static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_m
static struct trace_iterator iter;
unsigned int old_userobj;
unsigned long flags;
- int cnt = 0, cpu;
+ int cnt = 0;
/*
* Always turn off tracing when we dump.
@@ -10096,9 +10642,8 @@ static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_m
/* Simulate the iterator */
trace_init_iter(&iter, tr);
- for_each_tracing_cpu(cpu) {
- atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
- }
+ /* While dumping, do not allow the buffer to be enabled */
+ tracer_tracing_disable(tr);
old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
@@ -10157,9 +10702,7 @@ static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_m
tr->trace_flags |= old_userobj;
- for_each_tracing_cpu(cpu) {
- atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
- }
+ tracer_tracing_enable(tr);
local_irq_restore(flags);
}
@@ -10346,6 +10889,7 @@ static inline void do_allocate_snapshot(const char *name) { }
__init static void enable_instances(void)
{
struct trace_array *tr;
+ bool memmap_area = false;
char *curr_str;
char *name;
char *str;
@@ -10363,6 +10907,7 @@ __init static void enable_instances(void)
bool traceoff = false;
char *flag_delim;
char *addr_delim;
+ char *rname __free(kfree) = NULL;
tok = strsep(&curr_str, ",");
@@ -10413,16 +10958,31 @@ __init static void enable_instances(void)
name);
continue;
}
+ memmap_area = true;
} else if (tok) {
if (!reserve_mem_find_by_name(tok, &start, &size)) {
start = 0;
pr_warn("Failed to map boot instance %s to %s\n", name, tok);
continue;
}
+ rname = kstrdup(tok, GFP_KERNEL);
}
if (start) {
- addr = map_pages(start, size);
+ /* Start and size must be page aligned */
+ if (start & ~PAGE_MASK) {
+ pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
+ continue;
+ }
+ if (size & ~PAGE_MASK) {
+ pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
+ continue;
+ }
+
+ if (memmap_area)
+ addr = map_pages(start, size);
+ else
+ addr = (unsigned long)phys_to_virt(start);
if (addr) {
pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
name, &start, (unsigned long)size);
@@ -10449,15 +11009,18 @@ __init static void enable_instances(void)
update_printk_trace(tr);
/*
- * If start is set, then this is a mapped buffer, and
- * cannot be deleted by user space, so keep the reference
- * to it.
+ * memmap'd buffers can not be freed.
*/
- if (start) {
- tr->flags |= TRACE_ARRAY_FL_BOOT;
+ if (memmap_area) {
+ tr->flags |= TRACE_ARRAY_FL_MEMMAP;
tr->ref++;
}
+ if (start) {
+ tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
+ tr->range_name = no_free_ptr(rname);
+ }
+
while ((tok = strsep(&curr_str, ","))) {
early_enable_events(tr, tok, true);
}
@@ -10579,6 +11142,7 @@ __init static int tracer_alloc_buffers(void)
INIT_LIST_HEAD(&global_trace.events);
INIT_LIST_HEAD(&global_trace.hist_vars);
INIT_LIST_HEAD(&global_trace.err_log);
+ list_add(&global_trace.marker_list, &marker_copies);
list_add(&global_trace.list, &ftrace_trace_arrays);
apply_trace_boot_options();
@@ -10699,6 +11263,9 @@ __init static int late_trace_init(void)
tracepoint_printk = 0;
}
+ if (traceoff_after_boot)
+ tracing_off();
+
tracing_set_default_clock();
clear_boot_tracer();
return 0;