diff options
Diffstat (limited to 'kernel/trace/trace.c')
-rw-r--r-- | kernel/trace/trace.c | 765 |
1 files changed, 666 insertions, 99 deletions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fd3cb2b2ab82..465989585135 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -49,6 +49,9 @@ #include <linux/fsnotify.h> #include <linux/irq_work.h> #include <linux/workqueue.h> +#include <linux/sort.h> +#include <linux/io.h> /* vmap_page_range() */ +#include <linux/fs_context.h> #include <asm/setup.h> /* COMMAND_LINE_SIZE */ @@ -87,6 +90,7 @@ void __init disable_tracing_selftest(const char *reason) static struct trace_iterator *tracepoint_print_iter; int tracepoint_printk; static bool tracepoint_printk_stop_on_boot __initdata; +static bool traceoff_after_boot __initdata; static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key); /* For tracers that don't implement custom flags */ @@ -117,6 +121,7 @@ static int tracing_disabled = 1; cpumask_var_t __read_mostly tracing_buffer_mask; +#define MAX_TRACER_SIZE 100 /* * ftrace_dump_on_oops - variable to dump ftrace buffer on oops * @@ -139,7 +144,40 @@ cpumask_var_t __read_mostly tracing_buffer_mask; char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0"; /* When set, tracing will stop when a WARN*() is hit */ -int __disable_trace_on_warning; +static int __disable_trace_on_warning; + +int tracepoint_printk_sysctl(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +static const struct ctl_table trace_sysctl_table[] = { + { + .procname = "ftrace_dump_on_oops", + .data = &ftrace_dump_on_oops, + .maxlen = MAX_TRACER_SIZE, + .mode = 0644, + .proc_handler = proc_dostring, + }, + { + .procname = "traceoff_on_warning", + .data = &__disable_trace_on_warning, + .maxlen = sizeof(__disable_trace_on_warning), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tracepoint_printk", + .data = &tracepoint_printk, + .maxlen = sizeof(tracepoint_printk), + .mode = 0644, + .proc_handler = tracepoint_printk_sysctl, + }, +}; + +static int __init init_trace_sysctls(void) +{ + register_sysctl_init("kernel", trace_sysctl_table); + return 0; +} +subsys_initcall(init_trace_sysctls); #ifdef CONFIG_TRACE_EVAL_MAP_FILE /* Map of enums to their values, for "eval_map" file */ @@ -330,6 +368,13 @@ static int __init set_tracepoint_printk_stop(char *str) } __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop); +static int __init set_traceoff_after_boot(char *str) +{ + traceoff_after_boot = true; + return 1; +} +__setup("traceoff_after_boot", set_traceoff_after_boot); + unsigned long long ns2usecs(u64 nsec) { nsec += 500; @@ -483,7 +528,8 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_export); TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \ TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \ TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \ - TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK) + TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK | \ + TRACE_ITER_COPY_MARKER) /* trace_options that are only supported by global_trace */ #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \ @@ -491,7 +537,8 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_export); /* trace_flags that are default zero for instances */ #define ZEROED_TRACE_FLAGS \ - (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK) + (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK | \ + TRACE_ITER_COPY_MARKER) /* * The global_trace is the descriptor that holds the top-level tracing @@ -503,6 +550,9 @@ static struct trace_array global_trace = { static struct trace_array *printk_trace = &global_trace; +/* List of trace_arrays interested in the top level trace_marker */ +static LIST_HEAD(marker_copies); + static __always_inline bool printk_binsafe(struct trace_array *tr) { /* @@ -524,6 +574,28 @@ static void update_printk_trace(struct trace_array *tr) tr->trace_flags |= TRACE_ITER_TRACE_PRINTK; } +/* Returns true if the status of tr changed */ +static bool update_marker_trace(struct trace_array *tr, int enabled) +{ + lockdep_assert_held(&event_mutex); + + if (enabled) { + if (!list_empty(&tr->marker_list)) + return false; + + list_add_rcu(&tr->marker_list, &marker_copies); + tr->trace_flags |= TRACE_ITER_COPY_MARKER; + return true; + } + + if (list_empty(&tr->marker_list)) + return false; + + list_del_init(&tr->marker_list); + tr->trace_flags &= ~TRACE_ITER_COPY_MARKER; + return true; +} + void trace_set_ring_buffer_expanded(struct trace_array *tr) { if (!tr) @@ -1573,6 +1645,39 @@ void tracer_tracing_off(struct trace_array *tr) } /** + * tracer_tracing_disable() - temporary disable the buffer from write + * @tr: The trace array to disable its buffer for + * + * Expects trace_tracing_enable() to re-enable tracing. + * The difference between this and tracer_tracing_off() is that this + * is a counter and can nest, whereas, tracer_tracing_off() can + * be called multiple times and a single trace_tracing_on() will + * enable it. + */ +void tracer_tracing_disable(struct trace_array *tr) +{ + if (WARN_ON_ONCE(!tr->array_buffer.buffer)) + return; + + ring_buffer_record_disable(tr->array_buffer.buffer); +} + +/** + * tracer_tracing_enable() - counter part of tracer_tracing_disable() + * @tr: The trace array that had tracer_tracincg_disable() called on it + * + * This is called after tracer_tracing_disable() has been called on @tr, + * when it's safe to re-enable tracing. + */ +void tracer_tracing_enable(struct trace_array *tr) +{ + if (WARN_ON_ONCE(!tr->array_buffer.buffer)) + return; + + ring_buffer_record_enable(tr->array_buffer.buffer); +} + +/** * tracing_off - turn off tracing buffers * * This function stops the tracing buffers from recording data. @@ -2878,13 +2983,16 @@ trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, void trace_function(struct trace_array *tr, unsigned long ip, unsigned long - parent_ip, unsigned int trace_ctx) + parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs) { struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct ftrace_entry *entry; + int size = sizeof(*entry); - event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), + size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long); + + event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size, trace_ctx); if (!event) return; @@ -2892,6 +3000,13 @@ trace_function(struct trace_array *tr, unsigned long ip, unsigned long entry->ip = ip; entry->parent_ip = parent_ip; +#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API + if (fregs) { + for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++) + entry->args[i] = ftrace_regs_get_argument(fregs, i); + } +#endif + if (static_branch_unlikely(&trace_function_exports_enabled)) ftrace_exports(event, TRACE_EXPORT_FUNCTION); __buffer_unlock_commit(buffer, event); @@ -3322,10 +3437,9 @@ out_nobuffer: } EXPORT_SYMBOL_GPL(trace_vbprintk); -__printf(3, 0) -static int -__trace_array_vprintk(struct trace_buffer *buffer, - unsigned long ip, const char *fmt, va_list args) +static __printf(3, 0) +int __trace_array_vprintk(struct trace_buffer *buffer, + unsigned long ip, const char *fmt, va_list args) { struct ring_buffer_event *event; int len = 0, size; @@ -3375,7 +3489,6 @@ out_nobuffer: return len; } -__printf(3, 0) int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args) { @@ -3405,7 +3518,6 @@ int trace_array_vprintk(struct trace_array *tr, * Note, trace_array_init_printk() must be called on @tr before this * can be used. */ -__printf(3, 0) int trace_array_printk(struct trace_array *tr, unsigned long ip, const char *fmt, ...) { @@ -3450,7 +3562,6 @@ int trace_array_init_printk(struct trace_array *tr) } EXPORT_SYMBOL_GPL(trace_array_init_printk); -__printf(3, 4) int trace_array_printk_buf(struct trace_buffer *buffer, unsigned long ip, const char *fmt, ...) { @@ -3466,7 +3577,6 @@ int trace_array_printk_buf(struct trace_buffer *buffer, return ret; } -__printf(2, 0) int trace_vprintk(unsigned long ip, const char *fmt, va_list args) { return trace_array_vprintk(printk_trace, ip, fmt, args); @@ -4188,7 +4298,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) * safe to use if the array has delta offsets * Force printing via the fields. */ - if ((tr->text_delta || tr->data_delta) && + if ((tr->text_delta) && event->type > __TRACE_LAST_TYPE) return print_event_fields(iter, event); @@ -5033,7 +5143,6 @@ int tracing_set_cpumask(struct trace_array *tr, */ if (cpumask_test_cpu(cpu, tr->tracing_cpumask) && !cpumask_test_cpu(cpu, tracing_cpumask_new)) { - atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled); ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu); #ifdef CONFIG_TRACER_MAX_TRACE ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu); @@ -5041,7 +5150,6 @@ int tracing_set_cpumask(struct trace_array *tr, } if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) && cpumask_test_cpu(cpu, tracing_cpumask_new)) { - atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled); ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu); #ifdef CONFIG_TRACER_MAX_TRACE ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu); @@ -5174,7 +5282,8 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) { if ((mask == TRACE_ITER_RECORD_TGID) || (mask == TRACE_ITER_RECORD_CMD) || - (mask == TRACE_ITER_TRACE_PRINTK)) + (mask == TRACE_ITER_TRACE_PRINTK) || + (mask == TRACE_ITER_COPY_MARKER)) lockdep_assert_held(&event_mutex); /* do nothing if flag is already set */ @@ -5205,6 +5314,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) } } + if (mask == TRACE_ITER_COPY_MARKER) + update_marker_trace(tr, enabled); + if (enabled) tr->trace_flags |= mask; else @@ -5983,11 +6095,137 @@ ssize_t tracing_resize_ring_buffer(struct trace_array *tr, return __tracing_resize_ring_buffer(tr, size, cpu_id); } +struct trace_mod_entry { + unsigned long mod_addr; + char mod_name[MODULE_NAME_LEN]; +}; + +struct trace_scratch { + unsigned int clock_id; + unsigned long text_addr; + unsigned long nr_entries; + struct trace_mod_entry entries[]; +}; + +static DEFINE_MUTEX(scratch_mutex); + +static int cmp_mod_entry(const void *key, const void *pivot) +{ + unsigned long addr = (unsigned long)key; + const struct trace_mod_entry *ent = pivot; + + if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr) + return 0; + else + return addr - ent->mod_addr; +} + +/** + * trace_adjust_address() - Adjust prev boot address to current address. + * @tr: Persistent ring buffer's trace_array. + * @addr: Address in @tr which is adjusted. + */ +unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr) +{ + struct trace_module_delta *module_delta; + struct trace_scratch *tscratch; + struct trace_mod_entry *entry; + unsigned long raddr; + int idx = 0, nr_entries; + + /* If we don't have last boot delta, return the address */ + if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) + return addr; + + /* tr->module_delta must be protected by rcu. */ + guard(rcu)(); + tscratch = tr->scratch; + /* if there is no tscrach, module_delta must be NULL. */ + module_delta = READ_ONCE(tr->module_delta); + if (!module_delta || !tscratch->nr_entries || + tscratch->entries[0].mod_addr > addr) { + raddr = addr + tr->text_delta; + return __is_kernel(raddr) || is_kernel_core_data(raddr) || + is_kernel_rodata(raddr) ? raddr : addr; + } + + /* Note that entries must be sorted. */ + nr_entries = tscratch->nr_entries; + if (nr_entries == 1 || + tscratch->entries[nr_entries - 1].mod_addr < addr) + idx = nr_entries - 1; + else { + entry = __inline_bsearch((void *)addr, + tscratch->entries, + nr_entries - 1, + sizeof(tscratch->entries[0]), + cmp_mod_entry); + if (entry) + idx = entry - tscratch->entries; + } + + return addr + module_delta->delta[idx]; +} + +#ifdef CONFIG_MODULES +static int save_mod(struct module *mod, void *data) +{ + struct trace_array *tr = data; + struct trace_scratch *tscratch; + struct trace_mod_entry *entry; + unsigned int size; + + tscratch = tr->scratch; + if (!tscratch) + return -1; + size = tr->scratch_size; + + if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size) + return -1; + + entry = &tscratch->entries[tscratch->nr_entries]; + + tscratch->nr_entries++; + + entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base; + strscpy(entry->mod_name, mod->name); + + return 0; +} +#else +static int save_mod(struct module *mod, void *data) +{ + return 0; +} +#endif + static void update_last_data(struct trace_array *tr) { - if (!tr->text_delta && !tr->data_delta) + struct trace_module_delta *module_delta; + struct trace_scratch *tscratch; + + if (!(tr->flags & TRACE_ARRAY_FL_BOOT)) + return; + + if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) return; + /* Only if the buffer has previous boot data clear and update it. */ + tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT; + + /* Reset the module list and reload them */ + if (tr->scratch) { + struct trace_scratch *tscratch = tr->scratch; + + tscratch->clock_id = tr->clock_id; + memset(tscratch->entries, 0, + flex_array_size(tscratch, entries, tscratch->nr_entries)); + tscratch->nr_entries = 0; + + guard(mutex)(&scratch_mutex); + module_for_each_mod(save_mod, tr); + } + /* * Need to clear all CPU buffers as there cannot be events * from the previous boot mixed with events with this boot @@ -5998,7 +6236,17 @@ static void update_last_data(struct trace_array *tr) /* Using current data now */ tr->text_delta = 0; - tr->data_delta = 0; + + if (!tr->scratch) + return; + + tscratch = tr->scratch; + module_delta = READ_ONCE(tr->module_delta); + WRITE_ONCE(tr->module_delta, NULL); + kfree_rcu(module_delta, rcu); + + /* Set the persistent ring buffer meta data to this address */ + tscratch->text_addr = (unsigned long)_text; } /** @@ -6677,13 +6925,14 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, /* Copy the data into the page, so we can start over. */ ret = trace_seq_to_buffer(&iter->seq, page_address(spd.pages[i]), - trace_seq_used(&iter->seq)); + min((size_t)trace_seq_used(&iter->seq), + (size_t)PAGE_SIZE)); if (ret < 0) { __free_page(spd.pages[i]); break; } spd.partial[i].offset = 0; - spd.partial[i].len = trace_seq_used(&iter->seq); + spd.partial[i].len = ret; trace_seq_init(&iter->seq); } @@ -6807,19 +7056,102 @@ tracing_total_entries_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } -static ssize_t -tracing_last_boot_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) +#define LAST_BOOT_HEADER ((void *)1) + +static void *l_next(struct seq_file *m, void *v, loff_t *pos) { - struct trace_array *tr = filp->private_data; - struct seq_buf seq; - char buf[64]; + struct trace_array *tr = m->private; + struct trace_scratch *tscratch = tr->scratch; + unsigned int index = *pos; + + (*pos)++; - seq_buf_init(&seq, buf, 64); + if (*pos == 1) + return LAST_BOOT_HEADER; + + /* Only show offsets of the last boot data */ + if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) + return NULL; - seq_buf_printf(&seq, "text delta:\t%ld\n", tr->text_delta); - seq_buf_printf(&seq, "data delta:\t%ld\n", tr->data_delta); + /* *pos 0 is for the header, 1 is for the first module */ + index--; - return simple_read_from_buffer(ubuf, cnt, ppos, buf, seq_buf_used(&seq)); + if (index >= tscratch->nr_entries) + return NULL; + + return &tscratch->entries[index]; +} + +static void *l_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&scratch_mutex); + + return l_next(m, NULL, pos); +} + +static void l_stop(struct seq_file *m, void *p) +{ + mutex_unlock(&scratch_mutex); +} + +static void show_last_boot_header(struct seq_file *m, struct trace_array *tr) +{ + struct trace_scratch *tscratch = tr->scratch; + + /* + * Do not leak KASLR address. This only shows the KASLR address of + * the last boot. When the ring buffer is started, the LAST_BOOT + * flag gets cleared, and this should only report "current". + * Otherwise it shows the KASLR address from the previous boot which + * should not be the same as the current boot. + */ + if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) + seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr); + else + seq_puts(m, "# Current\n"); +} + +static int l_show(struct seq_file *m, void *v) +{ + struct trace_array *tr = m->private; + struct trace_mod_entry *entry = v; + + if (v == LAST_BOOT_HEADER) { + show_last_boot_header(m, tr); + return 0; + } + + seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name); + return 0; +} + +static const struct seq_operations last_boot_seq_ops = { + .start = l_start, + .next = l_next, + .stop = l_stop, + .show = l_show, +}; + +static int tracing_last_boot_open(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + struct seq_file *m; + int ret; + + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; + + ret = seq_open(file, &last_boot_seq_ops); + if (ret) { + trace_array_put(tr); + return ret; + } + + m = file->private_data; + m->private = tr; + + return 0; } static int tracing_buffer_meta_open(struct inode *inode, struct file *filp) @@ -6870,11 +7202,9 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp) #define TRACE_MARKER_MAX_SIZE 4096 -static ssize_t -tracing_mark_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *fpos) +static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user *ubuf, + size_t cnt, unsigned long ip) { - struct trace_array *tr = filp->private_data; struct ring_buffer_event *event; enum event_trigger_type tt = ETT_NONE; struct trace_buffer *buffer; @@ -6888,18 +7218,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, #define FAULTED_STR "<faulted>" #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */ - if (tracing_disabled) - return -EINVAL; - - if (!(tr->trace_flags & TRACE_ITER_MARKERS)) - return -EINVAL; - - if ((ssize_t)cnt < 0) - return -EINVAL; - - if (cnt > TRACE_MARKER_MAX_SIZE) - cnt = TRACE_MARKER_MAX_SIZE; - meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */ again: size = cnt + meta_size; @@ -6932,7 +7250,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, } entry = ring_buffer_event_data(event); - entry->ip = _THIS_IP_; + entry->ip = ip; len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt); if (len) { @@ -6965,18 +7283,12 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, } static ssize_t -tracing_mark_raw_write(struct file *filp, const char __user *ubuf, +tracing_mark_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) { struct trace_array *tr = filp->private_data; - struct ring_buffer_event *event; - struct trace_buffer *buffer; - struct raw_data_entry *entry; - ssize_t written; - int size; - int len; - -#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int)) + ssize_t written = -ENODEV; + unsigned long ip; if (tracing_disabled) return -EINVAL; @@ -6984,10 +7296,42 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, if (!(tr->trace_flags & TRACE_ITER_MARKERS)) return -EINVAL; - /* The marker must at least have a tag id */ - if (cnt < sizeof(unsigned int)) + if ((ssize_t)cnt < 0) return -EINVAL; + if (cnt > TRACE_MARKER_MAX_SIZE) + cnt = TRACE_MARKER_MAX_SIZE; + + /* The selftests expect this function to be the IP address */ + ip = _THIS_IP_; + + /* The global trace_marker can go to multiple instances */ + if (tr == &global_trace) { + guard(rcu)(); + list_for_each_entry_rcu(tr, &marker_copies, marker_list) { + written = write_marker_to_buffer(tr, ubuf, cnt, ip); + if (written < 0) + break; + } + } else { + written = write_marker_to_buffer(tr, ubuf, cnt, ip); + } + + return written; +} + +static ssize_t write_raw_marker_to_buffer(struct trace_array *tr, + const char __user *ubuf, size_t cnt) +{ + struct ring_buffer_event *event; + struct trace_buffer *buffer; + struct raw_data_entry *entry; + ssize_t written; + int size; + int len; + +#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int)) + size = sizeof(*entry) + cnt; if (cnt < FAULT_SIZE_ID) size += FAULT_SIZE_ID - cnt; @@ -7018,6 +7362,40 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, return written; } +static ssize_t +tracing_mark_raw_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *fpos) +{ + struct trace_array *tr = filp->private_data; + ssize_t written = -ENODEV; + +#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int)) + + if (tracing_disabled) + return -EINVAL; + + if (!(tr->trace_flags & TRACE_ITER_MARKERS)) + return -EINVAL; + + /* The marker must at least have a tag id */ + if (cnt < sizeof(unsigned int)) + return -EINVAL; + + /* The global trace_marker_raw can go to multiple instances */ + if (tr == &global_trace) { + guard(rcu)(); + list_for_each_entry_rcu(tr, &marker_copies, marker_list) { + written = write_raw_marker_to_buffer(tr, ubuf, cnt); + if (written < 0) + break; + } + } else { + written = write_raw_marker_to_buffer(tr, ubuf, cnt); + } + + return written; +} + static int tracing_clock_show(struct seq_file *m, void *v) { struct trace_array *tr = m->private; @@ -7062,6 +7440,12 @@ int tracing_set_clock(struct trace_array *tr, const char *clockstr) tracing_reset_online_cpus(&tr->max_buffer); #endif + if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) { + struct trace_scratch *tscratch = tr->scratch; + + tscratch->clock_id = i; + } + mutex_unlock(&trace_types_lock); return 0; @@ -7448,10 +7832,10 @@ static const struct file_operations trace_time_stamp_mode_fops = { }; static const struct file_operations last_boot_fops = { - .open = tracing_open_generic_tr, - .read = tracing_last_boot_read, - .llseek = generic_file_llseek, - .release = tracing_release_generic_tr, + .open = tracing_last_boot_open, + .read = seq_read, + .llseek = seq_lseek, + .release = tracing_seq_release, }; #ifdef CONFIG_TRACER_SNAPSHOT @@ -8274,6 +8658,10 @@ static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma) struct trace_iterator *iter = &info->iter; int ret = 0; + /* A memmap'ed buffer is not supported for user space mmap */ + if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP) + return -ENODEV; + /* Currently the boot mapped buffer is not supported for mmap */ if (iter->tr->flags & TRACE_ARRAY_FL_BOOT) return -ENODEV; @@ -9191,22 +9579,134 @@ static struct dentry *trace_instance_dir; static void init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer); +#ifdef CONFIG_MODULES +static int make_mod_delta(struct module *mod, void *data) +{ + struct trace_module_delta *module_delta; + struct trace_scratch *tscratch; + struct trace_mod_entry *entry; + struct trace_array *tr = data; + int i; + + tscratch = tr->scratch; + module_delta = READ_ONCE(tr->module_delta); + for (i = 0; i < tscratch->nr_entries; i++) { + entry = &tscratch->entries[i]; + if (strcmp(mod->name, entry->mod_name)) + continue; + if (mod->state == MODULE_STATE_GOING) + module_delta->delta[i] = 0; + else + module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base + - entry->mod_addr; + break; + } + return 0; +} +#else +static int make_mod_delta(struct module *mod, void *data) +{ + return 0; +} +#endif + +static int mod_addr_comp(const void *a, const void *b, const void *data) +{ + const struct trace_mod_entry *e1 = a; + const struct trace_mod_entry *e2 = b; + + return e1->mod_addr > e2->mod_addr ? 1 : -1; +} + +static void setup_trace_scratch(struct trace_array *tr, + struct trace_scratch *tscratch, unsigned int size) +{ + struct trace_module_delta *module_delta; + struct trace_mod_entry *entry; + int i, nr_entries; + + if (!tscratch) + return; + + tr->scratch = tscratch; + tr->scratch_size = size; + + if (tscratch->text_addr) + tr->text_delta = (unsigned long)_text - tscratch->text_addr; + + if (struct_size(tscratch, entries, tscratch->nr_entries) > size) + goto reset; + + /* Check if each module name is a valid string */ + for (i = 0; i < tscratch->nr_entries; i++) { + int n; + + entry = &tscratch->entries[i]; + + for (n = 0; n < MODULE_NAME_LEN; n++) { + if (entry->mod_name[n] == '\0') + break; + if (!isprint(entry->mod_name[n])) + goto reset; + } + if (n == MODULE_NAME_LEN) + goto reset; + } + + /* Sort the entries so that we can find appropriate module from address. */ + nr_entries = tscratch->nr_entries; + sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry), + mod_addr_comp, NULL, NULL); + + if (IS_ENABLED(CONFIG_MODULES)) { + module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL); + if (!module_delta) { + pr_info("module_delta allocation failed. Not able to decode module address."); + goto reset; + } + init_rcu_head(&module_delta->rcu); + } else + module_delta = NULL; + WRITE_ONCE(tr->module_delta, module_delta); + + /* Scan modules to make text delta for modules. */ + module_for_each_mod(make_mod_delta, tr); + + /* Set trace_clock as the same of the previous boot. */ + if (tscratch->clock_id != tr->clock_id) { + if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) || + tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) { + pr_info("the previous trace_clock info is not valid."); + goto reset; + } + } + return; + reset: + /* Invalid trace modules */ + memset(tscratch, 0, size); +} + static int allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size) { enum ring_buffer_flags rb_flags; + struct trace_scratch *tscratch; + unsigned int scratch_size = 0; rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0; buf->tr = tr; if (tr->range_addr_start && tr->range_addr_size) { + /* Add scratch buffer to handle 128 modules */ buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0, tr->range_addr_start, - tr->range_addr_size); + tr->range_addr_size, + struct_size(tscratch, entries, 128)); + + tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size); + setup_trace_scratch(tr, tscratch, scratch_size); - ring_buffer_last_boot_delta(buf->buffer, - &tr->text_delta, &tr->data_delta); /* * This is basically the same as a mapped buffer, * with the same restrictions. @@ -9275,6 +9775,7 @@ static void free_trace_buffers(struct trace_array *tr) return; free_trace_buffer(&tr->array_buffer); + kfree(tr->module_delta); #ifdef CONFIG_TRACER_MAX_TRACE free_trace_buffer(&tr->max_buffer); @@ -9403,6 +9904,7 @@ trace_array_create_systems(const char *name, const char *systems, INIT_LIST_HEAD(&tr->events); INIT_LIST_HEAD(&tr->hist_vars); INIT_LIST_HEAD(&tr->err_log); + INIT_LIST_HEAD(&tr->marker_list); #ifdef CONFIG_MODULES INIT_LIST_HEAD(&tr->mod_events); @@ -9440,6 +9942,7 @@ trace_array_create_systems(const char *name, const char *systems, free_cpumask_var(tr->pipe_cpumask); free_cpumask_var(tr->tracing_cpumask); kfree_const(tr->system_names); + kfree(tr->range_name); kfree(tr->name); kfree(tr); @@ -9470,30 +9973,35 @@ static int instance_mkdir(const char *name) return ret; } -static u64 map_pages(u64 start, u64 size) +#ifdef CONFIG_MMU +static u64 map_pages(unsigned long start, unsigned long size) { - struct page **pages; - phys_addr_t page_start; - unsigned int page_count; - unsigned int i; - void *vaddr; - - page_count = DIV_ROUND_UP(size, PAGE_SIZE); + unsigned long vmap_start, vmap_end; + struct vm_struct *area; + int ret; - page_start = start; - pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL); - if (!pages) + area = get_vm_area(size, VM_IOREMAP); + if (!area) return 0; - for (i = 0; i < page_count; i++) { - phys_addr_t addr = page_start + i * PAGE_SIZE; - pages[i] = pfn_to_page(addr >> PAGE_SHIFT); + vmap_start = (unsigned long) area->addr; + vmap_end = vmap_start + size; + + ret = vmap_page_range(vmap_start, vmap_end, + start, pgprot_nx(PAGE_KERNEL)); + if (ret < 0) { + free_vm_area(area); + return 0; } - vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL); - kfree(pages); - return (u64)(unsigned long)vaddr; + return (u64)vmap_start; } +#else +static inline u64 map_pages(unsigned long start, unsigned long size) +{ + return 0; +} +#endif /** * trace_array_get_by_name - Create/Lookup a trace array, given its name. @@ -9556,6 +10064,9 @@ static int __remove_instance(struct trace_array *tr) if (printk_trace == tr) update_printk_trace(&global_trace); + if (update_marker_trace(tr, 0)) + synchronize_rcu(); + tracing_set_nop(tr); clear_ftrace_function_probes(tr); event_trace_del_tracer(tr); @@ -9566,6 +10077,11 @@ static int __remove_instance(struct trace_array *tr) free_trace_buffers(tr); clear_tracing_err_log(tr); + if (tr->range_name) { + reserve_mem_release_by_name(tr->range_name); + kfree(tr->range_name); + } + for (i = 0; i < tr->nr_topts; i++) { kfree(tr->topts[i].topts); } @@ -9726,6 +10242,8 @@ static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) { struct vfsmount *mnt; struct file_system_type *type; + struct fs_context *fc; + int ret; /* * To maintain backward compatibility for tools that mount @@ -9735,12 +10253,20 @@ static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) type = get_fs_type("tracefs"); if (!type) return NULL; - mnt = vfs_submount(mntpt, type, "tracefs", NULL); + + fc = fs_context_for_submount(type, mntpt); put_filesystem(type); - if (IS_ERR(mnt)) - return NULL; - mntget(mnt); + if (IS_ERR(fc)) + return ERR_CAST(fc); + + ret = vfs_parse_fs_string(fc, "source", + "tracefs", strlen("tracefs")); + if (!ret) + mnt = fc_mount(fc); + else + mnt = ERR_PTR(ret); + put_fs_context(fc); return mnt; } @@ -9887,6 +10413,24 @@ static void trace_module_remove_evals(struct module *mod) static inline void trace_module_remove_evals(struct module *mod) { } #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ +static void trace_module_record(struct module *mod, bool add) +{ + struct trace_array *tr; + unsigned long flags; + + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT); + /* Update any persistent trace array that has already been started */ + if (flags == TRACE_ARRAY_FL_BOOT && add) { + guard(mutex)(&scratch_mutex); + save_mod(mod, tr); + } else if (flags & TRACE_ARRAY_FL_LAST_BOOT) { + /* Update delta if the module loaded in previous boot */ + make_mod_delta(mod, tr); + } + } +} + static int trace_module_notify(struct notifier_block *self, unsigned long val, void *data) { @@ -9895,9 +10439,11 @@ static int trace_module_notify(struct notifier_block *self, switch (val) { case MODULE_STATE_COMING: trace_module_add_evals(mod); + trace_module_record(mod, true); break; case MODULE_STATE_GOING: trace_module_remove_evals(mod); + trace_module_record(mod, false); break; } @@ -10079,7 +10625,7 @@ static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_m static struct trace_iterator iter; unsigned int old_userobj; unsigned long flags; - int cnt = 0, cpu; + int cnt = 0; /* * Always turn off tracing when we dump. @@ -10096,9 +10642,8 @@ static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_m /* Simulate the iterator */ trace_init_iter(&iter, tr); - for_each_tracing_cpu(cpu) { - atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); - } + /* While dumping, do not allow the buffer to be enable */ + tracer_tracing_disable(tr); old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ; @@ -10157,9 +10702,7 @@ static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_m tr->trace_flags |= old_userobj; - for_each_tracing_cpu(cpu) { - atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); - } + tracer_tracing_enable(tr); local_irq_restore(flags); } @@ -10346,6 +10889,7 @@ static inline void do_allocate_snapshot(const char *name) { } __init static void enable_instances(void) { struct trace_array *tr; + bool memmap_area = false; char *curr_str; char *name; char *str; @@ -10363,6 +10907,7 @@ __init static void enable_instances(void) bool traceoff = false; char *flag_delim; char *addr_delim; + char *rname __free(kfree) = NULL; tok = strsep(&curr_str, ","); @@ -10413,16 +10958,31 @@ __init static void enable_instances(void) name); continue; } + memmap_area = true; } else if (tok) { if (!reserve_mem_find_by_name(tok, &start, &size)) { start = 0; pr_warn("Failed to map boot instance %s to %s\n", name, tok); continue; } + rname = kstrdup(tok, GFP_KERNEL); } if (start) { - addr = map_pages(start, size); + /* Start and size must be page aligned */ + if (start & ~PAGE_MASK) { + pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start); + continue; + } + if (size & ~PAGE_MASK) { + pr_warn("Tracing: mapping size %pa is not page aligned\n", &size); + continue; + } + + if (memmap_area) + addr = map_pages(start, size); + else + addr = (unsigned long)phys_to_virt(start); if (addr) { pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n", name, &start, (unsigned long)size); @@ -10449,15 +11009,18 @@ __init static void enable_instances(void) update_printk_trace(tr); /* - * If start is set, then this is a mapped buffer, and - * cannot be deleted by user space, so keep the reference - * to it. + * memmap'd buffers can not be freed. */ - if (start) { - tr->flags |= TRACE_ARRAY_FL_BOOT; + if (memmap_area) { + tr->flags |= TRACE_ARRAY_FL_MEMMAP; tr->ref++; } + if (start) { + tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT; + tr->range_name = no_free_ptr(rname); + } + while ((tok = strsep(&curr_str, ","))) { early_enable_events(tr, tok, true); } @@ -10579,6 +11142,7 @@ __init static int tracer_alloc_buffers(void) INIT_LIST_HEAD(&global_trace.events); INIT_LIST_HEAD(&global_trace.hist_vars); INIT_LIST_HEAD(&global_trace.err_log); + list_add(&global_trace.marker_list, &marker_copies); list_add(&global_trace.list, &ftrace_trace_arrays); apply_trace_boot_options(); @@ -10699,6 +11263,9 @@ __init static int late_trace_init(void) tracepoint_printk = 0; } + if (traceoff_after_boot) + tracing_off(); + tracing_set_default_clock(); clear_boot_tracer(); return 0; |