From f81ab074c30234b07c8309c542cafd07bed721f7 Mon Sep 17 00:00:00 2001 From: Vaibhav Nagarnaik Date: Tue, 16 Aug 2011 14:46:15 -0700 Subject: trace: Add a new readonly entry to report total buffer size The current file "buffer_size_kb" reports the size of per-cpu buffer and not the overall memory allocated which could be misleading. A new file "buffer_total_size_kb" adds up all the enabled CPU buffer sizes and reports it. This is only a readonly entry. Signed-off-by: Vaibhav Nagarnaik Cc: Michael Rubin Cc: David Sharp Link: http://lkml.kernel.org/r/1313531179-9323-2-git-send-email-vnagarnaik@google.com Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e5df02c69b1d..01176788c387 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3568,6 +3568,30 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, return cnt; } +static ssize_t +tracing_total_entries_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct trace_array *tr = filp->private_data; + char buf[64]; + int r, cpu; + unsigned long size = 0, expanded_size = 0; + + mutex_lock(&trace_types_lock); + for_each_tracing_cpu(cpu) { + size += tr->entries >> 10; + if (!ring_buffer_expanded) + expanded_size += trace_buf_size >> 10; + } + if (ring_buffer_expanded) + r = sprintf(buf, "%lu\n", size); + else + r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size); + mutex_unlock(&trace_types_lock); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + static ssize_t tracing_free_buffer_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) @@ -3739,6 +3763,12 @@ static const struct file_operations tracing_entries_fops = { .llseek = generic_file_llseek, }; +static const struct file_operations tracing_total_entries_fops = { + .open = tracing_open_generic, + .read = tracing_total_entries_read, + .llseek = generic_file_llseek, +}; + static const struct file_operations tracing_free_buffer_fops = { .write = tracing_free_buffer_write, .release = tracing_free_buffer_release, @@ -4450,6 +4480,9 @@ static __init int tracer_init_debugfs(void) trace_create_file("buffer_size_kb", 0644, d_tracer, &global_trace, &tracing_entries_fops); + trace_create_file("buffer_total_size_kb", 0444, d_tracer, + &global_trace, &tracing_total_entries_fops); + trace_create_file("free_buffer", 0644, d_tracer, &global_trace, &tracing_free_buffer_fops); -- cgit From c64e148a3be3cb786534ad38298c25c833116c26 Mon Sep 17 00:00:00 2001 From: Vaibhav Nagarnaik Date: Tue, 16 Aug 2011 14:46:16 -0700 Subject: trace: Add ring buffer stats to measure rate of events The stats file under per_cpu folder provides the number of entries, overruns and other statistics about the CPU ring buffer. However, the numbers do not provide any indication of how full the ring buffer is in bytes compared to the overall size in bytes. Also, it is helpful to know the rate at which the cpu buffer is filling up. This patch adds an entry "bytes: " in printed stats for per_cpu ring buffer which provides the actual bytes consumed in the ring buffer. This field includes the number of bytes used by recorded events and the padding bytes added when moving the tail pointer to next page. It also adds the following time stamps: "oldest event ts:" - the oldest timestamp in the ring buffer "now ts:" - the timestamp at the time of reading The field "now ts" provides a consistent time snapshot to the userspace when being read. This is read from the same trace clock used by tracing event timestamps. Together, these values provide the rate at which the buffer is filling up, from the formula: bytes / (now_ts - oldest_event_ts) Signed-off-by: Vaibhav Nagarnaik Cc: Michael Rubin Cc: David Sharp Link: http://lkml.kernel.org/r/1313531179-9323-3-git-send-email-vnagarnaik@google.com Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 01176788c387..b41907000bc6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4056,6 +4056,8 @@ tracing_stats_read(struct file *filp, char __user *ubuf, struct trace_array *tr = &global_trace; struct trace_seq *s; unsigned long cnt; + unsigned long long t; + unsigned long usec_rem; s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) @@ -4072,6 +4074,17 @@ tracing_stats_read(struct file *filp, char __user *ubuf, cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu); trace_seq_printf(s, "commit overrun: %ld\n", cnt); + cnt = ring_buffer_bytes_cpu(tr->buffer, cpu); + trace_seq_printf(s, "bytes: %ld\n", cnt); + + t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu)); + usec_rem = do_div(t, USEC_PER_SEC); + trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", t, usec_rem); + + t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu)); + usec_rem = do_div(t, USEC_PER_SEC); + trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); + count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); kfree(s); -- cgit From 6249687f76b69cc0b2ad34636f4a18d693ef3262 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 19 Sep 2011 11:35:58 -0400 Subject: tracing: Add a counter clock for those that do not trust clocks When debugging tight race conditions, it can be helpful to have a synchronized tracing method. Although in most cases the global clock provides this functionality, if timings is not the issue, it is more comforting to know that the order of events really happened in a precise order. Instead of using a clock, add a "counter" that is simply an incrementing atomic 64bit counter that orders the events as they are perceived to happen. The trace_clock_counter() is added from the attempt by Peter Zijlstra trying to convert the trace_clock_global() to it. I took Peter's counter code and made trace_clock_counter() instead, and added it to the choice of clocks. Just echo counter > /debug/tracing/trace_clock to activate it. Requested-by: Thomas Gleixner Requested-by: Peter Zijlstra Reviewed-By: Valdis Kletnieks Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b41907000bc6..4b8df0dc9358 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -435,6 +435,7 @@ static struct { } trace_clocks[] = { { trace_clock_local, "local" }, { trace_clock_global, "global" }, + { trace_clock_counter, "counter" }, }; int trace_clock_id; -- cgit From e0a413f619ef8bc366dafc6f8221674993b8d85f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 29 Sep 2011 21:26:16 -0400 Subject: tracing: Warn on output if the function tracer was found corrupted As the function tracer is very intrusive, lots of self checks are performed on the tracer and if something is found to be strange it will shut itself down keeping it from corrupting the rest of the kernel. This shutdown may still allow functions to be traced, as the tracing only stops new modifications from happening. Trying to stop the function tracer itself can cause more harm as it requires code modification. Although a WARN_ON() is executed, a user may not notice it. To help the user see that something isn't right with the tracing of the system a big warning is added to the output of the tracer that lets the user know that their data may be incomplete. Reported-by: Thomas Gleixner Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4b8df0dc9358..13f2b8472fed 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2160,6 +2160,14 @@ void trace_default_header(struct seq_file *m) } } +static void test_ftrace_alive(struct seq_file *m) +{ + if (!ftrace_is_dead()) + return; + seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"); + seq_printf(m, "# MAY BE MISSING FUNCTION EVENTS\n"); +} + static int s_show(struct seq_file *m, void *v) { struct trace_iterator *iter = v; @@ -2169,6 +2177,7 @@ static int s_show(struct seq_file *m, void *v) if (iter->tr) { seq_printf(m, "# tracer: %s\n", iter->trace->name); seq_puts(m, "#\n"); + test_ftrace_alive(m); } if (iter->trace && iter->trace->print_header) iter->trace->print_header(m); @@ -4613,6 +4622,12 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) tracing_off(); + /* Did function tracer already get disabled? */ + if (ftrace_is_dead()) { + printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n"); + printk("# MAY BE MISSING FUNCTION EVENTS\n"); + } + if (disable_tracing) ftrace_kill(); -- cgit From d696b58ca2c3ca76e784ef89a7e0453d9b7ab187 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 Sep 2011 11:50:27 -0400 Subject: tracing: Do not allocate buffer for trace_marker When doing intense tracing, the kmalloc inside trace_marker can introduce side effects to what is being traced. As trace_marker() is used by userspace to inject data into the kernel ring buffer, it needs to do so with the least amount of intrusion to the operations of the kernel or the user space application. As the ring buffer is designed to write directly into the buffer without the need to make a temporary buffer, and userspace already went through the hassle of knowing how big the write will be, we can simply pin the userspace pages and write the data directly into the buffer. This improves the impact of tracing via trace_marker tremendously! Thanks to Peter Zijlstra and Thomas Gleixner for pointing out the use of get_user_pages_fast() and kmap_atomic(). Suggested-by: Thomas Gleixner Suggested-by: Peter Zijlstra Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 111 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 83 insertions(+), 28 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 13f2b8472fed..f86efe90ca45 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3628,22 +3628,24 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp) return 0; } -static int mark_printk(const char *fmt, ...) -{ - int ret; - va_list args; - va_start(args, fmt); - ret = trace_vprintk(0, fmt, args); - va_end(args); - return ret; -} - static ssize_t tracing_mark_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) { - char *buf; - size_t written; + unsigned long addr = (unsigned long)ubuf; + struct ring_buffer_event *event; + struct ring_buffer *buffer; + struct print_entry *entry; + unsigned long irq_flags; + struct page *pages[2]; + int nr_pages = 1; + ssize_t written; + void *page1; + void *page2; + int offset; + int size; + int len; + int ret; if (tracing_disabled) return -EINVAL; @@ -3651,28 +3653,81 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (cnt > TRACE_BUF_SIZE) cnt = TRACE_BUF_SIZE; - buf = kmalloc(cnt + 2, GFP_KERNEL); - if (buf == NULL) - return -ENOMEM; + /* + * Userspace is injecting traces into the kernel trace buffer. + * We want to be as non intrusive as possible. + * To do so, we do not want to allocate any special buffers + * or take any locks, but instead write the userspace data + * straight into the ring buffer. + * + * First we need to pin the userspace buffer into memory, + * which, most likely it is, because it just referenced it. + * But there's no guarantee that it is. By using get_user_pages_fast() + * and kmap_atomic/kunmap_atomic() we can get access to the + * pages directly. We then write the data directly into the + * ring buffer. + */ + BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); - if (copy_from_user(buf, ubuf, cnt)) { - kfree(buf); - return -EFAULT; + /* check if we cross pages */ + if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK)) + nr_pages = 2; + + offset = addr & (PAGE_SIZE - 1); + addr &= PAGE_MASK; + + ret = get_user_pages_fast(addr, nr_pages, 0, pages); + if (ret < nr_pages) { + while (--ret >= 0) + put_page(pages[ret]); + written = -EFAULT; + goto out; } - if (buf[cnt-1] != '\n') { - buf[cnt] = '\n'; - buf[cnt+1] = '\0'; + + page1 = kmap_atomic(pages[0]); + if (nr_pages == 2) + page2 = kmap_atomic(pages[1]); + + local_save_flags(irq_flags); + size = sizeof(*entry) + cnt + 2; /* possible \n added */ + buffer = global_trace.buffer; + event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, + irq_flags, preempt_count()); + if (!event) { + /* Ring buffer disabled, return as if not open for write */ + written = -EBADF; + goto out_unlock; + } + + entry = ring_buffer_event_data(event); + entry->ip = _THIS_IP_; + + if (nr_pages == 2) { + len = PAGE_SIZE - offset; + memcpy(&entry->buf, page1 + offset, len); + memcpy(&entry->buf[len], page2, cnt - len); } else - buf[cnt] = '\0'; + memcpy(&entry->buf, page1 + offset, cnt); - written = mark_printk("%s", buf); - kfree(buf); - *fpos += written; + if (entry->buf[cnt - 1] != '\n') { + entry->buf[cnt] = '\n'; + entry->buf[cnt + 1] = '\0'; + } else + entry->buf[cnt] = '\0'; + + ring_buffer_unlock_commit(buffer, event); - /* don't tell userspace we wrote more - it might confuse them */ - if (written > cnt) - written = cnt; + written = cnt; + *fpos += written; + + out_unlock: + if (nr_pages == 2) + kunmap_atomic(page2); + kunmap_atomic(page1); + while (nr_pages > 0) + put_page(pages[--nr_pages]); + out: return written; } -- cgit From 9b5f8b31af57a8ce9e9f77864d9143b5e3304815 Mon Sep 17 00:00:00 2001 From: Geunsik Lim Date: Fri, 12 Aug 2011 14:30:22 +0900 Subject: ftrace: Fix README to state tracing_on to start/stop tracing tracing_enabled option is deprecated. To start/stop tracing, write to /sys/kernel/debug/tracing/tracing_on without tracing_enabled. This patch is based on Linux 3.1.0-rc1 Signed-off-by: Geunsik Lim Link: http://lkml.kernel.org/r/1313127022-23830-1-git-send-email-leemgs1@gmail.com Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f86efe90ca45..cea16053f553 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2720,9 +2720,9 @@ static const char readme_msg[] = "# cat /sys/kernel/debug/tracing/trace_options\n" "noprint-parent nosym-offset nosym-addr noverbose\n" "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n" - "# echo 1 > /sys/kernel/debug/tracing/tracing_enabled\n" + "# echo 1 > /sys/kernel/debug/tracing/tracing_on\n" "# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n" - "# echo 0 > /sys/kernel/debug/tracing/tracing_enabled\n" + "# echo 0 > /sys/kernel/debug/tracing/tracing_on\n" ; static ssize_t -- cgit From 436fc280261dcfce5af38f08b89287750dc91cd2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 14 Oct 2011 10:44:25 -0400 Subject: tracing: Fix returning of duplicate data after EOF in trace_pipe_raw The trace_pipe_raw handler holds a cached page from the time the file is opened to the time it is closed. The cached page is used to handle the case of the user space buffer being smaller than what was read from the ring buffer. The left over buffer is held in the cache so that the next read will continue where the data left off. After EOF is returned (no more data in the buffer), the index of the cached page is set to zero. If a user app reads the page again after EOF, the check in the buffer will see that the cached page is less than page size and will return the cached page again. This will cause reading the trace_pipe_raw again after EOF to return duplicate data, making the output look like the time went backwards but instead data is just repeated. The fix is to not reset the index right after all data is read from the cache, but to reset it after all data is read and more data exists in the ring buffer. Cc: stable Reported-by: Jeremy Eder Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index cea16053f553..b24a72d35008 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3903,8 +3903,6 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, if (info->read < PAGE_SIZE) goto read; - info->read = 0; - trace_access_lock(info->cpu); ret = ring_buffer_read_page(info->tr->buffer, &info->spare, @@ -3914,6 +3912,8 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, if (ret < 0) return 0; + info->read = 0; + read: size = PAGE_SIZE - info->read; if (size > count) -- cgit