From e00320875d0cc5f8099a7227b2f25fbb3231268d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 08:48:23 +0100 Subject: x86: fix stackprotector canary updates during context switches fix a bug noticed and fixed by pageexec@freemail.hu. if built with -fstack-protector-all then we'll have canary checks built into the __switch_to() function. That does not work well with the canary-switching code there: while we already use the %rsp of the new task, we still call __switch_to() with the previous task's canary value in the PDA, hence the __switch_to() ssp prologue instructions will store the previous canary. Then we update the PDA and upon return from __switch_to() the canary check triggers and we panic. so update the canary after we have called __switch_to(), where we are at the same stackframe level as the last stackframe of the next (and now freshly current) task. Note: this means that we call __switch_to() [and its sub-functions] still with the old canary, but that is not a problem, both the previous and the next task have a high-quality canary. The only (mostly academic) disadvantage is that the canary of one task may leak onto the stack of another task, increasing the risk of information leaks, were an attacker able to read the stack of specific tasks (but not that of others). To solve this we'll have to reorganize the way we switch tasks, and move the PDA setting into the switch_to() assembly code. That will happen in another patch. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/pda.h | 2 -- include/asm-x86/system.h | 6 +++++- include/linux/sched.h | 3 +-- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h index 101fb9e11954..62b734986a44 100644 --- a/include/asm-x86/pda.h +++ b/include/asm-x86/pda.h @@ -16,11 +16,9 @@ struct x8664_pda { unsigned long oldrsp; /* 24 user rsp for system call */ int irqcount; /* 32 Irq nesting counter. 
Starts -1 */ unsigned int cpunumber; /* 36 Logical CPU number */ -#ifdef CONFIG_CC_STACKPROTECTOR unsigned long stack_canary; /* 40 stack canary value */ /* gcc-ABI: this canary MUST be at offset 40!!! */ -#endif char *irqstackptr; unsigned int __softirq_pending; unsigned int __nmi_count; /* number of NMI on this CPUs */ diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h index a2f04cd79b29..172f54185093 100644 --- a/include/asm-x86/system.h +++ b/include/asm-x86/system.h @@ -92,6 +92,8 @@ do { \ ".globl thread_return\n" \ "thread_return:\n\t" \ "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ + "movq %P[task_canary](%%rsi),%%r8\n\t" \ + "movq %%r8,%%gs:%P[pda_canary]\n\t" \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ "movq %%rax,%%rdi\n\t" \ @@ -103,7 +105,9 @@ do { \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ + [task_canary] "i" (offsetof(struct task_struct, stack_canary)),\ + [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)), \ + [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary))\ : "memory", "cc" __EXTRA_CLOBBER) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 5395a6176f4b..d6a515158783 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1096,10 +1096,9 @@ struct task_struct { pid_t pid; pid_t tgid; -#ifdef CONFIG_CC_STACKPROTECTOR /* Canary value for the -fstack-protector gcc feature */ unsigned long stack_canary; -#endif + /* * pointers to (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with -- cgit From 9b5609fd773e6ac0b1d6d6e1bf68f32cca64e06b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:41:09 +0100 Subject: stackprotector: include files create for core kernel files to include. 
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/stackprotector.h | 4 ++++ include/linux/stackprotector.h | 8 ++++++++ 2 files changed, 12 insertions(+) create mode 100644 include/asm-x86/stackprotector.h create mode 100644 include/linux/stackprotector.h (limited to 'include') diff --git a/include/asm-x86/stackprotector.h b/include/asm-x86/stackprotector.h new file mode 100644 index 000000000000..dcac7a6bdba2 --- /dev/null +++ b/include/asm-x86/stackprotector.h @@ -0,0 +1,4 @@ +#ifndef _ASM_STACKPROTECTOR_H +#define _ASM_STACKPROTECTOR_H 1 + +#endif diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h new file mode 100644 index 000000000000..d3e8bbe602f8 --- /dev/null +++ b/include/linux/stackprotector.h @@ -0,0 +1,8 @@ +#ifndef _LINUX_STACKPROTECTOR_H +#define _LINUX_STACKPROTECTOR_H 1 + +#ifdef CONFIG_CC_STACKPROTECTOR +# include +#endif + +#endif -- cgit From 18aa8bb12dcb10adc3d7c9d69714d53667c0ab7f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:42:02 +0100 Subject: stackprotector: add boot_init_stack_canary() add the boot_init_stack_canary() and make the secondary idle threads use it. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/stackprotector.h | 20 ++++++++++++++++++++ include/linux/stackprotector.h | 4 ++++ 2 files changed, 24 insertions(+) (limited to 'include') diff --git a/include/asm-x86/stackprotector.h b/include/asm-x86/stackprotector.h index dcac7a6bdba2..0f91f7a2688c 100644 --- a/include/asm-x86/stackprotector.h +++ b/include/asm-x86/stackprotector.h @@ -1,4 +1,24 @@ #ifndef _ASM_STACKPROTECTOR_H #define _ASM_STACKPROTECTOR_H 1 +/* + * Initialize the stackprotector canary value. + * + * NOTE: this must only be called from functions that never return, + * and it must always be inlined. 
+ */ +static __always_inline void boot_init_stack_canary(void) +{ + /* + * If we're the non-boot CPU, nothing set the PDA stack + * canary up for us - and if we are the boot CPU we have + * a 0 stack canary. This is a good place for updating + * it, as we wont ever return from this function (so the + * invalid canaries already on the stack wont ever + * trigger): + */ + current->stack_canary = get_random_int(); + write_pda(stack_canary, current->stack_canary); +} + #endif diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h index d3e8bbe602f8..422e71aafd0b 100644 --- a/include/linux/stackprotector.h +++ b/include/linux/stackprotector.h @@ -3,6 +3,10 @@ #ifdef CONFIG_CC_STACKPROTECTOR # include +#else +static inline void boot_init_stack_canary(void) +{ +} #endif #endif -- cgit From 420594296838fdc9a674470d710cda7d1487f9f4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:44:08 +0100 Subject: x86: fix the stackprotector canary of the boot CPU Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/stackprotector.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h index 422e71aafd0b..6f3e54c704c0 100644 --- a/include/linux/stackprotector.h +++ b/include/linux/stackprotector.h @@ -1,6 +1,10 @@ #ifndef _LINUX_STACKPROTECTOR_H #define _LINUX_STACKPROTECTOR_H 1 +#include +#include +#include + #ifdef CONFIG_CC_STACKPROTECTOR # include #else -- cgit From 960a672bd9f1ec06e8f197cf81a50fd07ea02e7f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:56:04 +0100 Subject: x86: stackprotector: mix TSC to the boot canary mix the TSC to the boot canary. 
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/stackprotector.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-x86/stackprotector.h b/include/asm-x86/stackprotector.h index 0f91f7a2688c..3baf7ad89be1 100644 --- a/include/asm-x86/stackprotector.h +++ b/include/asm-x86/stackprotector.h @@ -1,6 +1,8 @@ #ifndef _ASM_STACKPROTECTOR_H #define _ASM_STACKPROTECTOR_H 1 +#include + /* * Initialize the stackprotector canary value. * @@ -9,16 +11,28 @@ */ static __always_inline void boot_init_stack_canary(void) { + u64 canary; + u64 tsc; + /* * If we're the non-boot CPU, nothing set the PDA stack * canary up for us - and if we are the boot CPU we have * a 0 stack canary. This is a good place for updating * it, as we wont ever return from this function (so the * invalid canaries already on the stack wont ever - * trigger): + * trigger). + * + * We both use the random pool and the current TSC as a source + * of randomness. The TSC only matters for very early init, + * there it already has some randomness on most systems. Later + * on during the bootup the random pool has true entropy too. */ - current->stack_canary = get_random_int(); - write_pda(stack_canary, current->stack_canary); + get_random_bytes(&canary, sizeof(canary)); + tsc = __native_read_tsc(); + canary += tsc + (tsc << 32UL); + + current->stack_canary = canary; + write_pda(stack_canary, canary); } #endif -- cgit From 7c9f8861e6c9c839f913e49b98c3854daca18f27 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 22 Apr 2008 16:38:23 -0500 Subject: stackprotector: use canary at end of stack to indicate overruns at oops time (Updated with a common max-stack-used checker that knows about the canary, as suggested by Joe Perches) Use a canary at the end of the stack to clearly indicate at oops time whether the stack has ever overflowed. 
This is a very simple implementation with a couple of drawbacks: 1) a thread may legitimately use exactly up to the last word on the stack -- but the chances of doing this and then oopsing later seem slim 2) it's possible that the stack usage isn't dense enough that the canary location could get skipped over -- but the worst that happens is that we don't flag the overrun -- though this happens fairly often in my testing :( With the code in place, an intentionally-bloated stack oops might do: BUG: unable to handle kernel paging request at ffff8103f84cc680 IP: [] update_curr+0x9a/0xa8 PGD 8063 PUD 0 Thread overran stack or stack corrupted Oops: 0000 [1] SMP CPU 0 ... ... unless the stack overrun is so bad that it corrupts some other thread. Signed-off-by: Eric Sandeen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/magic.h | 1 + include/linux/sched.h | 13 +++++++++++++ 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/magic.h b/include/linux/magic.h index 1fa0c2ce4dec..74e68e201166 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -42,4 +42,5 @@ #define FUTEXFS_SUPER_MAGIC 0xBAD1DEA #define INOTIFYFS_SUPER_MAGIC 0x2BAD1DEA +#define STACK_END_MAGIC 0x57AC6E9D #endif /* __LINUX_MAGIC_H__ */ diff --git a/include/linux/sched.h b/include/linux/sched.h index d6a515158783..c5181e77f305 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1969,6 +1969,19 @@ static inline unsigned long *end_of_stack(struct task_struct *p) extern void thread_info_cache_init(void); +#ifdef CONFIG_DEBUG_STACK_USAGE +static inline unsigned long stack_not_used(struct task_struct *p) +{ + unsigned long *n = end_of_stack(p); + + do { /* Skip over canary */ + n++; + } while (!*n); + + return (unsigned long)n - (unsigned long)end_of_stack(p); +} +#endif + /* set thread flags in other task's structures * - see asm/thread_info.h for TIF_xxxx flags available */ -- cgit From af9ff7868f0f76d3364351b1641b9dfa99588e77 
Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 12 Jul 2008 09:36:38 -0700 Subject: x86: simplify stackprotector self-check Clean up the code by removing no longer needed code; make sure the pda is updated and kept in sync Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- include/asm-x86/pda.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h index 62b734986a44..a5ff5bb76299 100644 --- a/include/asm-x86/pda.h +++ b/include/asm-x86/pda.h @@ -131,4 +131,5 @@ do { \ #define PDA_STACKOFFSET (5*8) +#define refresh_stack_canary() write_pda(stack_canary, current->stack_canary) #endif -- cgit From bc22c17e12c130dc929218a95aa347e0f3fd05dc Mon Sep 17 00:00:00 2001 From: Alain Knaff Date: Sun, 4 Jan 2009 22:46:16 +0100 Subject: bzip2/lzma: library support for gzip, bzip2 and lzma decompression Impact: Replaces inflate.c with a wrapper around zlib_inflate; new library code This is the first part of the bzip2/lzma patch The bzip patch is based on an idea by Christian Ludwig, includes support for compressing the kernel with bzip2 or lzma rather than gzip. Both compressors give smaller sizes than gzip. Lzma decompresses faster than bzip2. It also supports ramdisks and initramfs' compressed using these two compressors. The functionality has been successfully used for a couple of years by the udpcast project This version applies to "tip" kernel 2.6.28 This part contains: - changed inflate.c to accommodate rest of patch - implementation of bzip2 compression (not used at this stage yet) - implementation of lzma compression (not used at this stage yet) - Makefile routines to support bzip2 and lzma kernel compression Signed-off-by: Alain Knaff Signed-off-by: H. 
Peter Anvin --- include/linux/decompress/bunzip2.h | 10 +++++ include/linux/decompress/generic.h | 30 +++++++++++++ include/linux/decompress/inflate.h | 13 ++++++ include/linux/decompress/mm.h | 87 ++++++++++++++++++++++++++++++++++++++ include/linux/decompress/unlzma.h | 12 ++++++ 5 files changed, 152 insertions(+) create mode 100644 include/linux/decompress/bunzip2.h create mode 100644 include/linux/decompress/generic.h create mode 100644 include/linux/decompress/inflate.h create mode 100644 include/linux/decompress/mm.h create mode 100644 include/linux/decompress/unlzma.h (limited to 'include') diff --git a/include/linux/decompress/bunzip2.h b/include/linux/decompress/bunzip2.h new file mode 100644 index 000000000000..115272137a9c --- /dev/null +++ b/include/linux/decompress/bunzip2.h @@ -0,0 +1,10 @@ +#ifndef DECOMPRESS_BUNZIP2_H +#define DECOMPRESS_BUNZIP2_H + +int bunzip2(unsigned char *inbuf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *pos, + void(*error)(char *x)); +#endif diff --git a/include/linux/decompress/generic.h b/include/linux/decompress/generic.h new file mode 100644 index 000000000000..f847f514f78e --- /dev/null +++ b/include/linux/decompress/generic.h @@ -0,0 +1,30 @@ +#ifndef DECOMPRESS_GENERIC_H +#define DECOMPRESS_GENERIC_H + +/* Minimal chunksize to be read. 
+ *Bzip2 prefers at least 4096 + *Lzma prefers 0x10000 */ +#define COMPR_IOBUF_SIZE 4096 + +typedef int (*decompress_fn) (unsigned char *inbuf, int len, + int(*fill)(void*, unsigned int), + int(*writebb)(void*, unsigned int), + unsigned char *output, + int *posp, + void(*error)(char *x)); + +/* inbuf - input buffer + *len - len of pre-read data in inbuf + *fill - function to fill inbuf if empty + *writebb - function to write out outbug + *posp - if non-null, input position (number of bytes read) will be + * returned here + * + *If len != 0, the inbuf is initialized (with as much data), and fill + *should not be called + *If len = 0, the inbuf is allocated, but empty. Its size is IOBUF_SIZE + *fill should be called (repeatedly...) to read data, at most IOBUF_SIZE + */ + + +#endif diff --git a/include/linux/decompress/inflate.h b/include/linux/decompress/inflate.h new file mode 100644 index 000000000000..f9b06ccc3e5c --- /dev/null +++ b/include/linux/decompress/inflate.h @@ -0,0 +1,13 @@ +#ifndef INFLATE_H +#define INFLATE_H + +/* Other housekeeping constants */ +#define INBUFSIZ 4096 + +int gunzip(unsigned char *inbuf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *pos, + void(*error_fn)(char *x)); +#endif diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h new file mode 100644 index 000000000000..12ff8c3f1d05 --- /dev/null +++ b/include/linux/decompress/mm.h @@ -0,0 +1,87 @@ +/* + * linux/compr_mm.h + * + * Memory management for pre-boot and ramdisk uncompressors + * + * Authors: Alain Knaff + * + */ + +#ifndef DECOMPR_MM_H +#define DECOMPR_MM_H + +#ifdef STATIC + +/* Code active when included from pre-boot environment: */ + +/* A trivial malloc implementation, adapted from + * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 + */ +static unsigned long malloc_ptr; +static int malloc_count; + +static void *malloc(int size) +{ + void *p; + + if (size < 0) + error("Malloc 
error"); + if (!malloc_ptr) + malloc_ptr = free_mem_ptr; + + malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */ + + p = (void *)malloc_ptr; + malloc_ptr += size; + + if (free_mem_end_ptr && malloc_ptr >= free_mem_end_ptr) + error("Out of memory"); + + malloc_count++; + return p; +} + +static void free(void *where) +{ + malloc_count--; + if (!malloc_count) + malloc_ptr = free_mem_ptr; +} + +#define large_malloc(a) malloc(a) +#define large_free(a) free(a) + +#define set_error_fn(x) + +#define INIT + +#else /* STATIC */ + +/* Code active when compiled standalone for use when loading ramdisk: */ + +#include +#include +#include +#include + +/* Use defines rather than static inline in order to avoid spurious + * warnings when not needed (indeed large_malloc / large_free are not + * needed by inflate */ + +#define malloc(a) kmalloc(a, GFP_KERNEL) +#define free(a) kfree(a) + +#define large_malloc(a) vmalloc(a) +#define large_free(a) vfree(a) + +static void(*error)(char *m); +#define set_error_fn(x) error = x; + +#define INIT __init +#define STATIC + +#include + +#endif /* STATIC */ + +#endif /* DECOMPR_MM_H */ diff --git a/include/linux/decompress/unlzma.h b/include/linux/decompress/unlzma.h new file mode 100644 index 000000000000..7796538f1bf4 --- /dev/null +++ b/include/linux/decompress/unlzma.h @@ -0,0 +1,12 @@ +#ifndef DECOMPRESS_UNLZMA_H +#define DECOMPRESS_UNLZMA_H + +int unlzma(unsigned char *, int, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *posp, + void(*error)(char *x) + ); + +#endif -- cgit From 889c92d21db40be0b7d22a59395060237895bb85 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 8 Jan 2009 15:14:17 -0800 Subject: bzip2/lzma: centralize format detection Centralize the compression format detection to a common routine in the lib directory, and use it for both initramfs and initrd. Signed-off-by: H. 
Peter Anvin --- include/linux/decompress/generic.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/decompress/generic.h b/include/linux/decompress/generic.h index f847f514f78e..6dfb856327bb 100644 --- a/include/linux/decompress/generic.h +++ b/include/linux/decompress/generic.h @@ -26,5 +26,8 @@ typedef int (*decompress_fn) (unsigned char *inbuf, int len, *fill should be called (repeatedly...) to read data, at most IOBUF_SIZE */ +/* Utility routine to detect the decompression method */ +decompress_fn decompress_method(const unsigned char *inbuf, int len, + const char **name); #endif -- cgit From 7f7ace0cda64c99599c23785f8979a072e118058 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:08 -0800 Subject: cpumask: update irq_desc to use cpumask_var_t Impact: reduce memory usage, use new cpumask API. Replace the affinity and pending_masks with cpumask_var_t's. This adds to the significant size reduction done with the SPARSE_IRQS changes. The added functions (init_alloc_desc_masks & init_copy_desc_masks) are in the include file so they can be inlined (and optimized out for the !CONFIG_CPUMASKS_OFFSTACK case.) [Naming chosen to be consistent with the other init*irq functions, as well as the backwards arg declaration of "from, to" instead of the more common "to, from" standard.] Includes a slight change to the declaration of struct irq_desc to embed the pending_mask within ifdef(CONFIG_SMP) to be consistent with other references, and some small changes to Xen. 
Tested: sparse/non-sparse/cpumask_offstack/non-cpumask_offstack/nonuma/nosmp on x86_64 Signed-off-by: Mike Travis Cc: Chris Wright Cc: Jeremy Fitzhardinge Cc: KOSAKI Motohiro Cc: Venkatesh Pallipadi Cc: virtualization@lists.osdl.org Cc: xen-devel@lists.xensource.com Cc: Yinghai Lu --- include/linux/irq.h | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index f899b502f186..fa27210f1dfd 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -182,11 +182,11 @@ struct irq_desc { unsigned int irqs_unhandled; spinlock_t lock; #ifdef CONFIG_SMP - cpumask_t affinity; + cpumask_var_t affinity; unsigned int cpu; -#endif #ifdef CONFIG_GENERIC_PENDING_IRQ - cpumask_t pending_mask; + cpumask_var_t pending_mask; +#endif #endif #ifdef CONFIG_PROC_FS struct proc_dir_entry *dir; @@ -422,4 +422,79 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); #endif /* !CONFIG_S390 */ +#ifdef CONFIG_SMP +/** + * init_alloc_desc_masks - allocate cpumasks for irq_desc + * @desc: pointer to irq_desc struct + * @boot: true if need bootmem + * + * Allocates affinity and pending_mask cpumask if required. + * Returns true if successful (or not required). + * Side effect: affinity has all bits set, pending_mask has all bits clear. 
+ */ +static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, + bool boot) +{ + if (boot) { + alloc_bootmem_cpumask_var(&desc->affinity); + cpumask_setall(desc->affinity); + +#ifdef CONFIG_GENERIC_PENDING_IRQ + alloc_bootmem_cpumask_var(&desc->pending_mask); + cpumask_clear(desc->pending_mask); +#endif + return true; + } + + if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) + return false; + cpumask_setall(desc->affinity); + +#ifdef CONFIG_GENERIC_PENDING_IRQ + if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) { + free_cpumask_var(desc->affinity); + return false; + } + cpumask_clear(desc->pending_mask); +#endif + return true; +} + +/** + * init_copy_desc_masks - copy cpumasks for irq_desc + * @old_desc: pointer to old irq_desc struct + * @new_desc: pointer to new irq_desc struct + * + * Insures affinity and pending_masks are copied to new irq_desc. + * If !CONFIG_CPUMASKS_OFFSTACK the cpumasks are embedded in the + * irq_desc struct so the copy is redundant. + */ + +static inline void init_copy_desc_masks(struct irq_desc *old_desc, + struct irq_desc *new_desc) +{ +#ifdef CONFIG_CPUMASKS_OFFSTACK + cpumask_copy(new_desc->affinity, old_desc->affinity); + +#ifdef CONFIG_GENERIC_PENDING_IRQ + cpumask_copy(new_desc->pending_mask, old_desc->pending_mask); +#endif +#endif +} + +#else /* !CONFIG_SMP */ + +static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, + bool boot) +{ + return true; +} + +static inline void init_copy_desc_masks(struct irq_desc *old_desc, + struct irq_desc *new_desc) +{ +} + +#endif /* CONFIG_SMP */ + #endif /* _LINUX_IRQ_H */ -- cgit From fbd59a8d1f7cf325fdb6828659f1fb76631e87b3 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 10 Jan 2009 21:58:08 -0800 Subject: cpumask: Use topology_core_cpumask()/topology_thread_cpumask() Impact: reduce stack usage, use new cpumask API. 
This actually uses topology_core_cpumask() and topology_thread_cpumask(), removing the only users of topology_core_siblings() and topology_thread_siblings() Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Cc: linux-net-drivers@solarflare.com --- include/linux/topology.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/topology.h b/include/linux/topology.h index e632d29f0544..a16b9e06f2e5 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -193,5 +193,11 @@ int arch_update_cpu_topology(void); #ifndef topology_core_siblings #define topology_core_siblings(cpu) cpumask_of_cpu(cpu) #endif +#ifndef topology_thread_cpumask +#define topology_thread_cpumask(cpu) cpumask_of(cpu) +#endif +#ifndef topology_core_cpumask +#define topology_core_cpumask(cpu) cpumask_of(cpu) +#endif #endif /* _LINUX_TOPOLOGY_H */ -- cgit From 802bf931f2688ad125b73db597ce63cc842fb27a Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:09 -0800 Subject: cpumask: fix bug in use cpumask_var_t in irq_desc Impact: fix bug where new irq_desc uses old cpumask pointers which are freed. As Yinghai pointed out, init_copy_one_irq_desc() copies the old desc to the new desc overwriting the cpumask pointers. Since the old_desc and the cpumask pointers are freed, then memory corruption will occur if these old pointers are used. Move the allocation of these pointers to after the copy. 
Signed-off-by: Mike Travis Cc: Yinghai Lu --- include/linux/irq.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index fa27210f1dfd..27a67536511e 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -426,15 +426,18 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); /** * init_alloc_desc_masks - allocate cpumasks for irq_desc * @desc: pointer to irq_desc struct + * @cpu: cpu which will be handling the cpumasks * @boot: true if need bootmem * * Allocates affinity and pending_mask cpumask if required. * Returns true if successful (or not required). * Side effect: affinity has all bits set, pending_mask has all bits clear. */ -static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, +static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, bool boot) { + int node; + if (boot) { alloc_bootmem_cpumask_var(&desc->affinity); cpumask_setall(desc->affinity); @@ -446,6 +449,8 @@ static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, return true; } + node = cpu_to_node(cpu); + if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) return false; cpumask_setall(desc->affinity); @@ -484,7 +489,7 @@ static inline void init_copy_desc_masks(struct irq_desc *old_desc, #else /* !CONFIG_SMP */ -static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, +static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, bool boot) { return true; -- cgit From 9332fccdedf8e09448f3b69b624211ae879f6c45 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 22:24:07 -0800 Subject: irq: initialize nr_irqs based on nr_cpu_ids Impact: Reduce memory usage. This is the second half of the changes to make the irq_desc_ptrs be variable sized based on nr_cpu_ids. 
This is done by adding a new "max_nr_irqs" macro to irq_vectors.h (and a dummy in irqnr.h) to return a max NR_IRQS value based on NR_CPUS or nr_cpu_ids. This necessitated moving the define of MAX_IO_APICS to a separate file (asm/apicnum.h) so it could be included without the baggage of the other asm/apicdef.h declarations. Signed-off-by: Mike Travis --- include/linux/irqnr.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 86af92e9e84c..de66e4e10406 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -20,11 +20,18 @@ # define for_each_irq_desc_reverse(irq, desc) \ for (irq = nr_irqs - 1; irq >= 0; irq--) + #else /* CONFIG_GENERIC_HARDIRQS */ +#include /* need possible max_nr_irqs() */ + extern int nr_irqs; extern struct irq_desc *irq_to_desc(unsigned int irq); +# ifndef max_nr_irqs +# define max_nr_irqs(nr_cpus) NR_IRQS +# endif + # define for_each_irq_desc(irq, desc) \ for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ irq++, desc = irq_to_desc(irq)) \ -- cgit From 92296c6d6e908c35fca287a21af27be814af9c75 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sun, 11 Jan 2009 09:22:58 -0800 Subject: cpumask, irq: non-x86 build failures Ingo Molnar wrote: > All non-x86 architectures fail to build: > > In file included from /home/mingo/tip/include/linux/random.h:11, > from /home/mingo/tip/include/linux/stackprotector.h:6, > from /home/mingo/tip/init/main.c:17: > /home/mingo/tip/include/linux/irqnr.h:26:63: error: asm/irq_vectors.h: No such file or directory Do not include asm/irq_vectors.h in generic code - it's not available on all architectures. 
Signed-off-by: Ingo Molnar --- include/linux/irqnr.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index de66e4e10406..887477bc2ab0 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -23,15 +23,9 @@ #else /* CONFIG_GENERIC_HARDIRQS */ -#include /* need possible max_nr_irqs() */ - extern int nr_irqs; extern struct irq_desc *irq_to_desc(unsigned int irq); -# ifndef max_nr_irqs -# define max_nr_irqs(nr_cpus) NR_IRQS -# endif - # define for_each_irq_desc(irq, desc) \ for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ irq++, desc = irq_to_desc(irq)) \ -- cgit From 4a046d1754ee6ebb6f399696805ed61ea0444d4c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 12 Jan 2009 17:39:24 -0800 Subject: x86: arch_probe_nr_irqs Impact: save RAM with large NR_CPUS, get smaller nr_irqs Signed-off-by: Yinghai Lu Signed-off-by: Mike Travis --- include/linux/interrupt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 9127f6b51a39..472f11765f60 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -467,6 +467,7 @@ int show_interrupts(struct seq_file *p, void *v); struct irq_desc; extern int early_irq_init(void); +extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); extern int arch_init_chip_data(struct irq_desc *desc, int cpu); -- cgit From 3e5d8f978435bb9ba4dfe3f4514e65e7885db1a9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: x86: make percpu symbols zerobased on SMP [ Based on original patch from Christoph Lameter and Mike Travis. ] This patch makes percpu symbols zerobased on x86_64 SMP by adding PERCPU_VADDR() to vmlinux.lds.h which helps setting explicit vaddr on the percpu output section and using it in vmlinux_64.lds.S. A new PHDR is added as existing ones cannot contain sections near address zero. 
PERCPU_VADDR() also adds a new symbol __per_cpu_load which always points to the vaddr of the loaded percpu data.init region. The following adjustments have been made to accommodate the address change. * code to locate percpu gdt_page in head_64.S is updated to add the load address to the gdt_page offset. * __per_cpu_load is used in places where access to the init data area is necessary. * pda->data_offset is initialized soon after C code is entered as a zero value doesn't work anymore. This patch is mostly taken from Mike Travis' "x86_64: Base percpu variables at zero" patch. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- include/asm-generic/sections.h | 2 +- include/asm-generic/vmlinux.lds.h | 51 ++++++++++++++++++++++++++++++++++----- 2 files changed, 46 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 79a7ff925bf8..4ce48e878530 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -9,7 +9,7 @@ extern char __bss_start[], __bss_stop[]; extern char __init_begin[], __init_end[]; extern char _sinittext[], _einittext[]; extern char _end[]; -extern char __per_cpu_start[], __per_cpu_end[]; +extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; extern char __kprobes_text_start[], __kprobes_text_end[]; extern char __initdata_begin[], __initdata_end[]; extern char __start_rodata[], __end_rodata[]; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index c61fab1dd2f8..fc2f55f2dcd6 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -430,12 +430,51 @@ *(.initcall7.init) \ *(.initcall7s.init) -#define PERCPU(align) \ - . 
= ALIGN(align); \ - VMLINUX_SYMBOL(__per_cpu_start) = .; \ - .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ +#define PERCPU_PROLOG(vaddr) \ + VMLINUX_SYMBOL(__per_cpu_load) = .; \ + .data.percpu vaddr : AT(__per_cpu_load - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__per_cpu_start) = .; + +#define PERCPU_EPILOG(phdr) \ + VMLINUX_SYMBOL(__per_cpu_end) = .; \ + } phdr \ + . = __per_cpu_load + SIZEOF(.data.percpu); + +/** + * PERCPU_VADDR - define output section for percpu area + * @vaddr: explicit base address (optional) + * @phdr: destination PHDR (optional) + * + * Macro which expands to output section for percpu area. If @vaddr + * is not blank, it specifies explicit base address and all percpu + * symbols will be offset from the given address. If blank, @vaddr + * always equals @laddr + LOAD_OFFSET. + * + * @phdr defines the output PHDR to use if not blank. Be warned that + * output PHDR is sticky. If @phdr is specified, the next output + * section in the linker script will go there too. @phdr should have + * a leading colon. + * + * This macro defines three symbols, __per_cpu_load, __per_cpu_start + * and __per_cpu_end. The first one is the vaddr of loaded percpu + * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the + * end offset. + */ +#define PERCPU_VADDR(vaddr, phdr) \ + PERCPU_PROLOG(vaddr) \ *(.data.percpu.page_aligned) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ - } \ - VMLINUX_SYMBOL(__per_cpu_end) = .; + PERCPU_EPILOG(phdr) + +/** + * PERCPU - define output section for percpu area, simple version + * @align: required alignment + * + * Align to @align and outputs output section for percpu area. This + * macro doesn't maniuplate @vaddr or @phdr and __per_cpu_load and + * __per_cpu_start will be identical. + */ +#define PERCPU(align) \ + . 
= ALIGN(align); \ + PERCPU_VADDR( , ) -- cgit From 1a51e3a0aed18767cf2762e95456ecfeb0bca5e6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: x86: fold pda into percpu area on SMP [ Based on original patch from Christoph Lameter and Mike Travis. ] Currently pdas and percpu areas are allocated separately. %gs points to local pda and percpu area can be reached using pda->data_offset. This patch folds pda into percpu area. Due to strange gcc requirement, pda needs to be at the beginning of the percpu area so that pda->stack_canary is at %gs:40. To achieve this, a new percpu output section macro - PERCPU_VADDR_PREALLOC() - is added and used to reserve pda sized chunk at the start of the percpu area. After this change, for boot cpu, %gs first points to pda in the data.init area and later during setup_per_cpu_areas() gets updated to point to the actual pda. This means that setup_per_cpu_areas() need to reload %gs for CPU0 while clearing pda area for other cpus as cpu0 already has modified it when control reaches setup_per_cpu_areas(). This patch also removes now unnecessary get_local_pda() and its call sites. A lot of this patch is taken from Mike Travis' "x86_64: Fold pda into per cpu area" patch. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index fc2f55f2dcd6..e53319cf29cb 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -441,9 +441,10 @@ . 
= __per_cpu_load + SIZEOF(.data.percpu); /** - * PERCPU_VADDR - define output section for percpu area + * PERCPU_VADDR_PREALLOC - define output section for percpu area with prealloc * @vaddr: explicit base address (optional) * @phdr: destination PHDR (optional) + * @prealloc: the size of prealloc area * * Macro which expands to output section for percpu area. If @vaddr * is not blank, it specifies explicit base address and all percpu @@ -455,11 +456,33 @@ * section in the linker script will go there too. @phdr should have * a leading colon. * + * If @prealloc is non-zero, the specified number of bytes will be + * reserved at the start of percpu area. As the prealloc area is + * likely to break alignment, this macro puts areas in increasing + * alignment order. + * * This macro defines three symbols, __per_cpu_load, __per_cpu_start * and __per_cpu_end. The first one is the vaddr of loaded percpu * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the * end offset. */ +#define PERCPU_VADDR_PREALLOC(vaddr, segment, prealloc) \ + PERCPU_PROLOG(vaddr) \ + . += prealloc; \ + *(.data.percpu) \ + *(.data.percpu.shared_aligned) \ + *(.data.percpu.page_aligned) \ + PERCPU_EPILOG(segment) + +/** + * PERCPU_VADDR - define output section for percpu area + * @vaddr: explicit base address (optional) + * @phdr: destination PHDR (optional) + * + * Macro which expands to output section for percpu area. Mostly + * identical to PERCPU_VADDR_PREALLOC(@vaddr, @phdr, 0) other than + * using slighly different layout. 
+ */ #define PERCPU_VADDR(vaddr, phdr) \ PERCPU_PROLOG(vaddr) \ *(.data.percpu.page_aligned) \ -- cgit From 6dbde3530850d4d8bfc1b6bd4006d92786a2787f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Jan 2009 22:15:53 +0900 Subject: percpu: add optimized generic percpu accessors It is an optimization and a cleanup, and adds the following new generic percpu methods: percpu_read() percpu_write() percpu_add() percpu_sub() percpu_and() percpu_or() percpu_xor() and implements support for them on x86. (other architectures will fall back to a default implementation) The advantage is that for example to read a local percpu variable, instead of this sequence: return __get_cpu_var(var); ffffffff8102ca2b: 48 8b 14 fd 80 09 74 mov -0x7e8bf680(,%rdi,8),%rdx ffffffff8102ca32: 81 ffffffff8102ca33: 48 c7 c0 d8 59 00 00 mov $0x59d8,%rax ffffffff8102ca3a: 48 8b 04 10 mov (%rax,%rdx,1),%rax We can get a single instruction by using the optimized variants: return percpu_read(var); ffffffff8102ca3f: 65 48 8b 05 91 8f fd mov %gs:0x7efd8f91(%rip),%rax I also cleaned up the x86-specific APIs and made the x86 code use these new generic percpu primitives. tj: * fixed generic percpu_sub() definition as Roel Kluin pointed out * added percpu_and() for completeness's sake * made generic percpu ops atomic against preemption Signed-off-by: Ingo Molnar Signed-off-by: Tejun Heo --- include/asm-generic/percpu.h | 52 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'include') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index b0e63c672ebd..00f45ff081a6 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -80,4 +80,56 @@ extern void setup_per_cpu_areas(void); #define DECLARE_PER_CPU(type, name) extern PER_CPU_ATTRIBUTES \ __typeof__(type) per_cpu_var(name) +/* + * Optional methods for optimized non-lvalue per-cpu variable access. 
+ * + * @var can be a percpu variable or a field of it and its size should + * equal char, int or long. percpu_read() evaluates to a lvalue and + * all others to void. + * + * These operations are guaranteed to be atomic w.r.t. preemption. + * The generic versions use plain get/put_cpu_var(). Archs are + * encouraged to implement single-instruction alternatives which don't + * require preemption protection. + */ +#ifndef percpu_read +# define percpu_read(var) \ + ({ \ + typeof(per_cpu_var(var)) __tmp_var__; \ + __tmp_var__ = get_cpu_var(var); \ + put_cpu_var(var); \ + __tmp_var__; \ + }) +#endif + +#define __percpu_generic_to_op(var, val, op) \ +do { \ + get_cpu_var(var) op val; \ + put_cpu_var(var); \ +} while (0) + +#ifndef percpu_write +# define percpu_write(var, val) __percpu_generic_to_op(var, (val), =) +#endif + +#ifndef percpu_add +# define percpu_add(var, val) __percpu_generic_to_op(var, (val), +=) +#endif + +#ifndef percpu_sub +# define percpu_sub(var, val) __percpu_generic_to_op(var, (val), -=) +#endif + +#ifndef percpu_and +# define percpu_and(var, val) __percpu_generic_to_op(var, (val), &=) +#endif + +#ifndef percpu_or +# define percpu_or(var, val) __percpu_generic_to_op(var, (val), |=) +#endif + +#ifndef percpu_xor +# define percpu_xor(var, val) __percpu_generic_to_op(var, (val), ^=) +#endif + #endif /* _ASM_GENERIC_PERCPU_H_ */ -- cgit From 145cd30bac885dffad9db9d487baad07b68a3d04 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 17 Jan 2009 14:42:50 +0900 Subject: linker script: add missing VMLINUX_SYMBOL The newly added PERCPU_*() macros define and use __per_cpu_load but VMLINUX_SYMBOL() was missing from usages causing build failures on archs where linker visible symbol is different from C symbols (e.g. blackfin). 
Signed-off-by: Tejun Heo --- include/asm-generic/vmlinux.lds.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index e53319cf29cb..aa6b9b1b30b5 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -432,13 +432,14 @@ #define PERCPU_PROLOG(vaddr) \ VMLINUX_SYMBOL(__per_cpu_load) = .; \ - .data.percpu vaddr : AT(__per_cpu_load - LOAD_OFFSET) { \ + .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ + - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__per_cpu_start) = .; #define PERCPU_EPILOG(phdr) \ VMLINUX_SYMBOL(__per_cpu_end) = .; \ } phdr \ - . = __per_cpu_load + SIZEOF(.data.percpu); + . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu); /** * PERCPU_VADDR_PREALLOC - define output section for percpu area with prealloc -- cgit From 0bd74fa8e29dcad98f7e8ffe01ec05fb3326abaf Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 12:21:27 +0900 Subject: percpu: refactor percpu.h Impact: cleanup Refactor the DEFINE_PER_CPU_* macros and add .data.percpu.first section. 
Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- include/asm-generic/vmlinux.lds.h | 1 + include/linux/percpu.h | 41 ++++++++++++++++++++++----------------- 2 files changed, 24 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index aa6b9b1b30b5..32bbf50d3055 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -486,6 +486,7 @@ */ #define PERCPU_VADDR(vaddr, phdr) \ PERCPU_PROLOG(vaddr) \ + *(.data.percpu.first) \ *(.data.percpu.page_aligned) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 9f2a3751873a..0e24202b5a4e 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -9,34 +9,39 @@ #include #ifdef CONFIG_SMP -#define DEFINE_PER_CPU(type, name) \ - __attribute__((__section__(".data.percpu"))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name +#define PER_CPU_BASE_SECTION ".data.percpu" #ifdef MODULE -#define SHARED_ALIGNED_SECTION ".data.percpu" +#define PER_CPU_SHARED_ALIGNED_SECTION "" #else -#define SHARED_ALIGNED_SECTION ".data.percpu.shared_aligned" +#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned" #endif +#define PER_CPU_FIRST_SECTION ".first" -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - __attribute__((__section__(SHARED_ALIGNED_SECTION))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \ - ____cacheline_aligned_in_smp +#else + +#define PER_CPU_BASE_SECTION ".data" +#define PER_CPU_SHARED_ALIGNED_SECTION "" +#define PER_CPU_FIRST_SECTION "" + +#endif -#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ - __attribute__((__section__(".data.percpu.page_aligned"))) \ +#define DEFINE_PER_CPU_SECTION(type, name, section) \ + __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name -#else + #define DEFINE_PER_CPU(type, name) \ - PER_CPU_ATTRIBUTES __typeof__(type) 
per_cpu__##name + DEFINE_PER_CPU_SECTION(type, name, "") -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - DEFINE_PER_CPU(type, name) +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp -#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ - DEFINE_PER_CPU(type, name) -#endif +#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, ".page_aligned") + +#define DEFINE_PER_CPU_FIRST(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) -- cgit From 6b7c38d55587f43bcd2cbce3a98b1c0826982090 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 19 Jan 2009 12:21:28 +0900 Subject: linker script: kill PERCPU_VADDR_PREALLOC() Impact: cleanup With .data.percpu.first in place, PERCPU_VADDR_PREALLOC() is no longer necessary. Kill it. Signed-off-by: Tejun Heo --- include/asm-generic/vmlinux.lds.h | 45 +++++++-------------------------------- 1 file changed, 8 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 32bbf50d3055..53e21f36a802 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -430,22 +430,10 @@ *(.initcall7.init) \ *(.initcall7s.init) -#define PERCPU_PROLOG(vaddr) \ - VMLINUX_SYMBOL(__per_cpu_load) = .; \ - .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ - - LOAD_OFFSET) { \ - VMLINUX_SYMBOL(__per_cpu_start) = .; - -#define PERCPU_EPILOG(phdr) \ - VMLINUX_SYMBOL(__per_cpu_end) = .; \ - } phdr \ - . 
= VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu); - /** - * PERCPU_VADDR_PREALLOC - define output section for percpu area with prealloc + * PERCPU_VADDR - define output section for percpu area * @vaddr: explicit base address (optional) * @phdr: destination PHDR (optional) - * @prealloc: the size of prealloc area * * Macro which expands to output section for percpu area. If @vaddr * is not blank, it specifies explicit base address and all percpu @@ -457,40 +445,23 @@ * section in the linker script will go there too. @phdr should have * a leading colon. * - * If @prealloc is non-zero, the specified number of bytes will be - * reserved at the start of percpu area. As the prealloc area is - * likely to break alignment, this macro puts areas in increasing - * alignment order. - * * This macro defines three symbols, __per_cpu_load, __per_cpu_start * and __per_cpu_end. The first one is the vaddr of loaded percpu * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the * end offset. */ -#define PERCPU_VADDR_PREALLOC(vaddr, segment, prealloc) \ - PERCPU_PROLOG(vaddr) \ - . += prealloc; \ - *(.data.percpu) \ - *(.data.percpu.shared_aligned) \ - *(.data.percpu.page_aligned) \ - PERCPU_EPILOG(segment) - -/** - * PERCPU_VADDR - define output section for percpu area - * @vaddr: explicit base address (optional) - * @phdr: destination PHDR (optional) - * - * Macro which expands to output section for percpu area. Mostly - * identical to PERCPU_VADDR_PREALLOC(@vaddr, @phdr, 0) other than - * using slighly different layout. - */ #define PERCPU_VADDR(vaddr, phdr) \ - PERCPU_PROLOG(vaddr) \ + VMLINUX_SYMBOL(__per_cpu_load) = .; \ + .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ + - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__per_cpu_start) = .; \ *(.data.percpu.first) \ *(.data.percpu.page_aligned) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ - PERCPU_EPILOG(phdr) + VMLINUX_SYMBOL(__per_cpu_end) = .; \ + } phdr \ + . 
= VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu); /** * PERCPU - define output section for percpu area, simple version -- cgit From 5a611268b69f05262936dd177205acbce4471358 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 26 Jan 2009 08:44:05 -0500 Subject: generic, x86: fix __per_cpu_load relocation This patch fixes this linker error: WARNING: Absolute relocations present Offset Info Type Sym.Value Sym.Name c0a4e07d 00e78001 R_386_32 c0ab0000 __per_cpu_load Now, __per_cpu_load is a section-relative symbol: c0aa4000 D __per_cpu_load c0aa4000 A __per_cpu_load_abs Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 53e21f36a802..f3180a85c66a 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -451,17 +451,18 @@ * end offset. */ #define PERCPU_VADDR(vaddr, phdr) \ - VMLINUX_SYMBOL(__per_cpu_load) = .; \ - .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ + VMLINUX_SYMBOL(__per_cpu_load_abs) = .; \ + .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load_abs) \ - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ + VMLINUX_SYMBOL(__per_cpu_load) = LOADADDR(.data.percpu) + LOAD_OFFSET;\ *(.data.percpu.first) \ *(.data.percpu.page_aligned) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ VMLINUX_SYMBOL(__per_cpu_end) = .; \ } phdr \ - . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu); + . = VMLINUX_SYMBOL(__per_cpu_load_abs) + SIZEOF(.data.percpu); /** * PERCPU - define output section for percpu area, simple version -- cgit From dba3d36b2f0842ed7f25c33cd3a2ccdb3d0df9db Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 29 Jan 2009 17:10:12 +0100 Subject: Revert "generic, x86: fix __per_cpu_load relocation" This reverts commit 5a611268b69f05262936dd177205acbce4471358. 
It is causing occasional boot crashes, caused by certain linker versions (GNU ld version 2.18.50.0.6-2 20080403) messing up: 82dcc000 D __per_cpu_load c16e6000 A __per_cpu_load_abs The __per_cpu_load value is out of whack. Hpa noticed the following detail: * (gdb) p/x -(0xc16e6000-0x82dcc000) * $2 = 0xc16e6000 * I.e. one is the other << 1 The two symbols should be equal. Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f3180a85c66a..53e21f36a802 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -451,18 +451,17 @@ * end offset. */ #define PERCPU_VADDR(vaddr, phdr) \ - VMLINUX_SYMBOL(__per_cpu_load_abs) = .; \ - .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load_abs) \ + VMLINUX_SYMBOL(__per_cpu_load) = .; \ + .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ - VMLINUX_SYMBOL(__per_cpu_load) = LOADADDR(.data.percpu) + LOAD_OFFSET;\ *(.data.percpu.first) \ *(.data.percpu.page_aligned) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ VMLINUX_SYMBOL(__per_cpu_end) = .; \ } phdr \ - . = VMLINUX_SYMBOL(__per_cpu_load_abs) + SIZEOF(.data.percpu); + . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu); /** * PERCPU - define output section for percpu area, simple version -- cgit From 3ac6cffea4aa18007a454a7442da2855882f403d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 30 Jan 2009 16:32:22 +0900 Subject: linker script: use separate simpler definition for PERCPU() Impact: fix linker screwup on x86_32 Recent x86_64 zerobased patches introduced PERCPU_VADDR() to put .data.percpu to a predefined address and re-defined PERCPU() in terms of it. The new macro defined one extra symbol, __per_cpu_load, for LMA of the section so that the init data could be accessed. 
This new symbol introduced the following problems to x86_32. 1. If __per_cpu_load is defined outside of .data.percpu as an absolute symbol, relocation generation for relocatable kernel fails due to absolute relocation. 2. If __per_cpu_load is put inside .data.percpu with absolute address assignment to work around #1, linker gets confused and under certain configurations ends up relocating the symbol against .data.percpu such that the load address gets added on top of already set load address. As x86_32 doesn't use predefined address for .data.percpu, there's no need for it to care about the possibility of __per_cpu_load being different from __per_cpu_start. This patch defines PERCPU() separately so that __per_cpu_load is defined inside .data.percpu so that everything is ordinary linking-wise. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 53e21f36a802..5406e70aba86 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -445,10 +445,9 @@ * section in the linker script will go there too. @phdr should have * a leading colon. * - * This macro defines three symbols, __per_cpu_load, __per_cpu_start - * and __per_cpu_end. The first one is the vaddr of loaded percpu - * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the - * end offset. + * Note that this macros defines __per_cpu_load as an absolute symbol. + * If there is no need to put the percpu section at a predetermined + * address, use PERCPU(). */ #define PERCPU_VADDR(vaddr, phdr) \ VMLINUX_SYMBOL(__per_cpu_load) = .; \ @@ -470,7 +469,20 @@ * Align to @align and outputs output section for percpu area. This * macro doesn't maniuplate @vaddr or @phdr and __per_cpu_load and * __per_cpu_start will be identical. 
+ * + * This macro is equivalent to ALIGN(align); PERCPU_VADDR( , ) except + * that __per_cpu_load is defined as a relative symbol against + * .data.percpu which is required for relocatable x86_32 + * configuration. */ #define PERCPU(align) \ . = ALIGN(align); \ - PERCPU_VADDR( , ) + .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__per_cpu_load) = .; \ + VMLINUX_SYMBOL(__per_cpu_start) = .; \ + *(.data.percpu.first) \ + *(.data.percpu.page_aligned) \ + *(.data.percpu) \ + *(.data.percpu.shared_aligned) \ + VMLINUX_SYMBOL(__per_cpu_end) = .; \ + } -- cgit From 65a4e574d2382d83f71b30ea92f86d2e40a6ef8d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 03:36:17 +0100 Subject: smp, generic: introduce arch_disable_smp_support() instead of disable_ioapic_setup() Impact: cleanup disable_ioapic_setup() in init/main.c is ugly as the function is x86-specific. The #ifdef inline prototype there is ugly too. Replace it with a generic arch_disable_smp_support() function - which has a weak alias for non-x86 architectures and for non-ioapic x86 builds. Signed-off-by: Ingo Molnar --- include/linux/smp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index 715196b09d67..d41a3a865fe3 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -66,6 +66,12 @@ extern int __cpu_up(unsigned int cpunum); */ extern void smp_cpus_done(unsigned int max_cpus); +/* + * Callback to arch code if there's nosmp or maxcpus=0 on the + * boot command line: + */ +extern void arch_disable_smp_support(void); + /* * Call a function on all other processors */ -- cgit From a146649bc19d5eba4f5bfac6720c5f252d517a71 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 14:09:06 +0100 Subject: smp, generic: introduce arch_disable_smp_support(), build fix This function should be provided on UP too. 
Signed-off-by: Ingo Molnar --- include/linux/smp.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index d41a3a865fe3..bbacb7baa446 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -66,12 +66,6 @@ extern int __cpu_up(unsigned int cpunum); */ extern void smp_cpus_done(unsigned int max_cpus); -/* - * Callback to arch code if there's nosmp or maxcpus=0 on the - * boot command line: - */ -extern void arch_disable_smp_support(void); - /* * Call a function on all other processors */ @@ -182,6 +176,12 @@ static inline void init_call_single_data(void) #define put_cpu() preempt_enable() #define put_cpu_no_resched() preempt_enable_no_resched() +/* + * Callback to arch code if there's nosmp or maxcpus=0 on the + * boot command line: + */ +extern void arch_disable_smp_support(void); + void smp_setup_processor_id(void); #endif /* __LINUX_SMP_H */ -- cgit From 0fb807c3e573ff9de2965ca38c907605d4735d16 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sun, 8 Feb 2009 11:00:25 +0530 Subject: unconditionally include asm/types.h from linux/types.h Reported-by: Sam Ravnborg Signed-off-by: Jaswinder Singh Rajput --- include/linux/types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/types.h b/include/linux/types.h index c30973ace890..fca82ed55f49 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -1,6 +1,8 @@ #ifndef _LINUX_TYPES_H #define _LINUX_TYPES_H +#include <asm/types.h> + #ifndef __ASSEMBLY__ #ifdef __KERNEL__ @@ -10,7 +12,6 @@ #endif #include -#include <asm/types.h> #ifndef __KERNEL_STRICT_NAMES -- cgit From d3770449d3cb058b94ca1d050d5ced4a66c75ce4 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 8 Feb 2009 09:58:38 -0500 Subject: percpu: make PER_CPU_BASE_SECTION overridable by arches Impact: bug fix IA-64 needs to put percpu data in a separate section even on UP.
Fixes regression caused by "percpu: refactor percpu.h" Signed-off-by: Brian Gerst Acked-by: Tony Luck Signed-off-by: Ingo Molnar --- include/linux/percpu.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 0e24202b5a4e..3577ffd90d45 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -8,8 +8,15 @@ #include +#ifndef PER_CPU_BASE_SECTION #ifdef CONFIG_SMP #define PER_CPU_BASE_SECTION ".data.percpu" +#else +#define PER_CPU_BASE_SECTION ".data" +#endif +#endif + +#ifdef CONFIG_SMP #ifdef MODULE #define PER_CPU_SHARED_ALIGNED_SECTION "" @@ -20,7 +27,6 @@ #else -#define PER_CPU_BASE_SECTION ".data" #define PER_CPU_SHARED_ALIGNED_SECTION "" #define PER_CPU_FIRST_SECTION "" -- cgit From 7d97277b754d3ee098a5ec69b6aaafb00c94e2f2 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 7 Feb 2009 15:39:41 -0800 Subject: acpi/x86: introduce __apci_map_table, v4 to prevent wrongly overwriting fixmap that still want to use. ACPI used to rely on low mappings being all linearly mapped and grew a habit: it never really unmapped certain kinds of tables after use. This can cause problems - for example the hypothetical case when some spurious access still references it. 
v2: remove prev_map and prev_size in __acpi_map_table v3: let acpi_os_unmap_memory() call early_iounmap too, so remove the extra call to early_acpi_os_unmap_memory v4: fix typo in one acpi_get_table_with_size call Signed-off-by: Yinghai Lu Acked-by: Len Brown Signed-off-by: Ingo Molnar --- include/acpi/acpiosxf.h | 1 + include/acpi/acpixf.h | 4 ++++ include/linux/acpi.h | 1 + 3 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index a62720a7edc0..ab0b85cf21f3 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -144,6 +144,7 @@ void __iomem *acpi_os_map_memory(acpi_physical_address where, acpi_size length); void acpi_os_unmap_memory(void __iomem * logical_address, acpi_size size); +void early_acpi_os_unmap_memory(void __iomem * virt, acpi_size size); #ifdef ACPI_FUTURE_USAGE acpi_status diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index c8e8cf45830f..cc40102fe2f3 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -130,6 +130,10 @@ acpi_get_table_header(acpi_string signature, struct acpi_table_header *out_table_header); acpi_status +acpi_get_table_with_size(acpi_string signature, + u32 instance, struct acpi_table_header **out_table, + acpi_size *tbl_size); +acpi_status acpi_get_table(acpi_string signature, u32 instance, struct acpi_table_header **out_table); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6fce2fc2d124..d59f0fa4d772 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -79,6 +79,7 @@ typedef int (*acpi_table_handler) (struct acpi_table_header *table); typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end); char * __acpi_map_table (unsigned long phys_addr, unsigned long size); +void __init __acpi_unmap_table(char *map, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); int acpi_boot_table_init (void); -- cgit From
6cd61c0baa8bce32271226198b46c67a7a05d108 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:39 +0900 Subject: elf: add ELF_CORE_COPY_KERNEL_REGS() ELF core dump is used for both user land core dump and kernel crash dump. Depending on architecture, register might need to be accessed differently for userland and kernel. Allow architectures to define ELF_CORE_COPY_KERNEL_REGS() and use different operation for kernel register dump. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- include/linux/elfcore.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 5ca54d77079f..7605c5e9589f 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -111,6 +111,15 @@ static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *re #endif } +static inline void elf_core_copy_kernel_regs(elf_gregset_t *elfregs, struct pt_regs *regs) +{ +#ifdef ELF_CORE_COPY_KERNEL_REGS + ELF_CORE_COPY_KERNEL_REGS((*elfregs), regs); +#else + elf_core_copy_regs(elfregs, regs); +#endif +} + static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs) { #ifdef ELF_CORE_COPY_TASK_REGS -- cgit From 0d4ff4df341208b1b75e01feca27301c0dcbf490 Mon Sep 17 00:00:00 2001 From: Jaya Kumar Date: Thu, 1 Jan 2009 17:49:19 +0100 Subject: [ARM] 5353/1: fbdev: add E-Ink Broadsheet controller support v3 This patch adds support for the E-Ink Broadsheet display controller. 
Cc: Eric Miao Signed-off-by: Jaya Kumar Signed-off-by: Russell King --- include/video/broadsheetfb.h | 59 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 include/video/broadsheetfb.h (limited to 'include') diff --git a/include/video/broadsheetfb.h b/include/video/broadsheetfb.h new file mode 100644 index 000000000000..a758534c0272 --- /dev/null +++ b/include/video/broadsheetfb.h @@ -0,0 +1,59 @@ +/* + * broadsheetfb.h - definitions for the broadsheet framebuffer driver + * + * Copyright (C) 2008 by Jaya Kumar + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive for + * more details. + * + */ + +#ifndef _LINUX_BROADSHEETFB_H_ +#define _LINUX_BROADSHEETFB_H_ + +/* Broadsheet command defines */ +#define BS_CMD_INIT_SYS_RUN 0x06 +#define BS_CMD_INIT_DSPE_CFG 0x09 +#define BS_CMD_INIT_DSPE_TMG 0x0A +#define BS_CMD_INIT_ROTMODE 0x0B +#define BS_CMD_RD_REG 0x10 +#define BS_CMD_WR_REG 0x11 +#define BS_CMD_LD_IMG 0x20 +#define BS_CMD_LD_IMG_AREA 0x22 +#define BS_CMD_LD_IMG_END 0x23 +#define BS_CMD_WAIT_DSPE_TRG 0x28 +#define BS_CMD_WAIT_DSPE_FREND 0x29 +#define BS_CMD_RD_WFM_INFO 0x30 +#define BS_CMD_UPD_INIT 0x32 +#define BS_CMD_UPD_FULL 0x33 +#define BS_CMD_UPD_GDRV_CLR 0x37 + +/* Broadsheet pin interface specific defines */ +#define BS_CS 0x01 +#define BS_DC 0x02 +#define BS_WR 0x03 + +/* struct used by broadsheet. 
board specific stuff comes from *board */ +struct broadsheetfb_par { + struct fb_info *info; + struct broadsheet_board *board; + void (*write_reg)(struct broadsheetfb_par *, u16 reg, u16 val); + u16 (*read_reg)(struct broadsheetfb_par *, u16 reg); + wait_queue_head_t waitq; +}; + +/* board specific routines */ +struct broadsheet_board { + struct module *owner; + int (*init)(struct broadsheetfb_par *); + int (*wait_for_rdy)(struct broadsheetfb_par *); + void (*set_ctl)(struct broadsheetfb_par *, unsigned char, u8); + void (*set_hdb)(struct broadsheetfb_par *, u16); + u16 (*get_hdb)(struct broadsheetfb_par *); + void (*cleanup)(struct broadsheetfb_par *); + int (*get_panel_type)(void); + int (*setup_irq)(struct fb_info *); +}; + +#endif -- cgit From 970ec1a8213cd1a1ea29972ebbe4575a8b30bca1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 15 Feb 2009 14:06:13 -0800 Subject: [IA64] fix __apci_unmap_table Impact: fix build error to fix: tip/arch/ia64/kernel/acpi.c:203: error: conflicting types for '__acpi_unmap_table' tip/include/linux/acpi.h:82: error: previous declaration of '__acpi_unmap_table' was here tip/arch/ia64/kernel/acpi.c:203: error: conflicting types for '__acpi_unmap_table' tip/include/linux/acpi.h:82: error: previous declaration of '__acpi_unmap_table' was here Signed-off-by: Yinghai Lu Cc: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d59f0fa4d772..78199151c00b 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -79,7 +79,7 @@ typedef int (*acpi_table_handler) (struct acpi_table_header *table); typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end); char * __acpi_map_table (unsigned long phys_addr, unsigned long size); -void __init __acpi_unmap_table(char *map, unsigned long size); +void __acpi_unmap_table(char *map, unsigned 
long size); int early_acpi_boot_init(void); int acpi_boot_init (void); int acpi_boot_table_init (void); -- cgit From b36128c830a8f5bd7d4981f5b0b69950f5928ee6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: alloc_percpu: change percpu_ptr to per_cpu_ptr Impact: cleanup There are two allocated per-cpu accessor macros with almost identical spelling. The original and far more popular is per_cpu_ptr (44 files), so change over the other 4 files. tj: kill percpu_ptr() and update UP too Signed-off-by: Rusty Russell Cc: mingo@redhat.com Cc: lenb@kernel.org Cc: cpufreq@vger.kernel.org Signed-off-by: Tejun Heo --- include/linux/percpu.h | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 3577ffd90d45..c80cfe1260ec 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -81,23 +81,13 @@ struct percpu_data { }; #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) -/* - * Use this to get to a cpu's version of the per-cpu object dynamically - * allocated. Non-atomic access to the current CPU's version should - * probably be combined with get_cpu()/put_cpu(). 
- */ -#define percpu_ptr(ptr, cpu) \ -({ \ - struct percpu_data *__p = __percpu_disguise(ptr); \ - (__typeof__(ptr))__p->ptrs[(cpu)]; \ -}) extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask); extern void percpu_free(void *__pdata); #else /* CONFIG_SMP */ -#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) static __always_inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) { @@ -122,6 +112,15 @@ static inline void percpu_free(void *__pdata) cpu_possible_map) #define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type)) #define free_percpu(ptr) percpu_free((ptr)) -#define per_cpu_ptr(ptr, cpu) percpu_ptr((ptr), (cpu)) +/* + * Use this to get to a cpu's version of the per-cpu object dynamically + * allocated. Non-atomic access to the current CPU's version should + * probably be combined with get_cpu()/put_cpu(). + */ +#define per_cpu_ptr(ptr, cpu) \ +({ \ + struct percpu_data *__p = __percpu_disguise(ptr); \ + (__typeof__(ptr))__p->ptrs[(cpu)]; \ +}) #endif /* __LINUX_PERCPU_H */ -- cgit From 313e458f81ec3852106c5a83830fe0d4f405a71a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: alloc_percpu: add align argument to __alloc_percpu. This prepares for a real __alloc_percpu, by adding an alignment argument. Only one place uses __alloc_percpu directly, and that's for a string. tj: af_inet also uses __alloc_percpu(), update it. 
Signed-off-by: Rusty Russell Cc: Christoph Lameter Cc: Jens Axboe --- include/linux/percpu.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index c80cfe1260ec..1fdaee93c04d 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -108,9 +108,10 @@ static inline void percpu_free(void *__pdata) /* (legacy) interface for use without CPU hotplug handling */ -#define __alloc_percpu(size) percpu_alloc_mask((size), GFP_KERNEL, \ +#define __alloc_percpu(size, align) percpu_alloc_mask((size), GFP_KERNEL, \ cpu_possible_map) -#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type)) +#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \ + __alignof__(type)) #define free_percpu(ptr) percpu_free((ptr)) /* * Use this to get to a cpu's version of the per-cpu object dynamically -- cgit From f2a8205c4ef1af917d175c36a4097ae5587791c8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: percpu: kill percpu_alloc() and friends Impact: kill unused functions percpu_alloc() and its friends never saw much action. It was supposed to replace the cpu-mask unaware __alloc_percpu() but it never happened and in fact __percpu_alloc_mask() itself never really grew proper up/down handling interface either (no exported interface for populate/depopulate). percpu allocation is about to go through major reimplementation and there's no reason to carry this unused interface around. Replace it with __alloc_percpu() and free_percpu(). 
Signed-off-by: Tejun Heo --- include/linux/percpu.h | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 1fdaee93c04d..d99e24ae1811 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -82,46 +82,43 @@ struct percpu_data { #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) -extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask); -extern void percpu_free(void *__pdata); +/* + * Use this to get to a cpu's version of the per-cpu object + * dynamically allocated. Non-atomic access to the current CPU's + * version should probably be combined with get_cpu()/put_cpu(). + */ +#define per_cpu_ptr(ptr, cpu) \ +({ \ + struct percpu_data *__p = __percpu_disguise(ptr); \ + (__typeof__(ptr))__p->ptrs[(cpu)]; \ +}) + +extern void *__alloc_percpu(size_t size, size_t align); +extern void free_percpu(void *__pdata); #else /* CONFIG_SMP */ #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) -static __always_inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) +static inline void *__alloc_percpu(size_t size, size_t align) { + /* + * Can't easily make larger alignment work with kmalloc. WARN + * on it. Larger alignment should only be used for module + * percpu sections on SMP for which this path isn't used. 
+ */ + WARN_ON_ONCE(align > __alignof__(unsigned long long)); return kzalloc(size, gfp); } -static inline void percpu_free(void *__pdata) +static inline void free_percpu(void *p) { - kfree(__pdata); + kfree(p); } #endif /* CONFIG_SMP */ -#define percpu_alloc_mask(size, gfp, mask) \ - __percpu_alloc_mask((size), (gfp), &(mask)) - -#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map) - -/* (legacy) interface for use without CPU hotplug handling */ - -#define __alloc_percpu(size, align) percpu_alloc_mask((size), GFP_KERNEL, \ - cpu_possible_map) #define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \ __alignof__(type)) -#define free_percpu(ptr) percpu_free((ptr)) -/* - * Use this to get to a cpu's version of the per-cpu object dynamically - * allocated. Non-atomic access to the current CPU's version should - * probably be combined with get_cpu()/put_cpu(). - */ -#define per_cpu_ptr(ptr, cpu) \ -({ \ - struct percpu_data *__p = __percpu_disguise(ptr); \ - (__typeof__(ptr))__p->ptrs[(cpu)]; \ -}) #endif /* __LINUX_PERCPU_H */ -- cgit From f0aa6617903648077dffe5cfcf7c4458f4610fa7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: vmalloc: implement vm_area_register_early() Impact: allow multiple early vm areas There are places where kernel VM area needs to be allocated before vmalloc is initialized. This is done by allocating static vm_struct, initializing several fields and linking it to vmlist and later vmalloc initialization picking up these from vmlist. This is currently done manually and if there's more than one such areas, there's no defined way to arbitrate who gets which address. This patch implements vm_area_register_early(), which takes vm_area struct with flags and size initialized, assigns address to it and puts it on the vmlist. This way, multiple early vm areas can determine which addresses they should use. The only current user - alpha mm init - is converted to use it. 
Signed-off-by: Tejun Heo --- include/linux/vmalloc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 506e7620a986..bbc051392298 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -106,5 +106,6 @@ extern long vwrite(char *buf, char *addr, unsigned long count); */ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; +extern __init void vm_area_register_early(struct vm_struct *vm); #endif /* _LINUX_VMALLOC_H */ -- cgit From 8fc48985006da4ceba24508db64ec77fc0dfe3bb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: vmalloc: add un/map_kernel_range_noflush() Impact: two more public map/unmap functions Implement map_kernel_range_noflush() and unmap_kernel_range_noflush(). These functions respectively map and unmap an address range in the kernel VM area but don't do any vcache or tlb flushing. These will be used by the new percpu allocator. Signed-off-by: Tejun Heo Cc: Nick Piggin --- include/linux/vmalloc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index bbc051392298..599ba7984310 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -91,6 +91,9 @@ extern struct vm_struct *remove_vm_area(const void *addr); extern int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages); +extern int map_kernel_range_noflush(unsigned long start, unsigned long size, + pgprot_t prot, struct page **pages); +extern void unmap_kernel_range_noflush(unsigned long addr, unsigned long size); extern void unmap_kernel_range(unsigned long addr, unsigned long size); /* Allocate/destroy a 'vmalloc' VM area.
*/ -- cgit From fbf59bc9d74d1fb30b8e0630743aff2806eafcea Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: percpu: implement new dynamic percpu allocator Impact: new scalable dynamic percpu allocator which allows dynamic percpu areas to be accessed the same way as static ones Implement scalable dynamic percpu allocator which can be used for both static and dynamic percpu areas. This will allow static and dynamic areas to share faster direct access methods. This feature is optional and enabled only when CONFIG_HAVE_DYNAMIC_PER_CPU_AREA is defined by arch. Please read comment on top of mm/percpu.c for details. Signed-off-by: Tejun Heo Cc: Andrew Morton --- include/linux/percpu.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index d99e24ae1811..18080995ff3e 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -76,23 +76,37 @@ #ifdef CONFIG_SMP -struct percpu_data { - void *ptrs[1]; -}; +#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA -#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) +extern void *pcpu_base_addr; +typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); + +extern size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn, + struct page **pages, size_t cpu_size); /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. Non-atomic access to the current CPU's * version should probably be combined with get_cpu()/put_cpu(). 
*/ +#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) + +#else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ + +struct percpu_data { + void *ptrs[1]; +}; + +#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) + #define per_cpu_ptr(ptr, cpu) \ ({ \ struct percpu_data *__p = __percpu_disguise(ptr); \ (__typeof__(ptr))__p->ptrs[(cpu)]; \ }) +#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ + extern void *__alloc_percpu(size_t size, size_t align); extern void free_percpu(void *__pdata); -- cgit From b814d41f0987c7648d7ed07471258101c95c026b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 22:32:10 +0100 Subject: x86, mm: fault.c, simplify kmmio_fault() Impact: cleanup Remove an #ifdef from kmmio_fault() - we can do this by providing default implementations for is_kmmio_active() and kmmio_handler(). The compiler optimizes it all away in the !CONFIG_MMIOTRACE case. Also, while at it, clean up mmiotrace.h a bit: - standard header guards - standard vertical spaces for structure definitions No code changed (both with mmiotrace on and off in the config): text data bss dec hex filename 2947 12 12 2971 b9b fault.o.before 2947 12 12 2971 b9b fault.o.after Cc: Pekka Paalanen Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- include/linux/mmiotrace.h | 78 ++++++++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 139d7c88d9c9..3d1b7bde1283 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -1,5 +1,5 @@ -#ifndef MMIOTRACE_H -#define MMIOTRACE_H +#ifndef _LINUX_MMIOTRACE_H +#define _LINUX_MMIOTRACE_H #include #include @@ -13,28 +13,34 @@ typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, unsigned long condition, struct pt_regs *); struct kmmio_probe { - struct list_head list; /* kmmio internal list */ - unsigned long addr; /* start location of the probe point 
*/ - unsigned long len; /* length of the probe region */ - kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ - kmmio_post_handler_t post_handler; /* Called after addr is executed */ - void *private; + /* kmmio internal list: */ + struct list_head list; + /* start location of the probe point: */ + unsigned long addr; + /* length of the probe region: */ + unsigned long len; + /* Called before addr is executed: */ + kmmio_pre_handler_t pre_handler; + /* Called after addr is executed: */ + kmmio_post_handler_t post_handler; + void *private; }; +extern unsigned int kmmio_count; + +extern int register_kmmio_probe(struct kmmio_probe *p); +extern void unregister_kmmio_probe(struct kmmio_probe *p); + +#ifdef CONFIG_MMIOTRACE /* kmmio is active by some kmmio_probes? */ static inline int is_kmmio_active(void) { - extern unsigned int kmmio_count; return kmmio_count; } -extern int register_kmmio_probe(struct kmmio_probe *p); -extern void unregister_kmmio_probe(struct kmmio_probe *p); - /* Called from page fault handler. */ extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); -#ifdef CONFIG_MMIOTRACE /* Called from ioremap.c */ extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size, void __iomem *addr); @@ -43,7 +49,17 @@ extern void mmiotrace_iounmap(volatile void __iomem *addr); /* For anyone to insert markers. Remember trailing newline. */ extern int mmiotrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); -#else +#else /* !CONFIG_MMIOTRACE: */ +static inline int is_kmmio_active(void) +{ + return 0; +} + +static inline int kmmio_handler(struct pt_regs *regs, unsigned long addr) +{ + return 0; +} + static inline void mmiotrace_ioremap(resource_size_t offset, unsigned long size, void __iomem *addr) { @@ -63,28 +79,28 @@ static inline int mmiotrace_printk(const char *fmt, ...) 
#endif /* CONFIG_MMIOTRACE */ enum mm_io_opcode { - MMIO_READ = 0x1, /* struct mmiotrace_rw */ - MMIO_WRITE = 0x2, /* struct mmiotrace_rw */ - MMIO_PROBE = 0x3, /* struct mmiotrace_map */ - MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */ - MMIO_UNKNOWN_OP = 0x5, /* struct mmiotrace_rw */ + MMIO_READ = 0x1, /* struct mmiotrace_rw */ + MMIO_WRITE = 0x2, /* struct mmiotrace_rw */ + MMIO_PROBE = 0x3, /* struct mmiotrace_map */ + MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */ + MMIO_UNKNOWN_OP = 0x5, /* struct mmiotrace_rw */ }; struct mmiotrace_rw { - resource_size_t phys; /* PCI address of register */ - unsigned long value; - unsigned long pc; /* optional program counter */ - int map_id; - unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */ - unsigned char width; /* size of register access in bytes */ + resource_size_t phys; /* PCI address of register */ + unsigned long value; + unsigned long pc; /* optional program counter */ + int map_id; + unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */ + unsigned char width; /* size of register access in bytes */ }; struct mmiotrace_map { - resource_size_t phys; /* base address in PCI space */ - unsigned long virt; /* base virtual address */ - unsigned long len; /* mapping size */ - int map_id; - unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */ + resource_size_t phys; /* base address in PCI space */ + unsigned long virt; /* base virtual address */ + unsigned long len; /* mapping size */ + int map_id; + unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */ }; /* in kernel/trace/trace_mmiotrace.c */ @@ -94,4 +110,4 @@ extern void mmio_trace_rw(struct mmiotrace_rw *rw); extern void mmio_trace_mapping(struct mmiotrace_map *map); extern int mmio_trace_printk(const char *fmt, va_list args); -#endif /* MMIOTRACE_H */ +#endif /* _LINUX_MMIOTRACE_H */ -- cgit From b18018126f422f5b706fd750373425e10e84b486 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 22:42:57 +0100 Subject: x86, mm, 
kprobes: fault.c, simplify notify_page_fault() Impact: cleanup Remove an #ifdef from notify_page_fault(). The function still compiles to nothing in the !CONFIG_KPROBES case. Introduce kprobes_built_in() and kprobe_fault_handler() helpers to allow this - they return 0 if !CONFIG_KPROBES. No code changed: text data bss dec hex filename 4618 32 24 4674 1242 fault.o.before 4618 32 24 4674 1242 fault.o.after Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 32851eef48f0..2ec6cc14a114 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -182,6 +182,14 @@ struct kprobe_blackpoint { DECLARE_PER_CPU(struct kprobe *, current_kprobe); DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); +/* + * For #ifdef avoidance: + */ +static inline int kprobes_built_in(void) +{ + return 1; +} + #ifdef CONFIG_KRETPROBES extern void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs); @@ -271,8 +279,16 @@ void unregister_kretprobes(struct kretprobe **rps, int num); void kprobe_flush_task(struct task_struct *tk); void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); -#else /* CONFIG_KPROBES */ +#else /* !CONFIG_KPROBES: */ +static inline int kprobes_built_in(void) +{ + return 0; +} +static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + return 0; +} static inline struct kprobe *get_kprobe(void *addr) { return NULL; @@ -329,5 +345,5 @@ static inline void unregister_kretprobes(struct kretprobe **rps, int num) static inline void kprobe_flush_task(struct task_struct *tk) { } -#endif /* CONFIG_KPROBES */ -#endif /* _LINUX_KPROBES_H */ +#endif /* CONFIG_KPROBES */ +#endif /* _LINUX_KPROBES_H */ -- cgit From c132937556f56ee4b831ef4b23f1846e05fde102 Mon Sep 17 00:00:00 2001 From:
Tejun Heo Date: Tue, 24 Feb 2009 11:57:20 +0900 Subject: bootmem: clean up arch-specific bootmem wrapping Impact: cleaner and consistent bootmem wrapping By setting CONFIG_HAVE_ARCH_BOOTMEM_NODE, archs can define arch-specific wrappers for bootmem allocation. However, this is done a bit strangely in that only the high level convenience macros can be changed while lower level, but still exported, interface functions can't be wrapped. This not only is messy but also leads to strange situation where alloc_bootmem() does what the arch wants it to do but the equivalent __alloc_bootmem() call doesn't although they should be able to be used interchangeably. This patch updates bootmem such that archs can override / wrap the backend function - alloc_bootmem_core() instead of the highlevel interface functions to allow simpler and consistent wrapping. Also, HAVE_ARCH_BOOTMEM_NODE is renamed to HAVE_ARCH_BOOTMEM. Signed-off-by: Tejun Heo Cc: Johannes Weiner --- include/linux/bootmem.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 95837bfb5256..3a87f93081ed 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -69,10 +69,9 @@ extern int reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, unsigned long size, int flags); -#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE -extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags); -#endif - +extern int reserve_bootmem(unsigned long addr, + unsigned long size, + int flags); extern void *__alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal); @@ -94,7 +93,7 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); -#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE + #define alloc_bootmem(x) \ __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_nopanic(x) \ @@ -113,7 +112,6 @@ extern 
void *__alloc_bootmem_low_node(pg_data_t *pgdat, __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages_node(pgdat, x) \ __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) -#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ extern int reserve_bootmem_generic(unsigned long addr, unsigned long size, int flags); -- cgit From 2d0aae41695257603fc281b519677131ab5a752b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:21 +0900 Subject: bootmem: reorder interface functions and add a missing one Impact: cleanup and addition of missing interface wrapper The interface functions in bootmem.h were ordered in a not so orderly manner. Reorder them such that * functions allocating the same area group together - i.e. alloc_bootmem group and alloc_bootmem_low group. * functions w/o node parameter come before the ones w/ node parameter. * nopanic variants are immediately below their panicky counterparts. While at it, add alloc_bootmem_pages_node_nopanic() which was missing.
Signed-off-by: Tejun Heo Cc: Johannes Weiner --- include/linux/bootmem.h | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 3a87f93081ed..455d83219fae 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -65,22 +65,20 @@ extern void free_bootmem(unsigned long addr, unsigned long size); #define BOOTMEM_DEFAULT 0 #define BOOTMEM_EXCLUSIVE (1<<0) -extern int reserve_bootmem_node(pg_data_t *pgdat, - unsigned long physaddr, - unsigned long size, - int flags); extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags); -extern void *__alloc_bootmem_nopanic(unsigned long size, +extern int reserve_bootmem_node(pg_data_t *pgdat, + unsigned long physaddr, + unsigned long size, + int flags); + +extern void *__alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal); -extern void *__alloc_bootmem(unsigned long size, +extern void *__alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal); -extern void *__alloc_bootmem_low(unsigned long size, - unsigned long align, - unsigned long goal); extern void *__alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, @@ -89,6 +87,9 @@ extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); +extern void *__alloc_bootmem_low(unsigned long size, + unsigned long align, + unsigned long goal); extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, @@ -98,18 +99,21 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_nopanic(x) \ __alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_low(x) \ - __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) #define alloc_bootmem_pages(x) \ __alloc_bootmem(x, 
PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_nopanic(x) \ __alloc_bootmem_nopanic(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_low_pages(x) \ - __alloc_bootmem_low(x, PAGE_SIZE, 0) #define alloc_bootmem_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_pages_node_nopanic(pgdat, x) \ + __alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + +#define alloc_bootmem_low(x) \ + __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) +#define alloc_bootmem_low_pages(x) \ + __alloc_bootmem_low(x, PAGE_SIZE, 0) #define alloc_bootmem_low_pages_node(pgdat, x) \ __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) -- cgit From c0c0a29379b5848aec2e8f1c58d853d3cb7118b8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:21 +0900 Subject: vmalloc: add @align to vm_area_register_early() Impact: allow larger alignment for early vmalloc area allocation Some early vmalloc users might want larger alignment, for example, for custom large page mapping. Add @align to vm_area_register_early(). While at it, drop docbook comment on non-existent @size. 
Signed-off-by: Tejun Heo Cc: Nick Piggin Cc: Ivan Kokshaysky --- include/linux/vmalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 599ba7984310..2f6994fdf0e0 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -109,6 +109,6 @@ extern long vwrite(char *buf, char *addr, unsigned long count); */ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; -extern __init void vm_area_register_early(struct vm_struct *vm); +extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); #endif /* _LINUX_VMALLOC_H */ -- cgit From 8d408b4be37bc49c9086531f2ebe411cf5731746 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:21 +0900 Subject: percpu: give more latitude to arch specific first chunk initialization Impact: more latitude for first percpu chunk allocation The first percpu chunk serves the kernel static percpu area and may or may not contain extra room for further dynamic allocation. Initialization of the first chunk needs to be done before normal memory allocation service is up, so it has its own init path - pcpu_setup_static(). It seems archs need more latitude while initializing the first chunk, for example, to take advantage of large page mapping. This patch makes the following changes to allow this. * Define PERCPU_DYNAMIC_RESERVE to give arch a hint about how much space to reserve in the first chunk for further dynamic allocation. * Rename pcpu_setup_static() to pcpu_setup_first_chunk(). * Make pcpu_setup_first_chunk() much more flexible by fetching page pointer by callback and adding optional @unit_size, @free_size and @base_addr arguments which allow archs to selectively handle parts of chunk initialization to their liking.
Signed-off-by: Tejun Heo --- include/linux/percpu.h | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 18080995ff3e..910beb0abea2 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -78,12 +78,47 @@ #ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +/* minimum unit size, also is the maximum supported allocation size */ +#define PCPU_MIN_UNIT_SIZE (16UL << PAGE_SHIFT) + +/* + * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy + * back on the first chunk if arch is manually allocating and mapping + * it for faster access (as a part of large page mapping for example). + * Note that dynamic percpu allocator covers both static and dynamic + * areas, so these values are bigger than PERCPU_MODULE_RESERVE. + * + * On typical configuration with modules, the following values leave + * about 8k of free space on the first chunk after boot on both x86_32 + * and 64 when module support is enabled. When module support is + * disabled, it's much tighter. 
+ */ +#ifndef PERCPU_DYNAMIC_RESERVE +# if BITS_PER_LONG > 32 +# ifdef CONFIG_MODULES +# define PERCPU_DYNAMIC_RESERVE (6 << PAGE_SHIFT) +# else +# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) +# endif +# else +# ifdef CONFIG_MODULES +# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) +# else +# define PERCPU_DYNAMIC_RESERVE (2 << PAGE_SHIFT) +# endif +# endif +#endif /* PERCPU_DYNAMIC_RESERVE */ + extern void *pcpu_base_addr; +typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); -extern size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn, - struct page **pages, size_t cpu_size); +extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, + size_t static_size, size_t unit_size, + size_t free_size, void *base_addr, + pcpu_populate_pte_fn_t populate_pte_fn); + /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. Non-atomic access to the current CPU's -- cgit From 3255aa2eb636a508fc82a73fabbb8aaf2ff23c0f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 25 Feb 2009 08:21:52 +0100 Subject: x86, mm: pass in 'total' to __copy_from_user_*nocache() Impact: cleanup, enable future change Add a 'total bytes copied' parameter to __copy_from_user_*nocache(), and update all the callsites. The parameter is not used yet - architecture code can use it to more intelligently decide whether the copy should be cached or non-temporal. 
Cc: Salman Qazi Cc: Nick Piggin Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- include/linux/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 6b58367d145e..6f3c603b0d67 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -41,13 +41,13 @@ static inline void pagefault_enable(void) #ifndef ARCH_HAS_NOCACHE_UACCESS static inline unsigned long __copy_from_user_inatomic_nocache(void *to, - const void __user *from, unsigned long n) + const void __user *from, unsigned long n, unsigned long total) { return __copy_from_user_inatomic(to, from, n); } static inline unsigned long __copy_from_user_nocache(void *to, - const void __user *from, unsigned long n) + const void __user *from, unsigned long n, unsigned long total) { return __copy_from_user(to, from, n); } -- cgit From 17581ad812a9abb0182260374ef2e52d4a808a64 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Tue, 24 Feb 2009 17:35:14 -0800 Subject: gpu/drm, x86, PAT: PAT support for io_mapping_* Make io_mapping_create_wc and io_mapping_free go through PAT to make sure that there are no memory type aliases. 
Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Cc: Dave Airlie Cc: Jesse Barnes Cc: Eric Anholt Cc: Keith Packard Signed-off-by: Ingo Molnar --- include/linux/io-mapping.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index cbc2f0cd631b..f1ed66c43787 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -49,8 +49,9 @@ static inline struct io_mapping * io_mapping_create_wc(resource_size_t base, unsigned long size) { struct io_mapping *iomap; + pgprot_t prot; - if (!is_io_mapping_possible(base, size)) + if (!reserve_io_memtype_wc(base, size, &prot)) return NULL; iomap = kmalloc(sizeof(*iomap), GFP_KERNEL); @@ -59,13 +60,14 @@ io_mapping_create_wc(resource_size_t base, unsigned long size) iomap->base = base; iomap->size = size; - iomap->prot = pgprot_writecombine(__pgprot(__PAGE_KERNEL)); + iomap->prot = prot; return iomap; } static inline void io_mapping_free(struct io_mapping *mapping) { + free_io_memtype(mapping->base, mapping->size); kfree(mapping); } -- cgit From d2b0261506602bd969164879206027b30358ffdf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 25 Feb 2009 14:36:45 +0100 Subject: alloc_percpu: fix UP build Impact: build fix the !SMP branch had a 'gfp' leftover: include/linux/percpu.h: In function '__alloc_percpu': include/linux/percpu.h:160: error: 'gfp' undeclared (first use in this function) include/linux/percpu.h:160: error: (Each undeclared identifier is reported only once include/linux/percpu.h:160: error: for each function it appears in.) Use GFP_KERNEL like the SMP version does. 
Cc: Rusty Russell Cc: Tejun Heo Signed-off-by: Ingo Molnar --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 910beb0abea2..d8e5a9abbce0 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -157,7 +157,7 @@ static inline void *__alloc_percpu(size_t size, size_t align) * percpu sections on SMP for which this path isn't used. */ WARN_ON_ONCE(align > __alignof__(unsigned long long)); - return kzalloc(size, gfp); + return kzalloc(size, GFP_KERNEL); } static inline void free_percpu(void *p) -- cgit From e317603694bfd17b28a40de9d65e1a4ec12f816e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 26 Feb 2009 10:54:17 +0900 Subject: percpu: fix too low alignment restriction on UP UP __alloc_percpu() triggered WARN_ON_ONCE() if the requested alignment is larger than that of unsigned long long, which is too small for all the cacheline aligned allocations. Bump it up to SMP_CACHE_BYTES which kmalloc allocations generally guarantee. Signed-off-by: Tejun Heo Reported-by: Ingo Molnar --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index d8e5a9abbce0..545b068bcb70 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -156,7 +156,7 @@ static inline void *__alloc_percpu(size_t size, size_t align) * on it. Larger alignment should only be used for module * percpu sections on SMP for which this path isn't used. */ - WARN_ON_ONCE(align > __alignof__(unsigned long long)); + WARN_ON_ONCE(align > SMP_CACHE_BYTES); return kzalloc(size, GFP_KERNEL); } -- cgit From f5c1aa1537be39d8b9bb5279b5881d81898fd3cd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 1 Mar 2009 12:32:08 +0100 Subject: Revert "gpu/drm, x86, PAT: PAT support for io_mapping_*" This reverts commit 17581ad812a9abb0182260374ef2e52d4a808a64. 
Sitsofe Wheeler reported that /dev/dri/card0 is MIA on his EeePC 900 and bisected it to this commit. Graphics card is an i915 in an EeePC 900: 00:02.0 VGA compatible controller [0300]: Intel Corporation Mobile 915GM/GMS/910GML Express Graphics Controller [8086:2592] (rev 04) ( Most likely the ioremap() of the driver failed and hence the card did not initialize. ) Reported-by: Sitsofe Wheeler Bisected-by: Sitsofe Wheeler Cc: Venkatesh Pallipadi Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- include/linux/io-mapping.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index f1ed66c43787..cbc2f0cd631b 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -49,9 +49,8 @@ static inline struct io_mapping * io_mapping_create_wc(resource_size_t base, unsigned long size) { struct io_mapping *iomap; - pgprot_t prot; - if (!reserve_io_memtype_wc(base, size, &prot)) + if (!is_io_mapping_possible(base, size)) return NULL; iomap = kmalloc(sizeof(*iomap), GFP_KERNEL); @@ -60,14 +59,13 @@ io_mapping_create_wc(resource_size_t base, unsigned long size) iomap->base = base; iomap->size = size; - iomap->prot = prot; + iomap->prot = pgprot_writecombine(__pgprot(__PAGE_KERNEL)); return iomap; } static inline void io_mapping_free(struct io_mapping *mapping) { - free_io_memtype(mapping->base, mapping->size); kfree(mapping); } -- cgit From f180053694b43d5714bf56cb95499a3c32ff155c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 2 Mar 2009 11:00:57 +0100 Subject: x86, mm: dont use non-temporal stores in pagecache accesses Impact: standardize IO on cached ops On modern CPUs it is almost always a bad idea to use non-temporal stores, as the regression in this commit has shown it: 30d697f: x86: fix performance regression in write() syscall The kernel simply has no good information about whether using non-temporal stores is a good idea or not - and trying to add 
heuristics only increases complexity and inserts fragility. The regression on cached write()s took very long to be found - over two years. So don't take any chances and let the hardware decide how it makes use of its caches. The only exception is drivers/gpu/drm/i915/i915_gem.c: there we are absolutely sure that another entity (the GPU) will pick up the dirty data immediately and that the CPU will not touch that data before the GPU will. Also, keep the _nocache() primitives to make it easier for people to experiment with these details. There may be more clear-cut cases where non-cached copies can be used, outside of filemap.c. Cc: Salman Qazi Cc: Nick Piggin Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- include/linux/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 6f3c603b0d67..6b58367d145e 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -41,13 +41,13 @@ static inline void pagefault_enable(void) #ifndef ARCH_HAS_NOCACHE_UACCESS static inline unsigned long __copy_from_user_inatomic_nocache(void *to, - const void __user *from, unsigned long n, unsigned long total) + const void __user *from, unsigned long n) { return __copy_from_user_inatomic(to, from, n); } static inline unsigned long __copy_from_user_nocache(void *to, - const void __user *from, unsigned long n, unsigned long total) + const void __user *from, unsigned long n) { return __copy_from_user(to, from, n); } -- cgit From 6a242909b01120f6f3d571c0b75e20ec61f0d8d3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:58 +0900 Subject: percpu: clean up percpu constants Impact: cleanup Make the following cleanups. * There isn't much arch-specific about PERCPU_MODULE_RESERVE. Always define it whether arch overrides PERCPU_ENOUGH_ROOM or not. * blackfin overrides PERCPU_ENOUGH_ROOM to align static area size. Do it by default.
* percpu allocation sizes doesn't have much to do with the page size. Don't use PAGE_SHIFT in their definition. Signed-off-by: Tejun Heo Cc: Bryan Wu --- include/linux/percpu.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 545b068bcb70..2d34b038fe70 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -5,6 +5,7 @@ #include /* For kmalloc() */ #include #include +#include #include @@ -52,17 +53,18 @@ #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) -/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ -#ifndef PERCPU_ENOUGH_ROOM +/* enough to cover all DEFINE_PER_CPUs in modules */ #ifdef CONFIG_MODULES -#define PERCPU_MODULE_RESERVE 8192 +#define PERCPU_MODULE_RESERVE (8 << 10) #else -#define PERCPU_MODULE_RESERVE 0 +#define PERCPU_MODULE_RESERVE 0 #endif +#ifndef PERCPU_ENOUGH_ROOM #define PERCPU_ENOUGH_ROOM \ - (__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE) -#endif /* PERCPU_ENOUGH_ROOM */ + (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ + PERCPU_MODULE_RESERVE) +#endif /* * Must be an lvalue. 
Since @var must be a simple identifier, @@ -79,7 +81,7 @@ #ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA /* minimum unit size, also is the maximum supported allocation size */ -#define PCPU_MIN_UNIT_SIZE (16UL << PAGE_SHIFT) +#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) /* * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy @@ -96,15 +98,15 @@ #ifndef PERCPU_DYNAMIC_RESERVE # if BITS_PER_LONG > 32 # ifdef CONFIG_MODULES -# define PERCPU_DYNAMIC_RESERVE (6 << PAGE_SHIFT) +# define PERCPU_DYNAMIC_RESERVE (24 << 10) # else -# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) +# define PERCPU_DYNAMIC_RESERVE (16 << 10) # endif # else # ifdef CONFIG_MODULES -# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) +# define PERCPU_DYNAMIC_RESERVE (16 << 10) # else -# define PERCPU_DYNAMIC_RESERVE (2 << PAGE_SHIFT) +# define PERCPU_DYNAMIC_RESERVE (8 << 10) # endif # endif #endif /* PERCPU_DYNAMIC_RESERVE */ -- cgit From 2441d15c97d498b18f03ae9fba262ffeae42a08b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: percpu: cosmetic renames in pcpu_setup_first_chunk() Impact: cosmetic, preparation for future changes Make the following renames in pcpu_setup_first_chunk() in preparation for future changes.
* s/free_size/dyn_size/ * s/static_vm/first_vm/ * s/static_chunk/schunk/ Signed-off-by: Tejun Heo --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 2d34b038fe70..a0b4ea2a3354 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -118,7 +118,7 @@ typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t unit_size, - size_t free_size, void *base_addr, + size_t dyn_size, void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn); /* -- cgit From cafe8816b217b98dc3f268d3b77445da498beb4f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: percpu: use negative for auto for pcpu_setup_first_chunk() arguments Impact: argument semantic cleanup In pcpu_setup_first_chunk(), zero @unit_size and @dyn_size meant auto-sizing. It's okay for @unit_size as 0 doesn't make sense but 0 dynamic reserve size is valid. Also, if arch @dyn_size is calculated from other parameters, it might end up passing in 0 @dyn_size and malfunction when the size is automatically adjusted. This patch makes both @unit_size and @dyn_size ssize_t and uses -1 for auto sizing.
Signed-off-by: Tejun Heo --- include/linux/percpu.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index a0b4ea2a3354..a96fc53bbd62 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -117,8 +117,9 @@ typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, size_t unit_size, - size_t dyn_size, void *base_addr, + size_t static_size, + ssize_t unit_size, ssize_t dyn_size, + void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn); /* -- cgit From edcb463997ed7b2ffa3bac76e3e75957318f2e01 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: percpu, module: implement reserved allocation and use it for module percpu variables Impact: add reserved allocation functionality and use it for module percpu variables This patch implements reserved allocation from the first chunk. When setting up the first chunk, arch can ask to set aside a certain number of bytes right after the core static area which is available only through a separate reserved allocator. This will be used primarily for module static percpu variables on architectures with limited relocation range to ensure that the module percpu symbols are inside the relocatable range. If reserved area is requested, the first chunk becomes reserved and isn't available for regular allocation. If the first chunk also includes piggy-back dynamic allocation area, a separate chunk mapping the same region is created to serve dynamic allocation. The first one is called static first chunk and the second dynamic first chunk. Although they share the page map, their different area map initializations guarantee they serve disjoint areas according to their purposes.
If arch doesn't set up a reserved area, reserved allocation is handled like any other allocation. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index a96fc53bbd62..8ff15153ae20 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -117,10 +117,10 @@ typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, - ssize_t unit_size, ssize_t dyn_size, - void *base_addr, - pcpu_populate_pte_fn_t populate_pte_fn); + size_t static_size, size_t reserved_size, + ssize_t unit_size, ssize_t dyn_size, + void *base_addr, + pcpu_populate_pte_fn_t populate_pte_fn); /* * Use this to get to a cpu's version of the per-cpu object @@ -129,6 +129,8 @@ extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, */ #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) +extern void *__alloc_reserved_percpu(size_t size, size_t align); + #else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ struct percpu_data { -- cgit From 6b19b0c2400437a3c10059ede0e59b517092e1bd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: x86, percpu: setup reserved percpu area for x86_64 Impact: fix relocation overflow during module load x86_64 uses 32bit relocations for symbol access and static percpu symbols whether in core or modules must be inside 2GB of the percpu segment base which the dynamic percpu allocator doesn't guarantee. This patch makes x86_64 reserve PERCPU_MODULE_RESERVE bytes in the first chunk so that module percpu areas are always allocated from the first chunk which is always inside the relocatable range.
This problem exists for any percpu allocator but is easily triggered when using the embedding allocator because the second chunk is located beyond 2GB on it. This patch also changes the meaning of PERCPU_DYNAMIC_RESERVE such that it only indicates the size of the area to reserve for dynamic allocation as static and dynamic areas can be separate. New PERCPU_DYNAMIC_RESERVE is increased by 4k for both 32 and 64bits as the reserved area separation eats away some allocatable space and having slightly more headroom (currently between 4 and 8k after minimal boot sans module area) makes sense for common case performance. x86_32 can address anywhere from anywhere and doesn't need reserving. Mike Galbraith first reported the problem and bisected it to the embedding percpu allocator commit. Signed-off-by: Tejun Heo Reported-by: Mike Galbraith Reported-by: Jaswinder Singh Rajput --- include/linux/percpu.h | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 8ff15153ae20..54a968b4b924 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -85,31 +85,20 @@ /* * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy - * back on the first chunk if arch is manually allocating and mapping - * it for faster access (as a part of large page mapping for example). - * Note that dynamic percpu allocator covers both static and dynamic - * areas, so these values are bigger than PERCPU_MODULE_RESERVE. + * back on the first chunk for dynamic percpu allocation if arch is + * manually allocating and mapping it for faster access (as a part of + * large page mapping for example). * - * On typical configuration with modules, the following values leave - * about 8k of free space on the first chunk after boot on both x86_32 - * and 64 when module support is enabled. When module support is - * disabled, it's much tighter.
+ * The following values give between one and two pages of free space + * after typical minimal boot (2-way SMP, single disk and NIC) with + * both defconfig and a distro config on x86_64 and 32. More + * intelligent way to determine this would be nice. */ -#ifndef PERCPU_DYNAMIC_RESERVE -# if BITS_PER_LONG > 32 -# ifdef CONFIG_MODULES -# define PERCPU_DYNAMIC_RESERVE (24 << 10) -# else -# define PERCPU_DYNAMIC_RESERVE (16 << 10) -# endif -# else -# ifdef CONFIG_MODULES -# define PERCPU_DYNAMIC_RESERVE (16 << 10) -# else -# define PERCPU_DYNAMIC_RESERVE (8 << 10) -# endif -# endif -#endif /* PERCPU_DYNAMIC_RESERVE */ +#if BITS_PER_LONG > 32 +#define PERCPU_DYNAMIC_RESERVE (20 << 10) +#else +#define PERCPU_DYNAMIC_RESERVE (12 << 10) +#endif extern void *pcpu_base_addr; -- cgit From 6074d5b0a319fe8400ff079a3c289406ca024321 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 10 Mar 2009 16:27:48 +0900 Subject: percpu: more flexibility for @dyn_size of pcpu_setup_first_chunk() Impact: cleanup, more flexibility for first chunk init Non-negative @dyn_size used to be allowed iff @unit_size wasn't auto. This restriction stemmed from implementation detail and made things a bit less intuitive. This patch allows @dyn_size to be specified regardless of @unit_size and swaps the positions of @dyn_size and @unit_size so that the parameter order makes more sense (static, reserved and dyn sizes followed by enclosing unit_size). While at it, add @unit_size >= PCPU_MIN_UNIT_SIZE sanity check. 
Signed-off-by: Tejun Heo --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 54a968b4b924..fb455dcc59c7 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -107,7 +107,7 @@ typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, - ssize_t unit_size, ssize_t dyn_size, + ssize_t dyn_size, ssize_t unit_size, void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn); -- cgit From 66c3a75772247c31feabefb724e082220a1ab060 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 10 Mar 2009 16:27:48 +0900 Subject: percpu: generalize embedding first chunk setup helper Impact: code reorganization Separate out embedding first chunk setup helper from x86 embedding first chunk allocator and put it in mm/percpu.c. This will be used by the default percpu first chunk allocator and possibly by other archs. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index fb455dcc59c7..ee5615d65211 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -111,6 +111,10 @@ extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn); +extern ssize_t __init pcpu_embed_first_chunk( + size_t static_size, size_t reserved_size, + ssize_t dyn_size, ssize_t unit_size); + /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. 
Non-atomic access to the current CPU's -- cgit From 881a256d84e658d14ca1c162fe56e9cbbb1cdd49 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 31 Dec 2008 13:12:46 -0500 Subject: [SCSI] Add VPD helper Based on prior work by Martin Petersen and James Bottomley, this patch adds a generic helper for retrieving VPD pages from SCSI devices. Signed-off-by: Matthew Wilcox Signed-off-by: James Bottomley --- include/scsi/scsi_device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 01a4c58f8bad..9576690901dd 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -340,6 +340,7 @@ extern int scsi_mode_select(struct scsi_device *sdev, int pf, int sp, struct scsi_sense_hdr *); extern int scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries, struct scsi_sense_hdr *sshdr); +extern unsigned char *scsi_get_vpd_page(struct scsi_device *, u8 page); extern int scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state); extern struct scsi_event *sdev_evt_alloc(enum scsi_device_event evt_type, -- cgit From 0762a4824d6c6f8eb5d2646dfda95581d99afaa5 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 12 Jan 2009 09:28:55 +0100 Subject: [SCSI] Check for deleted device in scsi_device_online() scsi_device_online() is not just a negation of SDEV_OFFLINE, also devices in state SDEV_DEL are actually offline. 
Signed-off-by: Hannes Reinecke Signed-off-by: James Bottomley --- include/scsi/scsi_device.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 9576690901dd..15b09266b7ff 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -401,7 +401,8 @@ static inline unsigned int sdev_id(struct scsi_device *sdev) */ static inline int scsi_device_online(struct scsi_device *sdev) { - return sdev->sdev_state != SDEV_OFFLINE; + return (sdev->sdev_state != SDEV_OFFLINE && + sdev->sdev_state != SDEV_DEL); } static inline int scsi_device_blocked(struct scsi_device *sdev) { -- cgit From 1c9fbafc8c629c89183d6dccec67a8415513b0d1 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Sun, 4 Jan 2009 03:14:11 -0500 Subject: [SCSI] Remove SUGGEST flags The SUGGEST_* flags in the SCSI command result have been out of fashion for a while and we don't actually use them in the error handling. Remove the remaining occurrences. Signed-off-by: Martin K. Petersen Signed-off-by: James Bottomley --- include/scsi/scsi.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index a109165714d6..815d4047c4ce 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -402,16 +402,6 @@ static inline int scsi_is_wlun(unsigned int lun) #define DRIVER_HARD 0x07 #define DRIVER_SENSE 0x08 -#define SUGGEST_RETRY 0x10 -#define SUGGEST_ABORT 0x20 -#define SUGGEST_REMAP 0x30 -#define SUGGEST_DIE 0x40 -#define SUGGEST_SENSE 0x80 -#define SUGGEST_IS_OK 0xff - -#define DRIVER_MASK 0x0f -#define SUGGEST_MASK 0xf0 - /* * Internal return values. 
*/ @@ -447,7 +437,6 @@ static inline int scsi_is_wlun(unsigned int lun) #define msg_byte(result) (((result) >> 8) & 0xff) #define host_byte(result) (((result) >> 16) & 0xff) #define driver_byte(result) (((result) >> 24) & 0xff) -#define suggestion(result) (driver_byte(result) & SUGGEST_MASK) static inline void set_msg_byte(struct scsi_cmnd *cmd, char status) { -- cgit From 71969fd9e2c523d22bf1742eb31f1562247710eb Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 25 Jan 2009 16:50:02 +0200 Subject: [SCSI] major.h: char-major number for OSD device driver Allocate major 260 for osd. Signed-off-by: Boaz Harrosh CC: Torben Mathiasen Signed-off-by: James Bottomley --- include/linux/major.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/major.h b/include/linux/major.h index 88249452b935..058ec15dd060 100644 --- a/include/linux/major.h +++ b/include/linux/major.h @@ -171,5 +171,6 @@ #define VIOTAPE_MAJOR 230 #define BLOCK_EXT_MAJOR 259 +#define SCSI_OSD_MAJOR 260 /* open-osd's OSD scsi device */ #endif -- cgit From 82443a58d361123d418033e9e32ac29a842fce68 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 25 Jan 2009 16:51:33 +0200 Subject: [SCSI] add OSD_TYPE - Define the OSD_TYPE scsi device and let it show up in scans Signed-off-by: Boaz Harrosh Signed-off-by: James Bottomley --- include/scsi/scsi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index 815d4047c4ce..80d7f60e2663 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -263,6 +263,7 @@ static inline int scsi_status_is_good(int status) #define TYPE_RAID 0x0c #define TYPE_ENCLOSURE 0x0d /* Enclosure Services Device */ #define TYPE_RBC 0x0e +#define TYPE_OSD 0x11 #define TYPE_NO_LUN 0x7f /* SCSI protocols; these are taken from SPC-3 section 7.5 */ -- cgit From de258bf5e63863f42e0f9a7c5ffd29916a41e399 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 25 Jan 2009 16:54:10 +0200 
Subject: [SCSI] libosd: OSDv1 Headers Headers only patch. osd_protocol.h Contains a C-fied definition of the T10 OSD standard osd_types.h Contains CPU order common used types osd_initiator.h API definition of the osd_initiator library osd_sec.h Contains High level API for the security manager. [Note that checkpatch spews errors on things that are valid in this context and will not be fixed] Signed-off-by: Boaz Harrosh Reviewed-by: Benny Halevy Signed-off-by: James Bottomley --- include/scsi/osd_initiator.h | 332 +++++++++++++++++++++++++++++ include/scsi/osd_protocol.h | 497 +++++++++++++++++++++++++++++++++++++++++++ include/scsi/osd_sec.h | 45 ++++ include/scsi/osd_types.h | 40 ++++ 4 files changed, 914 insertions(+) create mode 100644 include/scsi/osd_initiator.h create mode 100644 include/scsi/osd_protocol.h create mode 100644 include/scsi/osd_sec.h create mode 100644 include/scsi/osd_types.h (limited to 'include') diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h new file mode 100644 index 000000000000..1d92247f820b --- /dev/null +++ b/include/scsi/osd_initiator.h @@ -0,0 +1,332 @@ +/* + * osd_initiator.h - OSD initiator API definition + * + * Copyright (C) 2008 Panasas Inc. All rights reserved. + * + * Authors: + * Boaz Harrosh + * Benny Halevy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * + */ +#ifndef __OSD_INITIATOR_H__ +#define __OSD_INITIATOR_H__ + +#include "osd_protocol.h" +#include "osd_types.h" + +#include + +/* Note: "NI" in comments below means "Not Implemented yet" */ + +/* + * Object-based Storage Device. + * This object represents an OSD device. + * It is not a full linux device in any way. It is only + * a place to hang resources associated with a Linux + * request Q and some default properties. 
+ */ +struct osd_dev { + struct scsi_device *scsi_device; + unsigned def_timeout; +}; + +void osd_dev_init(struct osd_dev *od, struct scsi_device *scsi_device); +void osd_dev_fini(struct osd_dev *od); + +struct osd_request; +typedef void (osd_req_done_fn)(struct osd_request *or, void *private); + +struct osd_request { + struct osd_cdb cdb; + struct osd_data_out_integrity_info out_data_integ; + struct osd_data_in_integrity_info in_data_integ; + + struct osd_dev *osd_dev; + struct request *request; + + struct _osd_req_data_segment { + void *buff; + unsigned alloc_size; /* 0 here means: don't call kfree */ + unsigned total_bytes; + } set_attr, enc_get_attr, get_attr; + + struct _osd_io_info { + struct bio *bio; + u64 total_bytes; + struct request *req; + struct _osd_req_data_segment *last_seg; + u8 *pad_buff; + } out, in; + + gfp_t alloc_flags; + unsigned timeout; + unsigned retries; + u8 sense[OSD_MAX_SENSE_LEN]; + enum osd_attributes_mode attributes_mode; + + osd_req_done_fn *async_done; + void *async_private; + int async_error; +}; + +/* + * How to use the osd library: + * + * osd_start_request + * Allocates a request. + * + * osd_req_* + * Call one of, to encode the desired operation. + * + * osd_add_{get,set}_attr + * Optionally add attributes to the CDB, list or page mode. + * + * osd_finalize_request + * Computes final data out/in offsets and signs the request, + * making it ready for execution. + * + * osd_execute_request + * May be called to execute it through the block layer. Other wise submit + * the associated block request in some other way. + * + * After execution: + * osd_req_decode_sense + * Decodes sense information to verify execution results. + * + * osd_req_decode_get_attr + * Retrieve osd_add_get_attr_list() values if used. + * + * osd_end_request + * Must be called to deallocate the request. 
+ */ + +/** + * osd_start_request - Allocate and initialize an osd_request + * + * @osd_dev: OSD device that holds the scsi-device and default values + * that the request is associated with. + * @gfp: The allocation flags to use for request allocation, and all + * subsequent allocations. This will be stored at + * osd_request->alloc_flags, can be changed by user later + * + * Allocate osd_request and initialize all members to the + * default/initial state. + */ +struct osd_request *osd_start_request(struct osd_dev *od, gfp_t gfp); + +enum osd_req_options { + OSD_REQ_FUA = 0x08, /* Force Unit Access */ + OSD_REQ_DPO = 0x10, /* Disable Page Out */ + + OSD_REQ_BYPASS_TIMESTAMPS = 0x80, +}; + +/** + * osd_finalize_request - Sign request and prepare request for execution + * + * @or: osd_request to prepare + * @options: combination of osd_req_options bit flags or 0. + * @cap: A Pointer to an OSD_CAP_LEN bytes buffer that is received from + * The security manager as capabilities for this cdb. + * @cap_key: The cryptographic key used to sign the cdb/data. Can be null + * if NOSEC is used. + * + * The actual request and bios are only allocated here, so are the get_attr + * buffers that will receive the returned attributes. Copy's @cap to cdb. + * Sign the cdb/data with @cap_key. + */ +int osd_finalize_request(struct osd_request *or, + u8 options, const void *cap, const u8 *cap_key); + +/** + * osd_execute_request - Execute the request synchronously through block-layer + * + * @or: osd_request to Executed + * + * Calls blk_execute_rq to q the command and waits for completion. + */ +int osd_execute_request(struct osd_request *or); + +/** + * osd_execute_request_async - Execute the request without waitting. + * + * @or: - osd_request to Executed + * @done: (Optional) - Called at end of execution + * @private: - Will be passed to @done function + * + * Calls blk_execute_rq_nowait to queue the command. 
When execution is done + * optionally calls @done with @private as parameter. @or->async_error will + * have the return code + */ +int osd_execute_request_async(struct osd_request *or, + osd_req_done_fn *done, void *private); + +/** + * osd_end_request - return osd_request to free store + * + * @or: osd_request to free + * + * Deallocate all osd_request resources (struct req's, BIOs, buffers, etc.) + */ +void osd_end_request(struct osd_request *or); + +/* + * CDB Encoding + * + * Note: call only one of the following methods. + */ + +/* + * Device commands + */ +void osd_req_set_master_seed_xchg(struct osd_request *or, ...);/* NI */ +void osd_req_set_master_key(struct osd_request *or, ...);/* NI */ + +void osd_req_format(struct osd_request *or, u64 tot_capacity); + +/* list all partitions + * @list header must be initialized to zero on first run. + * + * Call osd_is_obj_list_done() to find if we got the complete list. + */ +int osd_req_list_dev_partitions(struct osd_request *or, + osd_id initial_id, struct osd_obj_id_list *list, unsigned nelem); + +void osd_req_flush_obsd(struct osd_request *or, + enum osd_options_flush_scope_values); + +void osd_req_perform_scsi_command(struct osd_request *or, + const u8 *cdb, ...);/* NI */ +void osd_req_task_management(struct osd_request *or, ...);/* NI */ + +/* + * Partition commands + */ +void osd_req_create_partition(struct osd_request *or, osd_id partition); +void osd_req_remove_partition(struct osd_request *or, osd_id partition); + +void osd_req_set_partition_key(struct osd_request *or, + osd_id partition, u8 new_key_id[OSD_CRYPTO_KEYID_SIZE], + u8 seed[OSD_CRYPTO_SEED_SIZE]);/* NI */ + +/* list all collections in the partition + * @list header must be init to zero on first run. + * + * Call osd_is_obj_list_done() to find if we got the complete list. 
+ */ +int osd_req_list_partition_collections(struct osd_request *or, + osd_id partition, osd_id initial_id, struct osd_obj_id_list *list, + unsigned nelem); + +/* list all objects in the partition + * @list header must be init to zero on first run. + * + * Call osd_is_obj_list_done() to find if we got the complete list. + */ +int osd_req_list_partition_objects(struct osd_request *or, + osd_id partition, osd_id initial_id, struct osd_obj_id_list *list, + unsigned nelem); + +void osd_req_flush_partition(struct osd_request *or, + osd_id partition, enum osd_options_flush_scope_values); + +/* + * Collection commands + */ +void osd_req_create_collection(struct osd_request *or, + const struct osd_obj_id *);/* NI */ +void osd_req_remove_collection(struct osd_request *or, + const struct osd_obj_id *);/* NI */ + +/* list all objects in the collection */ +int osd_req_list_collection_objects(struct osd_request *or, + const struct osd_obj_id *, osd_id initial_id, + struct osd_obj_id_list *list, unsigned nelem); + +/* V2 only filtered list of objects in the collection */ +void osd_req_query(struct osd_request *or, ...);/* NI */ + +void osd_req_flush_collection(struct osd_request *or, + const struct osd_obj_id *, enum osd_options_flush_scope_values); + +void osd_req_get_member_attrs(struct osd_request *or, ...);/* V2-only NI */ +void osd_req_set_member_attrs(struct osd_request *or, ...);/* V2-only NI */ + +/* + * Object commands + */ +void osd_req_create_object(struct osd_request *or, struct osd_obj_id *); +void osd_req_remove_object(struct osd_request *or, struct osd_obj_id *); + +void osd_req_write(struct osd_request *or, + const struct osd_obj_id *, struct bio *data_out, u64 offset); +void osd_req_append(struct osd_request *or, + const struct osd_obj_id *, struct bio *data_out);/* NI */ +void osd_req_create_write(struct osd_request *or, + const struct osd_obj_id *, struct bio *data_out, u64 offset);/* NI */ +void osd_req_clear(struct osd_request *or, + const struct osd_obj_id 
*, u64 offset, u64 len);/* NI */ +void osd_req_punch(struct osd_request *or, + const struct osd_obj_id *, u64 offset, u64 len);/* V2-only NI */ + +void osd_req_flush_object(struct osd_request *or, + const struct osd_obj_id *, enum osd_options_flush_scope_values, + /*V2*/ u64 offset, /*V2*/ u64 len); + +void osd_req_read(struct osd_request *or, + const struct osd_obj_id *, struct bio *data_in, u64 offset); + +/* + * Root/Partition/Collection/Object Attributes commands + */ + +/* get before set */ +void osd_req_get_attributes(struct osd_request *or, const struct osd_obj_id *); + +/* set before get */ +void osd_req_set_attributes(struct osd_request *or, const struct osd_obj_id *); + +/* + * Attributes appended to most commands + */ + +/* Attributes List mode (or V2 CDB) */ + /* + * TODO: In ver2 if at finalize time only one attr was set and no gets, + * then the Attributes CDB mode is used automatically to save IO. + */ + +/* set a list of attributes. */ +int osd_req_add_set_attr_list(struct osd_request *or, + const struct osd_attr *, unsigned nelem); + +/* get a list of attributes */ +int osd_req_add_get_attr_list(struct osd_request *or, + const struct osd_attr *, unsigned nelem); + +/* + * Attributes list decoding + * Must be called after osd_request.request was executed + * It is called in a loop to decode the returned get_attr + * (see osd_add_get_attr) + */ +int osd_req_decode_get_attr_list(struct osd_request *or, + struct osd_attr *, int *nelem, void **iterator); + +/* Attributes Page mode */ + +/* + * Read an attribute page and optionally set one attribute + * + * Retrieves the attribute page directly to a user buffer. + * @attr_page_data shall stay valid until end of execution. 
+ * See osd_attributes.h for common page structures + */ +int osd_req_add_get_attr_page(struct osd_request *or, + u32 page_id, void *attr_page_data, unsigned max_page_len, + const struct osd_attr *set_one); + +#endif /* __OSD_LIB_H__ */ diff --git a/include/scsi/osd_protocol.h b/include/scsi/osd_protocol.h new file mode 100644 index 000000000000..ce1a8771ea71 --- /dev/null +++ b/include/scsi/osd_protocol.h @@ -0,0 +1,497 @@ +/* + * osd_protocol.h - OSD T10 standard C definitions. + * + * Copyright (C) 2008 Panasas Inc. All rights reserved. + * + * Authors: + * Boaz Harrosh + * Benny Halevy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * + * This file contains types and constants that are defined by the protocol + * Note: All names and symbols are taken from the OSD standard's text. + */ +#ifndef __OSD_PROTOCOL_H__ +#define __OSD_PROTOCOL_H__ + +#include +#include +#include + +enum { + OSDv1_ADDITIONAL_CDB_LENGTH = 192, + OSDv1_TOTAL_CDB_LEN = OSDv1_ADDITIONAL_CDB_LENGTH + 8, + OSDv1_CAP_LEN = 80, + /* Latest supported version */ + OSD_ADDITIONAL_CDB_LENGTH = OSDv1_ADDITIONAL_CDB_LENGTH, + OSD_TOTAL_CDB_LEN = OSDv1_TOTAL_CDB_LEN, + OSD_CAP_LEN = OSDv1_CAP_LEN, + + OSD_SYSTEMID_LEN = 20, + OSD_CRYPTO_KEYID_SIZE = 20, + OSD_CRYPTO_SEED_SIZE = 4, + OSD_CRYPTO_NONCE_SIZE = 12, + OSD_MAX_SENSE_LEN = 252, /* from SPC-3 */ + + OSD_PARTITION_FIRST_ID = 0x10000, + OSD_OBJECT_FIRST_ID = 0x10000, +}; + +/* (osd-r10 5.2.4) + * osd2r03: 5.2.3 Caching control bits + */ +enum osd_options_byte { + OSD_CDB_FUA = 0x08, /* Force Unit Access */ + OSD_CDB_DPO = 0x10, /* Disable Page Out */ +}; + +/* + * osd2r03: 5.2.5 Isolation. + * First 3 bits, V2-only. 
+ * Also for attr 110h "default isolation method" at Root Information page + */ +enum osd_options_byte_isolation { + OSD_ISOLATION_DEFAULT = 0, + OSD_ISOLATION_NONE = 1, + OSD_ISOLATION_STRICT = 2, + OSD_ISOLATION_RANGE = 4, + OSD_ISOLATION_FUNCTIONAL = 5, + OSD_ISOLATION_VENDOR = 7, +}; + +/* (osd-r10: 6.7) + * osd2r03: 6.8 FLUSH, FLUSH COLLECTION, FLUSH OSD, FLUSH PARTITION + */ +enum osd_options_flush_scope_values { + OSD_CDB_FLUSH_ALL = 0, + OSD_CDB_FLUSH_ATTR_ONLY = 1, + + OSD_CDB_FLUSH_ALL_RECURSIVE = 2, + /* V2-only */ + OSD_CDB_FLUSH_ALL_RANGE = 2, +}; + +/* osd2r03: 5.2.10 Timestamps control */ +enum { + OSD_CDB_NORMAL_TIMESTAMPS = 0, + OSD_CDB_BYPASS_TIMESTAMPS = 0x7f, +}; + +/* (osd-r10: 5.2.2.1) + * osd2r03: 5.2.4.1 Get and set attributes CDB format selection + * 2 bits at second nibble of command_specific_options byte + */ +enum osd_attributes_mode { + /* V2-only */ + OSD_CDB_SET_ONE_ATTR = 0x10, + + OSD_CDB_GET_ATTR_PAGE_SET_ONE = 0x20, + OSD_CDB_GET_SET_ATTR_LISTS = 0x30, + + OSD_CDB_GET_SET_ATTR_MASK = 0x30, +}; + +/* (osd-r10: 4.12.5) + * osd2r03: 4.14.5 Data-In and Data-Out buffer offsets + * byte offset = mantissa * (2^(exponent+8)) + * struct { + * unsigned mantissa: 28; + * int exponent: 04; + * } + */ +typedef __be32 __bitwise osd_cdb_offset; + +enum { + OSD_OFFSET_UNUSED = 0xFFFFFFFF, + OSD_OFFSET_MAX_BITS = 28, + + OSDv1_OFFSET_MIN_SHIFT = 8, + OSD_OFFSET_MAX_SHIFT = 16, +}; + +/* Return the smallest allowed encoded offset that contains @offset. + * + * The actual encoded offset returned is @offset + *padding. + * (up to max_shift, non-inclusive) + */ +osd_cdb_offset __osd_encode_offset(u64 offset, unsigned *padding, + int min_shift, int max_shift); + +/* Minimum alignment is 256 bytes + * Note: Seems from std v1 that exponent can be from 0+8 to 0xE+8 (inclusive) + * which is 8 to 23 but IBM code restricts it to 16, so be it. 
+ */ +static inline osd_cdb_offset osd_encode_offset_v1(u64 offset, unsigned *padding) +{ + return __osd_encode_offset(offset, padding, + OSDv1_OFFSET_MIN_SHIFT, OSD_OFFSET_MAX_SHIFT); +} + +/* osd2r03: 5.2.1 Overview */ +struct osd_cdb_head { + struct scsi_varlen_cdb_hdr varlen_cdb; +/*10*/ u8 options; + u8 command_specific_options; + u8 timestamp_control; +/*13*/ u8 reserved1[3]; +/*16*/ __be64 partition; +/*24*/ __be64 object; +/*32*/ union { /* V1 vs V2 alignment differences */ + struct __osdv1_cdb_addr_len { +/*32*/ __be32 list_identifier;/* Rarely used */ +/*36*/ __be64 length; +/*44*/ __be64 start_address; + } __packed v1; + }; +/*52*/ union { /* selected attributes mode Page/List/Single */ + struct osd_attributes_page_mode { +/*52*/ __be32 get_attr_page; +/*56*/ __be32 get_attr_alloc_length; +/*60*/ osd_cdb_offset get_attr_offset; + +/*64*/ __be32 set_attr_page; +/*68*/ __be32 set_attr_id; +/*72*/ __be32 set_attr_length; +/*76*/ osd_cdb_offset set_attr_offset; +/*80*/ } __packed attrs_page; + + struct osd_attributes_list_mode { +/*52*/ __be32 get_attr_desc_bytes; +/*56*/ osd_cdb_offset get_attr_desc_offset; + +/*60*/ __be32 get_attr_alloc_length; +/*64*/ osd_cdb_offset get_attr_offset; + +/*68*/ __be32 set_attr_bytes; +/*72*/ osd_cdb_offset set_attr_offset; + __be32 not_used; +/*80*/ } __packed attrs_list; + + /* osd2r03:5.2.4.2 Set one attribute value using CDB fields */ + struct osd_attributes_cdb_mode { +/*52*/ __be32 set_attr_page; +/*56*/ __be32 set_attr_id; +/*60*/ __be16 set_attr_len; +/*62*/ u8 set_attr_val[18]; +/*80*/ } __packed attrs_cdb; +/*52*/ u8 get_set_attributes_parameters[28]; + }; +} __packed; +/*80*/ + +/*160 v1*/ +struct osd_security_parameters { +/*160*/u8 integrity_check_value[OSD_CRYPTO_KEYID_SIZE]; +/*180*/u8 request_nonce[OSD_CRYPTO_NONCE_SIZE]; +/*192*/osd_cdb_offset data_in_integrity_check_offset; +/*196*/osd_cdb_offset data_out_integrity_check_offset; +} __packed; +/*200 v1*/ + +struct osdv1_cdb { + struct osd_cdb_head h; + u8 
caps[OSDv1_CAP_LEN]; + struct osd_security_parameters sec_params; +} __packed; + +struct osd_cdb { + union { + struct osdv1_cdb v1; + u8 buff[OSD_TOTAL_CDB_LEN]; + }; +} __packed; + +static inline struct osd_cdb_head *osd_cdb_head(struct osd_cdb *ocdb) +{ + return (struct osd_cdb_head *)ocdb->buff; +} + +/* define both version actions + * Ex name = FORMAT_OSD we have OSD_ACT_FORMAT_OSD && OSDv1_ACT_FORMAT_OSD + */ +#define OSD_ACT___(Name, Num) \ + OSD_ACT_##Name = __constant_cpu_to_be16(0x8880 + Num), \ + OSDv1_ACT_##Name = __constant_cpu_to_be16(0x8800 + Num), + +/* V2 only actions */ +#define OSD_ACT_V2(Name, Num) \ + OSD_ACT_##Name = __constant_cpu_to_be16(0x8880 + Num), + +#define OSD_ACT_V1_V2(Name, Num1, Num2) \ + OSD_ACT_##Name = __constant_cpu_to_be16(Num2), \ + OSDv1_ACT_##Name = __constant_cpu_to_be16(Num1), + +enum osd_service_actions { + OSD_ACT_V2(OBJECT_STRUCTURE_CHECK, 0x00) + OSD_ACT___(FORMAT_OSD, 0x01) + OSD_ACT___(CREATE, 0x02) + OSD_ACT___(LIST, 0x03) + OSD_ACT_V2(PUNCH, 0x04) + OSD_ACT___(READ, 0x05) + OSD_ACT___(WRITE, 0x06) + OSD_ACT___(APPEND, 0x07) + OSD_ACT___(FLUSH, 0x08) + OSD_ACT_V2(CLEAR, 0x09) + OSD_ACT___(REMOVE, 0x0A) + OSD_ACT___(CREATE_PARTITION, 0x0B) + OSD_ACT___(REMOVE_PARTITION, 0x0C) + OSD_ACT___(GET_ATTRIBUTES, 0x0E) + OSD_ACT___(SET_ATTRIBUTES, 0x0F) + OSD_ACT___(CREATE_AND_WRITE, 0x12) + OSD_ACT___(CREATE_COLLECTION, 0x15) + OSD_ACT___(REMOVE_COLLECTION, 0x16) + OSD_ACT___(LIST_COLLECTION, 0x17) + OSD_ACT___(SET_KEY, 0x18) + OSD_ACT___(SET_MASTER_KEY, 0x19) + OSD_ACT___(FLUSH_COLLECTION, 0x1A) + OSD_ACT___(FLUSH_PARTITION, 0x1B) + OSD_ACT___(FLUSH_OSD, 0x1C) + + OSD_ACT_V2(QUERY, 0x20) + OSD_ACT_V2(REMOVE_MEMBER_OBJECTS, 0x21) + OSD_ACT_V2(GET_MEMBER_ATTRIBUTES, 0x22) + OSD_ACT_V2(SET_MEMBER_ATTRIBUTES, 0x23) + OSD_ACT_V2(READ_MAP, 0x31) + + OSD_ACT_V1_V2(PERFORM_SCSI_COMMAND, 0x8F7E, 0x8F7C) + OSD_ACT_V1_V2(SCSI_TASK_MANAGEMENT, 0x8F7F, 0x8F7D) + /* 0x8F80 to 0x8FFF are Vendor specific */ +}; + +/* osd2r03: 7.1.3.2 List 
entry format for retrieving attributes */ +struct osd_attributes_list_attrid { + __be32 attr_page; + __be32 attr_id; +} __packed; + +/* + * osd2r03: 7.1.3.3 List entry format for retrieved attributes and + * for setting attributes + */ +struct osd_attributes_list_element { + __be32 attr_page; + __be32 attr_id; + __be16 attr_bytes; + u8 attr_val[0]; +} __packed; + +enum { + OSDv1_ATTRIBUTES_ELEM_ALIGN = 1, +}; + +enum { + OSD_ATTR_LIST_ALL_PAGES = 0xFFFFFFFF, + OSD_ATTR_LIST_ALL_IN_PAGE = 0xFFFFFFFF, +}; + +static inline unsigned osdv1_attr_list_elem_size(unsigned len) +{ + return ALIGN(len + sizeof(struct osd_attributes_list_element), + OSDv1_ATTRIBUTES_ELEM_ALIGN); +} + +/* + * osd2r03: 7.1.3 OSD attributes lists (Table 184) — List type values + */ +enum osd_attr_list_types { + OSD_ATTR_LIST_GET = 0x1, /* descriptors only */ + OSD_ATTR_LIST_SET_RETRIEVE = 0x9, /*descriptors/values variable-length*/ + OSD_V2_ATTR_LIST_MULTIPLE = 0xE, /* ver2, Multiple Objects lists*/ + OSD_V1_ATTR_LIST_CREATE_MULTIPLE = 0xF,/*ver1, used by create_multple*/ +}; + +/* osd2r03: 7.1.3.4 Multi-object retrieved attributes format */ +struct osd_attributes_list_multi_header { + __be64 object_id; + u8 object_type; /* object_type enum below */ + u8 reserved[5]; + __be16 list_bytes; + /* followed by struct osd_attributes_list_element's */ +}; + +struct osdv1_attributes_list_header { + u8 type; /* low 4-bit only */ + u8 pad; + __be16 list_bytes; /* Initiator shall set to Zero. Only set by target */ + /* + * type=9 followed by struct osd_attributes_list_element's + * type=E followed by struct osd_attributes_list_multi_header's + */ +} __packed; + +static inline unsigned osdv1_list_size(struct osdv1_attributes_list_header *h) +{ + return be16_to_cpu(h->list_bytes); +} + +/* (osd-r10 6.13) + * osd2r03: 6.15 LIST (Table 79) LIST command parameter data. + * for root_lstchg below + */ +enum { + OSD_OBJ_ID_LIST_PAR = 0x1, /* V1-only. 
Not used in V2 */ + OSD_OBJ_ID_LIST_LSTCHG = 0x2, +}; + +/* + * osd2r03: 6.15.2 LIST command parameter data + * (Also for LIST COLLECTION) + */ +struct osd_obj_id_list { + __be64 list_bytes; /* bytes in list excluding list_bytes (-8) */ + __be64 continuation_id; + __be32 list_identifier; + u8 pad[3]; + u8 root_lstchg; + __be64 object_ids[0]; +} __packed; + +static inline bool osd_is_obj_list_done(struct osd_obj_id_list *list, + bool *is_changed) +{ + *is_changed = (0 != (list->root_lstchg & OSD_OBJ_ID_LIST_LSTCHG)); + return 0 != list->continuation_id; +} + +/* + * osd2r03: 4.12.4.5 The ALLDATA security method + */ +struct osd_data_out_integrity_info { + __be64 data_bytes; + __be64 set_attributes_bytes; + __be64 get_attributes_bytes; + __be64 integrity_check_value; +} __packed; + +struct osd_data_in_integrity_info { + __be64 data_bytes; + __be64 retrieved_attributes_bytes; + __be64 integrity_check_value; +} __packed; + +struct osd_timestamp { + u8 time[6]; /* number of milliseconds since 1/1/1970 UT (big endian) */ +} __packed; +/* FIXME: define helper functions to convert to/from osd time format */ + +/* + * Capability & Security definitions + * osd2r03: 4.11.2.2 Capability format + * osd2r03: 5.2.8 Security parameters + */ + +struct osd_key_identifier { + u8 id[7]; /* if you know why 7 please email bharrosh@panasas.com */ +} __packed; + +/* for osd_capability.format */ +enum { + OSD_SEC_CAP_FORMAT_NO_CAPS = 0, + OSD_SEC_CAP_FORMAT_VER1 = 1, + OSD_SEC_CAP_FORMAT_VER2 = 2, +}; + +/* security_method */ +enum { + OSD_SEC_NOSEC = 0, + OSD_SEC_CAPKEY = 1, + OSD_SEC_CMDRSP = 2, + OSD_SEC_ALLDATA = 3, +}; + +enum object_type { + OSD_SEC_OBJ_ROOT = 0x1, + OSD_SEC_OBJ_PARTITION = 0x2, + OSD_SEC_OBJ_COLLECTION = 0x40, + OSD_SEC_OBJ_USER = 0x80, +}; + +enum osd_capability_bit_masks { + OSD_SEC_CAP_APPEND = BIT(0), + OSD_SEC_CAP_OBJ_MGMT = BIT(1), + OSD_SEC_CAP_REMOVE = BIT(2), + OSD_SEC_CAP_CREATE = BIT(3), + OSD_SEC_CAP_SET_ATTR = BIT(4), + OSD_SEC_CAP_GET_ATTR = BIT(5), + 
OSD_SEC_CAP_WRITE = BIT(6), + OSD_SEC_CAP_READ = BIT(7), + + OSD_SEC_CAP_NONE1 = BIT(8), + OSD_SEC_CAP_NONE2 = BIT(9), + OSD_SEC_CAP_NONE3 = BIT(10), + OSD_SEC_CAP_QUERY = BIT(11), /*v2 only*/ + OSD_SEC_CAP_M_OBJECT = BIT(12), /*v2 only*/ + OSD_SEC_CAP_POL_SEC = BIT(13), + OSD_SEC_CAP_GLOBAL = BIT(14), + OSD_SEC_CAP_DEV_MGMT = BIT(15), +}; + +/* for object_descriptor_type (hi nibble used) */ +enum { + OSD_SEC_OBJ_DESC_NONE = 0, /* Not allowed */ + OSD_SEC_OBJ_DESC_OBJ = 1 << 4, /* v1: also collection */ + OSD_SEC_OBJ_DESC_PAR = 2 << 4, /* also root */ + OSD_SEC_OBJ_DESC_COL = 3 << 4, /* v2 only */ +}; + +/* (osd-r10:4.9.2.2) + * osd2r03:4.11.2.2 Capability format + */ +struct osd_capability_head { + u8 format; /* low nibble */ + u8 integrity_algorithm__key_version; /* MAKE_BYTE(integ_alg, key_ver) */ + u8 security_method; + u8 reserved1; +/*04*/ struct osd_timestamp expiration_time; +/*10*/ u8 audit[20]; +/*30*/ u8 discriminator[12]; +/*42*/ struct osd_timestamp object_created_time; +/*48*/ u8 object_type; +/*49*/ u8 permissions_bit_mask[5]; +/*54*/ u8 reserved2; +/*55*/ u8 object_descriptor_type; /* high nibble */ +} __packed; + +/*56 v1*/ +struct osdv1_cap_object_descriptor { + union { + struct { +/*56*/ __be32 policy_access_tag; +/*60*/ __be64 allowed_partition_id; +/*68*/ __be64 allowed_object_id; +/*76*/ __be32 reserved; + } __packed obj_desc; + +/*56*/ u8 object_descriptor[24]; + }; +} __packed; +/*80 v1*/ + +struct osd_capability { + struct osd_capability_head h; + struct osdv1_cap_object_descriptor od; +} __packed; + +/** + * osd_sec_set_caps - set cap-bits into the capabilities header + * + * @cap: The osd_capability_head to set cap bits to. 
+ * @bit_mask: Use an ORed list of enum osd_capability_bit_masks values + * + * permissions_bit_mask is unaligned use below to set into caps + * in a version independent way + */ +static inline void osd_sec_set_caps(struct osd_capability_head *cap, + u16 bit_mask) +{ + /* + *Note: The bits above are defined LE order this is because this way + * they can grow in the future to more then 16, and still retain + * there constant values. + */ + put_unaligned_le16(bit_mask, &cap->permissions_bit_mask); +} + +#endif /* ndef __OSD_PROTOCOL_H__ */ diff --git a/include/scsi/osd_sec.h b/include/scsi/osd_sec.h new file mode 100644 index 000000000000..4c09fee8ae1e --- /dev/null +++ b/include/scsi/osd_sec.h @@ -0,0 +1,45 @@ +/* + * osd_sec.h - OSD security manager API + * + * Copyright (C) 2008 Panasas Inc. All rights reserved. + * + * Authors: + * Boaz Harrosh + * Benny Halevy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * + */ +#ifndef __OSD_SEC_H__ +#define __OSD_SEC_H__ + +#include "osd_protocol.h" +#include "osd_types.h" + +/* + * Contains types and constants of osd capabilities and security + * encoding/decoding. + * API is trying to keep security abstract so initiator of an object + * based pNFS client knows as little as possible about security and + * capabilities. It is the Server's osd-initiator place to know more. + * Also can be used by osd-target. + */ +void osd_sec_encode_caps(void *caps, ...);/* NI */ +void osd_sec_init_nosec_doall_caps(void *caps, + const struct osd_obj_id *obj, bool is_collection, const bool is_v1); + +bool osd_is_sec_alldata(struct osd_security_parameters *sec_params); + +/* Conditionally sign the CDB according to security setting in ocdb + * with cap_key */ +void osd_sec_sign_cdb(struct osd_cdb *ocdb, const u8 *cap_key); + +/* Unconditionally sign the BIO data with cap_key. + * Check for osd_is_sec_alldata() was done prior to calling this. 
*/ +void osd_sec_sign_data(void *data_integ, struct bio *bio, const u8 *cap_key); + +/* Version independent copy of caps into the cdb */ +void osd_set_caps(struct osd_cdb *cdb, const void *caps); + +#endif /* ndef __OSD_SEC_H__ */ diff --git a/include/scsi/osd_types.h b/include/scsi/osd_types.h new file mode 100644 index 000000000000..3f5e88cc75c0 --- /dev/null +++ b/include/scsi/osd_types.h @@ -0,0 +1,40 @@ +/* + * osd_types.h - Types and constants which are not part of the protocol. + * + * Copyright (C) 2008 Panasas Inc. All rights reserved. + * + * Authors: + * Boaz Harrosh + * Benny Halevy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * + * Contains types and constants that are implementation specific and are + * used by more than one part of the osd library. + * (Eg initiator/target/security_manager/...) + */ +#ifndef __OSD_TYPES_H__ +#define __OSD_TYPES_H__ + +struct osd_systemid { + u8 data[OSD_SYSTEMID_LEN]; +}; + +typedef u64 __bitwise osd_id; + +struct osd_obj_id { + osd_id partition; + osd_id id; +}; + +static const struct __weak osd_obj_id osd_root_object = {0, 0}; + +struct osd_attr { + u32 attr_page; + u32 attr_id; + u16 len; /* byte count of operand */ + void *val_ptr; /* in network order */ +}; + +#endif /* ndef __OSD_TYPES_H__ */ -- cgit From 95b05a7db5865855c32e0bb8b244c3a7aac1cfeb Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 25 Jan 2009 16:56:47 +0200 Subject: [SCSI] osd_uld: OSD scsi ULD Add a Linux driver module that registers as a SCSI ULD and probes for OSD type SCSI devices. When an OSD-type SCSI device is found a character device is created in the form of /dev/osdX - where X goes from 0 up to hard coded 64. The Major character device number used is 260. 
Signed-off-by: Boaz Harrosh Reviewed-by: Benny Halevy Signed-off-by: James Bottomley --- include/scsi/osd_initiator.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index 1d92247f820b..a5dbbddcf73b 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -33,6 +33,12 @@ struct osd_dev { unsigned def_timeout; }; +/* Add/remove test ioctls from external modules */ +typedef int (do_test_fn)(struct osd_dev *od, unsigned cmd, unsigned long arg); +int osduld_register_test(unsigned ioctl, do_test_fn *do_test); +void osduld_unregister_test(unsigned ioctl); + +/* These are called by uld at probe time */ void osd_dev_init(struct osd_dev *od, struct scsi_device *scsi_device); void osd_dev_fini(struct osd_dev *od); -- cgit From b799bc7da0ce5ba4a988c521a8fb10452eb419f0 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 25 Jan 2009 16:58:03 +0200 Subject: [SCSI] osd_uld: API for retrieving osd devices from Kernel Kernel clients like exofs can retrieve struct osd_dev(s) by means of the API below. + osduld_path_lookup() - given a path (e.g. "/dev/osd0") locks and returns the corresponding struct osd_dev, which is then needed for subsequent libosd use. + osduld_put_device() - free up use of an osd_dev. Devices can be shared by multiple clients. The osd_uld_device's lifetime is governed by an embedded kref structure. The osd_uld_device holds an extra reference to both its char-device and its scsi_device, and will release these just before the final deallocation. There are three possible lock sources of the osd_uld_device 1. First and foremost is the probe() function called by scsi-ml upon a successful login into a target. Released in release() on logout. 2. Second by user-mode file handles opened on the char-dev. 3. Third is here by Kernel users. All three locks must be removed before the osd_uld_device is freed. The MODULE has three lock sources as well: 1.
scsi-ml at probe() time, removed after release(). (login/logout) 2. The user-mode file handles open/close. 3. Import symbols by client modules like exofs. TODO: This API is not enough for the pNFS-objects LD. A more versatile API will be needed. Proposed API could be: struct osd_dev *osduld_sysid_lookup(const char id[OSD_SYSTEMID_LEN]); Signed-off-by: Boaz Harrosh Signed-off-by: James Bottomley --- include/scsi/osd_initiator.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index a5dbbddcf73b..e84dc7aa5e34 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -33,6 +33,10 @@ struct osd_dev { unsigned def_timeout; }; +/* Retrieve/return osd_dev(s) for use by Kernel clients */ +struct osd_dev *osduld_path_lookup(const char *dev_name); /*Use IS_ERR/ERR_PTR*/ +void osduld_put_device(struct osd_dev *od); + /* Add/remove test ioctls from external modules */ typedef int (do_test_fn)(struct osd_dev *od, unsigned cmd, unsigned long arg); int osduld_register_test(unsigned ioctl, do_test_fn *do_test); -- cgit From 4ef1a3d70d02663f6bfe901db629e8e608da15b1 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 25 Jan 2009 16:59:50 +0200 Subject: [SCSI] libosd: attributes Support Support for both List-Mode and Page-Mode osd attributes. One of these operations may be added to most other operations. 
Define the OSD standard's attribute pages constants and structures (osd_attributes.h) Signed-off-by: Boaz Harrosh Reviewed-by: Benny Halevy Signed-off-by: James Bottomley --- include/scsi/osd_attributes.h | 327 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 327 insertions(+) create mode 100644 include/scsi/osd_attributes.h (limited to 'include') diff --git a/include/scsi/osd_attributes.h b/include/scsi/osd_attributes.h new file mode 100644 index 000000000000..f888a6fda073 --- /dev/null +++ b/include/scsi/osd_attributes.h @@ -0,0 +1,327 @@ +#ifndef __OSD_ATTRIBUTES_H__ +#define __OSD_ATTRIBUTES_H__ + +#include "osd_protocol.h" + +/* + * Contains types and constants that define attribute pages and attribute + * numbers and their data types. + */ + +#define ATTR_SET(pg, id, l, ptr) \ + { .attr_page = pg, .attr_id = id, .len = l, .val_ptr = ptr } + +#define ATTR_DEF(pg, id, l) ATTR_SET(pg, id, l, NULL) + +/* osd-r10 4.7.3 Attributes pages */ +enum { + OSD_APAGE_OBJECT_FIRST = 0x0, + OSD_APAGE_OBJECT_DIRECTORY = 0, + OSD_APAGE_OBJECT_INFORMATION = 1, + OSD_APAGE_OBJECT_QUOTAS = 2, + OSD_APAGE_OBJECT_TIMESTAMP = 3, + OSD_APAGE_OBJECT_COLLECTIONS = 4, + OSD_APAGE_OBJECT_SECURITY = 5, + OSD_APAGE_OBJECT_LAST = 0x2fffffff, + + OSD_APAGE_PARTITION_FIRST = 0x30000000, + OSD_APAGE_PARTITION_DIRECTORY = OSD_APAGE_PARTITION_FIRST + 0, + OSD_APAGE_PARTITION_INFORMATION = OSD_APAGE_PARTITION_FIRST + 1, + OSD_APAGE_PARTITION_QUOTAS = OSD_APAGE_PARTITION_FIRST + 2, + OSD_APAGE_PARTITION_TIMESTAMP = OSD_APAGE_PARTITION_FIRST + 3, + OSD_APAGE_PARTITION_SECURITY = OSD_APAGE_PARTITION_FIRST + 5, + OSD_APAGE_PARTITION_LAST = 0x5FFFFFFF, + + OSD_APAGE_COLLECTION_FIRST = 0x60000000, + OSD_APAGE_COLLECTION_DIRECTORY = OSD_APAGE_COLLECTION_FIRST + 0, + OSD_APAGE_COLLECTION_INFORMATION = OSD_APAGE_COLLECTION_FIRST + 1, + OSD_APAGE_COLLECTION_TIMESTAMP = OSD_APAGE_COLLECTION_FIRST + 3, + OSD_APAGE_COLLECTION_SECURITY = OSD_APAGE_COLLECTION_FIRST + 5, + OSD_APAGE_COLLECTION_LAST = 
0x8FFFFFFF, + + OSD_APAGE_ROOT_FIRST = 0x90000000, + OSD_APAGE_ROOT_DIRECTORY = OSD_APAGE_ROOT_FIRST + 0, + OSD_APAGE_ROOT_INFORMATION = OSD_APAGE_ROOT_FIRST + 1, + OSD_APAGE_ROOT_QUOTAS = OSD_APAGE_ROOT_FIRST + 2, + OSD_APAGE_ROOT_TIMESTAMP = OSD_APAGE_ROOT_FIRST + 3, + OSD_APAGE_ROOT_SECURITY = OSD_APAGE_ROOT_FIRST + 5, + OSD_APAGE_ROOT_LAST = 0xBFFFFFFF, + + OSD_APAGE_RESERVED_TYPE_FIRST = 0xC0000000, + OSD_APAGE_RESERVED_TYPE_LAST = 0xEFFFFFFF, + + OSD_APAGE_COMMON_FIRST = 0xF0000000, + OSD_APAGE_COMMON_LAST = 0xFFFFFFFE, + + OSD_APAGE_REQUEST_ALL = 0xFFFFFFFF, +}; + +/* subcategories of attr pages within each range above */ +enum { + OSD_APAGE_STD_FIRST = 0x0, + OSD_APAGE_STD_DIRECTORY = 0, + OSD_APAGE_STD_INFORMATION = 1, + OSD_APAGE_STD_QUOTAS = 2, + OSD_APAGE_STD_TIMESTAMP = 3, + OSD_APAGE_STD_COLLECTIONS = 4, + OSD_APAGE_STD_POLICY_SECURITY = 5, + OSD_APAGE_STD_LAST = 0x0000007F, + + OSD_APAGE_RESERVED_FIRST = 0x00000080, + OSD_APAGE_RESERVED_LAST = 0x00007FFF, + + OSD_APAGE_OTHER_STD_FIRST = 0x00008000, + OSD_APAGE_OTHER_STD_LAST = 0x0000EFFF, + + OSD_APAGE_PUBLIC_FIRST = 0x0000F000, + OSD_APAGE_PUBLIC_LAST = 0x0000FFFF, + + OSD_APAGE_APP_DEFINED_FIRST = 0x00010000, + OSD_APAGE_APP_DEFINED_LAST = 0x1FFFFFFF, + + OSD_APAGE_VENDOR_SPECIFIC_FIRST = 0x20000000, + OSD_APAGE_VENDOR_SPECIFIC_LAST = 0x2FFFFFFF, +}; + +enum { + OSD_ATTR_PAGE_IDENTIFICATION = 0, /* in all pages 40 bytes */ +}; + +struct page_identification { + u8 vendor_identification[8]; + u8 page_identification[32]; +} __packed; + +struct osd_attr_page_header { + __be32 page_number; + __be32 page_length; +} __packed; + +/* 7.1.2.8 Root Information attributes page (OSD_APAGE_ROOT_INFORMATION) */ +enum { + OSD_ATTR_RI_OSD_SYSTEM_ID = 0x3, /* 20 */ + OSD_ATTR_RI_VENDOR_IDENTIFICATION = 0x4, /* 8 */ + OSD_ATTR_RI_PRODUCT_IDENTIFICATION = 0x5, /* 16 */ + OSD_ATTR_RI_PRODUCT_MODEL = 0x6, /* 32 */ + OSD_ATTR_RI_PRODUCT_REVISION_LEVEL = 0x7, /* 4 */ + OSD_ATTR_RI_PRODUCT_SERIAL_NUMBER = 0x8, /* variable 
*/ + OSD_ATTR_RI_OSD_NAME = 0x9, /* variable */ + OSD_ATTR_RI_TOTAL_CAPACITY = 0x80, /* 8 */ + OSD_ATTR_RI_USED_CAPACITY = 0x81, /* 8 */ + OSD_ATTR_RI_NUMBER_OF_PARTITIONS = 0xC0, /* 8 */ + OSD_ATTR_RI_CLOCK = 0x100, /* 6 */ +}; +/* Root_Information_attributes_page does not have a get_page structure */ + +/* 7.1.2.9 Partition Information attributes page + * (OSD_APAGE_PARTITION_INFORMATION) + */ +enum { + OSD_ATTR_PI_PARTITION_ID = 0x1, /* 8 */ + OSD_ATTR_PI_USERNAME = 0x9, /* variable */ + OSD_ATTR_PI_USED_CAPACITY = 0x81, /* 8 */ + OSD_ATTR_PI_NUMBER_OF_OBJECTS = 0xC1, /* 8 */ +}; +/* Partition Information attributes page does not have a get_page structure */ + +/* 7.1.2.10 Collection Information attributes page + * (OSD_APAGE_COLLECTION_INFORMATION) + */ +enum { + OSD_ATTR_CI_PARTITION_ID = 0x1, /* 8 */ + OSD_ATTR_CI_COLLECTION_OBJECT_ID = 0x2, /* 8 */ + OSD_ATTR_CI_USERNAME = 0x9, /* variable */ + OSD_ATTR_CI_USED_CAPACITY = 0x81, /* 8 */ +}; +/* Collection Information attributes page does not have a get_page structure */ + +/* 7.1.2.11 User Object Information attributes page + * (OSD_APAGE_OBJECT_INFORMATION) + */ +enum { + OSD_ATTR_OI_PARTITION_ID = 0x1, /* 8 */ + OSD_ATTR_OI_OBJECT_ID = 0x2, /* 8 */ + OSD_ATTR_OI_USERNAME = 0x9, /* variable */ + OSD_ATTR_OI_USED_CAPACITY = 0x81, /* 8 */ + OSD_ATTR_OI_LOGICAL_LENGTH = 0x82, /* 8 */ +}; +/* Object Information attributes page does not have a get_page structure */ + +/* 7.1.2.12 Root Quotas attributes page (OSD_APAGE_ROOT_QUOTAS) */ +enum { + OSD_ATTR_RQ_DEFAULT_MAXIMUM_USER_OBJECT_LENGTH = 0x1, /* 8 */ + OSD_ATTR_RQ_PARTITION_CAPACITY_QUOTA = 0x10001, /* 8 */ + OSD_ATTR_RQ_PARTITION_OBJECT_COUNT = 0x10002, /* 8 */ + OSD_ATTR_RQ_PARTITION_COLLECTIONS_PER_USER_OBJECT = 0x10081, /* 4 */ + OSD_ATTR_RQ_PARTITION_COUNT = 0x20002, /* 8 */ +}; + +struct Root_Quotas_attributes_page { + struct osd_attr_page_header hdr; /* id=R+2, size=0x24 */ + __be64 default_maximum_user_object_length; + __be64 partition_capacity_quota; 
+ __be64 partition_object_count; + __be64 partition_collections_per_user_object; + __be64 partition_count; +} __packed; + +/* 7.1.2.13 Partition Quotas attributes page (OSD_APAGE_PARTITION_QUOTAS)*/ +enum { + OSD_ATTR_PQ_DEFAULT_MAXIMUM_USER_OBJECT_LENGTH = 0x1, /* 8 */ + OSD_ATTR_PQ_CAPACITY_QUOTA = 0x10001, /* 8 */ + OSD_ATTR_PQ_OBJECT_COUNT = 0x10002, /* 8 */ + OSD_ATTR_PQ_COLLECTIONS_PER_USER_OBJECT = 0x10081, /* 4 */ +}; + +struct Partition_Quotas_attributes_page { + struct osd_attr_page_header hdr; /* id=P+2, size=0x1C */ + __be64 default_maximum_user_object_length; + __be64 capacity_quota; + __be64 object_count; + __be64 collections_per_user_object; +} __packed; + +/* 7.1.2.14 User Object Quotas attributes page (OSD_APAGE_OBJECT_QUOTAS) */ +enum { + OSD_ATTR_OQ_MAXIMUM_LENGTH = 0x1, /* 8 */ +}; + +struct Object_Quotas_attributes_page { + struct osd_attr_page_header hdr; /* id=U+2, size=0x8 */ + __be64 maximum_length; +} __packed; + +/* 7.1.2.15 Root Timestamps attributes page (OSD_APAGE_ROOT_TIMESTAMP) */ +enum { + OSD_ATTR_RT_ATTRIBUTES_ACCESSED_TIME = 0x2, /* 6 */ + OSD_ATTR_RT_ATTRIBUTES_MODIFIED_TIME = 0x3, /* 6 */ + OSD_ATTR_RT_TIMESTAMP_BYPASS = 0xFFFFFFFE, /* 1 */ +}; + +struct root_timestamps_attributes_page { + struct osd_attr_page_header hdr; /* id=R+3, size=0xD */ + struct osd_timestamp attributes_accessed_time; + struct osd_timestamp attributes_modified_time; + u8 timestamp_bypass; +} __packed; + +/* 7.1.2.16 Partition Timestamps attributes page + * (OSD_APAGE_PARTITION_TIMESTAMP) + */ +enum { + OSD_ATTR_PT_CREATED_TIME = 0x1, /* 6 */ + OSD_ATTR_PT_ATTRIBUTES_ACCESSED_TIME = 0x2, /* 6 */ + OSD_ATTR_PT_ATTRIBUTES_MODIFIED_TIME = 0x3, /* 6 */ + OSD_ATTR_PT_DATA_ACCESSED_TIME = 0x4, /* 6 */ + OSD_ATTR_PT_DATA_MODIFIED_TIME = 0x5, /* 6 */ + OSD_ATTR_PT_TIMESTAMP_BYPASS = 0xFFFFFFFE, /* 1 */ +}; + +struct partition_timestamps_attributes_page { + struct osd_attr_page_header hdr; /* id=P+3, size=0x1F */ + struct osd_timestamp created_time; + struct 
osd_timestamp attributes_accessed_time; + struct osd_timestamp attributes_modified_time; + struct osd_timestamp data_accessed_time; + struct osd_timestamp data_modified_time; + u8 timestamp_bypass; +} __packed; + +/* 7.1.2.17/18 Collection/Object Timestamps attributes page + * (OSD_APAGE_COLLECTION_TIMESTAMP/OSD_APAGE_OBJECT_TIMESTAMP) + */ +enum { + OSD_ATTR_OT_CREATED_TIME = 0x1, /* 6 */ + OSD_ATTR_OT_ATTRIBUTES_ACCESSED_TIME = 0x2, /* 6 */ + OSD_ATTR_OT_ATTRIBUTES_MODIFIED_TIME = 0x3, /* 6 */ + OSD_ATTR_OT_DATA_ACCESSED_TIME = 0x4, /* 6 */ + OSD_ATTR_OT_DATA_MODIFIED_TIME = 0x5, /* 6 */ +}; + +/* same for collection */ +struct object_timestamps_attributes_page { + struct osd_attr_page_header hdr; /* id=C+3/3, size=0x1E */ + struct osd_timestamp created_time; + struct osd_timestamp attributes_accessed_time; + struct osd_timestamp attributes_modified_time; + struct osd_timestamp data_accessed_time; + struct osd_timestamp data_modified_time; +} __packed; + +/* 7.1.2.19 Collections attributes page */ +/* TBD */ + +/* 7.1.2.20 Root Policy/Security attributes page (OSD_APAGE_ROOT_SECURITY) */ +enum { + OSD_ATTR_RS_DEFAULT_SECURITY_METHOD = 0x1, /* 1 */ + OSD_ATTR_RS_OLDEST_VALID_NONCE_LIMIT = 0x2, /* 6 */ + OSD_ATTR_RS_NEWEST_VALID_NONCE_LIMIT = 0x3, /* 6 */ + OSD_ATTR_RS_PARTITION_DEFAULT_SECURITY_METHOD = 0x6, /* 1 */ + OSD_ATTR_RS_SUPPORTED_SECURITY_METHODS = 0x7, /* 2 */ + OSD_ATTR_RS_ADJUSTABLE_CLOCK = 0x9, /* 6 */ + OSD_ATTR_RS_MASTER_KEY_IDENTIFIER = 0x7FFD, /* 0 or 7 */ + OSD_ATTR_RS_ROOT_KEY_IDENTIFIER = 0x7FFE, /* 0 or 7 */ + OSD_ATTR_RS_SUPPORTED_INTEGRITY_ALGORITHM_0 = 0x80000000,/* 1,(x16)*/ + OSD_ATTR_RS_SUPPORTED_DH_GROUP_0 = 0x80000010,/* 1,(x16)*/ +}; + +struct root_security_attributes_page { + struct osd_attr_page_header hdr; /* id=R+5, size=0x3F */ + u8 default_security_method; + u8 partition_default_security_method; + __be16 supported_security_methods; + u8 mki_valid_rki_valid; + struct osd_timestamp oldest_valid_nonce_limit; + struct osd_timestamp 
newest_valid_nonce_limit; + struct osd_timestamp adjustable_clock; + u8 master_key_identifier[32-25]; + u8 root_key_identifier[39-32]; + u8 supported_integrity_algorithm[16]; + u8 supported_dh_group[16]; +} __packed; + +/* 7.1.2.21 Partition Policy/Security attributes page + * (OSD_APAGE_PARTITION_SECURITY) + */ +enum { + OSD_ATTR_PS_DEFAULT_SECURITY_METHOD = 0x1, /* 1 */ + OSD_ATTR_PS_OLDEST_VALID_NONCE = 0x2, /* 6 */ + OSD_ATTR_PS_NEWEST_VALID_NONCE = 0x3, /* 6 */ + OSD_ATTR_PS_REQUEST_NONCE_LIST_DEPTH = 0x4, /* 2 */ + OSD_ATTR_PS_FROZEN_WORKING_KEY_BIT_MASK = 0x5, /* 2 */ + OSD_ATTR_PS_PARTITION_KEY_IDENTIFIER = 0x7FFF, /* 0 or 7 */ + OSD_ATTR_PS_WORKING_KEY_IDENTIFIER_FIRST = 0x8000, /* 0 or 7 */ + OSD_ATTR_PS_WORKING_KEY_IDENTIFIER_LAST = 0x800F, /* 0 or 7 */ + OSD_ATTR_PS_POLICY_ACCESS_TAG = 0x40000001, /* 4 */ + OSD_ATTR_PS_USER_OBJECT_POLICY_ACCESS_TAG = 0x40000002, /* 4 */ +}; + +struct partition_security_attributes_page { + struct osd_attr_page_header hdr; /* id=p+5, size=0x8f */ + u8 reserved[3]; + u8 default_security_method; + struct osd_timestamp oldest_valid_nonce; + struct osd_timestamp newest_valid_nonce; + __be16 request_nonce_list_depth; + __be16 frozen_working_key_bit_mask; + __be32 policy_access_tag; + __be32 user_object_policy_access_tag; + u8 pki_valid; + __be16 wki_00_0f_vld; + struct osd_key_identifier partition_key_identifier; + struct osd_key_identifier working_key_identifiers[16]; +} __packed; + +/* 7.1.2.22/23 Collection/Object Policy-Security attributes page + * (OSD_APAGE_COLLECTION_SECURITY/OSD_APAGE_OBJECT_SECURITY) + */ +enum { + OSD_ATTR_OS_POLICY_ACCESS_TAG = 0x40000001, /* 4 */ +}; + +struct object_security_attributes_page { + struct osd_attr_page_header hdr; /* id=C+5/5, size=4 */ + __be32 policy_access_tag; +} __packed; + +#endif /*ndef __OSD_ATTRIBUTES_H__*/ -- cgit From c6572c983726fe3f3bb5f07e9afe3a9b8e402d1b Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 25 Jan 2009 17:09:40 +0200 Subject: [SCSI] libosd: OSD version 
2 Support Add support for OSD2 at run time. It is now possible to run with both OSDv1 and OSDv2 targets at the same time. The actual detection should be performed by the security manager, as the version is encoded in the capability structure. Signed-off-by: Boaz Harrosh Reviewed-by: Benny Halevy Signed-off-by: James Bottomley --- include/scsi/osd_initiator.h | 39 +++++++++++++++++++ include/scsi/osd_protocol.h | 90 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 125 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index e84dc7aa5e34..8482777416d8 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -21,6 +21,23 @@ /* Note: "NI" in comments below means "Not Implemented yet" */ +/* Configure of code: + * #undef if you *don't* want OSD v1 support in runtime. + * If #defined the initiator will dynamically configure to encode OSD v1 + * CDB's if the target is detected to be OSD v1 only. + * OSD v2 only commands, options, and attributes will be ignored if target + * is v1 only. + * If #defined will result in bigger/slower code (OK Slower maybe not) + * Q: Should this be CONFIG_SCSI_OSD_VER1_SUPPORT and set from Kconfig? + */ +#define OSD_VER1_SUPPORT y + +enum osd_std_version { + OSD_VER_NONE = 0, + OSD_VER1 = 1, + OSD_VER2 = 2, +}; + /* * Object-based Storage Device. * This object represents an OSD device.
@@ -31,6 +48,10 @@ struct osd_dev { struct scsi_device *scsi_device; unsigned def_timeout; + +#ifdef OSD_VER1_SUPPORT + enum osd_std_version version; +#endif }; /* Retrieve/return osd_dev(s) for use by Kernel clients */ @@ -46,6 +67,14 @@ void osduld_unregister_test(unsigned ioctl); void osd_dev_init(struct osd_dev *od, struct scsi_device *scsi_device); void osd_dev_fini(struct osd_dev *od); +/* we might want to use function vector in the future */ +static inline void osd_dev_set_ver(struct osd_dev *od, enum osd_std_version v) +{ +#ifdef OSD_VER1_SUPPORT + od->version = v; +#endif +} + struct osd_request; typedef void (osd_req_done_fn)(struct osd_request *or, void *private); @@ -82,6 +111,16 @@ struct osd_request { int async_error; }; +/* OSD Version control */ +static inline bool osd_req_is_ver1(struct osd_request *or) +{ +#ifdef OSD_VER1_SUPPORT + return or->osd_dev->version == OSD_VER1; +#else + return false; +#endif +} + /* * How to use the osd library: * diff --git a/include/scsi/osd_protocol.h b/include/scsi/osd_protocol.h index ce1a8771ea71..cd3cbf764650 100644 --- a/include/scsi/osd_protocol.h +++ b/include/scsi/osd_protocol.h @@ -25,12 +25,16 @@ enum { OSDv1_TOTAL_CDB_LEN = OSDv1_ADDITIONAL_CDB_LENGTH + 8, OSDv1_CAP_LEN = 80, /* Latest supported version */ - OSD_ADDITIONAL_CDB_LENGTH = OSDv1_ADDITIONAL_CDB_LENGTH, - OSD_TOTAL_CDB_LEN = OSDv1_TOTAL_CDB_LEN, - OSD_CAP_LEN = OSDv1_CAP_LEN, +/* OSD_ADDITIONAL_CDB_LENGTH = 216,*/ + OSD_ADDITIONAL_CDB_LENGTH = + OSDv1_ADDITIONAL_CDB_LENGTH, /* FIXME: Pete rev-001 sup */ + OSD_TOTAL_CDB_LEN = OSD_ADDITIONAL_CDB_LENGTH + 8, +/* OSD_CAP_LEN = 104,*/ + OSD_CAP_LEN = OSDv1_CAP_LEN,/* FIXME: Pete rev-001 sup */ OSD_SYSTEMID_LEN = 20, OSD_CRYPTO_KEYID_SIZE = 20, + /*FIXME: OSDv2_CRYPTO_KEYID_SIZE = 32,*/ OSD_CRYPTO_SEED_SIZE = 4, OSD_CRYPTO_NONCE_SIZE = 12, OSD_MAX_SENSE_LEN = 252, /* from SPC-3 */ @@ -108,6 +112,7 @@ enum { OSD_OFFSET_MAX_BITS = 28, OSDv1_OFFSET_MIN_SHIFT = 8, + OSD_OFFSET_MIN_SHIFT = 3, 
OSD_OFFSET_MAX_SHIFT = 16, }; @@ -129,6 +134,16 @@ static inline osd_cdb_offset osd_encode_offset_v1(u64 offset, unsigned *padding) OSDv1_OFFSET_MIN_SHIFT, OSD_OFFSET_MAX_SHIFT); } +/* Minimum 8 bytes alignment + * Same as v1 but since exponent can be signed than a less than + * 256 alignment can be reached with small offsets (<2GB) + */ +static inline osd_cdb_offset osd_encode_offset_v2(u64 offset, unsigned *padding) +{ + return __osd_encode_offset(offset, padding, + OSD_OFFSET_MIN_SHIFT, OSD_OFFSET_MAX_SHIFT); +} + /* osd2r03: 5.2.1 Overview */ struct osd_cdb_head { struct scsi_varlen_cdb_hdr varlen_cdb; @@ -144,6 +159,13 @@ struct osd_cdb_head { /*36*/ __be64 length; /*44*/ __be64 start_address; } __packed v1; + + struct __osdv2_cdb_addr_len { + /* called allocation_length in some commands */ +/*32*/ __be64 length; +/*40*/ __be64 start_address; +/*48*/ __be32 list_identifier;/* Rarely used */ + } __packed v2; }; /*52*/ union { /* selected attributes mode Page/List/Single */ struct osd_attributes_page_mode { @@ -182,6 +204,7 @@ struct osd_cdb_head { /*80*/ /*160 v1*/ +/*184 v2*/ struct osd_security_parameters { /*160*/u8 integrity_check_value[OSD_CRYPTO_KEYID_SIZE]; /*180*/u8 request_nonce[OSD_CRYPTO_NONCE_SIZE]; @@ -189,6 +212,9 @@ struct osd_security_parameters { /*196*/osd_cdb_offset data_out_integrity_check_offset; } __packed; /*200 v1*/ +/*224 v2*/ + +/* FIXME: osdv2_security_parameters */ struct osdv1_cdb { struct osd_cdb_head h; @@ -196,9 +222,17 @@ struct osdv1_cdb { struct osd_security_parameters sec_params; } __packed; +struct osdv2_cdb { + struct osd_cdb_head h; + u8 caps[OSD_CAP_LEN]; + struct osd_security_parameters sec_params; + /* FIXME: osdv2_security_parameters */ +} __packed; + struct osd_cdb { union { struct osdv1_cdb v1; + struct osdv2_cdb v2; u8 buff[OSD_TOTAL_CDB_LEN]; }; } __packed; @@ -269,6 +303,7 @@ struct osd_attributes_list_attrid { /* * osd2r03: 7.1.3.3 List entry format for retrieved attributes and * for setting attributes + * NOTE: 
v2 is 8-bytes aligned, v1 is not aligned. */ struct osd_attributes_list_element { __be32 attr_page; @@ -279,6 +314,7 @@ struct osd_attributes_list_element { enum { OSDv1_ATTRIBUTES_ELEM_ALIGN = 1, + OSD_ATTRIBUTES_ELEM_ALIGN = 8, }; enum { @@ -292,6 +328,12 @@ static inline unsigned osdv1_attr_list_elem_size(unsigned len) OSDv1_ATTRIBUTES_ELEM_ALIGN); } +static inline unsigned osdv2_attr_list_elem_size(unsigned len) +{ + return ALIGN(len + sizeof(struct osd_attributes_list_element), + OSD_ATTRIBUTES_ELEM_ALIGN); +} + /* * osd2r03: 7.1.3 OSD attributes lists (Table 184) — List type values */ @@ -326,6 +368,21 @@ static inline unsigned osdv1_list_size(struct osdv1_attributes_list_header *h) return be16_to_cpu(h->list_bytes); } +struct osdv2_attributes_list_header { + u8 type; /* lower 4-bits only */ + u8 pad[3]; +/*4*/ __be32 list_bytes; /* Initiator shall set to zero. Only set by target */ + /* + * type=9 followed by struct osd_attributes_list_element's + * type=E followed by struct osd_attributes_list_multi_header's + */ +} __packed; + +static inline unsigned osdv2_list_size(struct osdv2_attributes_list_header *h) +{ + return be32_to_cpu(h->list_bytes); +} + /* (osd-r10 6.13) * osd2r03: 6.15 LIST (Table 79) LIST command parameter data. 
* for root_lstchg below @@ -469,11 +526,36 @@ struct osdv1_cap_object_descriptor { } __packed; /*80 v1*/ -struct osd_capability { +/*56 v2*/ +struct osd_cap_object_descriptor { + union { + struct { +/*56*/ __be32 allowed_attributes_access; +/*60*/ __be32 policy_access_tag; +/*64*/ __be16 boot_epoch; +/*66*/ u8 reserved[6]; +/*72*/ __be64 allowed_partition_id; +/*80*/ __be64 allowed_object_id; +/*88*/ __be64 allowed_range_length; +/*96*/ __be64 allowed_range_start; + } __packed obj_desc; + +/*56*/ u8 object_descriptor[48]; + }; +} __packed; +/*104 v2*/ + +struct osdv1_capability { struct osd_capability_head h; struct osdv1_cap_object_descriptor od; } __packed; +struct osd_capability { + struct osd_capability_head h; +/* struct osd_cap_object_descriptor od;*/ + struct osdv1_cap_object_descriptor od; /* FIXME: Pete rev-001 sup */ +} __packed; + /** * osd_sec_set_caps - set cap-bits into the capabilities header * -- cgit From 1b9dce94c8a24a3f1a01fcdf688f2d903b32acdf Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 25 Jan 2009 17:13:38 +0200 Subject: [SCSI] libosd: OSDv2 auto detection Auto detect an OSDv2 or OSDv1 target at run time. Note how none of the OSD API calls change. The tests do not know what device version it is. This test now passes against both the IBM-OSD-SIM OSD1 target as well as OSC's OSD2 target. 
Signed-off-by: Boaz Harrosh Reviewed-by: Benny Halevy Signed-off-by: James Bottomley --- include/scsi/osd_initiator.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index 8482777416d8..24edeae48936 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -67,6 +67,9 @@ void osduld_unregister_test(unsigned ioctl); void osd_dev_init(struct osd_dev *od, struct scsi_device *scsi_device); void osd_dev_fini(struct osd_dev *od); +/* some hi level device operations */ +int osd_auto_detect_ver(struct osd_dev *od, void *caps); /* GFP_KERNEL */ + /* we might want to use function vector in the future */ static inline void osd_dev_set_ver(struct osd_dev *od, enum osd_std_version v) { -- cgit From 98f3aea2bd4b4f9cd7a6a6479ed9410787f756fd Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 25 Jan 2009 17:15:16 +0200 Subject: [SCSI] libosd: SCSI/OSD Sense decoding support Implementation of the osd_req_decode_sense() API. Can be called by library users to decode what failed in command executions. Add SCSI_OSD_DPRINT_SENSE Kconfig variable. Possible values are: 0 - Do not print any errors to messages file 1 - (Default) Print only decoded errors that are not recoverable. Recoverable errors are those where the target has complied with the request but with a warning. For example, a read past the end of an object will return zeros after the last valid byte. 2 - Print all errors.
Signed-off-by: Boaz Harrosh Signed-off-by: James Bottomley --- include/scsi/osd_initiator.h | 49 ++++++++ include/scsi/osd_sense.h | 260 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 309 insertions(+) create mode 100644 include/scsi/osd_sense.h (limited to 'include') diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index 24edeae48936..b24d9616eb46 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -216,6 +216,55 @@ int osd_execute_request(struct osd_request *or); int osd_execute_request_async(struct osd_request *or, osd_req_done_fn *done, void *private); +/** + * osd_req_decode_sense_full - Decode sense information after execution. + * + * @or: - osd_request to examine + * @osi - Recievs a more detailed error report information (optional). + * @silent - Do not print to dmsg (Even if enabled) + * @bad_obj_list - Some commands act on multiple objects. Failed objects will + * be recieved here (optional) + * @max_obj - Size of @bad_obj_list. + * @bad_attr_list - List of failing attributes (optional) + * @max_attr - Size of @bad_attr_list. + * + * After execution, sense + return code can be analyzed using this function. The + * return code is the final disposition on the error. So it is possible that a + * CHECK_CONDITION was returned from target but this will return NO_ERROR, for + * example on recovered errors. All parameters are optional if caller does + * not need any returned information. + * Note: This function will also dump the error to dmsg according to settings + * of the SCSI_OSD_DPRINT_SENSE Kconfig value. Set @silent if you know the + * command would routinely fail, to not spam the dmsg file. 
+ */ +struct osd_sense_info { + int key; /* one of enum scsi_sense_keys */ + int additional_code ; /* enum osd_additional_sense_codes */ + union { /* Sense specific information */ + u16 sense_info; + u16 cdb_field_offset; /* scsi_invalid_field_in_cdb */ + }; + union { /* Command specific information */ + u64 command_info; + }; + + u32 not_initiated_command_functions; /* osd_command_functions_bits */ + u32 completed_command_functions; /* osd_command_functions_bits */ + struct osd_obj_id obj; + struct osd_attr attr; +}; + +int osd_req_decode_sense_full(struct osd_request *or, + struct osd_sense_info *osi, bool silent, + struct osd_obj_id *bad_obj_list, int max_obj, + struct osd_attr *bad_attr_list, int max_attr); + +static inline int osd_req_decode_sense(struct osd_request *or, + struct osd_sense_info *osi) +{ + return osd_req_decode_sense_full(or, osi, false, NULL, 0, NULL, 0); +} + /** * osd_end_request - return osd_request to free store * diff --git a/include/scsi/osd_sense.h b/include/scsi/osd_sense.h new file mode 100644 index 000000000000..ff9b33c773c7 --- /dev/null +++ b/include/scsi/osd_sense.h @@ -0,0 +1,260 @@ +/* + * osd_sense.h - OSD Related sense handling definitions. + * + * Copyright (C) 2008 Panasas Inc. All rights reserved. + * + * Authors: + * Boaz Harrosh + * Benny Halevy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * + * This file contains types and constants that are defined by the protocol + * Note: All names and symbols are taken from the OSD standard's text. 
+ */ +#ifndef __OSD_SENSE_H__ +#define __OSD_SENSE_H__ + +#include + +/* SPC3r23 4.5.6 Sense key and sense code definitions table 27 */ +enum scsi_sense_keys { + scsi_sk_no_sense = 0x0, + scsi_sk_recovered_error = 0x1, + scsi_sk_not_ready = 0x2, + scsi_sk_medium_error = 0x3, + scsi_sk_hardware_error = 0x4, + scsi_sk_illegal_request = 0x5, + scsi_sk_unit_attention = 0x6, + scsi_sk_data_protect = 0x7, + scsi_sk_blank_check = 0x8, + scsi_sk_vendor_specific = 0x9, + scsi_sk_copy_aborted = 0xa, + scsi_sk_aborted_command = 0xb, + scsi_sk_volume_overflow = 0xd, + scsi_sk_miscompare = 0xe, + scsi_sk_reserved = 0xf, +}; + +/* SPC3r23 4.5.6 Sense key and sense code definitions table 28 */ +/* Note: only those which can be returned by an OSD target. Most of + * these errors are taken care of by the generic scsi layer. + */ +enum osd_additional_sense_codes { + scsi_no_additional_sense_information = 0x0000, + scsi_operation_in_progress = 0x0016, + scsi_cleaning_requested = 0x0017, + scsi_lunr_cause_not_reportable = 0x0400, + scsi_logical_unit_is_in_process_of_becoming_ready = 0x0401, + scsi_lunr_initializing_command_required = 0x0402, + scsi_lunr_manual_intervention_required = 0x0403, + scsi_lunr_operation_in_progress = 0x0407, + scsi_lunr_selftest_in_progress = 0x0409, + scsi_luna_asymmetric_access_state_transition = 0x040a, + scsi_luna_target_port_in_standby_state = 0x040b, + scsi_luna_target_port_in_unavailable_state = 0x040c, + scsi_lunr_notify_enable_spinup_required = 0x0411, + scsi_logical_unit_does_not_respond_to_selection = 0x0500, + scsi_logical_unit_communication_failure = 0x0800, + scsi_logical_unit_communication_timeout = 0x0801, + scsi_logical_unit_communication_parity_error = 0x0802, + scsi_error_log_overflow = 0x0a00, + scsi_warning = 0x0b00, + scsi_warning_specified_temperature_exceeded = 0x0b01, + scsi_warning_enclosure_degraded = 0x0b02, + scsi_write_error_unexpected_unsolicited_data = 0x0c0c, + scsi_write_error_not_enough_unsolicited_data = 0x0c0d, + 
scsi_invalid_information_unit = 0x0e00, + scsi_invalid_field_in_command_information_unit = 0x0e03, + scsi_read_error_failed_retransmission_request = 0x1113, + scsi_parameter_list_length_error = 0x1a00, + scsi_invalid_command_operation_code = 0x2000, + scsi_invalid_field_in_cdb = 0x2400, + osd_security_audit_value_frozen = 0x2404, + osd_security_working_key_frozen = 0x2405, + osd_nonce_not_unique = 0x2406, + osd_nonce_timestamp_out_of_range = 0x2407, + scsi_logical_unit_not_supported = 0x2500, + scsi_invalid_field_in_parameter_list = 0x2600, + scsi_parameter_not_supported = 0x2601, + scsi_parameter_value_invalid = 0x2602, + scsi_invalid_release_of_persistent_reservation = 0x2604, + osd_invalid_dataout_buffer_integrity_check_value = 0x260f, + scsi_not_ready_to_ready_change_medium_may_have_changed = 0x2800, + scsi_power_on_reset_or_bus_device_reset_occurred = 0x2900, + scsi_power_on_occurred = 0x2901, + scsi_scsi_bus_reset_occurred = 0x2902, + scsi_bus_device_reset_function_occurred = 0x2903, + scsi_device_internal_reset = 0x2904, + scsi_transceiver_mode_changed_to_single_ended = 0x2905, + scsi_transceiver_mode_changed_to_lvd = 0x2906, + scsi_i_t_nexus_loss_occurred = 0x2907, + scsi_parameters_changed = 0x2a00, + scsi_mode_parameters_changed = 0x2a01, + scsi_asymmetric_access_state_changed = 0x2a06, + scsi_priority_changed = 0x2a08, + scsi_command_sequence_error = 0x2c00, + scsi_previous_busy_status = 0x2c07, + scsi_previous_task_set_full_status = 0x2c08, + scsi_previous_reservation_conflict_status = 0x2c09, + osd_partition_or_collection_contains_user_objects = 0x2c0a, + scsi_commands_cleared_by_another_initiator = 0x2f00, + scsi_cleaning_failure = 0x3007, + scsi_enclosure_failure = 0x3400, + scsi_enclosure_services_failure = 0x3500, + scsi_unsupported_enclosure_function = 0x3501, + scsi_enclosure_services_unavailable = 0x3502, + scsi_enclosure_services_transfer_failure = 0x3503, + scsi_enclosure_services_transfer_refused = 0x3504, + 
scsi_enclosure_services_checksum_error = 0x3505, + scsi_rounded_parameter = 0x3700, + osd_read_past_end_of_user_object = 0x3b17, + scsi_logical_unit_has_not_self_configured_yet = 0x3e00, + scsi_logical_unit_failure = 0x3e01, + scsi_timeout_on_logical_unit = 0x3e02, + scsi_logical_unit_failed_selftest = 0x3e03, + scsi_logical_unit_unable_to_update_selftest_log = 0x3e04, + scsi_target_operating_conditions_have_changed = 0x3f00, + scsi_microcode_has_been_changed = 0x3f01, + scsi_inquiry_data_has_changed = 0x3f03, + scsi_echo_buffer_overwritten = 0x3f0f, + scsi_diagnostic_failure_on_component_nn_first = 0x4080, + scsi_diagnostic_failure_on_component_nn_last = 0x40ff, + scsi_message_error = 0x4300, + scsi_internal_target_failure = 0x4400, + scsi_select_or_reselect_failure = 0x4500, + scsi_scsi_parity_error = 0x4700, + scsi_data_phase_crc_error_detected = 0x4701, + scsi_scsi_parity_error_detected_during_st_data_phase = 0x4702, + scsi_asynchronous_information_protection_error_detected = 0x4704, + scsi_protocol_service_crc_error = 0x4705, + scsi_phy_test_function_in_progress = 0x4706, + scsi_invalid_message_error = 0x4900, + scsi_command_phase_error = 0x4a00, + scsi_data_phase_error = 0x4b00, + scsi_logical_unit_failed_self_configuration = 0x4c00, + scsi_overlapped_commands_attempted = 0x4e00, + osd_quota_error = 0x5507, + scsi_failure_prediction_threshold_exceeded = 0x5d00, + scsi_failure_prediction_threshold_exceeded_false = 0x5dff, + scsi_voltage_fault = 0x6500, +}; + +enum scsi_descriptor_types { + scsi_sense_information = 0x0, + scsi_sense_command_specific_information = 0x1, + scsi_sense_key_specific = 0x2, + scsi_sense_field_replaceable_unit = 0x3, + scsi_sense_stream_commands = 0x4, + scsi_sense_block_commands = 0x5, + osd_sense_object_identification = 0x6, + osd_sense_response_integrity_check = 0x7, + osd_sense_attribute_identification = 0x8, + scsi_sense_ata_return = 0x9, + + scsi_sense_Reserved_first = 0x0A, + scsi_sense_Reserved_last = 0x7F, + 
scsi_sense_Vendor_specific_first = 0x80, + scsi_sense_Vendor_specific_last = 0xFF, +}; + +struct scsi_sense_descriptor { /* for picking into desc type */ + u8 descriptor_type; /* one of enum scsi_descriptor_types */ + u8 additional_length; /* n - 1 */ + u8 data[]; +} __packed; + +/* OSD deploys only scsi descriptor_based sense buffers */ +struct scsi_sense_descriptor_based { +/*0*/ u8 response_code; /* 0x72 or 0x73 */ +/*1*/ u8 sense_key; /* one of enum scsi_sense_keys (4 lower bits) */ +/*2*/ __be16 additional_sense_code; /* enum osd_additional_sense_codes */ +/*4*/ u8 Reserved[3]; +/*7*/ u8 additional_sense_length; /* n - 7 */ +/*8*/ struct scsi_sense_descriptor ssd[0]; /* variable length, 1 or more */ +} __packed; + +/* some descriptors deployed by OSD */ + +/* SPC3r23 4.5.2.3 Command-specific information sense data descriptor */ +/* Note: this is the same for descriptor_type=00 but with type=00 the + * Reserved[0] == 0x80 (ie. bit-7 set) + */ +struct scsi_sense_command_specific_data_descriptor { +/*0*/ u8 descriptor_type; /* (00h/01h) */ +/*1*/ u8 additional_length; /* (0Ah) */ +/*2*/ u8 Reserved[2]; +/*4*/ __be64 information; +} __packed; +/*12*/ + +struct scsi_sense_key_specific_data_descriptor { +/*0*/ u8 descriptor_type; /* (02h) */ +/*1*/ u8 additional_length; /* (06h) */ +/*2*/ u8 Reserved[2]; +/* SKSV, C/D, Reserved (2), BPV, BIT POINTER (3) */ +/*4*/ u8 sksv_cd_bpv_bp; +/*5*/ __be16 value; /* field-pointer/progress-value/retry-count/... */ +/*7*/ u8 Reserved2; +} __packed; +/*8*/ + +/* 4.16.2.1 OSD error identification sense data descriptor - table 52 */ +/* Note: these bits are defined LE order for easy definition, this way the BIT() + * number is the same as in the documentation. Below members at + * osd_sense_identification_data_descriptor are therefore defined __le32. 
+ */ +enum osd_command_functions_bits { + OSD_CFB_COMMAND = BIT(4), + OSD_CFB_CMD_CAP_VERIFIED = BIT(5), + OSD_CFB_VALIDATION = BIT(7), + OSD_CFB_IMP_ST_ATT = BIT(12), + OSD_CFB_SET_ATT = BIT(20), + OSD_CFB_SA_CAP_VERIFIED = BIT(21), + OSD_CFB_GET_ATT = BIT(28), + OSD_CFB_GA_CAP_VERIFIED = BIT(29), +}; + +struct osd_sense_identification_data_descriptor { +/*0*/ u8 descriptor_type; /* (06h) */ +/*1*/ u8 additional_length; /* (1Eh) */ +/*2*/ u8 Reserved[6]; +/*8*/ __le32 not_initiated_functions; /*osd_command_functions_bits*/ +/*12*/ __le32 completed_functions; /*osd_command_functions_bits*/ +/*16*/ __be64 partition_id; +/*24*/ __be64 object_id; +} __packed; +/*32*/ + +struct osd_sense_response_integrity_check_descriptor { +/*0*/ u8 descriptor_type; /* (07h) */ +/*1*/ u8 additional_length; /* (20h) */ +/*2*/ u8 integrity_check_value[32]; /*FIXME: OSDv2_CRYPTO_KEYID_SIZE*/ +} __packed; +/*34*/ + +struct osd_sense_attributes_data_descriptor { +/*0*/ u8 descriptor_type; /* (08h) */ +/*1*/ u8 additional_length; /* (n-2) */ +/*2*/ u8 Reserved[6]; + struct osd_sense_attr { +/*8*/ __be32 attr_page; +/*12*/ __be32 attr_id; +/*16*/ } sense_attrs[0]; /* 1 or more */ +} __packed; +/*variable*/ + +/* Dig into scsi_sk_illegal_request/scsi_invalid_field_in_cdb errors */ + +/*FIXME: Support also field in CAPS*/ +#define OSD_CDB_OFFSET(F) offsetof(struct osd_cdb_head, F) + +enum osdv2_cdb_field_offset { + OSDv1_CFO_STARTING_BYTE = OSD_CDB_OFFSET(v1.start_address), + OSD_CFO_STARTING_BYTE = OSD_CDB_OFFSET(v2.start_address), + OSD_CFO_PARTITION_ID = OSD_CDB_OFFSET(partition), + OSD_CFO_OBJECT_ID = OSD_CDB_OFFSET(object), +}; + +#endif /* ndef __OSD_SENSE_H__ */ -- cgit From f078727b250c2653fc9a564f15547c17ebac3f99 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sun, 14 Dec 2008 01:23:45 +0900 Subject: [SCSI] remove scsi_req_map_sg No one uses scsi_execute_async with data transfer now. We can remove scsi_req_map_sg. Only scsi_eh_lock_door uses scsi_execute_async. 
scsi_eh_lock_door doesn't handle sense and the callback. So we can remove scsi_io_context too. Signed-off-by: FUJITA Tomonori Signed-off-by: James Bottomley --- include/scsi/scsi_device.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 15b09266b7ff..3f566af3f101 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -371,12 +371,6 @@ extern int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, struct scsi_sense_hdr *, int timeout, int retries, int *resid); -extern int scsi_execute_async(struct scsi_device *sdev, - const unsigned char *cmd, int cmd_len, int data_direction, - void *buffer, unsigned bufflen, int use_sg, - int timeout, int retries, void *privdata, - void (*done)(void *, char *, int, int), - gfp_t gfp); static inline int __must_check scsi_device_reprobe(struct scsi_device *sdev) { -- cgit From f290f1970f01287eaaffc798a677594a57ebd65e Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 8 Feb 2009 21:59:48 -0600 Subject: [SCSI] Make scsi.h independent of the rest of the scsi includes This allows it to compile and be used on the ps3 platform that wants to use the #define values in scsi.h without actually having CONFIG_SCSI set. 
Signed-off-by: James Bottomley --- include/scsi/scsi.h | 19 ++----------------- include/scsi/scsi_cmnd.h | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index 80d7f60e2663..084478e14d24 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -9,7 +9,8 @@ #define _SCSI_SCSI_H #include -#include + +struct scsi_cmnd; /* * The maximum number of SG segments that we will put inside a @@ -439,22 +440,6 @@ static inline int scsi_is_wlun(unsigned int lun) #define host_byte(result) (((result) >> 16) & 0xff) #define driver_byte(result) (((result) >> 24) & 0xff) -static inline void set_msg_byte(struct scsi_cmnd *cmd, char status) -{ - cmd->result |= status << 8; -} - -static inline void set_host_byte(struct scsi_cmnd *cmd, char status) -{ - cmd->result |= status << 16; -} - -static inline void set_driver_byte(struct scsi_cmnd *cmd, char status) -{ - cmd->result |= status << 24; -} - - #define sense_class(sense) (((sense) >> 4) & 0x7) #define sense_error(sense) ((sense) & 0xf) #define sense_valid(sense) ((sense) & 0x80); diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 855bf95963e7..43b50d36925c 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -291,4 +291,19 @@ static inline struct scsi_data_buffer *scsi_prot(struct scsi_cmnd *cmd) #define scsi_for_each_prot_sg(cmd, sg, nseg, __i) \ for_each_sg(scsi_prot_sglist(cmd), sg, nseg, __i) +static inline void set_msg_byte(struct scsi_cmnd *cmd, char status) +{ + cmd->result |= status << 8; +} + +static inline void set_host_byte(struct scsi_cmnd *cmd, char status) +{ + cmd->result |= status << 16; +} + +static inline void set_driver_byte(struct scsi_cmnd *cmd, char status) +{ + cmd->result |= status << 24; +} + #endif /* _SCSI_SCSI_CMND_H */ -- cgit From 211c738d86f3f423f1b218ab3a356c9538e38047 Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Fri, 27 Feb 2009 14:06:37 -0800 Subject: [SCSI] 
net, fcoe: add ETH_P_FCOE for Fibre Channel over Ethernet (FCoE) This adds eth type ETH_P_FCOE for Fibre Channel over Ethernet (FCoE), consequently, the ETH_P_FCOE from fc_fcoe.h and fcoe skb->protocol is not set as ETH_P_FCOE. Signed-off-by: Yi Zou Acked-by: David Miller Signed-off-by: James Bottomley --- include/linux/if_ether.h | 1 + include/scsi/fc/fc_fcoe.h | 7 ------- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 7f3c735f422b..59d197cb4851 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -78,6 +78,7 @@ #define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ #define ETH_P_TIPC 0x88CA /* TIPC */ +#define ETH_P_FCOE 0x8906 /* Fibre Channel over Ethernet */ #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ /* diff --git a/include/scsi/fc/fc_fcoe.h b/include/scsi/fc/fc_fcoe.h index f271d9cc0fc2..ccb3dbe90463 100644 --- a/include/scsi/fc/fc_fcoe.h +++ b/include/scsi/fc/fc_fcoe.h @@ -24,13 +24,6 @@ * FCoE - Fibre Channel over Ethernet. */ -/* - * The FCoE ethertype eventually goes in net/if_ether.h. - */ -#ifndef ETH_P_FCOE -#define ETH_P_FCOE 0x8906 /* FCOE ether type */ -#endif - /* * FC_FCOE_OUI hasn't been standardized yet. XXX TBD. */ -- cgit From 43eb99c5b349b188f82725652f3d1018c619d682 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Fri, 27 Feb 2009 14:06:43 -0800 Subject: [SCSI] net: reclaim 8 upper bits of the netdev->features from GSO Reclaim 8 upper bits of netdev->features from GSO. 
Signed-off-by: Chris Leech Signed-off-by: Yi Zou Acked-by: David Miller Signed-off-by: James Bottomley --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ec54785d34f9..c8238d9ba376 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -652,7 +652,7 @@ struct net_device /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 -#define NETIF_F_GSO_MASK 0xffff0000 +#define NETIF_F_GSO_MASK 0x00ff0000 #define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT) #define NETIF_F_UFO (SKB_GSO_UDP << NETIF_F_GSO_SHIFT) #define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT) -- cgit From 01d5b2fca1fa58ed5039239fd531e9f658971ace Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Fri, 27 Feb 2009 14:06:49 -0800 Subject: [SCSI] net: define feature flags for FCoE offloads Define feature flags for FCoE offloads. Signed-off-by: Chris Leech Signed-off-by: Yi Zou Acked-by: David Miller Signed-off-by: James Bottomley --- include/linux/netdevice.h | 3 +++ include/linux/skbuff.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c8238d9ba376..5c405571cb60 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -650,6 +650,8 @@ struct net_device #define NETIF_F_GRO 16384 /* Generic receive offload */ #define NETIF_F_LRO 32768 /* large receive offload */ +#define NETIF_F_FCOE_CRC (1 << 24) /* FCoE CRC32 */ + /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 #define NETIF_F_GSO_MASK 0x00ff0000 @@ -658,6 +660,7 @@ struct net_device #define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT) #define NETIF_F_TSO_ECN (SKB_GSO_TCP_ECN << NETIF_F_GSO_SHIFT) #define NETIF_F_TSO6 (SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT) +#define NETIF_F_FSO (SKB_GSO_FCOE << NETIF_F_GSO_SHIFT) /* List of features with software fallbacks. 
*/ #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9dcf956ad18a..02adea2099a7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -188,6 +188,8 @@ enum { SKB_GSO_TCP_ECN = 1 << 3, SKB_GSO_TCPV6 = 1 << 4, + + SKB_GSO_FCOE = 1 << 5, }; #if BITS_PER_LONG > 32 -- cgit From 4d288d5767f853bfca25adc7b6030dc95518cb2e Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Fri, 27 Feb 2009 14:06:59 -0800 Subject: [SCSI] net: add FCoE offload support through net_device This adds support to provide Fiber Channel over Ethernet (FCoE) offload through net_device's net_device_ops struct. The offload through net_device for FCoE is enabled in kernel as built-in or module driver. Signed-off-by: Yi Zou Acked-by: David Miller Signed-off-by: James Bottomley --- include/linux/netdevice.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5c405571cb60..7ed49f5335b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -582,6 +582,14 @@ struct net_device_ops { #define HAVE_NETDEV_POLL void (*ndo_poll_controller)(struct net_device *dev); #endif +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) + int (*ndo_fcoe_ddp_setup)(struct net_device *dev, + u16 xid, + struct scatterlist *sgl, + unsigned int sgc); + int (*ndo_fcoe_ddp_done)(struct net_device *dev, + u16 xid); +#endif }; /* @@ -843,6 +851,11 @@ struct net_device struct dcbnl_rtnl_ops *dcbnl_ops; #endif +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) + /* max exchange id for FCoE LRO by ddp */ + unsigned int fcoe_ddp_xid; +#endif + #ifdef CONFIG_COMPAT_NET_DEV_OPS struct { int (*init)(struct net_device *dev); -- cgit From ea1e9a9df5e1fde7ad8878c85b4a097cad0ddcea Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Fri, 27 Feb 2009 14:07:04 -0800 Subject: [SCSI] fcoe, libfc: check offload features from LLD through 
netdev This checks if net_devices supports FCoE offload ops in netdev_ops and, if it does, then sets up the corresponding flags in the associated fc_lport. For large send offload, the maximum length supported in one large send is now described by the added lso_max in fc_lport, which is set up initially from netdev->gso_max_size. Signed-off-by: Yi Zou Signed-off-by: James Bottomley --- include/scsi/libfc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index a2e126b86e3e..61c746cf55f3 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -654,6 +654,7 @@ struct fc_lport { u16 link_speed; u16 link_supported_speeds; u16 lro_xid; /* max xid for fcoe lro */ + unsigned int lso_max; /* max large send size */ struct fc_ns_fts fcts; /* FC-4 type masks */ struct fc_els_rnid_gen rnid_gen; /* RNID information */ -- cgit From b277d2aa9a4d969002c4157bf77b76b9ad9ca04a Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Fri, 27 Feb 2009 14:07:21 -0800 Subject: [SCSI] libfc: add support of large receive offload by ddp in fc_fcp When LLD supports direct data placement (ddp) for large receive of a SCSI I/O coming into fc_fcp, we call into libfc_function_template's ddp_setup() to prepare for a ddp of large receive for this read I/O. When I/O is complete, we call the corresponding ddp_done() to get the length of data ddped as well as to let LLD do clean up. fc_fcp_ddp_setup()/fc_fcp_ddp_done() are added to set up and complete a ddped read I/O described by the given fc_fcp_pkt. They would call into corresponding ddp_setup/ddp_done implemented by the fcoe layer.
Eventually, fcoe layer calls into LLD's ddp_setup/ddp_done provided through net_device Signed-off-by: Yi Zou Signed-off-by: James Bottomley --- include/scsi/fc_frame.h | 19 ++----------------- include/scsi/libfc.h | 30 ++++++++++++++++++++++++++++++ include/scsi/libfcoe.h | 18 ------------------ 3 files changed, 32 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/scsi/fc_frame.h b/include/scsi/fc_frame.h index 04d34a71355f..59511057cee0 100644 --- a/include/scsi/fc_frame.h +++ b/include/scsi/fc_frame.h @@ -54,8 +54,7 @@ #define fr_eof(fp) (fr_cb(fp)->fr_eof) #define fr_flags(fp) (fr_cb(fp)->fr_flags) #define fr_max_payload(fp) (fr_cb(fp)->fr_max_payload) -#define fr_cmd(fp) (fr_cb(fp)->fr_cmd) -#define fr_dir(fp) (fr_cmd(fp)->sc_data_direction) +#define fr_fsp(fp) (fr_cb(fp)->fr_fsp) #define fr_crc(fp) (fr_cb(fp)->fr_crc) struct fc_frame { @@ -66,7 +65,7 @@ struct fcoe_rcv_info { struct packet_type *ptype; struct fc_lport *fr_dev; /* transport layer private pointer */ struct fc_seq *fr_seq; /* for use with exchange manager */ - struct scsi_cmnd *fr_cmd; /* for use of scsi command */ + struct fc_fcp_pkt *fr_fsp; /* for the corresponding fcp I/O */ u32 fr_crc; u16 fr_max_payload; /* max FC payload */ enum fc_sof fr_sof; /* start of frame delimiter */ @@ -218,20 +217,6 @@ static inline bool fc_frame_is_cmd(const struct fc_frame *fp) return fc_frame_rctl(fp) == FC_RCTL_DD_UNSOL_CMD; } -static inline bool fc_frame_is_read(const struct fc_frame *fp) -{ - if (fc_frame_is_cmd(fp) && fr_cmd(fp)) - return fr_dir(fp) == DMA_FROM_DEVICE; - return false; -} - -static inline bool fc_frame_is_write(const struct fc_frame *fp) -{ - if (fc_frame_is_cmd(fp) && fr_cmd(fp)) - return fr_dir(fp) == DMA_TO_DEVICE; - return false; -} - /* * Check for leaks. 
* Print the frame header of any currently allocated frame, assuming there diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 61c746cf55f3..a70eafaad084 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -245,6 +245,7 @@ struct fc_fcp_pkt { */ struct fcp_cmnd cdb_cmd; size_t xfer_len; + u16 xfer_ddp; /* this xfer is ddped */ u32 xfer_contig_end; /* offset of end of contiguous xfer */ u16 max_payload; /* max payload size in bytes */ @@ -267,6 +268,15 @@ struct fc_fcp_pkt { u8 recov_retry; /* count of recovery retries */ struct fc_seq *recov_seq; /* sequence for REC or SRR */ }; +/* + * FC_FCP HELPER FUNCTIONS + *****************************/ +static inline bool fc_fcp_is_read(const struct fc_fcp_pkt *fsp) +{ + if (fsp && fsp->cmd) + return fsp->cmd->sc_data_direction == DMA_FROM_DEVICE; + return false; +} /* * Structure and function definitions for managing Fibre Channel Exchanges @@ -399,6 +409,21 @@ struct libfc_function_template { void *arg), void *arg, unsigned int timer_msec); + /* + * Sets up the DDP context for a given exchange id on the given + * scatterlist if LLD supports DDP for large receive. + * + * STATUS: OPTIONAL + */ + int (*ddp_setup)(struct fc_lport *lp, u16 xid, + struct scatterlist *sgl, unsigned int sgc); + /* + * Completes the DDP transfer and returns the length of data DDPed + * for the given exchange id. + * + * STATUS: OPTIONAL + */ + int (*ddp_done)(struct fc_lport *lp, u16 xid); /* * Send a frame using an existing sequence and exchange. 
* @@ -821,6 +846,11 @@ int fc_change_queue_type(struct scsi_device *sdev, int tag_type); */ void fc_fcp_destroy(struct fc_lport *); +/* + * Set up direct-data placement for this I/O request + */ +void fc_fcp_ddp_setup(struct fc_fcp_pkt *fsp, u16 xid); + /* * ELS/CT interface *****************************/ diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h index 941818f29f59..c41f7d0c6efc 100644 --- a/include/scsi/libfcoe.h +++ b/include/scsi/libfcoe.h @@ -124,24 +124,6 @@ static inline u16 skb_fc_rxid(const struct sk_buff *skb) return be16_to_cpu(skb_fc_header(skb)->fh_rx_id); } -/* FIXME - DMA_BIDIRECTIONAL ? */ -#define skb_cb(skb) ((struct fcoe_rcv_info *)&((skb)->cb[0])) -#define skb_cmd(skb) (skb_cb(skb)->fr_cmd) -#define skb_dir(skb) (skb_cmd(skb)->sc_data_direction) -static inline bool skb_fc_is_read(const struct sk_buff *skb) -{ - if (skb_fc_is_cmd(skb) && skb_cmd(skb)) - return skb_dir(skb) == DMA_FROM_DEVICE; - return false; -} - -static inline bool skb_fc_is_write(const struct sk_buff *skb) -{ - if (skb_fc_is_cmd(skb) && skb_cmd(skb)) - return skb_dir(skb) == DMA_TO_DEVICE; - return false; -} - /* libfcoe funcs */ int fcoe_reset(struct Scsi_Host *shost); u64 fcoe_wwn_from_mac(unsigned char mac[MAX_ADDR_LEN], -- cgit From 1b2c7af877f427a2b25583c9033616c9ebd30aed Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 5 Mar 2009 14:45:58 -0600 Subject: [SCSI] libiscsi: replace scsi_debug logging with session/conn logging This makes the logging a compile time option and replaces the scsi_debug macro with session and connection ones that print out a driver model id prefix. 
Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- include/scsi/libiscsi.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 7360e1916e75..67542aa3aedc 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -45,13 +45,6 @@ struct iscsi_session; struct iscsi_nopin; struct device; -/* #define DEBUG_SCSI */ -#ifdef DEBUG_SCSI -#define debug_scsi(fmt...) printk(KERN_INFO "iscsi: " fmt) -#else -#define debug_scsi(fmt...) -#endif - #define ISCSI_DEF_XMIT_CMDS_MAX 128 /* must be power of 2 */ #define ISCSI_MGMT_CMDS_MAX 15 -- cgit From e28f3d5b51ed07d822f135cd941b01e2d485270e Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 5 Mar 2009 14:46:01 -0600 Subject: [SCSI] libiscsi: don't cap queue depth in iscsi modules There is no need to cap the queue depth in the modules. We set this in userspace and can do that there. For performance testing with ram based targets, this is helpful since we can have very high queue depths. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- include/scsi/libiscsi.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 67542aa3aedc..898de4a73727 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -48,8 +48,7 @@ struct device; #define ISCSI_DEF_XMIT_CMDS_MAX 128 /* must be power of 2 */ #define ISCSI_MGMT_CMDS_MAX 15 -#define ISCSI_DEF_CMD_PER_LUN 32 -#define ISCSI_MAX_CMD_PER_LUN 128 +#define ISCSI_DEF_CMD_PER_LUN 32 /* Task Mgmt states */ enum { -- cgit From 06d25af4edb60f9e9c7e74d342a6963a32e3392f Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 5 Mar 2009 14:46:02 -0600 Subject: [SCSI] iscsi class: fix lock dep warning on logout We never should hit the lock up that is spit out when lock dep is on and we logout. But we have been using the shost work queue in a odd way. 
This patch has us use the work queue for scanning instead of creating our own, and this ends up also killing the lock dep warnings. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- include/scsi/scsi_transport_iscsi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index b50aabe2861e..ac29fbd35544 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -206,8 +206,6 @@ struct iscsi_cls_session { struct iscsi_cls_host { atomic_t nr_scans; struct mutex mutex; - struct workqueue_struct *scan_workq; - char scan_workq_name[20]; }; extern void iscsi_host_for_each_session(struct Scsi_Host *shost, -- cgit From 32ae763e3fce4192cd008956a340353a2e5c3192 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 5 Mar 2009 14:46:03 -0600 Subject: [SCSI] iscsi lib: have lib create work queue for transmitting IO We were using the shost work queue which ended up being a little awkward since all iscsi hosts need a thread for scanning, but only drivers hooked into libiscsi need a workqueue for transmitting. So this patch moves the xmit workqueue to the lib. 
Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- include/scsi/libiscsi.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 898de4a73727..b0b8a6992497 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -318,6 +318,9 @@ struct iscsi_host { spinlock_t lock; int num_sessions; int state; + + struct workqueue_struct *workq; + char workq_name[20]; }; /* @@ -343,7 +346,8 @@ extern int iscsi_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param, char *buf); extern int iscsi_host_add(struct Scsi_Host *shost, struct device *pdev); extern struct Scsi_Host *iscsi_host_alloc(struct scsi_host_template *sht, - int dd_data_size, uint16_t qdepth); + int dd_data_size, uint16_t qdepth, + bool xmit_can_sleep); extern void iscsi_host_remove(struct Scsi_Host *shost); extern void iscsi_host_free(struct Scsi_Host *shost); @@ -379,6 +383,7 @@ extern void iscsi_session_failure(struct iscsi_cls_session *cls_session, extern int iscsi_conn_get_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param, char *buf); extern void iscsi_suspend_tx(struct iscsi_conn *conn); +extern void iscsi_conn_queue_work(struct iscsi_conn *conn); #define iscsi_conn_printk(prefix, _c, fmt, a...) \ iscsi_cls_conn_printk(prefix, ((struct iscsi_conn *)_c)->cls_conn, \ -- cgit From 4d1083509a69a36cc1394f188b7b8956e5526a16 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 5 Mar 2009 14:46:04 -0600 Subject: [SCSI] iscsi lib: remove qdepth param from iscsi host allocation The qdepth setting was useful when we needed libiscsi to verify the setting. Now we just need to make sure that, if older tools passed in zero, we set some default. So this patch just has us use the sht->cmd_per_lun or, if the LLD does a host per session, we can set it on a per-host basis. 
Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- include/scsi/libiscsi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index b0b8a6992497..84eded91b945 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -346,7 +346,7 @@ extern int iscsi_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param, char *buf); extern int iscsi_host_add(struct Scsi_Host *shost, struct device *pdev); extern struct Scsi_Host *iscsi_host_alloc(struct scsi_host_template *sht, - int dd_data_size, uint16_t qdepth, + int dd_data_size, bool xmit_can_sleep); extern void iscsi_host_remove(struct Scsi_Host *shost); extern void iscsi_host_free(struct Scsi_Host *shost); -- cgit From 40a06e755d8524cd0b24f795e8bdce5ad19fc41b Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 5 Mar 2009 14:46:05 -0600 Subject: [SCSI] libiscsi: pass session failure a session struct The API for conn and session failures is awkward because one takes a conn from the lib and one takes a session from the class. This syncs up the interfaces to use structs from the lib. 
Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- include/scsi/libiscsi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 84eded91b945..7ffaed2f94dd 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -378,7 +378,7 @@ extern void iscsi_conn_stop(struct iscsi_cls_conn *, int); extern int iscsi_conn_bind(struct iscsi_cls_session *, struct iscsi_cls_conn *, int); extern void iscsi_conn_failure(struct iscsi_conn *conn, enum iscsi_err err); -extern void iscsi_session_failure(struct iscsi_cls_session *cls_session, +extern void iscsi_session_failure(struct iscsi_session *session, enum iscsi_err err); extern int iscsi_conn_get_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param, char *buf); -- cgit From 5e7facb77ff4b6961d936773fb1f175f7abf76b7 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 5 Mar 2009 14:46:06 -0600 Subject: [SCSI] iscsi class: remove host no argument from session creation callout We do not need to have llds set the host no for the session's parent, because we know the session's parent is going to be the host. This removes it from the session creation callback and converts the drivers. 
Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- include/scsi/scsi_transport_iscsi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index ac29fbd35544..457588e1119b 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -88,7 +88,7 @@ struct iscsi_transport { uint64_t host_param_mask; struct iscsi_cls_session *(*create_session) (struct iscsi_endpoint *ep, uint16_t cmds_max, uint16_t qdepth, - uint32_t sn, uint32_t *hn); + uint32_t sn); void (*destroy_session) (struct iscsi_cls_session *session); struct iscsi_cls_conn *(*create_conn) (struct iscsi_cls_session *sess, uint32_t cid); -- cgit From 4ab3b73f85ca2e99d9dbdb55ac13e57327a7e915 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Mon, 9 Mar 2009 10:51:38 -0400 Subject: [SCSI] bsg: add linux/types.h include to bsg.h Since bsg.h has recently been added to the list of kernel headers that should be exported to the user space, this attachment makes bsg.h more user space "friendly". Specifically autotools dislike headers that don't compile freestanding and bsg.h's use of __u32 types (and friends) are not standard C (C90 or C99). The inclusion of linux/types.h fixes that. 
Signed-off-by: Douglas Gilbert Signed-off-by: James Bottomley --- include/linux/bsg.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/bsg.h b/include/linux/bsg.h index cf0303a60611..6c0a00dfa90c 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -1,6 +1,8 @@ #ifndef BSG_H #define BSG_H +#include <linux/types.h> + #define BSG_PROTOCOL_SCSI 0 #define BSG_SUB_PROTOCOL_SCSI_CMD 0 -- cgit From dec3f95959bff957f5bcbf16c2a2823f7e33d1e7 Mon Sep 17 00:00:00 2001 From: Eric Moore Date: Mon, 9 Mar 2009 01:27:49 -0600 Subject: [SCSI] mpt2sas: add MPT2SAS_MINOR(221) to miscdevice.h Signed-off-by: Eric Moore Signed-off-by: James Bottomley --- include/linux/miscdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index a820f816a49e..beb6ec99cfef 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -26,6 +26,7 @@ #define TUN_MINOR 200 #define MWAVE_MINOR 219 /* ACP/Mwave Modem */ #define MPT_MINOR 220 +#define MPT2SAS_MINOR 221 #define HPET_MINOR 228 #define FUSE_MINOR 229 #define KVM_MINOR 232 -- cgit From bf8e3355ec8f4e472f9841e94203cd759b45226e Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Fri, 5 Dec 2008 22:43:41 +0100 Subject: firewire: cdev: documentation fixlet Reported-by: Jay Fenlason Signed-off-by: Stefan Richter --- include/linux/firewire-cdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index 4d078e99c017..899ef279f5be 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -229,7 +229,7 @@ struct fw_cdev_get_info { * Send a request to the device. This ioctl implements all outgoing requests. * Both quadlet and block request specify the payload as a pointer to the data * in the @data field. Once the transaction completes, the kernel writes an - * &fw_cdev_event_request event back. 
The @closure field is passed back to + * &fw_cdev_event_response event back. The @closure field is passed back to * user space in the response event. */ struct fw_cdev_send_request { -- cgit From 632321ecd99bf85c982a75f8329b4ecbb95b3a8f Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Fri, 2 Jan 2009 12:47:13 +0100 Subject: firewire: cdev: fix documentation of FW_CDEV_IOC_GET_INFO The FW_CDEV_IOC_GET_INFO ioctl looks at client->device->config_rom, not at the local node's config ROM. We could fix the implementation or the documentation. I believe the way how it is currently implemented is more useful than the way how it is currently documented. In fact, libdc1394 uses the ABI already as implemented, not as documented. Hence let's change the documentation. Signed-off-by: Stefan Richter --- include/linux/firewire-cdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index 899ef279f5be..86c8ff5326f9 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -201,7 +201,7 @@ union fw_cdev_event { * case, @rom_length is updated with the actual length of the * configuration ROM. * @rom: If non-zero, address of a buffer to be filled by a copy of the - * local node's configuration ROM + * device's configuration ROM * @bus_reset: If non-zero, address of a buffer to be filled by a * &struct fw_cdev_event_bus_reset with the current state * of the bus. This does not cause a bus reset to happen. -- cgit From b1bda4cdc2037447bd66753bf5ccab66d91b0b59 Mon Sep 17 00:00:00 2001 From: "Jay Fenlason, Stefan Richter" Date: Sun, 4 Jan 2009 16:23:29 +0100 Subject: firewire: cdev: add ioctls for isochronous resource management Based on Date: Tue, 18 Nov 2008 11:41:27 -0500 From: Jay Fenlason Subject: [Patch V4] Add ISO resource management support with several changes to the ABI and implementation. 
Only the part of the ABI which enables auto-reallocation and auto-deallocation is included here. This implements ioctls for kernel-assisted allocation of isochronous channels and isochronous bandwidth. The benefits are: - The client does not have to have write access to the /dev/fw* device corresponding to the IRM. - The client does not have to perform reallocation after bus resets. - Channel and bandwidth are deallocated by the kernel if the file is closed before the client deallocated the resources. Thus resources are released even if the client crashes. It is anticipated that future in-kernel code (firewire-core IRM code; the firewire port of firedtv), will use the fw-iso.c portions of this code too. Signed-off-by: Stefan Richter Tested-by: David Moore --- include/linux/firewire-cdev.h | 100 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 87 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index 86c8ff5326f9..25b96dd0574f 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -25,10 +25,12 @@ #include #include -#define FW_CDEV_EVENT_BUS_RESET 0x00 -#define FW_CDEV_EVENT_RESPONSE 0x01 -#define FW_CDEV_EVENT_REQUEST 0x02 -#define FW_CDEV_EVENT_ISO_INTERRUPT 0x03 +#define FW_CDEV_EVENT_BUS_RESET 0x00 +#define FW_CDEV_EVENT_RESPONSE 0x01 +#define FW_CDEV_EVENT_REQUEST 0x02 +#define FW_CDEV_EVENT_ISO_INTERRUPT 0x03 +#define FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED 0x04 +#define FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED 0x05 /** * struct fw_cdev_event_common - Common part of all fw_cdev_event_ types @@ -146,6 +148,37 @@ struct fw_cdev_event_iso_interrupt { __u32 header[0]; }; +/** + * struct fw_cdev_event_iso_resource - Iso resources were allocated or freed + * @closure: See &fw_cdev_event_common; + * set by %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE ioctl + * @type: %FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED or + * %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED + * @handle: 
Reference by which an allocated resource can be deallocated + * @channel: Isochronous channel which was (de)allocated, if any + * @bandwidth: Bandwidth allocation units which were (de)allocated, if any + * @channels_available: Last known availability of channels + * @bandwidth_available: Last known availability of bandwidth + * + * An %FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED event is sent after an isochronous + * resource was allocated at the IRM. The client has to check @channel and + * @bandwidth for whether the allocation actually succeeded. + * + * @channel is <0 if no channel was allocated. + * @bandwidth is 0 if no bandwidth was allocated. + * + * An %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED event is sent after an isochronous + * resource was deallocated at the IRM. It is also sent when automatic + * reallocation after a bus reset failed. + */ +struct fw_cdev_event_iso_resource { + __u64 closure; + __u32 type; + __u32 handle; + __s32 channel; + __s32 bandwidth; +}; + /** * union fw_cdev_event - Convenience union of fw_cdev_event_ types * @common: Valid for all types @@ -153,6 +186,9 @@ struct fw_cdev_event_iso_interrupt { * @response: Valid if @common.type == %FW_CDEV_EVENT_RESPONSE * @request: Valid if @common.type == %FW_CDEV_EVENT_REQUEST * @iso_interrupt: Valid if @common.type == %FW_CDEV_EVENT_ISO_INTERRUPT + * @iso_resource: Valid if @common.type == + * %FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED or + * %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED * * Convenience union for userspace use. Events could be read(2) into an * appropriately aligned char buffer and then cast to this union for further @@ -163,13 +199,15 @@ struct fw_cdev_event_iso_interrupt { * not fit will be discarded so that the next read(2) will return a new event. 
*/ union fw_cdev_event { - struct fw_cdev_event_common common; - struct fw_cdev_event_bus_reset bus_reset; - struct fw_cdev_event_response response; - struct fw_cdev_event_request request; - struct fw_cdev_event_iso_interrupt iso_interrupt; + struct fw_cdev_event_common common; + struct fw_cdev_event_bus_reset bus_reset; + struct fw_cdev_event_response response; + struct fw_cdev_event_request request; + struct fw_cdev_event_iso_interrupt iso_interrupt; + struct fw_cdev_event_iso_resource iso_resource; }; +/* available since kernel version 2.6.22 */ #define FW_CDEV_IOC_GET_INFO _IOWR('#', 0x00, struct fw_cdev_get_info) #define FW_CDEV_IOC_SEND_REQUEST _IOW('#', 0x01, struct fw_cdev_send_request) #define FW_CDEV_IOC_ALLOCATE _IOWR('#', 0x02, struct fw_cdev_allocate) @@ -178,13 +216,18 @@ union fw_cdev_event { #define FW_CDEV_IOC_INITIATE_BUS_RESET _IOW('#', 0x05, struct fw_cdev_initiate_bus_reset) #define FW_CDEV_IOC_ADD_DESCRIPTOR _IOWR('#', 0x06, struct fw_cdev_add_descriptor) #define FW_CDEV_IOC_REMOVE_DESCRIPTOR _IOW('#', 0x07, struct fw_cdev_remove_descriptor) - #define FW_CDEV_IOC_CREATE_ISO_CONTEXT _IOWR('#', 0x08, struct fw_cdev_create_iso_context) #define FW_CDEV_IOC_QUEUE_ISO _IOWR('#', 0x09, struct fw_cdev_queue_iso) #define FW_CDEV_IOC_START_ISO _IOW('#', 0x0a, struct fw_cdev_start_iso) #define FW_CDEV_IOC_STOP_ISO _IOW('#', 0x0b, struct fw_cdev_stop_iso) + +/* available since kernel version 2.6.24 */ #define FW_CDEV_IOC_GET_CYCLE_TIMER _IOR('#', 0x0c, struct fw_cdev_get_cycle_timer) +/* available since kernel version 2.6.30 */ +#define FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE _IOWR('#', 0x0d, struct fw_cdev_allocate_iso_resource) +#define FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE _IOW('#', 0x0e, struct fw_cdev_deallocate) + /* FW_CDEV_VERSION History * * 1 Feb 18, 2007: Initial version. 
@@ -284,9 +327,9 @@ struct fw_cdev_allocate { }; /** - * struct fw_cdev_deallocate - Free an address range allocation - * @handle: Handle to the address range, as returned by the kernel when the - * range was allocated + * struct fw_cdev_deallocate - Free a CSR address range or isochronous resource + * @handle: Handle to the address range or iso resource, as returned by the + * kernel when the range or resource was allocated */ struct fw_cdev_deallocate { __u32 handle; @@ -479,4 +522,35 @@ struct fw_cdev_get_cycle_timer { __u32 cycle_timer; }; +/** + * struct fw_cdev_allocate_iso_resource - Allocate a channel or bandwidth + * @closure: Passed back to userspace in correponding iso resource events + * @channels: Isochronous channels of which one is to be allocated + * @bandwidth: Isochronous bandwidth units to be allocated + * @handle: Handle to the allocation, written by the kernel + * + * The %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE ioctl initiates allocation of an + * isochronous channel and/or of isochronous bandwidth at the isochronous + * resource manager (IRM). Only one of the channels specified in @channels is + * allocated. An %FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED is sent after + * communication with the IRM, indicating success or failure in the event data. + * The kernel will automatically reallocate the resources after bus resets. + * Should a reallocation fail, an %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED event + * will be sent. The kernel will also automatically deallocate the resources + * when the file descriptor is closed. + * + * @channels is a host-endian bitfield with the most significant bit + * representing channel 0 and the least significant bit representing channel 63: + * 1ULL << (63 - c) + * + * @bandwidth is expressed in bandwidth allocation units, i.e. the time to send + * one quadlet of data (payload or header data) at speed S1600. 
+ */ +struct fw_cdev_allocate_iso_resource { + __u64 closure; + __u64 channels; + __u32 bandwidth; + __u32 handle; +}; + #endif /* _LINUX_FIREWIRE_CDEV_H */ -- cgit From 1ec3c0269d7196118cc7c403654ca5f19ef4d584 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 4 Jan 2009 16:23:29 +0100 Subject: firewire: cdev: add ioctls for manual iso resource management This adds ioctls for allocation and deallocation of a channel or/and bandwidth without auto-reallocation and without auto-deallocation. The benefit of these ioctls is that libraw1394-style isochronous resource management can be implemented without write access to the IRM's character device file. Signed-off-by: Stefan Richter --- include/linux/firewire-cdev.h | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index 25b96dd0574f..08ca838a727b 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -151,7 +151,7 @@ struct fw_cdev_event_iso_interrupt { /** * struct fw_cdev_event_iso_resource - Iso resources were allocated or freed * @closure: See &fw_cdev_event_common; - * set by %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE ioctl + * set by %FW_CDEV_IOC_(DE)ALLOCATE_ISO_RESOURCE(_ONCE) ioctl * @type: %FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED or * %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED * @handle: Reference by which an allocated resource can be deallocated @@ -164,12 +164,12 @@ struct fw_cdev_event_iso_interrupt { * resource was allocated at the IRM. The client has to check @channel and * @bandwidth for whether the allocation actually succeeded. * - * @channel is <0 if no channel was allocated. - * @bandwidth is 0 if no bandwidth was allocated. - * * An %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED event is sent after an isochronous * resource was deallocated at the IRM. It is also sent when automatic * reallocation after a bus reset failed. 
+ * + * @channel is <0 if no channel was (de)allocated or if reallocation failed. + * @bandwidth is 0 if no bandwidth was (de)allocated or if reallocation failed. */ struct fw_cdev_event_iso_resource { __u64 closure; @@ -225,8 +225,10 @@ union fw_cdev_event { #define FW_CDEV_IOC_GET_CYCLE_TIMER _IOR('#', 0x0c, struct fw_cdev_get_cycle_timer) /* available since kernel version 2.6.30 */ -#define FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE _IOWR('#', 0x0d, struct fw_cdev_allocate_iso_resource) -#define FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE _IOW('#', 0x0e, struct fw_cdev_deallocate) +#define FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE _IOWR('#', 0x0d, struct fw_cdev_allocate_iso_resource) +#define FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE _IOW('#', 0x0e, struct fw_cdev_deallocate) +#define FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE_ONCE _IOW('#', 0x0f, struct fw_cdev_allocate_iso_resource) +#define FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE_ONCE _IOW('#', 0x10, struct fw_cdev_allocate_iso_resource) /* FW_CDEV_VERSION History * @@ -523,11 +525,12 @@ struct fw_cdev_get_cycle_timer { }; /** - * struct fw_cdev_allocate_iso_resource - Allocate a channel or bandwidth + * struct fw_cdev_allocate_iso_resource - (De)allocate a channel or bandwidth * @closure: Passed back to userspace in correponding iso resource events - * @channels: Isochronous channels of which one is to be allocated - * @bandwidth: Isochronous bandwidth units to be allocated - * @handle: Handle to the allocation, written by the kernel + * @channels: Isochronous channels of which one is to be (de)allocated + * @bandwidth: Isochronous bandwidth units to be (de)allocated + * @handle: Handle to the allocation, written by the kernel (only valid in + * case of %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE ioctls) * * The %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE ioctl initiates allocation of an * isochronous channel and/or of isochronous bandwidth at the isochronous @@ -539,6 +542,25 @@ struct fw_cdev_get_cycle_timer { * will be sent. 
The kernel will also automatically deallocate the resources * when the file descriptor is closed. * + * The %FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE ioctl can be used to initiate + * deallocation of resources which were allocated as described above. + * An %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED event concludes this operation. + * + * The %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE_ONCE ioctl is a variant of allocation + * without automatic re- or deallocation. + * An %FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED event concludes this operation, + * indicating success or failure in its data. + * + * The %FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE_ONCE ioctl works like + * %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE_ONCE except that resources are freed + * instead of allocated. At most one channel may be specified in this ioctl. + * An %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED event concludes this operation. + * + * To summarize, %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE allocates iso resources + * for the lifetime of the fd or handle. + * In contrast, %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE_ONCE allocates iso resources + * for the duration of a bus generation. + * * @channels is a host-endian bitfield with the most significant bit * representing channel 0 and the least significant bit representing channel 63: * 1ULL << (63 - c) -- cgit From 33580a3ef5ba3bc0ee1b520df82a24bb37ce28f0 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 4 Jan 2009 16:23:29 +0100 Subject: firewire: cdev: add ioctl to query maximum transmission speed While the speed of asynchronous transactions is automatically chosen by the kernel, the speed of isochronous streams has to be chosen by the initiating client. In case of 1394a bus topologies, the maximum possible speed could be figured out with some effort by evaluation of the remote node's link speed field in the config ROM, the local node's link speed field, and the PHY speeds and topologic information in the local node's or IRM's topology map CSR. 
However, this does not work in case of 1394b buses. Hence add an ioctl to export the maximum speed which the kernel already determined. Signed-off-by: Stefan Richter --- include/linux/firewire-cdev.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index 08ca838a727b..f819c1026958 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -229,6 +229,7 @@ union fw_cdev_event { #define FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE _IOW('#', 0x0e, struct fw_cdev_deallocate) #define FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE_ONCE _IOW('#', 0x0f, struct fw_cdev_allocate_iso_resource) #define FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE_ONCE _IOW('#', 0x10, struct fw_cdev_allocate_iso_resource) +#define FW_CDEV_IOC_GET_SPEED _IOR('#', 0x11, struct fw_cdev_get_speed) /* FW_CDEV_VERSION History * @@ -575,4 +576,13 @@ struct fw_cdev_allocate_iso_resource { __u32 handle; }; +/** + * struct fw_cdev_get_speed - Query maximum speed to or from this device + * @max_speed: Speed code; minimum of the device's link speed, the local node's + * link speed, and all PHY port speeds between the two links + */ +struct fw_cdev_get_speed { + __u32 max_speed; +}; + #endif /* _LINUX_FIREWIRE_CDEV_H */ -- cgit From acfe8333572cad5dc70fce18ac966be0446548d7 Mon Sep 17 00:00:00 2001 From: "Jay Fenlason, Stefan Richter" Date: Sun, 4 Jan 2009 16:23:29 +0100 Subject: firewire: cdev: add ioctl for broadcast write requests Write transactions to the broadcast node ID are a convenient way to trigger functions of multiple nodes at once. IIDC is a protocol which can make use of this if multiple cameras with same command_regs_base are connected at the same bus. Based on Date: Wed, 10 Sep 2008 11:32:16 -0400 From: Jay Fenlason Subject: [patch] SEND_BROADCAST_REQUEST Changes: ioctl_send_request() and ioctl_send_broadcast_request() now share code. Broadcast speed corrected to S100. Check for proper tcode. 
Signed-off-by: Stefan Richter --- include/linux/firewire-cdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index f819c1026958..340a78502bca 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -230,6 +230,7 @@ union fw_cdev_event { #define FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE_ONCE _IOW('#', 0x0f, struct fw_cdev_allocate_iso_resource) #define FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE_ONCE _IOW('#', 0x10, struct fw_cdev_allocate_iso_resource) #define FW_CDEV_IOC_GET_SPEED _IOR('#', 0x11, struct fw_cdev_get_speed) +#define FW_CDEV_IOC_SEND_BROADCAST_REQUEST _IOW('#', 0x12, struct fw_cdev_send_request) /* FW_CDEV_VERSION History * -- cgit From 77258da403be4cfce84b6abcdb515ad0bd1f92f1 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Wed, 7 Jan 2009 20:14:53 +0100 Subject: firewire: cdev: increment fw_cdev_version, update documentation Necessary due to Date: Tue, 22 Jul 2008 23:23:40 -0700 From: David Moore Subject: firewire: Include iso timestamp in headers when header_size > 4 Side note: The lack of upwards compatibility sounds worse than it is. All existing client implementations, libraw1394 and libdc1394, set header_size = 4. And since the ABI v1 behaviour does not offer any advantages over the new behaviour, we deliberately do not provide the old behaviour anymore. Also add documentation about the format of fw_cdev_get_cycle_timer which may be used in conjunction with the timestamp of iso packets but has a different format. 
Signed-off-by: Stefan Richter --- include/linux/firewire-cdev.h | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index 340a78502bca..6ed9127680fd 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -138,7 +138,24 @@ struct fw_cdev_event_request { * This event is sent when the controller has completed an &fw_cdev_iso_packet * with the %FW_CDEV_ISO_INTERRUPT bit set. In the receive case, the headers * stripped of all packets up until and including the interrupt packet are - * returned in the @header field. + * returned in the @header field. The amount of header data per packet is as + * specified at iso context creation by &fw_cdev_create_iso_context.header_size. + * + * In version 1 of this ABI, header data consisted of the 1394 isochronous + * packet header, followed by quadlets from the packet payload if + * &fw_cdev_create_iso_context.header_size > 4. + * + * In version 2 of this ABI, header data consist of the 1394 isochronous + * packet header, followed by a timestamp quadlet if + * &fw_cdev_create_iso_context.header_size > 4, followed by quadlets from the + * packet payload if &fw_cdev_create_iso_context.header_size > 8. + * + * Behaviour of ver. 1 of this ABI is no longer available since ABI ver. 2. + * + * Format of 1394 iso packet header: 16 bits len, 2 bits tag, 6 bits channel, + * 4 bits tcode, 4 bits sy, in big endian byte order. Format of timestamp: + * 16 bits invalid, 3 bits cycleSeconds, 13 bits cycleCount, in big endian byte + * order. */ struct fw_cdev_event_iso_interrupt { __u64 closure; @@ -232,11 +249,13 @@ union fw_cdev_event { #define FW_CDEV_IOC_GET_SPEED _IOR('#', 0x11, struct fw_cdev_get_speed) #define FW_CDEV_IOC_SEND_BROADCAST_REQUEST _IOW('#', 0x12, struct fw_cdev_send_request) -/* FW_CDEV_VERSION History - * - * 1 Feb 18, 2007: Initial version. 
+/* + * FW_CDEV_VERSION History + * 1 (2.6.22) - initial version + * 2 (2.6.30) - changed &fw_cdev_event_iso_interrupt.header if + * &fw_cdev_create_iso_context.header_size is 8 or more */ -#define FW_CDEV_VERSION 1 +#define FW_CDEV_VERSION 2 /** * struct fw_cdev_get_info - General purpose information ioctl @@ -417,6 +436,9 @@ struct fw_cdev_remove_descriptor { * * If a context was successfully created, the kernel writes back a handle to the * context, which must be passed in for subsequent operations on that context. + * + * Note that the effect of a @header_size > 4 depends on + * &fw_cdev_get_info.version, as documented at &fw_cdev_event_iso_interrupt. */ struct fw_cdev_create_iso_context { __u32 type; @@ -520,6 +542,9 @@ struct fw_cdev_stop_iso { * The %FW_CDEV_IOC_GET_CYCLE_TIMER ioctl reads the isochronous cycle timer * and also the system clock. This allows to express the receive time of an * isochronous packet as a system time with microsecond accuracy. + * + * @cycle_timer consists of 7 bits cycleSeconds, 13 bits cycleCount, and + * 12 bits cycleOffset, in host byte order. */ struct fw_cdev_get_cycle_timer { __u64 local_time; -- cgit From 5d9cb7d276a9c465fef5a771792eac2cf1929f2b Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Thu, 8 Jan 2009 23:07:40 +0100 Subject: firewire: cdev: add ioctls for iso resource management, amendment Some fixes: - Remove stale documentation. - Fix a != vs. == thinko that got in the way of channel management. - Try bandwidth deallocation even if channel deallocation failed. A simplification: - fw_cdev_allocate_iso_resource.channels is now ordered like libdc1394's dc1394_iso_allocate_channel() channels_allowed argument. By the way, I looked closer at cards from NEC, TI, and VIA, and noticed that they all don't implement IEEE 1394a behaviour which is meant to deviate from IEEE 1212's notion of lock compare-swap. 
This means that we have to do two lock transactions instead of one in many cases where one transaction would already succeed on a fully 1394a compliant IRM. Signed-off-by: Stefan Richter --- include/linux/firewire-cdev.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index 6ed9127680fd..2e35379bf96c 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -174,8 +174,6 @@ struct fw_cdev_event_iso_interrupt { * @handle: Reference by which an allocated resource can be deallocated * @channel: Isochronous channel which was (de)allocated, if any * @bandwidth: Bandwidth allocation units which were (de)allocated, if any - * @channels_available: Last known availability of channels - * @bandwidth_available: Last known availability of bandwidth * * An %FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED event is sent after an isochronous * resource was allocated at the IRM. The client has to check @channel and @@ -580,7 +578,7 @@ struct fw_cdev_get_cycle_timer { * * The %FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE_ONCE ioctl works like * %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE_ONCE except that resources are freed - * instead of allocated. At most one channel may be specified in this ioctl. + * instead of allocated. * An %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED event concludes this operation. * * To summarize, %FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE allocates iso resources @@ -588,9 +586,9 @@ struct fw_cdev_get_cycle_timer { * In contrast, %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE_ONCE allocates iso resources * for the duration of a bus generation. 
* - * @channels is a host-endian bitfield with the most significant bit - * representing channel 0 and the least significant bit representing channel 63: - * 1ULL << (63 - c) + * @channels is a host-endian bitfield with the least significant bit + * representing channel 0 and the most significant bit representing channel 63: + * 1ULL << c for each channel c that is a candidate for (de)allocation. * * @bandwidth is expressed in bandwidth allocation units, i.e. the time to send * one quadlet of data (payload or header data) at speed S1600. -- cgit From f8c2287c65f8f72000102fc058232669e4540bc4 Mon Sep 17 00:00:00 2001 From: Jay Fenlason Date: Thu, 5 Mar 2009 19:08:40 +0100 Subject: firewire: implement asynchronous stream transmission Allow userspace and other firewire drivers (fw-ipv4 I'm looking at you!) to send Asynchronous Transmit Streams as described in 7.8.3 of release 1.1 of the 1394 Open Host Controller Interface Specification. Signed-off-by: Jay Fenlason Signed-off-by: Stefan Richter (tweaks) --- include/linux/firewire-cdev.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index 2e35379bf96c..4dfc84d0ac76 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -246,6 +246,7 @@ union fw_cdev_event { #define FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE_ONCE _IOW('#', 0x10, struct fw_cdev_allocate_iso_resource) #define FW_CDEV_IOC_GET_SPEED _IOR('#', 0x11, struct fw_cdev_get_speed) #define FW_CDEV_IOC_SEND_BROADCAST_REQUEST _IOW('#', 0x12, struct fw_cdev_send_request) +#define FW_CDEV_IOC_SEND_STREAM_PACKET _IOW('#', 0x13, struct fw_cdev_send_stream_packet) /* * FW_CDEV_VERSION History @@ -609,4 +610,30 @@ struct fw_cdev_get_speed { __u32 max_speed; }; +/** + * struct fw_cdev_send_stream_packet - send an asynchronous stream packet + * @generation: Bus generation where the packet is valid + * @speed: Speed code to send the packet at 
+ * @channel: Channel to send the packet on + * @sy: Four-bit sy code for the packet + * @tag: Two-bit tag field to use for the packet + * @size: Size of the packet's data payload + * @data: Userspace pointer to the payload + * + * The %FW_CDEV_IOC_SEND_STREAM_PACKET ioctl sends an asynchronous stream packet + * to every device (that is listening to the specified channel) on the + * firewire bus. It is the applications's job to ensure + * that the intended device(s) will be able to receive the packet at the chosen + * transmit speed. + */ +struct fw_cdev_send_stream_packet { + __u32 generation; + __u32 speed; + __u32 channel; + __u32 sy; + __u32 tag; + __u32 size; + __u64 data; +}; + #endif /* _LINUX_FIREWIRE_CDEV_H */ -- cgit From c8a25900f35e575938c791507894c036c0f2ca7d Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Tue, 10 Mar 2009 20:59:16 +0100 Subject: firewire: cdev: amendment to "add ioctl to query maximum transmission speed" The as yet unreleased FW_CDEV_IOC_GET_SPEED ioctl puts only a single integer into the parameter buffer. We can use ioctl()'s return value instead. (Also: Some whitespace change in firewire-cdev.h.) 
Signed-off-by: Stefan Richter --- include/linux/firewire-cdev.h | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index 4dfc84d0ac76..de4035792f70 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -223,28 +223,28 @@ union fw_cdev_event { }; /* available since kernel version 2.6.22 */ -#define FW_CDEV_IOC_GET_INFO _IOWR('#', 0x00, struct fw_cdev_get_info) -#define FW_CDEV_IOC_SEND_REQUEST _IOW('#', 0x01, struct fw_cdev_send_request) -#define FW_CDEV_IOC_ALLOCATE _IOWR('#', 0x02, struct fw_cdev_allocate) -#define FW_CDEV_IOC_DEALLOCATE _IOW('#', 0x03, struct fw_cdev_deallocate) -#define FW_CDEV_IOC_SEND_RESPONSE _IOW('#', 0x04, struct fw_cdev_send_response) -#define FW_CDEV_IOC_INITIATE_BUS_RESET _IOW('#', 0x05, struct fw_cdev_initiate_bus_reset) -#define FW_CDEV_IOC_ADD_DESCRIPTOR _IOWR('#', 0x06, struct fw_cdev_add_descriptor) -#define FW_CDEV_IOC_REMOVE_DESCRIPTOR _IOW('#', 0x07, struct fw_cdev_remove_descriptor) -#define FW_CDEV_IOC_CREATE_ISO_CONTEXT _IOWR('#', 0x08, struct fw_cdev_create_iso_context) -#define FW_CDEV_IOC_QUEUE_ISO _IOWR('#', 0x09, struct fw_cdev_queue_iso) -#define FW_CDEV_IOC_START_ISO _IOW('#', 0x0a, struct fw_cdev_start_iso) -#define FW_CDEV_IOC_STOP_ISO _IOW('#', 0x0b, struct fw_cdev_stop_iso) +#define FW_CDEV_IOC_GET_INFO _IOWR('#', 0x00, struct fw_cdev_get_info) +#define FW_CDEV_IOC_SEND_REQUEST _IOW('#', 0x01, struct fw_cdev_send_request) +#define FW_CDEV_IOC_ALLOCATE _IOWR('#', 0x02, struct fw_cdev_allocate) +#define FW_CDEV_IOC_DEALLOCATE _IOW('#', 0x03, struct fw_cdev_deallocate) +#define FW_CDEV_IOC_SEND_RESPONSE _IOW('#', 0x04, struct fw_cdev_send_response) +#define FW_CDEV_IOC_INITIATE_BUS_RESET _IOW('#', 0x05, struct fw_cdev_initiate_bus_reset) +#define FW_CDEV_IOC_ADD_DESCRIPTOR _IOWR('#', 0x06, struct fw_cdev_add_descriptor) +#define 
FW_CDEV_IOC_REMOVE_DESCRIPTOR _IOW('#', 0x07, struct fw_cdev_remove_descriptor) +#define FW_CDEV_IOC_CREATE_ISO_CONTEXT _IOWR('#', 0x08, struct fw_cdev_create_iso_context) +#define FW_CDEV_IOC_QUEUE_ISO _IOWR('#', 0x09, struct fw_cdev_queue_iso) +#define FW_CDEV_IOC_START_ISO _IOW('#', 0x0a, struct fw_cdev_start_iso) +#define FW_CDEV_IOC_STOP_ISO _IOW('#', 0x0b, struct fw_cdev_stop_iso) /* available since kernel version 2.6.24 */ -#define FW_CDEV_IOC_GET_CYCLE_TIMER _IOR('#', 0x0c, struct fw_cdev_get_cycle_timer) +#define FW_CDEV_IOC_GET_CYCLE_TIMER _IOR('#', 0x0c, struct fw_cdev_get_cycle_timer) /* available since kernel version 2.6.30 */ #define FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE _IOWR('#', 0x0d, struct fw_cdev_allocate_iso_resource) #define FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE _IOW('#', 0x0e, struct fw_cdev_deallocate) #define FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE_ONCE _IOW('#', 0x0f, struct fw_cdev_allocate_iso_resource) #define FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE_ONCE _IOW('#', 0x10, struct fw_cdev_allocate_iso_resource) -#define FW_CDEV_IOC_GET_SPEED _IOR('#', 0x11, struct fw_cdev_get_speed) +#define FW_CDEV_IOC_GET_SPEED _IO('#', 0x11) /* returns speed code */ #define FW_CDEV_IOC_SEND_BROADCAST_REQUEST _IOW('#', 0x12, struct fw_cdev_send_request) #define FW_CDEV_IOC_SEND_STREAM_PACKET _IOW('#', 0x13, struct fw_cdev_send_stream_packet) @@ -601,15 +601,6 @@ struct fw_cdev_allocate_iso_resource { __u32 handle; }; -/** - * struct fw_cdev_get_speed - Query maximum speed to or from this device - * @max_speed: Speed code; minimum of the device's link speed, the local node's - * link speed, and all PHY port speeds between the two links - */ -struct fw_cdev_get_speed { - __u32 max_speed; -}; - /** * struct fw_cdev_send_stream_packet - send an asynchronous stream packet * @generation: Bus generation where the packet is valid -- cgit From de487da8ca5839d057e1f4b57ee3f387e180b800 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Tue, 10 Mar 2009 21:00:23 +0100 Subject: 
firewire: cdev: secure add_descriptor ioctl The access permissions and ownership or ACL of /dev/fw* character device files will typically be set based on the device type of the respective nodes, as obtained by firewire-core from descriptors in the device's configuration ROM. An example policy is to deny write permission by default but grant write permission to files of AV/C video and audio devices and IIDC video devices. The FW_CDEV_IOC_ADD_DESCRIPTOR ioctl could be used to partly subvert such a policy: Find a device file with relaxed permissions, use the ioctl to add a descriptor with AV/C marker to the local node's ROM, thus gain access to the local node's character device file. (This is only possible if there are udev scripts installed which actively relax permissions for known device types and if there is a device of such a type connected.) Accessibility of the local node's device file is relevant to host security if the host contains two or more IEEE 1394 link layer controllers which are plugged into a single bus. Therefore change the ABI to deny FW_CDEV_IOC_ADD_DESCRIPTOR if the file belongs to a remote node. (This change has no impact on known implementers of the ABI: None of them uses the ioctl yet.) Also clarify the documentation: The ioctl affects all local nodes, not just one local node. Cc: stable@kernel.org Signed-off-by: Stefan Richter --- include/linux/firewire-cdev.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index de4035792f70..25bc82726ef7 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -394,6 +394,9 @@ struct fw_cdev_initiate_bus_reset { * If successful, the kernel adds the descriptor and writes back a handle to the * kernel-side object to be used for later removal of the descriptor block and * immediate key. + * + * This ioctl affects the configuration ROMs of all local nodes. 
+ * The ioctl only succeeds on device files which represent a local node. */ struct fw_cdev_add_descriptor { __u32 immediate; @@ -409,7 +412,7 @@ struct fw_cdev_add_descriptor { * descriptor was added * * Remove a descriptor block and accompanying immediate key from the local - * node's configuration ROM. + * nodes' configuration ROMs. */ struct fw_cdev_remove_descriptor { __u32 handle; -- cgit From 18e9b10fcdc090d3a38606958167d5923c7099b7 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Tue, 10 Mar 2009 21:02:21 +0100 Subject: firewire: cdev: add closure to async stream ioctl This changes the as yet unreleased FW_CDEV_IOC_SEND_STREAM_PACKET ioctl to generate an fw_cdev_event_response event just like the other two ioctls for asynchronous request transmission do. This way, clients get feedback on successful or unsuccessful transmission. This also adds input validation for length, tag, channel, sy, speed. Signed-off-by: Stefan Richter --- include/linux/firewire-cdev.h | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index 25bc82726ef7..c6b3ca3af6df 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -606,28 +606,29 @@ struct fw_cdev_allocate_iso_resource { /** * struct fw_cdev_send_stream_packet - send an asynchronous stream packet - * @generation: Bus generation where the packet is valid - * @speed: Speed code to send the packet at - * @channel: Channel to send the packet on - * @sy: Four-bit sy code for the packet - * @tag: Two-bit tag field to use for the packet - * @size: Size of the packet's data payload - * @data: Userspace pointer to the payload + * @length: Length of outgoing payload, in bytes + * @tag: Data format tag + * @channel: Isochronous channel to transmit to + * @sy: Synchronization code + * @closure: Passed back to userspace in the response event + * @data: Userspace pointer to 
payload + * @generation: The bus generation where packet is valid + * @speed: Speed to transmit at * * The %FW_CDEV_IOC_SEND_STREAM_PACKET ioctl sends an asynchronous stream packet - * to every device (that is listening to the specified channel) on the - * firewire bus. It is the applications's job to ensure - * that the intended device(s) will be able to receive the packet at the chosen - * transmit speed. + * to every device which is listening to the specified channel. The kernel + * writes an &fw_cdev_event_response event which indicates success or failure of + * the transmission. */ struct fw_cdev_send_stream_packet { - __u32 generation; - __u32 speed; + __u32 length; + __u32 tag; __u32 channel; __u32 sy; - __u32 tag; - __u32 size; + __u64 closure; __u64 data; + __u32 generation; + __u32 speed; }; #endif /* _LINUX_FIREWIRE_CDEV_H */ -- cgit From f18df228997fb716990590d248663981a15f17d4 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Tue, 13 Jan 2009 16:43:09 +0100 Subject: quota: Add quota reservation support Delayed allocation defers the block allocation at the dirty pages flush-out time, doing quota charge/check at that time is too late. But we can't charge the quota blocks until blocks are really allocated, otherwise users could get overcharged after reboot from system crash. This patch adds quota reservation for delayed allocation. Quota blocks are reserved in memory, inode and quota won't get dirtied until later block allocation time.
Signed-off-by: Mingming Cao Signed-off-by: Jan Kara --- include/linux/quota.h | 3 +++ include/linux/quotaops.h | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/quota.h b/include/linux/quota.h index d72d5d84fde5..54b837fa64f2 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -198,6 +198,7 @@ struct mem_dqblk { qsize_t dqb_bhardlimit; /* absolute limit on disk blks alloc */ qsize_t dqb_bsoftlimit; /* preferred limit on disk blks */ qsize_t dqb_curspace; /* current used space */ + qsize_t dqb_rsvspace; /* current reserved space for delalloc*/ qsize_t dqb_ihardlimit; /* absolute limit on allocated inodes */ qsize_t dqb_isoftlimit; /* preferred inode limit */ qsize_t dqb_curinodes; /* current # allocated inodes */ @@ -308,6 +309,8 @@ struct dquot_operations { int (*release_dquot) (struct dquot *); /* Quota is going to be deleted from disk */ int (*mark_dirty) (struct dquot *); /* Dquot is marked dirty */ int (*write_info) (struct super_block *, int); /* Write of quota "superblock" */ + /* reserve quota for delayed block allocation */ + int (*reserve_space) (struct inode *, qsize_t, int); }; /* Operations handling requests from userspace */ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 0b35b3a1be05..3e3a0d2874d9 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -183,6 +183,16 @@ static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr) return ret; } +static inline int vfs_dq_reserve_space(struct inode *inode, qsize_t nr) +{ + if (sb_any_quota_active(inode->i_sb)) { + /* Used space is updated in alloc_space() */ + if (inode->i_sb->dq_op->reserve_space(inode, nr, 0) == NO_QUOTA) + return 1; + } + return 0; +} + static inline int vfs_dq_alloc_inode(struct inode *inode) { if (sb_any_quota_active(inode->i_sb)) { @@ -339,6 +349,11 @@ static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr) return 0; } +static inline int 
vfs_dq_reserve_space(struct inode *inode, qsize_t nr) +{ + return 0; +} + static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr) { inode_sub_bytes(inode, nr); @@ -376,6 +391,12 @@ static inline int vfs_dq_alloc_block(struct inode *inode, qsize_t nr) nr << inode->i_sb->s_blocksize_bits); } +static inline int vfs_dq_reserve_block(struct inode *inode, qsize_t nr) +{ + return vfs_dq_reserve_space(inode, + nr << inode->i_blkbits); +} + static inline void vfs_dq_free_block_nodirty(struct inode *inode, qsize_t nr) { vfs_dq_free_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits); -- cgit From 740d9dcd949a986c88886a591054a0cdb89ef669 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Tue, 13 Jan 2009 16:43:14 +0100 Subject: quota: Add quota reservation claim and released operations Reserved quota will be claimed at the block allocation time. Over-booked quota could be returned back with the release callback function. Signed-off-by: Mingming Cao Signed-off-by: Jan Kara --- include/linux/quota.h | 6 ++++++ include/linux/quotaops.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) (limited to 'include') diff --git a/include/linux/quota.h b/include/linux/quota.h index 54b837fa64f2..a510d91561f4 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -311,6 +311,12 @@ struct dquot_operations { int (*write_info) (struct super_block *, int); /* Write of quota "superblock" */ /* reserve quota for delayed block allocation */ int (*reserve_space) (struct inode *, qsize_t, int); + /* claim reserved quota for delayed alloc */ + int (*claim_space) (struct inode *, qsize_t); + /* release rsved quota for delayed alloc */ + void (*release_rsv) (struct inode *, qsize_t); + /* get reserved quota for delayed alloc */ + qsize_t (*get_reserved_space) (struct inode *); }; /* Operations handling requests from userspace */ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 3e3a0d2874d9..7369d04e0a86 
100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -35,6 +35,11 @@ void dquot_destroy(struct dquot *dquot); int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc); int dquot_alloc_inode(const struct inode *inode, qsize_t number); +int dquot_reserve_space(struct inode *inode, qsize_t number, int prealloc); +int dquot_claim_space(struct inode *inode, qsize_t number); +void dquot_release_reserved_space(struct inode *inode, qsize_t number); +qsize_t dquot_get_reserved_space(struct inode *inode); + int dquot_free_space(struct inode *inode, qsize_t number); int dquot_free_inode(const struct inode *inode, qsize_t number); @@ -203,6 +208,31 @@ static inline int vfs_dq_alloc_inode(struct inode *inode) return 0; } +/* + * Convert in-memory reserved quotas to real consumed quotas + */ +static inline int vfs_dq_claim_space(struct inode *inode, qsize_t nr) +{ + if (sb_any_quota_active(inode->i_sb)) { + if (inode->i_sb->dq_op->claim_space(inode, nr) == NO_QUOTA) + return 1; + } else + inode_add_bytes(inode, nr); + + mark_inode_dirty(inode); + return 0; +} + +/* + * Release reserved (in-memory) quotas + */ +static inline +void vfs_dq_release_reservation_space(struct inode *inode, qsize_t nr) +{ + if (sb_any_quota_active(inode->i_sb)) + inode->i_sb->dq_op->release_rsv(inode, nr); +} + static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr) { if (sb_any_quota_active(inode->i_sb)) @@ -354,6 +384,17 @@ static inline int vfs_dq_reserve_space(struct inode *inode, qsize_t nr) return 0; } +static inline int vfs_dq_claim_space(struct inode *inode, qsize_t nr) +{ + return vfs_dq_alloc_space(inode, nr); +} + +static inline +int vfs_dq_release_reservation_space(struct inode *inode, qsize_t nr) +{ + return 0; +} + static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr) { inode_sub_bytes(inode, nr); @@ -397,6 +438,18 @@ static inline int vfs_dq_reserve_block(struct inode *inode, qsize_t nr) nr << 
inode->i_blkbits); } +static inline int vfs_dq_claim_block(struct inode *inode, qsize_t nr) +{ + return vfs_dq_claim_space(inode, + nr << inode->i_blkbits); +} + +static inline +void vfs_dq_release_reservation_block(struct inode *inode, qsize_t nr) +{ + vfs_dq_release_reservation_space(inode, nr << inode->i_blkbits); +} + static inline void vfs_dq_free_block_nodirty(struct inode *inode, qsize_t nr) { vfs_dq_free_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits); -- cgit From 9900ba3487f9ba392db30e12d210f768a90abb13 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Wed, 14 Jan 2009 16:18:57 +0100 Subject: quota: Use inode->i_blkbits to get block bits Andrew has suggested to use inode->i_blkbits to get the block bits info, rather than use super block's blockbits. That should be faster and emit less code. Signed-off-by: Mingming Cao Signed-off-by: Jan Kara --- include/linux/quotaops.h | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 7369d04e0a86..69b502e5eba0 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -410,38 +410,32 @@ static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr) static inline int vfs_dq_prealloc_block_nodirty(struct inode *inode, qsize_t nr) { - return vfs_dq_prealloc_space_nodirty(inode, - nr << inode->i_sb->s_blocksize_bits); + return vfs_dq_prealloc_space_nodirty(inode, nr << inode->i_blkbits); } static inline int vfs_dq_prealloc_block(struct inode *inode, qsize_t nr) { - return vfs_dq_prealloc_space(inode, - nr << inode->i_sb->s_blocksize_bits); + return vfs_dq_prealloc_space(inode, nr << inode->i_blkbits); } static inline int vfs_dq_alloc_block_nodirty(struct inode *inode, qsize_t nr) { - return vfs_dq_alloc_space_nodirty(inode, - nr << inode->i_sb->s_blocksize_bits); + return vfs_dq_alloc_space_nodirty(inode, nr << inode->i_blkbits); } static inline int vfs_dq_alloc_block(struct 
inode *inode, qsize_t nr) { - return vfs_dq_alloc_space(inode, - nr << inode->i_sb->s_blocksize_bits); + return vfs_dq_alloc_space(inode, nr << inode->i_blkbits); } static inline int vfs_dq_reserve_block(struct inode *inode, qsize_t nr) { - return vfs_dq_reserve_space(inode, - nr << inode->i_blkbits); + return vfs_dq_reserve_space(inode, nr << inode->i_blkbits); } static inline int vfs_dq_claim_block(struct inode *inode, qsize_t nr) { - return vfs_dq_claim_space(inode, - nr << inode->i_blkbits); + return vfs_dq_claim_space(inode, nr << inode->i_blkbits); } static inline @@ -452,12 +446,12 @@ void vfs_dq_release_reservation_block(struct inode *inode, qsize_t nr) static inline void vfs_dq_free_block_nodirty(struct inode *inode, qsize_t nr) { - vfs_dq_free_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits); + vfs_dq_free_space_nodirty(inode, nr << inode->i_blkbits); } static inline void vfs_dq_free_block(struct inode *inode, qsize_t nr) { - vfs_dq_free_space(inode, nr << inode->i_sb->s_blocksize_bits); + vfs_dq_free_space(inode, nr << inode->i_blkbits); } /* -- cgit From dd6f3c6d5a26a282521f15a183fdc2d6f35cfa0f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 26 Jan 2009 16:01:43 +0100 Subject: quota: Remove NODQUOT macro Remove this macro which is just a definition of NULL. Fix a few coding style issues along the way. Signed-off-by: Jan Kara --- include/linux/quota.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/quota.h b/include/linux/quota.h index a510d91561f4..78c48895b12a 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -277,8 +277,6 @@ struct dquot { struct mem_dqblk dq_dqb; /* Diskquota usage */ }; -#define NODQUOT (struct dquot *)NULL - #define QUOTA_OK 0 #define NO_QUOTA 1 -- cgit From bf84c82d000b9820b01f516d13d328f354f8a8ee Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 26 Jan 2009 17:37:01 +0100 Subject: quota: Remove uppercase aliases for quota functions. 
Since all users have been converted, remove uppercase names of quota functions. Signed-off-by: Jan Kara --- include/linux/quotaops.h | 31 ------------------------------- 1 file changed, 31 deletions(-) (limited to 'include') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 69b502e5eba0..36353d95c8db 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -454,35 +454,4 @@ static inline void vfs_dq_free_block(struct inode *inode, qsize_t nr) vfs_dq_free_space(inode, nr << inode->i_blkbits); } -/* - * Define uppercase equivalents for compatibility with old function names - * Can go away when we think all users have been converted (15/04/2008) - */ -#define DQUOT_INIT(inode) vfs_dq_init(inode) -#define DQUOT_DROP(inode) vfs_dq_drop(inode) -#define DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr) \ - vfs_dq_prealloc_space_nodirty(inode, nr) -#define DQUOT_PREALLOC_SPACE(inode, nr) vfs_dq_prealloc_space(inode, nr) -#define DQUOT_ALLOC_SPACE_NODIRTY(inode, nr) \ - vfs_dq_alloc_space_nodirty(inode, nr) -#define DQUOT_ALLOC_SPACE(inode, nr) vfs_dq_alloc_space(inode, nr) -#define DQUOT_PREALLOC_BLOCK_NODIRTY(inode, nr) \ - vfs_dq_prealloc_block_nodirty(inode, nr) -#define DQUOT_PREALLOC_BLOCK(inode, nr) vfs_dq_prealloc_block(inode, nr) -#define DQUOT_ALLOC_BLOCK_NODIRTY(inode, nr) \ - vfs_dq_alloc_block_nodirty(inode, nr) -#define DQUOT_ALLOC_BLOCK(inode, nr) vfs_dq_alloc_block(inode, nr) -#define DQUOT_ALLOC_INODE(inode) vfs_dq_alloc_inode(inode) -#define DQUOT_FREE_SPACE_NODIRTY(inode, nr) \ - vfs_dq_free_space_nodirty(inode, nr) -#define DQUOT_FREE_SPACE(inode, nr) vfs_dq_free_space(inode, nr) -#define DQUOT_FREE_BLOCK_NODIRTY(inode, nr) \ - vfs_dq_free_block_nodirty(inode, nr) -#define DQUOT_FREE_BLOCK(inode, nr) vfs_dq_free_block(inode, nr) -#define DQUOT_FREE_INODE(inode) vfs_dq_free_inode(inode) -#define DQUOT_TRANSFER(inode, iattr) vfs_dq_transfer(inode, iattr) -#define DQUOT_SYNC(sb) vfs_dq_sync(sb) -#define DQUOT_OFF(sb, remount) 
vfs_dq_off(sb, remount) -#define DQUOT_ON_REMOUNT(sb) vfs_dq_quota_on_remount(sb) - #endif /* _LINUX_QUOTAOPS_ */ -- cgit From 05fd8e73e1357feaea9c48938d937eae76b4aef4 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Sat, 7 Mar 2009 12:55:49 +0100 Subject: clkdev: add possibility to get a clock based on the device name This adds clk_get_sys to get a clock without the associated struct device. Signed-off-by: Sascha Hauer --- include/linux/clk.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/clk.h b/include/linux/clk.h index 778777316ea4..1db9bbf444a3 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -125,4 +125,21 @@ int clk_set_parent(struct clk *clk, struct clk *parent); */ struct clk *clk_get_parent(struct clk *clk); +/** + * clk_get_sys - get a clock based upon the device name + * @dev_id: device name + * @con_id: connection ID + * + * Returns a struct clk corresponding to the clock producer, or + * valid IS_ERR() condition containing errno. The implementation + * uses @dev_id and @con_id to determine the clock consumer, and + * thereby the clock producer. In contrast to clk_get() this function + * takes the device name instead of the device itself for identification. + * + * Drivers must assume that the clock source is not enabled. + * + * clk_get_sys should not be called from within interrupt context. + */ +struct clk *clk_get_sys(const char *dev_id, const char *con_id); + #endif -- cgit From 2b1c6bd77d4e6a727ffac8630cd154b2144b751a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 10:09:09 +0100 Subject: generic compat_sys_ustat Due to a different size of ino_t ustat needs a compat handler, but currently only x86 and mips provide one. Add a generic compat_sys_ustat and switch all architectures over to it. Instead of doing various user copy hacks compat_sys_ustat just reimplements sys_ustat as it's trivial. This was suggested by Arnd Bergmann. 
Found by Eric Sandeen when running xfstests/017 on ppc64, which causes stack smashing warnings on RHEL/Fedora due to the too large amount of data written by the syscall. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/compat.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/compat.h b/include/linux/compat.h index 3fd2194ff573..b880864672de 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -125,6 +125,13 @@ struct compat_dirent { char d_name[256]; }; +struct compat_ustat { + compat_daddr_t f_tfree; + compat_ino_t f_tinode; + char f_fname[6]; + char f_fpack[6]; +}; + typedef union compat_sigval { compat_int_t sival_int; compat_uptr_t sival_ptr; @@ -178,6 +185,7 @@ long compat_sys_semtimedop(int semid, struct sembuf __user *tsems, unsigned nsems, const struct compat_timespec __user *timeout); asmlinkage long compat_sys_keyctl(u32 option, u32 arg2, u32 arg3, u32 arg4, u32 arg5); +asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u32); asmlinkage ssize_t compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen); -- cgit From 10f303ae1e5e77a9f7cb053e6329906afb132c67 Mon Sep 17 00:00:00 2001 From: Cheng Renquan Date: Wed, 14 Jan 2009 17:01:33 +0800 Subject: do_pipe cleanup: drop its last user in arch/alpha/ The last user of do_pipe is in arch/alpha/, after replacing it with do_pipe_flags, the do_pipe can be totally dropped. 
Signed-off-by: Cheng Renquan Acked-by: Richard Henderson Signed-off-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 92734c0012e6..51de83bd8a87 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1881,7 +1881,6 @@ static inline void allow_write_access(struct file *file) if (file) atomic_inc(&file->f_path.dentry->d_inode->i_writecount); } -extern int do_pipe(int *); extern int do_pipe_flags(int *, int); extern struct file *create_read_pipe(struct file *f, int flags); extern struct file *create_write_pipe(int flags); -- cgit From c2aca5e529a2499d454c41e01f59f1d5fe4a1364 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 20 Jan 2009 10:29:45 +0000 Subject: vfs: Update fs.h to use inline functions when no file locking set This avoids various issues which might give rise to compiler warnings about missing functions and/or unused variable with the previous macros. This also fixes a bug where one of the macros was returning 0, but it should have been void. 
Reported-by: Randy Dunlap Signed-off-by: Steven Whitehouse Tested-by: Randy Dunlap Signed-off-by: Al Viro --- include/linux/fs.h | 165 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 139 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 51de83bd8a87..d84020b7e676 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1063,34 +1063,147 @@ extern int lease_modify(struct file_lock **, int); extern int lock_may_read(struct inode *, loff_t start, unsigned long count); extern int lock_may_write(struct inode *, loff_t start, unsigned long count); #else /* !CONFIG_FILE_LOCKING */ -#define fcntl_getlk(a, b) ({ -EINVAL; }) -#define fcntl_setlk(a, b, c, d) ({ -EACCES; }) +static inline int fcntl_getlk(struct file *file, struct flock __user *user) +{ + return -EINVAL; +} + +static inline int fcntl_setlk(unsigned int fd, struct file *file, + unsigned int cmd, struct flock __user *user) +{ + return -EACCES; +} + #if BITS_PER_LONG == 32 -#define fcntl_getlk64(a, b) ({ -EINVAL; }) -#define fcntl_setlk64(a, b, c, d) ({ -EACCES; }) +static inline int fcntl_getlk64(struct file *file, struct flock64 __user *user) +{ + return -EINVAL; +} + +static inline int fcntl_setlk64(unsigned int fd, struct file *file, + unsigned int cmd, struct flock64 __user *user) +{ + return -EACCES; +} #endif -#define fcntl_setlease(a, b, c) ({ 0; }) -#define fcntl_getlease(a) ({ 0; }) -#define locks_init_lock(a) ({ }) -#define __locks_copy_lock(a, b) ({ }) -#define locks_copy_lock(a, b) ({ }) -#define locks_remove_posix(a, b) ({ }) -#define locks_remove_flock(a) ({ }) -#define posix_test_lock(a, b) ({ 0; }) -#define posix_lock_file(a, b, c) ({ -ENOLCK; }) -#define posix_lock_file_wait(a, b) ({ -ENOLCK; }) -#define posix_unblock_lock(a, b) (-ENOENT) -#define vfs_test_lock(a, b) ({ 0; }) -#define vfs_lock_file(a, b, c, d) (-ENOLCK) -#define vfs_cancel_lock(a, b) ({ 0; }) -#define flock_lock_file_wait(a, b) ({ -ENOLCK; }) 
-#define __break_lease(a, b) ({ 0; }) -#define lease_get_mtime(a, b) ({ }) -#define generic_setlease(a, b, c) ({ -EINVAL; }) -#define vfs_setlease(a, b, c) ({ -EINVAL; }) -#define lease_modify(a, b) ({ -EINVAL; }) -#define lock_may_read(a, b, c) ({ 1; }) -#define lock_may_write(a, b, c) ({ 1; }) +static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg) +{ + return 0; +} + +static inline int fcntl_getlease(struct file *filp) +{ + return 0; +} + +static inline void locks_init_lock(struct file_lock *fl) +{ + return; +} + +static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl) +{ + return; +} + +static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl) +{ + return; +} + +static inline void locks_remove_posix(struct file *filp, fl_owner_t owner) +{ + return; +} + +static inline void locks_remove_flock(struct file *filp) +{ + return; +} + +static inline void posix_test_lock(struct file *filp, struct file_lock *fl) +{ + return; +} + +static inline int posix_lock_file(struct file *filp, struct file_lock *fl, + struct file_lock *conflock) +{ + return -ENOLCK; +} + +static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return -ENOLCK; +} + +static inline int posix_unblock_lock(struct file *filp, + struct file_lock *waiter) +{ + return -ENOENT; +} + +static inline int vfs_test_lock(struct file *filp, struct file_lock *fl) +{ + return 0; +} + +static inline int vfs_lock_file(struct file *filp, unsigned int cmd, + struct file_lock *fl, struct file_lock *conf) +{ + return -ENOLCK; +} + +static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) +{ + return 0; +} + +static inline int flock_lock_file_wait(struct file *filp, + struct file_lock *request) +{ + return -ENOLCK; +} + +static inline int __break_lease(struct inode *inode, unsigned int mode) +{ + return 0; +} + +static inline void lease_get_mtime(struct inode *inode, struct timespec *time) +{ + return; +} + 
+static inline int generic_setlease(struct file *filp, long arg, + struct file_lock **flp) +{ + return -EINVAL; +} + +static inline int vfs_setlease(struct file *filp, long arg, + struct file_lock **lease) +{ + return -EINVAL; +} + +static inline int lease_modify(struct file_lock **before, int arg) +{ + return -EINVAL; +} + +static inline int lock_may_read(struct inode *inode, loff_t start, + unsigned long len) +{ + return 1; +} + +static inline int lock_may_write(struct inode *inode, loff_t start, + unsigned long len) +{ + return 1; +} + #endif /* !CONFIG_FILE_LOCKING */ -- cgit From af5df56688acfb75c1b15b4e000ec5e82a9cdc29 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 20 Jan 2009 10:29:46 +0000 Subject: vfs: Further changes from macro to inline function in fs.h There is a second set of macros for when CONFIG_FILE_LOCKING is not set. This patch updates those to become inline functions as well. Signed-off-by: Steven Whitehouse Signed-off-by: Al Viro --- include/linux/fs.h | 45 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d84020b7e676..5f74d616cd7d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1800,13 +1800,44 @@ static inline int break_lease(struct inode *inode, unsigned int mode) return 0; } #else /* !CONFIG_FILE_LOCKING */ -#define locks_mandatory_locked(a) ({ 0; }) -#define locks_mandatory_area(a, b, c, d, e) ({ 0; }) -#define __mandatory_lock(a) ({ 0; }) -#define mandatory_lock(a) ({ 0; }) -#define locks_verify_locked(a) ({ 0; }) -#define locks_verify_truncate(a, b, c) ({ 0; }) -#define break_lease(a, b) ({ 0; }) +static inline int locks_mandatory_locked(struct inode *inode) +{ + return 0; +} + +static inline int locks_mandatory_area(int rw, struct inode *inode, + struct file *filp, loff_t offset, + size_t count) +{ + return 0; +} + +static inline int __mandatory_lock(struct inode *inode) +{ + return 0; 
+} + +static inline int mandatory_lock(struct inode *inode) +{ + return 0; +} + +static inline int locks_verify_locked(struct inode *inode) +{ + return 0; +} + +static inline int locks_verify_truncate(struct inode *inode, struct file *filp, + size_t size) +{ + return 0; +} + +static inline int break_lease(struct inode *inode, unsigned int mode) +{ + return 0; +} + #endif /* CONFIG_FILE_LOCKING */ /* fs/open.c */ -- cgit From e56980d451904b623573ef4966cbab768e433c79 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 11 Feb 2009 13:14:54 -0800 Subject: fs: make struct dentry->d_op const This change will allow for tagging many dentry_operations const in the source tree. Signed-off-by: Jan Engelhardt Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/dcache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index c66d22487bf8..15156364d196 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -112,7 +112,7 @@ struct dentry { struct list_head d_subdirs; /* our children */ struct list_head d_alias; /* inode alias list */ unsigned long d_time; /* used by d_revalidate */ - struct dentry_operations *d_op; + const struct dentry_operations *d_op; struct super_block *d_sb; /* The root of the dentry tree */ void *d_fsdata; /* fs-specific data */ -- cgit From f786aa90e026f2174bb0c26d49f338c5c46ede55 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 20 Feb 2009 05:51:22 +0000 Subject: constify dentry_operations: NFS Signed-off-by: Al Viro --- include/linux/nfs_fs.h | 2 +- include/linux/nfs_xdr.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index db867b04ac3c..8cc8807f77d6 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -415,7 +415,7 @@ extern const struct inode_operations nfs_dir_inode_operations; extern const struct 
inode_operations nfs3_dir_inode_operations; #endif /* CONFIG_NFS_V3 */ extern const struct file_operations nfs_dir_operations; -extern struct dentry_operations nfs_dentry_operations; +extern const struct dentry_operations nfs_dentry_operations; extern void nfs_force_lookup_revalidate(struct inode *dir); extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 2e5f00066afd..43a713fce11c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -785,7 +785,7 @@ struct nfs_access_entry; */ struct nfs_rpc_ops { u32 version; /* Protocol version */ - struct dentry_operations *dentry_ops; + const struct dentry_operations *dentry_ops; const struct inode_operations *dir_inode_ops; const struct inode_operations *file_inode_ops; -- cgit From e16404ed0f3f330dc3e99b95cef69bb60bcd27f7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 20 Feb 2009 05:55:13 +0000 Subject: constify dentry_operations: misc filesystems Signed-off-by: Al Viro --- include/linux/ncp_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h index f69e66d151cc..30b06c893944 100644 --- a/include/linux/ncp_fs.h +++ b/include/linux/ncp_fs.h @@ -204,7 +204,7 @@ void ncp_update_inode2(struct inode *, struct ncp_entry_info *); /* linux/fs/ncpfs/dir.c */ extern const struct inode_operations ncp_dir_inode_operations; extern const struct file_operations ncp_dir_operations; -extern struct dentry_operations ncp_root_dentry_operations; +extern const struct dentry_operations ncp_root_dentry_operations; int ncp_conn_logged_in(struct super_block *); int ncp_date_dos2unix(__le16 time, __le16 date); void ncp_date_unix2dos(int unix_date, __le16 * time, __le16 * date); -- cgit From 585d3bc06f4ca57f975a5a1f698f65a45ea66225 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 25 Feb 2009 10:44:19 +0100 Subject: fs: move bdev 
code out of buffer.c Move some block device related code out from buffer.c and put it in block_dev.c. I'm trying to move non-buffer_head code out of buffer.c Signed-off-by: Al Viro --- include/linux/buffer_head.h | 7 ------- include/linux/fs.h | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index bd7ac793be19..f19fd9045ea0 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -165,15 +165,8 @@ int sync_mapping_buffers(struct address_space *mapping); void unmap_underlying_metadata(struct block_device *bdev, sector_t block); void mark_buffer_async_write(struct buffer_head *bh); -void invalidate_bdev(struct block_device *); -int sync_blockdev(struct block_device *bdev); void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); -int fsync_bdev(struct block_device *); -struct super_block *freeze_bdev(struct block_device *); -int thaw_bdev(struct block_device *, struct super_block *); -int fsync_super(struct super_block *); -int fsync_no_super(struct block_device *); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); struct buffer_head *__getblk(struct block_device *bdev, sector_t block, diff --git a/include/linux/fs.h b/include/linux/fs.h index 5f74d616cd7d..c2c4454a268a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1874,6 +1874,13 @@ extern void bd_set_size(struct block_device *, loff_t size); extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); extern struct block_device *open_by_devnum(dev_t, fmode_t); +extern void invalidate_bdev(struct block_device *); +extern int sync_blockdev(struct block_device *bdev); +extern struct super_block *freeze_bdev(struct block_device *); +extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); +extern int fsync_bdev(struct block_device *); +extern int 
fsync_super(struct super_block *); +extern int fsync_no_super(struct block_device *); #else static inline void bd_forget(struct inode *inode) {} #endif -- cgit From a3ec947c85ec339884b30ef6a08133e9311fdae1 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 4 Mar 2009 12:06:34 -0800 Subject: vfs: simple_set_mnt() should return void simple_set_mnt() is defined as returning 'int' but always returns 0. Callers assume simple_set_mnt() never fails and don't properly cleanup if it were to _ever_ fail. For instance, get_sb_single() and get_sb_nodev() should: up_write(sb->s_unmount); deactivate_super(sb); if simple_set_mnt() fails. Since simple_set_mnt() never fails, would be cleaner if it did not return anything. [akpm@linux-foundation.org: fix build] Signed-off-by: Sukadev Bhattiprolu Acked-by: Serge Hallyn Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index c2c4454a268a..a7d73914a9f7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1719,7 +1719,7 @@ struct super_block *sget(struct file_system_type *type, extern int get_sb_pseudo(struct file_system_type *, char *, const struct super_operations *ops, unsigned long, struct vfsmount *mnt); -extern int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); +extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); int __put_super_and_need_restart(struct super_block *sb); /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ -- cgit From 28a62277e06f93729d0340d9659153dcfbdbe16d Mon Sep 17 00:00:00 2001 From: Ben Gamari Date: Tue, 17 Feb 2009 20:08:49 -0500 Subject: drm: Convert proc files to seq_file and introduce debugfs The old mechanism to formatting proc files is extremely ugly. 
The seq_file API was designed specifically for cases like this and greatly simplifies the process. Also, most of the files in /proc really don't belong there. This patch introduces the infrastructure for putting these into debugfs and exposes all of the proc files in debugfs as well. Signed-off-by: Ben Gamari Signed-off-by: Eric Anholt --- include/drm/drmP.h | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 76 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drmP.h b/include/drm/drmP.h index e5f4ae989abf..c19a93c3be85 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -758,6 +758,8 @@ struct drm_driver { int (*proc_init)(struct drm_minor *minor); void (*proc_cleanup)(struct drm_minor *minor); + int (*debugfs_init)(struct drm_minor *minor); + void (*debugfs_cleanup)(struct drm_minor *minor); /** * Driver-specific constructor for drm_gem_objects, to set up @@ -793,6 +795,48 @@ struct drm_driver { #define DRM_MINOR_CONTROL 2 #define DRM_MINOR_RENDER 3 + +/** + * debugfs node list. This structure represents a debugfs file to + * be created by the drm core + */ +struct drm_debugfs_list { + const char *name; /** file name */ + int (*show)(struct seq_file*, void*); /** show callback */ + u32 driver_features; /**< Required driver features for this entry */ +}; + +/** + * debugfs node structure. This structure represents a debugfs file. + */ +struct drm_debugfs_node { + struct list_head list; + struct drm_minor *minor; + struct drm_debugfs_list *debugfs_ent; + struct dentry *dent; +}; + +/** + * Info file list entry. This structure represents a debugfs or proc file to + * be created by the drm core + */ +struct drm_info_list { + const char *name; /** file name */ + int (*show)(struct seq_file*, void*); /** show callback */ + u32 driver_features; /**< Required driver features for this entry */ + void *data; +}; + +/** + * debugfs node structure. This structure represents a debugfs file. 
+ */ +struct drm_info_node { + struct list_head list; + struct drm_minor *minor; + struct drm_info_list *info_ent; + struct dentry *dent; +}; + /** * DRM minor structure. This structure represents a drm minor number. */ @@ -802,7 +846,12 @@ struct drm_minor { dev_t device; /**< Device number for mknod */ struct device kdev; /**< Linux device */ struct drm_device *dev; - struct proc_dir_entry *dev_root; /**< proc directory entry */ + + struct proc_dir_entry *proc_root; /**< proc directory entry */ + struct drm_info_node proc_nodes; + struct dentry *debugfs_root; + struct drm_info_node debugfs_nodes; + struct drm_master *master; /* currently active master for this node */ struct list_head master_list; struct drm_mode_group mode_group; @@ -1258,6 +1307,7 @@ extern unsigned int drm_debug; extern struct class *drm_class; extern struct proc_dir_entry *drm_proc_root; +extern struct dentry *drm_debugfs_root; extern struct idr drm_minors_idr; @@ -1268,6 +1318,31 @@ extern int drm_proc_init(struct drm_minor *minor, int minor_id, struct proc_dir_entry *root); extern int drm_proc_cleanup(struct drm_minor *minor, struct proc_dir_entry *root); + /* Debugfs support */ +#if defined(CONFIG_DEBUG_FS) +extern int drm_debugfs_init(struct drm_minor *minor, int minor_id, + struct dentry *root); +extern int drm_debugfs_create_files(struct drm_info_list *files, int count, + struct dentry *root, struct drm_minor *minor); +extern int drm_debugfs_remove_files(struct drm_info_list *files, int count, + struct drm_minor *minor); +extern int drm_debugfs_cleanup(struct drm_minor *minor); +#endif + + /* Info file support */ +extern int drm_name_info(struct seq_file *m, void *data); +extern int drm_vm_info(struct seq_file *m, void *data); +extern int drm_queues_info(struct seq_file *m, void *data); +extern int drm_bufs_info(struct seq_file *m, void *data); +extern int drm_vblank_info(struct seq_file *m, void *data); +extern int drm_clients_info(struct seq_file *m, void* data); +extern int 
drm_gem_name_info(struct seq_file *m, void *data); +extern int drm_gem_object_info(struct seq_file *m, void* data); + +#if DRM_DEBUG_CODE +extern int drm_vma_info(struct seq_file *m, void *data); +#endif + /* Scatter Gather Support (drm_scatter.h) */ extern void drm_sg_cleanup(struct drm_sg_mem * entry); extern int drm_sg_alloc_ioctl(struct drm_device *dev, void *data, -- cgit From 2177832f2e20fceb32142bb4fd33ae68c8af8c5a Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 23 Feb 2009 15:19:16 +0800 Subject: agp/intel: Add support for new intel chipset. This is a G33-like desktop and mobile chipset. Signed-off-by: Shaohua Li Signed-off-by: Eric Anholt --- include/drm/drm_pciids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 5165f240aa68..76c4c8243038 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -418,4 +418,6 @@ {0x8086, 0x2e02, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0x2e12, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0x2e22, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ + {0x8086, 0xa001, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ + {0x8086, 0xa011, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0, 0, 0} -- cgit From d2dd14ac1847082d4bb955619e86ed315c0ecd20 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sat, 28 Mar 2009 21:34:41 +0100 Subject: i2c-nforce2: Add support for MCP67, MCP73, MCP78S and MCP79 The MCP78S and MCP79 appear to be compatible with the previous nForce chips as far as the SMBus controller is concerned. The MCP67 and MCP73 were not tested yet but I'd be very surprised if they weren't compatible too. 
Signed-off-by: Jean Delvare Cc: Oleg Ryjkov Cc: Malcolm Lalkaka Cc: Zbigniew Luszpinski --- include/linux/pci_ids.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 05dfa7c4fb64..5109fecde284 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1237,6 +1237,7 @@ #define PCI_DEVICE_ID_NVIDIA_NVENET_21 0x0451 #define PCI_DEVICE_ID_NVIDIA_NVENET_22 0x0452 #define PCI_DEVICE_ID_NVIDIA_NVENET_23 0x0453 +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_SMBUS 0x0542 #define PCI_DEVICE_ID_NVIDIA_NVENET_24 0x054C #define PCI_DEVICE_ID_NVIDIA_NVENET_25 0x054D #define PCI_DEVICE_ID_NVIDIA_NVENET_26 0x054E @@ -1247,11 +1248,14 @@ #define PCI_DEVICE_ID_NVIDIA_NVENET_31 0x07DF #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_IDE 0x0560 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_IDE 0x056C +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP78S_SMBUS 0x0752 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP77_IDE 0x0759 #define PCI_DEVICE_ID_NVIDIA_NVENET_32 0x0760 #define PCI_DEVICE_ID_NVIDIA_NVENET_33 0x0761 #define PCI_DEVICE_ID_NVIDIA_NVENET_34 0x0762 #define PCI_DEVICE_ID_NVIDIA_NVENET_35 0x0763 +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_SMBUS 0x07D8 +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP79_SMBUS 0x0AA2 #define PCI_DEVICE_ID_NVIDIA_NVENET_36 0x0AB0 #define PCI_DEVICE_ID_NVIDIA_NVENET_37 0x0AB1 #define PCI_DEVICE_ID_NVIDIA_NVENET_38 0x0AB2 -- cgit From eff9ec95efaaf6b12d230f0ea7d3c295d3bc9d57 Mon Sep 17 00:00:00 2001 From: Marco Aurelio da Costa Date: Sat, 28 Mar 2009 21:34:44 +0100 Subject: i2c-algo-pca: Add PCA9665 support Add support for the PCA9665 I2C controller. 
Signed-off-by: Wolfram Sang Signed-off-by: Jean Delvare --- include/linux/i2c-algo-pca.h | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/i2c-algo-pca.h b/include/linux/i2c-algo-pca.h index adcb3dc7ac26..1364d62e2fbe 100644 --- a/include/linux/i2c-algo-pca.h +++ b/include/linux/i2c-algo-pca.h @@ -1,7 +1,14 @@ #ifndef _LINUX_I2C_ALGO_PCA_H #define _LINUX_I2C_ALGO_PCA_H -/* Clock speeds for the bus */ +/* Chips known to the pca algo */ +#define I2C_PCA_CHIP_9564 0x00 +#define I2C_PCA_CHIP_9665 0x01 + +/* Internal period for PCA9665 oscilator */ +#define I2C_PCA_OSC_PER 3 /* e10-8s */ + +/* Clock speeds for the bus for PCA9564*/ #define I2C_PCA_CON_330kHz 0x00 #define I2C_PCA_CON_288kHz 0x01 #define I2C_PCA_CON_217kHz 0x02 @@ -18,6 +25,26 @@ #define I2C_PCA_ADR 0x02 /* OWN ADR Read/Write */ #define I2C_PCA_CON 0x03 /* CONTROL Read/Write */ +/* PCA9665 registers */ +#define I2C_PCA_INDPTR 0x00 /* INDIRECT Pointer Write Only */ +#define I2C_PCA_IND 0x02 /* INDIRECT Read/Write */ + +/* PCA9665 indirect registers */ +#define I2C_PCA_ICOUNT 0x00 /* Byte Count for buffered mode */ +#define I2C_PCA_IADR 0x01 /* OWN ADR */ +#define I2C_PCA_ISCLL 0x02 /* SCL LOW period */ +#define I2C_PCA_ISCLH 0x03 /* SCL HIGH period */ +#define I2C_PCA_ITO 0x04 /* TIMEOUT */ +#define I2C_PCA_IPRESET 0x05 /* Parallel bus reset */ +#define I2C_PCA_IMODE 0x06 /* I2C Bus mode */ + +/* PCA9665 I2C bus mode */ +#define I2C_PCA_MODE_STD 0x00 /* Standard mode */ +#define I2C_PCA_MODE_FAST 0x01 /* Fast mode */ +#define I2C_PCA_MODE_FASTP 0x02 /* Fast Plus mode */ +#define I2C_PCA_MODE_TURBO 0x03 /* Turbo mode */ + + #define I2C_PCA_CON_AA 0x80 /* Assert Acknowledge */ #define I2C_PCA_CON_ENSIO 0x40 /* Enable */ #define I2C_PCA_CON_STA 0x20 /* Start */ @@ -31,7 +58,9 @@ struct i2c_algo_pca_data { int (*read_byte) (void *data, int reg); int (*wait_for_completion) (void *data); void (*reset_chip) (void *data); - /* 
i2c_clock values are defined in linux/i2c-algo-pca.h */ + /* For PCA9564, use one of the predefined frequencies: + * 330000, 288000, 217000, 146000, 88000, 59000, 44000, 36000 + * For PCA9665, use the frequency you want here. */ unsigned int i2c_clock; }; -- cgit From 8e99ada8deaa9033600cd2c7d0a9366b0e99ab68 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 28 Mar 2009 21:34:45 +0100 Subject: i2c-algo-pca: Rework waiting for a free bus Waiting for a free bus now accepts the timeout value in jiffies and does proper checking using time_before. Signed-off-by: Wolfram Sang Signed-off-by: Jean Delvare --- include/linux/i2c-pca-platform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/i2c-pca-platform.h b/include/linux/i2c-pca-platform.h index 3d191873f2d1..aba33759dec4 100644 --- a/include/linux/i2c-pca-platform.h +++ b/include/linux/i2c-pca-platform.h @@ -6,7 +6,7 @@ struct i2c_pca9564_pf_platform_data { * not supplied (negative value), but it * cannot exit some error conditions then */ int i2c_clock_speed; /* values are defined in linux/i2c-algo-pca.h */ - int timeout; /* timeout = this value * 10us */ + int timeout; /* timeout in jiffies */ }; #endif /* I2C_PCA9564_PLATFORM_H */ -- cgit From 506a8b6c27cb08998dc13069fbdf6eb7ec748b99 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Sat, 28 Mar 2009 21:34:46 +0100 Subject: i2c-piix4: Add support for the Broadcom HT1100 chipset Add support for the Broadcom HT1100 LD chipset (SMBus function.) 
Signed-off-by: Flavio Leitner Signed-off-by: Jean Delvare --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 5109fecde284..2c9e8080da5e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1479,6 +1479,7 @@ #define PCI_DEVICE_ID_SERVERWORKS_HT1000IDE 0x0214 #define PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2 0x0217 #define PCI_DEVICE_ID_SERVERWORKS_CSB6LPC 0x0227 +#define PCI_DEVICE_ID_SERVERWORKS_HT1100LD 0x0408 #define PCI_VENDOR_ID_SBE 0x1176 #define PCI_DEVICE_ID_SBE_WANXL100 0x0301 -- cgit