diff options
Diffstat (limited to 'scripts/lib/kdoc/kdoc_parser.py')
| -rw-r--r-- | scripts/lib/kdoc/kdoc_parser.py | 848 | 
1 files changed, 414 insertions, 434 deletions
| diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index fe730099eca8..2376f180b1fa 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -46,7 +46,7 @@ doc_decl = doc_com + KernRe(r'(\w+)', cache=False)  known_section_names = 'description|context|returns?|notes?|examples?'  known_sections = KernRe(known_section_names, flags = re.I)  doc_sect = doc_com + \ -    KernRe(r'\s*(\@[.\w]+|\@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', +    KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',             flags=re.I, cache=False)  doc_content = doc_com_body + KernRe(r'(.*)', cache=False) @@ -54,13 +54,11 @@ doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)  doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)  doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)  doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) -attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", -               flags=re.I | re.S, cache=False)  export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)  export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) -type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) +type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)  #  # Tests for the beginning of a kerneldoc block in its various forms. @@ -75,12 +73,154 @@ doc_begin_func = KernRe(str(doc_com) +			# initial " * '                          cache = False)  # +# Here begins a long set of transformations to turn structure member prefixes +# and macro invocations into something we can parse and generate kdoc for. +# +struct_args_pattern = r'([^,)]+)' + +struct_xforms = [ +    # Strip attributes +    (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), +    (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), +    (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), +    (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), +    (KernRe(r'\s*__packed\s*', re.S), ' '), +    (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), +    (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), +    (KernRe(r'\s*____cacheline_aligned', re.S), ' '), +    (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), +    # +    # Unwrap struct_group macros based on this definition: +    # __struct_group(TAG, NAME, ATTRS, MEMBERS...) +    # which has variants like: struct_group(NAME, MEMBERS...) +    # Only MEMBERS arguments require documentation. +    # +    # Parsing them happens on two steps: +    # +    # 1. drop struct group arguments that aren't at MEMBERS, +    #    storing them as STRUCT_GROUP(MEMBERS) +    # +    # 2. remove STRUCT_GROUP() ancillary macro. +    # +    # The original logic used to remove STRUCT_GROUP() using an +    # advanced regex: +    # +    #   \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; +    # +    # with two patterns that are incompatible with +    # Python re module, as it has: +    # +    #   - a recursive pattern: (?1) +    #   - an atomic grouping: (?>...) +    # +    # I tried a simpler version: but it didn't work either: +    #   \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; +    # +    # As it doesn't properly match the end parenthesis on some cases. +    # +    # So, a better solution was crafted: there's now a NestedMatch +    # class that ensures that delimiters after a search are properly +    # matched. So, the implementation to drop STRUCT_GROUP() will be +    # handled in separate. +    # +    (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), +    (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), +    (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), +    (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), +    # +    # Replace macros +    # +    # TODO: use NestedMatch for FOO($1, $2, ...) matches +    # +    # it is better to also move those to the NestedMatch logic, +    # to ensure that parenthesis will be properly matched. +    # +    (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), +     r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), +    (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), +     r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), +    (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', +            re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), +    (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', +            re.S), r'unsigned long \1[1 << ((\2) - 1)]'), +    (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + +            r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), +    (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + +            struct_args_pattern + r'\)', re.S), r'\2 *\1'), +    (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + +            struct_args_pattern + r'\)', re.S), r'\1 \2[]'), +    (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), +    (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), +] +# +# Regexes here are guaranteed to have the end limiter matching +# the start delimiter. Yet, right now, only one replace group +# is allowed. +# +struct_nested_prefixes = [ +    (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), +] + +# +# Transforms for function prototypes +# +function_xforms  = [ +    (KernRe(r"^static +"), ""), +    (KernRe(r"^extern +"), ""), +    (KernRe(r"^asmlinkage +"), ""), +    (KernRe(r"^inline +"), ""), +    (KernRe(r"^__inline__ +"), ""), +    (KernRe(r"^__inline +"), ""), +    (KernRe(r"^__always_inline +"), ""), +    (KernRe(r"^noinline +"), ""), +    (KernRe(r"^__FORTIFY_INLINE +"), ""), +    (KernRe(r"__init +"), ""), +    (KernRe(r"__init_or_module +"), ""), +    (KernRe(r"__deprecated +"), ""), +    (KernRe(r"__flatten +"), ""), +    (KernRe(r"__meminit +"), ""), +    (KernRe(r"__must_check +"), ""), +    (KernRe(r"__weak +"), ""), +    (KernRe(r"__sched +"), ""), +    (KernRe(r"_noprof"), ""), +    (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), +    (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), +    (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), +    (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), +    (KernRe(r"__attribute_const__ +"), ""), +    (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), +] + +# +# Apply a set of transforms to a block of text. +# +def apply_transforms(xforms, text): +    for search, subst in xforms: +        text = search.sub(subst, text) +    return text + +#  # A little helper to get rid of excess white space  #  multi_space = KernRe(r'\s\s+')  def trim_whitespace(s):      return multi_space.sub(' ', s.strip()) +# +# Remove struct/enum members that have been marked "private". +# +def trim_private_members(text): +    # +    # First look for a "public:" block that ends a private region, then +    # handle the "private until the end" case. +    # +    text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text) +    text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text) +    # +    # We needed the comments to do the above, but now we can take them out. +    # +    return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip() +  class state:      """      State machine enums @@ -318,36 +458,26 @@ class KernelDoc:          param = KernRe(r'[\[\)].*').sub('', param, count=1) -        if dtype == "" and param.endswith("..."): -            if KernRe(r'\w\.\.\.$').search(param): -                # For named variable parameters of the form `x...`, -                # remove the dots -                param = param[:-3] -            else: -                # Handles unnamed variable parameters -                param = "..." - -            if param not in self.entry.parameterdescs or \ -                not self.entry.parameterdescs[param]: - -                self.entry.parameterdescs[param] = "variable arguments" - -        elif dtype == "" and (not param or param == "void"): -            param = "void" -            self.entry.parameterdescs[param] = "no arguments" - -        elif dtype == "" and param in ["struct", "union"]: -            # Handle unnamed (anonymous) union or struct -            dtype = param -            param = "{unnamed_" + param + "}" -            self.entry.parameterdescs[param] = "anonymous\n" -            self.entry.anon_struct_union = True - -        # Handle cache group enforcing variables: they do not need -        # to be described in header files -        elif "__cacheline_group" in param: -            # Ignore __cacheline_group_begin and __cacheline_group_end -            return +        # +        # Look at various "anonymous type" cases. +        # +        if dtype == '': +            if param.endswith("..."): +                if len(param) > 3: # there is a name provided, use that +                    param = param[:-3] +                if not self.entry.parameterdescs.get(param): +                    self.entry.parameterdescs[param] = "variable arguments" + +            elif (not param) or param == "void": +                param = "void" +                self.entry.parameterdescs[param] = "no arguments" + +            elif param in ["struct", "union"]: +                # Handle unnamed (anonymous) union or struct +                dtype = param +                param = "{unnamed_" + param + "}" +                self.entry.parameterdescs[param] = "anonymous\n" +                self.entry.anon_struct_union = True          # Warn if parameter has no description          # (but ignore ones starting with # as these are not parameters @@ -389,9 +519,6 @@ class KernelDoc:              args = arg_expr.sub(r"\1#", args)          for arg in args.split(splitter): -            # Strip comments -            arg = KernRe(r'\/\*.*\*\/').sub('', arg) -              # Ignore argument attributes              arg = KernRe(r'\sPOS0?\s').sub(' ', arg) @@ -407,81 +534,76 @@ class KernelDoc:                  # Treat preprocessor directive as a typeless variable                  self.push_parameter(ln, decl_type, arg, "",                                      "", declaration_name) - +            # +            # The pointer-to-function case. +            #              elif KernRe(r'\(.+\)\s*\(').search(arg): -                # Pointer-to-function -                  arg = arg.replace('#', ',') - -                r = KernRe(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') +                r = KernRe(r'[^\(]+\(\*?\s*'  # Everything up to "(*" +                           r'([\w\[\].]*)'    # Capture the name and possible [array] +                           r'\s*\)')	      # Make sure the trailing ")" is there                  if r.match(arg):                      param = r.group(1)                  else:                      self.emit_msg(ln, f"Invalid param: {arg}")                      param = arg - -                dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) -                self.push_parameter(ln, decl_type, param, dtype, -                                    arg, declaration_name) - +                dtype = arg.replace(param, '') +                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) +            # +            # The array-of-pointers case.  Dig the parameter name out from the middle +            # of the declaration. +            #              elif KernRe(r'\(.+\)\s*\[').search(arg): -                # Array-of-pointers - -                arg = arg.replace('#', ',') -                r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') +                r = KernRe(r'[^\(]+\(\s*\*\s*'		# Up to "(" and maybe "*" +                           r'([\w.]*?)'			# The actual pointer name +                           r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion]                  if r.match(arg):                      param = r.group(1)                  else:                      self.emit_msg(ln, f"Invalid param: {arg}")                      param = arg - -                dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) - -                self.push_parameter(ln, decl_type, param, dtype, -                                    arg, declaration_name) - +                dtype = arg.replace(param, '') +                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)              elif arg: +                # +                # Clean up extraneous spaces and split the string at commas; the first +                # element of the resulting list will also include the type information. +                #                  arg = KernRe(r'\s*:\s*').sub(":", arg)                  arg = KernRe(r'\s*\[').sub('[', arg) -                  args = KernRe(r'\s*,\s*').split(arg) -                if args[0] and '*' in args[0]: -                    args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) - -                first_arg = [] -                r = KernRe(r'^(.*\s+)(.*?\[.*\].*)$') -                if args[0] and r.match(args[0]): -                    args.pop(0) -                    first_arg.extend(r.group(1)) -                    first_arg.append(r.group(2)) +                args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) +                # +                # args[0] has a string of "type a".  If "a" includes an [array] +                # declaration, we want to not be fooled by any white space inside +                # the brackets, so detect and handle that case specially. +                # +                r = KernRe(r'^([^[\]]*\s+)(.*)$') +                if r.match(args[0]): +                    args[0] = r.group(2) +                    dtype = r.group(1)                  else: -                    first_arg = KernRe(r'\s+').split(args.pop(0)) - -                args.insert(0, first_arg.pop()) -                dtype = ' '.join(first_arg) +                    # No space in args[0]; this seems wrong but preserves previous behavior +                    dtype = '' +                bitfield_re = KernRe(r'(.*?):(\w+)')                  for param in args: -                    if KernRe(r'^(\*+)\s*(.*)').match(param): -                        r = KernRe(r'^(\*+)\s*(.*)') -                        if not r.match(param): -                            self.emit_msg(ln, f"Invalid param: {param}") -                            continue - -                        param = r.group(1) - +                    # +                    # For pointers, shift the star(s) from the variable name to the +                    # type declaration. +                    # +                    r = KernRe(r'^(\*+)\s*(.*)') +                    if r.match(param):                          self.push_parameter(ln, decl_type, r.group(2),                                              f"{dtype} {r.group(1)}",                                              arg, declaration_name) - -                    elif KernRe(r'(.*?):(\w+)').search(param): -                        r = KernRe(r'(.*?):(\w+)') -                        if not r.match(param): -                            self.emit_msg(ln, f"Invalid param: {param}") -                            continue - +                    # +                    # Perform a similar shift for bitfields. +                    # +                    elif bitfield_re.search(param):                          if dtype != "":  # Skip unnamed bit-fields -                            self.push_parameter(ln, decl_type, r.group(1), -                                                f"{dtype}:{r.group(2)}", +                            self.push_parameter(ln, decl_type, bitfield_re.group(1), +                                                f"{dtype}:{bitfield_re.group(2)}",                                                  arg, declaration_name)                      else:                          self.push_parameter(ln, decl_type, param, dtype, @@ -520,13 +642,11 @@ class KernelDoc:              self.emit_msg(ln,                            f"No description found for return value of '{declaration_name}'") -    def dump_struct(self, ln, proto): -        """ -        Store an entry for an struct or union -        """ - +    # +    # Split apart a structure prototype; returns (struct|union, name, members) or None +    # +    def split_struct_proto(self, proto):          type_pattern = r'(struct|union)' -          qualifiers = [              "__attribute__",              "__packed", @@ -534,288 +654,202 @@ class KernelDoc:              "____cacheline_aligned_in_smp",              "____cacheline_aligned",          ] -          definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" -        struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)') - -        # Extract struct/union definition -        members = None -        declaration_name = None -        decl_type = None          r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)          if r.search(proto): -            decl_type = r.group(1) -            declaration_name = r.group(2) -            members = r.group(3) +            return (r.group(1), r.group(2), r.group(3))          else:              r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') -              if r.search(proto): -                decl_type = r.group(1) -                declaration_name = r.group(3) -                members = r.group(2) - -        if not members: -            self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") -            return - -        if self.entry.identifier != declaration_name: -            self.emit_msg(ln, -                          f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n") -            return - -        args_pattern = r'([^,)]+)' - -        sub_prefixes = [ -            (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''), -            (KernRe(r'\/\*\s*private:.*', re.S | re.I), ''), - -            # Strip comments -            (KernRe(r'\/\*.*?\*\/', re.S), ''), - -            # Strip attributes -            (attribute, ' '), -            (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), -            (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), -            (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), -            (KernRe(r'\s*__packed\s*', re.S), ' '), -            (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), -            (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), -            (KernRe(r'\s*____cacheline_aligned', re.S), ' '), - -            # Unwrap struct_group macros based on this definition: -            # __struct_group(TAG, NAME, ATTRS, MEMBERS...) -            # which has variants like: struct_group(NAME, MEMBERS...) -            # Only MEMBERS arguments require documentation. -            # -            # Parsing them happens on two steps: -            # -            # 1. drop struct group arguments that aren't at MEMBERS, -            #    storing them as STRUCT_GROUP(MEMBERS) -            # -            # 2. remove STRUCT_GROUP() ancillary macro. -            # -            # The original logic used to remove STRUCT_GROUP() using an -            # advanced regex: -            # -            #   \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; -            # -            # with two patterns that are incompatible with -            # Python re module, as it has: -            # -            #   - a recursive pattern: (?1) -            #   - an atomic grouping: (?>...) -            # -            # I tried a simpler version: but it didn't work either: -            #   \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; -            # -            # As it doesn't properly match the end parenthesis on some cases. -            # -            # So, a better solution was crafted: there's now a NestedMatch -            # class that ensures that delimiters after a search are properly -            # matched. So, the implementation to drop STRUCT_GROUP() will be -            # handled in separate. - -            (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), -            (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), -            (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), -            (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), - -            # Replace macros -            # -            # TODO: use NestedMatch for FOO($1, $2, ...) matches -            # -            # it is better to also move those to the NestedMatch logic, -            # to ensure that parenthesis will be properly matched. - -            (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), -            (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), -            (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), -            (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), -            (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), -            (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), -            (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), -            (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), -            (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), -            (KernRe(r'VIRTIO_DECLARE_FEATURES\s*\(' + args_pattern + r'\)', re.S), r'u64 \1; u64 \1_array[VIRTIO_FEATURES_DWORDS]'), -        ] - -        # Regexes here are guaranteed to have the end limiter matching -        # the start delimiter. Yet, right now, only one replace group -        # is allowed. - -        sub_nested_prefixes = [ -            (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), -        ] - -        for search, sub in sub_prefixes: -            members = search.sub(sub, members) - -        nested = NestedMatch() - -        for search, sub in sub_nested_prefixes: -            members = nested.sub(search, sub, members) - -        # Keeps the original declaration as-is -        declaration = members - -        # Split nested struct/union elements -        # -        # This loop was simpler at the original kernel-doc perl version, as -        #   while ($members =~ m/$struct_members/) { ... } -        # reads 'members' string on each interaction. -        # -        # Python behavior is different: it parses 'members' only once, -        # creating a list of tuples from the first interaction. +                return (r.group(1), r.group(3), r.group(2)) +        return None +    # +    # Rewrite the members of a structure or union for easier formatting later on. +    # Among other things, this function will turn a member like: +    # +    #  struct { inner_members; } foo; +    # +    # into: +    # +    #  struct foo; inner_members; +    # +    def rewrite_struct_members(self, members):          # -        # On other words, this won't get nested structs. +        # Process struct/union members from the most deeply nested outward.  The +        # trick is in the ^{ below - it prevents a match of an outer struct/union +        # until the inner one has been munged (removing the "{" in the process).          # -        # So, we need to have an extra loop on Python to override such -        # re limitation. - -        while True: -            tuples = struct_members.findall(members) -            if not tuples: -                break - +        struct_members = KernRe(r'(struct|union)'   # 0: declaration type +                                r'([^\{\};]+)' 	    # 1: possible name +                                r'(\{)' +                                r'([^\{\}]*)'       # 3: Contents of declaration +                                r'(\})' +                                r'([^\{\};]*)(;)')  # 5: Remaining stuff after declaration +        tuples = struct_members.findall(members) +        while tuples:              for t in tuples:                  newmember = "" -                maintype = t[0] -                s_ids = t[5] -                content = t[3] - -                oldmember = "".join(t) - -                for s_id in s_ids.split(','): +                oldmember = "".join(t) # Reconstruct the original formatting +                dtype, name, lbr, content, rbr, rest, semi = t +                # +                # Pass through each field name, normalizing the form and formatting. +                # +                for s_id in rest.split(','):                      s_id = s_id.strip() - -                    newmember += f"{maintype} {s_id}; " +                    newmember += f"{dtype} {s_id}; " +                    # +                    # Remove bitfield/array/pointer info, getting the bare name. +                    #                      s_id = KernRe(r'[:\[].*').sub('', s_id)                      s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) - +                    # +                    # Pass through the members of this inner structure/union. +                    #                      for arg in content.split(';'):                          arg = arg.strip() - -                        if not arg: -                            continue - -                        r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') +                        # +                        # Look for (type)(*name)(args) - pointer to function +                        # +                        r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')                          if r.match(arg): +                            dtype, name, extra = r.group(1), r.group(2), r.group(3)                              # Pointer-to-function -                            dtype = r.group(1) -                            name = r.group(2) -                            extra = r.group(3) - -                            if not name: -                                continue -                              if not s_id:                                  # Anonymous struct/union                                  newmember += f"{dtype}{name}{extra}; "                              else:                                  newmember += f"{dtype}{s_id}.{name}{extra}; " - +                        # +                        # Otherwise a non-function member. +                        #                          else: -                            arg = arg.strip() -                            # Handle bitmaps +                            # +                            # Remove bitmap and array portions and spaces around commas +                            #                              arg = KernRe(r':\s*\d+\s*').sub('', arg) - -                            # Handle arrays                              arg = KernRe(r'\[.*\]').sub('', arg) - -                            # Handle multiple IDs                              arg = KernRe(r'\s*,\s*').sub(',', arg) - +                            # +                            # Look for a normal decl - "type name[,name...]" +                            #                              r = KernRe(r'(.*)\s+([\S+,]+)') -                              if r.search(arg): -                                dtype = r.group(1) -                                names = r.group(2) +                                for name in r.group(2).split(','): +                                    name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name) +                                    if not s_id: +                                        # Anonymous struct/union +                                        newmember += f"{r.group(1)} {name}; " +                                    else: +                                        newmember += f"{r.group(1)} {s_id}.{name}; "                              else:                                  newmember += f"{arg}; " -                                continue - -                            for name in names.split(','): -                                name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip() - -                                if not name: -                                    continue - -                                if not s_id: -                                    # Anonymous struct/union -                                    newmember += f"{dtype} {name}; " -                                else: -                                    newmember += f"{dtype} {s_id}.{name}; " - +                # +                # At the end of the s_id loop, replace the original declaration with +                # the munged version. +                #                  members = members.replace(oldmember, newmember) +            # +            # End of the tuple loop - search again and see if there are outer members +            # that now turn up. +            # +            tuples = struct_members.findall(members) +        return members -        # Ignore other nested elements, like enums -        members = re.sub(r'(\{[^\{\}]*\})', '', members) - -        self.create_parameter_list(ln, decl_type, members, ';', -                                   declaration_name) -        self.check_sections(ln, declaration_name, decl_type) - -        # Adjust declaration for better display +    # +    # Format the struct declaration into a standard form for inclusion in the +    # resulting docs. +    # +    def format_struct_decl(self, declaration): +        # +        # Insert newlines, get rid of extra spaces. +        #          declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)          declaration = KernRe(r'\}\s+;').sub('};', declaration) - -        # Better handle inlined enums -        while True: -            r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') -            if not r.search(declaration): -                break - +        # +        # Format inline enums with each member on its own line. +        # +        r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') +        while r.search(declaration):              declaration = r.sub(r'\1,\n\2', declaration) - +        # +        # Now go through and supply the right number of tabs +        # for each line. +        #          def_args = declaration.split('\n')          level = 1          declaration = ""          for clause in def_args: +            clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1) +            if clause: +                if '}' in clause and level > 1: +                    level -= 1 +                if not clause.startswith('#'): +                    declaration += "\t" * level +                declaration += "\t" + clause + "\n" +                if "{" in clause and "}" not in clause: +                    level += 1 +        return declaration -            clause = clause.strip() -            clause = KernRe(r'\s+').sub(' ', clause, count=1) - -            if not clause: -                continue - -            if '}' in clause and level > 1: -                level -= 1 -            if not KernRe(r'^\s*#').match(clause): -                declaration += "\t" * level +    def dump_struct(self, ln, proto): +        """ +        Store an entry for an struct or union +        """ +        # +        # Do the basic parse to get the pieces of the declaration. +        # +        struct_parts = self.split_struct_proto(proto) +        if not struct_parts: +            self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") +            return +        decl_type, declaration_name, members = struct_parts -            declaration += "\t" + clause + "\n" -            if "{" in clause and "}" not in clause: -                level += 1 +        if self.entry.identifier != declaration_name: +            self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " +                          f"Prototype was for {decl_type} {declaration_name} instead\n") +            return +        # +        # Go through the list of members applying all of our transformations. +        # +        members = trim_private_members(members) +        members = apply_transforms(struct_xforms, members) +        nested = NestedMatch() +        for search, sub in struct_nested_prefixes: +            members = nested.sub(search, sub, members) +        # +        # Deal with embedded struct and union members, and drop enums entirely. +        # +        declaration = members +        members = self.rewrite_struct_members(members) +        members = re.sub(r'(\{[^\{\}]*\})', '', members) +        # +        # Output the result and we are done. +        # +        self.create_parameter_list(ln, decl_type, members, ';', +                                   declaration_name) +        self.check_sections(ln, declaration_name, decl_type)          self.output_declaration(decl_type, declaration_name, -                                definition=declaration, +                                definition=self.format_struct_decl(declaration),                                  purpose=self.entry.declaration_purpose)      def dump_enum(self, ln, proto):          """          Stores an enum inside self.entries array.          """ - -        # Ignore members marked private -        proto = KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto) -        proto = KernRe(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto) - -        # Strip comments -        proto = KernRe(r'\/\*.*?\*\/', flags=re.S).sub('', proto) - -        # Strip #define macros inside enums +        # +        # Strip preprocessor directives.  Note that this depends on the +        # trailing semicolon we added in process_proto_type(). +        #          proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) -          #          # Parse out the name and members of the enum.  Typedef form first.          #          r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')          if r.search(proto):              declaration_name = r.group(2) -            members = r.group(1).rstrip() +            members = trim_private_members(r.group(1))          #          # Failing that, look for a straight enum          # @@ -823,7 +857,7 @@ class KernelDoc:              r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')              if r.match(proto):                  declaration_name = r.group(1) -                members = r.group(2).rstrip() +                members = trim_private_members(r.group(2))          #          # OK, this isn't going to work.          # @@ -892,62 +926,31 @@ class KernelDoc:          Stores a function of function macro inside self.entries array.          """ -        func_macro = False +        found = func_macro = False          return_type = ''          decl_type = 'function' - -        # Prefixes that would be removed -        sub_prefixes = [ -            (r"^static +", "", 0), -            (r"^extern +", "", 0), -            (r"^asmlinkage +", "", 0), -            (r"^inline +", "", 0), -            (r"^__inline__ +", "", 0), -            (r"^__inline +", "", 0), -            (r"^__always_inline +", "", 0), -            (r"^noinline +", "", 0), -            (r"^__FORTIFY_INLINE +", "", 0), -            (r"__init +", "", 0), -            (r"__init_or_module +", "", 0), -            (r"__deprecated +", "", 0), -            (r"__flatten +", "", 0), -            (r"__meminit +", "", 0), -            (r"__must_check +", "", 0), -            (r"__weak +", "", 0), -            (r"__sched +", "", 0), -            (r"_noprof", "", 0), -            (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0), -            (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0), -            (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0), -            (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0), -            (r"__attribute_const__ +", "", 0), - -            # It seems that Python support for re.X is broken: -            # At least for me (Python 3.13), this didn't work -#            (r""" -#              __attribute__\s*\(\( -#                (?: -#                    [\w\s]+          # attribute name -#                    (?:\([^)]*\))?   # attribute arguments -#                    \s*,?            # optional comma at the end -#                )+ -#              \)\)\s+ -#             """, "", re.X), - -            # So, remove whitespaces and comments from it -            (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0), -        ] - -        for search, sub, flags in sub_prefixes: -            prototype = KernRe(search, flags).sub(sub, prototype) - -        # Macros are a special case, as they change the prototype format +        # +        # Apply the initial transformations. +        # +        prototype = apply_transforms(function_xforms, prototype) +        # +        # If we have a macro, remove the "#define" at the front. +        #          new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)          if new_proto != prototype: -            is_define_proto = True              prototype = new_proto -        else: -            is_define_proto = False +            # +            # Dispense with the simple "#define A B" case here; the key +            # is the space after the name of the symbol being defined. +            # NOTE that the seemingly misnamed "func_macro" indicates a +            # macro *without* arguments. +            # +            r = KernRe(r'^(\w+)\s+') +            if r.search(prototype): +                return_type = '' +                declaration_name = r.group(1) +                func_macro = True +                found = True          # Yes, this truly is vile.  We are looking for:          # 1. Return type (may be nothing if we're looking at a macro) @@ -965,91 +968,73 @@ class KernelDoc:          # - atomic_set (macro)          # - pci_match_device, __copy_to_user (long return type) -        name = r'[a-zA-Z0-9_~:]+' -        prototype_end1 = r'[^\(]*' -        prototype_end2 = r'[^\{]*' -        prototype_end = fr'\(({prototype_end1}|{prototype_end2})\)' - -        # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing group. -        # So, this needs to be mapped in Python with (?:...)? or (?:...)+ - +        name = r'\w+'          type1 = r'(?:[\w\s]+)?'          type2 = r'(?:[\w\s]+\*+)+' - -        found = False - -        if is_define_proto: -            r = KernRe(r'^()(' + name + r')\s+') - -            if r.search(prototype): -                return_type = '' -                declaration_name = r.group(2) -                func_macro = True - -                found = True - +        # +        # Attempt to match first on (args) with no internal parentheses; this +        # lets us easily filter out __acquires() and other post-args stuff.  If +        # that fails, just grab the rest of the line to the last closing +        # parenthesis. +        # +        proto_args = r'\(([^\(]*|.*)\)' +        # +        # (Except for the simple macro case) attempt to split up the prototype +        # in the various ways we understand. +        #          if not found:              patterns = [ -                rf'^()({name})\s*{prototype_end}', -                rf'^({type1})\s+({name})\s*{prototype_end}', -                rf'^({type2})\s*({name})\s*{prototype_end}', +                rf'^()({name})\s*{proto_args}', +                rf'^({type1})\s+({name})\s*{proto_args}', +                rf'^({type2})\s*({name})\s*{proto_args}',              ]              for p in patterns:                  r = KernRe(p) -                  if r.match(prototype): -                      return_type = r.group(1)                      declaration_name = r.group(2)                      args = r.group(3) -                      self.create_parameter_list(ln, decl_type, args, ',',                                                 declaration_name) -                      found = True                      break +        # +        # Parsing done; make sure that things are as we expect. +        #          if not found:              self.emit_msg(ln,                            f"cannot understand function prototype: '{prototype}'")              return -          if self.entry.identifier != declaration_name: -            self.emit_msg(ln, -                          f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead") +            self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). " +                          f"Prototype was for {declaration_name}() instead")              return -          self.check_sections(ln, declaration_name, "function") -          self.check_return_section(ln, declaration_name, return_type) +        # +        # Store the result. +        # +        self.output_declaration(decl_type, declaration_name, +                                typedef=('typedef' in return_type), +                                functiontype=return_type, +                                purpose=self.entry.declaration_purpose, +                                func_macro=func_macro) -        if 'typedef' in return_type: -            self.output_declaration(decl_type, declaration_name, -                                    typedef=True, -                                    functiontype=return_type, -                                    purpose=self.entry.declaration_purpose, -                                    func_macro=func_macro) -        else: -            self.output_declaration(decl_type, declaration_name, -                                    typedef=False, -                                    functiontype=return_type, -                                    purpose=self.entry.declaration_purpose, -                                    func_macro=func_macro)      def dump_typedef(self, ln, proto):          """          Stores a typedef inside self.entries array.          """ - -        typedef_type = r'((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' +        # +        # We start by looking for function typedefs. +        # +        typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'          typedef_ident = r'\*?\s*(\w\S+)\s*'          typedef_args = r'\s*\((.*)\);' -        typedef1 = KernRe(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) -        typedef2 = KernRe(r'typedef' + typedef_type + typedef_ident + typedef_args) - -        # Strip comments -        proto = KernRe(r'/\*.*?\*/', flags=re.S).sub('', proto) +        typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) +        typedef2 = KernRe(typedef_type + typedef_ident + typedef_args)          # Parse function typedef prototypes          for r in [typedef1, typedef2]: @@ -1065,21 +1050,16 @@ class KernelDoc:                                f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")                  return -            decl_type = 'function' -            self.create_parameter_list(ln, decl_type, args, ',', declaration_name) +            self.create_parameter_list(ln, 'function', args, ',', declaration_name) -            self.output_declaration(decl_type, declaration_name, +            self.output_declaration('function', declaration_name,                                      typedef=True,                                      functiontype=return_type,                                      purpose=self.entry.declaration_purpose)              return - -        # Handle nested parentheses or brackets -        r = KernRe(r'(\(*.\)\s*|\[*.\]\s*);$') -        while r.search(proto): -            proto = r.sub('', proto) - -        # Parse simple typedefs +        # +        # Not a function, try to parse a simple typedef. +        #          r = KernRe(r'typedef.*\s+(\w+)\s*;')          if r.match(proto):              declaration_name = r.group(1) @@ -1262,7 +1242,7 @@ class KernelDoc:              self.dump_section()              # Look for doc_com + <text> + doc_end: -            r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') +            r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/')              if r.match(line):                  self.emit_msg(ln, f"suspicious ending line: {line}") @@ -1473,7 +1453,7 @@ class KernelDoc:          """Ancillary routine to process a function prototype"""          # strip C99-style comments to end of line -        line = KernRe(r"\/\/.*$", re.S).sub('', line) +        line = KernRe(r"//.*$", re.S).sub('', line)          #          # Soak up the line's worth of prototype text, stopping at { or ; if present.          # | 
