From d966dc658ce381c56d85cd477e095944b8470379 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:11 +0800 Subject: scripts/kernel-doc.py: move KernelDoc class to a separate file In preparation for letting kerneldoc Sphinx extension to import Python libraries, move regex ancillary classes to a separate file. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/c76df228504e711c6b4bcd23d5a0ea1fda678cda.1744106241.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 1690 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 1690 insertions(+) create mode 100755 scripts/lib/kdoc/kdoc_parser.py (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py new file mode 100755 index 000000000000..3ce116595546 --- /dev/null +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -0,0 +1,1690 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab . +# +# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 + +""" +kdoc_parser +=========== + +Read a C language source or header FILE and extract embedded +documentation comments +""" + +import argparse +import re +from pprint import pformat + +from kdoc_re import NestedMatch, Re + + +# +# Regular expressions used to parse kernel-doc markups at KernelDoc class. +# +# Let's declare them in lowercase outside any class to make easier to +# convert from the python script. +# +# As those are evaluated at the beginning, no need to cache them +# + +# Allow whitespace at end of comment start. 
doc_start = Re(r'^/\*\*\s*$', cache=False)

doc_end = Re(r'\*/', cache=False)
doc_com = Re(r'\s*\*\s*', cache=False)
doc_com_body = Re(r'\s*\* ?', cache=False)
doc_decl = doc_com + Re(r'(\w+)', cache=False)

# @params and a strictly limited set of supported section names
# Specifically:
#   Match @word:
#         @...:
#         @{section-name}:
# while trying to not match literal block starts like "example::"
#
doc_sect = doc_com + \
    Re(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$',
       flags=re.I, cache=False)

doc_content = doc_com_body + Re(r'(.*)', cache=False)
doc_block = doc_com + Re(r'DOC:\s*(.*)?', cache=False)
doc_inline_start = Re(r'^\s*/\*\*\s*$', cache=False)
doc_inline_sect = Re(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
doc_inline_end = Re(r'^\s*\*/\s*$', cache=False)
doc_inline_oneline = Re(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False)
attribute = Re(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)",
               flags=re.I | re.S, cache=False)

export_symbol = Re(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
export_symbol_ns = Re(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)

type_param = Re(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)


class KernelDoc:
    """
    Read a C language source or header FILE and extract embedded
    documentation comments.

    Parsed declarations are accumulated in ``self.entries`` as
    ``(name, args)`` tuples; producing the actual output is left to
    other code.
    """

    # Parser states
    STATE_NORMAL = 0                # normal code
    STATE_NAME = 1                  # looking for function name
    STATE_BODY_MAYBE = 2            # body - or maybe more description
    STATE_BODY = 3                  # the body of the comment
    STATE_BODY_WITH_BLANK_LINE = 4  # the body which has a blank line
    STATE_PROTO = 5                 # scanning prototype
    STATE_DOCBLOCK = 6              # documentation block
    STATE_INLINE = 7                # gathering doc outside main block

    st_name = [
        "NORMAL",
        "NAME",
        "BODY_MAYBE",
        "BODY",
        "BODY_WITH_BLANK_LINE",
        "PROTO",
        "DOCBLOCK",
        "INLINE",
    ]

    # Inline documentation state
    STATE_INLINE_NA = 0     # not applicable ($state != STATE_INLINE)
    STATE_INLINE_NAME = 1   # looking for member name (@foo:)
    STATE_INLINE_TEXT = 2   # looking for member documentation
    STATE_INLINE_END = 3    # done
    STATE_INLINE_ERROR = 4  # error - Comment without header was found.
                            # Spit a warning as it's not
                            # proper kernel-doc and ignore the rest.

    st_inline_name = [
        "",
        "_NAME",
        "_TEXT",
        "_END",
        "_ERROR",
    ]

    # Section names

    section_default = "Description"  # default section
    section_intro = "Introduction"
    section_context = "Context"
    section_return = "Return"

    undescribed = "-- undescribed --"

    def __init__(self, config, fname):
        """Initialize internal variables"""

        self.fname = fname
        self.config = config

        # Initial state for the state machines
        self.state = self.STATE_NORMAL
        self.inline_doc_state = self.STATE_INLINE_NA

        # Store entry currently being processed
        self.entry = None

        # Place all potential outputs into an array
        self.entries = []

    def show_warnings(self, dtype, declaration_name):  # pylint: disable=W0613
        """
        Allow filtering out warnings
        """

        # TODO: implement it

        return True

    # TODO: rename to emit_message
    def emit_warning(self, ln, msg, warning=True):
        """
        Emit a message prefixed with "<file name>:<line number>".

        It is logged as a warning by default, or at info level when
        ``warning`` is false.
        """

        if warning:
            self.config.log.warning("%s:%d %s", self.fname, ln, msg)
        else:
            self.config.log.info("%s:%d %s", self.fname, ln, msg)

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.

        The current section is stored either as a parameter description
        (names starting with "@") or as a regular section. When
        ``start_new`` is true, the entry is then reset to an empty
        default section.
        """

        name = self.entry.section
        contents = self.entry.contents

        # TODO: we can prevent dumping empty sections here with:
        #
        #    if not self.entry.contents.strip("\n"):
        #        if start_new:
        #            self.entry.section = self.section_default
        #            self.entry.contents = ""
        #
        #        return
        #
        # But, as we want to be producing the same output of the
        # venerable kernel-doc Perl tool, let's just output everything,
        # at least for now

        if type_param.match(name):
            name = type_param.group(1)

            self.entry.parameterdescs[name] = contents
            self.entry.parameterdesc_start_lines[name] = self.entry.new_start_line

            self.entry.sectcheck += name + " "
            self.entry.new_start_line = 0

        elif name == "@...":
            name = "..."
            self.entry.parameterdescs[name] = contents
            self.entry.sectcheck += name + " "
            self.entry.parameterdesc_start_lines[name] = self.entry.new_start_line
            self.entry.new_start_line = 0

        else:
            if name in self.entry.sections and self.entry.sections[name] != "":
                # Only warn on user-specified duplicate section names
                if name != self.section_default:
                    self.emit_warning(self.entry.new_start_line,
                                      f"duplicate section name '{name}'\n")
                self.entry.sections[name] += contents
            else:
                self.entry.sections[name] = contents
                self.entry.sectionlist.append(name)
                self.entry.section_start_lines[name] = self.entry.new_start_line
                self.entry.new_start_line = 0

#        self.config.log.debug("Section: %s : %s", name, pformat(vars(self.entry)))

        if start_new:
            self.entry.section = self.section_default
            self.entry.contents = ""

    # TODO: rename it to store_declaration
    def output_declaration(self, dtype, name, **args):
        """
        Stores the entry into an entry array.

        The actual output and output filters will be handled elsewhere
        """

        # The implementation here is different than the original kernel-doc:
        # instead of checking for output filters or actually output anything,
        # it just stores the declaration content at self.entries, as the
        # output will happen on a separate class.
        #
        # For now, we're keeping the same name of the function just to make
        # easier to compare the source code of both scripts

        if "declaration_start_line" not in args:
            args["declaration_start_line"] = self.entry.declaration_start_line

        args["type"] = dtype

        # TODO: use collections.OrderedDict

        sections = args.get('sections', {})
        sectionlist = args.get('sectionlist', [])

        # Drop empty sections
        # TODO: improve it to emit warnings
        for section in ["Description", "Return"]:
            if section in sectionlist:
                if not sections[section].rstrip():
                    del sections[section]
                    sectionlist.remove(section)

        self.entries.append((name, args))

        self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))

    def reset_state(self, ln):
        """
        Ancillary routine to create a new entry. It initializes all
        variables used by the state machine.

        ``ln`` is recorded as the line where the declaration starts.
        """

        # Use a Namespace *instance*. Binding the argparse.Namespace class
        # itself would make every entry - and every KernelDoc object -
        # share a single set of attributes stored on a standard library
        # class.
        #
        # Attributes that are not reinitialized below are carried over
        # from the previous entry of this parser, so code relying on
        # values that outlive a reset keeps working as before.
        if isinstance(self.entry, argparse.Namespace):
            carried = vars(self.entry)
        else:
            carried = {}

        self.entry = argparse.Namespace(**carried)

        self.entry.contents = ""
        self.entry.function = ""
        self.entry.sectcheck = ""
        self.entry.struct_actual = ""
        self.entry.prototype = ""

        self.entry.parameterlist = []
        self.entry.parameterdescs = {}
        self.entry.parametertypes = {}
        self.entry.parameterdesc_start_lines = {}

        self.entry.section_start_lines = {}
        self.entry.sectionlist = []
        self.entry.sections = {}

        self.entry.anon_struct_union = False

        self.entry.leading_space = None

        # State flags
        self.state = self.STATE_NORMAL
        self.inline_doc_state = self.STATE_INLINE_NA
        self.entry.brcount = 0

        self.entry.in_doc_sect = False
        self.entry.declaration_start_line = ln

    def push_parameter(self, ln, decl_type, param, dtype,
                       org_arg, declaration_name):
        """
        Store parameters and their descriptions at self.entry.

        A warning is emitted when the parameter has no description.
        """

        if self.entry.anon_struct_union and dtype == "" and param == "}":
            return  # Ignore the ending }; from anonymous struct/union

        self.entry.anon_struct_union = False

        param = Re(r'[\[\)].*').sub('', param, count=1)

        if dtype == "" and param.endswith("..."):
            if Re(r'\w\.\.\.$').search(param):
                # For named variable parameters of the form `x...`,
                # remove the dots
                param = param[:-3]
            else:
                # Handles unnamed variable parameters
                param = "..."

            if param not in self.entry.parameterdescs or \
               not self.entry.parameterdescs[param]:

                self.entry.parameterdescs[param] = "variable arguments"

        elif dtype == "" and (not param or param == "void"):
            param = "void"
            self.entry.parameterdescs[param] = "no arguments"

        elif dtype == "" and param in ["struct", "union"]:
            # Handle unnamed (anonymous) union or struct
            dtype = param
            param = "{unnamed_" + param + "}"
            self.entry.parameterdescs[param] = "anonymous\n"
            self.entry.anon_struct_union = True

        # Handle cache group enforcing variables: they do not need
        # to be described in header files
        elif "__cacheline_group" in param:
            # Ignore __cacheline_group_begin and __cacheline_group_end
            return

        # Warn if parameter has no description
        # (but ignore ones starting with # as these are not parameters
        # but inline preprocessor statements)
        if param not in self.entry.parameterdescs and not param.startswith("#"):
            self.entry.parameterdescs[param] = self.undescribed

            if self.show_warnings(dtype, declaration_name) and "." not in param:
                if decl_type == 'function':
                    dname = f"{decl_type} parameter"
                else:
                    dname = f"{decl_type} member"

                self.emit_warning(ln,
                                  f"{dname} '{param}' not described in '{declaration_name}'")

        # Strip spaces from param so that it is one continuous string on
        # parameterlist. This fixes a problem where check_sections()
        # cannot find a parameter like "addr[6 + 2]" because it actually
        # appears as "addr[6", "+", "2]" on the parameter list.
        # However, it's better to maintain the param string unchanged for
        # output, so just weaken the string compare in check_sections()
        # to ignore "[blah" in a parameter string.

        self.entry.parameterlist.append(param)
        org_arg = Re(r'\s\s+').sub(' ', org_arg)
        self.entry.parametertypes[param] = org_arg

    def save_struct_actual(self, actual):
        """
        Strip all spaces from the actual param so that it looks like
        one string item.
        """

        # No count limit: every whitespace run has to go, not just the
        # first one, as check_sections() later splits struct_actual on
        # whitespace to recover the individual member names.
        actual = Re(r'\s*').sub("", actual)

        self.entry.struct_actual += actual + " "

    def create_parameter_list(self, ln, decl_type, args,
                              splitter, declaration_name):
        """
        Creates a list of parameters, storing them at self.entry.

        ``args`` is the raw parameter/member text and ``splitter`` the
        separator between items ("," for functions, ";" for struct
        members).
        """

        # temporarily replace all commas inside function pointer definition
        arg_expr = Re(r'(\([^\),]+),')
        while arg_expr.search(args):
            args = arg_expr.sub(r"\1#", args)

        for arg in args.split(splitter):
            # Strip comments
            arg = Re(r'\/\*.*\*\/').sub('', arg)

            # Ignore argument attributes
            arg = Re(r'\sPOS0?\s').sub(' ', arg)

            # Strip leading/trailing spaces
            arg = arg.strip()
            arg = Re(r'\s+').sub(' ', arg, count=1)

            if arg.startswith('#'):
                # Treat preprocessor directive as a typeless variable just to fill
                # corresponding data structures "correctly". Catch it later in
                # output_* subs.

                # Treat preprocessor directive as a typeless variable
                self.push_parameter(ln, decl_type, arg, "",
                                    "", declaration_name)

            elif Re(r'\(.+\)\s*\(').search(arg):
                # Pointer-to-function

                arg = arg.replace('#', ',')

                r = Re(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)')
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_warning(ln, f"Invalid param: {arg}")
                    param = arg

                dtype = Re(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg)
                self.save_struct_actual(param)
                self.push_parameter(ln, decl_type, param, dtype,
                                    arg, declaration_name)

            elif Re(r'\(.+\)\s*\[').search(arg):
                # Array-of-pointers

                arg = arg.replace('#', ',')
                r = Re(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)')
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_warning(ln, f"Invalid param: {arg}")
                    param = arg

                dtype = Re(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg)

                self.save_struct_actual(param)
                self.push_parameter(ln, decl_type, param, dtype,
                                    arg, declaration_name)

            elif arg:
                arg = Re(r'\s*:\s*').sub(":", arg)
                arg = Re(r'\s*\[').sub('[', arg)

                # One declaration may carry several names ("int a, *b"):
                # keep them on their own list rather than rebinding the
                # "args" parameter while its split result is iterated.
                decls = Re(r'\s*,\s*').split(arg)
                if decls[0] and '*' in decls[0]:
                    decls[0] = re.sub(r'(\*+)\s*', r' \1', decls[0])

                first_arg = []
                r = Re(r'^(.*\s+)(.*?\[.*\].*)$')
                if decls[0] and r.match(decls[0]):
                    decls.pop(0)
                    # The type part has to be split into words: extending
                    # the list with the bare string would add one item
                    # per character.
                    first_arg.extend(r.group(1).split())
                    first_arg.append(r.group(2))
                else:
                    first_arg = Re(r'\s+').split(decls.pop(0))

                # The last word is the first declared name; what is left
                # is the type shared by all names of this declaration.
                decls.insert(0, first_arg.pop())
                dtype = ' '.join(first_arg)

                pointer = Re(r'^(\*+)\s*(.*)')
                bitfield = Re(r'(.*?):(\w+)')

                for param in decls:
                    if pointer.match(param):
                        self.save_struct_actual(pointer.group(2))
                        self.push_parameter(ln, decl_type, pointer.group(2),
                                            f"{dtype} {pointer.group(1)}",
                                            arg, declaration_name)

                    elif bitfield.search(param):
                        if not bitfield.match(param):
                            self.emit_warning(ln, f"Invalid param: {param}")
                            continue

                        if dtype != "":  # Skip unnamed bit-fields
                            self.save_struct_actual(bitfield.group(1))
                            self.push_parameter(ln, decl_type, bitfield.group(1),
                                                f"{dtype}:{bitfield.group(2)}",
                                                arg, declaration_name)
                    else:
                        self.save_struct_actual(param)
                        self.push_parameter(ln, decl_type, param, dtype,
                                            arg, declaration_name)

    def check_sections(self, ln, decl_name, decl_type, sectcheck, prmscheck):
        """
        Check for errors inside sections, emitting warnings if not found
        parameters are described.

        ``sectcheck`` and ``prmscheck`` are whitespace-separated lists
        with the described names and the actual parameter/member names.
        """

        array_size = Re(r'\[.*\]')
        open_bracket = Re(r'\[.*')

        # Clean up the actual names only once, instead of doing it again
        # for every described name
        known = set()
        for prm in prmscheck.split():
            prm_clean = array_size.sub('', prm)
            prm_clean = attribute.sub('', prm_clean)

            # ignore array size in a parameter string;
            # however, the original param string may contain
            # spaces, e.g.:  addr[6 + 2]
            # and this appears in @prms as "addr[6" since the
            # parameter list is split at spaces;
            # hence just ignore "[..." for the sections check;
            prm_clean = open_bracket.sub('', prm_clean)

            known.add(prm_clean)

        if decl_type == 'function':
            dname = f"{decl_type} parameter"
        else:
            dname = f"{decl_type} member"

        for sect in sectcheck.split():
            if sect in known:
                continue

            self.emit_warning(ln,
                              f"Excess {dname} '{sect}' description in '{decl_name}'")

    def check_return_section(self, ln, declaration_name, return_type):
        """
        If the function doesn't return void, warns about the lack of a
        return description.
        """

        if not self.config.wreturn:
            return

        # Ignore an empty return type (It's a macro)
        # Ignore functions with a "void" return type (but not "void *")
        if not return_type or Re(r'void\s*\w*\s*$').search(return_type):
            return

        if not self.entry.sections.get("Return", None):
            self.emit_warning(ln,
                              f"No description found for return value of '{declaration_name}'")

    def dump_struct(self, ln, proto):
        """
        Store an entry for an struct or union
        """

        type_pattern = r'(struct|union)'

        qualifiers = [
            "__attribute__",
            "__packed",
            "__aligned",
            "____cacheline_aligned_in_smp",
            "____cacheline_aligned",
        ]

        definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"
        struct_members = Re(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)')

        # Extract struct/union definition
        members = None
        declaration_name = None
        decl_type = None

        r = Re(type_pattern + r'\s+(\w+)\s*' + definition_body)
        if r.search(proto):
            decl_type = r.group(1)
            declaration_name = r.group(2)
            members = r.group(3)
        else:
            r = Re(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')

            if r.search(proto):
                decl_type = r.group(1)
                declaration_name = r.group(3)
                members = r.group(2)

        if not members:
            self.emit_warning(ln, f"{proto} error: Cannot parse struct or union!")
            self.config.errors += 1
            return

        if self.entry.identifier != declaration_name:
            self.emit_warning(ln,
                              f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n")
            return

        args_pattern = r'([^,)]+)'

        # Regex flags are always passed by keyword, like everywhere else
        # in this file, so that they cannot be bound to another positional
        # parameter of Re().
        sub_prefixes = [
            (Re(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S | re.I), ''),
            (Re(r'\/\*\s*private:.*', flags=re.S | re.I), ''),

            # Strip comments
            (Re(r'\/\*.*?\*\/', flags=re.S), ''),

            # Strip attributes
            (attribute, ' '),
            (Re(r'\s*__aligned\s*\([^;]*\)', flags=re.S), ' '),
            (Re(r'\s*__counted_by\s*\([^;]*\)', flags=re.S), ' '),
            (Re(r'\s*__counted_by_(le|be)\s*\([^;]*\)', flags=re.S), ' '),
            (Re(r'\s*__packed\s*', flags=re.S), ' '),
            (Re(r'\s*CRYPTO_MINALIGN_ATTR', flags=re.S), ' '),
            (Re(r'\s*____cacheline_aligned_in_smp', flags=re.S), ' '),
            (Re(r'\s*____cacheline_aligned', flags=re.S), ' '),

            # Unwrap struct_group macros based on this definition:
            # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
            # which has variants like: struct_group(NAME, MEMBERS...)
            # Only MEMBERS arguments require documentation.
            #
            # Parsing them happens on two steps:
            #
            # 1. drop struct group arguments that aren't at MEMBERS,
            #    storing them as STRUCT_GROUP(MEMBERS)
            #
            # 2. remove STRUCT_GROUP() ancillary macro.
            #
            # The original logic used to remove STRUCT_GROUP() using an
            # advanced regex:
            #
            #   \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
            #
            # with two patterns that are incompatible with
            # Python re module, as it has:
            #
            #   - a recursive pattern: (?1)
            #   - an atomic grouping: (?>...)
            #
            # I tried a simpler version: but it didn't work either:
            #   \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
            #
            # As it doesn't properly match the end parenthesis on some cases.
            #
            # So, a better solution was crafted: there's now a NestedMatch
            # class that ensures that delimiters after a search are properly
            # matched. So, the implementation to drop STRUCT_GROUP() will be
            # handled in separate.

            (Re(r'\bstruct_group\s*\(([^,]*,)', flags=re.S), r'STRUCT_GROUP('),
            (Re(r'\bstruct_group_attr\s*\(([^,]*,){2}', flags=re.S), r'STRUCT_GROUP('),
            (Re(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', flags=re.S), r'struct \1 \2; STRUCT_GROUP('),
            (Re(r'\b__struct_group\s*\(([^,]*,){3}', flags=re.S), r'STRUCT_GROUP('),

            # Replace macros
            #
            # TODO: it is better to also move those to the NestedMatch logic,
            # to ensure that parenthesis will be properly matched.

            (Re(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', flags=re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
            (Re(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', flags=re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
            (Re(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', flags=re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
            (Re(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', flags=re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
            (Re(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', flags=re.S), r'\2 *\1'),
            (Re(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', flags=re.S), r'\2 *\1'),
            (Re(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', flags=re.S), r'\1 \2[]'),
            (Re(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', flags=re.S), r'dma_addr_t \1'),
            (Re(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', flags=re.S), r'__u32 \1'),
        ]

        # Regexes here are guaranteed to have the end limiter matching
        # the start delimiter. Yet, right now, only one replace group
        # is allowed.

        sub_nested_prefixes = [
            (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
        ]

        for search, sub in sub_prefixes:
            members = search.sub(sub, members)

        nested = NestedMatch()

        for search, sub in sub_nested_prefixes:
            members = nested.sub(search, sub, members)

        # Keeps the original declaration as-is
        declaration = members

        # Split nested struct/union elements
        #
        # This loop was simpler at the original kernel-doc perl version, as
        #   while ($members =~ m/$struct_members/) { ... }
        # reads 'members' string on each interaction.
        #
        # Python behavior is different: it parses 'members' only once,
        # creating a list of tuples from the first interaction.
        #
        # On other words, this won't get nested structs.
        #
        # So, we need to have an extra loop on Python to override such
        # re limitation.

        while True:
            tuples = struct_members.findall(members)
            if not tuples:
                break

            for t in tuples:
                newmember = ""
                maintype = t[0]
                s_ids = t[5]
                content = t[3]

                oldmember = "".join(t)

                for s_id in s_ids.split(','):
                    s_id = s_id.strip()

                    newmember += f"{maintype} {s_id}; "
                    s_id = Re(r'[:\[].*').sub('', s_id)
                    s_id = Re(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)

                    for arg in content.split(';'):
                        arg = arg.strip()

                        if not arg:
                            continue

                        r = Re(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)')
                        if r.match(arg):
                            # Pointer-to-function
                            dtype = r.group(1)
                            name = r.group(2)
                            extra = r.group(3)

                            if not name:
                                continue

                            if not s_id:
                                # Anonymous struct/union
                                newmember += f"{dtype}{name}{extra}; "
                            else:
                                newmember += f"{dtype}{s_id}.{name}{extra}; "

                        else:
                            arg = arg.strip()
                            # Handle bitmaps
                            arg = Re(r':\s*\d+\s*').sub('', arg)

                            # Handle arrays
                            arg = Re(r'\[.*\]').sub('', arg)

                            # Handle multiple IDs
                            arg = Re(r'\s*,\s*').sub(',', arg)

                            r = Re(r'(.*)\s+([\S+,]+)')

                            if r.search(arg):
                                dtype = r.group(1)
                                names = r.group(2)
                            else:
                                newmember += f"{arg}; "
                                continue

                            for name in names.split(','):
                                name = Re(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip()

                                if not name:
                                    continue

                                if not s_id:
                                    # Anonymous struct/union
                                    newmember += f"{dtype} {name}; "
                                else:
                                    newmember += f"{dtype} {s_id}.{name}; "

                members = members.replace(oldmember, newmember)

        # Ignore other nested elements, like enums
        members = re.sub(r'(\{[^\{\}]*\})', '', members)

        self.create_parameter_list(ln, decl_type, members, ';',
                                   declaration_name)
        self.check_sections(ln, declaration_name, decl_type,
                            self.entry.sectcheck, self.entry.struct_actual)

        # Adjust declaration for better display
        declaration = Re(r'([\{;])').sub(r'\1\n', declaration)
        declaration = Re(r'\}\s+;').sub('};', declaration)

        # Better handle inlined enums
        inlined_enum = Re(r'(enum\s+\{[^\}]+),([^\n])')
        while inlined_enum.search(declaration):
            declaration = inlined_enum.sub(r'\1,\n\2', declaration)

        def_args = declaration.split('\n')
        level = 1
        declaration = ""
        for clause in def_args:

            clause = clause.strip()
            clause = Re(r'\s+').sub(' ', clause, count=1)

            if not clause:
                continue

            if '}' in clause and level > 1:
                level -= 1

            if not Re(r'^\s*#').match(clause):
                declaration += "\t" * level

            declaration += "\t" + clause + "\n"
            if "{" in clause and "}" not in clause:
                level += 1

        # NOTE(review): the module name is read from self.entry here, while
        # dump_enum() and dump_function() read it from self.config - confirm
        # that entry.modulename is set by code outside this method.
        self.output_declaration(decl_type, declaration_name,
                                struct=declaration_name,
                                module=self.entry.modulename,
                                definition=declaration,
                                parameterlist=self.entry.parameterlist,
                                parameterdescs=self.entry.parameterdescs,
                                parametertypes=self.entry.parametertypes,
                                sectionlist=self.entry.sectionlist,
                                sections=self.entry.sections,
                                purpose=self.entry.declaration_purpose)

    def dump_enum(self, ln, proto):
        """
        Stores an enum inside self.entries array.
        """

        # Ignore members marked private
        proto = Re(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto)
        proto = Re(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto)

        # Strip comments
        proto = Re(r'\/\*.*?\*\/', flags=re.S).sub('', proto)

        # Strip #define macros inside enums
        proto = Re(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)

        members = None
        declaration_name = None

        r = Re(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
        if r.search(proto):
            declaration_name = r.group(2)
            members = r.group(1).rstrip()
        else:
            r = Re(r'enum\s+(\w*)\s*\{(.*)\}')
            if r.match(proto):
                declaration_name = r.group(1)
                members = r.group(2).rstrip()

        if not members:
            self.emit_warning(ln, f"{proto}: error: Cannot parse enum!")
            self.config.errors += 1
            return

        if self.entry.identifier != declaration_name:
            if self.entry.identifier == "":
                self.emit_warning(ln,
                                  f"{proto}: wrong kernel-doc identifier on prototype")
            else:
                self.emit_warning(ln,
                                  f"expecting prototype for enum {self.entry.identifier}. Prototype was for enum {declaration_name} instead")
            return

        if not declaration_name:
            declaration_name = "(anonymous)"

        member_set = set()

        members = Re(r'\([^;]*?[\)]').sub('', members)

        for arg in members.split(','):
            if not arg:
                continue
            arg = Re(r'^\s*(\w+).*').sub(r'\1', arg)
            self.entry.parameterlist.append(arg)
            if arg not in self.entry.parameterdescs:
                self.entry.parameterdescs[arg] = self.undescribed
                if self.show_warnings("enum", declaration_name):
                    self.emit_warning(ln,
                                      f"Enum value '{arg}' not described in enum '{declaration_name}'")
            member_set.add(arg)

        for k in self.entry.parameterdescs:
            if k not in member_set:
                if self.show_warnings("enum", declaration_name):
                    self.emit_warning(ln,
                                      f"Excess enum value '%{k}' description in '{declaration_name}'")

        self.output_declaration('enum', declaration_name,
                                enum=declaration_name,
                                module=self.config.modulename,
                                parameterlist=self.entry.parameterlist,
                                parameterdescs=self.entry.parameterdescs,
                                sectionlist=self.entry.sectionlist,
                                sections=self.entry.sections,
                                purpose=self.entry.declaration_purpose)

    def dump_declaration(self, ln, prototype):
        """
        Stores a data declaration inside self.entries array.

        The work is dispatched on self.entry.decl_type.
        """

        if self.entry.decl_type == "enum":
            self.dump_enum(ln, prototype)
            return

        if self.entry.decl_type == "typedef":
            self.dump_typedef(ln, prototype)
            return

        if self.entry.decl_type in ["union", "struct"]:
            self.dump_struct(ln, prototype)
            return

        # TODO: handle other types
        self.output_declaration(self.entry.decl_type, prototype,
                                entry=self.entry)

    def dump_function(self, ln, prototype):
        """
        Stores a function of function macro inside self.entries array.
        """

        func_macro = False
        return_type = ''
        decl_type = 'function'

        # Prefixes that would be removed
        sub_prefixes = [
            (r"^static +", "", 0),
            (r"^extern +", "", 0),
            (r"^asmlinkage +", "", 0),
            (r"^inline +", "", 0),
            (r"^__inline__ +", "", 0),
            (r"^__inline +", "", 0),
            (r"^__always_inline +", "", 0),
            (r"^noinline +", "", 0),
            (r"^__FORTIFY_INLINE +", "", 0),
            (r"__init +", "", 0),
            (r"__init_or_module +", "", 0),
            (r"__deprecated +", "", 0),
            (r"__flatten +", "", 0),
            (r"__meminit +", "", 0),
            (r"__must_check +", "", 0),
            (r"__weak +", "", 0),
            (r"__sched +", "", 0),
            (r"_noprof", "", 0),
            (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0),
            (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0),
            (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0),
            (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0),
            (r"__attribute_const__ +", "", 0),

            # It seems that Python support for re.X is broken:
            # At least for me (Python 3.13), this didn't work
#            (r"""
#              __attribute__\s*\(\(
#                (?:
#                    [\w\s]+          # attribute name
#                    (?:\([^)]*\))?   # attribute arguments
#                    \s*,?            # optional comma at the end
#                )+
#              \)\)\s+
#             """, "", re.X),

            # So, remove whitespaces and comments from it
            (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0),
        ]

        for search, sub, flags in sub_prefixes:
            # Flags go by keyword so that they cannot be bound to another
            # positional parameter of Re()
            prototype = Re(search, flags=flags).sub(sub, prototype)

        # Macros are a special case, as they change the prototype format
        new_proto = Re(r"^#\s*define\s+").sub("", prototype)
        is_define_proto = new_proto != prototype
        prototype = new_proto

        # Yes, this truly is vile.  We are looking for:
        # 1. Return type (may be nothing if we're looking at a macro)
        # 2. Function name
        # 3. Function parameters.
        #
        # All the while we have to watch out for function pointer parameters
        # (which IIRC is what the two sections are for), C types (these
        # regexps don't even start to express all the possibilities), and
        # so on.
        #
        # If you mess with these regexps, it's a good idea to check that
        # the following functions' documentation still comes out right:
        # - parport_register_device (function pointer parameters)
        # - atomic_set (macro)
        # - pci_match_device, __copy_to_user (long return type)

        name = r'[a-zA-Z0-9_~:]+'
        prototype_end1 = r'[^\(]*'
        prototype_end2 = r'[^\{]*'
        prototype_end = fr'\(({prototype_end1}|{prototype_end2})\)'

        # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing group.
        # So, this needs to be mapped in Python with (?:...)? or (?:...)+

        type1 = r'(?:[\w\s]+)?'
        type2 = r'(?:[\w\s]+\*+)+'

        found = False

        if is_define_proto:
            r = Re(r'^()(' + name + r')\s+')

            if r.search(prototype):
                return_type = ''
                declaration_name = r.group(2)
                func_macro = True

                found = True

        if not found:
            patterns = [
                rf'^()({name})\s*{prototype_end}',
                rf'^({type1})\s+({name})\s*{prototype_end}',
                rf'^({type2})\s*({name})\s*{prototype_end}',
            ]

            for p in patterns:
                r = Re(p)

                if r.match(prototype):

                    return_type = r.group(1)
                    declaration_name = r.group(2)
                    args = r.group(3)

                    self.create_parameter_list(ln, decl_type, args, ',',
                                               declaration_name)

                    found = True
                    break

        if not found:
            self.emit_warning(ln,
                              f"cannot understand function prototype: '{prototype}'")
            return

        if self.entry.identifier != declaration_name:
            self.emit_warning(ln,
                              f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead")
            return

        prms = " ".join(self.entry.parameterlist)
        self.check_sections(ln, declaration_name, "function",
                            self.entry.sectcheck, prms)

        self.check_return_section(ln, declaration_name, return_type)

        # The entry is flagged as a typedef when the text captured as the
        # return type contains "typedef"
        self.output_declaration(decl_type, declaration_name,
                                function=declaration_name,
                                typedef='typedef' in return_type,
                                module=self.config.modulename,
                                functiontype=return_type,
                                parameterlist=self.entry.parameterlist,
                                parameterdescs=self.entry.parameterdescs,
                                parametertypes=self.entry.parametertypes,
                                sectionlist=self.entry.sectionlist,
                                sections=self.entry.sections,
                                purpose=self.entry.declaration_purpose,
                                func_macro=func_macro)

    def dump_typedef(self, ln, proto):
        """
        Stores a typedef inside self.entries array.

        Function typedefs are stored with type 'function' and
        typedef=True; any other typedef is stored with type 'typedef'.
        """

        typedef_type = r'((?:\s+[\w\*]+\b){1,8})\s*'
        typedef_ident = r'\*?\s*(\w\S+)\s*'
        typedef_args = r'\s*\((.*)\);'

        typedef1 = Re(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
        typedef2 = Re(r'typedef' + typedef_type + typedef_ident + typedef_args)

        # Strip comments
        proto = Re(r'/\*.*?\*/', flags=re.S).sub('', proto)

        # Parse function typedef prototypes
        for r in [typedef1, typedef2]:
            if not r.match(proto):
                continue

            return_type = r.group(1).strip()
            declaration_name = r.group(2)
            args = r.group(3)

            if self.entry.identifier != declaration_name:
                self.emit_warning(ln,
                                  f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
                return

            decl_type = 'function'
            self.create_parameter_list(ln, decl_type, args, ',', declaration_name)

            # NOTE(review): module comes from self.entry here, but from
            # self.config in dump_function() - confirm this is intended.
            self.output_declaration(decl_type, declaration_name,
                                    function=declaration_name,
                                    typedef=True,
                                    module=self.entry.modulename,
                                    functiontype=return_type,
                                    parameterlist=self.entry.parameterlist,
                                    parameterdescs=self.entry.parameterdescs,
                                    parametertypes=self.entry.parametertypes,
                                    sectionlist=self.entry.sectionlist,
                                    sections=self.entry.sections,
                                    purpose=self.entry.declaration_purpose)
            return

        # Handle nested parentheses or brackets
        r = Re(r'(\(*.\)\s*|\[*.\]\s*);$')
        while r.search(proto):
            proto = r.sub('', proto)

        # Parse simple typedefs
        r = Re(r'typedef.*\s+(\w+)\s*;')
        if r.match(proto):
            declaration_name = r.group(1)

            if self.entry.identifier != declaration_name:
                self.emit_warning(ln, f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
                return

            self.output_declaration('typedef', declaration_name,
                                    typedef=declaration_name,
                                    module=self.entry.modulename,
                                    sectionlist=self.entry.sectionlist,
                                    sections=self.entry.sections,
                                    purpose=self.entry.declaration_purpose)
            return

        self.emit_warning(ln, "error: Cannot parse typedef!")
        self.config.errors += 1

    @staticmethod
    def process_export(function_table, line):
        """
        process EXPORT_SYMBOL* tags

        This method is called both internally and externally, so, it
        doesn't use self.

        Exported symbol names found on ``line`` are added to
        ``function_table``.
        """

        if export_symbol.search(line):
            symbol = export_symbol.group(2)
            function_table.add(symbol)

        if export_symbol_ns.search(line):
            symbol = export_symbol_ns.group(2)
            function_table.add(symbol)

    def process_normal(self, ln, line):
        """
        STATE_NORMAL: looking for the /** to begin everything.
        """

        if not doc_start.match(line):
            return

        # start a new entry
        self.reset_state(ln + 1)
        self.entry.in_doc_sect = False

        # next line is always the function name
        self.state = self.STATE_NAME

    def process_name(self, ln, line):
        """
        STATE_NAME: Looking for the "name - description" line
        """

        if doc_block.search(line):
            self.entry.new_start_line = ln

            if not doc_block.group(1):
                self.entry.section = self.section_intro
            else:
                self.entry.section = doc_block.group(1)

            self.state = self.STATE_DOCBLOCK
            return

        if doc_decl.search(line):
            self.entry.identifier = doc_decl.group(1)
            self.entry.is_kernel_comment = False

            decl_start = str(doc_com)       # comment block asterisk
            fn_type = r"(?:\w+\s*\*\s*)?"  # type (for non-functions)
            parenthesis = r"(?:\(\w*\))?"   # optional parenthesis on function
            decl_end = r"(?:[-:].*)"         # end of the name part

            # test for pointer declaration type, foo * bar() - desc
            r = Re(fr"^{decl_start}([\w\s]+?){parenthesis}?\s*{decl_end}?$")
            if r.search(line):
                self.entry.identifier = r.group(1)

            # Test for data declaration
            r = Re(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)")
            if r.search(line):
                self.entry.decl_type = r.group(1)
                self.entry.identifier = r.group(2)
                self.entry.is_kernel_comment = True
            else:
                # Look for foo() or static void foo() - description;
                # or misspelt identifier

                r1 = Re(fr"^{decl_start}{fn_type}(\w+)\s*{parenthesis}\s*{decl_end}?$")
                r2 = Re(fr"^{decl_start}{fn_type}(\w+[^-:]*){parenthesis}\s*{decl_end}$")

                for r in [r1, r2]:
                    if r.search(line):
                        self.entry.identifier = r.group(1)
                        self.entry.decl_type = "function"

                        r = Re(r"define\s+")
                        self.entry.identifier = r.sub("", self.entry.identifier)
                        self.entry.is_kernel_comment = True
                        break

            self.entry.identifier = self.entry.identifier.strip(" ")

            self.state = self.STATE_BODY

            # if there's no @param blocks need to set up default section here
            self.entry.section = self.section_default
self.entry.new_start_line = ln + 1 + + r = Re("[-:](.*)") + if r.search(line): + # strip leading/trailing/multiple spaces + self.entry.descr = r.group(1).strip(" ") + + r = Re(r"\s+") + self.entry.descr = r.sub(" ", self.entry.descr) + self.entry.declaration_purpose = self.entry.descr + self.state = self.STATE_BODY_MAYBE + else: + self.entry.declaration_purpose = "" + + if not self.entry.is_kernel_comment: + self.emit_warning(ln, + f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") + self.state = self.STATE_NORMAL + + if not self.entry.declaration_purpose and self.config.wshort_desc: + self.emit_warning(ln, + f"missing initial short description on line:\n{line}") + + if not self.entry.identifier and self.entry.decl_type != "enum": + self.emit_warning(ln, + f"wrong kernel-doc identifier on line:\n{line}") + self.state = self.STATE_NORMAL + + if self.config.verbose: + self.emit_warning(ln, + f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", + warning=False) + + return + + # Failed to find an identifier. Emit a warning + self.emit_warning(ln, f"Cannot find identifier on line:\n{line}") + + def process_body(self, ln, line): + """ + STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment. + """ + + if self.state == self.STATE_BODY_WITH_BLANK_LINE: + r = Re(r"\s*\*\s?\S") + if r.match(line): + self.dump_section() + self.entry.section = self.section_default + self.entry.new_start_line = line + self.entry.contents = "" + + if doc_sect.search(line): + self.entry.in_doc_sect = True + newsection = doc_sect.group(1) + + if newsection.lower() in ["description", "context"]: + newsection = newsection.title() + + # Special case: @return is a section, not a param description + if newsection.lower() in ["@return", "@returns", + "return", "returns"]: + newsection = "Return" + + # Perl kernel-doc has a check here for contents before sections. 
+ # the logic there is always false, as in_doc_sect variable is + # always true. So, just don't implement Wcontents_before_sections + + # .title() + newcontents = doc_sect.group(2) + if not newcontents: + newcontents = "" + + if self.entry.contents.strip("\n"): + self.dump_section() + + self.entry.new_start_line = ln + self.entry.section = newsection + self.entry.leading_space = None + + self.entry.contents = newcontents.lstrip() + if self.entry.contents: + self.entry.contents += "\n" + + self.state = self.STATE_BODY + return + + if doc_end.search(line): + self.dump_section() + + # Look for doc_com + + doc_end: + r = Re(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') + if r.match(line): + self.emit_warning(ln, f"suspicious ending line: {line}") + + self.entry.prototype = "" + self.entry.new_start_line = ln + 1 + + self.state = self.STATE_PROTO + return + + if doc_content.search(line): + cont = doc_content.group(1) + + if cont == "": + if self.entry.section == self.section_context: + self.dump_section() + + self.entry.new_start_line = ln + self.state = self.STATE_BODY + else: + if self.entry.section != self.section_default: + self.state = self.STATE_BODY_WITH_BLANK_LINE + else: + self.state = self.STATE_BODY + + self.entry.contents += "\n" + + elif self.state == self.STATE_BODY_MAYBE: + + # Continued declaration purpose + self.entry.declaration_purpose = self.entry.declaration_purpose.rstrip() + self.entry.declaration_purpose += " " + cont + + r = Re(r"\s+") + self.entry.declaration_purpose = r.sub(' ', + self.entry.declaration_purpose) + + else: + if self.entry.section.startswith('@') or \ + self.entry.section == self.section_context: + if self.entry.leading_space is None: + r = Re(r'^(\s+)') + if r.match(cont): + self.entry.leading_space = len(r.group(1)) + else: + self.entry.leading_space = 0 + + # Double-check if leading space are realy spaces + pos = 0 + for i in range(0, self.entry.leading_space): + if cont[i] != " ": + break + pos += 1 + + cont = cont[pos:] + + # NEW LOGIC: 
+ # In case it is different, update it + if self.entry.leading_space != pos: + self.entry.leading_space = pos + + self.entry.contents += cont + "\n" + return + + # Unknown line, ignore + self.emit_warning(ln, f"bad line: {line}") + + def process_inline(self, ln, line): + """STATE_INLINE: docbook comments within a prototype.""" + + if self.inline_doc_state == self.STATE_INLINE_NAME and \ + doc_inline_sect.search(line): + self.entry.section = doc_inline_sect.group(1) + self.entry.new_start_line = ln + + self.entry.contents = doc_inline_sect.group(2).lstrip() + if self.entry.contents != "": + self.entry.contents += "\n" + + self.inline_doc_state = self.STATE_INLINE_TEXT + # Documentation block end */ + return + + if doc_inline_end.search(line): + if self.entry.contents not in ["", "\n"]: + self.dump_section() + + self.state = self.STATE_PROTO + self.inline_doc_state = self.STATE_INLINE_NA + return + + if doc_content.search(line): + if self.inline_doc_state == self.STATE_INLINE_TEXT: + self.entry.contents += doc_content.group(1) + "\n" + if not self.entry.contents.strip(" ").rstrip("\n"): + self.entry.contents = "" + + elif self.inline_doc_state == self.STATE_INLINE_NAME: + self.emit_warning(ln, + f"Incorrect use of kernel-doc format: {line}") + + self.inline_doc_state = self.STATE_INLINE_ERROR + + def syscall_munge(self, ln, proto): # pylint: disable=W0613 + """ + Handle syscall definitions + """ + + is_void = False + + # Strip newlines/CR's + proto = re.sub(r'[\r\n]+', ' ', proto) + + # Check if it's a SYSCALL_DEFINE0 + if 'SYSCALL_DEFINE0' in proto: + is_void = True + + # Replace SYSCALL_DEFINE with correct return type & function name + proto = Re(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) + + r = Re(r'long\s+(sys_.*?),') + if r.search(proto): + proto = proto.replace(',', '(', count=1) + elif is_void: + proto = proto.replace(')', '(void)', count=1) + + # Now delete all of the odd-numbered commas in the proto + # so that argument types & names don't have a comma 
between them + count = 0 + length = len(proto) + + if is_void: + length = 0 # skip the loop if is_void + + for ix in range(length): + if proto[ix] == ',': + count += 1 + if count % 2 == 1: + proto = proto[:ix] + ' ' + proto[ix + 1:] + + return proto + + def tracepoint_munge(self, ln, proto): + """ + Handle tracepoint definitions + """ + + tracepointname = None + tracepointargs = None + + # Match tracepoint name based on different patterns + r = Re(r'TRACE_EVENT\((.*?),') + if r.search(proto): + tracepointname = r.group(1) + + r = Re(r'DEFINE_SINGLE_EVENT\((.*?),') + if r.search(proto): + tracepointname = r.group(1) + + r = Re(r'DEFINE_EVENT\((.*?),(.*?),') + if r.search(proto): + tracepointname = r.group(2) + + if tracepointname: + tracepointname = tracepointname.lstrip() + + r = Re(r'TP_PROTO\((.*?)\)') + if r.search(proto): + tracepointargs = r.group(1) + + if not tracepointname or not tracepointargs: + self.emit_warning(ln, + f"Unrecognized tracepoint format:\n{proto}\n") + else: + proto = f"static inline void trace_{tracepointname}({tracepointargs})" + self.entry.identifier = f"trace_{self.entry.identifier}" + + return proto + + def process_proto_function(self, ln, line): + """Ancillary routine to process a function prototype""" + + # strip C99-style comments to end of line + r = Re(r"\/\/.*$", re.S) + line = r.sub('', line) + + if Re(r'\s*#\s*define').match(line): + self.entry.prototype = line + elif line.startswith('#'): + # Strip other macros like #ifdef/#ifndef/#endif/... 
+ pass + else: + r = Re(r'([^\{]*)') + if r.match(line): + self.entry.prototype += r.group(1) + " " + + if '{' in line or ';' in line or Re(r'\s*#\s*define').match(line): + # strip comments + r = Re(r'/\*.*?\*/') + self.entry.prototype = r.sub('', self.entry.prototype) + + # strip newlines/cr's + r = Re(r'[\r\n]+') + self.entry.prototype = r.sub(' ', self.entry.prototype) + + # strip leading spaces + r = Re(r'^\s+') + self.entry.prototype = r.sub('', self.entry.prototype) + + # Handle self.entry.prototypes for function pointers like: + # int (*pcs_config)(struct foo) + + r = Re(r'^(\S+\s+)\(\s*\*(\S+)\)') + self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) + + if 'SYSCALL_DEFINE' in self.entry.prototype: + self.entry.prototype = self.syscall_munge(ln, + self.entry.prototype) + + r = Re(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') + if r.search(self.entry.prototype): + self.entry.prototype = self.tracepoint_munge(ln, + self.entry.prototype) + + self.dump_function(ln, self.entry.prototype) + self.reset_state(ln) + + def process_proto_type(self, ln, line): + """Ancillary routine to process a type""" + + # Strip newlines/cr's. + line = Re(r'[\r\n]+', re.S).sub(' ', line) + + # Strip leading spaces + line = Re(r'^\s+', re.S).sub('', line) + + # Strip trailing spaces + line = Re(r'\s+$', re.S).sub('', line) + + # Strip C99-style comments to the end of the line + line = Re(r"\/\/.*$", re.S).sub('', line) + + # To distinguish preprocessor directive from regular declaration later. 
+ if line.startswith('#'): + line += ";" + + r = Re(r'([^\{\};]*)([\{\};])(.*)') + while True: + if r.search(line): + if self.entry.prototype: + self.entry.prototype += " " + self.entry.prototype += r.group(1) + r.group(2) + + self.entry.brcount += r.group(2).count('{') + self.entry.brcount -= r.group(2).count('}') + + self.entry.brcount = max(self.entry.brcount, 0) + + if r.group(2) == ';' and self.entry.brcount == 0: + self.dump_declaration(ln, self.entry.prototype) + self.reset_state(ln) + break + + line = r.group(3) + else: + self.entry.prototype += line + break + + def process_proto(self, ln, line): + """STATE_PROTO: reading a function/whatever prototype.""" + + if doc_inline_oneline.search(line): + self.entry.section = doc_inline_oneline.group(1) + self.entry.contents = doc_inline_oneline.group(2) + + if self.entry.contents != "": + self.entry.contents += "\n" + self.dump_section(start_new=False) + + elif doc_inline_start.search(line): + self.state = self.STATE_INLINE + self.inline_doc_state = self.STATE_INLINE_NAME + + elif self.entry.decl_type == 'function': + self.process_proto_function(ln, line) + + else: + self.process_proto_type(ln, line) + + def process_docblock(self, ln, line): + """STATE_DOCBLOCK: within a DOC: block.""" + + if doc_end.search(line): + self.dump_section() + self.output_declaration("doc", None, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, module=self.config.modulename) + self.reset_state(ln) + + elif doc_content.search(line): + self.entry.contents += doc_content.group(1) + "\n" + + def run(self): + """ + Open and process each line of a C source file. + he parsing is controlled via a state machine, and the line is passed + to a different process function depending on the state. The process + function may update the state as needed. 
+ """ + + cont = False + prev = "" + prev_ln = None + + try: + with open(self.fname, "r", encoding="utf8", + errors="backslashreplace") as fp: + for ln, line in enumerate(fp): + + line = line.expandtabs().strip("\n") + + # Group continuation lines on prototypes + if self.state == self.STATE_PROTO: + if line.endswith("\\"): + prev += line.removesuffix("\\") + cont = True + + if not prev_ln: + prev_ln = ln + + continue + + if cont: + ln = prev_ln + line = prev + line + prev = "" + cont = False + prev_ln = None + + self.config.log.debug("%d %s%s: %s", + ln, self.st_name[self.state], + self.st_inline_name[self.inline_doc_state], + line) + + # TODO: not all states allow EXPORT_SYMBOL*, so this + # can be optimized later on to speedup parsing + self.process_export(self.config.function_table, line) + + # Hand this line to the appropriate state handler + if self.state == self.STATE_NORMAL: + self.process_normal(ln, line) + elif self.state == self.STATE_NAME: + self.process_name(ln, line) + elif self.state in [self.STATE_BODY, self.STATE_BODY_MAYBE, + self.STATE_BODY_WITH_BLANK_LINE]: + self.process_body(ln, line) + elif self.state == self.STATE_INLINE: # scanning for inline parameters + self.process_inline(ln, line) + elif self.state == self.STATE_PROTO: + self.process_proto(ln, line) + elif self.state == self.STATE_DOCBLOCK: + self.process_docblock(ln, line) + except OSError: + self.config.log.error(f"Error: Cannot open file {self.fname}") + self.config.errors += 1 -- cgit From c3597ab27bc0e5eae23c74a76380000a0f8481e1 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:17 +0800 Subject: scripts/kernel-doc.py: fix line number output With the Python version, the actual output happens after parsing, from records stored at self.entries. Ensure that line numbers will be properly stored there and that they'll produce the desired results at the ReST output. 
Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/5182a531d14b5fe9e1fc5da5f9dae05d66852a60.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 3ce116595546..e8c86448d6b5 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -276,7 +276,7 @@ class KernelDoc: self.entry.brcount = 0 self.entry.in_doc_sect = False - self.entry.declaration_start_line = ln + self.entry.declaration_start_line = ln + 1 def push_parameter(self, ln, decl_type, param, dtype, org_arg, declaration_name): @@ -806,8 +806,10 @@ class KernelDoc: parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, parametertypes=self.entry.parametertypes, + parameterdesc_start_lines=self.entry.parameterdesc_start_lines, sectionlist=self.entry.sectionlist, sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose) def dump_enum(self, ln, proto): @@ -882,8 +884,10 @@ class KernelDoc: module=self.config.modulename, parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, + parameterdesc_start_lines=self.entry.parameterdesc_start_lines, sectionlist=self.entry.sectionlist, sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose) def dump_declaration(self, ln, prototype): @@ -1054,8 +1058,10 @@ class KernelDoc: parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, parametertypes=self.entry.parametertypes, + parameterdesc_start_lines=self.entry.parameterdesc_start_lines, sectionlist=self.entry.sectionlist, sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, 
purpose=self.entry.declaration_purpose, func_macro=func_macro) else: @@ -1067,8 +1073,10 @@ class KernelDoc: parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, parametertypes=self.entry.parametertypes, + parameterdesc_start_lines=self.entry.parameterdesc_start_lines, sectionlist=self.entry.sectionlist, sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose, func_macro=func_macro) @@ -1112,8 +1120,10 @@ class KernelDoc: parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, parametertypes=self.entry.parametertypes, + parameterdesc_start_lines=self.entry.parameterdesc_start_lines, sectionlist=self.entry.sectionlist, sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose) return @@ -1136,6 +1146,7 @@ class KernelDoc: module=self.entry.modulename, sectionlist=self.entry.sectionlist, sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose) return @@ -1168,7 +1179,7 @@ class KernelDoc: return # start a new entry - self.reset_state(ln + 1) + self.reset_state(ln) self.entry.in_doc_sect = False # next line is always the function name @@ -1281,7 +1292,7 @@ class KernelDoc: if r.match(line): self.dump_section() self.entry.section = self.section_default - self.entry.new_start_line = line + self.entry.new_start_line = ln self.entry.contents = "" if doc_sect.search(line): @@ -1619,7 +1630,9 @@ class KernelDoc: self.dump_section() self.output_declaration("doc", None, sectionlist=self.entry.sectionlist, - sections=self.entry.sections, module=self.config.modulename) + sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, + module=self.config.modulename) self.reset_state(ln) elif doc_content.search(line): -- cgit From 408269ae35d6b88d48477af56a2376ea05e619ca Mon Sep 17 00:00:00 2001 From: Mauro 
Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:18 +0800 Subject: scripts/kernel-doc.py: fix handling of doc output check The filtering logic was seeking for the DOC name to check for symbols, but such data is stored only inside a section. Add it to the output_declaration, as it is quicker/easier to check the declaration name than to check inside each section. While here, make sure that the output for both ReST and man after filtering will be similar to what kernel-doc Perl version does. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/6d8b77af85295452c0191863ea1041f4195aeaaf.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index e8c86448d6b5..74b311c8184c 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1198,6 +1198,7 @@ class KernelDoc: else: self.entry.section = doc_block.group(1) + self.entry.identifier = self.entry.section self.state = self.STATE_DOCBLOCK return @@ -1628,7 +1629,7 @@ class KernelDoc: if doc_end.search(line): self.dump_section() - self.output_declaration("doc", None, + self.output_declaration("doc", self.entry.identifier, sectionlist=self.entry.sectionlist, sections=self.entry.sections, section_start_lines=self.entry.section_start_lines, -- cgit From 9cbc2d3b137bfdb7937265c46e9e5d7e72952841 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:20 +0800 Subject: scripts/kernel-doc.py: postpone warnings to the output plugin We don't want to have warnings displayed for symbols that weren't output. So, postpone warnings print to the output plugin, where symbol output is validated. 
Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/e6344711e390cf22af02a56bb5dd51ca67c0afb6.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 74b311c8184c..3698ef625367 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -131,23 +131,23 @@ class KernelDoc: # Place all potential outputs into an array self.entries = [] - def show_warnings(self, dtype, declaration_name): # pylint: disable=W0613 - """ - Allow filtering out warnings - """ - - # TODO: implement it - - return True - # TODO: rename to emit_message def emit_warning(self, ln, msg, warning=True): """Emit a message""" + log_msg = f"{self.fname}:{ln} {msg}" + + if self.entry: + # Delegate warning output to output logic, as this way it + # will report warnings/info only for symbols that are output + + self.entry.warnings.append((warning, log_msg)) + return + if warning: - self.config.log.warning("%s:%d %s", self.fname, ln, msg) + self.config.log.warning(log_msg) else: - self.config.log.info("%s:%d %s", self.fname, ln, msg) + self.config.log.info(log_msg) def dump_section(self, start_new=True): """ @@ -221,10 +221,9 @@ class KernelDoc: # For now, we're keeping the same name of the function just to make # easier to compare the source code of both scripts - if "declaration_start_line" not in args: - args["declaration_start_line"] = self.entry.declaration_start_line - + args["declaration_start_line"] = self.entry.declaration_start_line args["type"] = dtype + args["warnings"] = self.entry.warnings # TODO: use colletions.OrderedDict @@ -257,6 +256,8 @@ class KernelDoc: self.entry.struct_actual = "" self.entry.prototype = "" + self.entry.warnings = [] + self.entry.parameterlist = [] 
self.entry.parameterdescs = {} self.entry.parametertypes = {} @@ -328,7 +329,7 @@ class KernelDoc: if param not in self.entry.parameterdescs and not param.startswith("#"): self.entry.parameterdescs[param] = self.undescribed - if self.show_warnings(dtype, declaration_name) and "." not in param: + if "." not in param: if decl_type == 'function': dname = f"{decl_type} parameter" else: @@ -868,16 +869,14 @@ class KernelDoc: self.entry.parameterlist.append(arg) if arg not in self.entry.parameterdescs: self.entry.parameterdescs[arg] = self.undescribed - if self.show_warnings("enum", declaration_name): - self.emit_warning(ln, - f"Enum value '{arg}' not described in enum '{declaration_name}'") + self.emit_warning(ln, + f"Enum value '{arg}' not described in enum '{declaration_name}'") member_set.add(arg) for k in self.entry.parameterdescs: if k not in member_set: - if self.show_warnings("enum", declaration_name): - self.emit_warning(ln, - f"Excess enum value '%{k}' description in '{declaration_name}'") + self.emit_warning(ln, + f"Excess enum value '%{k}' description in '{declaration_name}'") self.output_declaration('enum', declaration_name, enum=declaration_name, -- cgit From 485f6f7960c468d9e27665f61517dc5fc097ea98 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:26 +0800 Subject: scripts/kernel-doc.py: adjust some coding style issues Make pylint happier by adding some missing documentation and addressing a couple of pylint warnings. 
Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/0f9d5473105e4c09c6c41e3db72cc63f1d4d55f9.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 3698ef625367..dcb9515fc40b 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -131,7 +131,7 @@ class KernelDoc: # Place all potential outputs into an array self.entries = [] - # TODO: rename to emit_message + # TODO: rename to emit_message after removal of kernel-doc.pl def emit_warning(self, ln, msg, warning=True): """Emit a message""" @@ -157,19 +157,6 @@ class KernelDoc: name = self.entry.section contents = self.entry.contents - # TODO: we can prevent dumping empty sections here with: - # - # if self.entry.contents.strip("\n"): - # if start_new: - # self.entry.section = self.section_default - # self.entry.contents = "" - # - # return - # - # But, as we want to be producing the same output of the - # venerable kernel-doc Perl tool, let's just output everything, - # at least for now - if type_param.match(name): name = type_param.group(1) @@ -205,7 +192,7 @@ class KernelDoc: self.entry.section = self.section_default self.entry.contents = "" - # TODO: rename it to store_declaration + # TODO: rename it to store_declaration after removal of kernel-doc.pl def output_declaration(self, dtype, name, **args): """ Stores the entry into an entry array. 
@@ -225,13 +212,13 @@ class KernelDoc: args["type"] = dtype args["warnings"] = self.entry.warnings - # TODO: use colletions.OrderedDict + # TODO: use colletions.OrderedDict to remove sectionlist sections = args.get('sections', {}) sectionlist = args.get('sectionlist', []) # Drop empty sections - # TODO: improve it to emit warnings + # TODO: improve empty sections logic to emit warnings for section in ["Description", "Return"]: if section in sectionlist: if not sections[section].rstrip(): @@ -636,7 +623,9 @@ class KernelDoc: # Replace macros # - # TODO: it is better to also move those to the NestedMatch logic, + # TODO: use NestedMatch for FOO($1, $2, ...) matches + # + # it is better to also move those to the NestedMatch logic, # to ensure that parenthesis will be properly matched. (Re(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), @@ -906,7 +895,6 @@ class KernelDoc: self.dump_struct(ln, prototype) return - # TODO: handle other types self.output_declaration(self.entry.decl_type, prototype, entry=self.entry) @@ -1680,10 +1668,6 @@ class KernelDoc: self.st_inline_name[self.inline_doc_state], line) - # TODO: not all states allow EXPORT_SYMBOL*, so this - # can be optimized later on to speedup parsing - self.process_export(self.config.function_table, line) - # Hand this line to the appropriate state handler if self.state == self.STATE_NORMAL: self.process_normal(ln, line) -- cgit From 78ea748f7978d39a6ee29897d3bd32e6208f74ac Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:27 +0800 Subject: scripts/lib/kdoc/kdoc_parser.py: fix Python compat with < v3.13 - str.replace count was introduced only in Python 3.13; - before Python 3.13, f-string dict arguments can't use the same delimiter of the main string. 
Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/e2b8e8361294558dae09236e4b8fbea5d86be5a3.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index dcb9515fc40b..e48ed128ca04 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1444,9 +1444,9 @@ class KernelDoc: r = Re(r'long\s+(sys_.*?),') if r.search(proto): - proto = proto.replace(',', '(', count=1) + proto = Re(',').sub('(', proto, count=1) elif is_void: - proto = proto.replace(')', '(void)', count=1) + proto = Re(r'\)').sub('(void)', proto, count=1) # Now delete all of the odd-numbered commas in the proto # so that argument types & names don't have a comma between them -- cgit From 2ab867a4941de2e9d7804e76ab002ad74c73b078 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:28 +0800 Subject: scripts/kernel-doc.py: move modulename to man class Only man output requires a modulename. Move its definition to the man class. 
Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/583085e3885b0075d16ef9961b4f2ad870f30a55.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index e48ed128ca04..f923600561f8 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -791,7 +791,6 @@ class KernelDoc: self.output_declaration(decl_type, declaration_name, struct=declaration_name, - module=self.entry.modulename, definition=declaration, parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, @@ -869,7 +868,6 @@ class KernelDoc: self.output_declaration('enum', declaration_name, enum=declaration_name, - module=self.config.modulename, parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, parameterdesc_start_lines=self.entry.parameterdesc_start_lines, @@ -1040,7 +1038,6 @@ class KernelDoc: self.output_declaration(decl_type, declaration_name, function=declaration_name, typedef=True, - module=self.config.modulename, functiontype=return_type, parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, @@ -1055,7 +1052,6 @@ class KernelDoc: self.output_declaration(decl_type, declaration_name, function=declaration_name, typedef=False, - module=self.config.modulename, functiontype=return_type, parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, @@ -1102,7 +1098,6 @@ class KernelDoc: self.output_declaration(decl_type, declaration_name, function=declaration_name, typedef=True, - module=self.entry.modulename, functiontype=return_type, parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, @@ -1130,7 +1125,6 @@ class KernelDoc: self.output_declaration('typedef', declaration_name, 
typedef=declaration_name, - module=self.entry.modulename, sectionlist=self.entry.sectionlist, sections=self.entry.sections, section_start_lines=self.entry.section_start_lines, @@ -1619,8 +1613,7 @@ class KernelDoc: self.output_declaration("doc", self.entry.identifier, sectionlist=self.entry.sectionlist, sections=self.entry.sections, - section_start_lines=self.entry.section_start_lines, - module=self.config.modulename) + section_start_lines=self.entry.section_start_lines) self.reset_state(ln) elif doc_content.search(line): -- cgit From e4b2bd908c3d8f071d4fac6e588fffc6110c1b1f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:30 +0800 Subject: scripts/lib/kdoc/kdoc_parser.py: remove a python 3.9 dependency str.removesuffix() was added on Python 3.9, but rstrip() actually does the same thing, as we just want to remove a single character. It is also shorter. So, use it. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/f64cc4adef107ada26da4bfb7e4b7002dd783173.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index f923600561f8..77e8bfeccc8e 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1641,7 +1641,7 @@ class KernelDoc: # Group continuation lines on prototypes if self.state == self.STATE_PROTO: if line.endswith("\\"): - prev += line.removesuffix("\\") + prev += line.rstrip("\\") cont = True if not prev_ln: -- cgit From 11afeab6d74d1be80420b47113c4893c88dcc04b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:31 +0800 Subject: scripts/kernel-doc.py: Properly handle Werror and exit codes The original kernel-doc script has a logic to return warnings as errors, and to report the number of warnings found, if in verbose 
mode. Implement it to be fully compatible with the original script. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/de33b0cebd9fdf82d8b221bcfe41db7269286222.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 77e8bfeccc8e..43e6ffbdcc2c 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -137,17 +137,18 @@ class KernelDoc: log_msg = f"{self.fname}:{ln} {msg}" + if not warning: + self.config.log.info(log_msg) + return + if self.entry: # Delegate warning output to output logic, as this way it # will report warnings/info only for symbols that are output - self.entry.warnings.append((warning, log_msg)) + self.entry.warnings.append(log_msg) return - if warning: - self.config.log.warning(log_msg) - else: - self.config.log.info(log_msg) + self.config.log.warning(log_msg) def dump_section(self, start_new=True): """ @@ -556,7 +557,6 @@ class KernelDoc: if not members: self.emit_warning(ln, f"{proto} error: Cannot parse struct or union!") - self.config.errors += 1 return if self.entry.identifier != declaration_name: @@ -831,7 +831,6 @@ class KernelDoc: if not members: self.emit_warning(ln, f"{proto}: error: Cannot parse enum!") - self.config.errors += 1 return if self.entry.identifier != declaration_name: @@ -1132,7 +1131,6 @@ class KernelDoc: return self.emit_warning(ln, "error: Cannot parse typedef!") - self.config.errors += 1 @staticmethod def process_export(function_table, line): @@ -1677,4 +1675,3 @@ class KernelDoc: self.process_docblock(ln, line) except OSError: self.config.log.error(f"Error: Cannot open file {self.fname}") - self.config.errors += 1 -- cgit From 16740c29dbf3275a22691d3d7c63701992872898 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: 
Tue, 8 Apr 2025 18:09:34 +0800 Subject: scripts/kernel_doc.py: better handle exported symbols Change the logic which detects internal/external symbols in a way that we can re-use it when calling via Sphinx extension. While here, remove an unused self.config var and let it clearer that self.config variables are read-only. This helps to allow handling multiple times in parallel if ever needed. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/6a69ba8d2b7ee6a6427abb53e60d09bd4d3565ee.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 52 +++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 7 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 43e6ffbdcc2c..33f00c77dd5f 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1133,21 +1133,25 @@ class KernelDoc: self.emit_warning(ln, "error: Cannot parse typedef!") @staticmethod - def process_export(function_table, line): + def process_export(function_set, line): """ process EXPORT_SYMBOL* tags - This method is called both internally and externally, so, it - doesn't use self. + This method doesn't use any variable from the class, so declare it + with a staticmethod decorator. """ + # Note: it accepts only one EXPORT_SYMBOL* per line, as having + # multiple export lines would violate Kernel coding style. 
+ if export_symbol.search(line): symbol = export_symbol.group(2) - function_table.add(symbol) + function_set.add(symbol) + return if export_symbol_ns.search(line): symbol = export_symbol_ns.group(2) - function_table.add(symbol) + function_set.add(symbol) def process_normal(self, ln, line): """ @@ -1617,17 +1621,39 @@ class KernelDoc: elif doc_content.search(line): self.entry.contents += doc_content.group(1) + "\n" - def run(self): + def parse_export(self): + """ + Parses EXPORT_SYMBOL* macros from a single Kernel source file. + """ + + export_table = set() + + try: + with open(self.fname, "r", encoding="utf8", + errors="backslashreplace") as fp: + + for line in fp: + self.process_export(export_table, line) + + except IOError: + return None + + return export_table + + def parse_kdoc(self): """ Open and process each line of a C source file. - he parsing is controlled via a state machine, and the line is passed + The parsing is controlled via a state machine, and the line is passed to a different process function depending on the state. The process function may update the state as needed. + + Besides parsing kernel-doc tags, it also parses export symbols. """ cont = False prev = "" prev_ln = None + export_table = set() try: with open(self.fname, "r", encoding="utf8", @@ -1659,6 +1685,16 @@ class KernelDoc: self.st_inline_name[self.inline_doc_state], line) + # This is an optimization over the original script. + # There, when export_file was used for the same file, + # it was read twice. Here, we use the already-existing + # loop to parse exported symbols as well. + # + # TODO: It should be noticed that not all states are + # needed here. On a future cleanup, process export only + # at the states that aren't handling comment markups. 
+ self.process_export(export_table, line) + # Hand this line to the appropriate state handler if self.state == self.STATE_NORMAL: self.process_normal(ln, line) @@ -1675,3 +1711,5 @@ class KernelDoc: self.process_docblock(ln, line) except OSError: self.config.log.error(f"Error: Cannot open file {self.fname}") + + return export_table, self.entries -- cgit From 04a383ced6965fedc9c1b6c83d841acce076b53c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:35 +0800 Subject: scripts/kernel-doc.py: Rename the kernel doc Re class to KernRe Using just "Re" makes it harder to distinguish from the native "re" class. So, let's rename it. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/4e095ecd5235a3e811ddcf5bad4cfb92f1da0a4a.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 264 ++++++++++++++++++++-------------------- 1 file changed, 132 insertions(+), 132 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 33f00c77dd5f..f60722bcc687 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -16,7 +16,7 @@ import argparse import re from pprint import pformat -from kdoc_re import NestedMatch, Re +from kdoc_re import NestedMatch, KernRe # @@ -29,12 +29,12 @@ from kdoc_re import NestedMatch, Re # # Allow whitespace at end of comment start. 
-doc_start = Re(r'^/\*\*\s*$', cache=False) +doc_start = KernRe(r'^/\*\*\s*$', cache=False) -doc_end = Re(r'\*/', cache=False) -doc_com = Re(r'\s*\*\s*', cache=False) -doc_com_body = Re(r'\s*\* ?', cache=False) -doc_decl = doc_com + Re(r'(\w+)', cache=False) +doc_end = KernRe(r'\*/', cache=False) +doc_com = KernRe(r'\s*\*\s*', cache=False) +doc_com_body = KernRe(r'\s*\* ?', cache=False) +doc_decl = doc_com + KernRe(r'(\w+)', cache=False) # @params and a strictly limited set of supported section names # Specifically: @@ -44,22 +44,22 @@ doc_decl = doc_com + Re(r'(\w+)', cache=False) # while trying to not match literal block starts like "example::" # doc_sect = doc_com + \ - Re(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$', + KernRe(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$', flags=re.I, cache=False) -doc_content = doc_com_body + Re(r'(.*)', cache=False) -doc_block = doc_com + Re(r'DOC:\s*(.*)?', cache=False) -doc_inline_start = Re(r'^\s*/\*\*\s*$', cache=False) -doc_inline_sect = Re(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) -doc_inline_end = Re(r'^\s*\*/\s*$', cache=False) -doc_inline_oneline = Re(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) -attribute = Re(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", +doc_content = doc_com_body + KernRe(r'(.*)', cache=False) +doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) +doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) +doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) +doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) +doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) +attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False) -export_symbol = Re(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) -export_symbol_ns = Re(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', 
cache=False) +export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) +export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) -type_param = Re(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) +type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) class KernelDoc: @@ -278,10 +278,10 @@ class KernelDoc: self.entry.anon_struct_union = False - param = Re(r'[\[\)].*').sub('', param, count=1) + param = KernRe(r'[\[\)].*').sub('', param, count=1) if dtype == "" and param.endswith("..."): - if Re(r'\w\.\.\.$').search(param): + if KernRe(r'\w\.\.\.$').search(param): # For named variable parameters of the form `x...`, # remove the dots param = param[:-3] @@ -335,7 +335,7 @@ class KernelDoc: # to ignore "[blah" in a parameter string. self.entry.parameterlist.append(param) - org_arg = Re(r'\s\s+').sub(' ', org_arg) + org_arg = KernRe(r'\s\s+').sub(' ', org_arg) self.entry.parametertypes[param] = org_arg def save_struct_actual(self, actual): @@ -344,7 +344,7 @@ class KernelDoc: one string item. 
""" - actual = Re(r'\s*').sub("", actual, count=1) + actual = KernRe(r'\s*').sub("", actual, count=1) self.entry.struct_actual += actual + " " @@ -355,20 +355,20 @@ class KernelDoc: """ # temporarily replace all commas inside function pointer definition - arg_expr = Re(r'(\([^\),]+),') + arg_expr = KernRe(r'(\([^\),]+),') while arg_expr.search(args): args = arg_expr.sub(r"\1#", args) for arg in args.split(splitter): # Strip comments - arg = Re(r'\/\*.*\*\/').sub('', arg) + arg = KernRe(r'\/\*.*\*\/').sub('', arg) # Ignore argument attributes - arg = Re(r'\sPOS0?\s').sub(' ', arg) + arg = KernRe(r'\sPOS0?\s').sub(' ', arg) # Strip leading/trailing spaces arg = arg.strip() - arg = Re(r'\s+').sub(' ', arg, count=1) + arg = KernRe(r'\s+').sub(' ', arg, count=1) if arg.startswith('#'): # Treat preprocessor directive as a typeless variable just to fill @@ -379,63 +379,63 @@ class KernelDoc: self.push_parameter(ln, decl_type, arg, "", "", declaration_name) - elif Re(r'\(.+\)\s*\(').search(arg): + elif KernRe(r'\(.+\)\s*\(').search(arg): # Pointer-to-function arg = arg.replace('#', ',') - r = Re(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') + r = KernRe(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') if r.match(arg): param = r.group(1) else: self.emit_warning(ln, f"Invalid param: {arg}") param = arg - dtype = Re(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) + dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) self.save_struct_actual(param) self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) - elif Re(r'\(.+\)\s*\[').search(arg): + elif KernRe(r'\(.+\)\s*\[').search(arg): # Array-of-pointers arg = arg.replace('#', ',') - r = Re(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') + r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') if r.match(arg): param = r.group(1) else: self.emit_warning(ln, f"Invalid param: {arg}") param = arg - dtype = Re(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) + dtype = 
KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) self.save_struct_actual(param) self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) elif arg: - arg = Re(r'\s*:\s*').sub(":", arg) - arg = Re(r'\s*\[').sub('[', arg) + arg = KernRe(r'\s*:\s*').sub(":", arg) + arg = KernRe(r'\s*\[').sub('[', arg) - args = Re(r'\s*,\s*').split(arg) + args = KernRe(r'\s*,\s*').split(arg) if args[0] and '*' in args[0]: args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) first_arg = [] - r = Re(r'^(.*\s+)(.*?\[.*\].*)$') + r = KernRe(r'^(.*\s+)(.*?\[.*\].*)$') if args[0] and r.match(args[0]): args.pop(0) first_arg.extend(r.group(1)) first_arg.append(r.group(2)) else: - first_arg = Re(r'\s+').split(args.pop(0)) + first_arg = KernRe(r'\s+').split(args.pop(0)) args.insert(0, first_arg.pop()) dtype = ' '.join(first_arg) for param in args: - if Re(r'^(\*+)\s*(.*)').match(param): - r = Re(r'^(\*+)\s*(.*)') + if KernRe(r'^(\*+)\s*(.*)').match(param): + r = KernRe(r'^(\*+)\s*(.*)') if not r.match(param): self.emit_warning(ln, f"Invalid param: {param}") continue @@ -447,8 +447,8 @@ class KernelDoc: f"{dtype} {r.group(1)}", arg, declaration_name) - elif Re(r'(.*?):(\w+)').search(param): - r = Re(r'(.*?):(\w+)') + elif KernRe(r'(.*?):(\w+)').search(param): + r = KernRe(r'(.*?):(\w+)') if not r.match(param): self.emit_warning(ln, f"Invalid param: {param}") continue @@ -477,7 +477,7 @@ class KernelDoc: err = True for px in range(len(prms)): # pylint: disable=C0200 prm_clean = prms[px] - prm_clean = Re(r'\[.*\]').sub('', prm_clean) + prm_clean = KernRe(r'\[.*\]').sub('', prm_clean) prm_clean = attribute.sub('', prm_clean) # ignore array size in a parameter string; @@ -486,7 +486,7 @@ class KernelDoc: # and this appears in @prms as "addr[6" since the # parameter list is split at spaces; # hence just ignore "[..." 
for the sections check; - prm_clean = Re(r'\[.*').sub('', prm_clean) + prm_clean = KernRe(r'\[.*').sub('', prm_clean) if prm_clean == sects[sx]: err = False @@ -512,7 +512,7 @@ class KernelDoc: # Ignore an empty return type (It's a macro) # Ignore functions with a "void" return type (but not "void *") - if not return_type or Re(r'void\s*\w*\s*$').search(return_type): + if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): return if not self.entry.sections.get("Return", None): @@ -535,20 +535,20 @@ class KernelDoc: ] definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" - struct_members = Re(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)') + struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)') # Extract struct/union definition members = None declaration_name = None decl_type = None - r = Re(type_pattern + r'\s+(\w+)\s*' + definition_body) + r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) if r.search(proto): decl_type = r.group(1) declaration_name = r.group(2) members = r.group(3) else: - r = Re(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') + r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') if r.search(proto): decl_type = r.group(1) @@ -567,21 +567,21 @@ class KernelDoc: args_pattern = r'([^,)]+)' sub_prefixes = [ - (Re(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''), - (Re(r'\/\*\s*private:.*', re.S | re.I), ''), + (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''), + (KernRe(r'\/\*\s*private:.*', re.S | re.I), ''), # Strip comments - (Re(r'\/\*.*?\*\/', re.S), ''), + (KernRe(r'\/\*.*?\*\/', re.S), ''), # Strip attributes (attribute, ' '), - (Re(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (Re(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (Re(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (Re(r'\s*__packed\s*', re.S), ' '), - (Re(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' 
'), - (Re(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (Re(r'\s*____cacheline_aligned', re.S), ' '), + (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__packed\s*', re.S), ' '), + (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned', re.S), ' '), # Unwrap struct_group macros based on this definition: # __struct_group(TAG, NAME, ATTRS, MEMBERS...) @@ -616,10 +616,10 @@ class KernelDoc: # matched. So, the implementation to drop STRUCT_GROUP() will be # handled in separate. - (Re(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), - (Re(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), - (Re(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), - (Re(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), + (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), # Replace macros # @@ -628,15 +628,15 @@ class KernelDoc: # it is better to also move those to the NestedMatch logic, # to ensure that parenthesis will be properly matched. 
- (Re(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (Re(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (Re(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (Re(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (Re(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), - (Re(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), - (Re(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), - (Re(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), - (Re(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), + (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), + (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), + (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), + (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), ] # 
Regexes here are guaranteed to have the end limiter matching @@ -689,8 +689,8 @@ class KernelDoc: s_id = s_id.strip() newmember += f"{maintype} {s_id}; " - s_id = Re(r'[:\[].*').sub('', s_id) - s_id = Re(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) + s_id = KernRe(r'[:\[].*').sub('', s_id) + s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) for arg in content.split(';'): arg = arg.strip() @@ -698,7 +698,7 @@ class KernelDoc: if not arg: continue - r = Re(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') + r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') if r.match(arg): # Pointer-to-function dtype = r.group(1) @@ -717,15 +717,15 @@ class KernelDoc: else: arg = arg.strip() # Handle bitmaps - arg = Re(r':\s*\d+\s*').sub('', arg) + arg = KernRe(r':\s*\d+\s*').sub('', arg) # Handle arrays - arg = Re(r'\[.*\]').sub('', arg) + arg = KernRe(r'\[.*\]').sub('', arg) # Handle multiple IDs - arg = Re(r'\s*,\s*').sub(',', arg) + arg = KernRe(r'\s*,\s*').sub(',', arg) - r = Re(r'(.*)\s+([\S+,]+)') + r = KernRe(r'(.*)\s+([\S+,]+)') if r.search(arg): dtype = r.group(1) @@ -735,7 +735,7 @@ class KernelDoc: continue for name in names.split(','): - name = Re(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip() + name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip() if not name: continue @@ -757,12 +757,12 @@ class KernelDoc: self.entry.sectcheck, self.entry.struct_actual) # Adjust declaration for better display - declaration = Re(r'([\{;])').sub(r'\1\n', declaration) - declaration = Re(r'\}\s+;').sub('};', declaration) + declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) + declaration = KernRe(r'\}\s+;').sub('};', declaration) # Better handle inlined enums while True: - r = Re(r'(enum\s+\{[^\}]+),([^\n])') + r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') if not r.search(declaration): break @@ -774,7 +774,7 @@ class KernelDoc: for clause in def_args: clause = clause.strip() - clause = Re(r'\s+').sub(' ', clause, count=1) + clause = KernRe(r'\s+').sub(' ', clause, count=1) if not clause: continue @@ 
-782,7 +782,7 @@ class KernelDoc: if '}' in clause and level > 1: level -= 1 - if not Re(r'^\s*#').match(clause): + if not KernRe(r'^\s*#').match(clause): declaration += "\t" * level declaration += "\t" + clause + "\n" @@ -807,24 +807,24 @@ class KernelDoc: """ # Ignore members marked private - proto = Re(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto) - proto = Re(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto) + proto = KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto) + proto = KernRe(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto) # Strip comments - proto = Re(r'\/\*.*?\*\/', flags=re.S).sub('', proto) + proto = KernRe(r'\/\*.*?\*\/', flags=re.S).sub('', proto) # Strip #define macros inside enums - proto = Re(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) + proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) members = None declaration_name = None - r = Re(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') + r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') if r.search(proto): declaration_name = r.group(2) members = r.group(1).rstrip() else: - r = Re(r'enum\s+(\w*)\s*\{(.*)\}') + r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') if r.match(proto): declaration_name = r.group(1) members = r.group(2).rstrip() @@ -847,12 +847,12 @@ class KernelDoc: member_set = set() - members = Re(r'\([^;]*?[\)]').sub('', members) + members = KernRe(r'\([^;]*?[\)]').sub('', members) for arg in members.split(','): if not arg: continue - arg = Re(r'^\s*(\w+).*').sub(r'\1', arg) + arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) self.entry.parameterlist.append(arg) if arg not in self.entry.parameterdescs: self.entry.parameterdescs[arg] = self.undescribed @@ -947,10 +947,10 @@ class KernelDoc: ] for search, sub, flags in sub_prefixes: - prototype = Re(search, flags).sub(sub, prototype) + prototype = KernRe(search, flags).sub(sub, prototype) # Macros are a special case, as they change the 
prototype format - new_proto = Re(r"^#\s*define\s+").sub("", prototype) + new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) if new_proto != prototype: is_define_proto = True prototype = new_proto @@ -987,7 +987,7 @@ class KernelDoc: found = False if is_define_proto: - r = Re(r'^()(' + name + r')\s+') + r = KernRe(r'^()(' + name + r')\s+') if r.search(prototype): return_type = '' @@ -1004,7 +1004,7 @@ class KernelDoc: ] for p in patterns: - r = Re(p) + r = KernRe(p) if r.match(prototype): @@ -1071,11 +1071,11 @@ class KernelDoc: typedef_ident = r'\*?\s*(\w\S+)\s*' typedef_args = r'\s*\((.*)\);' - typedef1 = Re(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) - typedef2 = Re(r'typedef' + typedef_type + typedef_ident + typedef_args) + typedef1 = KernRe(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) + typedef2 = KernRe(r'typedef' + typedef_type + typedef_ident + typedef_args) # Strip comments - proto = Re(r'/\*.*?\*/', flags=re.S).sub('', proto) + proto = KernRe(r'/\*.*?\*/', flags=re.S).sub('', proto) # Parse function typedef prototypes for r in [typedef1, typedef2]: @@ -1109,12 +1109,12 @@ class KernelDoc: return # Handle nested parentheses or brackets - r = Re(r'(\(*.\)\s*|\[*.\]\s*);$') + r = KernRe(r'(\(*.\)\s*|\[*.\]\s*);$') while r.search(proto): proto = r.sub('', proto) # Parse simple typedefs - r = Re(r'typedef.*\s+(\w+)\s*;') + r = KernRe(r'typedef.*\s+(\w+)\s*;') if r.match(proto): declaration_name = r.group(1) @@ -1195,12 +1195,12 @@ class KernelDoc: decl_end = r"(?:[-:].*)" # end of the name part # test for pointer declaration type, foo * bar() - desc - r = Re(fr"^{decl_start}([\w\s]+?){parenthesis}?\s*{decl_end}?$") + r = KernRe(fr"^{decl_start}([\w\s]+?){parenthesis}?\s*{decl_end}?$") if r.search(line): self.entry.identifier = r.group(1) # Test for data declaration - r = Re(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)") + r = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)") if 
r.search(line): self.entry.decl_type = r.group(1) self.entry.identifier = r.group(2) @@ -1209,15 +1209,15 @@ class KernelDoc: # Look for foo() or static void foo() - description; # or misspelt identifier - r1 = Re(fr"^{decl_start}{fn_type}(\w+)\s*{parenthesis}\s*{decl_end}?$") - r2 = Re(fr"^{decl_start}{fn_type}(\w+[^-:]*){parenthesis}\s*{decl_end}$") + r1 = KernRe(fr"^{decl_start}{fn_type}(\w+)\s*{parenthesis}\s*{decl_end}?$") + r2 = KernRe(fr"^{decl_start}{fn_type}(\w+[^-:]*){parenthesis}\s*{decl_end}$") for r in [r1, r2]: if r.search(line): self.entry.identifier = r.group(1) self.entry.decl_type = "function" - r = Re(r"define\s+") + r = KernRe(r"define\s+") self.entry.identifier = r.sub("", self.entry.identifier) self.entry.is_kernel_comment = True break @@ -1230,12 +1230,12 @@ class KernelDoc: self.entry.section = self.section_default self.entry.new_start_line = ln + 1 - r = Re("[-:](.*)") + r = KernRe("[-:](.*)") if r.search(line): # strip leading/trailing/multiple spaces self.entry.descr = r.group(1).strip(" ") - r = Re(r"\s+") + r = KernRe(r"\s+") self.entry.descr = r.sub(" ", self.entry.descr) self.entry.declaration_purpose = self.entry.descr self.state = self.STATE_BODY_MAYBE @@ -1272,7 +1272,7 @@ class KernelDoc: """ if self.state == self.STATE_BODY_WITH_BLANK_LINE: - r = Re(r"\s*\*\s?\S") + r = KernRe(r"\s*\*\s?\S") if r.match(line): self.dump_section() self.entry.section = self.section_default @@ -1318,7 +1318,7 @@ class KernelDoc: self.dump_section() # Look for doc_com + + doc_end: - r = Re(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') + r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') if r.match(line): self.emit_warning(ln, f"suspicious ending line: {line}") @@ -1351,7 +1351,7 @@ class KernelDoc: self.entry.declaration_purpose = self.entry.declaration_purpose.rstrip() self.entry.declaration_purpose += " " + cont - r = Re(r"\s+") + r = KernRe(r"\s+") self.entry.declaration_purpose = r.sub(' ', self.entry.declaration_purpose) @@ -1359,7 +1359,7 @@ class KernelDoc: if 
self.entry.section.startswith('@') or \ self.entry.section == self.section_context: if self.entry.leading_space is None: - r = Re(r'^(\s+)') + r = KernRe(r'^(\s+)') if r.match(cont): self.entry.leading_space = len(r.group(1)) else: @@ -1436,13 +1436,13 @@ class KernelDoc: is_void = True # Replace SYSCALL_DEFINE with correct return type & function name - proto = Re(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) + proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) - r = Re(r'long\s+(sys_.*?),') + r = KernRe(r'long\s+(sys_.*?),') if r.search(proto): - proto = Re(',').sub('(', proto, count=1) + proto = KernRe(',').sub('(', proto, count=1) elif is_void: - proto = Re(r'\)').sub('(void)', proto, count=1) + proto = KernRe(r'\)').sub('(void)', proto, count=1) # Now delete all of the odd-numbered commas in the proto # so that argument types & names don't have a comma between them @@ -1469,22 +1469,22 @@ class KernelDoc: tracepointargs = None # Match tracepoint name based on different patterns - r = Re(r'TRACE_EVENT\((.*?),') + r = KernRe(r'TRACE_EVENT\((.*?),') if r.search(proto): tracepointname = r.group(1) - r = Re(r'DEFINE_SINGLE_EVENT\((.*?),') + r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') if r.search(proto): tracepointname = r.group(1) - r = Re(r'DEFINE_EVENT\((.*?),(.*?),') + r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') if r.search(proto): tracepointname = r.group(2) if tracepointname: tracepointname = tracepointname.lstrip() - r = Re(r'TP_PROTO\((.*?)\)') + r = KernRe(r'TP_PROTO\((.*?)\)') if r.search(proto): tracepointargs = r.group(1) @@ -1501,43 +1501,43 @@ class KernelDoc: """Ancillary routine to process a function prototype""" # strip C99-style comments to end of line - r = Re(r"\/\/.*$", re.S) + r = KernRe(r"\/\/.*$", re.S) line = r.sub('', line) - if Re(r'\s*#\s*define').match(line): + if KernRe(r'\s*#\s*define').match(line): self.entry.prototype = line elif line.startswith('#'): # Strip other macros like #ifdef/#ifndef/#endif/... 
pass else: - r = Re(r'([^\{]*)') + r = KernRe(r'([^\{]*)') if r.match(line): self.entry.prototype += r.group(1) + " " - if '{' in line or ';' in line or Re(r'\s*#\s*define').match(line): + if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): # strip comments - r = Re(r'/\*.*?\*/') + r = KernRe(r'/\*.*?\*/') self.entry.prototype = r.sub('', self.entry.prototype) # strip newlines/cr's - r = Re(r'[\r\n]+') + r = KernRe(r'[\r\n]+') self.entry.prototype = r.sub(' ', self.entry.prototype) # strip leading spaces - r = Re(r'^\s+') + r = KernRe(r'^\s+') self.entry.prototype = r.sub('', self.entry.prototype) # Handle self.entry.prototypes for function pointers like: # int (*pcs_config)(struct foo) - r = Re(r'^(\S+\s+)\(\s*\*(\S+)\)') + r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) if 'SYSCALL_DEFINE' in self.entry.prototype: self.entry.prototype = self.syscall_munge(ln, self.entry.prototype) - r = Re(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') + r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') if r.search(self.entry.prototype): self.entry.prototype = self.tracepoint_munge(ln, self.entry.prototype) @@ -1549,22 +1549,22 @@ class KernelDoc: """Ancillary routine to process a type""" # Strip newlines/cr's. - line = Re(r'[\r\n]+', re.S).sub(' ', line) + line = KernRe(r'[\r\n]+', re.S).sub(' ', line) # Strip leading spaces - line = Re(r'^\s+', re.S).sub('', line) + line = KernRe(r'^\s+', re.S).sub('', line) # Strip trailing spaces - line = Re(r'\s+$', re.S).sub('', line) + line = KernRe(r'\s+$', re.S).sub('', line) # Strip C99-style comments to the end of the line - line = Re(r"\/\/.*$", re.S).sub('', line) + line = KernRe(r"\/\/.*$", re.S).sub('', line) # To distinguish preprocessor directive from regular declaration later. 
if line.startswith('#'): line += ";" - r = Re(r'([^\{\};]*)([\{\};])(.*)') + r = KernRe(r'([^\{\};]*)([\{\};])(.*)') while True: if r.search(line): if self.entry.prototype: -- cgit From de258fa8ca8d72ef17f4d71162cfbbd2d9f397e6 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Tue, 8 Apr 2025 18:09:36 +0800 Subject: scripts: kernel-doc: fix parsing function-like typedefs (again) Typedefs like typedef struct phylink_pcs *(*pcs_xlate_t)(const u64 *args); have a typedef_type that ends with a * and therefore has no word boundary. Add an extra clause for the final group of the typedef_type so we only require a word boundary if we match a word. [mchehab: modify also kernel-doc.py, as we're deprecating the perl version] Fixes: 7d2c6b1edf79 ("scripts: kernel-doc: fix parsing function-like typedefs") Signed-off-by: Sean Anderson Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/e0abb103c73a96d76602d909f60ab8fd6e2fd0bd.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index f60722bcc687..4f036c720b36 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1067,7 +1067,7 @@ class KernelDoc: Stores a typedef inside self.entries array. """ - typedef_type = r'((?:\s+[\w\*]+\b){1,8})\s*' + typedef_type = r'((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' typedef_ident = r'\*?\s*(\w\S+)\s*' typedef_args = r'\s*\((.*)\);' -- cgit From f9cdbc5781f2c2bf374d21d91b139ae5a2f62093 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 15 Apr 2025 11:12:49 +0800 Subject: scripts/lib/kdoc/kdoc_parser.py: move states to a separate class States are really enums. 
In Python, enums are actually classes, as can be seen at: https://docs.python.org/3/library/enum.html Yet, I can't see any advantage in deriving the class from the enum class here. So, just place the states in a separate class. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/00cb4e0b8a1545bf7c4401b58213841db5cba2e2.1744685912.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 123 +++++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 59 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 4f036c720b36..461e0acb0fb7 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -61,24 +61,22 @@ export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+" type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) - -class KernelDoc: +class state: """ - Read a C language source or header FILE and extract embedded - documentation comments. 
+ State machine enums """ # Parser states - STATE_NORMAL = 0 # normal code - STATE_NAME = 1 # looking for function name - STATE_BODY_MAYBE = 2 # body - or maybe more description - STATE_BODY = 3 # the body of the comment - STATE_BODY_WITH_BLANK_LINE = 4 # the body which has a blank line - STATE_PROTO = 5 # scanning prototype - STATE_DOCBLOCK = 6 # documentation block - STATE_INLINE = 7 # gathering doc outside main block - - st_name = [ + NORMAL = 0 # normal code + NAME = 1 # looking for function name + BODY_MAYBE = 2 # body - or maybe more description + BODY = 3 # the body of the comment + BODY_WITH_BLANK_LINE = 4 # the body which has a blank line + PROTO = 5 # scanning prototype + DOCBLOCK = 6 # documentation block + INLINE = 7 # gathering doc outside main block + + name = [ "NORMAL", "NAME", "BODY_MAYBE", @@ -90,15 +88,15 @@ class KernelDoc: ] # Inline documentation state - STATE_INLINE_NA = 0 # not applicable ($state != STATE_INLINE) - STATE_INLINE_NAME = 1 # looking for member name (@foo:) - STATE_INLINE_TEXT = 2 # looking for member documentation - STATE_INLINE_END = 3 # done - STATE_INLINE_ERROR = 4 # error - Comment without header was found. - # Spit a warning as it's not - # proper kernel-doc and ignore the rest. - - st_inline_name = [ + INLINE_NA = 0 # not applicable ($state != INLINE) + INLINE_NAME = 1 # looking for member name (@foo:) + INLINE_TEXT = 2 # looking for member documentation + INLINE_END = 3 # done + INLINE_ERROR = 4 # error - Comment without header was found. + # Spit a warning as it's not + # proper kernel-doc and ignore the rest. + + inline_name = [ "", "_NAME", "_TEXT", @@ -106,6 +104,13 @@ class KernelDoc: "_ERROR", ] + +class KernelDoc: + """ + Read a C language source or header FILE and extract embedded + documentation comments. 
+ """ + # Section names section_default = "Description" # default section @@ -122,8 +127,8 @@ class KernelDoc: self.config = config # Initial state for the state machines - self.state = self.STATE_NORMAL - self.inline_doc_state = self.STATE_INLINE_NA + self.state = state.NORMAL + self.inline_doc_state = state.INLINE_NA # Store entry currently being processed self.entry = None @@ -260,8 +265,8 @@ class KernelDoc: self.entry.leading_space = None # State flags - self.state = self.STATE_NORMAL - self.inline_doc_state = self.STATE_INLINE_NA + self.state = state.NORMAL + self.inline_doc_state = state.INLINE_NA self.entry.brcount = 0 self.entry.in_doc_sect = False @@ -1166,7 +1171,7 @@ class KernelDoc: self.entry.in_doc_sect = False # next line is always the function name - self.state = self.STATE_NAME + self.state = state.NAME def process_name(self, ln, line): """ @@ -1182,7 +1187,7 @@ class KernelDoc: self.entry.section = doc_block.group(1) self.entry.identifier = self.entry.section - self.state = self.STATE_DOCBLOCK + self.state = state.DOCBLOCK return if doc_decl.search(line): @@ -1224,7 +1229,7 @@ class KernelDoc: self.entry.identifier = self.entry.identifier.strip(" ") - self.state = self.STATE_BODY + self.state = state.BODY # if there's no @param blocks need to set up default section here self.entry.section = self.section_default @@ -1238,14 +1243,14 @@ class KernelDoc: r = KernRe(r"\s+") self.entry.descr = r.sub(" ", self.entry.descr) self.entry.declaration_purpose = self.entry.descr - self.state = self.STATE_BODY_MAYBE + self.state = state.BODY_MAYBE else: self.entry.declaration_purpose = "" if not self.entry.is_kernel_comment: self.emit_warning(ln, f"This comment starts with '/**', but isn't a kernel-doc comment. 
Refer Documentation/doc-guide/kernel-doc.rst\n{line}") - self.state = self.STATE_NORMAL + self.state = state.NORMAL if not self.entry.declaration_purpose and self.config.wshort_desc: self.emit_warning(ln, @@ -1254,7 +1259,7 @@ class KernelDoc: if not self.entry.identifier and self.entry.decl_type != "enum": self.emit_warning(ln, f"wrong kernel-doc identifier on line:\n{line}") - self.state = self.STATE_NORMAL + self.state = state.NORMAL if self.config.verbose: self.emit_warning(ln, @@ -1271,7 +1276,7 @@ class KernelDoc: STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment. """ - if self.state == self.STATE_BODY_WITH_BLANK_LINE: + if self.state == state.BODY_WITH_BLANK_LINE: r = KernRe(r"\s*\*\s?\S") if r.match(line): self.dump_section() @@ -1311,7 +1316,7 @@ class KernelDoc: if self.entry.contents: self.entry.contents += "\n" - self.state = self.STATE_BODY + self.state = state.BODY return if doc_end.search(line): @@ -1325,7 +1330,7 @@ class KernelDoc: self.entry.prototype = "" self.entry.new_start_line = ln + 1 - self.state = self.STATE_PROTO + self.state = state.PROTO return if doc_content.search(line): @@ -1336,16 +1341,16 @@ class KernelDoc: self.dump_section() self.entry.new_start_line = ln - self.state = self.STATE_BODY + self.state = state.BODY else: if self.entry.section != self.section_default: - self.state = self.STATE_BODY_WITH_BLANK_LINE + self.state = state.BODY_WITH_BLANK_LINE else: - self.state = self.STATE_BODY + self.state = state.BODY self.entry.contents += "\n" - elif self.state == self.STATE_BODY_MAYBE: + elif self.state == state.BODY_MAYBE: # Continued declaration purpose self.entry.declaration_purpose = self.entry.declaration_purpose.rstrip() @@ -1388,7 +1393,7 @@ class KernelDoc: def process_inline(self, ln, line): """STATE_INLINE: docbook comments within a prototype.""" - if self.inline_doc_state == self.STATE_INLINE_NAME and \ + if self.inline_doc_state == state.INLINE_NAME and \ doc_inline_sect.search(line): self.entry.section = 
doc_inline_sect.group(1) self.entry.new_start_line = ln @@ -1397,7 +1402,7 @@ class KernelDoc: if self.entry.contents != "": self.entry.contents += "\n" - self.inline_doc_state = self.STATE_INLINE_TEXT + self.inline_doc_state = state.INLINE_TEXT # Documentation block end */ return @@ -1405,21 +1410,21 @@ class KernelDoc: if self.entry.contents not in ["", "\n"]: self.dump_section() - self.state = self.STATE_PROTO - self.inline_doc_state = self.STATE_INLINE_NA + self.state = state.PROTO + self.inline_doc_state = state.INLINE_NA return if doc_content.search(line): - if self.inline_doc_state == self.STATE_INLINE_TEXT: + if self.inline_doc_state == state.INLINE_TEXT: self.entry.contents += doc_content.group(1) + "\n" if not self.entry.contents.strip(" ").rstrip("\n"): self.entry.contents = "" - elif self.inline_doc_state == self.STATE_INLINE_NAME: + elif self.inline_doc_state == state.INLINE_NAME: self.emit_warning(ln, f"Incorrect use of kernel-doc format: {line}") - self.inline_doc_state = self.STATE_INLINE_ERROR + self.inline_doc_state = state.INLINE_ERROR def syscall_munge(self, ln, proto): # pylint: disable=W0613 """ @@ -1598,8 +1603,8 @@ class KernelDoc: self.dump_section(start_new=False) elif doc_inline_start.search(line): - self.state = self.STATE_INLINE - self.inline_doc_state = self.STATE_INLINE_NAME + self.state = state.INLINE + self.inline_doc_state = state.INLINE_NAME elif self.entry.decl_type == 'function': self.process_proto_function(ln, line) @@ -1663,7 +1668,7 @@ class KernelDoc: line = line.expandtabs().strip("\n") # Group continuation lines on prototypes - if self.state == self.STATE_PROTO: + if self.state == state.PROTO: if line.endswith("\\"): prev += line.rstrip("\\") cont = True @@ -1681,8 +1686,8 @@ class KernelDoc: prev_ln = None self.config.log.debug("%d %s%s: %s", - ln, self.st_name[self.state], - self.st_inline_name[self.inline_doc_state], + ln, state.name[self.state], + state.inline_name[self.inline_doc_state], line) # This is an 
optimization over the original script. @@ -1696,18 +1701,18 @@ class KernelDoc: self.process_export(export_table, line) # Hand this line to the appropriate state handler - if self.state == self.STATE_NORMAL: + if self.state == state.NORMAL: self.process_normal(ln, line) - elif self.state == self.STATE_NAME: + elif self.state == state.NAME: self.process_name(ln, line) - elif self.state in [self.STATE_BODY, self.STATE_BODY_MAYBE, - self.STATE_BODY_WITH_BLANK_LINE]: + elif self.state in [state.BODY, state.BODY_MAYBE, + state.BODY_WITH_BLANK_LINE]: self.process_body(ln, line) - elif self.state == self.STATE_INLINE: # scanning for inline parameters + elif self.state == state.INLINE: # scanning for inline parameters self.process_inline(ln, line) - elif self.state == self.STATE_PROTO: + elif self.state == state.PROTO: self.process_proto(ln, line) - elif self.state == self.STATE_DOCBLOCK: + elif self.state == state.DOCBLOCK: self.process_docblock(ln, line) except OSError: self.config.log.error(f"Error: Cannot open file {self.fname}") -- cgit From a4bd43d6f7b72b90e064eb8c22c720126cfc1525 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 25 Apr 2025 15:13:39 +0800 Subject: scripts/lib/kdoc: change mode to 0644 The script library here contain just classes. Remove execution permission. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- scripts/lib/kdoc/kdoc_parser.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 scripts/lib/kdoc/kdoc_parser.py (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py old mode 100755 new mode 100644 -- cgit From e3b42e94cf108e1cb4bdd628271c1565aceaf943 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 25 Apr 2025 15:13:40 +0800 Subject: scripts/lib/kdoc/kdoc_parser.py: move kernel entry to a class The KernelDoc class is too complex. 
Start optimizing it by placing the kernel-doc parser entry to a separate class. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <28b456f726a022011f0ce5810dbcc26827c1403a.1745564565.git.mchehab+huawei@kernel.org> --- scripts/lib/kdoc/kdoc_parser.py | 273 ++++++++++++++++++++++------------------ 1 file changed, 149 insertions(+), 124 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 461e0acb0fb7..062453eefc7a 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -12,7 +12,6 @@ Read a C language source or header FILE and extract embedded documentation comments """ -import argparse import re from pprint import pformat @@ -104,6 +103,97 @@ class state: "_ERROR", ] +SECTION_DEFAULT = "Description" # default section + +class KernelEntry: + + def __init__(self, config, ln): + self.config = config + + self.contents = "" + self.function = "" + self.sectcheck = "" + self.struct_actual = "" + self.prototype = "" + + self.warnings = [] + + self.parameterlist = [] + self.parameterdescs = {} + self.parametertypes = {} + self.parameterdesc_start_lines = {} + + self.section_start_lines = {} + self.sectionlist = [] + self.sections = {} + + self.anon_struct_union = False + + self.leading_space = None + + # State flags + self.brcount = 0 + + self.in_doc_sect = False + self.declaration_start_line = ln + 1 + + # TODO: rename to emit_message after removal of kernel-doc.pl + def emit_msg(self, log_msg, warning=True): + """Emit a message""" + + if not warning: + self.config.log.info(log_msg) + return + + # Delegate warning output to output logic, as this way it + # will report warnings/info only for symbols that are output + + self.warnings.append(log_msg) + return + + def dump_section(self, start_new=True): + """ + Dumps section contents to arrays/hashes intended for that purpose. 
+ """ + + name = self.section + contents = self.contents + + if type_param.match(name): + name = type_param.group(1) + + self.parameterdescs[name] = contents + self.parameterdesc_start_lines[name] = self.new_start_line + + self.sectcheck += name + " " + self.new_start_line = 0 + + elif name == "@...": + name = "..." + self.parameterdescs[name] = contents + self.sectcheck += name + " " + self.parameterdesc_start_lines[name] = self.new_start_line + self.new_start_line = 0 + + else: + if name in self.sections and self.sections[name] != "": + # Only warn on user-specified duplicate section names + if name != SECTION_DEFAULT: + self.emit_msg(self.new_start_line, + f"duplicate section name '{name}'\n") + self.sections[name] += contents + else: + self.sections[name] = contents + self.sectionlist.append(name) + self.section_start_lines[name] = self.new_start_line + self.new_start_line = 0 + +# self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) + + if start_new: + self.section = SECTION_DEFAULT + self.contents = "" + class KernelDoc: """ @@ -113,7 +203,6 @@ class KernelDoc: # Section names - section_default = "Description" # default section section_intro = "Introduction" section_context = "Context" section_return = "Return" @@ -136,67 +225,27 @@ class KernelDoc: # Place all potential outputs into an array self.entries = [] - # TODO: rename to emit_message after removal of kernel-doc.pl - def emit_warning(self, ln, msg, warning=True): + def emit_msg(self, ln, msg, warning=True): """Emit a message""" log_msg = f"{self.fname}:{ln} {msg}" - if not warning: - self.config.log.info(log_msg) - return - if self.entry: - # Delegate warning output to output logic, as this way it - # will report warnings/info only for symbols that are output - - self.entry.warnings.append(log_msg) + self.entry.emit_msg(log_msg, warning) return - self.config.log.warning(log_msg) + if warning: + self.config.log.warning(log_msg) + else: + self.config.log.info(log_msg) def 
dump_section(self, start_new=True): """ Dumps section contents to arrays/hashes intended for that purpose. """ - name = self.entry.section - contents = self.entry.contents - - if type_param.match(name): - name = type_param.group(1) - - self.entry.parameterdescs[name] = contents - self.entry.parameterdesc_start_lines[name] = self.entry.new_start_line - - self.entry.sectcheck += name + " " - self.entry.new_start_line = 0 - - elif name == "@...": - name = "..." - self.entry.parameterdescs[name] = contents - self.entry.sectcheck += name + " " - self.entry.parameterdesc_start_lines[name] = self.entry.new_start_line - self.entry.new_start_line = 0 - - else: - if name in self.entry.sections and self.entry.sections[name] != "": - # Only warn on user-specified duplicate section names - if name != self.section_default: - self.emit_warning(self.entry.new_start_line, - f"duplicate section name '{name}'\n") - self.entry.sections[name] += contents - else: - self.entry.sections[name] = contents - self.entry.sectionlist.append(name) - self.entry.section_start_lines[name] = self.entry.new_start_line - self.entry.new_start_line = 0 - -# self.config.log.debug("Section: %s : %s", name, pformat(vars(self.entry))) - - if start_new: - self.entry.section = self.section_default - self.entry.contents = "" + if self.entry: + self.entry.dump_section(start_new) # TODO: rename it to store_declaration after removal of kernel-doc.pl def output_declaration(self, dtype, name, **args): @@ -241,36 +290,11 @@ class KernelDoc: variables used by the state machine. 
""" - self.entry = argparse.Namespace - - self.entry.contents = "" - self.entry.function = "" - self.entry.sectcheck = "" - self.entry.struct_actual = "" - self.entry.prototype = "" - - self.entry.warnings = [] - - self.entry.parameterlist = [] - self.entry.parameterdescs = {} - self.entry.parametertypes = {} - self.entry.parameterdesc_start_lines = {} - - self.entry.section_start_lines = {} - self.entry.sectionlist = [] - self.entry.sections = {} - - self.entry.anon_struct_union = False - - self.entry.leading_space = None + self.entry = KernelEntry(self.config, ln) # State flags self.state = state.NORMAL self.inline_doc_state = state.INLINE_NA - self.entry.brcount = 0 - - self.entry.in_doc_sect = False - self.entry.declaration_start_line = ln + 1 def push_parameter(self, ln, decl_type, param, dtype, org_arg, declaration_name): @@ -328,8 +352,8 @@ class KernelDoc: else: dname = f"{decl_type} member" - self.emit_warning(ln, - f"{dname} '{param}' not described in '{declaration_name}'") + self.emit_msg(ln, + f"{dname} '{param}' not described in '{declaration_name}'") # Strip spaces from param so that it is one continuous string on # parameterlist. 
This fixes a problem where check_sections() @@ -393,7 +417,7 @@ class KernelDoc: if r.match(arg): param = r.group(1) else: - self.emit_warning(ln, f"Invalid param: {arg}") + self.emit_msg(ln, f"Invalid param: {arg}") param = arg dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) @@ -409,7 +433,7 @@ class KernelDoc: if r.match(arg): param = r.group(1) else: - self.emit_warning(ln, f"Invalid param: {arg}") + self.emit_msg(ln, f"Invalid param: {arg}") param = arg dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) @@ -442,7 +466,7 @@ class KernelDoc: if KernRe(r'^(\*+)\s*(.*)').match(param): r = KernRe(r'^(\*+)\s*(.*)') if not r.match(param): - self.emit_warning(ln, f"Invalid param: {param}") + self.emit_msg(ln, f"Invalid param: {param}") continue param = r.group(1) @@ -455,7 +479,7 @@ class KernelDoc: elif KernRe(r'(.*?):(\w+)').search(param): r = KernRe(r'(.*?):(\w+)') if not r.match(param): - self.emit_warning(ln, f"Invalid param: {param}") + self.emit_msg(ln, f"Invalid param: {param}") continue if dtype != "": # Skip unnamed bit-fields @@ -503,8 +527,8 @@ class KernelDoc: else: dname = f"{decl_type} member" - self.emit_warning(ln, - f"Excess {dname} '{sects[sx]}' description in '{decl_name}'") + self.emit_msg(ln, + f"Excess {dname} '{sects[sx]}' description in '{decl_name}'") def check_return_section(self, ln, declaration_name, return_type): """ @@ -521,8 +545,8 @@ class KernelDoc: return if not self.entry.sections.get("Return", None): - self.emit_warning(ln, - f"No description found for return value of '{declaration_name}'") + self.emit_msg(ln, + f"No description found for return value of '{declaration_name}'") def dump_struct(self, ln, proto): """ @@ -561,12 +585,12 @@ class KernelDoc: members = r.group(2) if not members: - self.emit_warning(ln, f"{proto} error: Cannot parse struct or union!") + self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") return if self.entry.identifier != declaration_name: - 
self.emit_warning(ln, - f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n") + self.emit_msg(ln, + f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n") return args_pattern = r'([^,)]+)' @@ -835,16 +859,16 @@ class KernelDoc: members = r.group(2).rstrip() if not members: - self.emit_warning(ln, f"{proto}: error: Cannot parse enum!") + self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") return if self.entry.identifier != declaration_name: if self.entry.identifier == "": - self.emit_warning(ln, - f"{proto}: wrong kernel-doc identifier on prototype") + self.emit_msg(ln, + f"{proto}: wrong kernel-doc identifier on prototype") else: - self.emit_warning(ln, - f"expecting prototype for enum {self.entry.identifier}. Prototype was for enum {declaration_name} instead") + self.emit_msg(ln, + f"expecting prototype for enum {self.entry.identifier}. Prototype was for enum {declaration_name} instead") return if not declaration_name: @@ -861,14 +885,14 @@ class KernelDoc: self.entry.parameterlist.append(arg) if arg not in self.entry.parameterdescs: self.entry.parameterdescs[arg] = self.undescribed - self.emit_warning(ln, - f"Enum value '{arg}' not described in enum '{declaration_name}'") + self.emit_msg(ln, + f"Enum value '{arg}' not described in enum '{declaration_name}'") member_set.add(arg) for k in self.entry.parameterdescs: if k not in member_set: - self.emit_warning(ln, - f"Excess enum value '%{k}' description in '{declaration_name}'") + self.emit_msg(ln, + f"Excess enum value '%{k}' description in '{declaration_name}'") self.output_declaration('enum', declaration_name, enum=declaration_name, @@ -1023,13 +1047,13 @@ class KernelDoc: found = True break if not found: - self.emit_warning(ln, - f"cannot understand function prototype: '{prototype}'") + self.emit_msg(ln, + f"cannot understand function prototype: '{prototype}'") return if 
self.entry.identifier != declaration_name: - self.emit_warning(ln, - f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead") + self.emit_msg(ln, + f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead") return prms = " ".join(self.entry.parameterlist) @@ -1092,8 +1116,8 @@ class KernelDoc: args = r.group(3) if self.entry.identifier != declaration_name: - self.emit_warning(ln, - f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") + self.emit_msg(ln, + f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") return decl_type = 'function' @@ -1124,7 +1148,8 @@ class KernelDoc: declaration_name = r.group(1) if self.entry.identifier != declaration_name: - self.emit_warning(ln, f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") + self.emit_msg(ln, + f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") return self.output_declaration('typedef', declaration_name, @@ -1135,7 +1160,7 @@ class KernelDoc: purpose=self.entry.declaration_purpose) return - self.emit_warning(ln, "error: Cannot parse typedef!") + self.emit_msg(ln, "error: Cannot parse typedef!") @staticmethod def process_export(function_set, line): @@ -1232,7 +1257,7 @@ class KernelDoc: self.state = state.BODY # if there's no @param blocks need to set up default section here - self.entry.section = self.section_default + self.entry.section = SECTION_DEFAULT self.entry.new_start_line = ln + 1 r = KernRe("[-:](.*)") @@ -1248,28 +1273,28 @@ class KernelDoc: self.entry.declaration_purpose = "" if not self.entry.is_kernel_comment: - self.emit_warning(ln, - f"This comment starts with '/**', but isn't a kernel-doc comment. 
Refer Documentation/doc-guide/kernel-doc.rst\n{line}") + self.emit_msg(ln, + f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") self.state = state.NORMAL if not self.entry.declaration_purpose and self.config.wshort_desc: - self.emit_warning(ln, - f"missing initial short description on line:\n{line}") + self.emit_msg(ln, + f"missing initial short description on line:\n{line}") if not self.entry.identifier and self.entry.decl_type != "enum": - self.emit_warning(ln, - f"wrong kernel-doc identifier on line:\n{line}") + self.emit_msg(ln, + f"wrong kernel-doc identifier on line:\n{line}") self.state = state.NORMAL if self.config.verbose: - self.emit_warning(ln, - f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", + self.emit_msg(ln, + f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", warning=False) return # Failed to find an identifier. Emit a warning - self.emit_warning(ln, f"Cannot find identifier on line:\n{line}") + self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") def process_body(self, ln, line): """ @@ -1280,7 +1305,7 @@ class KernelDoc: r = KernRe(r"\s*\*\s?\S") if r.match(line): self.dump_section() - self.entry.section = self.section_default + self.entry.section = SECTION_DEFAULT self.entry.new_start_line = ln self.entry.contents = "" @@ -1325,7 +1350,7 @@ class KernelDoc: # Look for doc_com + + doc_end: r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') if r.match(line): - self.emit_warning(ln, f"suspicious ending line: {line}") + self.emit_msg(ln, f"suspicious ending line: {line}") self.entry.prototype = "" self.entry.new_start_line = ln + 1 @@ -1343,7 +1368,7 @@ class KernelDoc: self.entry.new_start_line = ln self.state = state.BODY else: - if self.entry.section != self.section_default: + if self.entry.section != SECTION_DEFAULT: self.state = state.BODY_WITH_BLANK_LINE else: self.state = state.BODY @@ -1388,7 +1413,7 @@ class KernelDoc: return # 
Unknown line, ignore - self.emit_warning(ln, f"bad line: {line}") + self.emit_msg(ln, f"bad line: {line}") def process_inline(self, ln, line): """STATE_INLINE: docbook comments within a prototype.""" @@ -1421,8 +1446,8 @@ class KernelDoc: self.entry.contents = "" elif self.inline_doc_state == state.INLINE_NAME: - self.emit_warning(ln, - f"Incorrect use of kernel-doc format: {line}") + self.emit_msg(ln, + f"Incorrect use of kernel-doc format: {line}") self.inline_doc_state = state.INLINE_ERROR @@ -1494,8 +1519,8 @@ class KernelDoc: tracepointargs = r.group(1) if not tracepointname or not tracepointargs: - self.emit_warning(ln, - f"Unrecognized tracepoint format:\n{proto}\n") + self.emit_msg(ln, + f"Unrecognized tracepoint format:\n{proto}\n") else: proto = f"static inline void trace_{tracepointname}({tracepointargs})" self.entry.identifier = f"trace_{self.entry.identifier}" -- cgit From 27ad33b6b349c8c76fdef3bf0f707158ce7c275e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 6 Jun 2025 15:15:42 +0100 Subject: kernel-doc: Fix symbol matching for dropped suffixes The support for dropping "_noprof" missed dropping the suffix from exported symbols. That meant that using the :export: feature would look for kernel-doc for (eg) krealloc_noprof() and not find the kernel-doc for krealloc(). Fixes: 51a7bf0238c2 (scripts/kernel-doc: drop "_noprof" on function prototypes) Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250606141543.1285671-1-willy@infradead.org --- scripts/lib/kdoc/kdoc_parser.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 062453eefc7a..2c6143f7ca0f 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1171,16 +1171,24 @@ class KernelDoc: with a staticmethod decorator. 
""" + # We support documenting some exported symbols with different + # names. A horrible hack. + suffixes = [ '_noprof' ] + # Note: it accepts only one EXPORT_SYMBOL* per line, as having # multiple export lines would violate Kernel coding style. if export_symbol.search(line): symbol = export_symbol.group(2) + for suffix in suffixes: + symbol = symbol.removesuffix(suffix) function_set.add(symbol) return if export_symbol_ns.search(line): symbol = export_symbol_ns.group(2) + for suffix in suffixes: + symbol = symbol.removesuffix(suffix) function_set.add(symbol) def process_normal(self, ln, line): -- cgit From e8f0303e8b8dce911536963c89eaf0a5ccb62d6a Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 6 Jun 2025 10:34:30 -0600 Subject: docs: kdoc: simplify the PROTO continuation logic Remove the unneeded "cont" variable and tighten up the code slightly. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250606163438.229916-2-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 2c6143f7ca0f..899d5446f95c 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1688,7 +1688,6 @@ class KernelDoc: Besides parsing kernel-doc tags, it also parses export symbols. 
""" - cont = False prev = "" prev_ln = None export_table = set() @@ -1704,18 +1703,14 @@ class KernelDoc: if self.state == state.PROTO: if line.endswith("\\"): prev += line.rstrip("\\") - cont = True - if not prev_ln: prev_ln = ln - continue - if cont: + if prev: ln = prev_ln line = prev + line prev = "" - cont = False prev_ln = None self.config.log.debug("%d %s%s: %s", -- cgit From cef8c781ca71ddd0777d639775e66f8630359342 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 6 Jun 2025 10:34:31 -0600 Subject: docs: kdoc: move the core dispatch into a state table Since all of the handlers already nicely have the same prototype, put them into a table and call them from there and take out the extended if-then-else series. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250606163438.229916-3-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 899d5446f95c..1a6c6865b2c5 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1678,6 +1678,21 @@ class KernelDoc: return export_table + # + # The state/action table telling us which function to invoke in + # each state. + # + state_actions = { + state.NORMAL: process_normal, + state.NAME: process_name, + state.BODY: process_body, + state.BODY_MAYBE: process_body, + state.BODY_WITH_BLANK_LINE: process_body, + state.INLINE: process_inline, + state.PROTO: process_proto, + state.DOCBLOCK: process_docblock, + } + def parse_kdoc(self): """ Open and process each line of a C source file. 
@@ -1729,19 +1744,8 @@ class KernelDoc: self.process_export(export_table, line) # Hand this line to the appropriate state handler - if self.state == state.NORMAL: - self.process_normal(ln, line) - elif self.state == state.NAME: - self.process_name(ln, line) - elif self.state in [state.BODY, state.BODY_MAYBE, - state.BODY_WITH_BLANK_LINE]: - self.process_body(ln, line) - elif self.state == state.INLINE: # scanning for inline parameters - self.process_inline(ln, line) - elif self.state == state.PROTO: - self.process_proto(ln, line) - elif self.state == state.DOCBLOCK: - self.process_docblock(ln, line) + self.state_actions[self.state](self, ln, line) + except OSError: self.config.log.error(f"Error: Cannot open file {self.fname}") -- cgit From 42592bd46dded5fab5af1d5e04c9b17cbb4bca6d Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 6 Jun 2025 10:34:32 -0600 Subject: docs: kdoc: remove the section_intro variable It is only used in one place, so just put the constant string "Introduction" there so people don't have to go looking for it. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250606163438.229916-4-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 1a6c6865b2c5..f8871f6a2638 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -203,7 +203,6 @@ class KernelDoc: # Section names - section_intro = "Introduction" section_context = "Context" section_return = "Return" @@ -1215,7 +1214,7 @@ class KernelDoc: self.entry.new_start_line = ln if not doc_block.group(1): - self.entry.section = self.section_intro + self.entry.section = "Introduction" else: self.entry.section = doc_block.group(1) -- cgit From e76a1d2b2623e9f10e2ffd295ae2615bf3228561 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 6 Jun 2025 10:34:33 -0600 Subject: docs: kdoc: simplify the kerneldoc recognition code process_name() looks for the first line of a kerneldoc comment. It contains two nearly identical regular expressions, the second of which only catches six cases in the kernel, all of the form: define SOME_MACRO_NAME - description Simply put the "define" into the regex and discard it, eliminating the loop and the code to remove it specially. Note that this still treats these defines as if they were functions, but that's a separate issue. There is no change in the generated output. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250606163438.229916-5-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index f8871f6a2638..72919a5d71b2 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1238,26 +1238,18 @@ class KernelDoc: # Test for data declaration r = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)") + r2 = KernRe(fr"^{decl_start}{fn_type}(?:define\s+)?(\w+)\s*{parenthesis}\s*{decl_end}?$") if r.search(line): self.entry.decl_type = r.group(1) self.entry.identifier = r.group(2) self.entry.is_kernel_comment = True - else: - # Look for foo() or static void foo() - description; - # or misspelt identifier - - r1 = KernRe(fr"^{decl_start}{fn_type}(\w+)\s*{parenthesis}\s*{decl_end}?$") - r2 = KernRe(fr"^{decl_start}{fn_type}(\w+[^-:]*){parenthesis}\s*{decl_end}$") - - for r in [r1, r2]: - if r.search(line): - self.entry.identifier = r.group(1) - self.entry.decl_type = "function" - - r = KernRe(r"define\s+") - self.entry.identifier = r.sub("", self.entry.identifier) - self.entry.is_kernel_comment = True - break + # + # Look for a function description + # + elif r2.search(line): + self.entry.identifier = r2.group(1) + self.entry.decl_type = "function" + self.entry.is_kernel_comment = True self.entry.identifier = self.entry.identifier.strip(" ") -- cgit From 8f4650fe1a74e68f5c6715413a5a26aa1564780d Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 6 Jun 2025 10:34:34 -0600 Subject: docs: kdoc: remove the KernelEntry::is_kernel_comment member entry::is_kernel_comment never had anything to do with the entry itself; it is a bit of local state in one branch of process_name(). 
It can, in fact, be removed entirely; rework the code slightly so that it is no longer needed. No change in the rendered output. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250606163438.229916-6-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 72919a5d71b2..dffa3055adc1 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1224,7 +1224,6 @@ class KernelDoc: if doc_decl.search(line): self.entry.identifier = doc_decl.group(1) - self.entry.is_kernel_comment = False decl_start = str(doc_com) # comment block asterisk fn_type = r"(?:\w+\s*\*\s*)?" # type (for non-functions) @@ -1242,14 +1241,20 @@ class KernelDoc: if r.search(line): self.entry.decl_type = r.group(1) self.entry.identifier = r.group(2) - self.entry.is_kernel_comment = True # # Look for a function description # elif r2.search(line): self.entry.identifier = r2.group(1) self.entry.decl_type = "function" - self.entry.is_kernel_comment = True + # + # We struck out. + # + else: + self.emit_msg(ln, + f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") + self.state = state.NORMAL + return self.entry.identifier = self.entry.identifier.strip(" ") @@ -1271,11 +1276,6 @@ class KernelDoc: else: self.entry.declaration_purpose = "" - if not self.entry.is_kernel_comment: - self.emit_msg(ln, - f"This comment starts with '/**', but isn't a kernel-doc comment. 
Refer Documentation/doc-guide/kernel-doc.rst\n{line}") - self.state = state.NORMAL - if not self.entry.declaration_purpose and self.config.wshort_desc: self.emit_msg(ln, f"missing initial short description on line:\n{line}") -- cgit From f9b4cf2e8518387d4c512d934137dc6968759ec4 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 6 Jun 2025 10:34:35 -0600 Subject: docs: kdoc: remove the KernelEntry::descr pseudo member The entry.descr value used in process_name() is not actually a member of the KernelEntry class; it is a bit of local state. So just manage it locally. A trim_whitespace() helper was added to clean up the code slightly. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250606163438.229916-7-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index dffa3055adc1..2d8a046499c7 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -60,6 +60,13 @@ export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+" type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) +# +# A little helper to get rid of excess white space +# +multi_space = KernRe(r'\s\s+') +def trim_whitespace(s): + return multi_space.sub(' ', s.strip()) + class state: """ State machine enums @@ -1266,12 +1273,7 @@ class KernelDoc: r = KernRe("[-:](.*)") if r.search(line): - # strip leading/trailing/multiple spaces - self.entry.descr = r.group(1).strip(" ") - - r = KernRe(r"\s+") - self.entry.descr = r.sub(" ", self.entry.descr) - self.entry.declaration_purpose = self.entry.descr + self.entry.declaration_purpose = trim_whitespace(r.group(1)) self.state = state.BODY_MAYBE else: self.entry.declaration_purpose = "" -- cgit From b23c71080b6cb0c12d4962321e5266814f980da1 Mon Sep 17 00:00:00 
2001 From: Jonathan Corbet Date: Fri, 6 Jun 2025 10:34:36 -0600 Subject: docs: kdoc: remove some ineffective code The code testing for a pointer declaration in process_name() has no actual effect on subsequent actions; remove it. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250606163438.229916-8-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 5 ----- 1 file changed, 5 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 2d8a046499c7..575817387a32 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1237,11 +1237,6 @@ class KernelDoc: parenthesis = r"(?:\(\w*\))?" # optional parenthesis on function decl_end = r"(?:[-:].*)" # end of the name part - # test for pointer declaration type, foo * bar() - desc - r = KernRe(fr"^{decl_start}([\w\s]+?){parenthesis}?\s*{decl_end}?$") - if r.search(line): - self.entry.identifier = r.group(1) - # Test for data declaration r = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)") r2 = KernRe(fr"^{decl_start}{fn_type}(?:define\s+)?(\w+)\s*{parenthesis}\s*{decl_end}?$") -- cgit From 0682bde2c7f44320c621b765f31a0cf24e01b23f Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 6 Jun 2025 10:34:37 -0600 Subject: docs: kdoc: move the declaration regexes out of process_name() Move two complex regexes up with the other patterns, decluttering this function and allowing the compilation to be done once rather than for every kerneldoc comment. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250606163438.229916-9-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 575817387a32..d814e48f9f38 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -47,7 +47,6 @@ doc_sect = doc_com + \ flags=re.I, cache=False) doc_content = doc_com_body + KernRe(r'(.*)', cache=False) -doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) @@ -60,6 +59,18 @@ export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+" type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) +# +# Tests for the beginning of a kerneldoc block in its various forms. +# +doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) +doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False) +doc_begin_func = KernRe(str(doc_com) + # initial " * ' + r"(?:\w+\s*\*\s*)?" + # type (not captured) + r'(?:define\s+)?' + # possible "define" (not captured) + r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)" + r'(?:[-:].*)?$', # description (not captured) + cache = False) + # # A little helper to get rid of excess white space # @@ -1232,22 +1243,15 @@ class KernelDoc: if doc_decl.search(line): self.entry.identifier = doc_decl.group(1) - decl_start = str(doc_com) # comment block asterisk - fn_type = r"(?:\w+\s*\*\s*)?" # type (for non-functions) - parenthesis = r"(?:\(\w*\))?" 
# optional parenthesis on function - decl_end = r"(?:[-:].*)" # end of the name part - # Test for data declaration - r = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)") - r2 = KernRe(fr"^{decl_start}{fn_type}(?:define\s+)?(\w+)\s*{parenthesis}\s*{decl_end}?$") - if r.search(line): - self.entry.decl_type = r.group(1) - self.entry.identifier = r.group(2) + if doc_begin_data.search(line): + self.entry.decl_type = doc_begin_data.group(1) + self.entry.identifier = doc_begin_data.group(2) # # Look for a function description # - elif r2.search(line): - self.entry.identifier = r2.group(1) + elif doc_begin_func.search(line): + self.entry.identifier = doc_begin_func.group(1) self.entry.decl_type = "function" # # We struck out. -- cgit From 8666a352dc1738f6302382d9d64611a44978d369 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 6 Jun 2025 10:34:38 -0600 Subject: docs: kdoc: some final touches for process_name() Add some comments to process_name() to cover its broad phases of operation, and slightly restructure the if/then/else structure to remove some early returns. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250606163438.229916-10-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index d814e48f9f38..42b2e0936b72 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1227,7 +1227,9 @@ class KernelDoc: """ STATE_NAME: Looking for the "name - description" line """ - + # + # Check for a DOC: block and handle them specially. 
+ # if doc_block.search(line): self.entry.new_start_line = ln @@ -1238,9 +1240,10 @@ class KernelDoc: self.entry.identifier = self.entry.section self.state = state.DOCBLOCK - return - - if doc_decl.search(line): + # + # Otherwise we're looking for a normal kerneldoc declaration line. + # + elif doc_decl.search(line): self.entry.identifier = doc_decl.group(1) # Test for data declaration @@ -1261,15 +1264,19 @@ class KernelDoc: f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") self.state = state.NORMAL return - - self.entry.identifier = self.entry.identifier.strip(" ") - + # + # OK, set up for a new kerneldoc entry. + # self.state = state.BODY - + self.entry.identifier = self.entry.identifier.strip(" ") # if there's no @param blocks need to set up default section here self.entry.section = SECTION_DEFAULT self.entry.new_start_line = ln + 1 - + # + # Find the description portion, which *should* be there but + # isn't always. + # (We should be able to capture this from the previous parsing - someday) + # r = KernRe("[-:](.*)") if r.search(line): self.entry.declaration_purpose = trim_whitespace(r.group(1)) @@ -1290,11 +1297,11 @@ class KernelDoc: self.emit_msg(ln, f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", warning=False) - - return - + # # Failed to find an identifier. Emit a warning - self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") + # + else: + self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") def process_body(self, ln, line): """ -- cgit From 823d6f956605cb2f009f75de138622fcd7e03817 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sat, 21 Jun 2025 14:35:04 -0600 Subject: docs: kdoc: Make body_with_blank_line parsing more flexible The regex in the BODY_WITH_BLANK_LINE case was looking for lines starting with " * ", where exactly one space was allowed before the following text. 
There are many kerneldoc comments where the authors have put multiple spaces instead, leading to mis-formatting of the documentation. Specifically, in this case, the description portion is associated with the last of the parameters. Allow multiple spaces in this context. See, for example, synchronize_hardirq() and how its documentation is formatted before and after the change. Acked-by: Mauro Carvalho Chehab Tested-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250621203512.223189-2-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 42b2e0936b72..c46e1b6a7d4b 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1309,7 +1309,7 @@ class KernelDoc: """ if self.state == state.BODY_WITH_BLANK_LINE: - r = KernRe(r"\s*\*\s?\S") + r = KernRe(r"\s*\*\s*\S") if r.match(line): self.dump_section() self.entry.section = SECTION_DEFAULT -- cgit From df2755269456d9ed02ad689aa8eaa50f7ac4217e Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sat, 21 Jun 2025 14:35:05 -0600 Subject: docs: kdoc: consolidate the "begin section" logic Pull the repeated "begin a section" logic into a single place and hide it within the KernelEntry class. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250621203512.223189-3-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index c46e1b6a7d4b..d29a61a06f6d 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -169,6 +169,15 @@ class KernelEntry: self.warnings.append(log_msg) return + # + # Begin a new section. + # + def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False): + if dump: + self.dump_section(start_new = True) + self.section = title + self.new_start_line = line_no + def dump_section(self, start_new=True): """ Dumps section contents to arrays/hashes intended for that purpose. @@ -1231,12 +1240,11 @@ class KernelDoc: # Check for a DOC: block and handle them specially. # if doc_block.search(line): - self.entry.new_start_line = ln if not doc_block.group(1): - self.entry.section = "Introduction" + self.entry.begin_section(ln, "Introduction") else: - self.entry.section = doc_block.group(1) + self.entry.begin_section(ln, doc_block.group(1)) self.entry.identifier = self.entry.section self.state = state.DOCBLOCK @@ -1270,8 +1278,7 @@ class KernelDoc: self.state = state.BODY self.entry.identifier = self.entry.identifier.strip(" ") # if there's no @param blocks need to set up default section here - self.entry.section = SECTION_DEFAULT - self.entry.new_start_line = ln + 1 + self.entry.begin_section(ln + 1) # # Find the description portion, which *should* be there but # isn't always. 
@@ -1312,8 +1319,7 @@ class KernelDoc: r = KernRe(r"\s*\*\s*\S") if r.match(line): self.dump_section() - self.entry.section = SECTION_DEFAULT - self.entry.new_start_line = ln + self.entry.begin_section(ln) self.entry.contents = "" if doc_sect.search(line): @@ -1340,8 +1346,7 @@ class KernelDoc: if self.entry.contents.strip("\n"): self.dump_section() - self.entry.new_start_line = ln - self.entry.section = newsection + self.entry.begin_section(ln, newsection) self.entry.leading_space = None self.entry.contents = newcontents.lstrip() @@ -1370,9 +1375,7 @@ class KernelDoc: if cont == "": if self.entry.section == self.section_context: - self.dump_section() - - self.entry.new_start_line = ln + self.entry.begin_section(ln, dump = True) self.state = state.BODY else: if self.entry.section != SECTION_DEFAULT: @@ -1427,8 +1430,7 @@ class KernelDoc: if self.inline_doc_state == state.INLINE_NAME and \ doc_inline_sect.search(line): - self.entry.section = doc_inline_sect.group(1) - self.entry.new_start_line = ln + self.entry.begin_section(ln, doc_inline_sect.group(1)) self.entry.contents = doc_inline_sect.group(2).lstrip() if self.entry.contents != "": @@ -1627,7 +1629,7 @@ class KernelDoc: """STATE_PROTO: reading a function/whatever prototype.""" if doc_inline_oneline.search(line): - self.entry.section = doc_inline_oneline.group(1) + self.entry.begin_section(ln, doc_inline_oneline.group(1)) self.entry.contents = doc_inline_oneline.group(2) if self.entry.contents != "": -- cgit From e4153a2255b1a0f3398360895e79e7709a0600b2 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sat, 21 Jun 2025 14:35:06 -0600 Subject: docs: kdoc: separate out the handling of the declaration phase The BODY_MAYBE state really describes the "we are in a declaration" state. Rename it accordingly, and split the handling of this state out from that of the other BODY* states. This change introduces a fair amount of duplicated code that will be coalesced in a later patch. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250621203512.223189-4-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 93 ++++++++++++++++++++++++++++++++++------- 1 file changed, 78 insertions(+), 15 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index d29a61a06f6d..f1491f8c88e7 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -86,7 +86,7 @@ class state: # Parser states NORMAL = 0 # normal code NAME = 1 # looking for function name - BODY_MAYBE = 2 # body - or maybe more description + DECLARATION = 2 # We have seen a declaration which might not be done BODY = 3 # the body of the comment BODY_WITH_BLANK_LINE = 4 # the body which has a blank line PROTO = 5 # scanning prototype @@ -96,7 +96,7 @@ class state: name = [ "NORMAL", "NAME", - "BODY_MAYBE", + "DECLARATION", "BODY", "BODY_WITH_BLANK_LINE", "PROTO", @@ -1287,7 +1287,7 @@ class KernelDoc: r = KernRe("[-:](.*)") if r.search(line): self.entry.declaration_purpose = trim_whitespace(r.group(1)) - self.state = state.BODY_MAYBE + self.state = state.DECLARATION else: self.entry.declaration_purpose = "" @@ -1310,9 +1310,82 @@ class KernelDoc: else: self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") + def process_decl(self, ln, line): + """ + STATE_DECLARATION: We've seen the beginning of a declaration + """ + if doc_sect.search(line): + self.entry.in_doc_sect = True + newsection = doc_sect.group(1) + + if newsection.lower() in ["description", "context"]: + newsection = newsection.title() + + # Special case: @return is a section, not a param description + if newsection.lower() in ["@return", "@returns", + "return", "returns"]: + newsection = "Return" + + # Perl kernel-doc has a check here for contents before sections. + # the logic there is always false, as in_doc_sect variable is + # always true. 
So, just don't implement Wcontents_before_sections + + # .title() + newcontents = doc_sect.group(2) + if not newcontents: + newcontents = "" + + if self.entry.contents.strip("\n"): + self.dump_section() + + self.entry.begin_section(ln, newsection) + self.entry.leading_space = None + + self.entry.contents = newcontents.lstrip() + if self.entry.contents: + self.entry.contents += "\n" + + self.state = state.BODY + return + + if doc_end.search(line): + self.dump_section() + + # Look for doc_com + + doc_end: + r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') + if r.match(line): + self.emit_msg(ln, f"suspicious ending line: {line}") + + self.entry.prototype = "" + self.entry.new_start_line = ln + 1 + + self.state = state.PROTO + return + + if doc_content.search(line): + cont = doc_content.group(1) + + if cont == "": + self.state = state.BODY + self.entry.contents += "\n" # needed? + + else: + # Continued declaration purpose + self.entry.declaration_purpose = self.entry.declaration_purpose.rstrip() + self.entry.declaration_purpose += " " + cont + + r = KernRe(r"\s+") + self.entry.declaration_purpose = r.sub(' ', + self.entry.declaration_purpose) + return + + # Unknown line, ignore + self.emit_msg(ln, f"bad line: {line}") + + def process_body(self, ln, line): """ - STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment. + STATE_BODY: the bulk of a kerneldoc comment. 
""" if self.state == state.BODY_WITH_BLANK_LINE: @@ -1385,16 +1458,6 @@ class KernelDoc: self.entry.contents += "\n" - elif self.state == state.BODY_MAYBE: - - # Continued declaration purpose - self.entry.declaration_purpose = self.entry.declaration_purpose.rstrip() - self.entry.declaration_purpose += " " + cont - - r = KernRe(r"\s+") - self.entry.declaration_purpose = r.sub(' ', - self.entry.declaration_purpose) - else: if self.entry.section.startswith('@') or \ self.entry.section == self.section_context: @@ -1687,7 +1750,7 @@ class KernelDoc: state.NORMAL: process_normal, state.NAME: process_name, state.BODY: process_body, - state.BODY_MAYBE: process_body, + state.DECLARATION: process_decl, state.BODY_WITH_BLANK_LINE: process_body, state.INLINE: process_inline, state.PROTO: process_proto, -- cgit From 74cee0dfc2fc50e0d53629c289dc9b2954d31b1c Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sat, 21 Jun 2025 14:35:07 -0600 Subject: docs: kdoc: split out the special-section state The state known as BODY_WITH_BLANK_LINE really, in a convoluted way, indicates a "special section" that is terminated by a blank line or the beginning of a new section. That is either "@param: desc" sections, or the weird "context" section that plays by the same rules. Rename the state to SPECIAL_SECTION and split its processing into a separate function; no real changes to the logic yet. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250621203512.223189-5-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index f1491f8c88e7..185ffe4e1469 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -88,7 +88,7 @@ class state: NAME = 1 # looking for function name DECLARATION = 2 # We have seen a declaration which might not be done BODY = 3 # the body of the comment - BODY_WITH_BLANK_LINE = 4 # the body which has a blank line + SPECIAL_SECTION = 4 # doc section ending with a blank line PROTO = 5 # scanning prototype DOCBLOCK = 6 # documentation block INLINE = 7 # gathering doc outside main block @@ -98,7 +98,7 @@ class state: "NAME", "DECLARATION", "BODY", - "BODY_WITH_BLANK_LINE", + "SPECIAL_SECTION", "PROTO", "DOCBLOCK", "INLINE", @@ -1383,18 +1383,18 @@ class KernelDoc: self.emit_msg(ln, f"bad line: {line}") + def process_special(self, ln, line): + """ + STATE_SPECIAL_SECTION: a section ending with a blank line + """ + if KernRe(r"\s*\*\s*\S").match(line): + self.entry.begin_section(ln, dump = True) + self.process_body(ln, line) + def process_body(self, ln, line): """ STATE_BODY: the bulk of a kerneldoc comment. 
""" - - if self.state == state.BODY_WITH_BLANK_LINE: - r = KernRe(r"\s*\*\s*\S") - if r.match(line): - self.dump_section() - self.entry.begin_section(ln) - self.entry.contents = "" - if doc_sect.search(line): self.entry.in_doc_sect = True newsection = doc_sect.group(1) @@ -1452,7 +1452,7 @@ class KernelDoc: self.state = state.BODY else: if self.entry.section != SECTION_DEFAULT: - self.state = state.BODY_WITH_BLANK_LINE + self.state = state.SPECIAL_SECTION else: self.state = state.BODY @@ -1751,7 +1751,7 @@ class KernelDoc: state.NAME: process_name, state.BODY: process_body, state.DECLARATION: process_decl, - state.BODY_WITH_BLANK_LINE: process_body, + state.SPECIAL_SECTION: process_special, state.INLINE: process_inline, state.PROTO: process_proto, state.DOCBLOCK: process_docblock, -- cgit From 99327067e1974e83cd8a60cf8445ce49086de46e Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sat, 21 Jun 2025 14:35:08 -0600 Subject: docs: kdoc: coalesce the new-section handling Merge the duplicated code back into a single implementation. Code movement only, no logic changes. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250621203512.223189-6-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 49 +++++++++++------------------------------ 1 file changed, 13 insertions(+), 36 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 185ffe4e1469..a336d543e72b 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1310,10 +1310,10 @@ class KernelDoc: else: self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") - def process_decl(self, ln, line): - """ - STATE_DECLARATION: We've seen the beginning of a declaration - """ + # + # Helper function to determine if a new section is being started. 
+ # + def is_new_section(self, ln, line): if doc_sect.search(line): self.entry.in_doc_sect = True newsection = doc_sect.group(1) @@ -1346,6 +1346,14 @@ class KernelDoc: self.entry.contents += "\n" self.state = state.BODY + return True + return False + + def process_decl(self, ln, line): + """ + STATE_DECLARATION: We've seen the beginning of a declaration + """ + if self.is_new_section(ln, line): return if doc_end.search(line): @@ -1395,38 +1403,7 @@ class KernelDoc: """ STATE_BODY: the bulk of a kerneldoc comment. """ - if doc_sect.search(line): - self.entry.in_doc_sect = True - newsection = doc_sect.group(1) - - if newsection.lower() in ["description", "context"]: - newsection = newsection.title() - - # Special case: @return is a section, not a param description - if newsection.lower() in ["@return", "@returns", - "return", "returns"]: - newsection = "Return" - - # Perl kernel-doc has a check here for contents before sections. - # the logic there is always false, as in_doc_sect variable is - # always true. So, just don't implement Wcontents_before_sections - - # .title() - newcontents = doc_sect.group(2) - if not newcontents: - newcontents = "" - - if self.entry.contents.strip("\n"): - self.dump_section() - - self.entry.begin_section(ln, newsection) - self.entry.leading_space = None - - self.entry.contents = newcontents.lstrip() - if self.entry.contents: - self.entry.contents += "\n" - - self.state = state.BODY + if self.is_new_section(ln, line): return if doc_end.search(line): -- cgit From e65d54e19149601b96a19790ee9ba9ed04c59abe Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sat, 21 Jun 2025 14:35:09 -0600 Subject: docs: kdoc: rework the handling of SPECIAL_SECTION Move the recognition of this state to when we enter it, rather than when we exit, eliminating some twisty logic along the way. Some changes in output do result from this shift, generally for kerneldoc comments that do not quite fit the format. See, for example, struct irqdomain. 
As far as I can tell, the new behavior is more correct in each case. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250621203512.223189-7-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 48 +++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 28 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index a336d543e72b..5998b02ca3a0 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1316,21 +1316,25 @@ class KernelDoc: def is_new_section(self, ln, line): if doc_sect.search(line): self.entry.in_doc_sect = True + self.state = state.BODY + # + # Pick out the name of our new section, tweaking it if need be. + # newsection = doc_sect.group(1) - - if newsection.lower() in ["description", "context"]: - newsection = newsection.title() - - # Special case: @return is a section, not a param description - if newsection.lower() in ["@return", "@returns", - "return", "returns"]: + if newsection.lower() == 'description': + newsection = 'Description' + elif newsection.lower() == 'context': + newsection = 'Context' + self.state = state.SPECIAL_SECTION + elif newsection.lower() in ["@return", "@returns", + "return", "returns"]: newsection = "Return" - - # Perl kernel-doc has a check here for contents before sections. - # the logic there is always false, as in_doc_sect variable is - # always true. So, just don't implement Wcontents_before_sections - - # .title() + self.state = state.SPECIAL_SECTION + elif newsection[0] == '@': + self.state = state.SPECIAL_SECTION + # + # Initialize the contents, and get the new section going. 
+ # newcontents = doc_sect.group(2) if not newcontents: newcontents = "" @@ -1344,8 +1348,6 @@ class KernelDoc: self.entry.contents = newcontents.lstrip() if self.entry.contents: self.entry.contents += "\n" - - self.state = state.BODY return True return False @@ -1395,8 +1397,9 @@ class KernelDoc: """ STATE_SPECIAL_SECTION: a section ending with a blank line """ - if KernRe(r"\s*\*\s*\S").match(line): + if KernRe(r"\s*\*\s*$").match(line): self.entry.begin_section(ln, dump = True) + self.state = state.BODY self.process_body(ln, line) def process_body(self, ln, line): @@ -1424,20 +1427,9 @@ class KernelDoc: cont = doc_content.group(1) if cont == "": - if self.entry.section == self.section_context: - self.entry.begin_section(ln, dump = True) - self.state = state.BODY - else: - if self.entry.section != SECTION_DEFAULT: - self.state = state.SPECIAL_SECTION - else: - self.state = state.BODY - self.entry.contents += "\n" - else: - if self.entry.section.startswith('@') or \ - self.entry.section == self.section_context: + if self.state == state.SPECIAL_SECTION: if self.entry.leading_space is None: r = KernRe(r'^(\s+)') if r.match(cont): -- cgit From 2ad02b94914ab47b3b94274856e1b56cd94d3e31 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sat, 21 Jun 2025 14:35:10 -0600 Subject: docs: kdoc: coalesce the end-of-comment processing Separate out the end-of-comment logic into its own helper and remove the duplicated code introduced earlier. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250621203512.223189-8-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 5998b02ca3a0..f7a5b85a8ed7 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1351,13 +1351,10 @@ class KernelDoc: return True return False - def process_decl(self, ln, line): - """ - STATE_DECLARATION: We've seen the beginning of a declaration - """ - if self.is_new_section(ln, line): - return - + # + # Helper function to detect (and effect) the end of a kerneldoc comment. + # + def is_comment_end(self, ln, line): if doc_end.search(line): self.dump_section() @@ -1370,6 +1367,15 @@ class KernelDoc: self.entry.new_start_line = ln + 1 self.state = state.PROTO + return True + return False + + + def process_decl(self, ln, line): + """ + STATE_DECLARATION: We've seen the beginning of a declaration + """ + if self.is_new_section(ln, line) or self.is_comment_end(ln, line): return if doc_content.search(line): @@ -1406,21 +1412,7 @@ class KernelDoc: """ STATE_BODY: the bulk of a kerneldoc comment. 
""" - if self.is_new_section(ln, line): - return - - if doc_end.search(line): - self.dump_section() - - # Look for doc_com + + doc_end: - r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') - if r.match(line): - self.emit_msg(ln, f"suspicious ending line: {line}") - - self.entry.prototype = "" - self.entry.new_start_line = ln + 1 - - self.state = state.PROTO + if self.is_new_section(ln, line) or self.is_comment_end(ln, line): return if doc_content.search(line): -- cgit From ccad65a494657e899f9139174fcc74c64316c10a Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sat, 21 Jun 2025 14:35:11 -0600 Subject: docs: kdoc: Add some comments to process_decl() Now that the function can actually fit into a human brain, add a few comments. While I was at it, I switched to the trim_whitespace() helper rather than open-coding it. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250621203512.223189-9-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index f7a5b85a8ed7..a6ee8bac378d 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1377,26 +1377,28 @@ class KernelDoc: """ if self.is_new_section(ln, line) or self.is_comment_end(ln, line): return - + # + # Look for anything with the " * " line beginning. + # if doc_content.search(line): cont = doc_content.group(1) - + # + # A blank line means that we have moved out of the declaration + # part of the comment (without any "special section" parameter + # descriptions). + # if cont == "": self.state = state.BODY self.entry.contents += "\n" # needed? - + # + # Otherwise we have more of the declaration section to soak up. 
+ # else: - # Continued declaration purpose - self.entry.declaration_purpose = self.entry.declaration_purpose.rstrip() - self.entry.declaration_purpose += " " + cont - - r = KernRe(r"\s+") - self.entry.declaration_purpose = r.sub(' ', - self.entry.declaration_purpose) - return - - # Unknown line, ignore - self.emit_msg(ln, f"bad line: {line}") + self.entry.declaration_purpose = \ + trim_whitespace(self.entry.declaration_purpose + ' ' + cont) + else: + # Unknown line, ignore + self.emit_msg(ln, f"bad line: {line}") def process_special(self, ln, line): -- cgit From 07e04d8e7dceae9822377abcb2dd07aae5747e7d Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sat, 21 Jun 2025 14:35:12 -0600 Subject: docs: kdoc: finish disentangling the BODY and SPECIAL_SECTION states Move the last SPECIAL_SECTION special case into the proper handler function, getting rid of more if/then/else logic. The leading-space tracking was tightened up a bit in the move. Add some comments describing what is going on. No changes to the generated output. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250621203512.223189-10-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 80 ++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 32 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index a6ee8bac378d..3557c512c85a 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1405,10 +1405,53 @@ class KernelDoc: """ STATE_SPECIAL_SECTION: a section ending with a blank line """ + # + # If we have hit a blank line (only the " * " marker), then this + # section is done. + # if KernRe(r"\s*\*\s*$").match(line): self.entry.begin_section(ln, dump = True) + self.entry.contents += '\n' self.state = state.BODY - self.process_body(ln, line) + return + # + # Not a blank line, look for the other ways to end the section. 
+ # + if self.is_new_section(ln, line) or self.is_comment_end(ln, line): + return + # + # OK, we should have a continuation of the text for this section. + # + if doc_content.search(line): + cont = doc_content.group(1) + # + # If the lines of text after the first in a special section have + # leading white space, we need to trim it out or Sphinx will get + # confused. For the second line (the None case), see what we + # find there and remember it. + # + if self.entry.leading_space is None: + r = KernRe(r'^(\s+)') + if r.match(cont): + self.entry.leading_space = len(r.group(1)) + else: + self.entry.leading_space = 0 + # + # Otherwise, before trimming any leading chars, be *sure* + # that they are white space. We should maybe warn if this + # isn't the case. + # + for i in range(0, self.entry.leading_space): + if cont[i] != " ": + self.entry.leading_space = i + break + # + # Add the trimmed result to the section and we're done. + # + self.entry.contents += cont[self.entry.leading_space:] + '\n' + else: + # Unknown line, ignore + self.emit_msg(ln, f"bad line: {line}") def process_body(self, ln, line): """ @@ -1419,37 +1462,10 @@ class KernelDoc: if doc_content.search(line): cont = doc_content.group(1) - - if cont == "": - self.entry.contents += "\n" - else: - if self.state == state.SPECIAL_SECTION: - if self.entry.leading_space is None: - r = KernRe(r'^(\s+)') - if r.match(cont): - self.entry.leading_space = len(r.group(1)) - else: - self.entry.leading_space = 0 - - # Double-check if leading space are realy spaces - pos = 0 - for i in range(0, self.entry.leading_space): - if cont[i] != " ": - break - pos += 1 - - cont = cont[pos:] - - # NEW LOGIC: - # In case it is different, update it - if self.entry.leading_space != pos: - self.entry.leading_space = pos - - self.entry.contents += cont + "\n" - return - - # Unknown line, ignore - self.emit_msg(ln, f"bad line: {line}") + self.entry.contents += cont + "\n" + else: + # Unknown line, ignore + self.emit_msg(ln, f"bad 
line: {line}") def process_inline(self, ln, line): """STATE_INLINE: docbook comments within a prototype.""" -- cgit From d982828d08b63c2c56f83c09b33cb71929fd4c22 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 25 Jun 2025 14:08:40 -0600 Subject: docs: kdoc: remove KernelEntry::in_doc_sect This field is not used for anything, just get rid of it. Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 4 ---- 1 file changed, 4 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 3557c512c85a..f3970ffbf402 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -151,8 +151,6 @@ class KernelEntry: # State flags self.brcount = 0 - - self.in_doc_sect = False self.declaration_start_line = ln + 1 # TODO: rename to emit_message after removal of kernel-doc.pl @@ -1227,7 +1225,6 @@ class KernelDoc: # start a new entry self.reset_state(ln) - self.entry.in_doc_sect = False # next line is always the function name self.state = state.NAME @@ -1315,7 +1312,6 @@ class KernelDoc: # def is_new_section(self, ln, line): if doc_sect.search(line): - self.entry.in_doc_sect = True self.state = state.BODY # # Pick out the name of our new section, tweaking it if need be. -- cgit From d6699d5f601670176bd03f95d1680914bd65b2a9 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 25 Jun 2025 14:51:11 -0600 Subject: docs: kdoc: Move content handling into KernelEntry Rather than having other code mucking around with this bit of internal state, encapsulate it internally. Accumulate the description as a list of strings, joining them at the end, which is a more efficient way of building the text. 
Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 62 ++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 32 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index f3970ffbf402..f87355b63c19 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -128,7 +128,7 @@ class KernelEntry: def __init__(self, config, ln): self.config = config - self.contents = "" + self._contents = [] self.function = "" self.sectcheck = "" self.struct_actual = "" @@ -153,6 +153,15 @@ class KernelEntry: self.brcount = 0 self.declaration_start_line = ln + 1 + # + # Management of section contents + # + def add_text(self, text): + self._contents.append(text) + + def contents(self): + return '\n'.join(self._contents) + '\n' + # TODO: rename to emit_message after removal of kernel-doc.pl def emit_msg(self, log_msg, warning=True): """Emit a message""" @@ -180,9 +189,14 @@ class KernelEntry: """ Dumps section contents to arrays/hashes intended for that purpose. """ - + # + # If we have accumulated no contents in the default ("description") + # section, don't bother. 
+ # + if self.section == SECTION_DEFAULT and not self._contents: + return name = self.section - contents = self.contents + contents = self.contents() if type_param.match(name): name = type_param.group(1) @@ -206,7 +220,8 @@ class KernelEntry: if name != SECTION_DEFAULT: self.emit_msg(self.new_start_line, f"duplicate section name '{name}'\n") - self.sections[name] += contents + # Treat as a new paragraph - add a blank line + self.sections[name] += '\n' + contents else: self.sections[name] = contents self.sectionlist.append(name) @@ -217,7 +232,7 @@ class KernelEntry: if start_new: self.section = SECTION_DEFAULT - self.contents = "" + self._contents = [] class KernelDoc: @@ -1334,16 +1349,11 @@ class KernelDoc: newcontents = doc_sect.group(2) if not newcontents: newcontents = "" - - if self.entry.contents.strip("\n"): - self.dump_section() - + self.dump_section() self.entry.begin_section(ln, newsection) self.entry.leading_space = None - self.entry.contents = newcontents.lstrip() - if self.entry.contents: - self.entry.contents += "\n" + self.entry.add_text(newcontents.lstrip()) return True return False @@ -1385,7 +1395,6 @@ class KernelDoc: # if cont == "": self.state = state.BODY - self.entry.contents += "\n" # needed? # # Otherwise we have more of the declaration section to soak up. # @@ -1407,7 +1416,6 @@ class KernelDoc: # if KernRe(r"\s*\*\s*$").match(line): self.entry.begin_section(ln, dump = True) - self.entry.contents += '\n' self.state = state.BODY return # @@ -1444,7 +1452,7 @@ class KernelDoc: # # Add the trimmed result to the section and we're done. 
# - self.entry.contents += cont[self.entry.leading_space:] + '\n' + self.entry.add_text(cont[self.entry.leading_space:]) else: # Unknown line, ignore self.emit_msg(ln, f"bad line: {line}") @@ -1458,7 +1466,7 @@ class KernelDoc: if doc_content.search(line): cont = doc_content.group(1) - self.entry.contents += cont + "\n" + self.entry.add_text(cont) else: # Unknown line, ignore self.emit_msg(ln, f"bad line: {line}") @@ -1470,27 +1478,20 @@ class KernelDoc: doc_inline_sect.search(line): self.entry.begin_section(ln, doc_inline_sect.group(1)) - self.entry.contents = doc_inline_sect.group(2).lstrip() - if self.entry.contents != "": - self.entry.contents += "\n" - + self.entry.add_text(doc_inline_sect.group(2).lstrip()) self.inline_doc_state = state.INLINE_TEXT # Documentation block end */ return if doc_inline_end.search(line): - if self.entry.contents not in ["", "\n"]: - self.dump_section() - + self.dump_section() self.state = state.PROTO self.inline_doc_state = state.INLINE_NA return if doc_content.search(line): if self.inline_doc_state == state.INLINE_TEXT: - self.entry.contents += doc_content.group(1) + "\n" - if not self.entry.contents.strip(" ").rstrip("\n"): - self.entry.contents = "" + self.entry.add_text(doc_content.group(1)) elif self.inline_doc_state == state.INLINE_NAME: self.emit_msg(ln, @@ -1668,11 +1669,8 @@ class KernelDoc: if doc_inline_oneline.search(line): self.entry.begin_section(ln, doc_inline_oneline.group(1)) - self.entry.contents = doc_inline_oneline.group(2) - - if self.entry.contents != "": - self.entry.contents += "\n" - self.dump_section(start_new=False) + self.entry.add_text(doc_inline_oneline.group(2)) + self.dump_section() elif doc_inline_start.search(line): self.state = state.INLINE @@ -1696,7 +1694,7 @@ class KernelDoc: self.reset_state(ln) elif doc_content.search(line): - self.entry.contents += doc_content.group(1) + "\n" + self.entry.add_text(doc_content.group(1)) def parse_export(self): """ -- cgit From 
1550a409e778673a63a6957718b802050f98359a Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 25 Jun 2025 15:43:37 -0600 Subject: docs: kdoc: remove a bit of dead code The type_param regex matches "@..." just fine, so the special-case branch for that in dump_section() is never executed. Just remove it. Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 7 ------- 1 file changed, 7 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index f87355b63c19..9e46cfa20978 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -207,13 +207,6 @@ class KernelEntry: self.sectcheck += name + " " self.new_start_line = 0 - elif name == "@...": - name = "..." - self.parameterdescs[name] = contents - self.sectcheck += name + " " - self.parameterdesc_start_lines[name] = self.new_start_line - self.new_start_line = 0 - else: if name in self.sections and self.sections[name] != "": # Only warn on user-specified duplicate section names -- cgit From f61e404f5b6124905025dbda58afa1fd3171100f Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 25 Jun 2025 16:58:55 -0600 Subject: docs: kdoc: remove KernelEntry::function This member is unused, so take it out.
Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 1 - 1 file changed, 1 deletion(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 9e46cfa20978..224dea5f7c2e 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -129,7 +129,6 @@ class KernelEntry: self.config = config self._contents = [] - self.function = "" self.sectcheck = "" self.struct_actual = "" self.prototype = "" -- cgit From 473734e086ccdd50af9d0abf81c0b70085dcf625 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 25 Jun 2025 17:19:40 -0600 Subject: docs: kdoc: rework process_export() slightly Reorganize process_export() to eliminate duplicated code, don't look for exports in states where we don't expect them, and don't bother with normal state-machine processing if an export declaration has been found. Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 224dea5f7c2e..734b908579c3 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1211,16 +1211,17 @@ class KernelDoc: if export_symbol.search(line): symbol = export_symbol.group(2) - for suffix in suffixes: - symbol = symbol.removesuffix(suffix) - function_set.add(symbol) - return - - if export_symbol_ns.search(line): + elif export_symbol_ns.search(line): symbol = export_symbol_ns.group(2) - for suffix in suffixes: - symbol = symbol.removesuffix(suffix) - function_set.add(symbol) + else: + return False + # + # Found an export, trim out any special suffixes + # + for suffix in suffixes: + symbol = symbol.removesuffix(suffix) + function_set.add(symbol) + return True def process_normal(self, ln, line): """ @@ -1767,13 +1768,10 @@ class KernelDoc: # it was 
read twice. Here, we use the already-existing # loop to parse exported symbols as well. # - # TODO: It should be noticed that not all states are - # needed here. On a future cleanup, process export only - # at the states that aren't handling comment markups. - self.process_export(export_table, line) - - # Hand this line to the appropriate state handler - self.state_actions[self.state](self, ln, line) + if (self.state != state.NORMAL) or \ + not self.process_export(export_table, line): + # Hand this line to the appropriate state handler + self.state_actions[self.state](self, ln, line) except OSError: self.config.log.error(f"Error: Cannot open file {self.fname}") -- cgit From dd49aae52b5e03bc151c65f0e8ee1731fdd73c0a Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 26 Jun 2025 13:38:05 -0600 Subject: docs: kdoc: remove the INLINE_END state It is never used, so just get rid of it. Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 734b908579c3..03a0e44707a7 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -108,8 +108,7 @@ class state: INLINE_NA = 0 # not applicable ($state != INLINE) INLINE_NAME = 1 # looking for member name (@foo:) INLINE_TEXT = 2 # looking for member documentation - INLINE_END = 3 # done - INLINE_ERROR = 4 # error - Comment without header was found. + INLINE_ERROR = 3 # error - Comment without header was found. # Spit a warning as it's not # proper kernel-doc and ignore the rest. 
@@ -117,7 +116,6 @@ class state: "", "_NAME", "_TEXT", - "_END", "_ERROR", ] -- cgit From 096f73ab01b95aaeaa7f678c56257d2e4c8490d3 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 27 Jun 2025 11:33:18 -0600 Subject: docs: kdoc: remove the inline states-within-a-state The processing of inline kerneldoc comments is a state like the rest, but it was implemented as a set of separate substates. Just remove the substate logic and make the inline states normal ones like the rest. INLINE_ERROR was never actually used for anything, so just take it out. No changes to the generated output. Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 43 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 30 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 03a0e44707a7..a931c1471fa8 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -91,7 +91,8 @@ class state: SPECIAL_SECTION = 4 # doc section ending with a blank line PROTO = 5 # scanning prototype DOCBLOCK = 6 # documentation block - INLINE = 7 # gathering doc outside main block + INLINE_NAME = 7 # gathering doc outside main block + INLINE_TEXT = 8 # reading the body of inline docs name = [ "NORMAL", @@ -101,23 +102,10 @@ class state: "SPECIAL_SECTION", "PROTO", "DOCBLOCK", - "INLINE", + "INLINE_NAME", + "INLINE_TEXT", ] - # Inline documentation state - INLINE_NA = 0 # not applicable ($state != INLINE) - INLINE_NAME = 1 # looking for member name (@foo:) - INLINE_TEXT = 2 # looking for member documentation - INLINE_ERROR = 3 # error - Comment without header was found. - # Spit a warning as it's not - # proper kernel-doc and ignore the rest. 
- - inline_name = [ - "", - "_NAME", - "_TEXT", - "_ERROR", - ] SECTION_DEFAULT = "Description" # default section @@ -246,7 +234,6 @@ class KernelDoc: # Initial state for the state machines self.state = state.NORMAL - self.inline_doc_state = state.INLINE_NA # Store entry currently being processed self.entry = None @@ -323,7 +310,6 @@ class KernelDoc: # State flags self.state = state.NORMAL - self.inline_doc_state = state.INLINE_NA def push_parameter(self, ln, decl_type, param, dtype, org_arg, declaration_name): @@ -1465,30 +1451,28 @@ class KernelDoc: def process_inline(self, ln, line): """STATE_INLINE: docbook comments within a prototype.""" - if self.inline_doc_state == state.INLINE_NAME and \ + if self.state == state.INLINE_NAME and \ doc_inline_sect.search(line): self.entry.begin_section(ln, doc_inline_sect.group(1)) self.entry.add_text(doc_inline_sect.group(2).lstrip()) - self.inline_doc_state = state.INLINE_TEXT + self.state = state.INLINE_TEXT # Documentation block end */ return if doc_inline_end.search(line): self.dump_section() self.state = state.PROTO - self.inline_doc_state = state.INLINE_NA return if doc_content.search(line): - if self.inline_doc_state == state.INLINE_TEXT: + if self.state == state.INLINE_TEXT: self.entry.add_text(doc_content.group(1)) - elif self.inline_doc_state == state.INLINE_NAME: + elif self.state == state.INLINE_NAME: self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") - - self.inline_doc_state = state.INLINE_ERROR + self.state = state.PROTO def syscall_munge(self, ln, proto): # pylint: disable=W0613 """ @@ -1664,8 +1648,7 @@ class KernelDoc: self.dump_section() elif doc_inline_start.search(line): - self.state = state.INLINE - self.inline_doc_state = state.INLINE_NAME + self.state = state.INLINE_NAME elif self.entry.decl_type == 'function': self.process_proto_function(ln, line) @@ -1716,7 +1699,8 @@ class KernelDoc: state.BODY: process_body, state.DECLARATION: process_decl, state.SPECIAL_SECTION: process_special, - 
state.INLINE: process_inline, + state.INLINE_NAME: process_inline, + state.INLINE_TEXT: process_inline, state.PROTO: process_proto, state.DOCBLOCK: process_docblock, } @@ -1756,9 +1740,8 @@ class KernelDoc: prev = "" prev_ln = None - self.config.log.debug("%d %s%s: %s", + self.config.log.debug("%d %s: %s", ln, state.name[self.state], - state.inline_name[self.inline_doc_state], line) # This is an optimization over the original script. -- cgit From c7eedb09417e4372183bf1843676d2008da340d5 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 27 Jun 2025 12:23:05 -0600 Subject: docs: kdoc: split the processing of the two remaining inline states Now that "inline_*" are just ordinary parser states, split them into two separate functions, getting rid of some nested conditional logic. The original process_inline() would simply ignore lines that didn't match any of the regexes (those lacking the initial " * " marker). I have preserved that behavior, but we should perhaps emit a warning instead. 
Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index a931c1471fa8..93938155fce2 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1448,31 +1448,30 @@ class KernelDoc: # Unknown line, ignore self.emit_msg(ln, f"bad line: {line}") - def process_inline(self, ln, line): - """STATE_INLINE: docbook comments within a prototype.""" + def process_inline_name(self, ln, line): + """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" - if self.state == state.INLINE_NAME and \ - doc_inline_sect.search(line): + if doc_inline_sect.search(line): self.entry.begin_section(ln, doc_inline_sect.group(1)) - self.entry.add_text(doc_inline_sect.group(2).lstrip()) self.state = state.INLINE_TEXT - # Documentation block end */ - return - - if doc_inline_end.search(line): + elif doc_inline_end.search(line): self.dump_section() self.state = state.PROTO - return + elif doc_content.search(line): + self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") + self.state = state.PROTO + # else ... ?? - if doc_content.search(line): - if self.state == state.INLINE_TEXT: - self.entry.add_text(doc_content.group(1)) + def process_inline_text(self, ln, line): + """STATE_INLINE_TEXT: docbook comments within a prototype.""" - elif self.state == state.INLINE_NAME: - self.emit_msg(ln, - f"Incorrect use of kernel-doc format: {line}") - self.state = state.PROTO + if doc_inline_end.search(line): + self.dump_section() + self.state = state.PROTO + elif doc_content.search(line): + self.entry.add_text(doc_content.group(1)) + # else ... ?? 
def syscall_munge(self, ln, proto): # pylint: disable=W0613 """ @@ -1699,8 +1698,8 @@ class KernelDoc: state.BODY: process_body, state.DECLARATION: process_decl, state.SPECIAL_SECTION: process_special, - state.INLINE_NAME: process_inline, - state.INLINE_TEXT: process_inline, + state.INLINE_NAME: process_inline_name, + state.INLINE_TEXT: process_inline_text, state.PROTO: process_proto, state.DOCBLOCK: process_docblock, } -- cgit From 362ec251a6aba32c8d950f0278c75aaa8c1b0b10 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 27 Jun 2025 13:08:20 -0600 Subject: docs: kdoc: don't reinvent string.strip() process_proto_type() and process_proto_function() are reinventing the strip() string method with a whole series of separate regexes; take all that out and just use strip(). The previous implementation also (in process_proto_type()) removed C++ comments *after* the above dance, leaving trailing whitespace in that case; now we do the stripping afterward. This results in exactly one output change: the removal of a spurious space in the definition of BACKLIGHT_POWER_REDUCED - see https://docs.kernel.org/gpu/backlight.html#c.backlight_properties. I note that we are putting semicolons after #define lines that really shouldn't be there - a task for another day. 
Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 93938155fce2..d9ff2d066160 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1567,17 +1567,9 @@ class KernelDoc: self.entry.prototype += r.group(1) + " " if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): - # strip comments - r = KernRe(r'/\*.*?\*/') - self.entry.prototype = r.sub('', self.entry.prototype) - - # strip newlines/cr's - r = KernRe(r'[\r\n]+') - self.entry.prototype = r.sub(' ', self.entry.prototype) - - # strip leading spaces - r = KernRe(r'^\s+') - self.entry.prototype = r.sub('', self.entry.prototype) + # strip comments and surrounding spaces + r = KernRe(r'/\*.*\*/') + self.entry.prototype = r.sub('', self.entry.prototype).strip() # Handle self.entry.prototypes for function pointers like: # int (*pcs_config)(struct foo) @@ -1600,17 +1592,8 @@ class KernelDoc: def process_proto_type(self, ln, line): """Ancillary routine to process a type""" - # Strip newlines/cr's. - line = KernRe(r'[\r\n]+', re.S).sub(' ', line) - - # Strip leading spaces - line = KernRe(r'^\s+', re.S).sub('', line) - - # Strip trailing spaces - line = KernRe(r'\s+$', re.S).sub('', line) - - # Strip C99-style comments to the end of the line - line = KernRe(r"\/\/.*$", re.S).sub('', line) + # Strip C99-style comments and surrounding whitespace + line = KernRe(r"//.*$", re.S).sub('', line).strip() # To distinguish preprocessor directive from regular declaration later. 
if line.startswith('#'): -- cgit From bfa5bb3d104b0f2ffd25daa3b4900d54fe060285 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 30 Jun 2025 10:03:28 -0600 Subject: docs: kdoc: remove the brcount floor in process_proto_type() Putting the floor under brcount does not change the output in any way, just remove it. Change the termination test from ==0 to <=0 to prevent infinite loops in case somebody does something truly wacko in the code. Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index d9ff2d066160..935f2a3c4b47 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1609,9 +1609,7 @@ class KernelDoc: self.entry.brcount += r.group(2).count('{') self.entry.brcount -= r.group(2).count('}') - self.entry.brcount = max(self.entry.brcount, 0) - - if r.group(2) == ';' and self.entry.brcount == 0: + if r.group(2) == ';' and self.entry.brcount <= 0: self.dump_declaration(ln, self.entry.prototype) self.reset_state(ln) break -- cgit From 1aeb8099d053af79d50f4ffee740c29cc10d56fc Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 30 Jun 2025 11:08:32 -0600 Subject: docs: kdoc: rework type prototype parsing process_proto_type() is using a complex regex and a "while True" loop to split a declaration into chunks and, in the end, count brackets. Switch to using a simpler regex to just do the split directly, and handle each chunk as it comes. The result is, IMO, easier to understand and reason about. The old algorithm would occasionally elide the space between function parameters; see struct rng_alg->generate(), for example. The only output difference is to not elide that space, which is more correct. 
Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 43 ++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 18 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 935f2a3c4b47..61da297df623 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1594,30 +1594,37 @@ class KernelDoc: # Strip C99-style comments and surrounding whitespace line = KernRe(r"//.*$", re.S).sub('', line).strip() + if not line: + return # nothing to see here # To distinguish preprocessor directive from regular declaration later. if line.startswith('#'): line += ";" - - r = KernRe(r'([^\{\};]*)([\{\};])(.*)') - while True: - if r.search(line): - if self.entry.prototype: - self.entry.prototype += " " - self.entry.prototype += r.group(1) + r.group(2) - - self.entry.brcount += r.group(2).count('{') - self.entry.brcount -= r.group(2).count('}') - - if r.group(2) == ';' and self.entry.brcount <= 0: + # + # Split the declaration on any of { } or ;, and accumulate pieces + # until we hit a semicolon while not inside {brackets} + # + r = KernRe(r'(.*?)([{};])') + for chunk in r.split(line): + if chunk: # Ignore empty matches + self.entry.prototype += chunk + # + # This cries out for a match statement ... someday after we can + # drop Python 3.9 ... + # + if chunk == '{': + self.entry.brcount += 1 + elif chunk == '}': + self.entry.brcount -= 1 + elif chunk == ';' and self.entry.brcount <= 0: self.dump_declaration(ln, self.entry.prototype) self.reset_state(ln) - break - - line = r.group(3) - else: - self.entry.prototype += line - break + return + # + # We hit the end of the line while still in the declaration; put + # in a space to represent the newline. 
+ # + self.entry.prototype += ' ' def process_proto(self, ln, line): """STATE_PROTO: reading a function/whatever prototype.""" -- cgit From 901f506945b8d0a9386c126a2af6bec52354f7b3 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 30 Jun 2025 11:38:42 -0600 Subject: docs: kdoc: some tweaks to process_proto_function() Add a set of comments to process_proto_function and reorganize the logic slightly; no functional change. Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 43 +++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 19 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 61da297df623..d5ef3ce87438 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1553,39 +1553,44 @@ class KernelDoc: """Ancillary routine to process a function prototype""" # strip C99-style comments to end of line - r = KernRe(r"\/\/.*$", re.S) - line = r.sub('', line) - + line = KernRe(r"\/\/.*$", re.S).sub('', line) + # + # Soak up the line's worth of prototype text, stopping at { or ; if present. + # if KernRe(r'\s*#\s*define').match(line): self.entry.prototype = line - elif line.startswith('#'): - # Strip other macros like #ifdef/#ifndef/#endif/... - pass - else: + elif not line.startswith('#'): # skip other preprocessor stuff r = KernRe(r'([^\{]*)') if r.match(line): self.entry.prototype += r.group(1) + " " - + # + # If we now have the whole prototype, clean it up and declare victory. 
+ # if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): # strip comments and surrounding spaces - r = KernRe(r'/\*.*\*/') - self.entry.prototype = r.sub('', self.entry.prototype).strip() - + self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() + # # Handle self.entry.prototypes for function pointers like: # int (*pcs_config)(struct foo) - + # by turning it into + # int pcs_config(struct foo) + # r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) - + # + # Handle special declaration syntaxes + # if 'SYSCALL_DEFINE' in self.entry.prototype: self.entry.prototype = self.syscall_munge(ln, self.entry.prototype) - - r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') - if r.search(self.entry.prototype): - self.entry.prototype = self.tracepoint_munge(ln, - self.entry.prototype) - + else: + r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') + if r.search(self.entry.prototype): + self.entry.prototype = self.tracepoint_munge(ln, + self.entry.prototype) + # + # ... and we're done + # self.dump_function(ln, self.entry.prototype) self.reset_state(ln) -- cgit From d1af2889682e83acc791e2a2191687958b548da1 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Tue, 1 Jul 2025 13:02:54 -0600 Subject: docs: kdoc: pretty up dump_enum() Add some comments to dump_enum to help the next person who has to figure out what it is actually doing. 
Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index d5ef3ce87438..831f061f61b8 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -860,39 +860,48 @@ class KernelDoc: # Strip #define macros inside enums proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) - members = None - declaration_name = None - + # + # Parse out the name and members of the enum. Typedef form first. + # r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') if r.search(proto): declaration_name = r.group(2) members = r.group(1).rstrip() + # + # Failing that, look for a straight enum + # else: r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') if r.match(proto): declaration_name = r.group(1) members = r.group(2).rstrip() - - if not members: - self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") - return - + # + # OK, this isn't going to work. + # + else: + self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") + return + # + # Make sure we found what we were expecting. + # if self.entry.identifier != declaration_name: if self.entry.identifier == "": self.emit_msg(ln, f"{proto}: wrong kernel-doc identifier on prototype") else: self.emit_msg(ln, - f"expecting prototype for enum {self.entry.identifier}. Prototype was for enum {declaration_name} instead") + f"expecting prototype for enum {self.entry.identifier}. " + f"Prototype was for enum {declaration_name} instead") return if not declaration_name: declaration_name = "(anonymous)" - + # + # Parse out the name of each enum member, and verify that we + # have a description for it. 
+ # member_set = set() - - members = KernRe(r'\([^;]*?[\)]').sub('', members) - + members = KernRe(r'\([^;)]*\)').sub('', members) for arg in members.split(','): if not arg: continue @@ -903,7 +912,9 @@ class KernelDoc: self.emit_msg(ln, f"Enum value '{arg}' not described in enum '{declaration_name}'") member_set.add(arg) - + # + # Ensure that every described member actually exists in the enum. + # for k in self.entry.parameterdescs: if k not in member_set: self.emit_msg(ln, -- cgit From 60016e0116b8d33f95e797b011799e717766ec13 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Tue, 1 Jul 2025 15:31:11 -0600 Subject: docs: kdoc; Add a rudimentary class to represent output items This class is intended to replace the unstructured dict used to accumulate an entry to pass to an output module. For now, it remains unstructured, but it works well enough that the output classes don't notice the difference. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 831f061f61b8..a5a59b97a444 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -16,7 +16,7 @@ import re from pprint import pformat from kdoc_re import NestedMatch, KernRe - +from kdoc_item import KdocItem # # Regular expressions used to parse kernel-doc markups at KernelDoc class. @@ -271,32 +271,20 @@ class KernelDoc: The actual output and output filters will be handled elsewhere """ - # The implementation here is different than the original kernel-doc: - # instead of checking for output filters or actually output anything, - # it just stores the declaration content at self.entries, as the - # output will happen on a separate class. 
- # - # For now, we're keeping the same name of the function just to make - # easier to compare the source code of both scripts - - args["declaration_start_line"] = self.entry.declaration_start_line - args["type"] = dtype - args["warnings"] = self.entry.warnings - - # TODO: use colletions.OrderedDict to remove sectionlist + item = KdocItem(name, dtype, self.entry.declaration_start_line, **args) + item.warnings = self.entry.warnings - sections = args.get('sections', {}) - sectionlist = args.get('sectionlist', []) + sections = item.get('sections', {}) + sectionlist = item.get('sectionlist', []) # Drop empty sections # TODO: improve empty sections logic to emit warnings for section in ["Description", "Return"]: - if section in sectionlist: - if not sections[section].rstrip(): - del sections[section] - sectionlist.remove(section) + if section in sectionlist and not sections[section].rstrip(): + del sections[section] + sectionlist.remove(section) - self.entries.append((name, args)) + self.entries.append((name, item)) self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) -- cgit From 703f9074a8e10ac3fe939025233acb7c47529608 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Tue, 1 Jul 2025 15:54:09 -0600 Subject: docs: kdoc: simplify the output-item passing Since our output items contain their name, we don't need to pass it separately. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index a5a59b97a444..97380ff30a0d 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -284,7 +284,7 @@ class KernelDoc: del sections[section] sectionlist.remove(section) - self.entries.append((name, item)) + self.entries.append(item) self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) -- cgit From eade9f57ca7245cc59072706f0f1fdbc446fda61 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 8 Jul 2025 17:54:46 +0200 Subject: scripts/kernel_doc.py: properly handle VIRTIO_DECLARE_FEATURES The mentioned macro introduced by the next patch will foul kdoc; fully expand the mentioned macro to avoid the issue. Signed-off-by: Paolo Abeni --- scripts/lib/kdoc/kdoc_parser.py | 1 + 1 file changed, 1 insertion(+) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 062453eefc7a..3115558925ac 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -666,6 +666,7 @@ class KernelDoc: (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), + (KernRe(r'VIRTIO_DECLARE_FEATURES\s*\(' + args_pattern + r'\)', re.S), r'u64 \1; u64 \1_array[VIRTIO_FEATURES_DWORDS]'), ] # Regexes here are guaranteed to have the end limiter matching -- cgit From 8d9d122915492ea6984f32e5df30cef5c582f062 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Tue, 1 Jul 2025 16:21:24 -0600 Subject: docs: kdoc: drop "sectionlist" Python dicts (as of 3.7) are 
guaranteed to remember the insertion order of items, so we do not need a separate list for that purpose. Drop the per-entry sectionlist variable and just rely on native dict ordering. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 97380ff30a0d..2e00c8b3a5f2 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -127,7 +127,6 @@ class KernelEntry: self.parameterdesc_start_lines = {} self.section_start_lines = {} - self.sectionlist = [] self.sections = {} self.anon_struct_union = False @@ -202,7 +201,6 @@ class KernelEntry: self.sections[name] += '\n' + contents else: self.sections[name] = contents - self.sectionlist.append(name) self.section_start_lines[name] = self.new_start_line self.new_start_line = 0 @@ -275,14 +273,12 @@ class KernelDoc: item.warnings = self.entry.warnings sections = item.get('sections', {}) - sectionlist = item.get('sectionlist', []) # Drop empty sections # TODO: improve empty sections logic to emit warnings for section in ["Description", "Return"]: - if section in sectionlist and not sections[section].rstrip(): + if section in sections and not sections[section].rstrip(): del sections[section] - sectionlist.remove(section) self.entries.append(item) @@ -828,7 +824,6 @@ class KernelDoc: parameterdescs=self.entry.parameterdescs, parametertypes=self.entry.parametertypes, parameterdesc_start_lines=self.entry.parameterdesc_start_lines, - sectionlist=self.entry.sectionlist, sections=self.entry.sections, section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose) @@ -913,7 +908,6 @@ class KernelDoc: parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, 
parameterdesc_start_lines=self.entry.parameterdesc_start_lines, - sectionlist=self.entry.sectionlist, sections=self.entry.sections, section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose) @@ -1085,7 +1079,6 @@ class KernelDoc: parameterdescs=self.entry.parameterdescs, parametertypes=self.entry.parametertypes, parameterdesc_start_lines=self.entry.parameterdesc_start_lines, - sectionlist=self.entry.sectionlist, sections=self.entry.sections, section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose, @@ -1099,7 +1092,6 @@ class KernelDoc: parameterdescs=self.entry.parameterdescs, parametertypes=self.entry.parametertypes, parameterdesc_start_lines=self.entry.parameterdesc_start_lines, - sectionlist=self.entry.sectionlist, sections=self.entry.sections, section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose, @@ -1145,7 +1137,6 @@ class KernelDoc: parameterdescs=self.entry.parameterdescs, parametertypes=self.entry.parametertypes, parameterdesc_start_lines=self.entry.parameterdesc_start_lines, - sectionlist=self.entry.sectionlist, sections=self.entry.sections, section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose) @@ -1168,7 +1159,6 @@ class KernelDoc: self.output_declaration('typedef', declaration_name, typedef=declaration_name, - sectionlist=self.entry.sectionlist, sections=self.entry.sections, section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose) @@ -1653,7 +1643,6 @@ class KernelDoc: if doc_end.search(line): self.dump_section() self.output_declaration("doc", self.entry.identifier, - sectionlist=self.entry.sectionlist, sections=self.entry.sections, section_start_lines=self.entry.section_start_lines) self.reset_state(ln) -- cgit From 8d7338752d76c3854a5c54cf7df976c539baab5b Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Tue, 1 Jul 2025 16:47:59 -0600 Subject: docs: kdoc: Centralize 
handling of the item section list The section list always comes directly from the under-construction entry and is used uniformly. Formalize section handling in the KdocItem class, and have output_declaration() load the sections directly from the entry, eliminating a lot of duplicated, verbose parameters. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 2e00c8b3a5f2..608f3a1045dc 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -272,13 +272,13 @@ class KernelDoc: item = KdocItem(name, dtype, self.entry.declaration_start_line, **args) item.warnings = self.entry.warnings - sections = item.get('sections', {}) - # Drop empty sections # TODO: improve empty sections logic to emit warnings + sections = self.entry.sections for section in ["Description", "Return"]: if section in sections and not sections[section].rstrip(): del sections[section] + item.set_sections(sections, self.entry.section_start_lines) self.entries.append(item) @@ -824,8 +824,6 @@ class KernelDoc: parameterdescs=self.entry.parameterdescs, parametertypes=self.entry.parametertypes, parameterdesc_start_lines=self.entry.parameterdesc_start_lines, - sections=self.entry.sections, - section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose) def dump_enum(self, ln, proto): @@ -908,8 +906,6 @@ class KernelDoc: parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, parameterdesc_start_lines=self.entry.parameterdesc_start_lines, - sections=self.entry.sections, - section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose) def dump_declaration(self, ln, prototype): @@ -1079,8 +1075,6 @@ class KernelDoc: 
parameterdescs=self.entry.parameterdescs, parametertypes=self.entry.parametertypes, parameterdesc_start_lines=self.entry.parameterdesc_start_lines, - sections=self.entry.sections, - section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose, func_macro=func_macro) else: @@ -1092,8 +1086,6 @@ class KernelDoc: parameterdescs=self.entry.parameterdescs, parametertypes=self.entry.parametertypes, parameterdesc_start_lines=self.entry.parameterdesc_start_lines, - sections=self.entry.sections, - section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose, func_macro=func_macro) @@ -1137,8 +1129,6 @@ class KernelDoc: parameterdescs=self.entry.parameterdescs, parametertypes=self.entry.parametertypes, parameterdesc_start_lines=self.entry.parameterdesc_start_lines, - sections=self.entry.sections, - section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose) return @@ -1159,8 +1149,6 @@ class KernelDoc: self.output_declaration('typedef', declaration_name, typedef=declaration_name, - sections=self.entry.sections, - section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose) return @@ -1642,9 +1630,7 @@ class KernelDoc: if doc_end.search(line): self.dump_section() - self.output_declaration("doc", self.entry.identifier, - sections=self.entry.sections, - section_start_lines=self.entry.section_start_lines) + self.output_declaration("doc", self.entry.identifier) self.reset_state(ln) elif doc_content.search(line): -- cgit From 172bee3376ab29fbf38b09bf01d6f06f7f6c39e1 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 2 Jul 2025 11:04:43 -0600 Subject: docs: kdoc: remove the "struct_actual" machinery The code goes out of its way to create a special list of parameters in entry.struct_actual that is just like entry.parameterlist, but with extra junk. The only use of that information, in check_sections(), promptly strips all the extra junk back out. 
Drop all that extra work and just use parameterlist. No output changes. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 608f3a1045dc..b28f056365cb 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -116,7 +116,6 @@ class KernelEntry: self._contents = [] self.sectcheck = "" - self.struct_actual = "" self.prototype = "" self.warnings = [] @@ -366,15 +365,6 @@ class KernelDoc: org_arg = KernRe(r'\s\s+').sub(' ', org_arg) self.entry.parametertypes[param] = org_arg - def save_struct_actual(self, actual): - """ - Strip all spaces from the actual param so that it looks like - one string item. - """ - - actual = KernRe(r'\s*').sub("", actual, count=1) - - self.entry.struct_actual += actual + " " def create_parameter_list(self, ln, decl_type, args, splitter, declaration_name): @@ -420,7 +410,6 @@ class KernelDoc: param = arg dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) - self.save_struct_actual(param) self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) @@ -437,7 +426,6 @@ class KernelDoc: dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) - self.save_struct_actual(param) self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) @@ -470,7 +458,6 @@ class KernelDoc: param = r.group(1) - self.save_struct_actual(r.group(2)) self.push_parameter(ln, decl_type, r.group(2), f"{dtype} {r.group(1)}", arg, declaration_name) @@ -482,12 +469,10 @@ class KernelDoc: continue if dtype != "": # Skip unnamed bit-fields - self.save_struct_actual(r.group(1)) self.push_parameter(ln, decl_type, r.group(1), f"{dtype}:{r.group(2)}", arg, declaration_name) else: - self.save_struct_actual(param) self.push_parameter(ln, 
decl_type, param, dtype, arg, declaration_name) @@ -499,24 +484,11 @@ class KernelDoc: sects = sectcheck.split() prms = prmscheck.split() - err = False for sx in range(len(sects)): # pylint: disable=C0200 err = True for px in range(len(prms)): # pylint: disable=C0200 - prm_clean = prms[px] - prm_clean = KernRe(r'\[.*\]').sub('', prm_clean) - prm_clean = attribute.sub('', prm_clean) - - # ignore array size in a parameter string; - # however, the original param string may contain - # spaces, e.g.: addr[6 + 2] - # and this appears in @prms as "addr[6" since the - # parameter list is split at spaces; - # hence just ignore "[..." for the sections check; - prm_clean = KernRe(r'\[.*').sub('', prm_clean) - - if prm_clean == sects[sx]: + if prms[px] == sects[sx]: err = False break @@ -782,7 +754,7 @@ class KernelDoc: self.create_parameter_list(ln, decl_type, members, ';', declaration_name) self.check_sections(ln, declaration_name, decl_type, - self.entry.sectcheck, self.entry.struct_actual) + self.entry.sectcheck, ' '.join(self.entry.parameterlist)) # Adjust declaration for better display declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) -- cgit From efacdf85135ae02a8c25452e40547b773bb1b6b3 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 2 Jul 2025 11:12:27 -0600 Subject: docs: kdoc: use self.entry.parameterlist directly in check_sections() Callers of check_sections() join parameterlist into a single string, which is then immediately split back into the original list. Rather than do all that, just use parameterlist directly in check_sections(). 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index b28f056365cb..ffd49f9395ae 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -476,19 +476,18 @@ class KernelDoc: self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) - def check_sections(self, ln, decl_name, decl_type, sectcheck, prmscheck): + def check_sections(self, ln, decl_name, decl_type, sectcheck): """ Check for errors inside sections, emitting warnings if not found parameters are described. """ sects = sectcheck.split() - prms = prmscheck.split() for sx in range(len(sects)): # pylint: disable=C0200 err = True - for px in range(len(prms)): # pylint: disable=C0200 - if prms[px] == sects[sx]: + for param in self.entry.parameterlist: + if param == sects[sx]: err = False break @@ -753,8 +752,7 @@ class KernelDoc: self.create_parameter_list(ln, decl_type, members, ';', declaration_name) - self.check_sections(ln, declaration_name, decl_type, - self.entry.sectcheck, ' '.join(self.entry.parameterlist)) + self.check_sections(ln, declaration_name, decl_type, self.entry.sectcheck) # Adjust declaration for better display declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) @@ -1032,9 +1030,7 @@ class KernelDoc: f"expecting prototype for {self.entry.identifier}(). 
Prototype was for {declaration_name}() instead") return - prms = " ".join(self.entry.parameterlist) - self.check_sections(ln, declaration_name, "function", - self.entry.sectcheck, prms) + self.check_sections(ln, declaration_name, "function", self.entry.sectcheck) self.check_return_section(ln, declaration_name, return_type) -- cgit From de6f7ac91a08d723a6eaa9c5bbce30c5a126c861 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 2 Jul 2025 13:05:56 -0600 Subject: docs: kdoc: Coalesce parameter-list handling Callers to output_declaration() always pass the parameter information from self.entry; remove all of the boilerplate arguments and just get at that information directly. Formalize its placement in the KdocItem class. It would be nice to get rid of parameterlist as well, but that has the effect of reordering the output of function parameters and struct fields to match the order in the kerneldoc comment rather than in the declaration. One could argue about which is more correct, but the ordering has been left unchanged for now. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index ffd49f9395ae..298abd260264 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -278,7 +278,9 @@ class KernelDoc: if section in sections and not sections[section].rstrip(): del sections[section] item.set_sections(sections, self.entry.section_start_lines) - + item.set_params(self.entry.parameterlist, self.entry.parameterdescs, + self.entry.parametertypes, + self.entry.parameterdesc_start_lines) self.entries.append(item) self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) @@ -790,10 +792,6 @@ class KernelDoc: self.output_declaration(decl_type, declaration_name, struct=declaration_name, definition=declaration, - parameterlist=self.entry.parameterlist, - parameterdescs=self.entry.parameterdescs, - parametertypes=self.entry.parametertypes, - parameterdesc_start_lines=self.entry.parameterdesc_start_lines, purpose=self.entry.declaration_purpose) def dump_enum(self, ln, proto): @@ -873,9 +871,6 @@ class KernelDoc: self.output_declaration('enum', declaration_name, enum=declaration_name, - parameterlist=self.entry.parameterlist, - parameterdescs=self.entry.parameterdescs, - parameterdesc_start_lines=self.entry.parameterdesc_start_lines, purpose=self.entry.declaration_purpose) def dump_declaration(self, ln, prototype): @@ -1039,10 +1034,6 @@ class KernelDoc: function=declaration_name, typedef=True, functiontype=return_type, - parameterlist=self.entry.parameterlist, - parameterdescs=self.entry.parameterdescs, - parametertypes=self.entry.parametertypes, - parameterdesc_start_lines=self.entry.parameterdesc_start_lines, purpose=self.entry.declaration_purpose, func_macro=func_macro) else: @@ -1050,10 +1041,6 @@ class 
KernelDoc: function=declaration_name, typedef=False, functiontype=return_type, - parameterlist=self.entry.parameterlist, - parameterdescs=self.entry.parameterdescs, - parametertypes=self.entry.parametertypes, - parameterdesc_start_lines=self.entry.parameterdesc_start_lines, purpose=self.entry.declaration_purpose, func_macro=func_macro) @@ -1093,10 +1080,6 @@ class KernelDoc: function=declaration_name, typedef=True, functiontype=return_type, - parameterlist=self.entry.parameterlist, - parameterdescs=self.entry.parameterdescs, - parametertypes=self.entry.parametertypes, - parameterdesc_start_lines=self.entry.parameterdesc_start_lines, purpose=self.entry.declaration_purpose) return -- cgit From a0db2051d7e1fca9a63a8643f1f187ff0b5931f1 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 2 Jul 2025 13:17:59 -0600 Subject: docs: kdoc: Regularize the use of the declaration name Each declaration type passes through the name in a unique field of the "args" blob - even though we have always just passed the name separately. Get rid of all the weird names and just use the common version. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 6 ------ 1 file changed, 6 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 298abd260264..6e35e508608b 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -790,7 +790,6 @@ class KernelDoc: level += 1 self.output_declaration(decl_type, declaration_name, - struct=declaration_name, definition=declaration, purpose=self.entry.declaration_purpose) @@ -870,7 +869,6 @@ class KernelDoc: f"Excess enum value '%{k}' description in '{declaration_name}'") self.output_declaration('enum', declaration_name, - enum=declaration_name, purpose=self.entry.declaration_purpose) def dump_declaration(self, ln, prototype): @@ -1031,14 +1029,12 @@ class KernelDoc: if 'typedef' in return_type: self.output_declaration(decl_type, declaration_name, - function=declaration_name, typedef=True, functiontype=return_type, purpose=self.entry.declaration_purpose, func_macro=func_macro) else: self.output_declaration(decl_type, declaration_name, - function=declaration_name, typedef=False, functiontype=return_type, purpose=self.entry.declaration_purpose, @@ -1077,7 +1073,6 @@ class KernelDoc: self.create_parameter_list(ln, decl_type, args, ',', declaration_name) self.output_declaration(decl_type, declaration_name, - function=declaration_name, typedef=True, functiontype=return_type, purpose=self.entry.declaration_purpose) @@ -1099,7 +1094,6 @@ class KernelDoc: return self.output_declaration('typedef', declaration_name, - typedef=declaration_name, purpose=self.entry.declaration_purpose) return -- cgit From 08b8dc43d18d5d0c4791cc630d5cddf98eaa51ea Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 2 Jul 2025 13:34:40 -0600 Subject: docs: kdoc: straighten up dump_declaration() Get rid of the excess "return" statements in dump_declaration(), along with a line of 
never-executed dead code. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 6e35e508608b..7191fa94e17a 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -878,18 +878,13 @@ class KernelDoc: if self.entry.decl_type == "enum": self.dump_enum(ln, prototype) - return - - if self.entry.decl_type == "typedef": + elif self.entry.decl_type == "typedef": self.dump_typedef(ln, prototype) - return - - if self.entry.decl_type in ["union", "struct"]: + elif self.entry.decl_type in ["union", "struct"]: self.dump_struct(ln, prototype) - return - - self.output_declaration(self.entry.decl_type, prototype, - entry=self.entry) + else: + # This would be a bug + self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') def dump_function(self, ln, prototype): """ -- cgit From 636d4d9ec641025b98e8df4623a77ecc09026209 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 2 Jul 2025 14:53:32 -0600 Subject: docs: kdoc: clean up check_sections() entry.sectcheck is just a duplicate of our list of sections that is only passed to check_sections(); its main purpose seems to be to avoid checking the special named sections. Rework check_sections() to not use that field (which is then deleted), to check for the known sections directly, and tighten up the logic in general. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 7191fa94e17a..fdde14b045fe 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -42,9 +42,11 @@ doc_decl = doc_com + KernRe(r'(\w+)', cache=False) # @{section-name}: # while trying to not match literal block starts like "example::" # +known_section_names = 'description|context|returns?|notes?|examples?' +known_sections = KernRe(known_section_names, flags = re.I) doc_sect = doc_com + \ - KernRe(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$', - flags=re.I, cache=False) + KernRe(r'\s*(\@[.\w]+|\@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', + flags=re.I, cache=False) doc_content = doc_com_body + KernRe(r'(.*)', cache=False) doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) @@ -115,7 +117,6 @@ class KernelEntry: self.config = config self._contents = [] - self.sectcheck = "" self.prototype = "" self.warnings = [] @@ -187,7 +188,6 @@ class KernelEntry: self.parameterdescs[name] = contents self.parameterdesc_start_lines[name] = self.new_start_line - self.sectcheck += name + " " self.new_start_line = 0 else: @@ -478,29 +478,20 @@ class KernelDoc: self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) - def check_sections(self, ln, decl_name, decl_type, sectcheck): + def check_sections(self, ln, decl_name, decl_type): """ Check for errors inside sections, emitting warnings if not found parameters are described. 
""" - - sects = sectcheck.split() - - for sx in range(len(sects)): # pylint: disable=C0200 - err = True - for param in self.entry.parameterlist: - if param == sects[sx]: - err = False - break - - if err: + for section in self.entry.sections: + if section not in self.entry.parameterlist and \ + not known_sections.search(section): if decl_type == 'function': dname = f"{decl_type} parameter" else: dname = f"{decl_type} member" - self.emit_msg(ln, - f"Excess {dname} '{sects[sx]}' description in '{decl_name}'") + f"Excess {dname} '{section}' description in '{decl_name}'") def check_return_section(self, ln, declaration_name, return_type): """ @@ -754,7 +745,7 @@ class KernelDoc: self.create_parameter_list(ln, decl_type, members, ';', declaration_name) - self.check_sections(ln, declaration_name, decl_type, self.entry.sectcheck) + self.check_sections(ln, declaration_name, decl_type) # Adjust declaration for better display declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) @@ -1018,7 +1009,7 @@ class KernelDoc: f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead") return - self.check_sections(ln, declaration_name, "function", self.entry.sectcheck) + self.check_sections(ln, declaration_name, "function") self.check_return_section(ln, declaration_name, return_type) -- cgit From 40020fe8e3a4038ed6fb4b3115ad4c60fd354ab3 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 10 Jul 2025 17:24:07 -0600 Subject: docs: kdoc: emit a warning for ancient versions of Python Versions of Python prior to 3.7 do not guarantee to remember the insertion order of dicts; since kernel-doc depends on that guarantee, running with such older versions could result in output with reordered sections. Python 3.9 is the minimum for the kernel as a whole, so this should not be a problem, but put in a warning just in case somebody tries to use something older. 
Suggested-by: Mauro Carvalho Chehab Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index fdde14b045fe..06f55f38d4a7 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -12,6 +12,7 @@ Read a C language source or header FILE and extract embedded documentation comments """ +import sys import re from pprint import pformat @@ -238,6 +239,14 @@ class KernelDoc: # Place all potential outputs into an array self.entries = [] + # + # We need Python 3.7 for its "dicts remember the insertion + # order" guarantee + # + if sys.version_info.major == 3 and sys.version_info.minor < 7: + self.emit_msg(0, + 'Python 3.7 or later is required for correct results') + def emit_msg(self, ln, msg, warning=True): """Emit a message""" -- cgit From 39e39af70d066029c788800ee07e0491e07eb081 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 11 Jul 2025 09:27:09 +0200 Subject: scripts: kdoc: make it backward-compatible with Python 3.7 There was a change in kdoc that ended up breaking compatibility with Python 3.7: str.removesuffix() was introduced in version 3.9. Restore backward compatibility. 
Reported-by: Akira Yokosawa Closes: https://lore.kernel.org/linux-doc/57be9f77-9a94-4cde-aacb-184cae111506@gmail.com/ Fixes: 27ad33b6b349 ("kernel-doc: Fix symbol matching for dropped suffixes") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/d13058d285838ac2bc04c492e60531c013a8a919.1752218291.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 06f55f38d4a7..c3fe4bd5eab4 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1120,7 +1120,9 @@ class KernelDoc: # Found an export, trim out any special suffixes # for suffix in suffixes: - symbol = symbol.removesuffix(suffix) + # Be backward compatible with Python < 3.9 + if symbol.endswith(suffix): + symbol = symbol[:-len(suffix)] function_set.add(symbol) return True -- cgit