#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright(c) 2025: Mauro Carvalho Chehab.
#
# Reconstructed from the patch series touching
# tools/lib/python/kdoc/kdoc_re.py:
#
#   778b8ebe5192 ("docs: Move the python libraries to tools/lib/python")
#   5f88f44d8427 ("docs: kdoc: various fixes for grammar, spelling,
#                 punctuation")

"""
Regular expression ancillary classes.

Those help caching regular expressions and do matching for kernel-doc.
"""

import re

# Local cache for regular expressions, keyed by (pattern, flags) so that
# the same pattern compiled with different flags never shares an entry.
re_cache = {}


class KernRe:
    """
    Helper class to simplify regex declaration and usage.

    It calls re.compile for a given pattern. It also allows adding
    regular expressions and define sub at class init time.

    Regular expressions can be cached via an argument, helping to speedup
    searches.
    """

    def _add_regex(self, string, flags):
        """
        Adds a new regex or reuses it from the cache.

        The cache lookup uses both the pattern and the flags: a pattern
        compiled with, say, re.IGNORECASE must not be handed out to a
        caller that asked for the same pattern without that flag.
        """
        key = (string, flags)

        self.regex = re_cache.get(key)
        if self.regex is None:
            self.regex = re.compile(string, flags=flags)
            if self.cache:
                re_cache[key] = self.regex

    def __init__(self, string, cache=True, flags=0):
        """
        Compile a regular expression and initialize internal vars.

        string is the pattern, cache tells whether a newly compiled regex
        is stored at the module cache and flags are passed to re.compile.
        """

        self.cache = cache
        self.last_match = None

        self._add_regex(string, flags)

    def __str__(self):
        """
        Return the regular expression pattern.
        """
        return self.regex.pattern

    def __add__(self, other):
        """
        Allows adding two regular expressions into one.

        The result concatenates both patterns, ORs their flags and is
        cached if either operand is.
        """

        return KernRe(str(self) + str(other), cache=self.cache or other.cache,
                      flags=self.regex.flags | other.regex.flags)

    def match(self, string):
        """
        Handles a re.match storing its results
        """

        self.last_match = self.regex.match(string)
        return self.last_match

    def search(self, string):
        """
        Handles a re.search storing its results
        """

        self.last_match = self.regex.search(string)
        return self.last_match

    def findall(self, string):
        """
        Alias to re.findall
        """

        return self.regex.findall(string)

    def split(self, string):
        """
        Alias to re.split
        """

        return self.regex.split(string)

    def sub(self, sub, string, count=0):
        """
        Alias to re.sub
        """

        return self.regex.sub(sub, string, count=count)

    def group(self, num):
        """
        Returns the group results of the last match

        It must be called only after a successful match() or search():
        last_match is None otherwise.
        """

        return self.last_match.group(num)


class NestedMatch:
    """
    Finding nested delimiters is hard with regular expressions. It is
    even harder on Python with its normal re module, as there are several
    advanced regular expressions that are missing.

    This is the case of this pattern:

            '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;'

    which is used to properly match open/close parentheses of the
    string search STRUCT_GROUP(),

    Add a class that counts pairs of delimiters, using it to match and
    replace nested expressions.

    The original approach was suggested by:
        https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex

    Although I re-implemented it to make it more generic and match 3 types
    of delimiters. The logic checks if delimiters are paired. If not, it
    will ignore the search string.
    """

    # TODO: make NestedMatch handle multiple match groups
    #
    # Right now, regular expressions to match it are defined only up to
    #       the start delimiter, e.g.:
    #
    #       \bSTRUCT_GROUP\(
    #
    # is similar to: STRUCT_GROUP\((.*)\)
    # except that the content inside the match group is delimiter-aligned.
    #
    # The content inside parentheses is converted into a single replace
    # group (e.g. r`\1').
    #
    # It would be nice to change such definition to support multiple
    # match groups, allowing a regex equivalent to:
    #
    #   FOO\((.*), (.*), (.*)\)
    #
    # it is probably easier to define it not as a regular expression, but
    # with some lexical definition like:
    #
    #   FOO(arg1, arg2, arg3)

    DELIMITER_PAIRS = {
        '{': '}',
        '(': ')',
        '[': ']',
    }

    RE_DELIM = re.compile(r'[\{\}\[\]\(\)]')

    def _search(self, regex, line):
        """
        Finds paired blocks for a regex that ends with a delimiter.

        The suggestion of using finditer to match pairs came from:
        https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
        but I ended using a different implementation to align all three types
        of delimiters and seek for an initial regular expression.

        The algorithm seeks for open/close paired delimiters and places them
        into a stack, yielding a start/stop position of each match when the
        stack is zeroed.

        Each yielded item is a (start, offset, end) tuple, where start is
        the position where regex matched, offset is the position right
        after the start delimiter and end is the position right after the
        paired end delimiter.

        The algorithm should work fine for properly paired lines, but will
        silently ignore end delimiters that precede a start delimiter.
        This should be OK for kernel-doc parser, as unaligned delimiters
        would cause compilation errors. So, we don't need to raise exceptions
        to cover such issues.
        """

        for match_re in regex.finditer(line):
            start = match_re.start()
            offset = match_re.end()

            # An empty match can't end with a delimiter. Skipping it also
            # prevents line[offset - 1] from wrapping to the end of line.
            if offset == start:
                continue

            d = line[offset - 1]
            if d not in self.DELIMITER_PAIRS:
                continue

            # Use a fresh stack per occurrence: leftovers from a previous
            # unpaired occurrence must not prevent this one from closing.
            stack = [self.DELIMITER_PAIRS[d]]

            # Passing offset as the start position gives absolute
            # positions without having to copy the tail of the line.
            for match in self.RE_DELIM.finditer(line, offset):
                pos = match.start()
                d = match.group()

                if d in self.DELIMITER_PAIRS:
                    stack.append(self.DELIMITER_PAIRS[d])
                    continue

                # Does the end delimiter match what is expected?
                if stack and d == stack[-1]:
                    stack.pop()

                    if not stack:
                        yield start, offset, pos + 1
                        break

    def search(self, regex, line):
        """
        This is similar to re.search:

        It matches a regex that it is followed by a delimiter,
        returning occurrences only if all delimiters are paired.

        This is a generator: it yields the text of each occurrence, from
        the start of the regex match up to the paired end delimiter.
        """

        for start, _, end in self._search(regex, line):
            yield line[start:end]

    def sub(self, regex, sub, line, count=0):
        r"""
        This is similar to re.sub:

        It matches a regex that it is followed by a delimiter,
        replacing occurrences only if all delimiters are paired.

        if r'\1' is used, it works just like re: it places there the
        matched paired data with the delimiter stripped.

        A ';' placed right after the end delimiter is dropped together
        with the replaced block.

        Like re.sub, occurrences are non-overlapping: an occurrence nested
        inside an already-replaced block is not replaced again.

        If count is different than zero, it will replace at most count
        items.
        """
        out = []

        cur_pos = 0
        n = 0

        for start, end, pos in self._search(regex, line):
            # Occurrence nested inside a block that was already replaced
            if start < cur_pos:
                continue

            out.append(line[cur_pos:start])

            # Value, ignoring start/end delimiters
            value = line[end:pos - 1]

            # replaces \1 at the sub string, if \1 is used there
            out.append(sub.replace(r'\1', value))

            # Drop end ';' if any. The block may finish at the end of the
            # line, so check bounds before looking at the next character.
            if pos < len(line) and line[pos] == ';':
                pos += 1

            cur_pos = pos
            n += 1

            # Stop once the requested number of replacements was done
            if count and n >= count:
                break

        # Append the remaining string
        out.append(line[cur_pos:])

        return "".join(out)