#!/usr/bin/python
"""Convert a rules-ng description written in the indented text syntax to XML.

The input is read from the file named by the first command-line argument, or
from standard input when no argument is given.  The resulting XML document is
written to standard output and diagnostics go to standard error in
``file:line:column: message`` form.  The exit status is 1 when at least one
diagnostic was reported and 0 otherwise.

The code deliberately sticks to constructs that behave the same on Python 2
and Python 3.
"""
import sys

# Characters that always form a single-character token of their own.
special_chars = ":=[]{}"


# this does not recognize octal unlike int(s, 0)
def intdh(s):
    """Parse *s* as hexadecimal when it starts with ``0x``, else as decimal."""
    if s.startswith("0x"):
        return int(s[2:], 16)
    return int(s)


def str_width(s):
    """Return the display width of *s*, with tabs advancing to multiples of 8."""
    w = 0
    for ch in s:
        if ch == '\t':
            w = (w + 8) & ~7
        else:
            w = w + 1
    return w


def hexa(v):
    """Format *v* in decimal when it is at most 9, otherwise via ``hex()``."""
    if v <= 9:
        return str(v)
    return hex(v)


def indent_str(indent):
    """Return *indent* columns of whitespace: tabs first, then 0-7 spaces."""
    if indent <= 0:
        return ""
    return "\t" * (indent >> 3) + " " * (indent & 7)


def isident(c):
    """Tell whether *c* is alphanumeric or one of ``_``, ``-``, ``#``, ``,``."""
    return c.isalnum() or c in ("_", "-", "#", ",")


def scan_indent(s, i):
    """Measure the leading whitespace of the line that starts at ``s[i]``.

    Spaces count one column and tabs advance to the next multiple of 8.  The
    result is rounded down to a multiple of 8, so only whole tab levels are
    significant.

    Returns ``(indent, index)`` where *index* is the position of the first
    character that is neither a space nor a tab (or ``len(s)``).
    """
    indent = 0
    while s[i:i + 1]:
        if s[i] == " ":
            indent += 1
        # This must be "elif": with two independent "if"s a space always fell
        # into the "else: break" below and was never counted or consumed.
        elif s[i] == "\t":
            indent = (indent + 8) & ~7
        else:
            break
        i += 1
    return indent & ~7, i


def place_comments(etree, comments):
    """Insert the collected comment nodes into the finished tree.

    *etree* is the lxml ``etree`` module and *comments* maps an element to the
    list of comment nodes that were recorded while that element was the
    innermost open one.  For each element the subtree is walked and the
    comments are inserted at the first place where the surrounding whitespace
    contains a newline: as first children of a node whose text has one, or
    right after a node whose tail has one.
    """
    # TODO: this takes an inordinate amount of code... can it be shortened?
    for elem, commentelems in comments.items():
        for action, subelem in etree.iterwalk(elem, events=("start", "end")):
            if action == "start":
                if subelem.text and "\n" in subelem.text:
                    if len(commentelems) == 1:
                        # A single comment stays on the line of the open tag.
                        subelem.insert(0, commentelems[0])
                        commentelems[0].tail = subelem.text
                        subelem.text = " "
                    else:
                        for k, commentelem in enumerate(commentelems):
                            subelem.insert(k, commentelem)
                            commentelem.tail = subelem.text
                    break
            elif action == "end":
                if subelem.tail and "\n" in subelem.tail:
                    parent = subelem.getparent()
                    pos = parent.index(subelem) + 1
                    if len(commentelems) == 1:
                        parent.insert(pos, commentelems[0])
                        commentelems[0].tail = subelem.tail
                        subelem.tail = " "
                    else:
                        # Indent the comments one level deeper than the
                        # whitespace that precedes the keyed element.
                        prev = elem.getprevious()
                        if prev is not None:
                            cindent = prev.tail
                        else:
                            prev = elem.getparent()
                            if prev is not None:
                                cindent = prev.text
                            else:
                                cindent = "\n"
                        cindent += "\t"
                        for k, commentelem in enumerate(commentelems):
                            parent.insert(pos + k, commentelem)
                            commentelem.tail = cindent
                        commentelems[-1].tail = subelem.tail
                        subelem.tail = cindent
                    break


def main(argv=None):
    """Run the converter and return the process exit status.

    *argv* defaults to ``sys.argv``; ``argv[1]``, when present, names the
    input file, otherwise standard input is read.  Returns 0 when no
    diagnostic was reported and 1 otherwise.
    """
    # Imported here rather than at module level so that the pure helpers
    # above stay importable on systems where lxml is not installed.
    from lxml import etree as ET

    if argv is None:
        argv = sys.argv
    if argv[1:2]:
        filename = argv[1]
        with open(filename) as rnn:
            text = rnn.read()
    else:
        # error() needs a name to print even when reading from a pipe.
        filename = "<stdin>"
        text = sys.stdin.read()

    # The leading newline makes the first real line go through the same
    # start-of-line handling (indent measurement) as every later line.
    s = "\n" + text

    root = ET.Element("database")
    default_reg = "reg32"
    comments = {}
    i = 0
    tokens = []
    attrvalues = []
    comment = ""
    linenum = 0
    errors = [0]        # one-element list so the nested error() can update it
    linestart = 0       # index of the newline that precedes the current line
    brief = None
    indent = 0
    # Open elements, innermost last, as (indent, element); root sits at -1 so
    # that no line can ever close it.
    stack = [(-1, root)]
    last_attachment = None
    empty_line = False

    def error(msg):
        """Report *msg* at the current scan position and count it."""
        # s[linestart] is the newline before the line, so skip it when
        # computing the 1-based column.
        column = str_width(s[linestart + 1:i]) + 1
        sys.stderr.write("%s:%d:%d: %s\n" % (filename, linenum, column, msg))
        errors[0] += 1

    def reduce_stack(target):
        """Close open elements until only *target* entries remain."""
        while len(stack) > target:
            # The closing tag is indented like the enclosing element.
            stack[-1][1].tail = "\n" + indent_str(stack[-2][0])
            stack.pop()

    while True:
        c = s[i:i + 1]
        if s[i:i + 2] == "//":
            # Line comment: remember its text and treat it as end of line.
            i += 2
            begin = i
            # Slicing (not indexing) keeps this safe when the file ends
            # inside the comment.
            while s[i:i + 1] and s[i] != "\n":
                i += 1
            comment = s[begin:i]
            c = "\n"

        if not c or c == "\n":
            # ---- end of line (or of input): interpret the collected tokens
            if not tokens:
                empty_line = True
            else:
                last_closed_elem = None
                if last_attachment is not None:
                    last_closed_elem = last_attachment
                    last_attachment = None
                # Close every open element that is indented at least as
                # deeply as this line.
                for j in range(len(stack)):
                    if indent <= stack[j][0]:
                        if indent < stack[j][0]:
                            error("line indentation does not match any previous indentation")
                        last_closed_elem = stack[j][1]
                        reduce_stack(j)
                        break

                elem = None
                parent = stack[-1][1]
                ptag = parent.tag
                parent_is_reg = ptag in ("reg8", "reg16", "reg32")
                reglike_tag = None

                # Leading flag words become yes-valued attributes.
                while tokens and tokens[0] in ("bare", "inline"):
                    attrvalues.append((tokens[0], "yes"))
                    tokens = tokens[1:]

                # Pull the decorations (!access, ": <bits>", {stride},
                # [length]) out of the token list; consumed slots are set to
                # None and filtered out afterwards.
                for j in range(len(tokens)):
                    if not tokens[j]:
                        continue
                    if tokens[j][0] == "!":
                        attrvalues.append(("access", tokens[j][1:]))
                        tokens[j] = None
                    if (j >= 2 and tokens[j] == ":" and (j + 1) < len(tokens)
                            and tokens[j + 1].isdigit()):
                        reglike_tag = "reg" + tokens[j + 1]
                        tokens[j] = tokens[j + 1] = None
                    if (j + 2) < len(tokens):
                        if tokens[j] == "{" and tokens[j + 2] == "}":
                            attrvalues.append(("stride", tokens[j + 1]))
                            tokens[j] = tokens[j + 1] = tokens[j + 2] = None
                        if tokens[j] == "[" and tokens[j + 2] == "]":
                            attrvalues.append(("length", tokens[j + 1]))
                            tokens[j] = tokens[j + 1] = tokens[j + 2] = None
                tokens = [t for t in tokens if t]

                # A trailing "=" marks an array, a trailing ":=" a stripe.
                if tokens and tokens[-1] == "=":
                    if len(tokens) >= 2 and tokens[-2] == ":":
                        reglike_tag = "stripe"
                        tokens = tokens[:-2]
                    else:
                        reglike_tag = "array"
                        tokens = tokens[:-1]

                if not tokens:
                    error("line with no tokens")
                elif tokens[0] == "#import":
                    elem = ET.Element("import")
                    if brief:
                        elem.attrib["file"] = brief
                        brief = None
                    else:
                        error("#import without file argument")
                    if len(tokens) > 1:
                        error("too many tokens in #import line")
                elif tokens[0] == "#pragma":
                    if len(tokens) == 1:
                        error("pragma with no arguments")
                    elif tokens[1][:3] == "reg":
                        default_reg = tokens[1]
                    else:
                        error("unrecognized pragma")
                    elem = None
                elif tokens[0].startswith("@"):
                    # "@tag [name]": the tag name follows the "@" in the same
                    # token, so this has to be a prefix test.
                    tagname = tokens[0][1:]
                    if tagname:
                        elem = ET.Element(tagname)
                        if len(tokens) >= 2:
                            elem.attrib["name"] = tokens[1]
                    else:
                        error("generic tag line without a tag name")
                    if len(tokens) >= 3:
                        error("too many tokens in generic tag line")
                elif tokens[0] == ":":
                    doc = parent.find("doc")
                    if doc is not None:
                        # Continuation: append the text as a new line of the
                        # existing <doc>.
                        prev = doc.getprevious()
                        if prev is not None:
                            docindent = prev.tail
                        else:
                            prev = doc.getparent()
                            if prev is not None:
                                docindent = prev.text
                            else:
                                docindent = ""
                        # The first ":" line may have been empty, which
                        # leaves doc.text unset.
                        doctext = doc.text or ""
                        if "\n" not in doctext:
                            doctext = docindent + "\t" + doctext + docindent
                        nl = doctext.rfind("\n")
                        doc.text = (doctext[:nl] + docindent + "\t"
                                    + (tokens[1] if len(tokens) >= 2 else "")
                                    + docindent)
                        elem = None
                    else:
                        elem = ET.Element("doc")
                        if len(tokens) >= 2:
                            elem.text = tokens[1]
                elif tokens[-1] == ":":
                    # "tag [name] :" opens a generic element.
                    elem = ET.Element(tokens[0])
                    if len(tokens) >= 3:
                        elem.attrib["name"] = tokens[1]
                    if len(tokens) >= 4:
                        error("too many tokens in generic tag line")
                elif len(tokens) == 1 and (parent_is_reg or ptag == "enum"):
                    elem = ET.Element("value")
                    if tokens[0][0].isdigit():
                        elem.attrib["value"] = tokens[0]
                    else:
                        elem.attrib["name"] = tokens[0]
                elif len(tokens) >= 3 and tokens[1] == "=":
                    elem = ET.Element("value")
                    elem.attrib["value"] = tokens[0]
                    elem.attrib["name"] = tokens[2]
                    if len(tokens) >= 4:
                        error("too many tokens in value line")
                elif len(tokens) >= 3 and tokens[0] == "use":
                    elem = ET.Element("use-" + tokens[1])
                    elem.attrib["name"] = tokens[2]
                    if len(tokens) >= 4:
                        error("too many tokens in use line")
                elif parent_is_reg or ptag == "bitset":
                    # "<pos>" or "<low>-<high>" followed by name and type.
                    sl = tokens[0].find("-")
                    if sl >= 0:
                        low = tokens[0][:sl]
                        high = tokens[0][sl + 1:]
                    else:
                        low = high = tokens[0]
                    try:
                        low_bit = intdh(low)
                        high_bit = intdh(high)
                    except ValueError:
                        # Report malformed numbers like any other input error
                        # instead of dying with a traceback.
                        error("invalid bitfield position: " + tokens[0])
                    else:
                        elem = ET.Element("bitfield")
                        if low == high:
                            elem.attrib["pos"] = str(low_bit)
                        else:
                            elem.attrib["low"] = str(low_bit)
                            elem.attrib["high"] = str(high_bit)
                        if len(tokens) >= 2:
                            elem.attrib["name"] = tokens[1]
                        if len(tokens) >= 3:
                            elem.attrib["type"] = tokens[2]
                    if len(tokens) >= 4:
                        error("too many tokens in bitfield line")
                else:
                    # Register-like line: "<hex offset> [name [type]]".
                    try:
                        offset = int(tokens[0], 16)
                    except ValueError:
                        error("invalid register offset: " + tokens[0])
                    else:
                        elem = ET.Element(reglike_tag if reglike_tag else default_reg)
                        elem.attrib["offset"] = hexa(offset)
                        if len(tokens) >= 2:
                            elem.attrib["name"] = tokens[1]
                        if len(tokens) >= 3:
                            elem.attrib["type"] = tokens[2].replace(",", " ")
                    if len(tokens) >= 4:
                        error("too many tokens in reg line")
                    reglike_tag = None

                if elem is not None:
                    for attr, value in attrvalues:
                        elem.attrib[attr] = value

                    if elem.attrib.get("type") == "reg":
                        del elem.attrib["type"]

                    if brief:
                        briefelem = ET.Element("brief")
                        briefelem.text = brief
                        elem.append(briefelem)
                        last_attachment = briefelem

                    parent.append(elem)

                    # Whitespace placed before the new element: after the
                    # sibling that was just closed, or as the parent's text
                    # when this is the first child.
                    xmlindent = "\n" + indent_str(indent)
                    if last_closed_elem is not None:
                        last_closed_elem.tail = ("\n" if empty_line else "") + xmlindent
                    else:
                        parent.text = xmlindent
                    stack.append((indent, elem))
                    # NOTE(review): the source this was recovered from had
                    # lost its indentation; resetting the flag only when an
                    # element was emitted is the assumed placement - confirm.
                    empty_line = False

                if reglike_tag:
                    error("line specified to be a " + reglike_tag
                          + " but the line format does not match it")

            if comment:
                commentelem = ET.Comment(comment)
                comments.setdefault(stack[-1][1], []).append(commentelem)

            tokens = []
            attrvalues = []
            comment = None
            brief = None
            linenum += 1
            linestart = i
            indent = 0
            if not c:
                break

            # ---- start of the next line
            indent, i = scan_indent(s, i + 1)
            if s[i:i + 1] == ":":
                # A line starting with ":" is documentation text running to
                # the end of the line or to a "//" comment.
                begin = i + 1
                while s[i:i + 1] and s[i] != "\n" and s[i:i + 2] != "//":
                    i += 1
                tokens.append(":")
                tokens.append(s[begin:i].strip())
        elif c.isspace():
            i += 1
        elif c in special_chars:
            tokens.append(c)
            i += 1
        elif c == "\"":
            # Quoted string: the "brief" text of the line.  A backslash makes
            # the scanner skip the following character.
            i += 1
            begin = i
            while s[i:i + 1] and s[i] != "\"":
                if s[i] == "\\":
                    i += 2
                else:
                    i += 1
            if brief:
                error("brief specified multiple times")
            elif not tokens:
                error("brief cannot be the start of a line")
            else:
                brief = s[begin:i]
            i += 1
        else:
            # Word token, optionally followed by a parenthesised argument:
            # "(variants)", "(varset=variants)" or "attr(value)".
            begin = i
            paren = -1
            while True:
                d = s[i:i + 1]
                if d == "(":
                    paren = i
                    i += 1
                    while True:
                        if not s[i:i + 1]:
                            error("unterminated parenthesis")
                            break
                        elif s[i] == ')':
                            break
                        else:
                            i += 1
                    break
                elif d.isspace() or d in special_chars:
                    # Also taken at end of input: "" is "in" every string.
                    break
                else:
                    i += 1

            if paren >= 0:
                if paren == begin:
                    variants = s[paren + 1:i]
                    equals = variants.rfind("=")
                    if equals >= 0:
                        attrvalues.append(("varset", variants[:equals]))
                        variants = variants[equals + 1:]
                    attrvalues.append(("variants", variants))
                else:
                    attrvalues.append((s[begin:paren], s[paren + 1:i]))
                # Slice so that an unterminated parenthesis at end of input
                # does not raise IndexError.
                if s[i:i + 1] == ")":
                    i += 1
            else:
                tokens.append(s[begin:i])

    reduce_stack(1)
    place_comments(ET, comments)

    # lxml etree refuses attributes with colons, and using namespaces
    # "properly" is harder and has no advantages
    # this is a sequence from /dev/urandom
    colon = "39584ac56e6d0976692778b80915f94b61767814a8704982"
    root.attrib["xmlns"] = "http://nouveau.freedesktop.org/"
    root.attrib["xmlns" + colon + "xsi"] = "http://www.w3.org/2001/XMLSchema-instance"
    root.attrib["xsi" + colon + "schemaLocation"] = "http://nouveau.freedesktop.org/ rules-ng.xsd"

    xml = ET.tostring(root)
    if not isinstance(xml, str):
        # Python 3: tostring() returns bytes; its default encoding is ASCII.
        xml = xml.decode("ascii")
    sys.stdout.write(xml.replace(colon, ":") + "\n")

    return 0 if errors[0] == 0 else 1


if __name__ == "__main__":
    sys.exit(main())