From 82e62614e9171981a3972940b8e96e02c7e55f83 Mon Sep 17 00:00:00 2001 From: pennae Date: Sun, 19 Feb 2023 22:53:21 +0100 Subject: [PATCH] nixos-render-docs: add html renderer the basic html renderer. it doesn't have all the docbook compatibility codes embedded into it, but there is a good amount. this renderer is unaware of manual structure and does not traverse structural include tokens (if it finds any it'll just fail), that task falls to derived classes. once we have more uses for structural includes than just the manual we may revisit this decision. --- .../src/nixos_render_docs/__init__.py | 1 + .../src/nixos_render_docs/html.py | 245 ++++++++++++++++++ .../nixos-render-docs/src/tests/test_html.py | 179 +++++++++++++ 3 files changed, 425 insertions(+) create mode 100644 pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/html.py create mode 100644 pkgs/tools/nix/nixos-render-docs/src/tests/test_html.py diff --git a/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/__init__.py b/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/__init__.py index 1c58accb4166..7f7463e5c837 100644 --- a/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/__init__.py +++ b/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/__init__.py @@ -8,6 +8,7 @@ from pprint import pprint from typing import Any, Dict from .md import Converter +from . import html from . import manual from . import options from . 
import parallel diff --git a/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/html.py b/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/html.py new file mode 100644 index 000000000000..39d2da6adf8c --- /dev/null +++ b/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/html.py @@ -0,0 +1,245 @@ +from collections.abc import Mapping, Sequence +from typing import cast, Optional, NamedTuple + +from html import escape +from markdown_it.token import Token + +from .manual_structure import XrefTarget +from .md import Renderer + +class UnresolvedXrefError(Exception): + pass + +class Heading(NamedTuple): + container_tag: str + level: int + html_tag: str + # special handling for part content: whether partinfo div was already closed from + # elsewhere or still needs closing. + partintro_closed: bool + # tocs are generated when the heading opens, but have to be emitted into the file + # after the heading titlepage (and maybe partinfo) has been closed. + toc_fragment: str + +_bullet_list_styles = [ 'disc', 'circle', 'square' ] +_ordered_list_styles = [ '1', 'a', 'i', 'A', 'I' ] + +class HTMLRenderer(Renderer): + _xref_targets: Mapping[str, XrefTarget] + + _headings: list[Heading] + _attrspans: list[str] + _hlevel_offset: int = 0 + _bullet_list_nesting: int = 0 + _ordered_list_nesting: int = 0 + + def __init__(self, manpage_urls: Mapping[str, str], xref_targets: Mapping[str, XrefTarget]): + super().__init__(manpage_urls) + self._headings = [] + self._attrspans = [] + self._xref_targets = xref_targets + + def render(self, tokens: Sequence[Token]) -> str: + result = super().render(tokens) + result += self._close_headings(None) + return result + + def text(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return escape(token.content) + def paragraph_open(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return "

" + def paragraph_close(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return "

" + def hardbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return "
" + def softbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return "\n" + def code_inline(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return f'{escape(token.content)}' + def code_block(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return self.fence(token, tokens, i) + def link_open(self, token: Token, tokens: Sequence[Token], i: int) -> str: + href = escape(cast(str, token.attrs['href']), True) + tag, title, target, text = "link", "", 'target="_top"', "" + if href.startswith('#'): + if not (xref := self._xref_targets.get(href[1:])): + raise UnresolvedXrefError(f"bad local reference, id {href} not known") + if tokens[i + 1].type == 'link_close': + tag, text = "xref", xref.title_html + if xref.title: + title = f'title="{escape(xref.title, True)}"' + target, href = "", xref.href() + return f'{text}' + def link_close(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return "" + def list_item_open(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return '
  • ' + def list_item_close(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return "
  • " + def bullet_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str: + extra = 'compact' if token.meta.get('compact', False) else '' + style = _bullet_list_styles[self._bullet_list_nesting % len(_bullet_list_styles)] + self._bullet_list_nesting += 1 + return f'
    " + def em_open(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return '' + def em_close(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return "" + def strong_open(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return '' + def strong_close(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return "" + def fence(self, token: Token, tokens: Sequence[Token], i: int) -> str: + # TODO use token.info. docbook doesn't so we can't yet. + return f'
    \n{escape(token.content)}
    ' + def blockquote_open(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return '
    ' + def blockquote_close(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return "
    " + def note_open(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return '

    Note

    ' + def note_close(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return "
    " + def caution_open(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return '

    Caution

    ' + def caution_close(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return "
    " + def important_open(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return '

    Important

    ' + def important_close(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return "
    " + def tip_open(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return '

    Tip

    ' + def tip_close(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return "
    " + def warning_open(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return '

    Warning

    ' + def warning_close(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return "
    " + def dl_open(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return '
    ' + def dl_close(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return "
    " + def dt_open(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return '
    ' + def dt_close(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return "
    " + def dd_open(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return "
    " + def dd_close(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return "
    " + def myst_role(self, token: Token, tokens: Sequence[Token], i: int) -> str: + if token.meta['name'] == 'command': + return f'{escape(token.content)}' + if token.meta['name'] == 'file': + return f'{escape(token.content)}' + if token.meta['name'] == 'var': + return f'{escape(token.content)}' + if token.meta['name'] == 'env': + return f'{escape(token.content)}' + if token.meta['name'] == 'option': + return f'{escape(token.content)}' + if token.meta['name'] == 'manpage': + [page, section] = [ s.strip() for s in token.content.rsplit('(', 1) ] + section = section[:-1] + man = f"{page}({section})" + title = f'{escape(page)}' + vol = f"({escape(section)})" + ref = f'{title}{vol}' + if man in self._manpage_urls: + return f'{ref}' + else: + return ref + return super().myst_role(token, tokens, i) + def attr_span_begin(self, token: Token, tokens: Sequence[Token], i: int) -> str: + # we currently support *only* inline anchors and the special .keycap class to produce + # keycap-styled spans. + (id_part, class_part) = ("", "") + if s := token.attrs.get('id'): + id_part = f'' + if s := token.attrs.get('class'): + if s == 'keycap': + class_part = '' + self._attrspans.append("") + else: + return super().attr_span_begin(token, tokens, i) + else: + self._attrspans.append("") + return id_part + class_part + def attr_span_end(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return self._attrspans.pop() + def heading_open(self, token: Token, tokens: Sequence[Token], i: int) -> str: + hlevel = int(token.tag[1:]) + htag, hstyle = self._make_hN(hlevel) + if hstyle: + hstyle = f'style="{escape(hstyle, True)}"' + if anchor := cast(str, token.attrs.get('id', '')): + anchor = f'' + result = self._close_headings(hlevel) + tag = self._heading_tag(token, tokens, i) + toc_fragment = self._build_toc(tokens, i) + self._headings.append(Heading(tag, hlevel, htag, tag != 'part', toc_fragment)) + return ( + f'{result}' + f'
    ' + f'
    ' + f'
    ' + f'
    ' + f' <{htag} class="title" {hstyle}>' + f' {anchor}' + ) + def heading_close(self, token: Token, tokens: Sequence[Token], i: int) -> str: + heading = self._headings[-1] + result = ( + f' ' + f'
    ' + f'
    ' + f'
    ' + ) + if heading.container_tag == 'part': + result += '
    ' + else: + result += heading.toc_fragment + return result + def ordered_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str: + extra = 'compact' if token.meta.get('compact', False) else '' + start = f'start="{token.attrs["start"]}"' if 'start' in token.attrs else "" + style = _ordered_list_styles[self._ordered_list_nesting % len(_ordered_list_styles)] + self._ordered_list_nesting += 1 + return f'
      ' + def ordered_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str: + self._ordered_list_nesting -= 1; + return "
    " + def example_open(self, token: Token, tokens: Sequence[Token], i: int) -> str: + if id := token.attrs.get('id'): + return f'' + return "" + def example_close(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return "" + + def _make_hN(self, level: int) -> tuple[str, str]: + return f"h{min(6, max(1, level + self._hlevel_offset))}", "" + + def _maybe_close_partintro(self) -> str: + if self._headings: + heading = self._headings[-1] + if heading.container_tag == 'part' and not heading.partintro_closed: + self._headings[-1] = heading._replace(partintro_closed=True) + return heading.toc_fragment + "
    " + return "" + + def _close_headings(self, level: Optional[int]) -> str: + result = [] + while len(self._headings) and (level is None or self._headings[-1].level >= level): + result.append(self._maybe_close_partintro()) + result.append("
    ") + self._headings.pop() + return "\n".join(result) + + def _heading_tag(self, token: Token, tokens: Sequence[Token], i: int) -> str: + return "section" + def _build_toc(self, tokens: Sequence[Token], i: int) -> str: + return "" diff --git a/pkgs/tools/nix/nixos-render-docs/src/tests/test_html.py b/pkgs/tools/nix/nixos-render-docs/src/tests/test_html.py new file mode 100644 index 000000000000..df366a8babd7 --- /dev/null +++ b/pkgs/tools/nix/nixos-render-docs/src/tests/test_html.py @@ -0,0 +1,179 @@ +import nixos_render_docs as nrd +import pytest + +from sample_md import sample1 + +class Converter(nrd.md.Converter[nrd.html.HTMLRenderer]): + def __init__(self, manpage_urls: dict[str, str], xrefs: dict[str, nrd.manual_structure.XrefTarget]): + super().__init__() + self._renderer = nrd.html.HTMLRenderer(manpage_urls, xrefs) + +def unpretty(s: str) -> str: + return "".join(map(str.strip, s.splitlines())).replace('␣', ' ').replace('↵', '\n') + +def test_lists_styles() -> None: + # nested lists rotate through a number of list style + c = Converter({}, {}) + assert c._render("- - - - foo") == unpretty(""" +
      +
    • +
        +
      • +
          +
        • +
            +
          • foo

          • +
          +
        • +
        +
      • +
      +
    • +
    + """) + assert c._render("1. 1. 1. 1. 1. 1. foo") == unpretty(""" +
      +
    1. +
        +
      1. +
          +
        1. +
            +
          1. +
              +
            1. +
                +
              1. foo

              2. +
              +
            2. +
            +
          2. +
          +
        2. +
        +
      2. +
      +
    2. +
    + """) + +def test_xrefs() -> None: + # empty links to known ids render as xrefs, unknown ids raise UnresolvedXrefError + c = Converter({}, { + 'foo': nrd.manual_structure.XrefTarget('foo', '
    ', 'toc1', 'title1', 'index.html'), + 'bar': nrd.manual_structure.XrefTarget('bar', '
    ', 'toc2', 'title2', 'index.html', True), + }) + assert c._render("[](#foo)") == '


    ' + assert c._render("[](#bar)") == '


    ' + with pytest.raises(nrd.html.UnresolvedXrefError) as exc: + c._render("[](#baz)") + assert exc.value.args[0] == 'bad local reference, id #baz not known' + +def test_full() -> None: + c = Converter({ 'man(1)': 'http://example.org' }, {}) + assert c._render(sample1) == unpretty(""" +
    +

    Warning

    +

    foo

    +
    +

    Note

    +

    nested

    +
    +
    +

    + ↵ + multiline↵ + +

    +

    + + man(1) + reference +

    +

    some nested anchors

    +

    + emph␣ + strong␣ + nesting emph and strong␣ + and code +

    +
    +
      +
    • wide bullet

    • +
    • list

    • +
    +
    +
    +
      +
    1. wide ordered

    2. +
    3. list

    4. +
    +
    +
    +
      +
    • narrow bullet

    • +
    • list

    • +
    +
    +
    +
      +
    1. narrow ordered

    2. +
    3. list

    4. +
    +
    +
    +
    +

    quotes

    +
    +
    +

    with nesting

    +
    ↵
    +             nested code block↵
    +            
    +
    +
    +
    +
      +
    • and lists

    • +
    • +
      ↵
      +              containing code↵
      +             
      +
    • +
    +
    +

    and more quote

    +
    +
    +
    +
      +
    1. list starting at 100

    2. +
    3. goes on

    4. +
    +
    +
    +
    +
    deflist
    +
    +
    +
    +

    + with a quote↵ + and stuff +

    +
    +
    +
    ↵
    +            code block↵
    +           
    +
    ↵
    +            fenced block↵
    +           
    +

    text

    +
    +
    more stuff in same deflist
    +
    +

    foo

    +
    +
    +
    """)