~robin_jadoul/blog

ec1f86ba01e19f75c60897b3a4c4fae97ef76b9a — Robin Jadoul 3 months ago 533f270
ruff formatting
2 files changed, 188 insertions(+), 58 deletions(-)

M site.py
M ssg.py
M site.py => site.py +82 -23
@@ 1,6 1,7 @@
# TODO:
#  - minimalist/gemini/text-only version??
#  - Turn it into a nix flake? :)
#  - Embedded previews and things

#  - Improve homepage
###


@@ 14,55 15,104 @@
#    + Authored CTF challenges?
###

import os, datetime, collections, hashlib, pathlib, typing
import pygments.lexers, pygments.formatters, pygments.util
import collections
import datetime
import hashlib
import os
import pathlib
import typing

import pandocfilters  # type: ignore
import pygments.formatters
import pygments.lexers
import pygments.util

os.chdir(os.path.abspath(os.path.dirname(__file__)))

from ssg import *
from ssg import (  # noqa: E402
    CacheRegistrationRenderer,
    EncryptionRenderer,
    JinjaEnvironment,
    Page,
    PandocElement,
    PandocRenderer,
    PandocValue,
    PandocWalker,
    Site,
    YamlFrontmatterLoader,
    conditional_render,
    create,
    curdir,
    gather,
    gather_directory,
    gather_single,
    meta_p,
    output,
    render,
    select,
    t_Content,
    t_Transformer,
    transform,
)


def cleanurl(page: Page) -> None:
    """Rewrite the page's output path from `<dir>/<name>.<ext>` to `<dir>/<name>/index.html`.

    The page is modified in place; nothing is returned.
    """
    current = page.outpath
    page.outpath = current.parent.joinpath(current.stem, "index.html")


def strip_leading(page: Page) -> None:
    """Drop the first path component below the output directory from `page.outpath`.

    E.g. `<output>/a/b/c.html` becomes `<output>/b/c.html`. The page is
    modified in place.
    """
    out_root = config.output_directory
    leading = page.outpath.relative_to(out_root).parts[0]
    remainder = page.outpath.relative_to(out_root / leading)
    page.outpath = out_root / remainder


def replace_leading(replacement: str) -> t_Transformer:
    """Build a transformer that swaps the first path component below the output directory.

    The returned transformer rewrites `<output>/<first>/<rest>` to
    `<output>/<replacement>/<rest>` on `page.outpath`, in place.
    """

    def transformer(page: Page) -> None:
        # `rel` is taken from the *original* outpath; the path must be reassigned
        # exactly once, otherwise the second relative_to() would be computed
        # against an already-rewritten path and raise ValueError.
        rel = page.outpath.relative_to(config.output_directory)
        page.outpath = (
            config.output_directory / replacement / page.outpath.relative_to(config.output_directory / rel.parts[0])
        )

    return transformer


def attach_meta(**kw: typing.Any) -> t_Transformer:
    """Build a transformer that writes every given keyword into `page.metadata`.

    Existing keys with the same name are overwritten.
    """

    def transformer(page: Page) -> None:
        for name, value in kw.items():
            page.metadata[name] = value

    return transformer


def change_extension(ext: str) -> t_Transformer:
    """Build a transformer that replaces the suffix of `page.outpath` with `ext`."""

    def transformer(page: Page) -> None:
        renamed = page.outpath.with_suffix(ext)
        page.outpath = renamed

    return transformer


def as_link(path: pathlib.Path) -> str:
    """Return the site-absolute URL path for an output file.

    `path` must lie inside `config.output_directory` (otherwise
    `relative_to` raises ValueError). A trailing `index.html` is dropped so
    the link points at the containing directory.
    """
    rel = path.relative_to(config.output_directory)
    # `.name` is "" for an empty relative path, so this cannot raise IndexError.
    if rel.name == "index.html":
        rel = rel.parent
    # URLs always use forward slashes, regardless of the host OS.
    posix = rel.as_posix()
    # An empty relative path renders as "."; link to the site root instead of "/.".
    return "/" if posix == "." else f"/{posix}"


def as_linkable(page: Page) -> dict[str, typing.Any]:
    """Summarise a page as a dict with its title, link and metadata.

    The title falls back to "<no title>" when the metadata has none; the link
    is derived from the page's output path via `as_link`.
    """
    return {
        "title": page.metadata.get("title", "<no title>"),
        "link": as_link(page.outpath),
        "metadata": page.metadata,
    }


def gather_tags(target: dict[str, list[Page]]) -> t_Transformer:
    """Build a transformer that files each page under every tag in its metadata.

    For each entry of `page.metadata["tags"]` (none if the key is absent) the
    page is appended to `target[tag]`. `setdefault` is used so that both a
    plain dict and a `defaultdict(list)` work as `target`.
    """

    def transformer(page: Page) -> None:
        for tag in page.metadata.get("tags", []):
            target.setdefault(tag, []).append(page)

    return transformer


# Page predicates: each yields the value stored under the given metadata key,
# or False when the page's metadata has no such key.
is_draft = meta_p("draft", False)
is_encrypted = meta_p("encrypted", False)


def serialize_content(content: t_Content) -> bytes:
    if isinstance(content, dict):
        # Not exactly free from malicious collisions, but good enough


@@ 71,20 121,27 @@ def serialize_content(content: t_Content) -> bytes:
    else:
        return b"\x01" + content


def attach_cache_keys(p: Page) -> None:
    """Store a `(location_key, content_key)` pair under `p.metadata["_cache_keys"]`.

    `location_key` is the SHA-256 hex digest of the POSIX form of the output
    path; `content_key` is the SHA-256 hex digest of the serialized metadata
    followed by the serialized content.
    """
    location_digest = hashlib.sha256(p.outpath.as_posix().encode())
    content_digest = hashlib.sha256()
    # Feeding the two parts in sequence hashes the same bytes as their concatenation.
    content_digest.update(serialize_content(p.metadata))
    content_digest.update(serialize_content(p.get_content()))
    p.metadata["_cache_keys"] = (location_digest.hexdigest(), content_digest.hexdigest())


def has_changed(p: Page) -> bool:
    """Report whether the page must be rebuilt according to the on-disk cache.

    Reads the `(location_key, content_key)` pair from
    `p.metadata["_cache_keys"]` (KeyError if absent). The page counts as
    unchanged only when its output file exists, a cache file named after the
    location key exists in `<curdir>/.cache`, and that file's stripped text
    equals the content key. The cache directory is created if missing.
    """
    location_key, content_key = p.metadata["_cache_keys"]
    cachedir = curdir / ".cache"
    cachedir.mkdir(exist_ok=True)
    cache = cachedir / location_key
    return not (p.outpath.exists() and cache.exists() and cache.read_text().strip() == content_key)


LANG_COMPAT = {"python-repl": "pycon", "txt": "text"}
def pygments_filter(key: str, value: PandocValue, format: str, meta: dict[str, typing.Any]) -> typing.Union[None, list[PandocElement], PandocElement]:


def pygments_filter(
    key: str, value: PandocValue, format: str, meta: dict[str, typing.Any]
) -> typing.Union[None, list[PandocElement], PandocElement]:
    if format != "html5":
        return None
    if key == "CodeBlock":


@@ 101,22 158,24 @@ def pygments_filter(key: str, value: PandocValue, format: str, meta: dict[str, t
        except pygments.util.ClassNotFound:
            print(f"WARNING: No lexer found for language {lang}")
            return
        hl = pygments.highlight(body, lexer, pygments.formatters.HtmlFormatter(wrapcode=True, cssclass=f"highlight {lang}"))
        hl = pygments.highlight(
            body, lexer, pygments.formatters.HtmlFormatter(wrapcode=True, cssclass=f"highlight {lang}")
        )
        return pandocfilters.RawBlock(format, hl)
    return None


# Pandoc renderers for the two conversion stages (markdown reader, HTML writer);
# both are passed the `--mathml` and `--no-highlight` options.
mdreader = PandocRenderer.markdown_reader(options=["--mathml", "--no-highlight"])
htmlwriter = PandocRenderer.html_writer(options=["--mathml", "--no-highlight"])

with Site(
        output_directory = curdir / "output",
        loaders = {".md": YamlFrontmatterLoader()},

        sitename = "/dev/ur4ndom - Robin Jadoul",
        nav = [("Posts", "/posts/")],
        location = "https://ur4ndom.dev",
        description = "Cryptography, CTFs, and more"
        ) as config:
    output_directory=curdir / "output",
    loaders={".md": YamlFrontmatterLoader()},
    sitename="/dev/ur4ndom - Robin Jadoul",
    nav=[("Posts", "/posts/")],
    location="https://ur4ndom.dev",
    description="Cryptography, CTFs, and more",
) as config:
    with gather_directory(curdir / "static"):
        output()



@@ 139,12 198,12 @@ with Site(
        conditional_render(is_encrypted, EncryptionRenderer())
        conditional_render(is_encrypted, tmpls.get("encrypted_post.html"))
        conditional_render(~is_encrypted, tmpls.get("post.html"))
        

        render(CacheRegistrationRenderer(curdir / ".cache", "_cache_keys"))
        output()

    default_date = datetime.datetime(1, 1, 1, 0, 0, 0, 0, tzinfo=datetime.timezone(datetime.timedelta(0), "UTC"))
    sorted_posts = sorted(all_posts, key=lambda p: p.metadata.get('date', default_date), reverse=True)
    sorted_posts = sorted(all_posts, key=lambda p: p.metadata.get("date", default_date), reverse=True)

    with gather_single(curdir / "index.md"):
        transform(change_extension(".html"))


@@ 176,16 235,16 @@ with Site(
    for tag, tag_posts in posts_by_tags.items():
        with create(pathlib.Path("tags") / tag, metadata={"tag": tag}):
            transform(cleanurl)
            srt = sorted(tag_posts, key=lambda p: p.metadata.get('date', default_date), reverse=True)
            srt = sorted(tag_posts, key=lambda p: p.metadata.get("date", default_date), reverse=True)
            transform(attach_meta(posts=[as_linkable(p) for p in srt]))
            render(tmpls.get("tag.html"))
            output()

    prevheader: str = ""
    prevheader: str | None = ""
    posts_per_month: list[tuple[typing.Optional[str], list[dict[str, str]]]] = []
    for post in sorted_posts:
        if 'date' in post.metadata:
            header = post.metadata['date'].strftime("%B %Y")
        if "date" in post.metadata:
            header = post.metadata["date"].strftime("%B %Y")
        else:
            header = None
        if header != prevheader:

M ssg.py => ssg.py +106 -35
@@ 1,42 1,63 @@
import dataclasses, pathlib, os, typing, copy, shutil, hashlib, secrets, subprocess, json
import copy
import dataclasses
import hashlib
import json
import os
import pathlib
import secrets
import shutil
import subprocess
import typing

import jinja2

# TODO: figure out a clean way to make dependencies optional and keep mypy happy
#   Maybe by making it a package and having separate modules that might fail to import
import pandocfilters  # type: ignore
import yaml
import jinja2
from Crypto.Cipher import AES, _mode_gcm


def _ensure_global_context() -> "PageSet":
    """Return the module-level page-set context, or raise RuntimeError when none is set."""
    context = _global_context
    if context is not None:
        return context
    raise RuntimeError("No context is available for ssg to work on")


def _ensure_config() -> "Site":
    """Return the module-level site configuration, or raise RuntimeError when none is set."""
    active = _config
    if active is not None:
        return active
    raise RuntimeError("No config set")

def _expose_global(
    fn: typing.Callable[..., typing.Optional["PageSet"]], overwrite: bool = True
) -> typing.Callable[..., typing.Optional["PageSet"]]:
    """Publish a module-level wrapper for `fn` that operates on the global context.

    The wrapper is stored in this module's globals under `fn.__name__` and
    calls `fn` with the current global context as its first argument. When
    `overwrite` is true, the wrapper also replaces the global context with
    whatever `fn` returned.

    Returns `fn` itself (not the wrapper), so the decorated callable is left
    unchanged where it was defined.
    """
    if overwrite:

        def wrapper(*args: typing.Any, **kw: typing.Any) -> typing.Optional["PageSet"]:
            global _global_context
            res = fn(_ensure_global_context(), *args, **kw)
            _global_context = res
            return res
    else:

        def wrapper(*args: typing.Any, **kw: typing.Any) -> typing.Optional["PageSet"]:
            return fn(_ensure_global_context(), *args, **kw)

    wrapper.__name__ = fn.__name__

    globals()[fn.__name__] = wrapper
    return fn


# Shared type aliases.
t_Predicate = typing.Callable[["Page"], bool]  # Page -> bool
t_Transformer = typing.Callable[["Page"], typing.Optional["Page"]]  # Page -> Page or None
t_Content = typing.Union[bytes, dict[str, typing.Any]]  # page content: raw bytes or a dict
t_Meta = dict[str, typing.Any]  # metadata mapping with string keys
t_Page = typing.TypeVar("t_Page", bound="Page")  # Page or any subclass of it


@dataclasses.dataclass
class Page:
    sourcepath: typing.Union[pathlib.Path, typing.Callable[["Page"], t_Content], t_Content]


@@ 45,34 66,39 @@ class Page:

    @classmethod
    def from_path(cls: typing.Type[t_Page], path: pathlib.Path) -> t_Page:
        """Create a page for a source file located below `curdir`.

        The path is resolved first and must be relative to `curdir`. The output
        path mirrors the file's location relative to `curdir` inside the
        configured output directory. Metadata comes from the loader registered
        for the file's suffix, falling back to `PureLoader`.

        Raises RuntimeError (via `_ensure_config`) when no site config is set.
        """
        config = _ensure_config()
        path = path.resolve()
        assert path.is_relative_to(curdir)
        return cls(
            sourcepath=path,
            outpath=config.output_directory / path.relative_to(curdir),
            metadata=config.loaders.get(path.suffix, PureLoader()).load_metadata(path),
        )

    @classmethod
    def from_generator(
        cls: typing.Type[t_Page],
        gen: typing.Callable[["Page"], t_Content],
        outrel: pathlib.Path,
        metadata: typing.Optional[t_Meta] = None,
    ) -> t_Page:
        """Create a page whose content is produced by calling `gen`.

        `gen` is stored as the page's source. `outrel` is joined onto the
        configured output directory to form the output path. `metadata`
        defaults to a fresh empty dict.

        Raises RuntimeError (via `_ensure_config`) when no site config is set.
        """
        if metadata is None:
            metadata = {}

        return cls(
            sourcepath=gen,
            outpath=_ensure_config().output_directory / outrel,
            metadata=metadata,
        )

    def copy(self: t_Page) -> t_Page:
        """Return a new page of the same type with a shallow copy of the metadata.

        `sourcepath` and `outpath` are shared with the original. Only the
        top-level metadata dict is duplicated, so nested values stay shared.
        """
        return type(self)(
            sourcepath=self.sourcepath,
            outpath=self.outpath,
            metadata=copy.copy(self.metadata),
        )

    def get_content(self: t_Page) -> t_Content:
    def get_content(self: "Page") -> t_Content:
        if callable(self.sourcepath):
            return self.sourcepath(self)
        elif isinstance(self.sourcepath, pathlib.Path):


@@ 104,6 130,7 @@ class Page:
        self.outpath.write_bytes(content)
        return self


class PageSet:
    _saved_context: list[typing.Optional["PageSet"]]
    _pages: list[Page]


@@ 168,6 195,7 @@ class PageSet:
    def __iter__(self) -> typing.Iterator["Page"]:
        """Iterate over the pages held in `self._pages`, in list order."""
        return iter(self._pages)


# Annotations for the magic
output: typing.Callable[[], PageSet]
transform: typing.Callable[[t_Transformer], PageSet]


@@ 176,6 204,7 @@ select: typing.Callable[[t_Predicate], PageSet]
render: typing.Callable[["Renderer"], PageSet]
conditional_render: typing.Callable[[t_Predicate, "Renderer"], PageSet]


class Loader:
    def load_content(self, path: pathlib.Path) -> bytes:
        """Return the content bytes for `path`; subclasses must override this.

        Raises NotImplementedError on the base class.
        """
        raise NotImplementedError("Can't use Loader.load_content, this must be overwritten in a subclass")


@@ 183,6 212,7 @@ class Loader:
    def load_metadata(self, path: pathlib.Path) -> t_Meta:
        """Return the metadata for `path`; subclasses must override this.

        Raises NotImplementedError on the base class.
        """
        raise NotImplementedError("Can't use Loader.load_metadata, this must be overwritten in a subclass")


class PureLoader(Loader):
    def load_content(self, path: pathlib.Path) -> bytes:
        """Return the file's bytes exactly as stored on disk."""
        with path.open("rb") as handle:
            return handle.read()


@@ 190,6 220,7 @@ class PureLoader(Loader):
    def load_metadata(self, path: pathlib.Path) -> t_Meta:
        """Return a fresh empty metadata dict; `path` is not inspected."""
        metadata: dict[str, typing.Any] = dict()
        return metadata


class YamlFrontmatterLoader(Loader):
    def load_content(self, path: pathlib.Path) -> bytes:
        f = path.open("rb")


@@ 198,11 229,12 @@ class YamlFrontmatterLoader(Loader):
            return f.read()
        f.seek(0)
        loader = yaml.SafeLoader(f)
        loader.get_event(); loader.get_event() # type: ignore[no-untyped-call] # Skip stream and first document start, apparently no type stub available
        while not isinstance(e := loader.get_event(), yaml.DocumentStartEvent): # type: ignore[no-untyped-call]
        loader.get_event()
        loader.get_event()  # Skip stream and first document start, apparently no type stub available
        while not isinstance(e := loader.get_event(), yaml.DocumentStartEvent):
            pass
        f.seek(e.end_mark.index)
        return f.read().lstrip(b'\n')
        return f.read().lstrip(b"\n")

    def load_metadata(self, path: pathlib.Path) -> t_Meta:
        relpath = path.relative_to(curdir)


@@ 214,6 246,7 @@ class YamlFrontmatterLoader(Loader):
        except StopIteration:
            return {"_sourcepath": relpath}


class YamlCompanionLoader(Loader):
    def __init__(self, extension: str = ".meta.yaml"):
        self._extension = extension


@@ 230,19 263,30 @@ class YamlCompanionLoader(Loader):
                return res | {"_sourcepath": relpath}
        return {"_sourcepath": relpath}


def gather(path: pathlib.Path, glob: str) -> PageSet:
    """Build a page set from every path under `path` that matches the glob pattern."""
    matches = path.glob(glob)
    return PageSet(list(map(Page.from_path, matches)))


def gather_single(path: pathlib.Path) -> PageSet:
    """Build a page set containing only the page for `path`."""
    only_page = Page.from_path(path)
    return PageSet([only_page])


def gather_multi(paths: list[pathlib.Path]) -> PageSet:
    """Build a page set with one page per entry of `paths`, in the given order."""
    return PageSet(list(map(Page.from_path, paths)))


def gather_directory(directory: pathlib.Path) -> PageSet:
    """Build a page set from every file found by walking `directory` recursively.

    Files are visited in `os.walk` order; directories themselves do not
    become pages.
    """
    return PageSet(
        [Page.from_path(pathlib.Path(base) / file) for base, _, files in os.walk(directory) for file in files]
    )

def create(outrel: pathlib.Path, generator: typing.Optional[typing.Callable[[Page], t_Content]] = None, metadata: typing.Optional[t_Meta] = None) -> PageSet:

def create(
    outrel: pathlib.Path,
    generator: typing.Optional[typing.Callable[[Page], t_Content]] = None,
    metadata: typing.Optional[t_Meta] = None,
) -> PageSet:
    if generator is None:
        return PageSet([Page.from_generator(lambda p: b"", outrel, metadata)])
    else:


@@ 255,12 299,16 @@ class Renderer:
            return self.render_bytes(page, source)
        elif isinstance(source, dict):
            return self.render_dict(page, source)
        else:
            assert False, "t_Content not being dict or bytes in generic render"

    def render_bytes(self, page: Page, source: bytes) -> tuple[t_Meta, t_Content]:
        """Render bytes content; the base class always raises NotImplementedError."""
        raise NotImplementedError("Can't use Renderer.render, this must be overwritten in a subclass")

    def render_dict(self, page: Page, source: dict[str, typing.Any]) -> tuple[t_Meta, t_Content]:
        """Render dict content; the base class always raises NotImplementedError."""
        raise NotImplementedError("Can't use Renderer.render, this must be overwritten in a subclass")


class EncryptionRenderer(Renderer):
    def __init__(self, default_password: typing.Optional[str] = None):
        self.default_password = default_password


@@ 276,13 324,19 @@ class EncryptionRenderer(Renderer):

        nonce = secrets.token_bytes(16)
        cipher = AES.new(key, AES.MODE_GCM, nonce=nonce)
        assert isinstance(cipher, _mode_gcm.GcmMode) # make mypy happy
        assert isinstance(cipher, _mode_gcm.GcmMode)  # make mypy happy

        header = page.metadata.get("title", "").encode()
        cipher.update(header)
        ciphertext, tag = cipher.encrypt_and_digest(source)

        return {"_encryption": {"pbkdf": {"salt": salt, "iterations": iterations}, "gcm": {"nonce": nonce, "tag": tag, "header": header}}}, ciphertext
        return {
            "_encryption": {
                "pbkdf": {"salt": salt, "iterations": iterations},
                "gcm": {"nonce": nonce, "tag": tag, "header": header},
            }
        }, ciphertext


class PandocRenderer(Renderer):
    def __init__(self, pandoc_opts: list[str], /, outputs_json: bool = False):


@@ 290,7 344,9 @@ class PandocRenderer(Renderer):
        self.outputs_json = outputs_json

    @classmethod
    def markdown_reader(cls, extensions: typing.Optional[list[str]] = None, options: typing.Optional[list[str]] = None) -> "PandocRenderer":
    def markdown_reader(
        cls, extensions: typing.Optional[list[str]] = None, options: typing.Optional[list[str]] = None
    ) -> "PandocRenderer":
        if options is None:
            options = []
        if extensions is None:


@@ 307,7 363,9 @@ class PandocRenderer(Renderer):
        return cls(["-f", "json", "-t", "html5"] + options)

    @classmethod
    def markdown_to_html(cls, extensions: typing.Optional[list[str]] = None, options: typing.Optional[list[str]] = None) -> "PandocRenderer":
    def markdown_to_html(
        cls, extensions: typing.Optional[list[str]] = None, options: typing.Optional[list[str]] = None
    ) -> "PandocRenderer":
        if options is None:
            options = []
        if extensions is None:


@@ 332,10 390,14 @@ class PandocRenderer(Renderer):
        res = subprocess.check_output(["pandoc"] + self.opts, input=json.dumps(source).encode())
        return {}, self._maybe_parse_json(res)


PandocElement = dict[str, typing.Any]  # Not great, but okay
PandocValue = typing.Union[str, list[typing.Union[str, PandocElement]]]
# Signature of a filter action: four positional arguments typed
# (str, PandocValue, str, dict) returning None, one element, or a list of elements.
PandocAction = typing.Callable[
    [str, PandocValue, str, dict[str, typing.Any]], typing.Union[None, list[PandocElement], PandocElement]
]


class PandocWalker(Renderer):
    def __init__(self, format: str, actions: list[PandocAction]):
        self.format = format


@@ 343,9 405,10 @@ class PandocWalker(Renderer):

    def render_dict(self, page: Page, source: dict[str, typing.Any]) -> tuple[t_Meta, t_Content]:
        """Run every configured action over the document, one walk per action.

        Each action is applied exactly once, in order, with the output of one
        walk feeding the next. The metadata passed to the action is the
        document's own "meta" entry merged with the page metadata; page
        metadata wins on key conflicts.

        Returns an empty metadata dict and the transformed document.
        """
        for action in self.actions:
            source = pandocfilters.walk(source, action, self.format, source.get("meta", {}) | page.metadata)  # pyright: ignore
        return {}, source


class CacheRegistrationRenderer(Renderer):
    def __init__(self, cachedir: pathlib.Path, meta_key: str):
        self.cachedir = cachedir


@@ 355,7 418,7 @@ class CacheRegistrationRenderer(Renderer):
        assert self.meta_key in page.metadata
        location_key, content_key = page.metadata[self.meta_key]
        (self.cachedir / location_key).write_text(content_key)
        

    def render_bytes(self, page: Page, source: bytes) -> tuple[t_Meta, t_Content]:
        """Call `self._register(page)`, then return an empty metadata dict and `source` unchanged."""
        self._register(page)
        return {}, source


@@ 387,6 450,7 @@ class _JinjaTemplate(Renderer):
            ctx["config"] = {}
        return {}, self.template.render(ctx).encode()


class Site:
    output_directory: pathlib.Path
    loaders: dict[str, Loader]


@@ 401,17 465,23 @@ class Site:
        global _config
        _config = self
        return self
        

    def __exit__(self, *_: typing.Any) -> None:
        """Leave the `with` block without any cleanup; exceptions are not suppressed."""
        # Do nothing
        pass


T = typing.TypeVar("T")


class meta_p(typing.Generic[T]):
    pred: typing.Union[typing.Callable[[Page], T], typing.NoReturn] # Ugly union to not make mypy treat it as a method
    pred: typing.Union[typing.Callable[[Page], T], typing.NoReturn]  # Ugly union to not make mypy treat it as a method

    def __init__(self, key: typing.Union[str, typing.Callable[[Page], T]], default: typing.Optional[T] = None):
        """Build a predicate from a metadata key or from an arbitrary callable.

        With a string `key`, the predicate returns `p.metadata.get(key, default)`.
        With a callable `key`, that callable is used directly and `default`
        must be left as None.
        """
        if isinstance(key, str):
            # Bind the key through the outer lambda's parameter so the type can't change from mypy's PoV
            self.pred = (lambda k: lambda p: p.metadata.get(k, default))(key)
        else:
            assert default is None
            self.pred = key


@@ 419,15 489,16 @@ class meta_p(typing.Generic[T]):
    def __invert__(self) -> "meta_p[bool]":
        """Return a new predicate whose value is `not self(p)`."""
        return meta_p(lambda p: not self(p))

    def __eq__(self, x: T) -> "meta_p[bool]":  # type: ignore[override] # Break liskov for syntactic convenience
        """Return a new predicate that is true when this predicate's value equals `x`.

        Note that this builds a predicate rather than comparing two `meta_p` objects.
        """
        return meta_p(lambda p: self(p) == x)

    def __ne__(self, x: T) -> "meta_p[bool]":  # type: ignore[override] # Break liskov for syntactic convenience
        """Return a new predicate that is true when this predicate's value differs from `x`.

        Note that this builds a predicate rather than comparing two `meta_p` objects.
        """
        return meta_p(lambda p: self(p) != x)

    def __call__(self, p: Page) -> T:
        """Evaluate the wrapped predicate on page `p` and return its value."""
        return self.pred(p)


# Module-level state, unset (None) until assigned elsewhere in this module:
# `_global_context` is replaced by the wrappers that `_expose_global` creates
# with overwrite=True; `_config` is the active Site read by `_ensure_config`.
_global_context: typing.Optional[PageSet] = None
_config: typing.Optional[Site] = None