~vladh/hare-regex-tex

197943315d808a328a6fac75c610e197d925a22b — Vlad-Stefan Harbuz 2 months ago 9ac310c
add tex demo
7 files changed, 206 insertions(+), 0 deletions(-)

A .gitignore
A Makefile
M README.md
A exprs.txt
A exprs/.gitignore
A hare-regex-demo.tex
A run.sh
A .gitignore => .gitignore +4 -0
@@ 0,0 1,4 @@
*.aux
*.log
*.out
*.pdf

A Makefile => Makefile +5 -0
@@ 0,0 1,5 @@
# `all` names a sequence of commands rather than a file, so it is declared
# phony and runs on every invocation.
.PHONY: all

# Build the demo. The recipe lines run in order: run.sh writes the
# per-expression files under exprs/, then xelatex typesets
# hare-regex-demo.tex, which \includes them.
# -shell-escape is presumably needed by the minted package that the .tex file
# loads -- confirm before removing it.
all:
	./run.sh
	xelatex -shell-escape hare-regex-demo.tex

M README.md => README.md +11 -0
@@ 7,3 7,14 @@ Creates a LaTeX TikZ illustration from a regular expression using Hare's regular
```sh
hare run regex-tex.ha 'My title' '[mM]y expr?'
```

## Demo

Put the expressions you want in `exprs.txt`, one per line in the form `Title;expression`, run `make`, then open `hare-regex-demo.pdf`.

## TODO

Before users can use this, the following things must be done.

* Upstream the `regex::` patch
* Fix .tex file to remove custom fonts etc.

A exprs.txt => exprs.txt +8 -0
@@ 0,0 1,8 @@
Chained alternation;^(Privat|Jagd)(haftpflicht|schaden)versicherungs(police|betrag)$
UK postcode;^([a-zA-Z]{1,2}[[:digit:]]{1,2})[[:space:]]*([[:digit:]][a-zA-Z]{2})$
Ranges;([a-zA.][a[:digit:]][a-z[:digit:]])[^a-zA.][^a[:digit:]][^a-z[:digit:]]
Repetition;(alpha)?(beta)+(gamma)*(ab|a)(bcd|c)(d|.*){2,}
Unicode;[а-я]+[а-д][А-Я]+[а-ш]+[a-ż]+[ζ-ξ]+
Email address;^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$
AM/PM time;(1[0-2]|0?[1-9]):[0-5][0-9] (AM|PM)
Hex colour;^#?([a-fA-F0-9]{6}|[a-fA-F0-9]{3})$

A exprs/.gitignore => exprs/.gitignore +2 -0
@@ 0,0 1,2 @@
*
!.gitignore

A hare-regex-demo.tex => hare-regex-demo.tex +166 -0
@@ 0,0 1,166 @@
\documentclass[11pt,a3paper,notitlepage]{article}
\usepackage{geometry}
\geometry{a3paper, left=20mm, top=20mm, bottom=20mm, right=20mm}
\usepackage[yyyymmdd]{datetime}
\usepackage{lmodern}
\renewcommand{\dateseparator}{--}
\usepackage[htt]{hyphenat}
\usepackage{hyperref}
\usepackage{setspace}
\usepackage{sectsty}
\usepackage{xcolor}
\usepackage{fontspec}
\usepackage[skip=2mm, indent=0mm]{parskip}
\usepackage{varwidth}
\usepackage{longtable}
\usepackage{makecell}
\usepackage{pdfpages}
\usepackage{float}
\usepackage{inconsolata}
\usepackage{tabularray}
\usepackage{tabularx}
\usepackage{multicol}
\usepackage{caption}
\usepackage{tikz}
\usetikzlibrary{automata,positioning,arrows.meta}

% NOTE(review): T1 fontenc is loaded after fontspec although the Makefile
% builds this document with xelatex -- confirm this is intended.
\usepackage[T1]{fontenc}
% Give `_` category code 12 ("other") so underscores can be typed literally in
% text instead of acting as the math subscript character.
\catcode`\_=12

% Frames
% `todoframe` environment: a breakable, sharp-cornered box with a white
% background and a 0.4pt black frame.
%   #1 (optional, default empty): extra tcolorbox options, appended after the
%      defaults below.
%   #2 (mandatory): the box title, set in italics inside a boxed title that is
%      attached at the top left of the frame.
\usepackage[most]{tcolorbox}
\newtcolorbox{todoframe}[2][]{%
  enhanced,colback=white,colframe=black,coltitle=black,
  breakable,
  sharp corners,boxrule=0.4pt,
  fonttitle=\itshape,
  attach boxed title to top left={yshift=-0.3\baselineskip-0.4pt,xshift=2mm},
  boxed title style={tile,size=minimal,left=0.5mm,right=0.5mm,
    colback=white,before upper=\strut},
  title=#2,#1
}

% Listings
% Code listings are typeset with minted, wrapped in tcolorbox listings.
\usepackage{minted}
\tcbuselibrary{minted, skins}
\tcbset{listing engine=minted}
% \shellfgcolor redefines \FancyVerbFormatText so that the text it receives is
% coloured white. It is installed through `formatcom` in \setminted below.
\newcommand{\shellfgcolor}{%
  \def\FancyVerbFormatText##1{\textcolor{white}{##1}}%
}
% Dark background colour shared by code blocks and inline \code.
\definecolor{code-bg}{HTML}{2e3440}
% Inside minted environments, \colorbox is redefined to output only its
% content (#3), so any coloured boxes emitted there lose their background.
\AtBeginEnvironment{minted}{\renewcommand{\colorbox}[3][]{#3}}
% `codeblock` environment: a frameless, breakable listing on the code-bg
% background, highlighted with the `nord` minted style.
%   #1: the minted language name used for highlighting.
\newtcblisting{codeblock}[1]{
  listing only,
  minted style=nord,
  minted language=#1,
  colback=code-bg,
  enhanced,
  frame hidden,
  breakable,
}
% Global minted defaults: footnote-sized text, white default foreground (via
% \shellfgcolor), 2-column tabs, long lines broken with `~` marking the
% continuation, and common leading indentation removed (autogobble).
\setminted{
  fontsize=\footnotesize,
  formatcom=\shellfgcolor,
  tabsize=2,
  breaklines,
  breaksymbolleft={~},
  autogobble,
}
% \code{text}: inline code, white monospaced text on the code-bg background.
% \hyphenchar\font=45 sets the hyphenation character of the current
% (monospaced) font to character code 45 (`-`).
\newcommand\code[1]{\colorbox{code-bg}{\textcolor{white}{\ttfamily\hyphenchar\font=45\relax #1}}}

% Fonts (fontspec). These are selected by font name; presumably the build
% fails if Sabon LT Pro or Iosevka Term SS07 is not installed -- the README
% TODO lists removing the custom fonts.
% Main (serif) family, with explicit bold and italic faces.
\setmainfont[
  BoldFont={Sabon LT Pro Bold},
  ItalicFont={Sabon LT Pro Italic},
]{Sabon LT Pro}
% The sans family is mapped to the same typeface as the main font.
\setsansfont{Sabon LT Pro}
% Line spacing factor for the whole document.
\renewcommand{\baselinestretch}{1.4}
% Section headings (sectsty): normal weight, sans family.
\allsectionsfont{\normalfont\sffamily}
% Monospaced family, used by \ttfamily (e.g. in \code).
\setmonofont{Iosevka Term SS07}

% Use an empty label format for captions (caption package).
\captionsetup{labelformat=empty}

% Gap between columns in multi-column layouts.
\setlength{\columnsep}{1cm}

% \defitem{a}{b}: typesets the two arguments, each in its own group and
% separated by a space, followed by a paragraph break, all inside a
% `samepage` environment.
% NOTE(review): not used in the visible part of this file; possibly used by
% the generated exprs/*.tex files -- confirm.
\newcommand\defitem[2]{
  \begin{samepage}
    {#1} {#2}

  \end{samepage}
}

\begin{document}

% Page title.
{\huge Hare Regular Expression Engine Virtual Machine NFA Representation}

% Legend table: one row per instruction kind. The left cell draws the
% instruction as a rectangular TikZ node; the right cell describes it.
% Horizontal rules are drawn between rows (hlines).
\begin{tblr}{
  colspec = {X[l,h]X[l,m]},
  stretch = 0,
  rowsep = 6pt,
  hlines,
}

\begin{tikzpicture}
  \node[state,align=center] (n0) [rectangle] {literal\\\textit{\textquotesingle<c>\textquotesingle}};
\end{tikzpicture}
& Consume a literal rune \code{c} \\

\begin{tikzpicture}
  \node[state,align=center] (n0) [rectangle] {charset\\\textit{(not) $\chi^{idx}$}};
\end{tikzpicture}
& Consume one of a set of runes included in (or excluded from) charset \code{idx} \\

\begin{tikzpicture}
  \node[state,align=center] (n0) [rectangle] {any};
\end{tikzpicture}
& Consume any rune \\

\begin{tikzpicture}
  \node[state,align=center] (n0) [rectangle] {split\\\textit{$\rightarrow$<dest>}};
\end{tikzpicture}
& Start a new execution thread from program counter \code{dest}, in parallel \\

\begin{tikzpicture}
  \node[state,align=center] (n0) [rectangle] {jump\\\textit{$\rightarrow$<dest>}};
\end{tikzpicture}
& Jump to the instruction at program counter \code{dest} \\

\begin{tikzpicture}
  \node[state,align=center] (n0) [rectangle] {skip};
\end{tikzpicture}
& Start a new execution thread at the next index and stop the current thread \\

\begin{tikzpicture}
  \node[state,align=center] (n0) [rectangle] {match\\\textit{anchored}};
\end{tikzpicture}
& End matching, anchored or unanchored \\

\begin{tikzpicture}
  \node[state,align=center] (n0) [rectangle] {groupstart};
\end{tikzpicture}
& Begin a capture group \\

\begin{tikzpicture}
  \node[state,align=center] (n0) [rectangle] {groupend};
\end{tikzpicture}
& End a capture group \\

\begin{tikzpicture}
  \node[state,align=center] (n0) [rectangle] {repeat\\\textit{$\rightarrow$<from>, <x>–<y> times}};
\end{tikzpicture}
& Repeat from instruction \code{from} to the current instruction, a minimum of \code{x} and maximum of \code{y}
times

\end{tblr}

\pagebreak

% Per-expression diagrams. run.sh writes these files as exprs/NN.tex, one per
% line of exprs.txt. The list is hard-coded to eight files, matching the eight
% entries currently in exprs.txt; update it when entries are added or removed.
\include{exprs/01.tex}
\include{exprs/02.tex}
\include{exprs/03.tex}
\include{exprs/04.tex}
\include{exprs/05.tex}
\include{exprs/06.tex}
\include{exprs/07.tex}
\include{exprs/08.tex}

\end{document}

A run.sh => run.sh +10 -0
@@ 0,0 1,10 @@
#!/bin/sh -eu
# Generate one LaTeX file per entry of exprs.txt.
#
# Each non-empty line of exprs.txt has the form "Title;expression". Everything
# before the first ';' is the title; everything after it is the regular
# expression (which may itself contain ';'). The Nth entry is passed to
# regex-tex.ha and its output is written to exprs/NN.tex (N zero-padded to two
# digits).

# Re-apply the strict flags so they still hold when the script is started as
# `sh run.sh`, in which case the shebang line is not consulted.
set -eu

idx=1
# IFS= keeps leading/trailing whitespace, which can be significant in a regex.
# The `|| [ -n "$line" ]` clause still processes a final line that has no
# trailing newline; `read` returns non-zero for it and the loop would
# otherwise drop it silently.
while IFS= read -r line || [ -n "$line" ]; do
    # Skip empty lines so a stray blank line does not produce a bogus entry
    # or shift the numbering of the entries after it.
    [ -n "$line" ] || continue
    filename=$(printf "%02d.tex" "$idx")
    title=${line%%";"*}
    exp=${line#*";"}
    # Give the child /dev/null as stdin so it cannot consume the rest of
    # exprs.txt, which is the loop's own stdin.
    hare run regex-tex.ha "$title" "$exp" > "exprs/$filename" < /dev/null
    idx=$((idx+1))
done < "exprs.txt"