~adigitoleo/reshape

925d23d83691728646c11bdec52e5d34c3b59614 — adigitoleo 3 months ago v0.1.0
First commit, version 0.1.0
7 files changed, 904 insertions(+), 0 deletions(-)

A LICENSE
A README.md
A config.nims
A reshape.nimble
A src/reshape.nim
A tests/config.nims
A tests/test.nim
A  => LICENSE +6 -0
@@ 1,6 @@
BSD Zero Clause License

Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted.

THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.


A  => README.md +74 -0
@@ 1,74 @@
# reshape

Reshape delimited text files.

Reads input from a file if a filename is given as the last argument.
Otherwise reads from the `stdin` stream (e.g. a Unix pipe) until end-of-file is reached.
A filename argument of `-` can also be used to switch to `stdin` input.
Note that the whole input is read into memory at once for processing.

## Examples

In:

```sh
reshape -d, -t -
a,b,c^D
```

Out:

```
a
b
c
```

In:

```sh
reshape -d, -c1 -s3x4
,a,b,c,d
,1,2,3,4
,",",,ß^D
```

Out:

```
  a,b, c,d
  1,2, 3,4
",", ,ß,
```

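Note that cells containing multi-byte (non-ASCII) characters are currently not aligned properly, because the padding width is computed from the byte length of each cell.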

## Build

`nim c src/reshape.nim`

## Test

`nimble test`

## Use

Run with the `--help` option if built, or check the `printHelp` proc in the code.
Note that short options must not be separated from their arguments by a space.
Use `:` or `=` instead, or append the argument to the option flag directly.
This behaviour is inherited from Nim's [parseopt][parseopt] module.

## Similar solutions

- [BSD's rs command](https://man.netbsd.org/rs.1)
- [transposer](https://github.com/keithhamilton/transposer)
- [Some ideas for transposing files using awk](https://stackoverflow.com/questions/1729824/an-efficient-way-to-transpose-a-file-in-bash)

## TODO

- Support negative values in `--skiprows` and `--skipcols`
  for row/column indices counted backwards from the last row/column.
- Support a range syntax for `--skiprows` and `--skipcols`
- Fix cell padding for tables with unicode characters.

[parseopt]: https://nim-lang.org/docs/parseopt.html

A  => config.nims +2 -0
@@ 1,2 @@
# Set the compiler's `outdir` switch so build output goes to build/bin/.
switch("outdir", "build/bin/")


A  => reshape.nimble +10 -0
@@ 1,10 @@
# Package metadata.
version = "0.1.0"
author = "adigitoleo"
description = "Reshape a data array"
license = "0BSD"

# Minimum required Nim version.
requires "nim >= 1.4.8"

# Sources live under src/.
srcdir = "src"

# Name of the binary built by this package.
bin = @["reshape"]

A  => src/reshape.nim +412 -0
@@ 1,412 @@
import algorithm
import logging
import os
import parseopt
import sequtils
import streams
import strformat
import strutils
import typetraits

const NimblePkgVersion {.strdefine.} = "Unknown"
const version = NimblePkgVersion
proc printVersion() =
    ## Prints the package version (or a note that it is unknown) together with
    ## the compile date and time, then exits with a success status.
    let banner =
        if version != "Unknown":
            "Version {version} of reshape, compiled on {CompileDate} at {CompileTime}".fmt
        else:
            "Unknown version of reshape, compiled on {CompileDate} at {CompileTime}".fmt
    echo banner
    quit(QuitSuccess)


proc printHelp() =
    ## Prints the usage summary and option reference to standard output,
    ## then exits with a success status.
    const helpText = """
Usage: reshape [-h|-v][--help|--version]
       reshape [-i] TABLE
       reshape [-p][-t]
               [-d:delim][-c:c1,c2,...][-r:r1,r2,...][-o:file][-s:RxC] TABLE

Options:
-v,--version            print version information
-i,--info               print diagnostic information for TABLE
-p,--nopad              don't pad output cells with leading whitespace
-t,--transpose          transpose TABLE, swap meaning of "rows" and "columns"
-d,--delim <delim>      split input lines at each occurance of <delim>
-s,--shape <RxC>        reshape TABLE into R rows and C columns, applied last
-c,--skipcols <c1,...>  skip columns <c1,...> in TABLE
-r,--skiprows <r1,...>  skip rows <r1,...> in TABLE
-o,--out <file>         write output to <file>

Operands:
    TABLE               File path or input stream
                        containing tabular input data

Reshape TABLE, or print diagnostic metadata. When using --transpose,
"rows" and "columns" for other options refer to the table before transposing.
The default delimiter is a tab, i.e. `\t`. Reshaping with --shape is always
applied after --skip{rows,cols} and --transpose. For short options,
option arguments must be separated from the flag by a colon or equals sign,
e.g. `-d:,`. Multi-byte delimiters such as unicode characters are not supported.
Tab and space delimiters can be specified with -d:'\t' and -d:'\ ' respectively.
Empty columns are propagated without warning."""
    echo helpText
    quit(QuitSuccess)


type
    # Table dimensions: number of rows and number of columns.
    Shape = tuple[rows, cols: int]

    # Parsed command line options, as filled in by `parseOpts`.
    Opts = tuple
        inputFile: string
        outputFile: string
        info: bool
        pretty: bool
        transpose: bool
        delimiter: char
        newShape: Shape
        skipCols: seq[int]
        skipRows: seq[int]

    # Raised for missing or invalid command line option arguments.
    ArgumentError* = object of CatchableError


proc chooseInput(filename: string): Stream =
    ## Returns a stream over `stdin` when `filename` is empty or `-`,
    ## otherwise a stream over the file opened via `openFileStream`.
    case filename
    of "", "-":
        result = newFileStream(stdin)
    else:
        result = openFileStream(filename)


func splitCells(row: string, delimiterIndices: seq[int]): seq[string] =
    ## Splits `row` by removing the `char`s at `delimiterIndices` and returns the
    ## cells in between, each stripped of surrounding whitespace.
    ## `delimiterIndices` is expected to hold valid indices into `row`
    ## in ascending order.
    ## Leading or trailing delimiters result in corresponding empty cells.
    ## If `delimiterIndices` is empty, `row` is returned as a single cell
    ## without stripping.
    if len(delimiterIndices) == 0:
        return @[row]
    var cells = newSeqOfCap[string](len(delimiterIndices) + 1)
    var cellStart = 0
    for delimiterIndex in delimiterIndices:
        # An empty slice (delimiter directly at `cellStart`) yields an empty cell.
        cells.add(strip(row[cellStart ..< delimiterIndex]))
        cellStart = delimiterIndex + 1
    # Last cell: empty when the final delimiter is the last character of the row.
    cells.add(strip(row[cellStart .. ^1]))
    return cells


func splitCells(row: string, delimiter: char): seq[string] =
    ## Splits row on delimiter, ignoring delimiters in quoted cells (double quotes only).
    ## A delimiter counts as quoted when a `"` is found both before it (after the
    ## previously detected closing quote) and at or after it; everything up to
    ## that closing quote is then kept as part of the same cell.
    ## Leading or trailing delimiters result in corresponding empty cells.
    var
        start: int
        delimiterIndices: seq[int]
        prevClosingQuoteIndex = -1

    # `<=` so that a delimiter sitting in the very last position is still found
    # (e.g. "a,," has three cells, "," has two).
    while start <= row.high:
        let delimiterIndex = row.find(delimiter, start = start)
        if delimiterIndex == -1: break
        start = delimiterIndex + 1
        if delimiterIndex == 0:
            # A delimiter in the first position cannot be preceded by a quote.
            delimiterIndices.add(0)
            continue
        let prevQuoteIndex = row.rfind(
            '"',
            start = prevClosingQuoteIndex + 1,
            last = delimiterIndex,
        )
        let nextQuoteIndex =
            if prevQuoteIndex != -1: row.find('"', start = delimiterIndex)
            else: -1
        if nextQuoteIndex != -1:
            # Quoted delimiter: skip ahead past the closing quote so that any
            # further delimiters inside the same quoted cell are ignored too.
            prevClosingQuoteIndex = nextQuoteIndex
            start = nextQuoteIndex + 1
        else:
            delimiterIndices.add(delimiterIndex)
    return splitCells(row, delimiterIndices)


proc readShape*(
    input: Stream, delimiter: char, sink: Stream = newStringStream(), warnings = false
    ): Shape =
    ## Guesses the shape of the delimited text given in the input stream.
    ## The row count is the number of lines read, the column count is the largest
    ## number of cells found in any single line.
    ## Quoted delimiters are skipped, and the quote char `"` is an illegal delimiter
    ## (which causes a `ValueError` to be raised before anything is read).
    ## Every line read from `input` is also written to `sink`; if no `sink` is
    ## given, a fresh string stream is used. When `warnings` is true, a warning is
    ## logged if only one column was found, or else if rows were flagged as
    ## malformed.
    if delimiter == '"':
        raise newException(ValueError, "delimiter must not be the quote character (U+0022)")
    var
        rowCount: int
        colCount: int
        badRows: seq[int]  # One-based row numbers of rows flagged as malformed.
        line: string
    while input.readLine(line):
        sink.writeLine(line)
        inc rowCount
        var currentColCount = len(line.splitCells(delimiter))
        if currentColCount >= colCount:
            if currentColCount > colCount:
                # A wider row was found: flag the row just before it
                # and adopt the new width.
                if rowCount > 1: badRows.add(rowCount - 1)
                colCount = currentColCount
            else:
                # Row matches the current width; nothing was flagged.
                continue
        else:
            # Row is narrower than the widest row seen so far.
            badRows.add(rowCount)

        # Track a maximum of 100 malformed rows.
        # The 100th item is always the last known malformed row.
        # A trailing zero (101st item) denotes truncation.
        # Reminder: `badRows` itself is indexed from zero.
        if len(badRows) >= 101:
            badRows[99] = badRows[^1]
            badRows[100] = 0
            if len(badRows) == 102: badRows.delete(101)

    if warnings:
        if colCount == 1: warn("delimiter '{delimiter}' not found.".fmt)
        elif len(badRows) > 0:
            var badRowsTrunc =
                if badRows[^1] == 0:
                    # Truncated list: first 99 entries, ellipsis, last known bad row.
                    badRows[0..^3].join(",") & "..." & $badRows[^2]
                else:
                    badRows.join(",")
            warn("encountered malformed rows: {badRowsTrunc}.".fmt)
    return (rowCount, colCount)


func toSlices(s:seq[int]): seq[Slice[int]] =
    ## Condenses a sorted sequence of integers to a sequence of slices, where
    ## each slice covers a run of values without gaps larger than one,
    ## e.g. `@[0, 1, 2, 5, 6, 9]` becomes `@[0..2, 5..6, 9..9]`.
    ## Returns an empty sequence for empty input.
    ## Raises a `ValueError` if `s` is not sorted.
    if not isSorted(s): raise newException(ValueError, "sequence must be sorted")
    var slices: seq[Slice[int]]
    # Nothing to condense; also avoids indexing into an empty sequence below.
    if len(s) == 0: return slices
    var sliceStart = s[0]
    for i in 1 ..< len(s):
        # A gap larger than one closes the current run and starts a new one.
        if s[i] - s[i - 1] > 1:
            slices.add(sliceStart .. s[i - 1])
            sliceStart = s[i]
    slices.add(sliceStart .. s[^1])
    return slices


proc readTable*(input: Stream, delimiter: char, skipRows, skipCols: seq[int] = @[]):
    seq[seq[string]] =
    ## Reads delimited tabular data from `input` and returns a sequence of rows.
    ## Rows are sequences of cells (strings). Quoted delimiters are skipped.
    ## Raises a `ValueError` if the quote character `"` is used as a delimiter,
    ## or either `skipRows` or `skipCols` contain negative values.
    ## Raises an `IOError` if no data can be read from the `input` stream.
    ## Quietly propagates empty cells. Empty cells are also created to complete
    ## malformed rows of the table. They are added to the right of existing cells.
    ## `skipRows` and `skipCols` can be used to exclude the specified rows/columns.
    ## This is done after filling out malformed rows. Rows/columns are zero-indexed.
    ## Duplicate skip indices are ignored. If no rows or no columns remain,
    ## an empty sequence is returned.
    if any(skipRows, proc(x: int): bool = x < 0) or
        any(skipCols, proc(x: int): bool = x < 0):
            raise newException(
                ValueError, "indices for skipped rows/columns must be non-negative"
            )
    if atEnd(input): raise newException(IOError, "input stream must not be exhausted")

    let uniqueSkipCols = deduplicate(sorted(skipCols), isSorted = true)
    let uniqueSkipRows = deduplicate(sorted(skipRows), isSorted = true)
    # Column ranges to delete from every row, computed once. Highest range first,
    # so that deleting one range does not shift the indices of the remaining ones.
    let skipSlices =
        if len(uniqueSkipCols) > 0: reversed(toSlices(uniqueSkipCols))
        else: newSeq[Slice[int]]()

    # The input is buffered while its shape is measured, then re-read below.
    var stream = newStringStream()
    defer: close stream  # Closed on every exit path, including early returns.
    let shape = readShape(input, delimiter, stream)
    var table = newSeqWith(
        shape.rows - len(uniqueSkipRows),
        newSeq[string](shape.cols - len(uniqueSkipCols)),
    )
    var line: string
    var rowIndex: int
    var newRowIndex: int
    stream.setPosition(0)
    while stream.readLine(line):
        if rowIndex in uniqueSkipRows: inc rowIndex; continue
        var cells = line.splitCells(delimiter)
        for slice in skipSlices:
            when NimMajor == 1 and NimMinor < 6:
                # https://github.com/nim-lang/Nim/commit/1d6863a7899fd87fd9eb017ae370ef37db18ad32
                cells.delete(slice.a, slice.b)
            else:
                cells.delete(slice)
        # Short rows only overwrite their leading cells; the rest stay empty.
        table[newRowIndex][0 ..< len(cells)] = cells
        inc newRowIndex
        inc rowIndex
    # Make zero-column return the same as zero-row return.
    if len(table) > 0 and len(table[0]) == 0: return newSeq[seq[string]]()
    return table


func transpose*(table: seq[seq[string]]): seq[seq[string]] =
    ## Returns a transposed copy of `table`: cell `[r][c]` of the input ends up
    ## at `[c][r]` of the output. The output dimensions are derived from the
    ## number of rows and the length of the first row, so rows are assumed to
    ## have equal length. Shorter rows leave empty cells behind,
    ## a row longer than the first one raises an `IndexDefect`.
    ## An empty table is returned unchanged.
    if len(table) == 0:
        return table
    let
        rowCount = len(table)
        colCount = len(table[0])
    var flipped = newSeqWith(colCount, newSeq[string](rowCount))
    for r in 0 ..< rowCount:
        for c in 0 ..< len(table[r]):
            flipped[c][r] = table[r][c]
    return flipped


func reshape*(table: seq[seq[string]], newShape: Shape): seq[seq[string]] =
    ## Returns a reshaped copy of `table`: the cells are read row by row and
    ## written, in the same order, into a new table of shape `newShape`.
    ## The capacity check uses the length of the first row, so rows are assumed
    ## to have equal length. Raises an `IndexDefect` for overfull inputs.
    ## Raises a `ValueError` if `newShape` contains non-positive integers,
    ## or would result in a different capacity (number of cells).
    ## An empty table is returned unchanged.
    let (targetRows, targetCols) = newShape
    if targetRows < 1 or targetCols < 1:
        raise newException(ValueError, "shape must be a tuple of positive integers")
    if len(table) == 0:
        return table
    let capacity = len(table) * len(table[0])
    if capacity != targetRows * targetCols:
        raise newException(ValueError, "new shape must retain table capacity")
    var reshaped = newSeqWith(targetRows, newSeq[string](targetCols))
    # Running count of cells written so far; it determines the target position.
    var written = 0
    for row in table:
        for cell in row:
            reshaped[written div targetCols][written mod targetCols] = cell
            inc written
    return reshaped


proc padCells*(table: var seq[seq[string]]) =
    ## Pads cells in-place with leading whitespace to right-align tabular columns.
    ## Each column is padded to the length of its longest cell, where length is
    ## measured with `len`. An empty table is left untouched.
    ## Raises a `ValueError` if the rows don't all contain the same amount of cells.
    # Nothing to pad; also avoids indexing the first row of an empty table.
    if len(table) == 0: return
    var cellSizes = newSeq[int](len(table[0]))
    for row in table:
        if len(row) != len(cellSizes):
            raise newException(ValueError, "must provide rows of equal length")
        for i, cell in row.pairs:
            cellSizes[i] = max(cellSizes[i], len(cell))
    for row in table.mitems:
        for i, cell in row.mpairs:
            cell = align(cell, cellSizes[i])


proc printInfo(input: Stream, delimiter: char) =
    ## Measures the table in `input` with `readShape` (warnings enabled)
    ## and prints the resulting row and column counts.
    let shape = readShape(input, delimiter, warnings = true)
    let
        rows = shape.rows
        cols = shape.cols
    echo "Rows: {rows}".fmt
    echo "Columns: {cols}".fmt


proc validate(key, val: string): string =
    ## Returns the argument `val` given for option `key`, translating the
    ## two-character escapes `\t` and `\ ` into a tab and a space respectively.
    ## Any other value, including other backslash sequences, is returned as-is.
    ## Raises an `ArgumentError` if `val` is empty.
    let parsedVal =
        if val == "\\t": "\t"
        elif val == "\\ ": " "
        else: val  # Unknown escapes are kept so later checks can report them.
    if parsedVal == "":
        raise newException(ArgumentError, "option {key} requires an argument".fmt)
    return parsedVal


func validateChar(key, val: string): char =
    ## Validates the argument of option `key` via `validate` and returns it as
    ## a single `char`. Raises an `ArgumentError` if it is longer than one byte.
    let parsedVal = validate(key, val)
    if len(parsedVal) <= 1:
        return parsedVal[0]
    raise newException(
        ArgumentError, "must provide a single-byte delimiter, not '{parsedVal}'.".fmt
    )


func validateShape(key: string, val: string): Shape =
    ## Parses a shape argument of the form `<rows>x<cols>`, e.g. `3x4`.
    ## Raises a `ValueError` (from `parseInt`) if a component is not an integer.
    ## Raises an `ArgumentError` if the argument is missing, does not consist of
    ## exactly two components, or contains non-positive integers.
    # Unpacking operator would be nice: https://forum.nim-lang.org/t/8793
    let shape = validate(key, val).split('x').map(parseInt)
    # Guard the indexing below; without it a value like `3` raises an
    # `IndexDefect` and trailing components such as `3x4x5` are silently dropped.
    if len(shape) != 2:
        raise newException(
            ArgumentError, "must provide new shape as <rows>x<cols>, e.g. 3x4"
        )
    if any(shape, proc(x: int): bool = x < 1):
        raise newException(
            ArgumentError, "must provide positive integers for new shape"
        )
    return (rows: shape[0], cols: shape[1])


func validateSkips(key: string, val: string): seq[int] =
    ## Parses a comma-separated list of one-based row/column numbers and returns
    ## them as zero-based indices. Raises a `ValueError` (from `parseInt`) for
    ## non-integer items and an `ArgumentError` for items smaller than one.
    let oneBased = validate(key, val).split(',').map(parseInt)
    if oneBased.anyIt(it < 1):
        raise newException(
            ArgumentError, "indices for skipped rows/columns must be positive"
        )
    return oneBased.mapIt(it - 1)


proc parseOpts*(cmdline = ""): Opts =
    ## Parses command line options from the `cmdline` string. With the default
    ## empty string, `initOptParser` falls back to the command line of the
    ## running process. Defaults: padding enabled, no transpose, tab delimiter.
    ## If several positional arguments are given, the last one is used as input.
    ## `--help` and `--version` print their output and terminate the process.
    ## Raises an `ArgumentError` on unrecognized options or illegal argument
    ## values; non-integer numbers raise a `ValueError`.
    var parser = initOptParser(
        cmdline,
        shortNoVal = {'h' ,'v', 'i', 'p', 't'},
        longNoVal = @["help", "version", "info", "nopad", "transpose"],
    )
    # Set defaults.
    var opts: Opts
    opts.pretty = true
    opts.transpose = false
    opts.delimiter = '\t'

    for kind, key, val in getopt(parser):
        case kind
        of cmdLongOption, cmdShortOption:
            case key
            # Process options without arguments.
            of "help", "h": printHelp()
            of "version", "v": printVersion()
            of "info", "i": opts.info = true
            of "nopad", "p": opts.pretty = false
            of "transpose", "t": opts.transpose = true
            # Process options with arguments.
            of "out", "o": opts.outputFile = validate(key, val)
            of "delim", "d": opts.delimiter = validateChar(key, val)
            of "shape", "s": opts.newShape = validateShape(key, val)
            of "skipcols", "c": opts.skipCols = validateSkips(key, val)
            of "skiprows", "r": opts.skipRows = validateSkips(key, val)
            # A bare `-` (the stdin placeholder) may be reported by the parser
            # as an option with an empty key; keep ignoring it so that the
            # input falls back to stdin.
            of "": discard
            else:
                # Report typos instead of silently dropping the option.
                raise newException(ArgumentError, "unrecognized option '{key}'".fmt)
        of cmdArgument: opts.inputFile = key
        of cmdEnd: assert(false)
    return opts


proc main() =
    ## Command line entry point: parses the options, reads the table from the
    ## chosen input, applies transpose, reshape and padding (in that order) and
    ## writes the rows, joined by the delimiter, to stdout or the output file.
    ## With `--info`, only the table dimensions are printed.
    ## Raises an `IOError` if the requested output file already exists.
    let opts = parseOpts()
    let input = chooseInput(opts.inputFile)
    let logger = newConsoleLogger(); addHandler(logger)
    if opts.info:
        printInfo(input, opts.delimiter)
        close input
        quit(QuitSuccess)

    let table = readTable(
        input,
        opts.delimiter,
        skipRows = opts.skipRows,
        skipCols = opts.skipCols,
    )
    close input

    var newTable = if opts.transpose: transpose(table) else: table
    if opts.newShape.rows > 0 and opts.newShape.cols > 0:
        newTable = reshape(newTable, opts.newShape)
    # `padCells` inspects the first row, so only pad when there is one
    # (the table is empty if every row or column was skipped).
    if opts.pretty and len(newTable) > 0:
        padCells(newTable)
    var output = if opts.outputFile == "":
        stdout
    else:
        # Refuse to overwrite existing files.
        if fileExists(opts.outputFile):
            raise newException(IOError, "file '{opts.outputFile}' already exists".fmt)
        open(opts.outputFile, mode = fmWrite)
    for row in newTable:
        writeLine(output, row.join($opts.delimiter))
    close output


if isMainModule: main()

A  => tests/config.nims +1 -0
@@ 1,1 @@
# Add the package's src/ directory to the module search path for the tests.
switch("path", "$projectDir/../src/")

A  => tests/test.nim +399 -0
@@ 1,399 @@
import logging
import streams
import strformat
import strutils
import unittest

import reshape


# Tab-delimited table, 3 rows by 3 columns. The last row contains quoted tabs.
const tabTable3x3 = """
    a	b	c
    0.1	0.2	0.3
    "	1"	"2	"	"3	3"
"""
# Comma-delimited table with non-ASCII cells and badly quoted second row.
const malformedCommaTable2x3 = """
    ä, ¿, ©
    1", "2, 3"
"""
# Space-delimited table with leading/trailing delimiters, an empty line,
# a quoted space and no final newline.
const malformedSpaceTable4x4 = """
 a b c" "

 10 20 30
 1 2 """
# One three-column header row followed by 100 (resp. 101) single-cell rows.
const malformedCommaTable101x3 = "a, b, c\n" & repeat("foo\n", 100)
const malformedCommaTable102x3 = malformedCommaTable101x3 & "foo\n"


proc readLines(file: File): seq[string] =
    ## Rewinds the given open file and returns all of its lines, without the
    ## last element of the split (empty when the file ends in a newline).
    file.setFilePos(0)
    let content = readAll(file)
    var pieces = splitLines(content)
    pieces.setLen(len(pieces) - 1)  # Remove spurious empty line.
    return pieces


# Checks the defaults and the per-option parsing of `parseOpts`, for both the
# short (`-x:val`) and long (`--opt val`) spellings.
suite "Command line parsing":
    # Called without an argument string; the tests below use it for defaults.
    # NOTE(review): presumably this parses the test binary's own command line,
    # so the defaults only hold if it is run without options — confirm.
    let emptyOpts = parseOpts()
    test "nopad":
        check emptyOpts.pretty == true
        check parseOpts("-p").pretty == false
    test "transpose":
        check emptyOpts.transpose == false
        check parseOpts("-t").transpose == true
    test "out":
        check emptyOpts.outputFile == ""
        expect ArgumentError: discard parseOpts("-o")
        check parseOpts("-o:foo").outputFile == "foo"
        check parseOpts("--out foo").outputFile == "foo"
    test "delim":
        check emptyOpts.delimiter == '\t'
        expect ArgumentError: discard parseOpts("-d")
        check parseOpts("-d::").delimiter == ':'
        expect ArgumentError: discard parseOpts("-d:ð")  # Multi-byte delimiter: error.
        expect ArgumentError: discard parseOpts("--delim ||")  # Multi-byte delimiter: error.
        check parseOpts("--delim ,").delimiter == ','
    test "shape":
        check emptyOpts.newShape == (0, 0)
        expect ArgumentError: discard parseOpts("-s")
        check parseOpts("-s:3x4").newShape == (3, 4)
        check parseOpts("--shape 3x4").newShape == (3, 4)
        expect ValueError: discard parseOpts("--shape 2.5x1")  # Not an integer.
    test "skipcols":
        check emptyOpts.skipCols == newSeq[int]()
        expect ArgumentError: discard parseOpts("-c")
        # One-based on the command line, zero-based in the parsed options.
        check parseOpts("-c:1,4,10").skipCols == @[0, 3, 9]
        check parseOpts("--skipcols 1,4,10").skipCols == @[0, 3, 9]
        expect ValueError: discard parseOpts("--skipcols 1.2,3.4")
        check parseOpts("-c:1").skipCols == @[0]
    test "skiprows":
        check emptyOpts.skipRows == newSeq[int]()
        expect ArgumentError: discard parseOpts("-r")
        # One-based on the command line, zero-based in the parsed options.
        check parseOpts("-r:1,4,10").skipRows == @[0, 3, 9]
        check parseOpts("--skiprows 1,4,10").skipRows == @[0, 3, 9]
        expect ValueError: discard parseOpts("--skiprows 1.2,3.4")
        check parseOpts("-r:1").skipRows == @[0]


# Checks the shapes returned by `readShape` and the warnings it logs.
# Warnings go to a file logger and are read back from the log file.
suite "Table shape parsing":
    # NOTE(review): the log path is relative, so this presumably expects to be
    # run from the project root — confirm.
    let log = newFileLogger("tests/readShape.log", mode = fmReadWrite)
    addHandler(log)

    test "tabTable3x3":
        info("starting tabTable3x3 test")
        let input = newStringStream(tabTable3x3)
        check readShape(input, '\t', warnings = true) == (3, 3)
        require atEnd(input)

        input.setPosition(0)
        check readShape(input, '.', warnings = true) == (3, 4)  # Suspect delimiter.
        require atEnd(input)

        input.setPosition(0)
        check readShape(input, ',', warnings = true) == (3, 1)  # Missing delimiter.
        require atEnd(input)

        # The last two log lines stem from the '.' and ',' runs respectively.
        check readLines(log.file)[^2..^1] == @[
            "WARN encountered malformed rows: 1,3.",
            "WARN delimiter ',' not found.",
        ]

        close input

    test "malformedCommaTable2x3":
        info("starting malformedCommaTable2x3 test")
        let input = newStringStream(malformedCommaTable2x3)
        # Malformed second line, bad quoting.
        check readShape(input, ',', warnings = true) == (2, 3)
        require atEnd(input)

        input.setPosition(0)
        expect ValueError: discard readShape(input, '"')  # Illegal delimiter.
        check atEnd(input) == false  # Nothing was read.

        check readLines(log.file)[^1] == "WARN encountered malformed rows: 2."
        close input

    test "malformedSpaceTable4x4":
        info("starting malformedSpaceTable4x4 test")
        let input = newStringStream(malformedSpaceTable4x4)
        # Empty leading/trailing column is counted.
        check readShape(input, ' ', warnings = true) == (4, 4)
        require atEnd(input)

        check readLines(log.file)[^1] == "WARN encountered malformed rows: 2."
        close input

    test "malformedCommaTable101x3":
        info("starting malformedCommaTable101x3 test")
        let input = newStringStream(malformedCommaTable101x3)
        # Exactly 100 malformed rows (2 to 101): all are listed, no truncation.
        var badRows = newSeq[int](100)
        for i in 2..101: badRows[i - 2] = i
        var badRowString = badRows.join(",")
        check readShape(input, ',', warnings = true) == (101, 3)
        require atEnd(input)

        check readLines(log.file)[^1] == "WARN encountered malformed rows: {badRowString}.".fmt
        close input

    test "malformedCommaTable102x3":
        info("starting malformedCommaTable102x3 test")
        let input = newStringStream(malformedCommaTable102x3)
        # 101 malformed rows: rows 2 to 100 are listed, followed by an ellipsis
        # and the last malformed row (102).
        var badRows = newSeq[int](100)
        for i in 2..100: badRows[i - 2] = i
        badRows[^1] = 102
        var badRowString = badRows[0..^2].join(",") & "..." & $badRows[^1]
        check readShape(input, ',', warnings = true) == (102, 3)
        require atEnd(input)

        check readLines(log.file)[^1] == "WARN encountered malformed rows: {badRowString}.".fmt
        close input

    close log.file


# Checks the tables returned by `readTable` and their in-place padding by
# `padCells` (right-alignment to the longest cell of each column).
suite "Cell padding":
    test "tabTable3x3":
        let input = newStringStream(tabTable3x3)
        var table = readTable(input, '\t')
        # Quoted tabs stay inside their cells.
        check table == @[
            @["a", "b", "c"],
            @["0.1", "0.2", "0.3"],
            @["\"\t1\"", "\"2\t\"", "\"3\t3\""],
        ]
        padCells(table)
        check table == @[
            @["   a", "   b", "    c"],
            @[" 0.1", " 0.2", "  0.3"],
            @["\"\t1\"", "\"2\t\"", "\"3\t3\""],
        ]

        close input

    test "malformedCommaTable2x3":
        let input = newStringStream(malformedCommaTable2x3)
        var table = readTable(input, ',')
        # The short second row is completed with an empty cell on the right.
        check table == @[@["ä", "¿", "©"], @["1\", \"2", "3\"", ""]]
        padCells(table)
        # Unicode characters create weird cell sizes, because they have len(char) == 2.
        # Could handle this by using Runes <https://nim-lang.org/docs/unicode.html>?
        check table == @[@["    ä", "¿", "©"], @["1\", \"2", "3\"", "  "]]

        close input

    test "malformedSpaceTable4x4":
        let input = newStringStream(malformedSpaceTable4x4)
        var table = readTable(input, ' ')
        # Leading delimiters produce an empty first column,
        # the empty line produces a row of empty cells.
        check table == @[
            @["", "a", "b", "c\" \""],
            @["", "", "", ""],
            @["", "10", "20", "30"],
            @["", "1", "2", ""],
        ]
        padCells(table)
        check table == @[
            @["", " a", " b", "c\" \""],
            @["", "  ", "  ", "    "],
            @["", "10", "20", "  30"],
            @["", " 1", " 2", "    "],
        ]

        close input


# Checks the `skipRows` and `skipCols` parameters of `readTable`
# (zero-based indices), including invalid and out-of-range indices.
suite "Row and column skipping":
    test "tabTable3x3":
        let input = newStringStream(tabTable3x3)
        var table = readTable(input, '\t', skipRows = @[2])
        check table == @[@["a", "b", "c"], @["0.1", "0.2", "0.3"]]
        input.setPosition(0)
        table = readTable(input, '\t', skipCols = @[2])
        check table == @[@["a", "b"], @["0.1", "0.2"], @["\"\t1\"", "\"2\t\""]]
        input.setPosition(0)
        # Skipping every row or every column gives an empty table.
        table = readTable(input, '\t', skipRows = @[0, 1, 2])
        check table == newSeq[seq[string]]()
        input.setPosition(0)
        table = readTable(input, '\t', skipCols = @[2, 1, 0])
        check table == newSeq[seq[string]]()

        close input

    test "malformedCommaTable2x3":
        let input = newStringStream(malformedCommaTable2x3)
        var table = readTable(input, ',', skipRows = @[0], skipCols = @[1])
        check table == @[@["1\", \"2", ""]]
        input.setPosition(0)
        table = readTable(input, ',', skipRows = @[0, 1], skipCols = @[1])
        check table == newSeq[seq[string]]()
        input.setPosition(0)
        # Unsorted and duplicate skip indices are accepted.
        table = readTable(input, ',', skipRows = @[1], skipCols = @[1, 0, 2, 1, 2])
        check table == newSeq[seq[string]]()

        close input

    test "malformedSpaceTable4x4":
        let input = newStringStream(malformedSpaceTable4x4)
        var table = readTable(input, ' ', skipRows = @[1], skipCols = @[0])
        check table == @[@["a", "b", "c\" \""], @["10", "20", "30"], @["1", "2", ""]]
        input.setPosition(0)
        table = readTable(input, ' ', skipRows = @[1], skipCols = @[0, 3])
        check table == @[@["a", "b"], @["10", "20"], @["1", "2"]]
        input.setPosition(0)
        table = readTable(input, ' ', skipRows = @[0, 1, 2, 3], skipCols = @[1])
        check table == newSeq[seq[string]]()

        close input

    test "tabTable3x3 errors":
        let input = newStringStream(tabTable3x3)
        # Row index beyond the table: IndexDefect.
        expect IndexDefect: discard readTable(input, '\t', skipRows = @[0, 1, 5])
        input.setPosition(0)
        # Negative index: ValueError.
        expect ValueError: discard readTable(input, '\t', skipCols = @[0, 1, -5])

        close input

    test "malformedCommaTable2x3 errors":
        let input = newStringStream(malformedCommaTable2x3)
        expect ValueError: discard readTable(input, ',', skipRows = @[-1])
        input.setPosition(0)
        expect ValueError: discard readTable(input, ',', skipCols = @[-1])

        close input

    test "malformedSpaceTable4x4 errors":
        let input = newStringStream(malformedSpaceTable4x4)
        # Indices one past the last row/column: IndexDefect.
        expect IndexDefect: discard readTable(input, ' ', skipRows = @[4])
        input.setPosition(0)
        expect IndexDefect: discard readTable(input, ' ', skipCols = @[4])

        close input


# Checks `transpose` on tables produced by `readTable` and on raw sequences,
# including that transposing twice gives back the original table.
suite "Transpose":
    test "tabTable3x3":
        let input = newStringStream(tabTable3x3)
        let table = readTable(input, '\t')
        check transpose(table) == @[
            @["a", "0.1", "\"\t1\""],
            @["b", "0.2", "\"2\t\""],
            @["c", "0.3", "\"3\t3\""],
        ]
        check transpose(transpose(table)) == table

        close input

    test "malformedCommaTable2x3":
        let input = newStringStream(malformedCommaTable2x3)
        let table = readTable(input, ',')
        check transpose(table) == @[@["ä", "1\", \"2"], @["¿", "3\""], @["©", ""]]
        check transpose(transpose(table)) == table

        close input

    test "malformedSpaceTable4x4":
        let input = newStringStream(malformedSpaceTable4x4)
        let table = readTable(input, ' ')
        check transpose(table) == @[
            @["", "", "", ""],
            @["a", "", "10", "1"],
            @["b", "", "20", "2"],
            @["c\" \"", "", "30", ""],
        ]
        check transpose(transpose(table)) == table

        close input

    test "malformedCommaTable101x3":
        # TODO: Better tests for large tables?
        let input = newStringStream(malformedCommaTable101x3)
        let table = readTable(input, ',')
        check transpose(transpose(table)) == table

        close input

    test "malformedCommaTable102x3":
        # TODO: Better tests for large tables?
        let input = newStringStream(malformedCommaTable102x3)
        let table = readTable(input, ',')
        check transpose(transpose(table)) == table

        close input

    test "empty table (no-op)":
        # An exhausted input stream is rejected by `readTable`.
        expect IOError: discard readTable(newStringStream(""), ',')
        let input = newStringStream("a, b, c")
        # Skipping the only row leaves an empty table, which transposes to itself.
        let table = readTable(input, ',', skipRows = @[0])
        require table == newSeq[seq[string]]()
        check transpose(table) == newSeq[seq[string]]()

        close input

    test "malformed tables (raw)":
        # Underfull table is filled with empty cells during transpose.
        check transpose(@[@["a", "b", "c"], @["1", "2"]]) == @[
            @["a", "1"], @["b", "2"], @["c", ""]
        ]
        # Malformed table (a later row is longer than the first): IndexDefect.
        expect IndexDefect: discard transpose(@[@["a", "b"], @["1", "2", "3"]])


# Checks `reshape` on tables produced by `readTable` and on raw sequences,
# including the errors for capacity mismatches and malformed tables.
suite "reshape":
    test "tabTable3x3":
        let input = newStringStream(tabTable3x3)
        let table = readTable(input, '\t')
        # Check reshaping to single-row and single-column tables.
        check reshape(table, (rows: 1, cols: 9)) == @[
            @["a", "b", "c", "0.1", "0.2", "0.3", "\"\t1\"", "\"2\t\"", "\"3\t3\""]
        ]
        check reshape(table, (rows: 9, cols: 1)) == @[
            @["a"],
            @["b"],
            @["c"],
            @["0.1"],
            @["0.2"],
            @["0.3"],
            @["\"\t1\""],
            @["\"2\t\""],
            @["\"3\t3\""],
        ]

        close input

    test "malformedCommaTable2x3":
        let input = newStringStream(malformedCommaTable2x3)
        let table = readTable(input, ',')
        check reshape(table, (rows: 3, cols: 2)) == @[
            @["ä", "¿"],
            @["©", "1\", \"2"],
            @["3\"", ""],
        ]
        # Check that we can un-reshape.
        check reshape(reshape(table, (rows: 3, cols: 2)), (rows: 2, cols: 3)) == table

        close input

    test "malformedSpaceTable4x4":
        let input = newStringStream(malformedSpaceTable4x4)
        let table = readTable(input, ' ')
        check reshape(table, (rows: 4, cols: 4)) == @[  # No-op.
            @["", "a", "b", "c\" \""],
            @["", "", "", ""],
            @["", "10", "20", "30"],
            @["", "1", "2", ""],
        ]
        # Reshaping to smaller capacity: ValueError.
        expect ValueError: discard reshape(table, (rows: 2, cols: 4))

        close input

    test "malformed tables (raw)":
        # Underfull table, filled in with empty cells.
        check reshape(@[@["a", "b", "c"], @["1", "2"]], (rows: 3, cols: 2)) == @[
            @["a", "b"], @["c", "1"], @["2", ""]
        ]
        # Malformed table (capacity derived from the first row does not match): ValueError.
        expect ValueError: discard reshape(
            @[@["a", "b"], @["1", "2", "3"]], (rows: 3, cols: 2)
        )
        # Overfull table (more cells than the first row suggests): IndexDefect.
        expect IndexDefect: discard reshape(
            @[@["a", "b", "c"], @["1", "2", "3", "4"]], (rows: 3, cols: 2)
        )