@@ 1,412 @@
+import algorithm
+import logging
+import os
+import parseopt
+import sequtils
+import streams
+import strformat
+import strutils
+import typetraits
+
const
  # Overridable at build time with `-d:NimblePkgVersion=<version>`;
  # "Unknown" is the fallback when no such define is given.
  NimblePkgVersion {.strdefine.} = "Unknown"
  version = NimblePkgVersion

proc printVersion() =
  ## Prints the program version (when one was supplied at build time)
  ## together with the compile date and time, then exits the program
  ## with `QuitSuccess`.
  let banner =
    if version == "Unknown":
      "Unknown version of reshape, compiled on {CompileDate} at {CompileTime}".fmt
    else:
      "Version {version} of reshape, compiled on {CompileDate} at {CompileTime}".fmt
  echo banner
  quit(QuitSuccess)
+
+
proc printHelp() =
  ## Prints the usage text to standard output and exits the program with
  ## `QuitSuccess`. The text is a raw (triple-quoted) literal, so sequences
  ## such as `\t` are shown to the user verbatim.
  echo """
Usage: reshape [-h|-v][--help|--version]
       reshape [-i] TABLE
       reshape [-p][-t]
               [-d:delim][-c:c1,c2,...][-r:r1,r2,...][-o:file][-s:RxC] TABLE

Options:
-h,--help               print this help text
-v,--version            print version information
-i,--info               print diagnostic information for TABLE
-p,--nopad              don't pad output cells with leading whitespace
-t,--transpose          transpose TABLE, swap meaning of "rows" and "columns"
-d,--delim <delim>      split input lines at each occurrence of <delim>
-s,--shape <RxC>        reshape TABLE into R rows and C columns, applied last
-c,--skipcols <c1,...>  skip columns <c1,...> in TABLE
-r,--skiprows <r1,...>  skip rows <r1,...> in TABLE
-o,--out <file>         write output to <file>

Operands:
  TABLE                 File path or input stream
                        containing tabular input data

Reshape TABLE, or print diagnostic metadata. When using --transpose,
"rows" and "columns" for other options refer to the table before transposing.
The default delimiter is a tab, i.e. `\t`. Reshaping with --shape is always
applied after --skip{rows,cols} and --transpose. For short options,
option arguments must be separated from the flag by a colon or equals sign,
e.g. `-d:,`. Multi-byte delimiters such as unicode characters are not supported.
Tab and space delimiters can be specified with -d:'\t' and -d:'\ ' respectively.
Empty columns are propagated without warning."""
  quit(QuitSuccess)
+
+
type
  # Table dimensions as (number of rows, number of columns).
  Shape = tuple
    rows, cols: int

  # Settings collected from the command line.
  Opts = tuple
    inputFile: string   # path of the table to read
    outputFile: string  # path to write to
    info: bool          # print diagnostics instead of reshaping
    pretty: bool        # pad output cells
    transpose: bool     # transpose the table
    delimiter: char     # cell delimiter
    newShape: Shape     # requested shape
    skipCols: seq[int]  # column indices to drop
    skipRows: seq[int]  # row indices to drop

  # Raised when a command line option carries an unusable argument.
  ArgumentError* = object of CatchableError
+
+
proc chooseInput(filename: string): Stream =
  ## Returns the stream to read the table from: standard input when
  ## `filename` is empty or `-`, otherwise a stream opened on that file.
  result =
    if filename in ["", "-"]: newFileStream(stdin)
    else: openFileStream(filename)
+
+
func splitCells(row: string, delimiterIndices: seq[int]): seq[string] =
  ## Splits `row` by removing the characters at `delimiterIndices` and
  ## returns the text between them, one cell per gap.
  ## Every cell is stripped of surrounding whitespace, including the single
  ## cell of a row without delimiters. A delimiter at the very start or end
  ## of `row` yields an empty leading or trailing cell.
  ## Assumes `delimiterIndices` is strictly ascending and every index lies
  ## within `row`.
  result = newSeqOfCap[string](len(delimiterIndices) + 1)
  var cellStart = 0
  for delimiterIndex in delimiterIndices:
    result.add(strip(row[cellStart ..< delimiterIndex]))
    cellStart = delimiterIndex + 1
  # Whatever follows the last delimiter (possibly nothing) is the last cell.
  result.add(strip(row[cellStart .. ^1]))


func splitCells(row: string, delimiter: char): seq[string] =
  ## Splits `row` on `delimiter`, ignoring delimiters in quoted cells
  ## (double quotes only).
  ## A double quote opens a quoted span only if another double quote follows
  ## it; everything up to that closing quote is skipped. A quote without a
  ## partner is treated as an ordinary character, so delimiters after it
  ## still split the row.
  ## Leading or trailing delimiters result in corresponding empty cells.
  var delimiterIndices: seq[int]
  var i = 0
  while i < len(row):
    let c = row[i]
    if c == '"':
      let closingQuoteIndex = row.find('"', start = i + 1)
      if closingQuoteIndex != -1:
        # Jump past the whole quoted span in one step.
        i = closingQuoteIndex + 1
        continue
    elif c == delimiter:
      delimiterIndices.add(i)
    inc i
  return splitCells(row, delimiterIndices)
+
+
const maxReportedBadRows = 100
  ## Upper bound on the number of malformed rows remembered by `readShape`.

proc noteBadRow(badRows: var seq[int], truncated: var bool, row: int) =
  ## Remembers `row` (one-based) as malformed. At most `maxReportedBadRows`
  ## rows are kept; once the list is full, its last entry is overwritten
  ## with the most recent malformed row and `truncated` is set.
  ## A row that was recorded immediately before is not recorded again.
  if len(badRows) > 0 and badRows[^1] == row: return
  if len(badRows) < maxReportedBadRows:
    badRows.add(row)
  else:
    badRows[^1] = row
    truncated = true


proc readShape*(
    input: Stream, delimiter: char, sink: Stream = newStringStream(), warnings = false
  ): Shape =
  ## Guesses the shape of the delimited text given in the input stream.
  ## The row count is the number of lines read, the column count is the
  ## largest number of cells found in any line.
  ## Quoted delimiters are skipped, and the quote char `"` is an illegal
  ## delimiter (which causes a `ValueError` to be raised before anything is
  ## read). Every line read is also written to `sink`.
  ## With `warnings` enabled, a warning is logged when only one column was
  ## found, or otherwise when rows were flagged as malformed: rows with fewer
  ## cells than the widest row seen so far, and the row directly preceding
  ## a row that widens the table. Row numbers in the message are one-based;
  ## a truncated list shows `...` before the last malformed row.
  if delimiter == '"':
    raise newException(ValueError, "delimiter must not be the quote character (U+0022)")
  var
    rowCount: int
    colCount: int
    badRows: seq[int]
    truncated = false
    line: string
  while input.readLine(line):
    sink.writeLine(line)
    inc rowCount
    let currentColCount = len(line.splitCells(delimiter))
    if currentColCount > colCount:
      # The table just got wider, so the previous row was too short.
      if rowCount > 1: noteBadRow(badRows, truncated, rowCount - 1)
      colCount = currentColCount
    elif currentColCount < colCount:
      noteBadRow(badRows, truncated, rowCount)

  if warnings:
    if colCount == 1: warn("delimiter '{delimiter}' not found.".fmt)
    elif len(badRows) > 0:
      let badRowsTrunc =
        if truncated:
          badRows[0 .. ^2].join(",") & "..." & $badRows[^1]
        else:
          badRows.join(",")
      warn("encountered malformed rows: {badRowsTrunc}.".fmt)
  return (rowCount, colCount)
+
+
func toSlices(s: seq[int]): seq[Slice[int]] =
  ## Condenses a sequence of integers to a sequence of slices, one slice per
  ## run of values in which neighbours differ by at most one,
  ## e.g. `@[0, 1, 2, 5]` becomes `@[0 .. 2, 5 .. 5]`.
  ## An empty sequence yields an empty result.
  ## Raises a `ValueError` if `s` is not sorted.
  if not isSorted(s): raise newException(ValueError, "sequence must be sorted")
  if len(s) == 0: return
  var runStart = s[0]
  for i in 1 ..< len(s):
    if s[i] - s[i - 1] > 1:
      # Gap found: close the current run and start a new one.
      result.add(runStart .. s[i - 1])
      runStart = s[i]
  result.add(runStart .. s[^1])
+
+
proc readTable*(input: Stream, delimiter: char, skipRows, skipCols: seq[int] = @[]):
    seq[seq[string]] =
  ## Reads delimited tabular data from `input` and returns a sequence of rows.
  ## Rows are sequences of cells (strings). Quoted delimiters are skipped.
  ## Raises a `ValueError` if the quote character `"` is used as a delimiter,
  ## or either `skipRows` or `skipCols` contain negative values.
  ## Raises an `IOError` if no data can be read from the `input` stream.
  ## Quietly propagates empty cells. Empty cells are also created to complete
  ## malformed rows of the table. They are added to the right of existing cells.
  ## `skipRows` and `skipCols` can be used to exclude the specified rows/columns.
  ## This is done after filling out malformed rows. Rows/columns are zero-indexed.
  ## Duplicate indices are ignored. Skipping every row or every column
  ## returns an empty table.
  if any(skipRows, proc(x: int): bool = x < 0) or
      any(skipCols, proc(x: int): bool = x < 0):
    raise newException(
      ValueError, "indices for skipped rows/columns must be non-negative"
    )
  if atEnd(input): raise newException(IOError, "input stream must not be exhausted")

  let uniqueSkipCols = deduplicate(sorted(skipCols), isSorted = true)
  let uniqueSkipRows = deduplicate(sorted(skipRows), isSorted = true)
  # The runs of columns to drop are the same for every row. They are applied
  # from the highest index down, so earlier deletions don't shift later ones.
  let skipSlices =
    if len(uniqueSkipCols) > 0: reversed(toSlices(uniqueSkipCols))
    else: newSeq[Slice[int]]()

  # First pass: determine the shape while buffering the input for a second pass.
  var stream = newStringStream()
  defer: close stream
  let shape = readShape(input, delimiter, stream)
  var table = newSeqWith(
    shape.rows - len(uniqueSkipRows),
    newSeq[string](shape.cols - len(uniqueSkipCols)),
  )
  var line: string
  var rowIndex: int
  var newRowIndex: int
  stream.setPosition(0)
  while stream.readLine(line):
    if rowIndex notin uniqueSkipRows:
      var cells = line.splitCells(delimiter)
      # Fill out short rows first, so that a skipped column which is missing
      # from a malformed row can still be removed.
      if len(cells) < shape.cols: cells.setLen(shape.cols)
      for slice in skipSlices:
        when NimMajor == 1 and NimMinor < 6:
          # https://github.com/nim-lang/Nim/commit/1d6863a7899fd87fd9eb017ae370ef37db18ad32
          cells.delete(slice.a, slice.b)
        else:
          cells.delete(slice)
      table[newRowIndex][0 ..< len(cells)] = cells
      inc newRowIndex
    inc rowIndex
  # Make zero-column return the same as zero-row return.
  if len(table) > 0 and len(table[0]) == 0: return newSeq[seq[string]]()
  return table
+
+
func transpose*(table: seq[seq[string]]): seq[seq[string]] =
  ## Builds a new table in which cell (r, c) of `table` sits at (c, r).
  ## The result has as many rows as the first row of `table` has cells;
  ## rows shorter than the first one leave empty cells behind, a longer
  ## row raises an `IndexDefect`. An empty table is returned as is.
  let rowCount = len(table)
  if rowCount == 0: return table
  var flipped = newSeqWith(len(table[0]), newSeq[string](rowCount))
  for r in 0 ..< rowCount:
    for c in 0 ..< len(table[r]):
      flipped[c][r] = table[r][c]
  return flipped
+
+
func reshape*(table: seq[seq[string]], newShape: Shape): seq[seq[string]] =
  ## Pours the cells of `table`, row by row, into a fresh table of shape
  ## `newShape` and returns it. An empty table is returned as is.
  ## The capacity check uses the length of the first row only, so rows are
  ## expected to be equally long; missing cells stay empty, surplus cells
  ## raise an `IndexDefect`.
  ## Raises a `ValueError` if `newShape` contains non-positive integers,
  ## or would result in a different capacity (number of cells).
  let (rows, cols) = newShape
  if rows < 1 or cols < 1:
    raise newException(ValueError, "shape must be a tuple of positive integers")
  if len(table) == 0: return table
  if rows * cols != len(table) * len(table[0]):
    raise newException(ValueError, "new shape must retain table capacity")
  var reshaped = newSeqWith(rows, newSeq[string](cols))
  # Position of the next cell when counting all cells in row-major order.
  var flatIndex = 0
  for row in table:
    for cell in row:
      reshaped[flatIndex div cols][flatIndex mod cols] = cell
      inc flatIndex
  return reshaped
+
+
proc padCells*(table: var seq[seq[string]]) =
  ## Pads cells in-place with leading spaces to right-align tabular columns.
  ## Each column becomes as wide as its longest cell. Widths are measured
  ## with `len`, i.e. in bytes, so multi-byte characters count more than once.
  ## An empty table is left untouched.
  ## Raises a `ValueError` if the rows don't all contain the same amount of cells.
  if len(table) == 0: return
  var cellSizes = newSeq[int](len(table[0]))
  for row in table:
    if len(row) != len(cellSizes):
      raise newException(ValueError, "must provide rows of equal length")
    for i, cell in row.pairs:
      cellSizes[i] = max(cellSizes[i], len(cell))
  for row in table.mitems:
    for i, cell in row.mpairs:
      cell = align(cell, cellSizes[i])
+
+
proc printInfo(input: Stream, delimiter: char) =
  ## Determines the shape of the table in `input` (with warnings enabled)
  ## and prints its row and column count to standard output.
  let shape = readShape(input, delimiter, warnings = true)
  let rows = shape.rows
  let cols = shape.cols
  echo "Rows: {rows}".fmt
  echo "Columns: {cols}".fmt
+
+
proc validate(key, val: string): string =
  ## Returns the argument `val` of option `key`, translating the two
  ## spellings `\t` and `\ ` (backslash followed by `t` or a space) into a
  ## tab and a space. Any other value, including other backslash sequences,
  ## is passed through unchanged.
  ## Raises an `ArgumentError` if the argument is empty.
  result =
    case val
    of "\\t": "\t"
    of "\\ ": " "
    else: val
  if result == "":
    raise newException(ArgumentError, "option {key} requires an argument".fmt)
+
+
func validateChar(key, val: string): char =
  ## Validates the argument of option `key` and returns it as a single
  ## character. Raises an `ArgumentError` if the argument is missing or
  ## longer than one byte.
  let parsedVal = validate(key, val)
  if len(parsedVal) > 1:
    raise newException(
      ArgumentError, "must provide a single-byte delimiter, not '{parsedVal}'.".fmt
    )
  result = parsedVal[0]
+
+
func validateShape(key: string, val: string): Shape =
  ## Parses an option argument of the form `<rows>x<cols>`, e.g. `3x4`.
  ## Raises an `ArgumentError` if the argument is missing, does not consist
  ## of exactly two parts, or contains a non-positive number.
  ## Parts that are not integers make `parseInt` raise a `ValueError`.
  # Unpacking operator would be nice: https://forum.nim-lang.org/t/8793
  let dims = validate(key, val).split('x').map(parseInt)
  if len(dims) != 2:
    raise newException(
      ArgumentError, "must provide new shape as <rows>x<cols>"
    )
  if any(dims, proc(x: int): bool = x < 1):
    raise newException(
      ArgumentError, "must provide positive integers for new shape"
    )
  return (rows: dims[0], cols: dims[1])
+
+
func validateSkips(key: string, val: string): seq[int] =
  ## Parses a comma-separated list of one-based row/column numbers and
  ## returns them as zero-based indices, in the order given.
  ## Raises an `ArgumentError` if the argument is missing or a number is
  ## smaller than one. Entries that are not integers make `parseInt` raise
  ## a `ValueError`.
  let oneBased = validate(key, val).split(',').map(parseInt)
  if oneBased.anyIt(it < 1):
    raise newException(
      ArgumentError, "indices for skipped rows/columns must be positive"
    )
  for number in oneBased:
    result.add(number - 1)
+
+
proc parseOpts*(cmdline = ""): Opts =
  ## Parses command line options from `cmdline`. The string is handed to
  ## `initOptParser` as is; per the `parseopt` documentation an empty string
  ## makes the parser fall back to the command line of the running program.
  ## Defaults: padded output, no transposing, tab as delimiter.
  ## `--help` and `--version` print their text and exit the program.
  ## If several positional arguments are given, the last one becomes the
  ## input file.
  ## Raises an `ArgumentError` for unknown options and for missing or
  ## illegal option arguments; non-integer numbers raise a `ValueError`.
  var parser = initOptParser(
    cmdline,
    shortNoVal = {'h', 'v', 'i', 'p', 't'},
    longNoVal = @["help", "version", "info", "nopad", "transpose"],
  )
  # Set defaults.
  var opts: Opts
  opts.pretty = true
  opts.transpose = false
  opts.delimiter = '\t'

  for kind, key, val in getopt(parser):
    case kind
    of cmdLongOption, cmdShortOption:
      case key
      # Process options without arguments.
      of "help", "h": printHelp()
      of "version", "v": printVersion()
      of "info", "i": opts.info = true
      of "nopad", "p": opts.pretty = false
      of "transpose", "t": opts.transpose = true
      # Process options with arguments.
      of "out", "o": opts.outputFile = validate(key, val)
      of "delim", "d": opts.delimiter = validateChar(key, val)
      of "shape", "s": opts.newShape = validateShape(key, val)
      of "skipcols", "c": opts.skipCols = validateSkips(key, val)
      of "skiprows", "r": opts.skipRows = validateSkips(key, val)
      # Reject anything else instead of silently ignoring it.
      else:
        raise newException(ArgumentError, "unknown option '{key}'".fmt)
    of cmdArgument: opts.inputFile = key
    of cmdEnd: assert(false)
  return opts
+
+
proc main() =
  ## Entry point of the command line program: parses the options, reads the
  ## table, applies transposing, reshaping and padding as requested, and
  ## writes the result to the output file or to standard output.
  ## With `--info`, only the table diagnostics are printed.
  ## Raises an `IOError` if the output file already exists.
  let opts = parseOpts()
  let input = chooseInput(opts.inputFile)
  let logger = newConsoleLogger(); addHandler(logger)

  var table: seq[seq[string]]
  try:
    if opts.info:
      printInfo(input, opts.delimiter)
      return
    table = readTable(
      input,
      opts.delimiter,
      skipRows = opts.skipRows,
      skipCols = opts.skipCols,
    )
  finally:
    # Release the input whether reading succeeded or not.
    close input

  var newTable = if opts.transpose: transpose(table) else: table
  if opts.newShape.rows > 0 and opts.newShape.cols > 0:
    newTable = reshape(newTable, opts.newShape)
  # Skipping every row or column leaves an empty table with nothing to pad.
  if opts.pretty and len(newTable) > 0:
    padCells(newTable)

  let toFile = opts.outputFile != ""
  if toFile and fileExists(opts.outputFile):
    raise newException(IOError, "file '{opts.outputFile}' already exists".fmt)
  let output = if toFile: open(opts.outputFile, mode = fmWrite) else: stdout
  try:
    for row in newTable:
      writeLine(output, row.join($opts.delimiter))
  finally:
    # Only close what was opened here; standard output is left alone.
    if toFile: close output


when isMainModule: main()
@@ 1,399 @@
+import logging
+import streams
+import strformat
+import strutils
+import unittest
+
+import reshape
+
+
# Test fixtures. Significant whitespace (tabs, leading and trailing spaces)
# is spelled out with escapes and one string per table row, so it survives
# editors and whitespace normalization.
# Rows of the tab- and comma-delimited fixtures start with a space that is
# stripped from the first cell during parsing; in the space-delimited fixture
# the leading space is itself a delimiter and yields an empty first column.

# Three rows, three tab-separated columns; the last row has quoted tabs.
const tabTable3x3 =
  " a\tb\tc\n" &
  " 0.1\t0.2\t0.3\n" &
  " \"\t1\"\t\"2\t\"\t\"3\t3\"\n"

# Two rows; the quoting in the second row hides one of its commas.
const malformedCommaTable2x3 =
  " ä, ¿, ©\n" &
  " 1\", \"2, 3\"\n"

# Four rows with an empty second row; the last row ends in a delimiter
# and has no trailing newline.
const malformedSpaceTable4x4 =
  " a b c\" \"\n" &
  "\n" &
  " 10 20 30\n" &
  " 1 2 "

# A three-column header followed by 100 (respectively 101) one-column rows.
const malformedCommaTable101x3 = "a, b, c\n" & repeat("foo\n", 100)
const malformedCommaTable102x3 = malformedCommaTable101x3 & "foo\n"
+
+
proc readLines(file: File): seq[string] =
  ## Rewinds the given open file and returns its complete content as lines.
  file.setFilePos(0)
  result = file.readAll().splitLines()
  # Drop the last element: the empty string after the final line break.
  result.setLen(result.len - 1)
+
+
suite "Command line parsing":
  # No explicit command line is passed here; the result serves as the
  # reference for default values in the tests below.
  let defaults = parseOpts()

  test "nopad":
    check defaults.pretty
    check not parseOpts("-p").pretty

  test "transpose":
    check not defaults.transpose
    check parseOpts("-t").transpose

  test "out":
    check defaults.outputFile == ""
    expect ArgumentError: discard parseOpts("-o")
    for cmdline in ["-o:foo", "--out foo"]:
      check parseOpts(cmdline).outputFile == "foo"

  test "delim":
    check defaults.delimiter == '\t'
    expect ArgumentError: discard parseOpts("-d")
    check parseOpts("-d::").delimiter == ':'
    # Multi-byte delimiter: error.
    for cmdline in ["-d:ð", "--delim ||"]:
      expect ArgumentError: discard parseOpts(cmdline)
    check parseOpts("--delim ,").delimiter == ','

  test "shape":
    check defaults.newShape == (0, 0)
    expect ArgumentError: discard parseOpts("-s")
    for cmdline in ["-s:3x4", "--shape 3x4"]:
      check parseOpts(cmdline).newShape == (3, 4)
    # Non-integer dimensions are rejected by the integer parser.
    expect ValueError: discard parseOpts("--shape 2.5x1")

  test "skipcols":
    check defaults.skipCols == newSeq[int]()
    expect ArgumentError: discard parseOpts("-c")
    # Column numbers are one-based on the command line, zero-based in `Opts`.
    for cmdline in ["-c:1,4,10", "--skipcols 1,4,10"]:
      check parseOpts(cmdline).skipCols == @[0, 3, 9]
    expect ValueError: discard parseOpts("--skipcols 1.2,3.4")
    check parseOpts("-c:1").skipCols == @[0]

  test "skiprows":
    check defaults.skipRows == newSeq[int]()
    expect ArgumentError: discard parseOpts("-r")
    # Row numbers are one-based on the command line, zero-based in `Opts`.
    for cmdline in ["-r:1,4,10", "--skiprows 1,4,10"]:
      check parseOpts(cmdline).skipRows == @[0, 3, 9]
    expect ValueError: discard parseOpts("--skiprows 1.2,3.4")
    check parseOpts("-r:1").skipRows == @[0]
+
+
suite "Table shape parsing":
  # Warnings logged by `readShape` are collected in this file, which the
  # tests read back to verify the messages. The `info` line at the start of
  # each test marks where its log output begins.
  let shapeLog = newFileLogger("tests/readShape.log", mode = fmReadWrite)
  addHandler(shapeLog)

  test "tabTable3x3":
    info("starting tabTable3x3 test")
    let source = newStringStream(tabTable3x3)
    check source.readShape('\t', warnings = true) == (3, 3)
    require source.atEnd

    # Suspect delimiter.
    source.setPosition(0)
    check source.readShape('.', warnings = true) == (3, 4)
    require source.atEnd

    # Missing delimiter.
    source.setPosition(0)
    check source.readShape(',', warnings = true) == (3, 1)
    require source.atEnd

    let logged = readLines(shapeLog.file)
    check logged[^2 .. ^1] == @[
      "WARN encountered malformed rows: 1,3.",
      "WARN delimiter ',' not found.",
    ]

    source.close()

  test "malformedCommaTable2x3":
    info("starting malformedCommaTable2x3 test")
    let source = newStringStream(malformedCommaTable2x3)
    # Malformed second line, bad quoting.
    check source.readShape(',', warnings = true) == (2, 3)
    require source.atEnd

    # Illegal delimiter: rejected before anything is read.
    source.setPosition(0)
    expect ValueError: discard source.readShape('"')
    check not source.atEnd

    let logged = readLines(shapeLog.file)
    check logged[^1] == "WARN encountered malformed rows: 2."
    source.close()

  test "malformedSpaceTable4x4":
    info("starting malformedSpaceTable4x4 test")
    let source = newStringStream(malformedSpaceTable4x4)
    # Empty leading/trailing column is counted.
    check source.readShape(' ', warnings = true) == (4, 4)
    require source.atEnd

    let logged = readLines(shapeLog.file)
    check logged[^1] == "WARN encountered malformed rows: 2."
    source.close()

  test "malformedCommaTable101x3":
    info("starting malformedCommaTable101x3 test")
    let source = newStringStream(malformedCommaTable101x3)
    # Rows 2 to 101 are malformed: exactly 100 rows, all of them listed.
    var rowNumbers: seq[string]
    for rowNumber in 2 .. 101: rowNumbers.add($rowNumber)
    let badRowString = rowNumbers.join(",")
    check source.readShape(',', warnings = true) == (101, 3)
    require source.atEnd

    let logged = readLines(shapeLog.file)
    check logged[^1] == "WARN encountered malformed rows: {badRowString}.".fmt
    source.close()

  test "malformedCommaTable102x3":
    info("starting malformedCommaTable102x3 test")
    let source = newStringStream(malformedCommaTable102x3)
    # Rows 2 to 102 are malformed: one more than is listed, so the list is
    # cut after row 100 and closes with the last malformed row.
    var rowNumbers: seq[string]
    for rowNumber in 2 .. 100: rowNumbers.add($rowNumber)
    let badRowString = rowNumbers.join(",") & "..." & $102
    check source.readShape(',', warnings = true) == (102, 3)
    require source.atEnd

    let logged = readLines(shapeLog.file)
    check logged[^1] == "WARN encountered malformed rows: {badRowString}.".fmt
    source.close()

  close shapeLog.file
+
+
suite "Cell padding":
  # Every test parses a fixture, checks the raw cells, pads them in place and
  # checks that each column is right-aligned to its widest cell
  # (width measured in bytes).
  test "tabTable3x3":
    let input = newStringStream(tabTable3x3)
    var table = readTable(input, '\t')
    check table == @[
      @["a", "b", "c"],
      @["0.1", "0.2", "0.3"],
      @["\"\t1\"", "\"2\t\"", "\"3\t3\""],
    ]
    padCells(table)
    # Column widths are 4, 4 and 5, set by the quoted cells of the last row.
    check table == @[
      @["   a", "   b", "    c"],
      @[" 0.1", " 0.2", "  0.3"],
      @["\"\t1\"", "\"2\t\"", "\"3\t3\""],
    ]

    close input

  test "malformedCommaTable2x3":
    let input = newStringStream(malformedCommaTable2x3)
    var table = readTable(input, ',')
    check table == @[@["ä", "¿", "©"], @["1\", \"2", "3\"", ""]]
    padCells(table)
    # Unicode characters create weird cell sizes, because they have len(char) == 2.
    # Could handle this by using Runes <https://nim-lang.org/docs/unicode.html>?
    # Column widths in bytes are 6, 2 and 2.
    check table == @[@["    ä", "¿", "©"], @["1\", \"2", "3\"", "  "]]

    close input

  test "malformedSpaceTable4x4":
    let input = newStringStream(malformedSpaceTable4x4)
    var table = readTable(input, ' ')
    check table == @[
      @["", "a", "b", "c\" \""],
      @["", "", "", ""],
      @["", "10", "20", "30"],
      @["", "1", "2", ""],
    ]
    padCells(table)
    # Column widths are 0, 2, 2 and 4.
    check table == @[
      @["", " a", " b", "c\" \""],
      @["", "  ", "  ", "    "],
      @["", "10", "20", "  30"],
      @["", " 1", " 2", "    "],
    ]

    close input
+
suite "Row and column skipping":
  # Skipped rows/columns are given as zero-based indices. Skipping every row
  # or every column is expected to produce an empty table.
  test "tabTable3x3":
    let input = newStringStream(tabTable3x3)
    var table = readTable(input, '\t', skipRows = @[2])
    check table == @[@["a", "b", "c"], @["0.1", "0.2", "0.3"]]
    input.setPosition(0)
    table = readTable(input, '\t', skipCols = @[2])
    check table == @[@["a", "b"], @["0.1", "0.2"], @["\"\t1\"", "\"2\t\""]]
    input.setPosition(0)
    table = readTable(input, '\t', skipRows = @[0, 1, 2])
    check table == newSeq[seq[string]]()
    input.setPosition(0)
    # Unsorted indices are accepted.
    table = readTable(input, '\t', skipCols = @[2, 1, 0])
    check table == newSeq[seq[string]]()

    close input

  test "malformedCommaTable2x3":
    let input = newStringStream(malformedCommaTable2x3)
    var table = readTable(input, ',', skipRows = @[0], skipCols = @[1])
    check table == @[@["1\", \"2", ""]]
    input.setPosition(0)
    table = readTable(input, ',', skipRows = @[0, 1], skipCols = @[1])
    check table == newSeq[seq[string]]()
    input.setPosition(0)
    # Duplicate indices are accepted.
    table = readTable(input, ',', skipRows = @[1], skipCols = @[1, 0, 2, 1, 2])
    check table == newSeq[seq[string]]()

    close input

  test "malformedSpaceTable4x4":
    let input = newStringStream(malformedSpaceTable4x4)
    var table = readTable(input, ' ', skipRows = @[1], skipCols = @[0])
    check table == @[@["a", "b", "c\" \""], @["10", "20", "30"], @["1", "2", ""]]
    input.setPosition(0)
    table = readTable(input, ' ', skipRows = @[1], skipCols = @[0, 3])
    check table == @[@["a", "b"], @["10", "20"], @["1", "2"]]
    input.setPosition(0)
    table = readTable(input, ' ', skipRows = @[0, 1, 2, 3], skipCols = @[1])
    check table == newSeq[seq[string]]()

    close input

  test "tabTable3x3 errors":
    let input = newStringStream(tabTable3x3)
    # Index beyond the last row: IndexDefect.
    expect IndexDefect: discard readTable(input, '\t', skipRows = @[0, 1, 5])
    input.setPosition(0)
    # Negative index: ValueError.
    expect ValueError: discard readTable(input, '\t', skipCols = @[0, 1, -5])

    close input

  test "malformedCommaTable2x3 errors":
    let input = newStringStream(malformedCommaTable2x3)
    expect ValueError: discard readTable(input, ',', skipRows = @[-1])
    input.setPosition(0)
    expect ValueError: discard readTable(input, ',', skipCols = @[-1])

    close input

  test "malformedSpaceTable4x4 errors":
    let input = newStringStream(malformedSpaceTable4x4)
    # Index 4 is one past the last row/column of a 4x4 table.
    expect IndexDefect: discard readTable(input, ' ', skipRows = @[4])
    input.setPosition(0)
    expect IndexDefect: discard readTable(input, ' ', skipCols = @[4])

    close input
+
+
suite "Transpose":
  # Besides the expected cell layout, each fixture test verifies that
  # transposing twice gives back the parsed table.
  test "tabTable3x3":
    let source = newStringStream(tabTable3x3)
    let parsed = readTable(source, '\t')
    let flipped = transpose(parsed)
    check flipped == @[
      @["a", "0.1", "\"\t1\""],
      @["b", "0.2", "\"2\t\""],
      @["c", "0.3", "\"3\t3\""],
    ]
    check transpose(flipped) == parsed

    source.close()

  test "malformedCommaTable2x3":
    let source = newStringStream(malformedCommaTable2x3)
    let parsed = readTable(source, ',')
    let flipped = transpose(parsed)
    check flipped == @[@["ä", "1\", \"2"], @["¿", "3\""], @["©", ""]]
    check transpose(flipped) == parsed

    source.close()

  test "malformedSpaceTable4x4":
    let source = newStringStream(malformedSpaceTable4x4)
    let parsed = readTable(source, ' ')
    let flipped = transpose(parsed)
    check flipped == @[
      @["", "", "", ""],
      @["a", "", "10", "1"],
      @["b", "", "20", "2"],
      @["c\" \"", "", "30", ""],
    ]
    check transpose(flipped) == parsed

    source.close()

  test "malformedCommaTable101x3":
    # TODO: Better tests for large tables?
    let source = newStringStream(malformedCommaTable101x3)
    let parsed = readTable(source, ',')
    check parsed.transpose.transpose == parsed

    source.close()

  test "malformedCommaTable102x3":
    # TODO: Better tests for large tables?
    let source = newStringStream(malformedCommaTable102x3)
    let parsed = readTable(source, ',')
    check parsed.transpose.transpose == parsed

    source.close()

  test "empty table (no-op)":
    # Nothing to read at all: IOError.
    expect IOError: discard readTable(newStringStream(""), ',')
    # A table whose only row is skipped is empty and stays empty.
    let source = newStringStream("a, b, c")
    let parsed = readTable(source, ',', skipRows = @[0])
    let nothing = newSeq[seq[string]]()
    require parsed == nothing
    check transpose(parsed) == nothing

    source.close()

  test "malformed tables (raw)":
    # Underfull table is filled with empty cells during transpose.
    let underfull = @[@["a", "b", "c"], @["1", "2"]]
    check transpose(underfull) == @[
      @["a", "1"], @["b", "2"], @["c", ""]
    ]
    # Malformed table: IndexDefect.
    let growing = @[@["a", "b"], @["1", "2", "3"]]
    expect IndexDefect: discard transpose(growing)
+
+
suite "reshape":
  test "tabTable3x3":
    let source = newStringStream(tabTable3x3)
    let parsed = readTable(source, '\t')
    # Check reshaping to single-row and single-column tables.
    let singleRow = reshape(parsed, (rows: 1, cols: 9))
    check singleRow == @[
      @["a", "b", "c", "0.1", "0.2", "0.3", "\"\t1\"", "\"2\t\"", "\"3\t3\""]
    ]
    let singleColumn = reshape(parsed, (rows: 9, cols: 1))
    check singleColumn == @[
      @["a"],
      @["b"],
      @["c"],
      @["0.1"],
      @["0.2"],
      @["0.3"],
      @["\"\t1\""],
      @["\"2\t\""],
      @["\"3\t3\""],
    ]

    source.close()

  test "malformedCommaTable2x3":
    let source = newStringStream(malformedCommaTable2x3)
    let parsed = readTable(source, ',')
    let tall = reshape(parsed, (rows: 3, cols: 2))
    check tall == @[
      @["ä", "¿"],
      @["©", "1\", \"2"],
      @["3\"", ""],
    ]
    # Check that we can un-reshape.
    check reshape(tall, (rows: 2, cols: 3)) == parsed

    source.close()

  test "malformedSpaceTable4x4":
    let source = newStringStream(malformedSpaceTable4x4)
    let parsed = readTable(source, ' ')
    # No-op: the requested shape is the shape the table already has.
    let same = reshape(parsed, (rows: 4, cols: 4))
    check same == @[
      @["", "a", "b", "c\" \""],
      @["", "", "", ""],
      @["", "10", "20", "30"],
      @["", "1", "2", ""],
    ]
    # Reshaping to smaller capacity: ValueError.
    expect ValueError: discard reshape(parsed, (rows: 2, cols: 4))

    source.close()

  test "malformed tables (raw)":
    # Underfull table, filled in with empty cells.
    let underfull = @[@["a", "b", "c"], @["1", "2"]]
    check reshape(underfull, (rows: 3, cols: 2)) == @[
      @["a", "b"], @["c", "1"], @["2", ""]
    ]
    # Malformed table: ValueError.
    let shortFirstRow = @[@["a", "b"], @["1", "2", "3"]]
    expect ValueError: discard reshape(shortFirstRow, (rows: 3, cols: 2))
    # Overfull table: IndexDefect.
    let overfull = @[@["a", "b", "c"], @["1", "2", "3", "4"]]
    expect IndexDefect: discard reshape(overfull, (rows: 3, cols: 2))