A => .gitignore +3 -0
@@ 1,3 @@
+*
+!/**/
+!*.*
A => README.md +34 -0
@@ 1,34 @@
+# simplediff
+
+A [Nim](https://nim-lang.org) implementation of a simple diff algorithm, based on [Paul Butler's `simplediff`](https://github.com/paulgb/simplediff).
+
+## Usage
+
+`simplediff` provides a `diff` proc which takes two `openArray`s and generates a `seq` of "instructions" to turn the first into the second. Each "instruction" is of the `Diff` type, whose `kind` field is one of `Insertion`, `Deletion`, or `NoChange`. Each `Diff` also has a `tokens` field, which contains the run of elements to be inserted, deleted, or left alone.
+
+For example:
+
+```
+import simplediff
+
+echo diff([1, 2, 3], [1, 2])
+# @[Diff(kind: NoChange, tokens: @[1, 2]), Diff(kind: Deletion, tokens: @[3])]
+```
+
+Any type that implements the `==` operator and a `hash` proc can be used (the elements are used as keys of a `Table`).
+
+`simplediff` also provides a convenience wrapper for diffing two strings. By default, the strings are split into lines for diffing, but this can be changed with the `seps` parameter.
+
+```
+import simplediff
+
+for diff in stringDiff("the word is blue", "the word is red", seps={' '}):
+ echo diff
+# Diff(kind: NoChange, tokens: @["the", "word", "is"])
+# Diff(kind: Deletion, tokens: @["blue"])
+# Diff(kind: Insertion, tokens: @["red"])
+```
+
+## Contributing
+
+Contributions are welcome! Please send patches, questions, requests, etc. to my [public inbox](mailto:~reesmichael1/public-inbox@lists.sr.ht).
A => simplediff.nimble +12 -0
@@ 1,12 @@
+# Package
+
+version = "0.1.0"
+author = "Michael Rees"
+description = "A library for straightforward calculation of string differences"
+license = "GPL-3.0"
+srcDir = "src"
+installExt = @["nim"]
+
+# Dependencies
+
+requires "nim >= 1.0"
A => src/simplediff.nim +73 -0
@@ 1,73 @@
+import strutils
+import tables
+
+
+type
+ ChangeType* = enum ## What a `Diff` entry does with its tokens.
+ Insertion, Deletion, NoChange
+
+ Diff*[T] = object ## One diff entry: a change kind and its tokens.
+ kind*: ChangeType ## Whether `tokens` are inserted, deleted or left alone.
+ tokens*: seq[T] ## The run of items this entry applies to.
+
+
+proc diff*[T](itemsOld, itemsNew: openArray[T]): seq[Diff[T]] =
+  ## Compute the differences between two sequences of items.
+  ##
+  ## The longest run of consecutive items common to both inputs is kept as
+  ## a `NoChange` entry, and the items before and after it are diffed
+  ## recursively. Inputs with nothing in common yield a `Deletion` of
+  ## `itemsOld` followed by an `Insertion` of `itemsNew`, skipping either
+  ## entry if its input is empty. Read in order, the returned entries
+  ## describe how to turn `itemsOld` into `itemsNew`.
+
+  # Every position at which each distinct item occurs in itemsOld.
+  var positionsInOld: Table[T, seq[int]]
+  for posOld, item in itemsOld:
+    positionsInOld.mgetOrPut(item, newSeq[int]()).add(posOld)
+
+  # Longest common run seen so far: its length and its start in each input.
+  var
+    bestLen = 0
+    bestStartOld = 0
+    bestStartNew = 0
+
+  # While visiting itemsNew[posNew], runs[posOld] is the length of the
+  # common run ending at itemsOld[posOld] and itemsNew[posNew];
+  # prevRuns holds the same information for the previous element of
+  # itemsNew.
+  var prevRuns: Table[int, int]
+  for posNew, item in itemsNew:
+    var runs: Table[int, int]
+    for posOld in positionsInOld.getOrDefault(item):
+      # Extend the run that ended just before this pair of positions. A
+      # missing entry counts as 0, which also covers posOld == 0.
+      let runLen = prevRuns.getOrDefault(posOld - 1) + 1
+      runs[posOld] = runLen
+      if runLen > bestLen:
+        bestLen = runLen
+        bestStartOld = posOld - runLen + 1
+        bestStartNew = posNew - runLen + 1
+    prevRuns = runs
+
+  if bestLen == 0:
+    # Nothing in common: everything old is deleted, everything new inserted.
+    if itemsOld.len > 0:
+      result.add(Diff[T](kind: Deletion, tokens: @itemsOld))
+    if itemsNew.len > 0:
+      result.add(Diff[T](kind: Insertion, tokens: @itemsNew))
+    return
+
+  # Keep the common run untouched and diff whatever surrounds it.
+  let
+    endOld = bestStartOld + bestLen
+    endNew = bestStartNew + bestLen
+  result = diff(itemsOld[0..<bestStartOld], itemsNew[0..<bestStartNew])
+  result.add(Diff[T](kind: NoChange, tokens: itemsNew[bestStartNew..<endNew]))
+  result.add(diff(itemsOld[endOld..<itemsOld.len],
+                  itemsNew[endNew..<itemsNew.len]))
+
+
+proc stringDiff*(s1, s2: string, seps: set[char] = Newlines): seq[Diff[string]] =
+  ## Diff two strings token by token, where the tokens are the pieces
+  ## obtained by splitting each string on any character in `seps`
+  ## (`Newlines` by default). The returned entries describe how to turn
+  ## the tokens of s1 into the tokens of s2.
+  diff(s1.split(seps = seps), s2.split(seps = seps))
A => tests/config.nims +1 -0
@@ 1,1 @@
+switch("path", "$projectDir/../src")<
\ No newline at end of file
A => tests/testDiff.nim +73 -0
@@ 1,73 @@
+import strutils
+import unittest
+
+import simplediff
+
+
+suite "test bare diff":
+  # Expected values follow the doctests of simplediff's Python implementation
+  test "bare diff on ints with equal start":
+    let expected = [
+      Diff[int](kind: NoChange, tokens: @[1]),
+      Diff[int](kind: Deletion, tokens: @[2]),
+      Diff[int](kind: NoChange, tokens: @[3, 4])
+    ]
+    check diff(@[1, 2, 3, 4], @[1, 3, 4]) == expected
+
+  test "bare diff on ints with deletion at start":
+    let expected = [
+      Diff[int](kind: Deletion, tokens: @[1]),
+      Diff[int](kind: NoChange, tokens: @[2, 3, 4]),
+      Diff[int](kind: Insertion, tokens: @[1])
+    ]
+    check diff(@[1, 2, 3, 4], @[2, 3, 4, 1]) == expected
+
+  test "bare diff on strings with words for tokens":
+    let
+      oldWords = split("The quick brown fox jumps over the lazy dog")
+      newWords = split("The slow blue cheese drips over the lazy carrot")
+      expected = [
+        Diff[string](kind: NoChange, tokens: @["The"]),
+        Diff[string](kind: Deletion,
+                     tokens: @["quick", "brown", "fox", "jumps"]),
+        Diff[string](kind: Insertion,
+                     tokens: @["slow", "blue", "cheese", "drips"]),
+        Diff[string](kind: NoChange, tokens: @["over", "the", "lazy"]),
+        Diff[string](kind: Deletion, tokens: @["dog"]),
+        Diff[string](kind: Insertion, tokens: @["carrot"]),
+      ]
+    check diff(oldWords, newWords) == expected
+
+
+suite "test stringDiff":
+  test "correct diff for identical one-line strings":
+    let expected = [
+      Diff[string](kind: NoChange, tokens: @["abc"])
+    ]
+    check stringDiff("abc", "abc") == expected
+
+  test "correct diff for identical multi-line strings":
+    let expected = [
+      Diff[string](kind: NoChange, tokens: @["abc def", "123 456"])
+    ]
+    check stringDiff("abc def\n123 456", "abc def\n123 456") == expected
+
+  test "correct diff for different one-line strings":
+    let expected = [
+      Diff[string](kind: Deletion, tokens: @["abc"]),
+      Diff[string](kind: Insertion, tokens: @["def"])
+    ]
+    check stringDiff("abc", "def") == expected
+
+  test "correct diff for different multi-line strings":
+    let expected = [
+      Diff[string](kind: Deletion, tokens: @["abc"]),
+      Diff[string](kind: Insertion, tokens: @["abc 123"]),
+      Diff[string](kind: NoChange, tokens: @["def"])
+    ]
+    check stringDiff("abc\ndef", "abc 123\ndef") == expected
+
+  test "correct diff when splitting on a different character":
+    let expected = [
+      Diff[string](kind: Deletion, tokens: @["abc"]),
+      Diff[string](kind: Insertion, tokens: @["abc 123"]),
+      Diff[string](kind: NoChange, tokens: @["def"])
+    ]
+    check stringDiff("abc;def", "abc 123;def", seps = {';'}) == expected
+
+  test "correct diff when splitting on multiple characters":
+    let expected = [
+      Diff[string](kind: NoChange, tokens: @["abc"]),
+      Diff[string](kind: Deletion, tokens: @["def", "123"]),
+      Diff[string](kind: Insertion, tokens: @["fed", "abc"])
+    ]
+    check stringDiff("abc;def,123", "abc;fed;abc", seps = {';', ','}) == expected