From 18afdf13c0278e5c283640cc0e923481f3370972 Mon Sep 17 00:00:00 2001 From: Bradley Taunt Date: Fri, 2 Feb 2024 12:48:04 -0500 Subject: [PATCH] Initial commit for cgit platform --- .gitignore | 2 + LICENSE | 21 + Makefile | 71 ++ README | 258 ++++++ config.mk | 20 + docs/index.html | 236 ++++++ smu.1 | 23 + smu.c | 791 ++++++++++++++++++ smu.h | 19 + tests/code_fence.html | 25 + tests/code_fence.text | 37 + tests/commonmark/backslash_escapes.html | 3 + tests/commonmark/backslash_escapes.text | 5 + tests/html.html | 7 + tests/html.text | 10 + tests/linebreak.html | 2 + tests/linebreak.text | 2 + tests/lists.html | 14 + tests/lists.text | 10 + tests/mdtest/Amps_and_angle_encoding.html | 6 + tests/mdtest/Amps_and_angle_encoding.text | 12 + tests/mdtest/Auto_links.html | 12 + tests/mdtest/Auto_links.text | 13 + .../mdtest/Blockquotes_with_code_blocks.html | 11 + .../mdtest/Blockquotes_with_code_blocks.text | 11 + tests/mdtest/Code_Blocks.html | 12 + tests/mdtest/Code_Blocks.text | 14 + tests/mdtest/Code_Spans.html | 3 + tests/mdtest/Code_Spans.text | 5 + tests/mdtest/Inline_HTML_comments.html | 8 + tests/mdtest/Inline_HTML_comments.text | 13 + tests/mdtest/LICENSE | 339 ++++++++ tests/mdtest/Links,_inline_style.html | 12 + tests/mdtest/Links,_inline_style.text | 24 + tests/mdtest/Literal_quotes_in_titles.html | 1 + tests/mdtest/Literal_quotes_in_titles.text | 1 + tests/mdtest/Nested_blockquotes.html | 5 + tests/mdtest/Nested_blockquotes.text | 5 + tests/mdtest/Ordered_and_unordered_lists.html | 129 +++ tests/mdtest/Ordered_and_unordered_lists.text | 122 +++ tests/mdtest/README.md | 1 + tests/mdtest/Strong_and_em_together.html | 4 + tests/mdtest/Strong_and_em_together.text | 7 + tests/mdtest/Tabs.html | 21 + tests/mdtest/Tabs.text | 21 + tests/mdtest/Tidyness.html | 7 + tests/mdtest/Tidyness.text | 5 + tests/nohtml/basic.html | 7 + tests/nohtml/basic.text | 7 + tests/old_readme.html | 220 +++++ tests/old_readme.text | 238 ++++++ tests/ruler.html | 9 + tests/ruler.text 
| 11 + tests/table.html | 9 + tests/table.text | 10 + tests/testdoc.html | 72 ++ tests/testdoc.text | 80 ++ tests/unicode.html | 2 + tests/unicode.text | 5 + 59 files changed, 3050 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 README create mode 100644 config.mk create mode 100644 docs/index.html create mode 100644 smu.1 create mode 100644 smu.c create mode 100644 smu.h create mode 100644 tests/code_fence.html create mode 100644 tests/code_fence.text create mode 100644 tests/commonmark/backslash_escapes.html create mode 100644 tests/commonmark/backslash_escapes.text create mode 100644 tests/html.html create mode 100644 tests/html.text create mode 100644 tests/linebreak.html create mode 100644 tests/linebreak.text create mode 100644 tests/lists.html create mode 100644 tests/lists.text create mode 100644 tests/mdtest/Amps_and_angle_encoding.html create mode 100644 tests/mdtest/Amps_and_angle_encoding.text create mode 100644 tests/mdtest/Auto_links.html create mode 100644 tests/mdtest/Auto_links.text create mode 100644 tests/mdtest/Blockquotes_with_code_blocks.html create mode 100644 tests/mdtest/Blockquotes_with_code_blocks.text create mode 100644 tests/mdtest/Code_Blocks.html create mode 100644 tests/mdtest/Code_Blocks.text create mode 100644 tests/mdtest/Code_Spans.html create mode 100644 tests/mdtest/Code_Spans.text create mode 100644 tests/mdtest/Inline_HTML_comments.html create mode 100644 tests/mdtest/Inline_HTML_comments.text create mode 100644 tests/mdtest/LICENSE create mode 100644 tests/mdtest/Links,_inline_style.html create mode 100644 tests/mdtest/Links,_inline_style.text create mode 100644 tests/mdtest/Literal_quotes_in_titles.html create mode 100644 tests/mdtest/Literal_quotes_in_titles.text create mode 100644 tests/mdtest/Nested_blockquotes.html create mode 100644 tests/mdtest/Nested_blockquotes.text create mode 100644 tests/mdtest/Ordered_and_unordered_lists.html create mode 
100644 tests/mdtest/Ordered_and_unordered_lists.text create mode 100644 tests/mdtest/README.md create mode 100644 tests/mdtest/Strong_and_em_together.html create mode 100644 tests/mdtest/Strong_and_em_together.text create mode 100644 tests/mdtest/Tabs.html create mode 100644 tests/mdtest/Tabs.text create mode 100644 tests/mdtest/Tidyness.html create mode 100644 tests/mdtest/Tidyness.text create mode 100644 tests/nohtml/basic.html create mode 100644 tests/nohtml/basic.text create mode 100644 tests/old_readme.html create mode 100644 tests/old_readme.text create mode 100644 tests/ruler.html create mode 100644 tests/ruler.text create mode 100644 tests/table.html create mode 100644 tests/table.text create mode 100644 tests/testdoc.html create mode 100644 tests/testdoc.text create mode 100644 tests/unicode.html create mode 100644 tests/unicode.text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..dc6a1ca --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +smu.o +smu diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..5956c4f --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT/X Consortium License + +(c) 2007-2014 Enno Boland + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..38039e8 --- /dev/null +++ b/Makefile @@ -0,0 +1,71 @@ +# libsmu - simple markup +# (c) 2007, 2008 Enno Boland + +include config.mk + +SRC = smu.c +OBJ = ${SRC:.c=.o} +# VALGRIND = valgrind -q --error-exitcode=1 + +all: options smu + +options: + @echo smu build options: + @echo "CFLAGS = ${CFLAGS}" + @echo "LDFLAGS = ${LDFLAGS}" + @echo "CC = ${CC}" + +.c.o: + @echo CC $< + @${CC} -c ${CFLAGS} $< + +${OBJ}: config.mk + +smu: ${OBJ} + @echo LD $@ + @${CC} -o $@ ${OBJ} ${LDFLAGS} + +clean: + @echo cleaning + @rm -f smu ${OBJ} ${LIBOBJ} smu-${VERSION}.tar.gz + +dist: clean + @echo creating dist tarball + @mkdir -p smu-${VERSION} + @cp -R LICENSE Makefile config.mk smu.1 ${SRC} smu-${VERSION} + @tar -cf smu-${VERSION}.tar smu-${VERSION} + @gzip smu-${VERSION}.tar + @rm -rf smu-${VERSION} + +install: all + @echo installing executable file to ${DESTDIR}${PREFIX}/bin + @mkdir -p ${DESTDIR}${PREFIX}/bin + @cp -f smu ${DESTDIR}${PREFIX}/bin + @chmod 755 ${DESTDIR}${PREFIX}/bin/smu + @echo installing manual page to ${DESTDIR}${MANPREFIX}/man1 + @mkdir -p ${DESTDIR}${MANPREFIX}/man1 + @sed "s/VERSION/${VERSION}/g" < smu.1 > ${DESTDIR}${MANPREFIX}/man1/smu.1 + @chmod 644 ${DESTDIR}${MANPREFIX}/man1/smu.1 + +uninstall: + @echo removing executable file from ${DESTDIR}${PREFIX}/bin + @rm -f ${DESTDIR}${PREFIX}/bin/smu + @echo removing manual page from ${DESTDIR}${MANPREFIX}/man1 + @rm -f ${DESTDIR}${MANPREFIX}/man1/smu.1 + +test: $(patsubst %.text,%.html,$(wildcard tests/*.text tests/*/*.text)) + git diff --exit-code -- tests + +docs: docs/index.html + +docs/index.html: README smu + ./smu $< > $@ + +tests/nohtml/%.html: tests/nohtml/%.text smu + 
${VALGRIND} ./smu -n $< > $@ + +%.html: %.text smu + ${VALGRIND} ./smu $< > $@ + +.PHONY: all options clean dist install uninstall +.DELETE_ON_ERROR: diff --git a/README b/README new file mode 100644 index 0000000..cfe91c7 --- /dev/null +++ b/README @@ -0,0 +1,258 @@ +smu - a Simple Markup Language +============================== + +_smu_ is a very simple and minimal markup language. It is designed for use in +wiki-like environments. smu makes it very easy to write your documents on the +fly and convert them into HTML. + +smu is capable of parsing very large documents. It scales just great as long +as you avoid a huge amount of indents. + +Syntax +====== + +smu was started as a rewrite of +[markdown](http://daringfireball.net/projects/markdown/) but became something +more lightweight and consistent. It differs from [CommonMark](https://commonmark.org/) in the following ways: + +* No support for _reference style links_ +* Stricter indentation rules for lists +* Lists don't end paragraphs by themselves (blank line needed) +* Horizontal rules (`
`) must use `- - -` as syntax +* Code fences have stricter syntax + +Patches that increase the CommonMark compatibility are welcome as long as they don't increase the code complexity significantly. + +This project is a fork of the [original smu](https://github.com/gottox/smu) by +[Enno Boland (gottox)](https://eboland.de). The main differences to the +original smu are: + +* Support for code fences +* Improved [CommonMark](https://commonmark.org/) compatibility. E.g. + * Code blocks need four spaces indentation instead of three + * Skip empty lines at end of code blocks + * Ignore single spaces around code spans + * Keep HTML comments in output + * Improved spec compliance for lists + * Nesting code block in blockquotes works + * "Empty" lines in lists behave identically, no matter how much whitespace they contain + * No backslash escapes in code blocks + * Use first number as start number for ordered lists +* Added a simple test suite to check for compliance and avoid regressions + +Inline patterns +--------------- + +There are several patterns you can use to highlight your text: + +* Emphasis + * Surround your text with `*` or `_` to get *emphasised* text: + This *is* cool. + This _is_ cool, too. + * Surround your text with `**` or `__` to get **strong** text: + This **is** cool. + This __is__ cool, too. + * Surround your text with `***` or `___` to get ***strong and emphasised*** text: + This ***is*** cool. + This ___is___ cool, too. + * But this example won't work as expected: + ***Hello** you* + This is a wontfix bug because it would make the source too complex. + Use this instead: + ***Hello*** *you* + +* inline Code + + You can produce inline code by surrounding it with backticks. + + Use `rm -rf /` if you're a N00b. + Use ``rm -rf /`` if you're a N00b. + Use ```rm -rf /``` if you're a N00b. + + Double and triple backticks can be used if the code itself contains backticks. + + +Titles +------ + +Creating titles in smu is very easy. 
There are two different syntax styles. The +first is underlining with at least three characters: + + Heading + ======= + + Topic + ----- + +This is very intuitive and self explaining. The resulting sourcecode looks like +this: + +

Heading

+

Topic

+ +Use the following prefixes if you don't like underlining: + + # h1 + ## h2 + ### h3 + #### h4 + ##### h5 + ###### h6 + +Links +----- + +The simplest way to define a link is with simple `<>`. + + + +You can do the same for E-Mail addresses: + + + +If you want to define a label for the url, you have to use a different syntax + + [smu - simple mark up](http://s01.de/~gottox/index.cgi/proj_smu) + +The resulting HTML-Code + + smu - simple mark up

+ +Lists +----- + +Defining lists is very straightforward: + + * Item 1 + * Item 2 + * Item 3 + +Result: + +
    +
  • Item 1
  • +
  • Item 2
  • +
  • Item 3
  • +
+ +Defining ordered lists is also very easy: + + 1. Item 1 + 2. Item 2 + 3. Item 3 + +Only the first number in a list is meaningful. All following list items are +continuously counted. If you want a list starting at 2, you could write: + + 2. Item 1 + 2. Item 2 + 2. Item 3 + +and get the following HTML which will render with the numbers 2, 3, 4: + +
    +
  1. Item 1
  2. +
  3. Item 2
  4. +
  5. Item 3
  6. +
+ +Code & Blockquote +----------------- + +Use the `> ` as a line prefix for defining blockquotes. Blockquotes are +interpreted as well. This makes it possible to embed links, headings and even +other quotes into a quote: + + > Hello + > This is a quote with a [link](http://s01.de/~gottox) + +Result: +

+ Hello + This is a quote with a link

+
+ + +You can define a code block with a leading Tab or with __4__ leading spaces + + this.is(code) + + this.is(code, too) + +Result: +
this.is(code)
+
this.is(code, too)
+	
+ +Please note that you can't use HTML or smu syntax in a code block. + +Another way to write code blocks is to use code fences: + + ```json + {"some": "code"} + ``` + +This has two advantages: +* The optional language identifier will be turned into a `language-` class name +* You can keep the original indentation which helps when doing copy & paste + +Tables +------ + +Tables can be generated with the following syntax: + + | Heading1 | Heading2 | + | -------- | -------- | + | Cell 1 | Cell2 | + +Aligning the columns make the input nicer to read, but is not necessary to get +correct table output. You could just write + + | Heading1 | Heading2 | + | --- | --- | + | Cell 1 | Cell2 | + +To align the content of table cells, use `|:--|` for left, `|--:|` for right +and `|:--:|` for centered alignment in the row which separates the header from +the table body. + + | Heading1 | Heading2 | Heading3 | + | :------- | :------: | -------: | + | Left | Center | Right | + +Other interesting stuff +----------------------- + +* to insert a horizontal rule simple add `- - -` into an empty line: + + Hello + - - - + Hello2 + + Result: +

+ Hello +


+ + Hello2

+ +* Any ASCII punctuation character may escaped by precedeing them with a + backslash to avoid them being interpreted: + + !"#$%&'()*+,-./:;<=>?@[]^_`{|}~\ + +* To force a linebreak simple add two spaces to the end of the line: + + No linebreak + here. + But here is + one. + +embed HTML +---------- + +You can include arbitrary HTML code in your documents. The HTML will be +passed through to the resulting document without modification. This is a good +way to work around features that are missing in smu. If you don't want this +behaviour, use the `-n` flag when executing smu to stricly escape the HTML +tags. diff --git a/config.mk b/config.mk new file mode 100644 index 0000000..a41c92f --- /dev/null +++ b/config.mk @@ -0,0 +1,20 @@ +# smu version +VERSION = 1.5 + +# paths +PREFIX = /usr/local +MANPREFIX = ${PREFIX}/share/man + +# includes and libs +INCS = -I. -I/usr/include +LIBS = -L/usr/lib + +# flags +CFLAGS = -g -O0 -Wall -Werror -ansi ${INCS} -DVERSION=\"${VERSION}\" -Wstrict-prototypes +#CFLAGS = -fprofile-arcs -ftest-coverage -pg -g -O0 -Wall -Werror -ansi ${INCS} -DVERSION=\"${VERSION}\" +#CFLAGS = -Os -Wall -Werror -ansi ${INCS} -DVERSION=\"${VERSION}\" +#LDFLAGS = -fprofile-arcs -ftest-coverage -pg ${LIBS} +LDFLAGS = ${LIBS} + +# compiler +CC = cc diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000..51bfc42 --- /dev/null +++ b/docs/index.html @@ -0,0 +1,236 @@ +

smu - a Simple Markup Language

+

smu is a very simple and minimal markup language. It is designed for use in +wiki-like environments. smu makes it very easy to write your documents on the +fly and convert them into HTML.

+

smu is capable of parsing very large documents. It scales just great as long +as you avoid a huge amount of indents.

+

Syntax

+

smu was started as a rewrite of +markdown but became something +more lightweight and consistent. It differs from CommonMark in the following ways:

+
    +
  • No support for reference style links
  • +
  • Stricter indentation rules for lists
  • +
  • Lists don't end paragraphs by themselves (blank line needed)
  • +
  • Horizontal rules (<hr>) must use - - - as syntax
  • +
  • Code fences have stricter syntax
  • +
+

Patches that increase the CommonMark compatibility are welcome as long as they don't increase the code complexity significantly.

+

This project is a fork of the original smu by +Enno Boland (gottox). The main differences to the +original smu are:

+
    +
  • Support for code fences
  • +
  • Improved CommonMark compatibility. E.g. +
      +
    • Code blocks need four spaces indentation instead of three
    • +
    • Skip empty lines at end of code blocks
    • +
    • Ignore single spaces around code spans
    • +
    • Keep HTML comments in output
    • +
    • Improved spec compliance for lists
    • +
    • Nesting code block in blockquotes works
    • +
    • "Empty" lines in lists behave identically, no matter how much whitespace they contain
    • +
    • No backslash escapes in code blocks
    • +
    • Use first number as start number for ordered lists
    • +
    +
  • +
  • Added a simple test suite to check for compliance and avoid regressions
  • +
+

Inline patterns

+

There are several patterns you can use to highlight your text:

+
    +
  • Emphasis

    + +
      +
    • Surround your text with * or _ to get emphasised text: +
      This *is* cool.
      +This _is_ cool, too.
      +
      +
    • +
    • Surround your text with ** or __ to get strong text: +
      This **is** cool.
      +This __is__ cool, too.
      +
      +
    • +
    • Surround your text with *** or ___ to get strong and emphasised text: +
      This ***is*** cool.
      +This ___is___ cool, too.
      +
      +
    • +
    • But this example won't work as expected: +
      ***Hello** you*
      +
      +

      This is a wontfix bug because it would make the source too complex. +Use this instead: +

      +
      ***Hello*** *you*
      +
      +
    • +
    +
  • +
  • inline Code

    +

    You can produce inline code by surrounding it with backticks.

    +
    Use `rm -rf /` if you're a N00b.
    +Use ``rm -rf /`` if you're a N00b.
    +Use ```rm -rf /``` if you're a N00b.
    +
    +

    Double and triple backticks can be used if the code itself contains backticks.

    +
  • +
+

Titles

+

Creating titles in smu is very easy. There are two different syntax styles. The +first is underlining with at least three characters:

+
Heading
+=======
+
+Topic
+-----
+
+

This is very intuitive and self-explanatory. The resulting source code looks like +this:

+
<h1>Heading</h1>
+<h2>Topic</h2>
+
+

Use the following prefixes if you don't like underlining:

+
# h1
+## h2
+### h3
+#### h4
+##### h5
+###### h6
+
+

Links

+

The simplest way to define a link is with simple <>.

+
<http://s01.de>
+
+

You can do the same for E-Mail addresses:

+
<yourname@s01.de>
+
+

If you want to define a label for the url, you have to use a different syntax

+
[smu - simple mark up](http://s01.de/~gottox/index.cgi/proj_smu)
+
+

The resulting HTML-Code

+
<a href="http://s01.de/~gottox/index.cgi/proj_smu">smu - simple mark up</a></p>
+
+

Lists

+

Defining lists is very straightforward:

+
* Item 1
+* Item 2
+* Item 3
+
+

Result:

+
<ul>
+<li>Item 1</li>
+<li>Item 2</li>
+<li>Item 3</li>
+</ul>
+
+

Defining ordered lists is also very easy:

+
1. Item 1
+2. Item 2
+3. Item 3
+
+

Only the first number in a list is meaningful. All following list items are +continuously counted. If you want a list starting at 2, you could write:

+
2. Item 1
+2. Item 2
+2. Item 3
+
+

and get the following HTML which will render with the numbers 2, 3, 4:

+
<ol start="2">
+<li>Item 1</li>
+<li>Item 2</li>
+<li>Item 3</li>
+</ol>
+
+

Code & Blockquote

+

Use the > as a line prefix for defining blockquotes. Blockquotes are +interpreted as well. This makes it possible to embed links, headings and even +other quotes into a quote:

+
> Hello
+> This is a quote with a [link](http://s01.de/~gottox)
+
+

Result: +

+
<blockquote><p>
+Hello
+This is a quote with a <a href="http://s01.de/~gottox">link</a></p>
+</blockquote>
+
+

You can define a code block with a leading Tab or with 4 leading spaces

+
	this.is(code)
+
+    this.is(code, too)
+
+

Result: +

+
<pre><code>this.is(code)</code></pre>
+<pre><code>this.is(code, too)
+</code></pre>
+
+

Please note that you can't use HTML or smu syntax in a code block.

+

Another way to write code blocks is to use code fences:

+
```json
+{"some": "code"}
+```
+
+

This has two advantages:

+ +
    +
  • The optional language identifier will be turned into a language- class name
  • +
  • You can keep the original indentation which helps when doing copy & paste
  • +
+

Tables

+

Tables can be generated with the following syntax:

+ + + +
Heading1 Heading2
Cell 1 Cell2
+

Aligning the columns makes the input nicer to read, but is not necessary to get +correct table output. You could just write

+ + + +
Heading1 Heading2
Cell 1 Cell2
+

To align the content of table cells, use |:--| for left, |--:| for right +and |:--:| for centered alignment in the row which separates the header from +the table body.

+ + + +
Heading1 Heading2 Heading3
Left Center Right
+

Other interesting stuff

+
    +
  • to insert a horizontal rule simply add - - - into an empty line:

    +
    Hello
    +- - -
    +Hello2
    +
    +

    Result: +

    +
    <p>
    +Hello
    +<hr />
    +
    +
    Hello2</p>
    +
    +
  • +
  • Any ASCII punctuation character may be escaped by preceding it with a +backslash to avoid it being interpreted:

    +
    !"#$%&'()*+,-./:;<=>?@[]^_`{|}~\
    +
    +
  • +
  • To force a linebreak simply add two spaces to the end of the line:

    +
    No linebreak
    +here.
    +But here is  
    +one.
    +
    +
  • +
+

embed HTML

+

You can include arbitrary HTML code in your documents. The HTML will be +passed through to the resulting document without modification. This is a good +way to work around features that are missing in smu. If you don't want this +behaviour, use the -n flag when executing smu to strictly escape the HTML +tags.

diff --git a/smu.1 b/smu.1 new file mode 100644 index 0000000..3ddd112 --- /dev/null +++ b/smu.1 @@ -0,0 +1,23 @@ +.TH smu 1 smu\-VERSION +.SH NAME +smu \- simple markup +.SH SYNOPSIS +.B smu +.RB [ \-h ] +.RB [ \-v ] +.RB [ \-n ] +.RB [ file ] +.SH DESCRIPTION +smu is a simple interpreter for a simplified markdown dialect. +.SH OPTIONS +.TP +.B \-v +prints version information to standard error, then exits. +.TP +.B \-h +prints usage information to standard error, then exits. +.TP +.B \-n +escapes all HTML Tags. +.SH BUGS +Please report any Bugs to https://github.com/Gottox/smu/issues or via mail. diff --git a/smu.c b/smu.c new file mode 100644 index 0000000..63e1977 --- /dev/null +++ b/smu.c @@ -0,0 +1,791 @@ +/* smu - simple markup + * Copyright (C) <2007, 2008> Enno Boland + * 2019-2022 Karl Bartel + * 2022 bzt + * + * See LICENSE for further informations + */ +#include +#include +#include +#include +#include +#include + +#define LENGTH(x) sizeof(x)/sizeof(x[0]) +#define ADDC(b,i) if (i % BUFSIZ == 0) { b = realloc(b, (i + BUFSIZ) * sizeof(char)); if (!b) eprint("Malloc failed."); } b[i] + +typedef int (*Parser)(const char *, const char *, int); +typedef struct { + char *search; + int process; + char *before, *after; +} Tag; + +static int docomment(const char *begin, const char *end, int newblock); /* Parser for html-comments */ +static int docodefence(const char *begin, const char *end, int newblock); /* Parser for code fences */ +static int dohtml(const char *begin, const char *end, int newblock); /* Parser for html */ +static int dolineprefix(const char *begin, const char *end, int newblock);/* Parser for line prefix tags */ +static int dolink(const char *begin, const char *end, int newblock); /* Parser for links and images */ +static int dolist(const char *begin, const char *end, int newblock); /* Parser for lists */ +static int dotable(const char *begin, const char *end, int newblock); /* Parser for tables */ +static int doparagraph(const char *begin, const 
char *end, int newblock); /* Parser for paragraphs */ +static int doreplace(const char *begin, const char *end, int newblock); /* Parser for simple replaces */ +static int doshortlink(const char *begin, const char *end, int newblock); /* Parser for links and images */ +static int dosurround(const char *begin, const char *end, int newblock); /* Parser for surrounding tags */ +static int dounderline(const char *begin, const char *end, int newblock); /* Parser for underline tags */ +static void *ereallocz(void *p, size_t size); +static void hprint(const char *begin, const char *end); /* escapes HTML and prints it to output */ +static void process(const char *begin, const char *end, int isblock); /* Processes range between begin and end. */ + +/* list of parsers */ +static Parser parsers[] = { dounderline, docomment, docodefence, dolineprefix, + dolist, dotable, doparagraph, dosurround, dolink, + doshortlink, dohtml, doreplace }; +static int nohtml = 0; +static int in_paragraph = 0; + +regex_t p_end_regex; /* End of paragraph */ + +static Tag lineprefix[] = { + { " ", 0, "
", "\n
" }, + { "\t", 0, "
", "\n
" }, + { ">", 2, "
", "
" }, + { "###### ", 1, "
", "
" }, + { "##### ", 1, "
", "
" }, + { "#### ", 1, "

", "

" }, + { "### ", 1, "

", "

" }, + { "## ", 1, "

", "

" }, + { "# ", 1, "

", "

" }, + { "- - -\n", 1, "
", ""}, + { "---\n", 1, "
", ""}, +}; + +static Tag underline[] = { + { "=", 1, "

", "

\n" }, + { "-", 1, "

", "

\n" }, +}; + +static Tag surround[] = { + { "```", 0, "", "" }, + { "``", 0, "", "" }, + { "`", 0, "", "" }, + { "___", 1, "", "" }, + { "***", 1, "", "" }, + { "__", 1, "", "" }, + { "**", 1, "", "" }, + { "_", 1, "", "" }, + { "*", 1, "", "" }, +}; + +static const char *replace[][2] = { + /* Backslash escapes */ + { "\\\\", "\\" }, + { "\\`", "`" }, + { "\\*", "*" }, + { "\\_", "_" }, + { "\\{", "{" }, + { "\\}", "}" }, + { "\\[", "[" }, + { "\\]", "]" }, + { "\\(", "(" }, + { "\\)", ")" }, + { "\\#", "#" }, + { "\\+", "+" }, + { "\\-", "-" }, + { "\\.", "." }, + { "\\!", "!" }, + { "\\\"", """ }, + { "\\$", "$" }, + { "\\%", "%" }, + { "\\&", "&" }, + { "\\'", "'" }, + { "\\,", "," }, + { "\\-", "-" }, + { "\\.", "." }, + { "\\/", "/" }, + { "\\:", ":" }, + { "\\;", ";" }, + { "\\<", "<" }, + { "\\>", ">" }, + { "\\=", "=" }, + { "\\?", "?" }, + { "\\@", "@" }, + { "\\^", "^" }, + { "\\|", "|" }, + { "\\~", "~" }, + /* HTML syntax symbols that need to be turned into entities */ + { "<", "<" }, + { ">", ">" }, + { "&", "&" }, /* Avoid replacing the & in & */ + { "&", "&" }, + /* Preserve newlines with two spaces before linebreak */ + { " \n", "
\n" }, +}; + +static const char *code_fence = "```"; + +void +eprint(const char *format, ...) { + va_list ap; + + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + +void end_paragraph(void) { + if (in_paragraph) { + fputs("

\n", stdout); + in_paragraph = 0; + } +} + +int +docomment(const char *begin, const char *end, int newblock) { + char *p; + + if (nohtml || strncmp(""); + if (!p || p + 3 >= end) + return 0; + fprintf(stdout, "%.*s\n", (int)(p + 3 - begin), begin); + return (p + 3 - begin) * (newblock ? -1 : 1); +} + +int +docodefence(const char *begin, const char *end, int newblock) { + const char *p, *start, *stop, *lang_start, *lang_stop; + unsigned int l = strlen(code_fence); + + if (!newblock) + return 0; + + if (strncmp(begin, code_fence, l) != 0) + return 0; + + /* Find start of content and read language string */ + start = begin + l; + lang_start = start; + while (start[0] != '\n') + start++; + lang_stop = start; + start++; + + /* Find end of fence */ + p = start - 1; + do { + stop = p; + p = strstr(p + 1, code_fence); + } while (p && p[-1] == '\\'); + if (p && p[-1] != '\\') + stop = p; + + /* No closing code fence means the rest of file is code (CommonMark) */ + if (!p) + stop = end; + + /* Print output */ + if (lang_start == lang_stop) { + fputs("
", stdout);
+	} else {
+		fputs("
", stdout);
+	}
+	hprint(start, stop);
+	fputs("
\n", stdout); + return -(stop - begin + l); +} + +int +dohtml(const char *begin, const char *end, int newblock) { + const char *p, *tag, *tagend; + + if (nohtml || begin + 2 >= end) + return 0; + p = begin; + if (p[0] != '<' || !isalpha(p[1])) + return 0; + p++; + tag = p; + for (; isalnum(*p) && p < end; p++); + tagend = p; + if (p > end || tag == tagend) + return 0; + while ((p = strstr(p, "') { + p++; + fwrite(begin, sizeof(char), p - begin + tagend - tag, stdout); + return p - begin + tagend - tag; + } + } + p = strchr(tagend, '>'); + if (p) { + fwrite(begin, sizeof(char), p - begin + 2, stdout); + return p - begin + 2; + } + else + return 0; +} + +int +dolineprefix(const char *begin, const char *end, int newblock) { + unsigned int i, j, l; + char *buffer; + const char *p; + int consumed_input = 0; + + if (newblock) + p = begin; + else if (*begin == '\n') { + p = begin + 1; + consumed_input += 1; + } else + return 0; + for (i = 0; i < LENGTH(lineprefix); i++) { + l = strlen(lineprefix[i].search); + if (end - p + 1 < l) + continue; + if (strncmp(lineprefix[i].search, p, l)) + continue; + if (*begin == '\n') + fputc('\n', stdout); + + /* All line prefixes add a block element. These are not allowed + * inside paragraphs, so we must end the paragraph first. 
*/ + end_paragraph(); + + fputs(lineprefix[i].before, stdout); + if (lineprefix[i].search[l-1] == '\n') { + fputc('\n', stdout); + return l - 1 + consumed_input; + } + if (!(buffer = malloc(BUFSIZ))) + eprint("Malloc failed."); + buffer[0] = '\0'; + + /* Collect lines into buffer while they start with the prefix */ + j = 0; + while ((strncmp(lineprefix[i].search, p, l) == 0) && p + l < end) { + p += l; + + /* Special case for blockquotes: optional space after > */ + if (lineprefix[i].search[0] == '>' && *p == ' ') { + p++; + } + + while (p < end) { + ADDC(buffer, j) = *p; + j++; + if (*(p++) == '\n') + break; + } + } + + /* Skip empty lines in block */ + while (*(buffer + j - 1) == '\n') { + j--; + } + + ADDC(buffer, j) = '\0'; + if (lineprefix[i].process) + process(buffer, buffer + strlen(buffer), lineprefix[i].process >= 2); + else + hprint(buffer, buffer + strlen(buffer)); + puts(lineprefix[i].after); + free(buffer); + return -(p - begin); + } + return 0; +} + +int +dolink(const char *begin, const char *end, int newblock) { + int img, len, sep, parens_depth = 1; + const char *desc, *link, *p, *q, *descend, *linkend; + const char *title = NULL, *titleend = NULL; + + if (*begin == '[') + img = 0; + else if (strncmp(begin, "![", 2) == 0) + img = 1; + else + return 0; + p = desc = begin + 1 + img; + if (!(p = strstr(desc, "](")) || p > end) + return 0; + for (q = strstr(desc, "!["); q && q < end && q < p; q = strstr(q + 1, "![")) + if (!(p = strstr(p + 1, "](")) || p > end) + return 0; + descend = p; + link = p + 2; + + /* find end of link while handling nested parens */ + q = link; + while (parens_depth) { + if (!(q = strpbrk(q, "()")) || q > end) + return 0; + if (*q == '(') + parens_depth++; + else + parens_depth--; + if (parens_depth && q < end) + q++; + } + + if ((p = strpbrk(link, "\"'")) && p < end && q > p) { + sep = p[0]; /* separator: can be " or ' */ + title = p + 1; + /* strip trailing whitespace */ + for (linkend = p; linkend > link && isspace(*(linkend 
- 1)); linkend--); + for (titleend = q - 1; titleend > link && isspace(*(titleend)); titleend--); + if (titleend < title || *titleend != sep) { + return 0; + } + } + else { + linkend = q; + } + + /* Links can be given in angular brackets */ + if (*link == '<' && *(linkend - 1) == '>') { + link++; + linkend--; + } + + len = q + 1 - begin; + if (img) { + fputs("\"",", stdout); + } + else { + fputs("", stdout); + process(desc, descend, 0); + fputs("", stdout); + } + return len; +} + +int +dolist(const char *begin, const char *end, int newblock) { + unsigned int i, j, indent, run, isblock, start_number; + const char *p, *q, *num_start; + char *buffer = NULL; + char marker = '\0'; /* Bullet symbol or \0 for unordered lists */ + + isblock = 0; + if (newblock) + p = begin; + else if (*begin == '\n') + p = begin + 1; + else + return 0; + q = p; + if (*p == '-' || *p == '*' || *p == '+') { + marker = *p; + } else { + num_start = p; + for (; p < end && *p >= '0' && *p <= '9'; p++); + if (p >= end || *p != '.') + return 0; + start_number = atoi(num_start); + } + p++; + if (p >= end || !(*p == ' ' || *p == '\t')) + return 0; + + end_paragraph(); + + for (p++; p != end && (*p == ' ' || *p == '\t'); p++); + indent = p - q; + buffer = ereallocz(buffer, BUFSIZ); + if (!newblock) + fputc('\n', stdout); + + if (marker) { + fputs("
    \n", stdout); + } else if (start_number == 1) { + fputs("
      \n", stdout); + } else { + printf("
        \n", start_number); + } + run = 1; + for (; p < end && run; p++) { + for (i = 0; p < end && run; p++, i++) { + if (*p == '\n') { + if (p + 1 == end) + break; + else { + /* Handle empty lines */ + for (q = p + 1; (*q == ' ' || *q == '\t') && q < end; q++); + if (*q == '\n') { + ADDC(buffer, i) = '\n'; + i++; + run = 0; + isblock++; + p = q; + } + } + q = p + 1; + j = 0; + if (marker && *q == marker) + j = 1; + else { + for (; q + j != end && q[j] >= '0' && q[j] <= '9' && j < indent; j++); + if (q + j == end) + break; + if (j > 0 && q[j] == '.') + j++; + else + j = 0; + } + if (q + indent < end) + for (; (q[j] == ' ' || q[j] == '\t') && j < indent; j++); + if (j == indent) { + ADDC(buffer, i) = '\n'; + i++; + p += indent; + run = 1; + if (*q == ' ' || *q == '\t') + p++; + else + break; + } + else if (j < indent) + run = 0; + } + ADDC(buffer, i) = *p; + } + ADDC(buffer, i) = '\0'; + fputs("
      1. ", stdout); + process(buffer, buffer + i, isblock > 1 || (isblock == 1 && run)); + fputs("
      2. \n", stdout); + } + fputs(marker ? "
\n" : "\n", stdout); + free(buffer); + p--; + while (*(--p) == '\n'); + return -(p - begin + 1); +} + +/* smu table parser, Copyright(C) bzt 2022 MIT */ + +static char intable = 0, inrow, incell; /* table state */ +static long int calign; + +int +dotable(const char *begin, const char *end, int newblock) { + const char *p; + int i, l = (int)sizeof(calign) * 4; + + if(*begin != '|') + return 0; + if(inrow && (begin + 1 >= end || begin[1] == '\n')) { /* close cell and row and if ends, table too */ + fprintf(stdout, "", inrow == -1 ? 'h' : 'd'); + inrow = 0; + if(begin + 2 >= end || begin[2] == '\n') { + intable = 0; + fputs("\n", stdout); + return 2; + } + return 1; + } + if(begin < end && (begin[1] == '-' || begin[1] == ':')) { /* only load cell aligns from 2nd line */ + for(i = -1, p = begin; p < end && *p != '\n'; p++) + if(*p == '|') { + i++; + if(i < l && p[1] == ':') { + calign |= 1 << (i * 2); p++; + } + } else + if(i < l && *p == ':') + calign |= 1 << (i * 2 + 1); + return p - begin + 1; + } + if(!intable) { /* open table */ + intable = 1; inrow = -1; incell = 0; calign = 0; + fputs("\n", stdout); + } + if(!inrow) { /* open row */ + inrow = 1; incell = 0; + fputs("", stdout); + } + if(incell) /* close cell */ + fprintf(stdout, "", inrow == -1 ? 'h' : 'd'); + l = incell < l ? (calign >> (incell * 2)) & 3 : 0; /* open cell */ + fprintf(stdout, "", inrow == -1 ? 'h' : 'd', + l == 2 ? " class=\"right\"" : (l == 3 ? " class=\"center\"" : "")); + incell++; + for(p = begin + 1; p < end && *p == ' '; p++); + return p - begin; +} + +int +doparagraph(const char *begin, const char *end, int newblock) { + const char *p; + regmatch_t match; + + if (!newblock) + return 0; + if (regexec(&p_end_regex, begin + 1, 1, &match, 0)) { + p = end; + } else { + p = begin + 1 + match.rm_so; + } + + fputs("

", stdout); + in_paragraph = 1; + process(begin, p, 0); + end_paragraph(); + + return -(p - begin); +} + +int +doreplace(const char *begin, const char *end, int newblock) { + unsigned int i, l; + + for (i = 0; i < LENGTH(replace); i++) { + l = strlen(replace[i][0]); + if (end - begin < l) + continue; + if (strncmp(replace[i][0], begin, l) == 0) { + fputs(replace[i][1], stdout); + return l; + } + } + return 0; +} + +int +doshortlink(const char *begin, const char *end, int newblock) { + const char *p, *c; + int ismail = 0; + + if (*begin != '<') + return 0; + for (p = begin + 1; p != end; p++) { + switch (*p) { + case ' ': + case '\t': + case '\n': + return 0; + case '#': + case ':': + ismail = -1; + break; + case '@': + if (ismail == 0) + ismail = 1; + break; + case '>': + if (ismail == 0) + return 0; + fputs("", stdout); + for (c = begin + 1; *c != '>'; c++) + fprintf(stdout, "&#%u;", *c); + } + else { + hprint(begin + 1, p); + fputs("\">", stdout); + hprint(begin + 1, p); + } + fputs("", stdout); + return p - begin + 1; + } + } + return 0; +} + +int +dosurround(const char *begin, const char *end, int newblock) { + unsigned int i, l; + const char *p, *start, *stop; + + for (i = 0; i < LENGTH(surround); i++) { + l = strlen(surround[i].search); + if (end - begin < 2*l || strncmp(begin, surround[i].search, l) != 0) + continue; + start = begin + l; + p = start; + do { + stop = p; + p = strstr(p + 1, surround[i].search); + } while (p && p[-1] == '\\'); + if (p && p[-1] != '\\') + stop = p; + if (!stop || stop < start || stop >= end) + continue; + fputs(surround[i].before, stdout); + + /* Single space at start and end are ignored */ + if (start[0] == ' ' && stop[-1] == ' ' && start < stop - 1) { + start++; + stop--; + l++; + } + + if (surround[i].process) + process(start, stop, 0); + else + hprint(start, stop); + fputs(surround[i].after, stdout); + return stop - start + 2 * l; + } + return 0; +} + +int +dounderline(const char *begin, const char *end, int newblock) { + 
unsigned int i, j, l; + const char *p; + + if (!newblock) + return 0; + p = begin; + for (l = 0; p + l != end && p[l] != '\n'; l++); + p += l + 1; + if (l == 0) + return 0; + for (i = 0; i < LENGTH(underline); i++) { + for (j = 0; p + j < end && p[j] != '\n' && p[j] == underline[i].search[0]; j++); + if (j >= 3) { + fputs(underline[i].before, stdout); + if (underline[i].process) + process(begin, begin + l, 0); + else + hprint(begin, begin + l); + fputs(underline[i].after, stdout); + return -(j + p - begin); + } + } + return 0; +} + +void * +ereallocz(void *p, size_t size) { + void *res; + res = realloc(p, size); + if (!res) + eprint("realloc: %zu bytes\n", size); + return res; +} + +void +hprint(const char *begin, const char *end) { + const char *p; + + for (p = begin; p != end; p++) { + if (*p == '&') + fputs("&", stdout); + else if (*p == '"') + fputs(""", stdout); + else if (*p == '>') + fputs(">", stdout); + else if (*p == '<') + fputs("<", stdout); + else + fputc(*p, stdout); + } +} + +void +process(const char *begin, const char *end, int newblock) { + const char *p; + int affected; + unsigned int i; + + for (p = begin; p < end;) { + if (newblock) + while (*p == '\n') + if (++p == end) + return; + + for (i = 0; i < LENGTH(parsers); i++) + if ((affected = parsers[i](p, end, newblock))) + break; + if (affected) + p += abs(affected); + else + fputc(*p++, stdout); + + /* Don't print single newline at end */ + if (p + 1 == end && *p == '\n') + return; + + if (p[0] == '\n' && p + 1 != end && p[1] == '\n') + newblock = 1; + else + newblock = affected < 0; + } +} + +int +main(int argc, char *argv[]) { + char *buffer = NULL; + int s, i; + unsigned long len, bsize; + FILE *source = stdin; + + regcomp(&p_end_regex, "(\n\n|(^|\n)```)", REG_EXTENDED); + + for (i = 1; i < argc; i++) { + if (!strcmp("-v", argv[i])) + eprint("simple markup %s (C) Enno Boland\n",VERSION); + else if (!strcmp("-n", argv[i])) + nohtml = 1; + else if (argv[i][0] != '-') + break; + else if 
(!strcmp("--", argv[i])) { + i++; + break; + } + else + eprint("Usage %s [-n] [file]\n -n escape html strictly\n", argv[0]); + } + if (i < argc && !(source = fopen(argv[i], "r"))) + eprint("Cannot open file `%s`\n",argv[i]); + bsize = 2 * BUFSIZ; + buffer = ereallocz(buffer, bsize); + len = 0; + while ((s = fread(buffer + len, 1, BUFSIZ, source))) { + len += s; + if (BUFSIZ + len + 1 > bsize) { + bsize += BUFSIZ; + if (!(buffer = realloc(buffer, bsize))) + eprint("realloc failed."); + } + } + buffer[len] = '\0'; + process(buffer, buffer + len, 1); + fclose(source); + free(buffer); + return EXIT_SUCCESS; +} diff --git a/smu.h b/smu.h new file mode 100644 index 0000000..6a54043 --- /dev/null +++ b/smu.h @@ -0,0 +1,19 @@ +/* libsmu - simple markup library + * Copyright (C) <2007, 2008> Enno Boland + * + * See LICENSE for further informations + */ +#include + +/** + * Converts contents of a simple markup stream (in) and prints them to out. + * If suppresshtml == 1, it will create plain text of the simple markup instead + * of HTML. + * + * Returns 0 on success. + */ +int smu_convert(FILE *out, FILE *in, int suppresshtml); + +/** utility */ +void eprint(const char *format, ...); + diff --git a/tests/code_fence.html b/tests/code_fence.html new file mode 100644 index 0000000..d0d3629 --- /dev/null +++ b/tests/code_fence.html @@ -0,0 +1,25 @@ +

code blocks can start in the first line
+
+

This is a code block

+
foo bar
+eggs bacon
+
+

They are allowed to start within a block:

+
foo
+
+

Code blocks can contain empty lines

+
before empty line
+
+after empty line
+
+

Language info strings will add a class to the code block

+
#!/bin/sh
+ls -la .
+
+

Accidental code fences can be avoided by escaping +``` +like this. +```

+

According to commonmark, a code block that is not terminated will continue until the end of file.

+
This is code.
+
diff --git a/tests/code_fence.text b/tests/code_fence.text new file mode 100644 index 0000000..3e3b681 --- /dev/null +++ b/tests/code_fence.text @@ -0,0 +1,37 @@ +``` +code blocks can start in the first line +``` + +This is a code block + +``` +foo bar +eggs bacon +``` + +They are allowed to start within a block: +``` +foo +``` + +Code blocks can contain empty lines +``` +before empty line + +after empty line +``` + +Language info strings will add a class to the code block +```sh +#!/bin/sh +ls -la . +``` + +Accidental code fences can be avoided by escaping +\`\`\` +like this. +\`\`\` + +According to commonmark, a code block that is not terminated will continue until the end of file. +``` +This is code. diff --git a/tests/commonmark/backslash_escapes.html b/tests/commonmark/backslash_escapes.html new file mode 100644 index 0000000..0976aed --- /dev/null +++ b/tests/commonmark/backslash_escapes.html @@ -0,0 +1,3 @@ +

Backslash escapes

+

https://spec.commonmark.org/0.29/#example-298

+

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

diff --git a/tests/commonmark/backslash_escapes.text b/tests/commonmark/backslash_escapes.text new file mode 100644 index 0000000..106551a --- /dev/null +++ b/tests/commonmark/backslash_escapes.text @@ -0,0 +1,5 @@ +# [Backslash escapes](https://spec.commonmark.org/0.29/#backslash-escapes) + + + +\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~ diff --git a/tests/html.html b/tests/html.html new file mode 100644 index 0000000..0a5b69d --- /dev/null +++ b/tests/html.html @@ -0,0 +1,7 @@ +

bold

+

text

+

title subtext

+

    +
  • One +
  • Two +

diff --git a/tests/html.text b/tests/html.text new file mode 100644 index 0000000..534bc83 --- /dev/null +++ b/tests/html.text @@ -0,0 +1,10 @@ +bold + +text + +

title subtext

+ +
    +
  • One +
  • Two +
diff --git a/tests/linebreak.html b/tests/linebreak.html new file mode 100644 index 0000000..38e27ac --- /dev/null +++ b/tests/linebreak.html @@ -0,0 +1,2 @@ +

If a line ends with two space (like this line),
+a hard line brake is inserted

diff --git a/tests/linebreak.text b/tests/linebreak.text new file mode 100644 index 0000000..17360db --- /dev/null +++ b/tests/linebreak.text @@ -0,0 +1,2 @@ +If a line ends with two space (like this line), +a hard line brake is inserted diff --git a/tests/lists.html b/tests/lists.html new file mode 100644 index 0000000..6208fe7 --- /dev/null +++ b/tests/lists.html @@ -0,0 +1,14 @@ +
    +
  1. One
  2. +
  3. Two
  4. +
+

Ordered lists will use the first number as start number and continue counting from there.

+
    +
  1. Two
  2. +
  3. Three
  4. +
+

HTML does not allow lists in paragraphs, so the following should end the paragraph before starting the list:

+ +
    +
  • bar
  • +
diff --git a/tests/lists.text b/tests/lists.text new file mode 100644 index 0000000..71e2886 --- /dev/null +++ b/tests/lists.text @@ -0,0 +1,10 @@ +1. One +1. Two + +Ordered lists will use the first number as start number and continue counting from there. + +2. Two +4. Three + +HTML does not allow lists in paragraphs, so the following should end the paragraph before starting the list: +- bar diff --git a/tests/mdtest/Amps_and_angle_encoding.html b/tests/mdtest/Amps_and_angle_encoding.html new file mode 100644 index 0000000..a3fd44a --- /dev/null +++ b/tests/mdtest/Amps_and_angle_encoding.html @@ -0,0 +1,6 @@ +

AT&T has an ampersand in their name.

+

AT&T is another way to write it.

+

This & that.

+

4 < 5.

+

6 > 5.

+

Here's an inline link.

diff --git a/tests/mdtest/Amps_and_angle_encoding.text b/tests/mdtest/Amps_and_angle_encoding.text new file mode 100644 index 0000000..5fe4125 --- /dev/null +++ b/tests/mdtest/Amps_and_angle_encoding.text @@ -0,0 +1,12 @@ +AT&T has an ampersand in their name. + +AT&T is another way to write it. + +This & that. + +4 < 5. + +6 > 5. + +Here's an inline [link](/script?foo=1&bar=2). + diff --git a/tests/mdtest/Auto_links.html b/tests/mdtest/Auto_links.html new file mode 100644 index 0000000..8e85d01 --- /dev/null +++ b/tests/mdtest/Auto_links.html @@ -0,0 +1,12 @@ +

Link: http://example.com/.

+

With an ampersand: http://example.com/?foo=1&bar=2

+ +

Blockquoted: http://example.com/

+
+

Auto-links should not occur here: <http://example.com/>

+
or here: <http://example.com/>
+
diff --git a/tests/mdtest/Auto_links.text b/tests/mdtest/Auto_links.text new file mode 100644 index 0000000..abbc488 --- /dev/null +++ b/tests/mdtest/Auto_links.text @@ -0,0 +1,13 @@ +Link: . + +With an ampersand: + +* In a list? +* +* It should. + +> Blockquoted: + +Auto-links should not occur here: `` + + or here: \ No newline at end of file diff --git a/tests/mdtest/Blockquotes_with_code_blocks.html b/tests/mdtest/Blockquotes_with_code_blocks.html new file mode 100644 index 0000000..87ea58a --- /dev/null +++ b/tests/mdtest/Blockquotes_with_code_blocks.html @@ -0,0 +1,11 @@ +

Example:

+
sub status {
+    print "working";
+}
+
+

Or:

+
sub status {
+    return "working";
+}
+
+
diff --git a/tests/mdtest/Blockquotes_with_code_blocks.text b/tests/mdtest/Blockquotes_with_code_blocks.text new file mode 100644 index 0000000..c31d171 --- /dev/null +++ b/tests/mdtest/Blockquotes_with_code_blocks.text @@ -0,0 +1,11 @@ +> Example: +> +> sub status { +> print "working"; +> } +> +> Or: +> +> sub status { +> return "working"; +> } diff --git a/tests/mdtest/Code_Blocks.html b/tests/mdtest/Code_Blocks.html new file mode 100644 index 0000000..7d89615 --- /dev/null +++ b/tests/mdtest/Code_Blocks.html @@ -0,0 +1,12 @@ +
code block on the first line
+
+

Regular text.

+
code block indented by spaces
+
+

Regular text.

+
the lines in this block  
+all contain trailing spaces  
+
+

Regular Text.

+
code block on the last line
+
diff --git a/tests/mdtest/Code_Blocks.text b/tests/mdtest/Code_Blocks.text new file mode 100644 index 0000000..b54b092 --- /dev/null +++ b/tests/mdtest/Code_Blocks.text @@ -0,0 +1,14 @@ + code block on the first line + +Regular text. + + code block indented by spaces + +Regular text. + + the lines in this block + all contain trailing spaces + +Regular Text. + + code block on the last line \ No newline at end of file diff --git a/tests/mdtest/Code_Spans.html b/tests/mdtest/Code_Spans.html new file mode 100644 index 0000000..27acea1 --- /dev/null +++ b/tests/mdtest/Code_Spans.html @@ -0,0 +1,3 @@ +

<test a=" content of attribute ">

+

Fix for backticks within HTML tag: like this

+

Here's how you put `backticks` in a code span.

diff --git a/tests/mdtest/Code_Spans.text b/tests/mdtest/Code_Spans.text new file mode 100644 index 0000000..5c229c7 --- /dev/null +++ b/tests/mdtest/Code_Spans.text @@ -0,0 +1,5 @@ +`` + +Fix for backticks within HTML tag: like this + +Here's how you put `` `backticks` `` in a code span. \ No newline at end of file diff --git a/tests/mdtest/Inline_HTML_comments.html b/tests/mdtest/Inline_HTML_comments.html new file mode 100644 index 0000000..ebc4818 --- /dev/null +++ b/tests/mdtest/Inline_HTML_comments.html @@ -0,0 +1,8 @@ +

Paragraph one.

+ + +

Paragraph two.

+ +

The end.

diff --git a/tests/mdtest/Inline_HTML_comments.text b/tests/mdtest/Inline_HTML_comments.text new file mode 100644 index 0000000..99878b8 --- /dev/null +++ b/tests/mdtest/Inline_HTML_comments.text @@ -0,0 +1,13 @@ +Paragraph one. + + + + + +Paragraph two. + + + +The end. diff --git a/tests/mdtest/LICENSE b/tests/mdtest/LICENSE new file mode 100644 index 0000000..d511905 --- /dev/null +++ b/tests/mdtest/LICENSE @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. 
+These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. 
The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. 
+ + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. 
+ +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. 
diff --git a/tests/mdtest/Links,_inline_style.html b/tests/mdtest/Links,_inline_style.html new file mode 100644 index 0000000..feb4637 --- /dev/null +++ b/tests/mdtest/Links,_inline_style.html @@ -0,0 +1,12 @@ +

Just a URL.

+

URL and title.

+

URL and title.

+

URL and title.

+

URL and title.

+

URL wrapped in angle brackets.

+

URL w/ angle brackets + title.

+

Empty.

+

With parens in the URL

+

(With outer parens and parens in url)

+

With parens in the URL

+

(With outer parens and parens in url)

diff --git a/tests/mdtest/Links,_inline_style.text b/tests/mdtest/Links,_inline_style.text new file mode 100644 index 0000000..aba9658 --- /dev/null +++ b/tests/mdtest/Links,_inline_style.text @@ -0,0 +1,24 @@ +Just a [URL](/url/). + +[URL and title](/url/ "title"). + +[URL and title](/url/ "title preceded by two spaces"). + +[URL and title](/url/ "title preceded by a tab"). + +[URL and title](/url/ "title has spaces afterward" ). + +[URL wrapped in angle brackets](). + +[URL w/ angle brackets + title]( "Here's the title"). + +[Empty](). + +[With parens in the URL](http://en.wikipedia.org/wiki/WIMP_(computing)) + +(With outer parens and [parens in url](/foo(bar))) + + +[With parens in the URL](/foo(bar) "and a title") + +(With outer parens and [parens in url](/foo(bar) "and a title")) diff --git a/tests/mdtest/Literal_quotes_in_titles.html b/tests/mdtest/Literal_quotes_in_titles.html new file mode 100644 index 0000000..94b80ae --- /dev/null +++ b/tests/mdtest/Literal_quotes_in_titles.html @@ -0,0 +1 @@ +

Foo bar.

diff --git a/tests/mdtest/Literal_quotes_in_titles.text b/tests/mdtest/Literal_quotes_in_titles.text new file mode 100644 index 0000000..ce74d90 --- /dev/null +++ b/tests/mdtest/Literal_quotes_in_titles.text @@ -0,0 +1 @@ +Foo [bar](/url/ "Title with "quotes" inside"). diff --git a/tests/mdtest/Nested_blockquotes.html b/tests/mdtest/Nested_blockquotes.html new file mode 100644 index 0000000..d990034 --- /dev/null +++ b/tests/mdtest/Nested_blockquotes.html @@ -0,0 +1,5 @@ +

foo

+

bar

+
+

foo

+
diff --git a/tests/mdtest/Nested_blockquotes.text b/tests/mdtest/Nested_blockquotes.text new file mode 100644 index 0000000..ed3c624 --- /dev/null +++ b/tests/mdtest/Nested_blockquotes.text @@ -0,0 +1,5 @@ +> foo +> +> > bar +> +> foo diff --git a/tests/mdtest/Ordered_and_unordered_lists.html b/tests/mdtest/Ordered_and_unordered_lists.html new file mode 100644 index 0000000..d516a55 --- /dev/null +++ b/tests/mdtest/Ordered_and_unordered_lists.html @@ -0,0 +1,129 @@ +

Unordered

+

Asterisks tight:

+
    +
  • asterisk 1
  • +
  • asterisk 2
  • +
  • asterisk 3
  • +
+

Asterisks loose:

+
    +
  • asterisk 1

    +
  • +
  • asterisk 2

    +
  • +
  • asterisk 3

    +
  • +
+
+

Pluses tight:

+
    +
  • Plus 1
  • +
  • Plus 2
  • +
  • Plus 3
  • +
+

Pluses loose:

+
    +
  • Plus 1

    +
  • +
  • Plus 2

    +
  • +
  • Plus 3

    +
  • +
+
+

Minuses tight:

+
    +
  • Minus 1
  • +
  • Minus 2
  • +
  • Minus 3
  • +
+

Minuses loose:

+
    +
  • Minus 1

    +
  • +
  • Minus 2

    +
  • +
  • Minus 3

    +
  • +
+

Ordered

+

Tight:

+
    +
  1. First
  2. +
  3. Second
  4. +
  5. Third
  6. +
+

and:

+
    +
  1. One
  2. +
  3. Two
  4. +
  5. Three
  6. +
+

Loose using tabs:

+
    +
  1. First

    +
  2. +
  3. Second

    +
  4. +
  5. Third

    +
  6. +
+

and using spaces:

+
    +
  1. One

    +
  2. +
  3. Two

    +
  4. +
  5. Three

    +
  6. +
+

Multiple paragraphs:

+
    +
  1. Item 1, graf one.

    +

    Item 2. graf two. The quick brown fox jumped over the lazy dog's +back.

    +
  2. +
  3. Item 2.

    +
  4. +
  5. Item 3.

    +
  6. +
+

Nested

+
    +
  • Tab +
      +
    • Tab +
        +
      • Tab
      • +
      +
    • +
    +
  • +
+

Here's another:

+
    +
  1. First
  2. +
  3. Second: +
      +
    • Fee
    • +
    • Fie
    • +
    • Foe
    • +
    +
  4. +
  5. Third
  6. +
+

Same thing but with paragraphs:

+
    +
  1. First

    +
  2. +
  3. Second:

    + +
      +
    • Fee
    • +
    • Fie
    • +
    • Foe
    • +
    +
  4. +
  5. Third

    +
  6. +
diff --git a/tests/mdtest/Ordered_and_unordered_lists.text b/tests/mdtest/Ordered_and_unordered_lists.text new file mode 100644 index 0000000..b7d9b23 --- /dev/null +++ b/tests/mdtest/Ordered_and_unordered_lists.text @@ -0,0 +1,122 @@ +## Unordered + +Asterisks tight: + +* asterisk 1 +* asterisk 2 +* asterisk 3 + + +Asterisks loose: + +* asterisk 1 + +* asterisk 2 + +* asterisk 3 + +- - - + +Pluses tight: + ++ Plus 1 ++ Plus 2 ++ Plus 3 + + +Pluses loose: + ++ Plus 1 + ++ Plus 2 + ++ Plus 3 + +- - - + + +Minuses tight: + +- Minus 1 +- Minus 2 +- Minus 3 + + +Minuses loose: + +- Minus 1 + +- Minus 2 + +- Minus 3 + + +## Ordered + +Tight: + +1. First +2. Second +3. Third + +and: + +1. One +2. Two +3. Three + + +Loose using tabs: + +1. First + +2. Second + +3. Third + +and using spaces: + +1. One + +2. Two + +3. Three + +Multiple paragraphs: + +1. Item 1, graf one. + + Item 2. graf two. The quick brown fox jumped over the lazy dog's + back. + +2. Item 2. + +3. Item 3. + + + +## Nested + +* Tab + * Tab + * Tab + +Here's another: + +1. First +2. Second: + * Fee + * Fie + * Foe +3. Third + +Same thing but with paragraphs: + +1. First + +2. Second: + * Fee + * Fie + * Foe + +3. Third diff --git a/tests/mdtest/README.md b/tests/mdtest/README.md new file mode 100644 index 0000000..4866f3e --- /dev/null +++ b/tests/mdtest/README.md @@ -0,0 +1 @@ +Tests adapted from [MDTest](https://github.com/michelf/mdtest). diff --git a/tests/mdtest/Strong_and_em_together.html b/tests/mdtest/Strong_and_em_together.html new file mode 100644 index 0000000..2629594 --- /dev/null +++ b/tests/mdtest/Strong_and_em_together.html @@ -0,0 +1,4 @@ +

This is strong and em.

+

So is this word.

+

This is strong and em.

+

So is this word.

diff --git a/tests/mdtest/Strong_and_em_together.text b/tests/mdtest/Strong_and_em_together.text new file mode 100644 index 0000000..95ee690 --- /dev/null +++ b/tests/mdtest/Strong_and_em_together.text @@ -0,0 +1,7 @@ +***This is strong and em.*** + +So is ***this*** word. + +___This is strong and em.___ + +So is ___this___ word. diff --git a/tests/mdtest/Tabs.html b/tests/mdtest/Tabs.html new file mode 100644 index 0000000..82110b9 --- /dev/null +++ b/tests/mdtest/Tabs.html @@ -0,0 +1,21 @@ +
    +
  • this is a list item +indented with tabs

    +
  • +
  • this is a list item +indented with spaces

    +
  • +
+

Code:

+
this code block is indented by one tab
+
+

And:

+
	this code block is indented by two tabs
+
+

And:

+
+	this is an example list item
+	indented with tabs
+
++   this is an example list item
+    indented with spaces
+
diff --git a/tests/mdtest/Tabs.text b/tests/mdtest/Tabs.text new file mode 100644 index 0000000..e93637f --- /dev/null +++ b/tests/mdtest/Tabs.text @@ -0,0 +1,21 @@ ++ this is a list item + indented with tabs + ++ this is a list item + indented with spaces + +Code: + + this code block is indented by one tab + +And: + + this code block is indented by two tabs + +And: + + + this is an example list item + indented with tabs + + + this is an example list item + indented with spaces diff --git a/tests/mdtest/Tidyness.html b/tests/mdtest/Tidyness.html new file mode 100644 index 0000000..564644d --- /dev/null +++ b/tests/mdtest/Tidyness.html @@ -0,0 +1,7 @@ +

A list within a blockquote:

+
    +
  • asterisk 1
  • +
  • asterisk 2
  • +
  • asterisk 3
  • +
+
diff --git a/tests/mdtest/Tidyness.text b/tests/mdtest/Tidyness.text new file mode 100644 index 0000000..5f18b8d --- /dev/null +++ b/tests/mdtest/Tidyness.text @@ -0,0 +1,5 @@ +> A list within a blockquote: +> +> * asterisk 1 +> * asterisk 2 +> * asterisk 3 diff --git a/tests/nohtml/basic.html b/tests/nohtml/basic.html new file mode 100644 index 0000000..8384b31 --- /dev/null +++ b/tests/nohtml/basic.html @@ -0,0 +1,7 @@ +

This document contains <html> elements, but they should not be interpreted, because smu is called with -n.

+

Here are <em>some</em> tags, they can also be uppercase <HTML>.

+

<p> +

+
More tags <a href="">here</a>
+
+

<p>

diff --git a/tests/nohtml/basic.text b/tests/nohtml/basic.text new file mode 100644 index 0000000..193fcda --- /dev/null +++ b/tests/nohtml/basic.text @@ -0,0 +1,7 @@ +This document contains elements, but they should not be interpreted, because smu is called with `-n`. + +Here are some tags, they can also be uppercase . + +

+ More tags here +

diff --git a/tests/old_readme.html b/tests/old_readme.html new file mode 100644 index 0000000..a7d4e47 --- /dev/null +++ b/tests/old_readme.html @@ -0,0 +1,220 @@ +

smu - a Simple Markup Language

+

smu is a very simple and minimal markup language. It is designed for use in +wiki-like environments. smu makes it very easy to write your documents on the +fly and convert them into HTML.

+

smu is capable of parsing very large documents. It scales just great as long +as you avoid a huge amount of indents.

+

Syntax

+

smu was started as a rewrite of +markdown but became something +more lightweight and consistent. It differs from CommonMark in the following ways:

+
    +
  • No support for reference style links
  • +
  • Stricter indentation rules for lists
  • +
  • Lists don't end paragraphs by themselves (blank line needed)
  • +
  • Horizontal rules (<hr>) must use - - - as syntax
  • +
  • Code fences have stricter syntax
  • +
+

Patches that increase the CommonMark compatibility are welcome as long as they don't increase the code complexity significantly.

+

This project is a fork of the original smu by +Enno Boland (gottox). The main differences to the +original smu are:

+
    +
  • Support for code fences
  • +
  • Improved CommonMark compatibility. E.g. +
      +
    • Code blocks need four spaces indentation instead of three
    • +
    • Skip empty lines at end of code blocks
    • +
    • Ignore single spaces around code spans
    • +
    • Keep HTML comments in output
    • +
    • Improved spec compliance for lists
    • +
    • Nesting code block in blockquotes works
    • +
    • "Empty" lines in lists behave identically, no matter how much whitespace they contain
    • +
    +
  • +
  • Added a simple test suite to check for compliance and avoid regressions
  • +
+

Inline patterns

+

There are several patterns you can use to highlight your text:

+
    +
  • Emphasis

    + +
      +
    • Surround your text with * or _ to get emphasised text: +
      This *is* cool.
      +This _is_ cool, too.
      +
      +
    • +
    • Surround your text with ** or __ to get strong text: +
      This **is** cool.
      +This __is__ cool, too.
      +
      +
    • +
    • Surround your text with *** or ___ to get strong and emphasised text: +
      This ***is*** cool.
      +This ___is___ cool, too.
      +
      +
    • +
    • But this example won't work as expected: +
      ***Hello** you*
      +
      +

      This is a wontfix bug because it would make the source too complex. +Use this instead: +

      +
      ***Hello*** *you*
      +
      +
    • +
    +
  • +
  • inline Code

    +

    You can produce inline code with surrounding ` or ``

    +
    Use `rm -rf /` if you're a N00b.
    +
    +
    Use ``rm -rf /`` if you're a N00b.
    +
    +

    Using ``ABC`` makes it possible to use Backticks without backslashing them.

    +
  • +
+

Titles

+

Creating titles in smu is very easy. There are two different syntax styles. The +first is underlining:

+
Heading
+=======
+
+Topic
+-----
+
+

This is very intuitive and self explaining. The resulting sourcecode looks like +this:

+
<h1>Heading</h1>
+<h2>Topic</h2>
+
+

Use the following prefixes if you don't like underlining:

+
# h1
+## h2
+### h3
+#### h4
+##### h5
+###### h6
+
+

Links

+

The simplest way to define a link is with simple <>.

+
<http://s01.de>
+
+

You can do the same for E-Mail addresses:

+
<yourname@s01.de>
+
+

If you want to define a label for the url, you have to use a different syntax

+
[smu - simple mark up](http://s01.de/~gottox/index.cgi/proj_smu)
+
+

The resulting HTML-Code

+
<a href="http://s01.de/~gottox/index.cgi/proj_smu">smu - simple mark up</a></p>
+
+

Lists

+

Defining lists is very straightforward:

+
* Item 1
+* Item 2
+* Item 3
+
+

Result:

+
<ul>
+<li>Item 1</li>
+<li>Item 2</li>
+<li>Item 3</li>
+</ul>
+
+

Defining ordered lists is also very easy:

+
1. Item 1
+2. Item 2
+3. Item 3
+
+

It is possible to use any leading number you want. So if you don't want to keep +your list synchronised, you simple can use any number. In this case it's +recommended to use 0., but it isn't mandatory.

+
0. Item 1
+0. Item 2
+0. Item 3
+
+

Both examples will cause the same result. Even this is possible:

+
1000. Item 1
+432.  Item 2
+0.    Item 3
+
+

This will be the result in these example:

+
<ol>
+<li>Item 1</li>
+<li>Item 2</li>
+<li>Item 3</li>
+</ol>
+
+

Code & Blockquote

+

Use the > as a line prefix for defining blockquotes. Blockquotes are +interpreted as well. This makes it possible to embed links, headings and even +other quotes into a quote:

+
> Hello
+> This is a quote with a [link](http://s01.de/~gottox)
+
+

Result: +

+
<blockquote><p>
+Hello
+This is a quote with a <a href="http://s01.de/~gottox">link</a></p>
+</blockquote>
+
+

You can define a code block with a leading Tab or with 4 leading spaces

+
	this.is(code)
+
+    this.is(code, too)
+
+

Result: +

+
<pre><code>this.is(code)</code></pre>
+<pre><code>this.is(code, too)
+</code></pre>
+
+

Please note that you can't use HTML or smu syntax in a code block.

+

Another way to write code blocks is to use code fences:

+
```json
+{"some": "code"}
+```
+
+

This has two advantages:

+ +
    +
  • The optional language identifier will be turned into a language- class name
  • +
  • You can keep the original indentation which helps when doing copy & paste
  • +
+

Other interesting stuff

+
    +
  • to insert a horizontal rule simple add - - - into an empty line:

    +
    Hello
    +- - -
    +Hello2
    +
    +

    Result: +

    +
    <p>
    +Hello
    +<hr />
    +
    +
    Hello2</p>
    +
    +
  • +
  • You can escape the following pattern to avoid them from being interpreted:

    +
    \ ` * _ { } [ ] ( ) # + - . !
    +
    +
  • +
  • To force a linebreak simple add two spaces to the end of the line:

    +
    No linebreak
    +here.
    +But here is  
    +one.
    +
    +
  • +
+

embed HTML

+

You can include arbitrary HTML code in your documents. The HTML will be +passed through to the resulting document without modification. This is a good +way to work around features that are missing in smu. If you don't want this +behaviour, use the -n flag when executing smu to stricly escape the HTML +tags.

diff --git a/tests/old_readme.text b/tests/old_readme.text new file mode 100644 index 0000000..0854b42 --- /dev/null +++ b/tests/old_readme.text @@ -0,0 +1,238 @@ +smu - a Simple Markup Language +============================== + +_smu_ is a very simple and minimal markup language. It is designed for use in +wiki-like environments. smu makes it very easy to write your documents on the +fly and convert them into HTML. + +smu is capable of parsing very large documents. It scales just great as long +as you avoid a huge amount of indents. + +Syntax +====== + +smu was started as a rewrite of +[markdown](http://daringfireball.net/projects/markdown/) but became something +more lightweight and consistent. It differs from [CommonMark](https://commonmark.org/) in the following ways: + +* No support for _reference style links_ +* Stricter indentation rules for lists +* Lists don't end paragraphs by themselves (blank line needed) +* Horizontal rules (`
`) must use `- - -` as syntax +* Code fences have stricter syntax + +Patches that increase the CommonMark compatibility are welcome as long as they don't increase the code complexity significantly. + +This project is a fork of the [original smu](https://github.com/gottox/smu) by +[Enno Boland (gottox)](https://eboland.de). The main differences to the +original smu are: + +* Support for code fences +* Improved [CommonMark](https://commonmark.org/) compatibility. E.g. + * Code blocks need four spaces indentation instead of three + * Skip empty lines at end of code blocks + * Ignore single spaces around code spans + * Keep HTML comments in output + * Improved spec compliance for lists + * Nesting code block in blockquotes works + * "Empty" lines in lists behave identically, no matter how much whitespace they contain +* Added a simple test suite to check for compliance and avoid regressions + +Inline patterns +--------------- + +There are several patterns you can use to highlight your text: + +* Emphasis + * Surround your text with `*` or `_` to get *emphasised* text: + This *is* cool. + This _is_ cool, too. + * Surround your text with `**` or `__` to get **strong** text: + This **is** cool. + This __is__ cool, too. + * Surround your text with `***` or `___` to get ***strong and emphasised*** text: + This ***is*** cool. + This ___is___ cool, too. + * But this example won't work as expected: + ***Hello** you* + This is a wontfix bug because it would make the source too complex. + Use this instead: + ***Hello*** *you* + +* inline Code + + You can produce inline code with surrounding `` ` `` or ``` `` ``` + + Use `rm -rf /` if you're a N00b. + + Use ``rm -rf /`` if you're a N00b. + + Using ``` ``ABC`` ``` makes it possible to use Backticks without backslashing them. + + +Titles +------ + +Creating titles in smu is very easy. There are two different syntax styles. 
The +first is underlining: + + Heading + ======= + + Topic + ----- + +This is very intuitive and self explaining. The resulting sourcecode looks like +this: + +

Heading

+

Topic

+ +Use the following prefixes if you don't like underlining: + + # h1 + ## h2 + ### h3 + #### h4 + ##### h5 + ###### h6 + +Links +----- + +The simplest way to define a link is with simple `<>`. + + + +You can do the same for E-Mail addresses: + + + +If you want to define a label for the url, you have to use a different syntax + + [smu - simple mark up](http://s01.de/~gottox/index.cgi/proj_smu) + +The resulting HTML-Code + + smu - simple mark up

+ +Lists +----- + +Defining lists is very straightforward: + + * Item 1 + * Item 2 + * Item 3 + +Result: + +
    +
  • Item 1
  • +
  • Item 2
  • +
  • Item 3
  • +
+ +Defining ordered lists is also very easy: + + 1. Item 1 + 2. Item 2 + 3. Item 3 + +It is possible to use any leading number you want. So if you don't want to keep +your list synchronised, you simple can use any number. In this case it's +recommended to use `0.`, but it isn't mandatory. + + 0. Item 1 + 0. Item 2 + 0. Item 3 + +Both examples will cause the same result. Even this is possible: + + 1000. Item 1 + 432. Item 2 + 0. Item 3 + +This will be the result in these example: + +
    +
  1. Item 1
  2. +
  3. Item 2
  4. +
  5. Item 3
  6. +
+ +Code & Blockquote +----------------- + +Use the `> ` as a line prefix for defining blockquotes. Blockquotes are +interpreted as well. This makes it possible to embed links, headings and even +other quotes into a quote: + + > Hello + > This is a quote with a [link](http://s01.de/~gottox) + +Result: +

+ Hello + This is a quote with a link

+
+ + +You can define a code block with a leading Tab or with __4__ leading spaces + + this.is(code) + + this.is(code, too) + +Result: +
this.is(code)
+
this.is(code, too)
+	
+ +Please note that you can't use HTML or smu syntax in a code block. + +Another way to write code blocks is to use code fences: + + ```json + {"some": "code"} + ``` + +This has two advantages: +* The optional language identifier will be turned into a `language-` class name +* You can keep the original indentation which helps when doing copy & paste + +Other interesting stuff +----------------------- + +* to insert a horizontal rule simple add `- - -` into an empty line: + + Hello + - - - + Hello2 + + Result: +

+ Hello +


+ + Hello2

+ +* You can escape the following pattern to avoid them from being interpreted: + + \ ` * _ { } [ ] ( ) # + - . ! + +* To force a linebreak simple add two spaces to the end of the line: + + No linebreak + here. + But here is + one. + +embed HTML +---------- + +You can include arbitrary HTML code in your documents. The HTML will be +passed through to the resulting document without modification. This is a good +way to work around features that are missing in smu. If you don't want this +behaviour, use the `-n` flag when executing smu to stricly escape the HTML +tags. diff --git a/tests/ruler.html b/tests/ruler.html new file mode 100644 index 0000000..ce18d20 --- /dev/null +++ b/tests/ruler.html @@ -0,0 +1,9 @@ +
+ +test +
+

bar

+
+

foo +

+
diff --git a/tests/ruler.text b/tests/ruler.text new file mode 100644 index 0000000..83b395c --- /dev/null +++ b/tests/ruler.text @@ -0,0 +1,11 @@ +- - - +test +- - - + +bar + +- - - + +foo +- - - + diff --git a/tests/table.html b/tests/table.html new file mode 100644 index 0000000..9c9aa78 --- /dev/null +++ b/tests/table.html @@ -0,0 +1,9 @@ +
+ + +
1st field 2nd field 3rd field
1st entry 2nd entry 3rd entry
+

And here is another table

+ + + +
Heading 1 Some other heading
I am a table cell. Me too!
diff --git a/tests/table.text b/tests/table.text new file mode 100644 index 0000000..0000c48 --- /dev/null +++ b/tests/table.text @@ -0,0 +1,10 @@ +| 1st field | 2nd field | 3rd field | +| :-- | :--: | --: | +| 1st entry | 2nd entry | 3rd entry | + +And here is another table + +| Heading 1 | Some other heading | +| --- | ------ | +| I am a table cell. | Me too! | + diff --git a/tests/testdoc.html b/tests/testdoc.html new file mode 100644 index 0000000..6a63329 --- /dev/null +++ b/tests/testdoc.html @@ -0,0 +1,72 @@ +

smu test

+

simple tests

+

first paragraph. +testing surround: emph then strong and code.

+

`escaped backticks`.

+

x = *y * 6;

+

horizontal rule:

+
+

blocks and entities

+

preformatted block: +

+
.'''' .'.'. |  |
+ '''. | ' | |  |
+''''  '   '  ""
+
+

quoted text: +

+

When in doubt, +use brute force.

+
+

list:

+ +
    +
  • Make each program do one thing well.
  • +
  • Expect the output of every program to become the input to another,
  • +
+

as yet unknown, program.

+ +
    +
  • Design and build software, even operating systems, to be tried early,
  • +
+

ideally within weeks.

+ +
    +
  • Use tools in preference to unskilled help to lighten a programming task.
  • +
+

list in list:

+ +
    +
  • a +
      +
    • b +
        +
      1. c
      2. +
      3. d
      4. +
      +
    • +
    • e
    • +
    +
  • +
  • f
  • +
+

entity: &, <, >

+

code: +

+
int powerof2(unsigned int n) {
+	return !((n - 1) & n) && n > 0;
+}
+
+

links

+

link: suckless

+

link with title: suckless

+

link with title (single quote): suckless

+

images

+

image:

+

image with alt text: alt text

+

image with title: alt text

+

image with title (single quote): alt text

+

inline html

+

+ ABC +

diff --git a/tests/testdoc.text b/tests/testdoc.text new file mode 100644 index 0000000..65fef1b --- /dev/null +++ b/tests/testdoc.text @@ -0,0 +1,80 @@ +smu test +======== + +simple tests +------------ + +first paragraph. +testing surround: _emph_ then **strong** and `code`. + +`` `escaped backticks` ``. + +`x = *y * 6;` + +horizontal rule: + +- - - + + +blocks and entities +------------------- + +preformatted block: + .'''' .'.'. | | + '''. | ' | | | + '''' ' ' "" + +quoted text: +> When in doubt, +> use brute force. + +list: +* Make each program do one thing well. +* Expect the output of every program to become the input to another, +as yet unknown, program. +* Design and build software, even operating systems, to be tried early, +ideally within weeks. +* Use tools in preference to unskilled help to lighten a programming task. + +list in list: +* a + * b + 1. c + 2. d + * e +* f + +entity: &, <, > + +code: + int powerof2(unsigned int n) { + return !((n - 1) & n) && n > 0; + } + +links +----- + +link: [suckless](http://suckless.org/) + +link with title: [suckless](http://suckless.org/ "software that sucks less") + +link with title (single quote): [suckless](http://suckless.org/ 'software that sucks less') + + +images +------ + +image: ![](http://st.suckless.org/screenshots/20h-2012-s.png) + +image with alt text: ![alt text](http://st.suckless.org/screenshots/20h-2012-s.png) + +image with title: ![alt text](http://st.suckless.org/screenshots/20h-2012-s.png "screenshot of st") + +image with title (single quote): ![alt text](http://st.suckless.org/screenshots/20h-2012-s.png 'screenshot of st') + +inline html +----------- + +
+ ABC +
diff --git a/tests/unicode.html b/tests/unicode.html new file mode 100644 index 0000000..3570eca --- /dev/null +++ b/tests/unicode.html @@ -0,0 +1,2 @@ +

👨‍👩‍👦 Family

+

👨‍👩‍👦

diff --git a/tests/unicode.text b/tests/unicode.text new file mode 100644 index 0000000..e40dab5 --- /dev/null +++ b/tests/unicode.text @@ -0,0 +1,5 @@ +👨‍👩‍👦 Family +========= + +👨‍👩‍👦 +--- -- 2.45.2