~shulhan/asciidoctor-go

e98aea8108d267fc5f8364d0e611698f413095c5 — Shulhan 3 months ago 9a2bcad
all: use strict document header format

Previously, an empty line before the Document Title caused the parser
to stop parsing the document header; now empty lines are skipped.
Also, a document attribute could previously be placed anywhere, either
before or after the title, and in between attributes; now it can only
be placed after the revision, author, or title.
M _doc/SPECS.adoc => _doc/SPECS.adoc +8 -10
@@ 67,21 67,19 @@ REF_ID    = 1*ALPHA *("-" / "_" / ALPHA / DIGIT)
{url_ref}/document/header/[Reference^].

Document header consists of a title and optional authors, a revision, and zero or
more metadata.
The document metadata can be in any order, before or after title, but the
author and revision MUST be after title and in order.
more attributes.
The author and revision MUST be after title and in order.
The document attributes can be in any order, after title, author or
revision.

----
DOC_HEADER     = *(DOC_ATTRIBUTE / COMMENTS)
                 "=" SP DOC_TITLE LF
                 (*DOC_ATTRIBUTE)
                 DOC_AUTHORS LF
                 (*DOC_ATTRIBUTE)
                 DOC_REVISION LF
DOC_HEADER     = [ "=" SP DOC_TITLE LF
                 [ DOC_AUTHORS LF
                 [ DOC_REVISION LF ]]]
                 (*DOC_ATTRIBUTE)
                 LF
----

There are no empty line before and after the document header.
An empty line marks the end of the document header.

===  Title

M document_parser.go => document_parser.go +71 -44
@@ 686,72 686,76 @@ func (docp *documentParser) parseBlock(parent *element, term int) {
// The document attributes can be in any order, but the author and revision
// MUST be in order.
//
//	DOC_HEADER  = *(DOC_ATTRIBUTE / COMMENTS)
//	              "=" SP *ADOC_WORD LF
//	              (*DOC_ATTRIBUTE)
//	              DOC_AUTHORS LF
//	              (*DOC_ATTRIBUTE)
//	              DOC_REVISION LF
//	DOC_HEADER  = [ "=" SP *ADOC_WORD LF
//	              [ DOC_AUTHORS LF
//	              [ DOC_REVISION LF ]]]
//	              (*DOC_ATTRIBUTE)
//	              LF
func (docp *documentParser) parseHeader() {
	const (
		stateBegin int = iota
		stateTitle
		stateAuthor
		stateRevision
	var (
		logp = `parseHeader`
		line []byte
		ok   bool
	)

	var (
		logp  = `parseHeader`
		state = stateBegin
	line, ok = docp.skipCommentAndEmptyLine()
	if !ok {
		return
	}
	if docp.kind == lineKindText && isTitle(line) {
		docp.doc.header.Write(bytes.TrimSpace(line[2:]))
		docp.doc.Title.raw = string(docp.doc.header.raw)

		key   string
		value string
		line  []byte
		ok    bool
	)
	for {
		_, line, ok = docp.line(logp)
		if !ok {
			return
		}
		if len(line) == 0 {
		if docp.kind == lineKindText {
			docp.doc.rawAuthors = string(line)

			_, line, ok = docp.line(logp)
			if !ok {
				return
			}
			if docp.kind == lineKindText {
				docp.doc.rawRevision = string(line)
				line = nil
			}
		}
	}

	// Parse the rest of attributes until we found an empty line or
	// line with non-attribute.
	for {
		if line == nil {
			_, line, ok = docp.line(logp)
			if !ok {
				return
			}
		}
		if docp.kind == lineKindEmpty {
			return
		}
		if bytes.HasPrefix(line, []byte(`////`)) {
		if docp.kind == lineKindBlockComment {
			docp.parseIgnoreCommentBlock()
			line = nil
			continue
		}
		if bytes.HasPrefix(line, []byte(`//`)) {
		if docp.kind == lineKindComment {
			line = nil
			continue
		}
		if line[0] == ':' {
		if docp.kind == lineKindAttribute {
			var key, value string
			key, value, ok = docp.parseAttribute(line, false)
			if ok {
				docp.doc.Attributes.apply(key, value)
			}
			line = nil
			continue
		}
		if state == stateBegin {
			if isTitle(line) {
				docp.doc.header.Write(bytes.TrimSpace(line[2:]))
				docp.doc.Title.raw = string(docp.doc.header.raw)
				state = stateTitle
			} else {
				docp.doc.rawAuthors = string(line)
				state = stateAuthor
			}
			continue
		}
		switch state {
		case stateTitle:
			docp.doc.rawAuthors = string(line)
			state = stateAuthor

		case stateAuthor:
			docp.doc.rawRevision = string(line)
			state = stateRevision
		}
		docp.lineNum--
		break
	}
}



@@ 1572,3 1576,26 @@ func (docp *documentParser) parseParagraph(parent, el *element, line []byte, ter
	el.parseInlineMarkup(docp.doc, elKindText)
	return line
}

// skipCommentAndEmptyLine reads lines from the parser until it reaches one
// whose kind is neither empty, a single-line comment, nor the opening of a
// comment block.
//
// It returns that line with ok set to true.
// If the input runs out first (docp.line reports !ok), it returns
// (nil, false).
func (docp *documentParser) skipCommentAndEmptyLine() (line []byte, ok bool) {
	const logp = `skipCommentAndEmptyLine`

	for {
		_, line, ok = docp.line(logp)
		if !ok {
			return nil, false
		}

		switch docp.kind {
		case lineKindEmpty, lineKindComment:
			// Nothing to keep; move on to the next line.

		case lineKindBlockComment:
			// Presumably consumes everything up to the end of the
			// comment block -- confirm against
			// parseIgnoreCommentBlock.
			docp.parseIgnoreCommentBlock()

		default:
			return line, true
		}
	}
}

M document_test.go => document_test.go +3 -2
@@ 71,8 71,9 @@ func TestParse_document_title(t *testing.T) {
		expString: `a: b: c`,
	}, {
		// With custom separator.
		content: `:title-separator: x
= Mainx sub`,
		content: `
= Mainx sub
:title-separator: x`,
		exp: DocumentTitle{
			Main: `Main`,
			Sub:  `sub`,

M testdata/document_title_test.txt => testdata/document_title_test.txt +1 -1
@@ 25,8 25,8 @@ output_call: htmlWriteHeader
</div>

>>> With custom separator
:title-separator: x
= Mainx sub
:title-separator: x

<<< With custom separator
<div id="header">

M testdata/header_with_empty_line_test.txt => testdata/header_with_empty_line_test.txt +1 -3
@@ 12,11 12,9 @@ Below is empty line with spaces.

<<<
<div id="header">
<h1>Title</h1>
</div>
<div id="content">
<div class="paragraph">
<p>= Title</p>
</div>
</div>
<div id="footer">
<div id="footer-text">

M testdata/test.adoc => testdata/test.adoc +2 -2
@@ 1,10 1,10 @@
// SPDX-FileCopyrightText: 2020 M. Shulhan <ms@kilabit.info>
// SPDX-License-Identifier: GPL-3.0-or-later
= _Example `Document` **title**_
:metadata key: value
Author A <a@a.com>; Author mid_dle B <b@b.com>
:unclosed metadata:
v1.1.1, 18 July 2020: remark
:metadata key: value
:unclosed metadata:
:sectnums:
:sectlinks:
:sectanchors:

M testdata/test.got.html => testdata/test.got.html +1 -1
@@ 3056,7 3056,7 @@ this sidebar.</p>
<div id="footer">
<div id="footer-text">
 1.1.1<br>
Last updated 2024-04-04 21:22:35 +0700
Last updated 2024-08-12 23:31:24 +0700
</div>
</div>
</body>