~emersion/gyosu

b2b08e781ec4983679cc33a1c885afb7c77717ad — Simon Ser a month ago d401748
Split comment-related logic into separate file
2 files changed, 127 insertions(+), 120 deletions(-)

A comment.go
M main.go
A comment.go => comment.go +127 -0
@@ 0,0 1,127 @@
package main

import (
	"strings"
	"unicode"

	"modernc.org/cc/v3"
)

// cutComment finds the first C block comment ("/* ... */") in s. It returns
// the comment, delimiters included, and the text that follows it. If s does
// not contain a complete block comment, both results are empty.
func cutComment(s string) (string, string) {
	const opener, closer = "/*", "*/"

	start := strings.Index(s, opener)
	if start < 0 {
		return "", ""
	}

	// Search for the terminator only after the opener. Otherwise the "*" of
	// the opener could pair with a directly following "/", and "/*/" would
	// be mistaken for a complete comment.
	bodyStart := start + len(opener)
	j := strings.Index(s[bodyStart:], closer)
	if j < 0 {
		return "", ""
	}
	end := bodyStart + j + len(closer)

	return s[start:end], s[end:]
}

// parseParagraph tokenizes the raw text of a paragraph with nextWord and
// turns the tokens into inline elements. Tokens matching one of the patterns
// below become symbol references; every other token is kept as plain text:
//
//     struct foo
//     enum foo
//     union foo
//     foo()
func parseParagraph(raw string) paragraph {
	var out paragraph
	for raw != "" {
		var tok string
		tok, raw = nextWord(raw)

		switch {
		case isDeclKind(tok) && len(raw) >= 2 && raw[0] == ' ':
			// "<kind> <name>": skip the separating space and take the
			// following token as the symbol name.
			var ident string
			ident, raw = nextWord(raw[1:])
			out = append(out, inline{
				Kind:  declKind(tok),
				Type:  inlineSymbol,
				Value: ident,
			})
		case len(raw) >= 2 && raw[0] == '(' && raw[1] == ')':
			// "<name>()": the token names a function; the parentheses
			// are consumed along with it.
			out = append(out, inline{
				Kind:  declFunc,
				Type:  inlineSymbol,
				Value: tok,
			})
			raw = raw[2:]
		default:
			out = append(out, inline{Type: inlineText, Value: tok})
		}
	}

	return out
}

// nextWord splits the first token off s. A token is either a maximal run of
// word characters (as decided by isWordBoundary) or a single boundary
// character. It returns the token and the remainder of s; an empty s yields
// two empty strings.
func nextWord(s string) (word string, next string) {
	leadingBoundary := false
	for i, ch := range s {
		if leadingBoundary {
			// i is the byte offset of the second rune, so the leading
			// boundary character is returned whole even when it is
			// encoded on several bytes.
			return s[:i], s[i:]
		}
		if !isWordBoundary(ch) {
			continue
		}
		if i > 0 {
			// A boundary ends the current run of word characters.
			return s[:i], s[i:]
		}
		leadingBoundary = true
	}
	return s, ""
}

// isDeclKind reports whether s is exactly one of the keywords "struct",
// "enum" or "union".
func isDeclKind(s string) bool {
	return s == "struct" || s == "enum" || s == "union"
}

// isWordBoundary reports whether ch separates words, i.e. whether it is
// neither a Unicode letter, a Unicode digit, nor an underscore.
func isWordBoundary(ch rune) bool {
	if ch == '_' {
		return false
	}
	return !(unicode.IsLetter(ch) || unicode.IsDigit(ch))
}

// extractDocComment returns the documentation comment attached to tok, split
// into parsed paragraphs. The candidate is the last block comment found in
// the token's separator text (tok.Sep); it is only accepted when exactly one
// newline follows it. nil is returned when there is no such comment.
func extractDocComment(tok cc.Token) []paragraph {
	// Walk over every block comment in the separator, remembering the last
	// one together with the text trailing it.
	var last string
	tail := tok.Sep.String()
	for c, r := cutComment(tail); c != ""; c, r = cutComment(tail) {
		last, tail = c, r
	}

	if last == "" || tail != "\n" {
		return nil
	}

	// Strip the comment delimiters; the extra "*" handles the "/**" opener.
	body := strings.TrimPrefix(last, "/*")
	body = strings.TrimPrefix(body, "*")
	body = strings.TrimSuffix(body, "*/")
	body = strings.TrimSpace(body)

	// Remove the "* " decoration at the start of each line. A line holding
	// only "*" is an empty line.
	rows := strings.Split(body, "\n")
	for i := range rows {
		row := strings.TrimSpace(rows[i])
		row = strings.TrimPrefix(row, "* ")
		if row == "*" {
			row = ""
		}
		rows[i] = row
	}

	// Paragraphs are separated by empty lines.
	var paragraphs []paragraph
	for _, chunk := range strings.Split(strings.Join(rows, "\n"), "\n\n") {
		paragraphs = append(paragraphs, parseParagraph(chunk))
	}
	return paragraphs
}

M main.go => main.go +0 -120
@@ 10,131 10,11 @@ import (
	"path/filepath"
	"sort"
	"strings"
	"unicode"

	"git.sr.ht/~sircmpwn/getopt"
	"modernc.org/cc/v3"
)

// cutComment finds the first C block comment ("/* ... */") in s. It returns
// the comment, delimiters included, and the text that follows it. If s does
// not contain a complete block comment, both results are empty.
func cutComment(s string) (string, string) {
	const opener, closer = "/*", "*/"

	start := strings.Index(s, opener)
	if start < 0 {
		return "", ""
	}

	// Search for the terminator only after the opener. Otherwise the "*" of
	// the opener could pair with a directly following "/", and "/*/" would
	// be mistaken for a complete comment.
	bodyStart := start + len(opener)
	j := strings.Index(s[bodyStart:], closer)
	if j < 0 {
		return "", ""
	}
	end := bodyStart + j + len(closer)

	return s[start:end], s[end:]
}

// parseParagraph tokenizes the raw text of a paragraph with nextWord and
// turns the tokens into inline elements. Tokens matching one of the patterns
// below become symbol references; every other token is kept as plain text:
//
//     struct foo
//     enum foo
//     union foo
//     foo()
func parseParagraph(raw string) paragraph {
	var out paragraph
	for raw != "" {
		var tok string
		tok, raw = nextWord(raw)

		switch {
		case isDeclKind(tok) && len(raw) >= 2 && raw[0] == ' ':
			// "<kind> <name>": skip the separating space and take the
			// following token as the symbol name.
			var ident string
			ident, raw = nextWord(raw[1:])
			out = append(out, inline{
				Kind:  declKind(tok),
				Type:  inlineSymbol,
				Value: ident,
			})
		case len(raw) >= 2 && raw[0] == '(' && raw[1] == ')':
			// "<name>()": the token names a function; the parentheses
			// are consumed along with it.
			out = append(out, inline{
				Kind:  declFunc,
				Type:  inlineSymbol,
				Value: tok,
			})
			raw = raw[2:]
		default:
			out = append(out, inline{Type: inlineText, Value: tok})
		}
	}

	return out
}

// nextWord splits the first token off s. A token is either a maximal run of
// word characters (as decided by isWordBoundary) or a single boundary
// character. It returns the token and the remainder of s; an empty s yields
// two empty strings.
func nextWord(s string) (word string, next string) {
	leadingBoundary := false
	for i, ch := range s {
		if leadingBoundary {
			// i is the byte offset of the second rune, so the leading
			// boundary character is returned whole even when it is
			// encoded on several bytes.
			return s[:i], s[i:]
		}
		if !isWordBoundary(ch) {
			continue
		}
		if i > 0 {
			// A boundary ends the current run of word characters.
			return s[:i], s[i:]
		}
		leadingBoundary = true
	}
	return s, ""
}

// isDeclKind reports whether s is exactly one of the keywords "struct",
// "enum" or "union".
func isDeclKind(s string) bool {
	return s == "struct" || s == "enum" || s == "union"
}

// isWordBoundary reports whether ch separates words, i.e. whether it is
// neither a Unicode letter, a Unicode digit, nor an underscore.
func isWordBoundary(ch rune) bool {
	if ch == '_' {
		return false
	}
	return !(unicode.IsLetter(ch) || unicode.IsDigit(ch))
}

// extractDocComment returns the documentation comment attached to tok, split
// into parsed paragraphs. The candidate is the last block comment found in
// the token's separator text (tok.Sep); it is only accepted when exactly one
// newline follows it. nil is returned when there is no such comment.
func extractDocComment(tok cc.Token) []paragraph {
	// Walk over every block comment in the separator, remembering the last
	// one together with the text trailing it.
	var last string
	tail := tok.Sep.String()
	for c, r := cutComment(tail); c != ""; c, r = cutComment(tail) {
		last, tail = c, r
	}

	if last == "" || tail != "\n" {
		return nil
	}

	// Strip the comment delimiters; the extra "*" handles the "/**" opener.
	body := strings.TrimPrefix(last, "/*")
	body = strings.TrimPrefix(body, "*")
	body = strings.TrimSuffix(body, "*/")
	body = strings.TrimSpace(body)

	// Remove the "* " decoration at the start of each line. A line holding
	// only "*" is an empty line.
	rows := strings.Split(body, "\n")
	for i := range rows {
		row := strings.TrimSpace(rows[i])
		row = strings.TrimPrefix(row, "* ")
		if row == "*" {
			row = ""
		}
		rows[i] = row
	}

	// Paragraphs are separated by empty lines.
	var paragraphs []paragraph
	for _, chunk := range strings.Split(strings.Join(rows, "\n"), "\n\n") {
		paragraphs = append(paragraphs, parseParagraph(chunk))
	}
	return paragraphs
}

func enumSpecifierPrototype(enumSpec *cc.EnumSpecifier) prototype {
	var proto prototype
	if enumSpec.Token2.Value != 0 {