From 05f02064e5e7c510a52df50503267e4d541c229c Mon Sep 17 00:00:00 2001 From: Virgil Dupras Date: Sun, 11 Aug 2024 12:08:11 -0400 Subject: [PATCH] comp/lisp: add comments --- fs/comp/lisp/raw.fs | 3 ++- fs/doc/comp/lisp.txt | 44 ++++++++++++++++++++++++++++++------------- fs/tests/comp/lisp.fs | 4 +++- 3 files changed, 36 insertions(+), 15 deletions(-) diff --git a/fs/comp/lisp/raw.fs b/fs/comp/lisp/raw.fs index 8896ea77..3d97af8f 100644 --- a/fs/comp/lisp/raw.fs +++ b/fs/comp/lisp/raw.fs @@ -1,7 +1,7 @@ needs mem/cons comp/tok unit comp/lisp/raw -: symchar? ( c -- f ) "()'\"" c@+ cidx dup if nip then ; +: symchar? ( c -- f ) "()'\"\\" c@+ cidx dup if nip then ; : _?tok< ( -- tok-or-0 ) newtok tonws< nip dup tokacc ( c ) symchar? not if begin @@ -22,6 +22,7 @@ unit comp/lisp/raw else case ''' isChar? of raw< 0 cons "quote" swap cons endof '"' isChar? of ,str" n>tok endof + '\' isChar? of [compile] \ raw< endof endcase then ; : ?stype ( s -- ) ?dup if stype else ."(null)" then ; : .raw diff --git a/fs/doc/comp/lisp.txt b/fs/doc/comp/lisp.txt index 1f2d3da9..3531f07a 100644 --- a/fs/doc/comp/lisp.txt +++ b/fs/doc/comp/lisp.txt @@ -25,9 +25,11 @@ regular Forth words, so this is just syntactic sugar for "42 2 3 + -". ## Tokenisation logic -Tokenization (see [comp/tok]) boundaries are: whitespace ( ) ' " +Tokenization (see [comp/tok]) boundaries are: -Therefore, "(foo'"bar)" tokenizes as: ( foo ' " bar ) + whitespace ( ) ' " \ + +For example, "(foo'bar)" tokenizes as: ( foo ' bar ) ## Parsing logic @@ -46,12 +48,7 @@ Yields the cons: ("foo" ("bar" "42" "baz") ("left" "." "12345") -You'll notice that the "." is not handled at this level. You'll notice that -string literals are handled in a weird manner. That's because those strings in -the raw list are supposed to be either a literal or a symbol. If we keep "right" -as is, we effectively "de-literalize" it. 
Therefore, when a string literal is -encountered, it is first copied to permanent storage, then its pointer is -yielded as a numerical literal in string form. +You'll notice that the "." is not handled at this level. Once we have that raw list, we "cook" it, which is where the magic happens. The word "cook" generally expects cons that have the format @@ -60,17 +57,38 @@ pusging arguments to PS, then calling "executable". To figure out what "executable" is, we look it up in the system dictionary and thus have the address to call. And that's it. That's the process. -But there are exceptions that we call "parsers", for example "quote" and -"lambda". These words have a signature that is incompatible with the regular -resolving logic. For example, in "(defun foo ...)", if we let the default -resolver pick it up, we'll look for "foo" in the system dict and not find it, -which is normal because it's the name of the word we want to create! +## Parsers + +There are exceptions to the regular parsing logic that we call "parsers", for +example "quote" and "lambda". These words have a signature that is incompatible +with the regular resolving logic. For example, in "(defun foo ...)", if we let +the default resolver pick it up, we'll look for "foo" in the system dict and not +find it, which is normal because it's the name of the word we want to create! So when we encounter these parser words, we circumvent the regular resolving mechanism and call the parser word while supplying it with the raw list directly. We then expect that parser to yield a result that we can feed into our cooking process. +## Comments + +Comments use the "\" token and behave like the system "\", that is, they drop +the input stream until the end of the line. For example: + + raw< (foo + \This is a comment + bar) + +will yield ("foo" "bar"). + +## String literals + +You'll notice that string literals are handled in a weird manner. 
That's because +those strings in the raw list are supposed to be either a literal or a symbol. +If we keep "right" as is, we effectively "de-literalize" it. Therefore, when a +string literal is encountered, it is first copied to permanent storage, then its +pointer is yielded as a numerical literal in string form. + ## Compiling "lambda" and "defun" cook their body in a particular manner: they compile the diff --git a/fs/tests/comp/lisp.fs b/fs/tests/comp/lisp.fs index a2fe6d50..06f70953 100644 --- a/fs/tests/comp/lisp.fs +++ b/fs/tests/comp/lisp.fs @@ -12,7 +12,9 @@ raw< '(foo) decons 0 #eq "foo" #s= raw< (foo) decons 0 #eq "foo" #s= -raw< (foo (bar baz) qux) +raw< (foo + \This is a comment + (bar baz) qux) decons swap "foo" #s= decons swap dup iscons? #true decons swap "bar" #s= -- 2.45.2