~vdupras/duskos

ab86276371d59e7dcba011cb62c31b822b07c454 — Virgil Dupras 4 months ago 2ef45a5
rpi: change call conventions

See "pushret, and popret," section in doc/hal.
4 files changed, 110 insertions(+), 72 deletions(-)

M fs/asm/arm.fs
M fs/doc/hal.txt
M fs/xcomp/arm/rpi/kernel.fs
M fs/xcomp/rpiboot.fs
M fs/asm/arm.fs => fs/asm/arm.fs +0 -3
@@ 82,13 82,10 @@ $90 al) const mul)

\ Macros

: lret, ( -- ) mov) rPC rd) rLR rm) ,) ;
: push, ( r -- ) str) swap rd) rSP rn) CELLSZ -i) pre) !) ,) ;
: pop, ( r -- ) ldr) swap rd) rSP rn) CELLSZ +i) post) ,) ;
: ppush, ( r -- ) str) swap rd) rPSP rn) CELLSZ -i) pre) !) ,) ;
: ppop, ( r -- ) ldr) swap rd) rPSP rn) CELLSZ +i) post) ,) ;
: call, ( rel -- ) rLR push, CELLSZ - bl) ,) rLR pop, ;
: abscall, ( tgt -- ) abs>rel call, ;

\ HAL


M fs/doc/hal.txt => fs/doc/hal.txt +39 -0
@@ 176,6 176,41 @@ All addresses passed to branching words are absolute addresses. If the native
instructions use relative branching addressing, the HAL takes care of the
translation.

## pushret, and popret,

In Dusk, "Call" means "Push the address of the instruction following the current
one to RSP, and then jump to the address being called". "Return" means "Pop RSP
and jump to that address".

On "traditional" CPU architectures, this maps exactly to the behavior of the
native "call" and "return" instructions, so we can live a happy life of
blissful ignorance when using these CPUs.

On some CPUs such as ARM, the native "call" model is to save the address we'll
want to return to to a register and leave the task of push/popping to RSP to the
programmer.

Of course, one thing we could do is to simply wrap all calls and returns in Dusk
into RSP push/pop operation, but that would squander a wonderful speedup
opportunity: With such an approach to calling, we can avoid one push and one pop
on each "leaf" routine call, that is, on each call to a routine that doesn't
call any other routine. That adds up to quite a lot of pushes and pops.

To grab this opportunity, the HAL has two words: pushret, and popret,

On "traditional" CPUs, these are noops. On ARM, these words push and pop the
return address register to and from RSP.

Words defined through "high level" mechanism such as ":" call those words
automatically, no need to worry. However, words created with "code" don't.

This means that if you create such a word and that this word calls another word,
it needs to call "pushret," as a prelude and to call "popret," before it
returns. Leaf words don't need to do that, which makes them faster.

NOTE: the ARM port is currently being written, so pushret, and popret, hasn't
been added everywhere it needs to yet.

## Low HAL

Operand words:


@@ 214,6 249,10 @@ branch!    tgtaddr braddr --
  branching.
branchA,   --
  Branch to the address held in the A register.
pushret,   --
  Push the current return address to RSP (on relevant CPUs)
popret,   --
  Pop RSP in return address register (on relevant CPUs)

Instructions:


M fs/xcomp/arm/rpi/kernel.fs => fs/xcomp/arm/rpi/kernel.fs +70 -68
@@ 21,8 21,12 @@ $44 const UART0_ICR
: xdrop, rTOP ppop, ;
: xgrow, sub) rPSP rdn) CELLSZ i) ,) ;
: xdup, rTOP ppush, ;
: wcall, xwordlbl abscall, ;
: wjmp, xwordlbl abs>rel b) ,) ;
: pushret, rLR push, ;
: popret, rLR pop, ;
: exit, rLR bx) ,) ;
: execute, abs>rel bl) ,) ;
: wcall, xwordlbl execute, ;
: wjmp, xwordlbl abs>rel b) ,) ; \ only for leaf words!

: delay, ( ncycles -- )
  mov) r0 rd) swap ( ncycles ) i) ,)


@@ 41,13 45,13 @@ $44 const UART0_ICR
  rot mov) over rd) rot i) ,) ( n1 r )
  add) swap rdn) swap i) ,) ;

: xaddr ( lbl -- ) xcode xdup, rTOP pc>reg, lret, ;
: xconst ( n -- ) pc swap le, xcode xdup, rTOP pc@>reg, lret, ;
: xaddr ( lbl -- ) xcode xdup, rTOP pc>reg, exit, ;
: xconst ( n -- ) pc swap le, xcode xdup, rTOP pc@>reg, exit, ;

: values ( n -- ) for 0 value next ;
13 values lblcurword lblsysdict lblhere lblnextmeta lblcompiling lblbootptr
12 values lblcurword lblsysdict lblhere lblnextmeta lblcompiling lblbootptr
          lblhbank
          lblcwrite lbldwrite lblwriterange lblrel lblerrmsg lblmain
          lblcwrite lbldwrite lblwriterange lblerrmsg lblmain
$8000 to binstart
binstart const RSTOP
RSTOP $1000 - const PSTOP


@@ 61,14 65,14 @@ HERESTART xconst herestart
pc to lblcompiling 0 le,
xcode compiling
  xdup, lblcompiling rTOP pc@>reg,
  lret,
  exit,

xcode [ ximm
  mov) r0 rd) 0 i) ,)
pc
  lblcompiling rTOP pc>reg,
  str) r0 rd) rTOP rn) ,)
  lret,
  exit,

xcode ]
  mov) r0 rd) 1 i) ,)


@@ 88,7 92,7 @@ xcode bye 0 b) ,)

pc to L1 \ fail
  mov) rTOP rd) 0 i) ,)
  lret,
  exit,

\ parse char
pc to L2 \ rTOP=a-with-'-skipped r0=u


@@ 101,7 105,7 @@ pc to L2 \ rTOP=a-with-'-skipped r0=u
  and) r0 rdn) $ff i) ,)
  r0 ppush,
  mov) rTOP rd) 1 i) ,)
  lret,
  exit,

\ parse hexadecimal
pc to L3 \ rTOP=a-with-$-skipped r0=u


@@ 127,7 131,7 @@ L4 forward! \ parse ok
  ( loop ) abs>rel b) nz) ,)
  r2 ppush,
  mov) rTOP rd) 1 i) ,)
  lret,
  exit,

\ parse unsigned decimal
pc to L4 \ rTOP=a+1 r1=first-char r0=u


@@ 151,7 155,7 @@ pc \ loop
  rsb) z) r2 rdn) 0 i) ,) \ negate
  r2 ppush,
  mov) rTOP rd) 1 i) ,)
  lret,
  exit,

xcode parse ( str -- n? f )
  ldr) r0 rd) rTOP rn) 8b) 1 +i) post) ,) \ rTOP=a r0=u


@@ 170,7 174,7 @@ xcode emit ( c -- )
    ( pc ) abs>rel b) ne) ,)
  str) rTOP rd) r7 rn) UART0_DR +i) ,)
  xdrop,
  lret,
  exit,

xcode key ( -- c )
  pc


@@ 179,9 183,9 @@ xcode key ( -- c )
    ( pc ) abs>rel b) ne) ,)
  xdup,
  ldr) rTOP rd) r7 rn) UART0_DR +i) 8b) ,)
  lret,
  exit,

xcode rtype ( a u -- )
xcode rtype pushret, ( a u -- )
  r1 ppop, mov) r2 rd) rTOP rm) ,) \ r1=a r2=u
  pc
    ldr) rTOP rd) r1 rn) 8b) 1 +i) post) ,)


@@ 190,7 194,7 @@ xcode rtype ( a u -- )
    sub) r2 rdn) 1 i) f) ,)
    ( pc ) abs>rel b) ne) ,)
  xdrop,
  lret,
  popret, exit,

xcode stype ( str -- )
  ldr) r0 rd) rTOP rn) 8b) 1 +i) post) ,)


@@ 211,7 215,7 @@ xcode boot<
  xwordlbl key abs>rel b) eq) ,) \ until we have a proper "realias"
  xdup, ldr) rTOP rd) r1 rn) 8b) 1 +i) post) ,)
  str) r1 rd) r0 rn) ,)
  lret,
  exit,

xcode in< wjmp, boot<



@@ 219,12 223,12 @@ xcode interactive! \ until we have a proper "realias"
  lblbootptr r0 pc>reg,
  mov) r1 rd) 0 i) ,)
  str) r1 rd) r0 rn) ,)
  lret,
  exit,

pc to lblcurword $20 allot0
lblcurword xaddr curword

xcode word ( -- str )
xcode word pushret, ( -- str )
  xdup,
  pc
    wcall, in<


@@ 242,7 246,7 @@ xcode word ( -- str )
    ( pc ) abs>rel b) hi) ,)
  lblcurword rTOP pc>reg,
  str) r6 rd) rTOP rn) 8b) ,)
  lret,
  popret, exit,

pc ,"  word not found" 0 align4
xcode (wnf)


@@ 300,13 304,13 @@ pc \ loop2
  ( loop2 ) abs>rel b) ne) ,)
  \ same contents
  add) rTOP rdn) 4 i) ,) \ e>w
  lret,
  exit,
L2 forward! L1 forward! \ not matching, try next
  ldr) rTOP rdn) 0 +i) ,)
  cmp) rTOP rn) 0 i) ,)
  ( loop1 ) abs>rel b) ne) ,)
  \ not found
  lret,
  exit,

pc to lblhere HERESTART le,
lblhere xaddr here


@@ 316,14 320,14 @@ pc to lblcwrite \ r0=char
  ldr) r1 rd) r2 rn) ,)
  str) r0 rd) r1 rn) 8b) 1 +i) post) ,)
  str) r1 rd) r2 rn) ,)
  lret,
  exit,

pc to lbldwrite \ r0=n
  lblhere r2 pc>reg,
  ldr) r1 rd) r2 rn) ,)
  str) r0 rd) r1 rn) 4 +i) post) ,)
  str) r1 rd) r2 rn) ,)
  lret,
  exit,

pc to lblwriterange \ r0=addr r1=len
  lblhere r2 pc>reg,


@@ 334,15 338,7 @@ pc to lblwriterange \ r0=addr r1=len
    sub) r1 rdn) 1 i) f) ,)
    ( pc ) abs>rel b) ne) ,)
  str) r3 rd) r2 rn) ,)
  lret,

pc to lblrel \ r0=abs addr -- r0=rel offset | preserves r6
  lblhere r1 pc@>reg,
  sub) r0 rdn) r1 rm) ,)
  mov) r0 rd) r0 rm) 2 lsr) ,)
  sub) r0 rdn) 2 i) ,)
  bic) r0 rn) $ff000000 i) ,)
  lret,
  exit,

xcode align4 ( n -- )
  lblhere r0 pc>reg,


@@ 352,34 348,35 @@ xcode align4 ( n -- )
  sub) ne) r1 rdn) r2 rm) ,)
  add) ne) r1 rdn) 4 i) ,)
  str) ne) r1 rd) r0 rn) ,)
  xdrop, lret,
  xdrop, exit,

pc to lblnextmeta 0 le,
lblnextmeta xaddr nextmeta

xcode entry ( 'dict s -- )
xcode entry pushret, ( 'dict s -- )
  mov) r6 rd) rTOP rm) ,)
  ldr) r5 rd) r6 rn) 8b) 1 +i) post) ,) \ r7=a r6=len
  add) rTOP rd) r5 rn) 1 i) ,) \ rTOP=len+1
  wcall, align4 \ rTOP='dict
  mov) r0 rd) r6 rm) ,)
  mov) r1 rd) r5 rm) ,)
  lblwriterange abscall,
  lblwriterange execute,
  mov) r0 rd) r5 rm) ,)
  lblcwrite abscall,
  lblcwrite execute,
  lblnextmeta r0 pc>reg,
  ldr) r0 rdn) ,)
  lbldwrite abscall,
  lbldwrite execute,
  ldr) r0 rd) rTOP rn) ,) \ r0=dict
  lblhere r1 pc>reg,
  ldr) r1 rdn) ,)
  str) r1 rd) rTOP rn) ,) \ "here" is new sysdict
  popret,
  lbldwrite abs>rel b) ,)

xcode code
xcode code pushret,
  wcall, sysdict
  wcall, word
  wjmp, entry
  popret, wjmp, entry

\ HAL operands
HALBASE rTOP + xconst W)


@@ 395,26 392,26 @@ xcode m) ( a -- operand )
  lblhbank r0 pc>reg,
  str) rTOP rd) r0 rn) ,)
  ( pc ) rTOP pc@>reg,
  lret,
  exit,

xcode +) ( operand n -- operand )
  lblhbank r0 pc>reg,
  str) rTOP rd) r0 rn) ,)
  xdrop,
  lret,
  exit,

xcode 8b) ( operand -- operand )
  orr) rTOP rdn) $00400000 i) ,)
  lret,
  exit,

xcode 16b) ( operand -- operand )
  bic) rTOP rdn) $04000000 i) ,)
  lret,
  exit,

xcode 32b) ( operand -- operand )
  bic) rTOP rdn) $00400000 i) ,)
  orr) rTOP rdn) $04000000 i) ,)
  lret,
  exit,

\ HAL operations
\ r0 is used as the immediate accumulator


@@ 438,22 435,26 @@ xcode ps+, ( n -- )
  ( pc ) r0 pc@>reg,
  L1 abs>rel b) ,)

pc lret,
pc pushret,
xcode pushret,
  ( pc ) r0 pc@>reg, lbldwrite abs>rel b) ,)

pc popret,
xcode popret,
  ( pc ) r0 pc@>reg, lbldwrite abs>rel b) ,)

pc exit,
xcode exit,
  ( pc ) r0 pc@>reg,
  lbldwrite abs>rel b) ,)
  ( pc ) r0 pc@>reg, lbldwrite abs>rel b) ,)

pc rLR push, rLR pop,
xcode execute, ( w -- )
  ( pc ) r6 pc>reg,
  ldr) r0 rd) r6 rn) 4 +i) post) ,)
  lbldwrite abscall,
  mov) r0 rd) rTOP rm) ,)
  xdrop,
  lblrel abscall, \ r0=offset
  lblhere r1 pc@>reg,
  sub) r0 rd) rTOP rn) r1 rm) ,)
  mov) r0 rd) r0 rm) 2 lsr) ,)
  sub) r0 rdn) 2 i) ,)
  bic) r0 rn) $ff000000 i) ,)
  orr) r0 rdn) $eb000000 i) ,)
  lbldwrite abscall,
  ldr) r0 rd) r6 rn) ,)
  xdrop,
  lbldwrite abs>rel b) ,)

pc xdup,


@@ 461,36 462,37 @@ xcode dup,
  ( pc ) r0 pc@>reg,
  lbldwrite abs>rel b) ,)

xcode ; ximm
xcode ; ximm pushret,
  wcall, popret,
  wcall, exit,
  wjmp, [
  popret, wjmp, [

pc to L2 ( str -- w ) \ find in sys dict
pc to L2 pushret, ( str -- w ) \ find in sys dict
  wcall, curword
  wcall, sysdict
  wcall, find
  teq) rTOP rn) 0 i) ,)
  xwordlbl (wnf) abs>rel b) eq) ,)
  lret,
  popret, exit,

xcode compword ( str -- )
  L2 abscall,
  pushret, L2 execute, popret,
  ldr) r0 rd) rTOP rn) 8b) 9 -i) ,)
  tst) r0 rn) $80 i) ,)
  xwordlbl execute abs>rel b) ne) ,) \ immediate? execute
  \ compile word
  wjmp, execute,

pc lblcompiling le,
xcode runword ( str -- )
  wcall, compiling
  cmp) rTOP rn) 0 i) ,)
  xdrop,
  ( pc ) r0 pc@>reg, ldr) r0 rdn) ,)
  cmp) r0 rn) 0 i) ,)
  xwordlbl compword abs>rel b) ne) ,)
  wcall, parse
  pushret, wcall, parse popret,
  cmp) rTOP rn) 0 i) ,)
  xdrop,
  mov) ne) rPC rd) rLR rm) ,) \ literal: nothing to do
  L2 abscall,
  pushret, L2 execute, popret,
  wjmp, execute

xcode uartinit


@@ 527,7 529,7 @@ xcode uartinit
  \ Enable UART0, receive & transfer part of UART.
  mov) r1 rd) $300 i) ,) add) r1 rd) r1 rn) $01 i) ,)
  str) r1 rd) r7 rn) UART0_CR +i) ,)
  lret,
  exit,

xcode main pc w>e lblsysdict pc>addr le!
lblmain forward!

M fs/xcomp/rpiboot.fs => fs/xcomp/rpiboot.fs +1 -1
@@ 1,4 1,4 @@
code : ] code ] ;
code : pushret, ] code ] ;
code hey 42 ps+, exit,
code ho -12 ps+, exit,
uartinit prompt interactive!