~nch/onward

c3bd095243374c4e39c690233189f5fa77f77744 — nc 4 years ago 092023e
moved repl code out, started writing based on jonesforth
3 files changed, 220 insertions(+), 109 deletions(-)

M makefile
M onward.s
A repl.s
M makefile => makefile +1 -1
@@ 1,2 1,2 @@
# Build the `onward` executable from onward.s: assemble to onward.o, then link with ld.
# NOTE(review): two assembler recipe lines are visible here (nasm and fasm). This text
# comes from a commit diff, so presumably only one of them is live -- confirm.
onward: onward.s
	nasm -f elf64 -o onward.o onward.s && ld -o onward onward.o
	fasm onward.s && ld -o onward onward.o

M onward.s => onward.s +103 -108
@@ 1,108 1,103 @@
; vi: ft=nasm

; https://blog.rchapman.org/posts/Linux_System_Call_Table_for_x86_64/

;

; --- system call numbers: loaded into rax before `syscall` ---
%define SYS_READ        0
%define SYS_WRITE       1
%define SYS_EXIT        60

; --- file descriptors: passed in rdi to read/write ---
%define STDIN           0
%define STDOUT          1

; _start is exported so the linker can use it as the program entry point.
global _start
section .text

_start:

;-----------------------------------------------------------------------
; rep_loop -- prompt / read / echo loop (program entry falls through here).
;
; Each pass writes the prompt, reads up to buf_len bytes from stdin into
; in_buf, copies the first whitespace-delimited word into word_buf while
; computing its sdbm hash, and writes that word back to stdout.
; Jumps to `exit` when read() returns 0 (EOF, e.g. Ctrl-D) or a negative
; error code.
;
; Register roles while scanning:
;   rsi = in_buf_ptr  (next input byte to examine)
;   r9  = in_end      (one past the last byte read() returned)
;   rbx = word_ptr    (next free byte in word_buf)
;   rcx = hash
;   rax = current character, then new_hash;  rdx = scratch
;-----------------------------------------------------------------------
rep_loop:
        ;; write prompt
        mov     rax, SYS_WRITE
        mov     rdi, STDOUT
        mov     rsi, prompt
        mov     rdx, prompt_len
        syscall

        ;; read input string
        mov     rax, SYS_READ
        mov     rdi, STDIN
        mov     rsi, in_buf
        mov     rdx, buf_len
        syscall                         ; rax = bytes read, 0 on EOF, -errno on error

        test    rax, rax
        jle     exit                    ; EOF or error: nothing valid in in_buf

        ;; get the first word in the string
        ;; while calculating hash of string

        ; update hash (sdbm hash):
        ;   for c in str:
        ;     new_hash = c + (hash << 6) + (hash << 16) - hash
        ;     hash = new_hash

        lea     r9, [rsi + rax]         ; in_end; rsi still holds in_buf after syscall
        mov     rbx, word_buf           ; word_ptr = word_buf
        xor     ecx, ecx                ; hash = 0

read_next_word_loop:
        cmp     rsi, r9                 ; never look past what read() returned
        jae     .end
        movzx   eax, byte [rsi]         ; c = *in_buf_ptr, zero-extended into rax
        cmp     al, ' '                 ; whitespace ends the word
        je      .end
        cmp     al, `\n`
        je      .end
        cmp     al, `\t`
        je      .end
        cmp     rbx, word_buf + word_buf_len
        jae     .end                    ; word_buf is full: truncate instead of overflowing
        mov     [rbx], al               ; *word_ptr = c

        ; calculate hash
        mov     rdx, rcx                ; rdx = hash, copied so it can be shifted
        sal     rdx, 6                  ; hash << 6
        add     rax, rdx
        sal     rdx, 10                 ; hash << 16 (6 + 10)
        add     rax, rdx
        sub     rax, rcx                ; - hash
        mov     rcx, rax                ; hash = new_hash

        add     rsi, 1                  ; in_buf_ptr++
        add     rbx, 1                  ; word_ptr++
        jmp     read_next_word_loop
.end:
        ; word length = word_ptr - word_buf, kept in rdx for the write below
        mov     rdx, rbx
        sub     rdx, word_buf

        ; TODO: store value into intern table
        ; TODO: push symbol value onto stack
        ; TODO: bootstrap
        ; TODO: jit self

        ;; echo word
        mov     rax, SYS_WRITE
        mov     rdi, STDOUT
        mov     rsi, word_buf
        ; rdx set above
        syscall

        jmp     rep_loop

;-----------------------------------------------------------------------
; exit -- terminate the process with status 0.
;-----------------------------------------------------------------------
exit:
        mov     rax, SYS_EXIT
        mov     rdi, 0
        syscall

section .bss
        in_buf: resb 128                ; raw bytes from read()
        buf_len: equ $ - in_buf

        word_buf: resb 32               ; first word of the current line
        word_buf_len: equ $ - word_buf  ; capacity used to bound the copy loop

section .rodata
        prompt: db "> "
        prompt_len: equ $ - prompt
; vi: ft=fasm
; Assemble 64-bit code into an ELF64 object file.
use64
format ELF64
; Export the local label `start` under the symbol name `_start`.
public start as '_start'

;; system calls (number goes in rax before `syscall`)
SYS_WRITE = 1
SYS_EXIT = 60

; file descriptors (passed in rdi to SYS_WRITE)
STDIN = 0
STDOUT = 1

;;;

; Size of one data-stack cell and of one entry in a threaded-code list;
; used by pushd, popd and next as the step for rsp and rsi.
ADDR_SIZE = 8             ; addr size in bytes

; dstack: expands to rsp, the register used as the data stack pointer.
; NOTE(review): nothing visible here invokes this macro; the words name rsp directly.
macro dstack { rsp }      ; data stack
; rsi points to virtual instructions

; defcode name: open the definition of a primitive (machine-code) word.
; Emits a public label l_<name>, pads to an ADDR_SIZE boundary, then defines the
; label <name> at the first instruction of the word. The instructions that follow
; the macro invocation form the body, which is expected to end with `next` (or
; never return). The commented-out lines sketch a dictionary header (link field
; and name string) that is not emitted yet.
macro defcode name {
    public l_#name
l_#name:
    ;dd link
    ;set link, name_#label
    ;mstr db `name
    align ADDR_SIZE
name:
}

; ASMWORD = array of instructions (aka a label)

; Data-stack primitives. The data stack lives on the machine stack (rsp) and
; grows DOWNWARD, the same direction as hardware push/call and kernel signal
; frames. Live cells therefore always sit at or above rsp and cannot be
; overwritten by anything that pushes below rsp, and pushing no longer walks
; upward over the argc/argv/envp area found above the initial rsp.
; lea (rather than add/sub) adjusts rsp so the flags are left untouched.

; pushd r: reserve one cell and store operand r at the new top of stack.
macro pushd r {
    lea rsp, [rsp - ADDR_SIZE]
    mov [rsp], r
}

; popd r: load the top cell into r, then release that cell.
macro popd r {
    mov r, [rsp]
    lea rsp, [rsp + ADDR_SIZE]
}

; next: dispatch to the following word of the threaded-code list.
; rsi is the virtual instruction pointer: it addresses a list of ADDR_SIZE-byte
; entries, each holding the address of a word's machine code. Fetch the current
; entry, advance rsi past it, and jump to the fetched address. Clobbers rax.
macro next {
    mov rax, [rsi]                  ; rax = address stored in the current entry
    lea rsi, [rsi + ADDR_SIZE]      ; step to the next entry (flags untouched)
    jmp rax                         ; run the word
}

; Code section. In fasm's ELF object formats a section is only flagged as
; executable when the `executable` keyword is given; a bare section name yields
; a plain data section and the linked program would fault on its first fetch.
section '.text' executable

    ; drop ( a -- ): discard the top cell of the data stack. Clobbers rax.
    defcode drop
    popd rax
    next

    ; swap ( a b -- b a ): exchange the two topmost cells of the data stack.
    ; Clobbers rax, rbx.
    defcode swap
    popd rax                        ; rax = b (old top)
    popd rbx                        ; rbx = a (old second)
    pushd rax                       ; b goes back first, so it ends up second
    pushd rbx                       ; a is pushed last and becomes the new top
    next

    ; sdup ( a -- a a ): duplicate the top cell of the data stack. Clobbers rax.
    defcode sdup
    mov rax, [rsp]                  ; peek at the top cell without removing it
    pushd rax                       ; push a second copy on top
    next

    ; emit ( c -- ): write the lowest byte of the top cell to stdout, then drop
    ; the cell. Clobbers rax, rdx, rdi, r8 (plus whatever `syscall` clobbers).
    defcode emit
    mov r8, rsi                     ; park the instruction pointer: rsi is the buffer argument
    mov rsi, rsp                    ; buffer = top of data stack
    mov rdx, 1                      ; length = one byte
    mov rdi, STDOUT
    mov rax, SYS_WRITE
    syscall
    mov rsi, r8                     ; bring the instruction pointer back
    popd rax                        ; drop the emitted cell
    next

    ; exit ( -- ): terminate the process with status 0. Has no `next`: the
    ; SYS_EXIT system call is not expected to return.
    defcode exit
    mov rdi, 0                      ; exit status
    mov rax, SYS_EXIT
    syscall

; start: program entry point (exported as _start).
; Seeds the data stack with the characters h, e, l, l, o -- 'o' ends up on
; top -- then points rsi at the word list `instrs` and dispatches the first word.
start:
    pushd byte 'h'
    pushd byte 'e'
    pushd byte 'l'
    pushd byte 'l'
    pushd byte 'o'
    mov rsi, instrs                 ; rsi = first entry of the threaded-code list
    cld                             ; clear the direction flag
    next

; instrs: the threaded-code program run from `start`, one code address per
; 8-byte entry. sdup duplicates the top character, six emits print the cells
; from the top down, and exit ends the process.
instrs:
    dq sdup
    dq emit, emit, emit, emit, emit, emit
    dq exit
;section '.rodata' ;readable

A repl.s => repl.s +116 -0
@@ 0,0 1,116 @@
; vi: ft=nasm

; https://blog.rchapman.org/posts/Linux_System_Call_Table_for_x86_64/

;

bits 64

; --- system call numbers: loaded into rax before `syscall` ---
%define SYS_READ        0
%define SYS_WRITE       1
%define SYS_EXIT        60

; --- file descriptors: passed in rdi to read/write ---
%define STDIN           0
%define STDOUT          1

; --- sizing of the words_dict table reserved in .bss ---
%define MAX_WORD_LEN    16      ; 15 + 1 for null terminator
%define MAX_WORDS       32

; _start is exported so the linker can use it as the program entry point.
global _start
section .text

_start:

; The hash is reduced with a bit mask below, which only equals `% MAX_WORDS`
; when MAX_WORDS is a power of two.
%if (MAX_WORDS & (MAX_WORDS - 1)) != 0
  %error "MAX_WORDS must be a power of two"
%endif

;-----------------------------------------------------------------------
; rep_loop -- prompt / read / echo loop (program entry falls through here).
;
; Each pass writes the prompt, reads up to buf_len bytes from stdin into
; in_buf, copies the first whitespace-delimited word into word_buf while
; computing its sdbm hash reduced modulo MAX_WORDS, and writes that word
; back to stdout. Jumps to `exit` when read() returns 0 (EOF, e.g.
; Ctrl-D) or a negative error code.
;
; Register roles while scanning:
;   rsi = in_buf_ptr  (next input byte to examine)
;   r9  = in_end      (one past the last byte read() returned)
;   rbx = word_ptr    (next free byte in word_buf)
;   ecx = hash, always in [0, MAX_WORDS)
;   eax = current character, then new_hash;  edx = scratch
;-----------------------------------------------------------------------
rep_loop:
        ;; write prompt
        mov     rax, SYS_WRITE
        mov     rdi, STDOUT
        mov     rsi, prompt
        mov     rdx, prompt_len
        syscall

        ;; read input string
        mov     rax, SYS_READ
        mov     rdi, STDIN
        mov     rsi, in_buf
        mov     rdx, buf_len
        syscall                         ; rax = bytes read, 0 on EOF, -errno on error

        test    rax, rax
        jle     exit                    ; EOF or error: nothing valid in in_buf

        ;; get the first word in the string
        ;; while calculating hash of string

        ; update hash (sdbm hash):
        ;   for c in str:
        ;     new_hash = (c + (hash << 6) + (hash << 16) - hash) % MAX_WORDS
        ;     hash = new_hash

        lea     r9, [rsi + rax]         ; in_end; rsi still holds in_buf after syscall
        mov     rbx, word_buf           ; word_ptr = word_buf
        xor     ecx, ecx                ; hash = 0

read_next_word_loop:
        cmp     rsi, r9                 ; never look past what read() returned
        jae     .end
        movzx   eax, byte [rsi]         ; c = *in_buf_ptr, zero-extended
        cmp     al, ' '                 ; whitespace ends the word
        je      .end
        cmp     al, `\n`
        je      .end
        cmp     al, `\t`
        je      .end
        cmp     rbx, word_buf + word_buf_len
        jae     .end                    ; word_buf is full: truncate instead of overflowing
        mov     [rbx], al               ; *word_ptr = c

        ; calculate hash (only need 32 bits -- we're taking the mod anyway)
        mov     edx, ecx                ; edx = hash, copied so it can be shifted
        sal     edx, 6                  ; hash << 6
        add     eax, edx
        sal     edx, 10                 ; hash << 16 (6 + 10)
        add     eax, edx
        sub     eax, ecx                ; - hash
        and     eax, MAX_WORDS - 1      ; % MAX_WORDS, and actually kept this time
        mov     ecx, eax                ; hash = new_hash
        ;; TODO: implement forth eval, write this in forth instead
        ;; TODO: leave original stream, and put pointer to word (like what strtok does)

        add     rsi, 1                  ; in_buf_ptr++
        add     rbx, 1                  ; word_ptr++
        jmp     read_next_word_loop
.end:   ; word length = word_ptr - word_buf, kept in rdx for the write below
        mov     rdx, rbx
        sub     rdx, word_buf

        ; TODO: store value into intern table
        ; TODO: push symbol value onto stack
        ; TODO: bootstrap
        ; TODO: jit self

        ;; echo word
        mov     rax, SYS_WRITE
        mov     rdi, STDOUT
        mov     rsi, word_buf
        ; rdx set above
        syscall

        jmp     rep_loop

;-----------------------------------------------------------------------
; exit -- terminate the process with status 0.
;-----------------------------------------------------------------------
exit:
        mov     rax, SYS_EXIT
        mov     rdi, 0
        syscall

section .bss
        in_buf: resb 128                ; raw bytes from read()
        buf_len: equ $ - in_buf
        word_buf: resb 32               ; first word of the current line
        word_buf_len: equ $ - word_buf  ; capacity used to bound the copy loop

        ; MAX_WORDS slots of MAX_WORD_LEN bytes each; not written by any code yet
        words_dict: resb (MAX_WORD_LEN * MAX_WORDS)

section .rodata
        prompt: db "> "
        prompt_len: equ $ - prompt