~maelkum/viuavm

9d7b45cd2e50f2f2629af89c2ad863206742e4c3 — Marek Marecki 9 months ago 863d586
Implement ATXTP instruction

However, the situation of symbol-address loads right now is a mess.

Call using a pseudoinstruction? Have a LUI, LLI, CALL.
Call using a pre-filled address? Have an ATXTP, CALL.
Need an atom? One LUI, LLI, ATOM coming right up!

And it also complicates the relocation table preparation logic.

It NEEDS to be simplified. I will probably go with what RISC-V is doing
instead of innovating too much, i.e.:

    auipc $x, high-half
    addi $x, low-half
    foo ..., $x

with foo being ATOM, CALL, etc. The AUIPC instruction would create a
pointer and then the ADDI would be used to add any extra bits necessary
to form the full address.

However, I still think that it would be useful to have two distinct
instructions for loading data and text addresses:

 - LDA (Load Data Address): create a pointer into .rodata or .data
   section which would only be useful for M operations
 - LTA (Load Text Address): create a pointer into .text section which
   would only be useful for flow operations

The VM could use the data in .symtab and in DWARF to see if the pointers
being used by the program with M and flow operations are really correct.

Yeah. Something to think about, I guess.
M new/include/viua/arch/ins.h => new/include/viua/arch/ins.h +6 -0
@@ 290,6 290,12 @@ struct ARODP : Instruction {
    ARODP(viua::arch::ops::E i) : instruction{i}
    {}
};
/*
 * ATXTP instruction.
 *
 * Wraps the decoded E-format operands of an `atxtp` instruction, which is
 * used to put the address of a function into a register so that it can be
 * used later, e.g.
 *
 *      atxtp $x, @fn
 *      call void, $x
 *
 * The VM handler reads the `out` and `immediate` fields of the wrapped
 * instruction.
 */
struct ATXTP : Instruction {
    viua::arch::ops::E instruction;

    ATXTP(viua::arch::ops::E i) : instruction{i}
    {}
};

struct ADDI : Instruction {
    using value_type                                     = int64_t;

M new/include/viua/arch/ops.h => new/include/viua/arch/ops.h +2 -0
@@ 279,6 279,7 @@ enum class OPCODE : opcode_type {

    CAST  = (FORMAT_E | 0x0001),
    ARODP = (FORMAT_E | 0x0002),
    ATXTP = (FORMAT_E | 0x0003),

    ADDI  = (FORMAT_R | 0x0001),
    ADDIU = (FORMAT_R | 0x0001 | UNSIGNED),


@@ 357,6 358,7 @@ enum class OPCODE_F : opcode_type {
enum class OPCODE_E : opcode_type {
    Make_entry(CAST),
    Make_entry(ARODP),
    Make_entry(ATXTP),
};
enum class OPCODE_R : opcode_type {
    Make_entry(ADDI),

M new/include/viua/libs/lexer.h => new/include/viua/libs/lexer.h +2 -0
@@ 248,6 248,8 @@ inline auto const OPCODE_NAMES = std::set<std::string_view>{

    "arodp",
    "g.arodp",
    "atxtp",
    "g.atxtp",

    "sm",
    "g.sm",

M new/include/viua/vm/ins.h => new/include/viua/vm/ins.h +1 -0
@@ 77,6 77,7 @@ Work_instruction(LUIU);
Work_instruction(LLI);
Work_instruction(CAST);
Work_instruction(ARODP);
Work_instruction(ATXTP);

Work_instruction(FLOAT);
Work_instruction(DOUBLE);

M new/src/arch/ops.cpp => new/src/arch/ops.cpp +4 -0
@@ 434,6 434,8 @@ auto to_string(opcode_type const raw) -> std::string
        return greedy + "cast";
    case OPCODE::ARODP:
        return greedy + "arodp";
    case OPCODE::ATXTP:
        return greedy + "atxtp";
    case OPCODE::SM:
        return greedy + "sm";
    case OPCODE::LM:


@@ 572,6 574,8 @@ auto parse_opcode(std::string_view const raw) -> opcode_type
        return (op | static_cast<opcode_type>(OPCODE::CAST));
    } else if (sv == "arodp") {
        return (op | static_cast<opcode_type>(OPCODE::ARODP));
    } else if (sv == "atxtp") {
        return (op | static_cast<opcode_type>(OPCODE::ATXTP));
    } else if (sv == "sm") {
        return (op | static_cast<opcode_type>(OPCODE::SM));
    } else if (sv == "lm") {

M new/src/tools/exec/asm.cpp => new/src/tools/exec/asm.cpp +75 -3
@@ 1506,6 1506,48 @@ auto save_objects(std::vector<std::unique_ptr<ast::Node>>& nodes,
                    }
                    throw e;
                }
            } else if (instr.leader == "atxtp" or instr.leader == "g.atxtp") {
                auto const lx = instr.operands.back().ingredients.front();
                using enum viua::libs::lexer::TOKEN;
                if (lx.token == viua::libs::lexer::TOKEN::AT) {
                    auto const label = instr.operands.back().ingredients.back();
                    try {
                        saved_at = symbol_map.at(label.text);
                    } catch (std::out_of_range const&) {
                        using viua::libs::errors::compile_time::Cause;
                        using viua::libs::errors::compile_time::Error;

                        auto e = Error{label, Cause::Unknown_label, label.text};
                        e.add(lx);

                        using viua::support::string::levenshtein_filter;
                        auto misspell_candidates =
                            levenshtein_filter(label.text, symbol_map);
                        if (not misspell_candidates.empty()) {
                            using viua::support::string::levenshtein_best;
                            auto best_candidate =
                                levenshtein_best(label.text,
                                                 misspell_candidates,
                                                 (label.text.size() / 2));
                            if (best_candidate.second != label.text) {
                                did_you_mean(e, best_candidate.second);
                            }
                        }

                        throw e;
                    }
                } else {
                    using viua::libs::errors::compile_time::Cause;
                    using viua::libs::errors::compile_time::Error;

                    auto e = Error{lx,
                                   Cause::Invalid_operand,
                                   "expected a label reference"};
                    if (lx.token == viua::libs::lexer::TOKEN::LITERAL_ATOM) {
                        did_you_mean(e, '@' + lx.text);
                    }
                    throw e;
                }
            } else if (instr.leader == "double" or instr.leader == "g.double") {
                auto const lx = instr.operands.back().ingredients.front();
                using enum viua::libs::lexer::TOKEN;


@@ 2117,6 2159,13 @@ auto expand_call(ast::Instruction const& raw,
                 std::vector<Elf64_Sym>& symbol_table,
                 std::map<std::string, size_t> const& symbol_map) -> Text
{
    using viua::libs::lexer::TOKEN;
    auto const call_addr_already_loaded =
        raw.operands.back().ingredients.front() == TOKEN::DOLLAR;
    if (call_addr_already_loaded) {
        return {emit_instruction(raw)};
    }

    auto cooked = Text{};

    /*


@@ 2155,7 2204,6 @@ auto expand_call(ast::Instruction const& raw,
         */
        fn_offset = ast::Operand{};

        using viua::libs::lexer::TOKEN;
        auto const& lx = ret.ingredients.front();
        fn_offset.ingredients.push_back(lx.make_synth("$", TOKEN::DOLLAR));
        fn_offset.ingredients.push_back(


@@ 2810,18 2858,41 @@ auto make_reloc_table(Text const& text) -> std::vector<Elf64_Rel>
                                 or (op == OPCODE::ARODP);
        auto const type = into_rodata ? R_VIUA_OBJECT : R_VIUA_JUMP_SLOT;

        using viua::arch::ops::FORMAT_MASK;
        using viua::arch::ops::FORMAT_F;
        auto const reloc_to_section_ptr = op == OPCODE::ARODP or op == OPCODE::ATXTP;
        auto const reloc_to_long_addr = (text.at(i - 1) & FORMAT_MASK) == FORMAT_F;

        auto symtab_entry_index = uint32_t{};
        if (op == OPCODE::ARODP) {
        if (reloc_to_section_ptr) {
            using viua::arch::ops::E;
            symtab_entry_index =
                static_cast<uint32_t>(E::decode(text.at(i)).immediate);
        } else {
        } else if (reloc_to_long_addr) {
            using viua::arch::ops::F;
            auto const hi =
                static_cast<uint64_t>(F::decode(text.at(i - 2)).immediate)
                << 32;
            auto const lo      = F::decode(text.at(i - 1)).immediate;
            symtab_entry_index = static_cast<uint32_t>(hi | lo);
        } else {
            /*
             * Well, it is not really a reloc after all.
             * This case can be encountered when using
             *
             *      atxtp $x, fn
             *      call void, $x
             *
             * instead of
             *
             *      call void, fn
             *
             * directly.
             */
            // FIXME This branch should be removed after calls and jumps use
            // atxtp exclusively. Addresses should not really be loaded using
            // integers, but using pointers instead. See AUIPC of RISC-V.
            return;
        }

        Elf64_Rel rel;


@@ 2846,6 2917,7 @@ auto make_reloc_table(Text const& text) -> std::vector<Elf64_Rel>
        case ATOM:
        case DOUBLE:
        case ARODP:
        case ATXTP:
            push_reloc(i);
            break;
        default:

M new/src/tools/exec/dis.cpp => new/src/tools/exec/dis.cpp +23 -0
@@ 541,6 541,29 @@ auto demangle_arodp(Cooked_text& text,
                              + arodp.out.to_string() + ", " + label_or_value));
            continue;
        }
        if (m(i, ATXTP) or m(i, ATXTP, GREEDY)) {
            using viua::arch::ops::E;
            auto const atxtp        = E::decode(ins_at(i));
            auto const needs_greedy = (atxtp.opcode & GREEDY);

            auto const off = atxtp.immediate;

            auto const sym = std::find_if(
                symtab.begin(), symtab.end(), [off](auto const& each) -> bool {
                    return (each.st_value == off)
                           and (ELF64_ST_TYPE(each.st_info) == STT_FUNC);
                });
            // FIXME See if the symbol was actually found.

            auto idx          = text.at(i).index;
            idx.physical_span = idx.physical;
            tmp.emplace_back(idx,
                             std::nullopt,
                             std::nullopt,
                             ((needs_greedy ? "g." : "") + std::string{"atxtp "}
                              + atxtp.out.to_string() + ", " + make_label_ref(strtab, *sym)));
            continue;
        }

        tmp.push_back(std::move(text.at(i)));
    }

M new/src/tools/exec/ld.cpp => new/src/tools/exec/ld.cpp +1 -1
@@ 535,7 535,7 @@ auto relocate(Text& text, Elf64_Rel const rel, uint64_t const value) -> void
    auto const op =
        static_cast<OPCODE>(text.at(text_ndx) & viua::arch::ops::OPCODE_MASK);

    if (op == OPCODE::ARODP) {
    if (op == OPCODE::ARODP or op == OPCODE::ATXTP) {
        using viua::arch::ops::E;
        auto imm_op       = E::decode(text.at(text_ndx));
        text.at(text_ndx) = E{imm_op.opcode, imm_op.out, value}.encode();

M new/src/vm/ins.cpp => new/src/vm/ins.cpp +6 -0
@@ 163,6 163,7 @@ auto execute(viua::vm::Stack& stack,
        break
            Work(CAST);
            Work(ARODP);
            Work(ATXTP);
#undef Work
        }
        break;


@@ 1150,6 1151,11 @@ auto execute(ARODP const op, Stack& stack, ip_type const) -> void
    pointer_info.foreign = true;
    stack.proc->record_pointer(pointer_info);
}
/*
 * Handler for the ATXTP instruction: assigns the immediate carried by the
 * instruction to the register named by its output operand.
 */
auto execute(ATXTP const op, Stack& stack, ip_type const) -> void
{
    auto const& decoded = op.instruction;

    // FIXME Yeah... This should really be a pointer instead (right now the
    // register just receives the immediate as a plain value).
    mutable_proxy(stack, decoded.out) = decoded.immediate;
}

auto execute(FLOAT const op, Stack& stack, ip_type const) -> void
{

M new/tests/asm/call_indirect.asm => new/tests/asm/call_indirect.asm +1 -1
@@ 10,7 10,7 @@
    atom $1, @hello_world
    ebreak

    li $2.l, @dummy
    atxtp $2.l, @dummy

    frame $1.a
    move $0.a, $1.l