~maelkum/viuavm

6fbae81b5a3b8ae7e9d42e02e1820e2fbf6f0c17 — Marek Marecki 9 months ago 68f8dbb
Expand CALL and ACTOR pseudoinstructions using ATXTP

Instead of doing

    lui
    lli
    call/actor

the sequence is now

    atxtp
    call/actor

Why? To force CALL and ACTOR to use a pointer instead of a raw integer
as their input.
M new/src/tools/exec/asm.cpp => new/src/tools/exec/asm.cpp +13 -11
@@ 2219,13 2219,13 @@ auto expand_call(ast::Instruction const& raw,
     * function, which will then be used by the call instruction to
     * invoke the function.
     */
    auto li = ast::Instruction{};
    auto atxtp = ast::Instruction{};
    {
        li.leader      = raw.leader;
        li.leader.text = "g.li";
        atxtp.leader      = raw.leader;
        atxtp.leader.text = "g.atxtp";

        li.operands.push_back(fn_offset);
        li.operands.push_back(fn_offset);
        atxtp.operands.push_back(fn_offset);
        atxtp.operands.push_back(fn_offset);

        using viua::libs::lexer::TOKEN;



@@ 2268,11 2268,12 @@ auto expand_call(ast::Instruction const& raw,
                .add(raw.leader);
        }

        li.operands.back().ingredients.front().text =
        atxtp.operands.back().ingredients.front().text =
            (std::to_string(sym_off) + 'u');
        li.operands.back().ingredients.front().token = TOKEN::LITERAL_INTEGER;
        atxtp.operands.back().ingredients.front().token =
            TOKEN::LITERAL_INTEGER;
    }
    std::ranges::copy(expand_li(li, true), std::back_inserter(cooked));
    cooked.push_back(emit_instruction(atxtp));

    /*
     * Then, synthesize the actual call instruction. This means


@@ 2860,8 2861,10 @@ auto make_reloc_table(Text const& text) -> std::vector<Elf64_Rel>

        using viua::arch::ops::FORMAT_MASK;
        using viua::arch::ops::FORMAT_F;
        auto const reloc_to_section_ptr = op == OPCODE::ARODP or op == OPCODE::ATXTP;
        auto const reloc_to_long_addr = (text.at(i - 1) & FORMAT_MASK) == FORMAT_F;
        auto const reloc_to_section_ptr = op == OPCODE::ARODP
                                          or op == OPCODE::ATXTP;
        auto const reloc_to_long_addr =
            (text.at(i - 1) & FORMAT_MASK) == FORMAT_F;

        auto symtab_entry_index = uint32_t{};
        if (reloc_to_section_ptr) {


@@ 2913,7 2916,6 @@ auto make_reloc_table(Text const& text) -> std::vector<Elf64_Rel>
        switch (op) {
            using enum viua::arch::ops::OPCODE;
        case IF:
        case CALL:
        case ATOM:
        case DOUBLE:
        case ARODP:

M new/src/tools/exec/dis.cpp => new/src/tools/exec/dis.cpp +44 -19
@@ 236,22 236,28 @@ auto demangle_symbol_load(Cooked_text& raw,

    using enum viua::arch::ops::OPCODE;
    using viua::arch::ops::D;
    using viua::arch::ops::E;
    using viua::arch::ops::S;
    if (m(i + 1, ATOM) and S::decode(ins_at(i + 1)).out == out) {
        auto ins = raw.at(i + 1);

        auto const sym = std::find_if(
        auto const sym_it = std::find_if(
            symtab.begin(),
            symtab.end(),
            [immediate](auto const& each) -> bool {
                return (each.st_value == immediate)
                       and (ELF64_ST_TYPE(each.st_info) == STT_OBJECT);
            });
        if (sym == symtab.end()) {
            abort();  // FIXME symbol not found? should never happen here
        }
        auto const label_or_value = sym->st_name
                                        ? make_label_ref(strtab, *sym)
        /*
         * FIXME OvsELF
         * In executable files we need to find the symbol by .st_value, i.e. the
         * real offset. In relocatable files we need to find it by index into the
         * symbol table, since relocations have not been performed yet.
         */
        auto const& sym = (sym_it == symtab.end()) ? symtab.at(immediate)
                                                   : *sym_it;
        auto const label_or_value = sym.st_name
                                        ? make_label_ref(strtab, sym)
                                        : load_string(rodata, immediate);

        auto tt =


@@ 300,8 306,7 @@ auto demangle_symbol_load(Cooked_text& raw,
    if (m(i + 1, ACTOR) and D::decode(ins_at(i + 1)).in == out) {
        auto ins = raw.at(i + 1);

        auto const sym      = symtab.at(immediate);
        auto const sym_name = get_symbol_name(sym.st_value, symtab, strtab);
        auto const sym_name = get_symbol_name(immediate, symtab, strtab);
        auto const safe_sym_name =
            match_atom(sym_name) ? sym_name : ('"' + sym_name + '"');



@@ 392,6 397,7 @@ auto demangle_canonical_li(Cooked_text& text,
                (std::string{"[[full]] "} + (needs_greedy ? "g." : "")
                 + std::string{"li "} + lui.out.to_string() + ", " + literal));

            // FIXME calls are using ATXTP instead of LUIU
            if (needs_unsigned) {
                demangle_symbol_load(
                    text, tmp, i, lui.out, value, symtab, strtab, rodata);


@@ 548,20 554,38 @@ auto demangle_arodp(Cooked_text& text,

            auto const off = atxtp.immediate;

            auto const sym = std::find_if(
            auto const sym_it = std::find_if(
                symtab.begin(), symtab.end(), [off](auto const& each) -> bool {
                    return (each.st_value == off)
                           and (ELF64_ST_TYPE(each.st_info) == STT_FUNC);
                });
            // FIXME See if the symbol was actually found.
            /*
             * FIXME OvsELF
             * In executable files we need to find the symbol by .st_value, i.e.
             * the real offset. In relocatable files we need to find it by index
             * into the symbol table, since relocations have not been performed yet.
             */
            auto const& sym = (sym_it == symtab.end()) ? symtab.at(off)
                                                       : *sym_it;

            auto idx          = text.at(i).index;
            idx.physical_span = idx.physical;
            tmp.emplace_back(idx,
                             std::nullopt,
                             std::nullopt,
                             ((needs_greedy ? "g." : "") + std::string{"atxtp "}
                              + atxtp.out.to_string() + ", " + make_label_ref(strtab, *sym)));
            tmp.emplace_back(
                idx,
                std::nullopt,
                std::nullopt,
                ((needs_greedy ? "g." : "") + std::string{"atxtp "}
                 + atxtp.out.to_string() + ", " + make_label_ref(strtab, sym)));

            demangle_symbol_load(text,
                                 tmp,
                                 i,
                                 atxtp.out,
                                 atxtp.immediate,
                                 symtab,
                                 strtab,
                                 rodata);

            continue;
        }



@@ 1213,6 1237,11 @@ auto main(int argc, char* argv[]) -> int
            cooked_text.emplace_back(i, opcode, ip, ins_to_string(ip));
        }

        cook::demangle_arodp(cooked_text,
                             main_module.symtab,
                             main_module.strtab_quick,
                             rodata->get().data);

        if (demangle_li) {
            /*
             * This demangles LI for long immediates; covering both integers for


@@ 1231,10 1260,6 @@ auto main(int argc, char* argv[]) -> int
        }

        cook::demangle_addiu(cooked_text);
        cook::demangle_arodp(cooked_text,
                             main_module.symtab,
                             main_module.strtab_quick,
                             rodata->get().data);

        if (demangle_mem) {
            cook::demangle_memory(cooked_text);

M new/src/vm/ins.cpp => new/src/vm/ins.cpp +12 -12
@@ 989,10 989,13 @@ auto execute(CALL const op, Stack& stack, ip_type const) -> ip_type
    if (auto fn = mutable_proxy(stack, op.instruction.in).get<uint64_t>(); fn) {
        fn_addr = *fn;
        fn.reset();
    } else {
        throw abort_execution{stack,
                              "invalid in operand to call instruction"};
    }

    if (fn_addr % sizeof(viua::arch::instruction_type)) {
        throw abort_execution{stack, "invalid IP after call"};
        throw abort_execution{stack, "invalid IP after synchronous call"};
    }

    /*


@@ 1405,21 1408,18 @@ auto execute(IO_PEEK const, Stack&, ip_type const) -> void

auto execute(ACTOR const op, Stack& stack, ip_type const) -> void
{
    auto fn_name = std::string{};
    auto fn_addr = size_t{};
    {
        if (auto fn = immutable_proxy(stack, op.instruction.in).get<uint64_t>();
            fn) {
            std::tie(fn_name, fn_addr) = stack.proc->module.function_at(*fn);
            mutable_proxy(stack, op.instruction.in).reset();
        } else {
            throw abort_execution{stack,
                                  "invalid in operand to actor instruction"};
        }
    if (auto fn = mutable_proxy(stack, op.instruction.in).get<uint64_t>();
        fn) {
        fn_addr = *fn;
        fn.reset();
    } else {
        throw abort_execution{stack,
                              "invalid in operand to actor instruction"};
    }

    if (fn_addr % sizeof(viua::arch::instruction_type)) {
        throw abort_execution{stack, "invalid IP after call"};
        throw abort_execution{stack, "invalid IP after asynchronous call"};
    }

    auto const fr_entry = (fn_addr / sizeof(viua::arch::instruction_type));

M new/tests/asm/mem_use_of_dead.abort => new/tests/asm/mem_use_of_dead.abort +1 -1
@@ 1,3 1,3 @@
0x0000000000000030
0x0000000000000028
0x0000030103027002
unknown pointer: bfffffffffffffe9