~serjei_mikailov/tiny32-assembler

402ffd345720c9bf4e1458d54f54a5825b056ef8 — SerjeiMikailov a month ago 74bbc78 master
new version
4 files changed, 141 insertions(+), 5 deletions(-)

M README.md
A make.sh
A src/source.c
A src/source.h
M README.md => README.md +2 -5
@@ 1,7 1,4 @@
# Tiny ia32 Assembler

A bare metal assembler, portable to ia32 Operating Systems.

### Features
 - Own Simple Syntax
 - Bare Metal Compatible
A tiny assembler, portable to ia32 Operating Systems.
To port it, just replace the stdlib headers in 'source.h' with your own libc headers.

A make.sh => make.sh +6 -0
@@ 0,0 1,6 @@
#!/bin/sh
# Build the assembler as a 32-bit executable and run it from the build directory.

# Stop at the first failing step so a failed compile never runs a stale binary.
set -e

mkdir -p build

clang src/source.c -o build/source -m32 -std=c17 -Wall -Wextra

cd build
./source

A src/source.c => src/source.c +111 -0
@@ 0,0 1,111 @@
#include "source.h"

/*
 * Return a heap-allocated copy of the NUL-terminated string `src`,
 * or NULL when the allocation fails. The caller owns the returned
 * buffer and releases it with free().
 */
char* c_strdup(const char* src) {
    size_t bytes = strlen(src) + 1;  /* payload plus the terminator */
    char* copy = malloc(bytes);

    if (copy != NULL) {
        memcpy(copy, src, bytes);
    }
    return copy;
}

/*
 * Split `line` into tokens separated by spaces, commas, tabs or newlines
 * and classify each token by its first character:
 *   - a letter: TOKEN_INSTRUCTION when the token is one of
 *     mov/add/sub/jmp/cmp, otherwise TOKEN_REGISTER;
 *   - a digit or '-': TOKEN_IMMEDIATE;
 *   - anything else: TOKEN_UNKNOWN.
 *
 * line         NUL-terminated source line; it is not modified (a private
 *              copy is tokenized instead).
 * tokens       output array. No capacity is passed in, so the caller must
 *              provide room for every token on the line.
 * token_count  receives the number of tokens written; it is 0 when the
 *              line has no tokens or the working copy cannot be allocated.
 *
 * Token text that does not fit in the `text` field is truncated.
 */
void tokenize(const char* line, Token* tokens, int* token_count) {
    const char* delimiters = " ,\t\n";

    *token_count = 0;

    char* line_copy = c_strdup(line);
    if (line_copy == NULL) {
        /* strtok(NULL, ...) with no prior string would be undefined. */
        return;
    }

    for (char* token = strtok(line_copy, delimiters); token != NULL;
         token = strtok(NULL, delimiters)) {
        Token* out = &tokens[*token_count];
        /* <ctype.h> functions require a value representable as unsigned char. */
        unsigned char first = (unsigned char)token[0];

        if (isalpha(first)) {
            if (strcmp(token, "mov") == 0 || strcmp(token, "add") == 0 ||
                strcmp(token, "sub") == 0 || strcmp(token, "jmp") == 0 ||
                strcmp(token, "cmp") == 0) {
                out->type = TOKEN_INSTRUCTION;
            } else {
                out->type = TOKEN_REGISTER;
            }
        } else if (isdigit(first) || token[0] == '-') {
            out->type = TOKEN_IMMEDIATE;
        } else {
            out->type = TOKEN_UNKNOWN;
        }

        /* Bounded copy: an over-long token must not overflow the text field. */
        snprintf(out->text, sizeof out->text, "%s", token);
        (*token_count)++;
    }

    free(line_copy);
}

/* One assembly line after parsing: the mnemonic text plus its operand strings. */
typedef struct {
    char instruction[8];    /* text of the first token on the line */
    char operands[2][32];   /* text of the tokens after the first, in order */
    int operand_count;      /* number of operands recorded by parse_tokens */
} ParsedLine;

/*
 * Build a ParsedLine from a token array: the first token becomes the
 * instruction, the following tokens become operands.
 *
 * tokens       tokens of one line, in source order.
 * token_count  number of valid entries in `tokens`.
 * parsed_line  always fully written: with no tokens it is set to an empty
 *              instruction and zero operands, so no stale data from a
 *              previous line survives.
 *
 * Operands beyond the capacity of `operands` are dropped and text that does
 * not fit its destination field is truncated, so no field is overrun.
 */
void parse_tokens(Token* tokens, int token_count, ParsedLine* parsed_line) {
    const int max_operands =
        (int)(sizeof parsed_line->operands / sizeof parsed_line->operands[0]);

    parsed_line->instruction[0] = '\0';
    parsed_line->operand_count = 0;

    if (token_count <= 0) {
        return;
    }

    snprintf(parsed_line->instruction, sizeof parsed_line->instruction,
             "%s", tokens[0].text);

    int operand_count = token_count - 1;
    if (operand_count > max_operands) {
        operand_count = max_operands;
    }

    for (int i = 0; i < operand_count; i++) {
        snprintf(parsed_line->operands[i], sizeof parsed_line->operands[i],
                 "%s", tokens[i + 1].text);
    }
    parsed_line->operand_count = operand_count;
}

/*
 * Map a register name to its numeric code.
 *
 * Recognises exactly "eax", "ecx", "edx" and "ebx" (case-sensitive).
 * Returns 0xFF for any other name.
 */
unsigned char encode_register(const char* reg) {
    static const struct {
        const char* name;
        unsigned char code;
    } registers[] = {
        {"eax", 0x00},
        {"ecx", 0x01},
        {"edx", 0x02},
        {"ebx", 0x03},
    };

    for (size_t i = 0; i < sizeof registers / sizeof registers[0]; i++) {
        if (strcmp(reg, registers[i].name) == 0) {
            return registers[i].code;
        }
    }
    return 0xFF;  /* sentinel: name not recognised */
}

/*
 * Encode "mov <register>, <immediate>" as five bytes: the opcode
 * 0xB8 + register code, followed by the 32-bit immediate stored
 * least-significant byte first.
 *
 * line    parsed line; operands[0] is the destination register name and
 *         operands[1] the decimal immediate.
 * output  receives the encoded bytes; must have room for 5 bytes.
 * length  receives 5 on success, or 0 (nothing written) when the line has
 *         fewer than two operands, the register is not recognised, or the
 *         immediate is not a decimal integer that fits in 32 bits.
 */
void encode_mov(ParsedLine* line, unsigned char* output, int* length) {
    *length = 0;

    if (line->operand_count < 2) {
        return;
    }

    unsigned char reg = encode_register(line->operands[0]);
    if (reg == 0xFF) {
        /* Unknown register: adding the sentinel to 0xB8 would emit a bogus opcode. */
        return;
    }

    char* end = NULL;
    long long value = strtoll(line->operands[1], &end, 10);
    if (end == line->operands[1] || *end != '\0') {
        return;  /* empty or trailing garbage, e.g. a register name */
    }
    /* Accept anything representable in 32 bits, signed or unsigned.
       strtoll saturates on overflow, which also lands outside this range. */
    if (value < -2147483648LL || value > 4294967295LL) {
        return;
    }

    unsigned long bits = (unsigned long)value & 0xFFFFFFFFUL;

    output[0] = (unsigned char)(0xB8 + reg);
    /* Emit the bytes explicitly so the result does not depend on the
       host's int size or byte order. */
    for (int i = 0; i < 4; i++) {
        output[1 + i] = (unsigned char)((bits >> (8 * i)) & 0xFFu);
    }
    *length = 5;
}

/*
 * Encode one parsed line into machine code by dispatching on its mnemonic.
 * Only "mov" is handled here.
 *
 * line    parsed instruction.
 * output  receives the encoded bytes.
 * length  receives the number of bytes written; it is set to 0 for any
 *         mnemonic that is not handled, so the caller never reads an
 *         unset value.
 */
void encode_instruction(ParsedLine* line, unsigned char* output, int* length) {
    *length = 0;

    if (strcmp(line->instruction, "mov") == 0) {
        encode_mov(line, output, length);
    }
}

/*
 * Assemble a list of source lines into one contiguous byte stream.
 *
 * instructions       array of NUL-terminated source lines.
 * instruction_count  number of entries in `instructions`.
 * output             receives the encoded bytes back to back. No capacity
 *                    is passed in, so the caller must size it for the
 *                    whole program.
 * output_length      receives the total number of bytes written.
 *
 * Lines that contain no tokens, or that encode to nothing, contribute no
 * bytes. The token buffer holds 10 tokens per line; tokenize() is not told
 * this limit, so callers must not pass longer lines.
 */
void assemble(const char** instructions, int instruction_count, unsigned char* output, int* output_length) {
    Token tokens[10];
    unsigned char encoded[10];
    int offset = 0;

    for (int i = 0; i < instruction_count; i++) {
        /* Reset per-line state so nothing leaks over from the previous line. */
        int token_count = 0;
        int length = 0;
        ParsedLine parsed_line;
        memset(&parsed_line, 0, sizeof parsed_line);

        tokenize(instructions[i], tokens, &token_count);
        if (token_count == 0) {
            continue;  /* blank line: nothing to encode */
        }

        parse_tokens(tokens, token_count, &parsed_line);
        encode_instruction(&parsed_line, encoded, &length);

        /* Only copy a byte count that is positive and fits the scratch buffer. */
        if (length <= 0 || length > (int)sizeof encoded) {
            continue;
        }

        memcpy(output + offset, encoded, (size_t)length);
        offset += length;
    }

    *output_length = offset;
}

/*
 * Demo driver: assembles a small hard-coded program and prints the
 * resulting bytes as space-separated two-digit uppercase hex on one line.
 */
int main() {
    const char* program[] = {
        "mov eax, 10",
        "mov ebx, 20"
    };
    int line_count = sizeof(program) / sizeof(program[0]);

    unsigned char machine_code[256];
    int emitted;

    assemble(program, line_count, machine_code, &emitted);

    const unsigned char* byte = machine_code;
    const unsigned char* const end = machine_code + emitted;
    while (byte < end) {
        printf("%02X ", *byte);
        byte++;
    }
    printf("\n");

    return 0;
}

A src/source.h => src/source.h +22 -0
@@ 0,0 1,22 @@
#ifndef ASSEMBLER_H
#define ASSEMBLER_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/* Classification attached to each token of an assembly source line. */
typedef enum {
    TOKEN_INSTRUCTION,  /* a recognised instruction mnemonic */
    TOKEN_REGISTER,     /* any other token that starts with a letter */
    TOKEN_IMMEDIATE,    /* a token that starts with a digit or '-' */
    TOKEN_LABEL,        /* NOTE(review): not assigned by tokenize() in source.c; presumably reserved for labels - confirm */
    TOKEN_UNKNOWN       /* a token that fits none of the categories above */
} TokenType;

/* A single lexical token: its classification and its text. */
typedef struct {
    TokenType type;  /* category of the token */
    char text[32];   /* NUL-terminated token text; at most 31 characters fit */
} Token;

#endif