// voidEmu/assembler/assember.cpp

#include <algorithm>
#include <cctype>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fcntl.h>
#include <map>
#include <optional>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <unistd.h>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <variant>
#include <vector>

#define BYTECODE_READER_IMPLEMENTATION
#include "reader.hpp"

// Returns true when `c` is an ASCII hexadecimal digit: 0-9, a-f or A-F.
bool ishex(char c) {
    if ('0' <= c && c <= '9') {
        return true;
    }

    if ('a' <= c && c <= 'f') {
        return true;
    }

    return 'A' <= c && c <= 'F';
}

// Returns true when `str` names a control register: either the bracketed
// index pointer "[i]" / "[I]", or one of the special register names
// i, k, b, f, dt, st. The special names are matched case-sensitively
// (lowercase only); `str` is not modified.
bool is_control(std::string &str) {
    // pointer I: exactly three characters, "[i]" in either case
    if (str.length() == 3 && str.front() == '[' && str.back() == ']' &&
        (str[1] == 'i' || str[1] == 'I')) {
        return true;
    }

    // A fixed table is scanned instead of building a std::set of
    // heap-allocated strings on every call.
    static const char *const special_registers[] = {"i", "k",  "b",
                                                    "f", "dt", "st"};
    for (const char *name : special_registers) {
        if (str == name) {
            return true;
        }
    }

    return false;
}

// Returns true when `str` names a register: a general-purpose register
// "v0".."vf" (case-insensitive) or anything is_control() accepts.
//
// Side effect: `str` is lowercased IN PLACE before it is inspected, so the
// caller's string is normalized whether or not the function returns true.
bool is_register(std::string &str) {
    // tolower() has undefined behavior for negative char values, so each
    // character is passed through unsigned char first.
    std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) {
        return static_cast<char>(std::tolower(c));
    });

    // str is lowercase at this point, so only 'v' needs to be checked
    if (str.length() == 2 && str[0] == 'v' && ishex(str[1])) {
        return true;
    }

    return is_control(str);
}

// Returns true when `str` is a hexadecimal literal: the lowercase prefix "0x"
// followed by one or more hex digits (either case). A bare "0x" is rejected
// because it has no digits to convert.
bool is_base16(std::string &str) {
    if (str.size() < 3 || str[0] != '0' || str[1] != 'x') {
        return false;
    }

    // unsigned char parameter: isxdigit() is undefined for negative values
    return std::all_of(str.begin() + 2, str.end(), [](unsigned char c) {
        return std::isxdigit(c) != 0;
    });
}

// Returns true when `str` is a binary literal: the lowercase prefix "0b"
// followed by one or more '0'/'1' characters. A bare "0b" is rejected
// because it has no digits to convert.
bool is_base2(std::string &str) {
    if (str.size() < 3 || str[0] != '0' || str[1] != 'b') {
        return false;
    }

    return std::all_of(str.begin() + 2, str.end(),
                       [](char c) { return c == '0' || c == '1'; });
}

// Returns true when `str` is a non-empty run of decimal digits and nothing
// else. Every character is checked, not just the first one.
bool is_base10(std::string &str) {
    if (str.empty()) {
        return false;
    }

    // unsigned char parameter: isdigit() is undefined for negative values
    return std::all_of(str.begin(), str.end(), [](unsigned char c) {
        return std::isdigit(c) != 0;
    });
}

// Returns true when `str` is a numeric literal in any supported base.
// The checks run in the order hexadecimal, binary, decimal.
bool is_numeric(std::string &str) {
    if (is_base16(str)) {
        return true;
    }

    if (is_base2(str)) {
        return true;
    }

    return is_base10(str);
}

// Returns true when `str` is exactly one of the single-character operators
// "+", "-", "*" or "/".
bool is_arithmetic_operator(std::string &str) {
    if (str.size() != 1) {
        return false;
    }

    switch (str[0]) {
    case '+':
    case '-':
    case '*':
    case '/':
        return true;
    default:
        return false;
    }
}

// Converts a numeric literal to its value.
//
// Accepts the forms recognised by is_base16 ("0x.."), is_base2 ("0b..") and
// is_base10. Anything else, or a literal whose digits cannot be converted
// (too large, or no digits at all), prints "Invalid numeric literal" to
// stderr and terminates the process with exit code 1.
size_t get_numeric(std::string &str) {
    int base = 0;
    size_t prefix_len = 0;

    if (is_base16(str)) {
        base = 16;
        prefix_len = 2;
    } else if (is_base2(str)) {
        base = 2;
        prefix_len = 2;
    } else if (is_base10(str)) {
        base = 10;
    }

    if (base != 0) {
        try {
            // stoull rather than stoi: the result is a size_t, and stoi would
            // throw on any value above INT_MAX.
            return static_cast<size_t>(
                std::stoull(str.substr(prefix_len), nullptr, base));
        } catch (const std::logic_error &) {
            // invalid_argument / out_of_range: report it like any other bad
            // literal instead of dying on an uncaught exception
        }
    }

    fprintf(stderr, "Invalid numeric literal: %s\n", str.c_str());
    exit(1);
}

// Returns the index (0x0-0xF) of a general-purpose register written as
// "v<hex digit>", with both the 'v' and the digit accepted in either case.
//
// Any other spelling (wrong length, wrong prefix, non-hex digit) prints
// "Invalid register" to stderr and terminates the process with exit code 1.
uint8_t get_reg(std::string &str) {
    // The digit is validated here so a malformed name ends in the error
    // message rather than an uncaught conversion exception.
    const bool well_formed =
        str.length() == 2 && (str[0] == 'v' || str[0] == 'V') &&
        std::isxdigit(static_cast<unsigned char>(str[1])) != 0;

    if (!well_formed) {
        fprintf(stderr, "Invalid register: %s\n", str.c_str());
        exit(1);
    }

    const char digit = str[1];
    if (digit >= '0' && digit <= '9') {
        return static_cast<uint8_t>(digit - '0');
    }

    const int lowered = std::tolower(static_cast<unsigned char>(digit));
    return static_cast<uint8_t>(lowered - 'a' + 10);
}

// Multiply-by-33 string hash (seed 5381), usable in constant expressions so
// that string values can drive `switch` statements via `case hash("...")`.
//
// As with all hashing functions, there is a chance of collision: two
// different strings may produce the same value, so a matching hash does not
// prove the strings are equal.
//
// Takes a std::string_view so that neither string literals nor std::string
// arguments are copied, and so that compile-time evaluation does not depend
// on constexpr std::string support.
constexpr uint32_t hash(std::string_view data) noexcept {
    uint32_t digest = 5381;

    for (const char c : data) {
        // digest * 33 + byte, with the byte taken as unsigned
        digest = ((digest << 5) + digest) + static_cast<uint8_t>(c);
    }

    return digest;
}

// Returns true when `str` is exactly one of the assembler directive keywords
// text, offset, include, define, db or dw (case-sensitive).
//
// The comparison is done on the text itself rather than on a hash of it:
// with the 32-bit hash used elsewhere in this file, unrelated words such as
// "f5" (collides with "dw") or "eA" (collides with "db") would otherwise be
// reported as directives.
bool is_directive(std::string &str) {
    static const char *const directives[] = {"text",   "offset", "include",
                                             "define", "db",     "dw"};

    for (const char *keyword : directives) {
        if (str == keyword) {
            return true;
        }
    }

    return false;
}

// Returns true when `str` cannot be used as a user-chosen name because it is
// a register, a directive keyword or a numeric literal.
//
// The register check runs first; is_register() lowercases `str` in place, so
// the later checks (and the caller) see the lowercased text.
bool is_reserved(std::string &str) {
    if (is_register(str)) {
        return true;
    }

    if (is_directive(str)) {
        return true;
    }

    return is_numeric(str);
}

// Forward declarations of the syntax-tree node types.
// NOTE(review): LabelNode has no definition in the visible part of this file
// — confirm it is still needed.
class OperandNode;
class InstructionNode;
class LabelNode;
class RootNode;

// Kind of value an OperandNode carries. ATIHMETIC (sic) is the kind the
// parser assigns to arithmetic-operator tokens; LABEL is a symbolic name.
enum class ValueType { REGISTER, IMMEDIATE, STRING, ATIHMETIC, LABEL };

// A single operand of an instruction or directive: the kind of value plus
// its text exactly as produced by the lexer.
class OperandNode {
  public:
    // What `value` represents (register name, literal, string, ...).
    ValueType type;
    // Operand text; how it is interpreted depends on `type`.
    std::string value;

    // `value` is taken by value and moved into the member, so passing a
    // temporary costs no extra string copy.
    OperandNode(ValueType type, std::string value)
        : type(type), value(std::move(value)) {}
};

// The assembler directives the parser can produce.
enum class DirectiveType {
    DEFINE,
    TEXT,
    DB,
    DW,
    OFFSET,
    INCLUDE
};

// One directive together with the operands that followed it.
class DirectiveNode {
  public:
    // Which directive this node represents.
    DirectiveType directive;
    // Operands in the order they were given.
    std::vector<OperandNode> operands;

    // Builds a directive node; the operand list defaults to empty.
    DirectiveNode(const DirectiveType kind,
                  const std::vector<OperandNode> &args = {})
        : directive(kind), operands(args) {}
};

// One instruction: its mnemonic plus the operands that followed it.
class InstructionNode {
  public:
    // Mnemonic text as produced by the lexer.
    std::string opcode;
    // Operands in the order they were given.
    std::vector<OperandNode> operands;

    // Builds an instruction node; the operand list defaults to empty.
    InstructionNode(const std::string &mnemonic,
                    const std::vector<OperandNode> &args = {})
        : opcode(mnemonic), operands(args) {}
};

// One entry of a section: either an instruction or a directive.
//
// `type` is a tag kept alongside the variant; code that reads `element`
// with std::get selects the alternative based on `type`, so the two must
// agree.
struct SectionElement {
    enum class Type { Instruction, Directive };
    std::variant<InstructionNode, DirectiveNode> element;
    Type type;

    // The node is taken by value and moved into the variant, so its operand
    // vector is not deep-copied a second time.
    SectionElement(Type t, InstructionNode instr)
        : element(std::move(instr)), type(t) {}

    SectionElement(Type t, DirectiveNode dir)
        : element(std::move(dir)), type(t) {}
};

// A run of instructions and directives, optionally introduced by a label.
class SectionNode {
  public:
    // Label that opened the section; empty when the section has no label.
    std::optional<std::string> label_name;
    // Instructions and directives belonging to the section.
    std::vector<SectionElement> elements;

    // Creates an empty section labelled `section_label`.
    SectionNode(const std::string &section_label)
        : label_name(section_label) {}

    // Creates a section with an optional label and an initial element list
    // (empty by default).
    SectionNode(std::optional<std::string> section_label,
                const std::vector<SectionElement> &initial_elements = {})
        : label_name(section_label), elements(initial_elements) {}
};

// Top of the syntax tree: every section of the program plus a lookup table
// for the labelled ones.
class RootNode {
  public:
    // All sections of the program.
    std::vector<SectionNode> sections;
    // label name -> index into `sections`
    std::unordered_map<std::string, size_t> labels;

    // Builds a root from an initial list of sections (empty by default).
    // `labels` always starts out empty.
    RootNode(const std::vector<SectionNode> &initial_sections = {})
        : sections(initial_sections) {}
};

// Categories of token produced by the Lexer.
enum class TokenType {
    // a name in operand position that is not a register, number or directive
    Label,
    // a name immediately followed by ':'
    LabelDecleration,
    // a name that is not in operand position (an instruction mnemonic)
    Instruction,
    // one of the directive keywords accepted by is_directive()
    Directive,
    // a register name accepted by is_register()
    Register,
    // a numeric literal accepted by is_numeric()
    Immediate,
    // one of + - * /
    ArithmeticOperator,
    // the contents of a double-quoted string, escapes already resolved
    String,
    // text starting at ';' and running to the end of the line
    Comment,
    // end of input
    EOFToken
};

// Returns true for token types that may appear in an operand list. Comments
// count as operands here so that operand loops keep going past them; callers
// are expected to skip Comment tokens themselves.
bool is_operand(TokenType type) {
    const bool carries_value =
        type == TokenType::Label || type == TokenType::Register ||
        type == TokenType::Immediate || type == TokenType::String;

    return carries_value || type == TokenType::ArithmeticOperator ||
           type == TokenType::Comment;
}

// A single lexical token: its category plus the text it was built from.
struct Token {
    // Category assigned by the lexer.
    TokenType type;
    // Token text; empty for EOFToken.
    std::string value;
};

// Turns assembly source text into Tokens, one per get_next_token() call.
//
// Whether a bare word is an operand (Label) or an instruction mnemonic is
// decided by context: a word is an operand when the previous token on the
// same line ended at a space or a comma. A newline always ends the operand
// list.
class Lexer {
  public:
    // Starts lexing at the beginning of `input`. The text is copied.
    Lexer(const std::string &input)
        : input(input), pos(0), next_operand(false) {}

    // Returns the next token, or an EOFToken once the input is exhausted.
    // Malformed input (a ':' with no name before it, a reserved word used as
    // a label, a '"' glued to the end of a word, an unterminated string)
    // prints a message to stderr and terminates the process with exit code 1.
    Token get_next_token() {
        std::string accum = "";
        bool is_operand = next_operand;
        next_operand = false;

        // Skip leading whitespace. Newlines are very important context for
        // us: if one is crossed, the previous line is over, so whatever comes
        // next is not an operand of it (this matters when a line ends in
        // trailing whitespace).
        while (pos < input.size() && is_space(input[pos])) {
            if (input[pos] == '\n') {
                is_operand = false;
            }
            pos++;
        }

        while (pos < input.size()) {
            char current_char = input[pos++];

            // A comment either ends the token collected so far, or, if
            // nothing has been collected yet, is returned as its own token.
            if (current_char == ';') {
                if (!accum.empty()) {
                    // rewind so we dont consume the ; character
                    pos--;
                    break;
                }

                return read_comment();
            }

            if (current_char == ':') {
                if (accum.empty()) {
                    fprintf(stderr,
                            "Expected label before ':' but got nothing\n");
                    exit(1);
                }

                // note: is_reserved lowercases accum in place
                if (is_reserved(accum)) {
                    fprintf(stderr, "Use of reserved token: %s\n",
                            accum.c_str());
                    exit(1);
                }

                next_operand = false;

                return Token{TokenType::LabelDecleration, accum};
            }

            if (current_char == '"') {
                if (!accum.empty()) {
                    fprintf(stderr, "Expected string before '\"'\n");
                    exit(1);
                }

                return read_string();
            }

            if (current_char == '\n') {
                // we hit end of line, we are onto the next instruction
                next_operand = false;
                break;
            }

            // if we find a delimiting character, we have reached the end of
            // our current token
            if (is_space(current_char) || current_char == ',') {
                // next token is an operand
                next_operand = true;
                break;
            }

            accum += current_char;
        }

        if (pos >= input.size() && accum.empty()) {
            return Token{TokenType::EOFToken, ""};
        }

        // only a delimiter was consumed (e.g. a lone ','); try again
        if (accum.empty()) {
            return get_next_token();
        }

        if (is_arithmetic_operator(accum)) {
            return Token{TokenType::ArithmeticOperator, accum};
        }

        if (is_directive(accum)) {
            return Token{TokenType::Directive, accum};
        }

        if (is_numeric(accum)) {
            return Token{TokenType::Immediate, accum};
        }

        // note: is_register lowercases accum in place, so register, label and
        // instruction tokens all carry lowercase text
        if (is_register(accum)) {
            return Token{TokenType::Register, accum};
        }

        if (is_operand) {
            return Token{TokenType::Label, accum};
        }

        return Token{TokenType::Instruction, accum};
    }

  private:
    std::string input;
    // index of the next unread character of `input`
    size_t pos;
    // true when the token just returned ended at a space or comma, i.e. the
    // following word on the same line is an operand
    bool next_operand;

    // isspace() is undefined for negative char values, so go through
    // unsigned char.
    static bool is_space(char c) {
        return isspace(static_cast<unsigned char>(c)) != 0;
    }

    // Maps the character following a backslash to the character it stands
    // for. Unknown escapes stand for themselves (so \" and \\ work).
    static char unescape(char c) {
        switch (c) {
        case 'a':
            return '\a';
        case 'b':
            return '\b';
        case 'e':
            return '\x1b';
        case 'f':
            return '\f';
        case 'n':
            return '\n';
        case 'r':
            return '\r';
        case 't':
            return '\t';
        case 'v':
            return '\v';
        // TODO: hex escapes
        default:
            return c;
        }
    }

    // Called with `pos` just past a ';'. Collects the comment (including the
    // ';') up to the end of the line and consumes the newline.
    Token read_comment() {
        std::string text(1, ';');
        while (pos < input.size() && input[pos] != '\n') {
            text += input[pos++];
        }

        if (pos < input.size()) {
            pos++; // the newline itself
        }

        next_operand = false;
        return Token{TokenType::Comment, text};
    }

    // Called with `pos` just past an opening '"'. Collects the string body,
    // resolving backslash escapes, and consumes the closing quote.
    Token read_string() {
        std::string text;
        while (pos < input.size()) {
            char c = input[pos++];
            if (c == '"') {
                return Token{TokenType::String, text};
            }

            if (c == '\\') {
                if (pos >= input.size()) {
                    break;
                }
                text += unescape(input[pos++]);
                continue;
            }

            text += c;
        }

        fprintf(stderr, "Unterminated string literal\n");
        exit(1);
    }
};

class Parser {
  public:
    Parser(Lexer &lexer)
        : lexer(lexer), current_token(lexer.get_next_token()) {}

    RootNode parse() {
        RootNode root;
        std::optional<SectionNode *> cur_section = std::nullopt;
        while (true) {
            if (current_token.type == TokenType::EOFToken) {
                break;
            }

            if (current_token.type == TokenType::LabelDecleration) {
                SectionNode new_section(current_token.value);
                if (root.labels.contains(current_token.value)) {
                    fprintf(stderr, "Label %s already exists\n",
                            current_token.value.c_str());
                    exit(1);
                }
                root.sections.push_back(new_section);
                root.sections.back().label_name = current_token.value;
                root.labels[current_token.value] = root.sections.size() - 1;
                cur_section =
                    std::optional<SectionNode *>(&root.sections.back());
                current_token = lexer.get_next_token();
                continue;
            }

            if (!cur_section.has_value()) {
                root.sections.push_back(SectionNode(std::nullopt));
                cur_section =
                    std::optional<SectionNode *>(&root.sections.back());
            }

            if (current_token.type == TokenType::Instruction) {
                // this will set the current token to the next token once it
                // finds a token that is not an operand
                cur_section.value()->elements.push_back(
                    {SectionElement::Type::Instruction, parse_instruction()});
            } else if (current_token.type == TokenType::Directive) {
                // this will set the current token to the next token once it
                // finds a token that is not an operand
                // cur_section->directives.push_back(parse_directive(&pc));
                cur_section.value()->elements.push_back(
                    {SectionElement::Type::Directive, parse_directive()});
            } else {
                current_token = lexer.get_next_token();
            }
        }
        return root;
    }

  private:
    Lexer &lexer;
    Token current_token;

    InstructionNode parse_instruction() {
        InstructionNode instruction = InstructionNode(current_token.value);
        instruction.opcode = current_token.value;
        current_token = lexer.get_next_token();

        while (is_operand(current_token.type)) {
            if (current_token.type == TokenType::Comment) {
                current_token = lexer.get_next_token();
                continue;
            }
            // the token is either a label, a number, or a register
            instruction.operands.push_back(parse_operand());
            current_token = lexer.get_next_token();
        }

        return instruction;
    }

    DirectiveNode parse_directive() {
        DirectiveType directive_type;
        switch (hash(current_token.value)) {
        case hash("define"): {
            directive_type = DirectiveType::DEFINE;
            break;
        }
        case hash("text"): {
            directive_type = DirectiveType::TEXT;
            break;
        }
        case hash("db"): {
            directive_type = DirectiveType::DB;
            break;
        }
        case hash("dw"): {
            directive_type = DirectiveType::DW;
            break;
        }
        case hash("offset"): {
            directive_type = DirectiveType::OFFSET;
            break;
        }
        case hash("include"): {
            directive_type = DirectiveType::INCLUDE;
            break;
        }
        }
        DirectiveNode directive = DirectiveNode(directive_type);
        current_token = lexer.get_next_token();

        // directives require an operand
        if (!is_operand(current_token.type)) {
            fprintf(stderr, "Expected operand, found %s",
                    current_token.value.c_str());
            exit(1);
        }

        while (is_operand(current_token.type)) {
            if (current_token.type == TokenType::Comment) {
                current_token = lexer.get_next_token();
                continue;
            }
            // the token is either a label, a number, or a register
            directive.operands.push_back(parse_operand());
            current_token = lexer.get_next_token();
        }

        return directive;
    }

    OperandNode parse_operand() {
        switch (current_token.type) {
        case TokenType::Label:
            return OperandNode(ValueType::LABEL, current_token.value);
        case TokenType::Register:
            return OperandNode(ValueType::REGISTER, current_token.value);
        case TokenType::Immediate:
            return OperandNode(ValueType::IMMEDIATE, current_token.value);
        case TokenType::String:
            return OperandNode(ValueType::STRING, current_token.value);
        case TokenType::ArithmeticOperator:
            return OperandNode(ValueType::ATIHMETIC, current_token.value);
        default:
            fprintf(stderr, "Unexpected operand type: %d\n",
                    static_cast<int>(current_token.type));
            exit(1);
        }
    }
};

struct Defines {
    enum ValueType type;
    std::string value;
};

class Assembler {
  public:
    uint8_t *assembler(RootNode root, int rom_fd) {
        printf("Assembling...\n");
        std::map<std::string, Defines> defines;

        // rom space labels
        std::unordered_map<std::string, size_t> labels;

        size_t pc = 0;
        for (auto &section : root.sections) {
            if (section.label_name.has_value()) {
                labels[section.label_name.value()] = pc;
            }

            for (auto &element : section.elements) {
                switch (element.type) {
                case SectionElement::Type::Instruction: {
                    pc += 2;
                    break;
                }

                case SectionElement::Type::Directive: {
                    DirectiveNode directive_node =
                        std::get<DirectiveNode>(element.element);
                    for (auto &operand : directive_node.operands) {
                        if (operand.type == ValueType::LABEL) {
                            // check if the label is really a reference
                            // to a constant (ie a value defined by a
                            // defines directive)
                            if (defines.contains(operand.value)) {
                                operand.type = defines[operand.value].type;
                                operand.value = defines[operand.value].value;
                            }
                        }
                    }
                    switch (directive_node.directive) {
                    case DirectiveType::DEFINE: {
                        if (directive_node.operands.size() != 2) {
                            fprintf(stderr, "Expected 2 operands for define\n");
                            exit(1);
                        }

                        if (root.labels.contains(
                                directive_node.operands[0].value)) {
                            fprintf(stderr,
                                    "Redecleration of label %s as "
                                    "constant\n",
                                    directive_node.operands[0].value.c_str());
                            exit(1);
                        }

                        if (defines.contains(
                                directive_node.operands[0].value)) {
                            // TODO: should we allow redeclaration of
                            // constants?
                            fprintf(stderr, "Redecleration of constant %s\n",
                                    directive_node.operands[0].value.c_str());
                            exit(1);
                        }

                        if (directive_node.operands[1].type ==
                            ValueType::LABEL) {
                            // "label" may be a constant
                            if (defines.contains(
                                    directive_node.operands[0].value)) {
                                defines[directive_node.operands[0].value] =
                                    defines.at(
                                        directive_node.operands[0].value);
                                break;
                            }
                        }

                        defines[directive_node.operands[0].value] = {
                            .type = directive_node.operands[1].type,
                            .value = directive_node.operands[1].value};
                        break;
                    }
                    case DirectiveType::TEXT: {
                        if (directive_node.operands.size() != 1) {
                            fprintf(stderr, "Expected 1 operand for text\n");
                            exit(1);
                        }

                        pc += directive_node.operands[0].value.size() + 1;
                        break;
                    }
                    case DirectiveType::DB: {
                        if (directive_node.operands.size() == 0) {
                            fprintf(stderr, "Expected operands for db\n");
                            exit(1);
                        }

                        pc += directive_node.operands.size();
                        break;
                    }
                    case DirectiveType::DW: {
                        if (directive_node.operands.size() == 0) {
                            fprintf(stderr, "Expected operands for dw\n");
                            exit(1);
                        }

                        pc += directive_node.operands.size() * 2;
                        break;
                    }
                    case DirectiveType::OFFSET: {
                        if (directive_node.operands.size() != 1) {
                            fprintf(stderr, "Expected 1 operand for offset\n");
                            exit(1);
                        }

                        size_t offset =
                            get_numeric(directive_node.operands[0].value);
                        if (directive_node.operands[0].type !=
                            ValueType::IMMEDIATE) {
                            fprintf(stderr, "Expected immediate "
                                            "operand for offset\n");
                            exit(1);
                        }

                        pc += offset;
                        break;
                    }
                    case DirectiveType::INCLUDE: {
                        fprintf(stderr, "extraneous include directive! This is "
                                        "likely a compiler bug\n");
                        exit(1);
                    }
                    }
                }
                }
            }
        }

        uint8_t *rom_buf = NULL;
        rom_buf = (uint8_t *)calloc(pc, sizeof(uint8_t));
        if (rom_buf == NULL) {
            fprintf(stderr, "Failed to allocate memory!\n");
            exit(1);
        }

        pc = 0;
        for (auto &section : root.sections) {
            for (auto &element : section.elements) {
                switch (element.type) {
                case SectionElement::Type::Instruction: {
                    InstructionNode instruction_node =
                        std::get<InstructionNode>(element.element);
                    std::vector<OperandNode> flattened_operands;
                    // "flatten" opperands, aka, take things like labels and
                    // defines, and replace them with their real values, so that
                    // instructon dont need knowledge of these concepts
                    for (size_t i = 0; i < instruction_node.operands.size();
                         i++) {
                        OperandNode &operand = instruction_node.operands[i];
                        if (operand.type == ValueType::LABEL) {
                            // check if the label is really a reference
                            // to a constant (ie a value defined by a
                            // defines directive)
                            if (defines.contains(operand.value)) {
                                operand.type = defines[operand.value].type;
                                operand.value = defines[operand.value].value;
                            }

                            // might seem redundant, but if we transformed the
                            // "label" into a label via defines, we need to
                            // transform the label into the position of the
                            // label, but if the defines is NOT a label, we dont
                            // want to crash out
                            if (operand.type == ValueType::LABEL) {
                                if (labels.contains(operand.value)) {
                                    // the label points to a label we have
                                    // already discovered, so, we can
                                    // transform the label into a memory
                                    // space address, ready for the
                                    // instruction to use
                                    operand.type = ValueType::IMMEDIATE;
                                    operand.value = std::to_string(
                                        labels[operand.value] + 0x200);
                                } else {
                                    fprintf(stderr, "Label %s not found\n",
                                            operand.value.c_str());
                                    exit(1);
                                }
                            }
                        }

                        if (operand.type == ValueType::ATIHMETIC) {
                            // collect previous operand and next operand, and
                            // flatten the arithmetic
                            if (i == 0) {
                                fprintf(stderr, "expected immediate before "
                                                "arithmetic operator\n");
                                exit(1);
                            }

                            if (i + 1 >= instruction_node.operands.size()) {
                                fprintf(stderr,
                                        "expected 2 operands for arithmetic "
                                        "operator, but got only %lu\n",
                                        instruction_node.operands.size());
                                exit(1);
                            }

                            OperandNode prev_operand =
                                flattened_operands.back();
                            flattened_operands.pop_back();
                            OperandNode next_operand =
                                instruction_node.operands[i + 1];

                            if (prev_operand.type != ValueType::IMMEDIATE ||
                                prev_operand.type != ValueType::IMMEDIATE) {
                                fprintf(stderr,
                                        "Arithmetic can only be performed on "
                                        "immediate operands.\n");
                                exit(1);
                            }

                            switch (hash(operand.value)) {
                            case hash("+"):
                                operand = OperandNode(
                                    ValueType::IMMEDIATE,
                                    std::to_string(
                                        get_numeric(prev_operand.value) +
                                        get_numeric(next_operand.value)));
                                break;
                            case hash("-"):
                                operand = OperandNode(
                                    ValueType::IMMEDIATE,
                                    std::to_string(
                                        get_numeric(prev_operand.value) -
                                        get_numeric(next_operand.value)));
                                break;
                            case hash("*"):
                                operand = OperandNode(
                                    ValueType::IMMEDIATE,
                                    std::to_string(
                                        get_numeric(prev_operand.value) *
                                        get_numeric(next_operand.value)));
                                break;
                            case hash("/"):
                                operand = OperandNode(
                                    ValueType::IMMEDIATE,
                                    std::to_string(
                                        get_numeric(prev_operand.value) /
                                        get_numeric(next_operand.value)));
                                break;
                            default:
                                fprintf(stderr,
                                        "unexpected arithmetic operator: %s\n",
                                        operand.value.c_str());
                            }

                            i += 1;
                            flattened_operands.push_back(operand);
                            continue;
                        }

                        flattened_operands.push_back(operand);
                    }

                    instruction_node.operands = flattened_operands;

                    uint16_t instruction = 0;
                    switch (hash(instruction_node.opcode)) {
                    case hash("exit"): {
                        if (instruction_node.operands.size() != 1) {
                            fprintf(stderr, "Expected 1 operand for exit\n");
                            exit(1);
                        }

                        if (instruction_node.operands[0].type !=
                            ValueType::IMMEDIATE) {
                            fprintf(stderr, "Expected immediate "
                                            "operand for exit\n");
                            exit(1);
                        }

                        uint8_t byte =
                            get_numeric(instruction_node.operands[0].value);
                        if (byte > 0x0F) {
                            fprintf(stderr,
                                    "Invalid exit code: %d, must be a "
                                    "value "
                                    "less that 0x10\n",
                                    byte);
                            exit(1);
                        }

                        instruction = 0x0010 | byte;
                        break;
                    }
                    case hash("cls"): {
                        instruction = 0x00E0;
                        break;
                    }
                    case hash("ret"): {
                        instruction = 0x00EE;
                        break;
                    }
                    case hash("sys"): {
                        if (instruction_node.operands.size() != 1) {
                            fprintf(stderr,
                                    "Expected 1 operand for sys got %lu\n",
                                    instruction_node.operands.size());
                            exit(1);
                        }

                        uint16_t address =
                            get_numeric(instruction_node.operands[0].value) &
                            0xFFF;
                        instruction = 0x0000 | address;
                        break;
                    }
                    case hash("jp"): {
                        // Two forms are encoded here:
                        //   jp V0, nnn -> 0xB000 | nnn
                        //   jp nnn     -> 0x1000 | nnn
                        // In both forms only the low 12 bits of the immediate
                        // are kept as the address field.
                        if (instruction_node.operands.size() == 2) {
                            // The register form is only accepted when the
                            // register operand resolves to index 0.
                            if (instruction_node.operands[0].type !=
                                    ValueType::REGISTER ||
                                get_reg(instruction_node.operands[0].value) !=
                                    0x0) {
                                fprintf(
                                    stderr,
                                    "V0 expected for jp with two arguments\n");
                                exit(1);
                            }

                            if (instruction_node.operands[1].type !=
                                ValueType::IMMEDIATE) {
                                fprintf(stderr,
                                        "Expected immediate operand for jp\n");
                                exit(1);
                            }

                            uint16_t address =
                                get_numeric(
                                    instruction_node.operands[1].value) &
                                0xFFF;

                            instruction = 0xB000 | address;
                            break;
                        }

                        if (instruction_node.operands.size() != 1) {
                            fprintf(stderr,
                                    "Expected 1 operand for jp got %lu\n",
                                    instruction_node.operands.size());
                            exit(1);
                        }

                        if (instruction_node.operands[0].type !=
                            ValueType::IMMEDIATE) {
                            // Exactly one operand exists on this path, so the
                            // diagnostic must print operand 0 (the offending
                            // one); index 1 is out of range here.
                            fprintf(
                                stderr,
                                "Expected immediate operand for jp got %s\n",
                                instruction_node.operands[0].value.c_str());
                            exit(1);
                        }

                        uint16_t address =
                            get_numeric(instruction_node.operands[0].value) &
                            0xFFF;
                        instruction = 0x1000 | address;
                        break;
                    }
                    case hash("call"): {
                        // call nnn: requires exactly one operand; the low 12
                        // bits of its numeric value become the address field
                        // under opcode nibble 0x2.
                        auto &args = instruction_node.operands;
                        if (args.size() != 1) {
                            fprintf(stderr,
                                    "Expected 1 operand for call got %lu\n",
                                    args.size());
                            exit(1);
                        }

                        const uint16_t target =
                            get_numeric(args[0].value) & 0xFFF;
                        instruction = 0x2000 | target;
                        break;
                    }
                    case hash("se"): {
                        // Two forms are encoded here:
                        //   se Vx, byte -> 0x3000 | (x << 8) | byte
                        //   se Vx, Vy   -> 0x5000 | (x << 8) | (y << 4)
                        // Control registers are rejected in either position.
                        auto &ops = instruction_node.operands;
                        if (ops.size() != 2) {
                            fprintf(stderr,
                                    "Expected 2 operands for se got %lu\n",
                                    ops.size());
                            exit(1);
                        }

                        if (ops[0].type != ValueType::REGISTER ||
                            is_control(ops[0].value)) {
                            fprintf(stderr,
                                    "Expected register operand for se\n");
                            exit(1);
                        }

                        if (ops[1].type == ValueType::IMMEDIATE) {
                            // se reg, nn: the immediate shares the word with
                            // the register and opcode nibbles, so anything
                            // wider than one byte would corrupt them.
                            const auto imm = get_numeric(ops[1].value);
                            if (imm > 0xFF) {
                                fprintf(stderr,
                                        "Invalid immediate value for se\n");
                                exit(1);
                            }
                            instruction = 0x3000 |
                                          (get_reg(ops[0].value) << 8) |
                                          (imm & 0xFF);
                            break;
                        }

                        if (ops[1].type == ValueType::REGISTER) {
                            // se reg, reg
                            if (is_control(ops[1].value)) {
                                fprintf(stderr, "Expected register operand for "
                                                "se\n");
                                exit(1);
                            }
                            instruction = 0x5000 |
                                          (get_reg(ops[0].value) << 8) |
                                          (get_reg(ops[1].value) << 4);
                            break;
                        }

                        fprintf(stderr, "Expected register or immediate "
                                        "operand for se\n");
                        exit(1);
                    }
                    case hash("sne"): {
                        // Two forms are encoded here:
                        //   sne Vx, byte -> 0x4000 | (x << 8) | byte
                        //   sne Vx, Vy   -> 0x9000 | (x << 8) | (y << 4)
                        // Control registers are rejected in either position.
                        auto &ops = instruction_node.operands;
                        if (ops.size() != 2) {
                            fprintf(stderr,
                                    "Expected 2 operands for sne got %lu\n",
                                    ops.size());
                            exit(1);
                        }

                        if (ops[0].type != ValueType::REGISTER ||
                            is_control(ops[0].value)) {
                            fprintf(stderr,
                                    "Expected register operand for sne\n");
                            exit(1);
                        }

                        if (ops[1].type == ValueType::IMMEDIATE) {
                            // sne reg, nn: the immediate shares the word with
                            // the register and opcode nibbles, so anything
                            // wider than one byte would corrupt them.
                            const auto imm = get_numeric(ops[1].value);
                            if (imm > 0xFF) {
                                fprintf(stderr,
                                        "Invalid immediate value for sne\n");
                                exit(1);
                            }
                            instruction = 0x4000 |
                                          (get_reg(ops[0].value) << 8) |
                                          (imm & 0xFF);
                            break;
                        }

                        if (ops[1].type == ValueType::REGISTER) {
                            // sne reg, reg
                            if (is_control(ops[1].value)) {
                                fprintf(stderr, "Expected register operand for "
                                                "sne\n");
                                exit(1);
                            }
                            instruction = 0x9000 |
                                          (get_reg(ops[0].value) << 8) |
                                          (get_reg(ops[1].value) << 4);
                            break;
                        }

                        fprintf(stderr, "Expected register or immediate "
                                        "operand for sne\n");
                        exit(1);
                    }
                    case hash("ld"): {
                        // Encodes every `ld` form handled by this assembler.
                        // The first operand must be a register-type value and
                        // selects the family:
                        //   control destination (i, k, f, b, dt, st, [i])
                        //   Vx, byte    -> 0x6000 | (x << 8) | byte
                        //   Vx, control -> dt / k / [i] sources only
                        //   Vx, Vy      -> 0x8000 | (x << 8) | (y << 4)
                        if (instruction_node.operands.size() != 2) {
                            fprintf(stderr,
                                    "Expected 2 operands for ld got %lu\n",
                                    instruction_node.operands.size());
                            exit(1);
                        }

                        if (instruction_node.operands[0].type !=
                            ValueType::REGISTER) {
                            fprintf(stderr,
                                    "Expected register operand for ld\n");
                            exit(1);
                        }

                        std::string reg0 = instruction_node.operands[0].value;

                        if (is_control(reg0)) {
                            std::transform(reg0.begin(), reg0.end(),
                                           reg0.begin(), ::tolower);

                            // Every control destination except I takes its
                            // source from a general-purpose register; reject
                            // anything else before get_reg() sees it.
                            if (reg0 != "i") {
                                std::string src =
                                    instruction_node.operands[1].value;
                                if (instruction_node.operands[1].type !=
                                        ValueType::REGISTER ||
                                    is_control(src)) {
                                    fprintf(
                                        stderr,
                                        "Expected register operand for ld\n");
                                    exit(1);
                                }
                            }

                            switch (hash(reg0)) {
                            case hash("i"): {
                                // ld i, nnn: only the low 12 bits are kept.
                                instruction =
                                    0xA000 |
                                    (get_numeric(
                                         instruction_node.operands[1].value) &
                                     0xFFF);
                                break;
                            }
                            case hash("k"): {
                                instruction =
                                    0xF00A |
                                    (get_reg(instruction_node.operands[1].value)
                                     << 8);
                                break;
                            }
                            case hash("f"): {
                                instruction =
                                    0xF029 |
                                    (get_reg(instruction_node.operands[1].value)
                                     << 8);
                                break;
                            }
                            case hash("b"): {
                                instruction =
                                    0xF033 |
                                    (get_reg(instruction_node.operands[1].value)
                                     << 8);
                                break;
                            }
                            case hash("dt"): {
                                instruction =
                                    0xF015 |
                                    (get_reg(instruction_node.operands[1].value)
                                     << 8);
                                break;
                            }
                            case hash("st"): {
                                instruction =
                                    0xF018 |
                                    (get_reg(instruction_node.operands[1].value)
                                     << 8);
                                break;
                            }
                            case hash("[i]"): {
                                instruction =
                                    0xF055 |
                                    (get_reg(instruction_node.operands[1].value)
                                     << 8);
                                break;
                            }
                            default: {
                                fprintf(stderr,
                                        "Unknown control register: %s\n",
                                        reg0.c_str());
                                exit(1);
                            }
                            }
                            break;
                        }

                        if (instruction_node.operands[1].type ==
                            ValueType::IMMEDIATE) {
                            // ld reg, nn
                            uint16_t val =
                                get_numeric(instruction_node.operands[1].value);
                            if (val > 0xFF) {
                                fprintf(stderr,
                                        "Invalid immediate value for ld\n");
                                exit(1);
                            }
                            instruction =
                                0x6000 |
                                (get_reg(instruction_node.operands[0].value)
                                 << 8) |
                                val;
                            break;
                        }

                        if (instruction_node.operands[1].type ==
                            ValueType::REGISTER) {
                            std::string reg1 =
                                instruction_node.operands[1].value;
                            if (is_control(reg1)) {
                                // ld reg, control. Dispatch on the lowercased
                                // copy so the comparison matches the lowercase
                                // case labels (e.g. "[I]" vs "[i]").
                                std::transform(reg1.begin(), reg1.end(),
                                               reg1.begin(), ::tolower);
                                switch (hash(reg1)) {
                                case hash("dt"): {
                                    instruction =
                                        0xF007 |
                                        (get_reg(
                                             instruction_node.operands[0].value)
                                         << 8);
                                    break;
                                }
                                case hash("k"): {
                                    instruction =
                                        0xF00A |
                                        (get_reg(
                                             instruction_node.operands[0].value)
                                         << 8);
                                    break;
                                }
                                case hash("[i]"): {
                                    instruction =
                                        0xF065 |
                                        (get_reg(
                                             instruction_node.operands[0].value)
                                         << 8);
                                    break;
                                }
                                default: {
                                    // Remaining control registers have no
                                    // encoding as a source here; fail instead
                                    // of emitting a stale instruction word.
                                    fprintf(stderr,
                                            "Cannot load from control "
                                            "register: %s\n",
                                            reg1.c_str());
                                    exit(1);
                                }
                                }
                                break;
                            }

                            // ld reg, reg
                            instruction =
                                0x8000 |
                                (get_reg(instruction_node.operands[0].value)
                                 << 8) |
                                (get_reg(instruction_node.operands[1].value)
                                 << 4);
                            break;
                        }

                        fprintf(stderr, "Expected register or immediate "
                                        "operand for ld\n");
                        exit(1);
                    }
                    case hash("add"): {
                        // Three forms are encoded here:
                        //   add I, Vx    -> 0xF01E | (x << 8)
                        //   add Vx, Vy   -> 0x8004 | (x << 8) | (y << 4)
                        //   add Vx, byte -> 0x7000 | (x << 8) | byte
                        auto &ops = instruction_node.operands;
                        if (ops.size() != 2) {
                            fprintf(stderr, "Expected 2 operands for add\n");
                            exit(1);
                        }
                        if (ops[0].type != ValueType::REGISTER) {
                            fprintf(stderr,
                                    "Expected register operand for add\n");
                            exit(1);
                        }

                        std::string dest = ops[0].value;
                        if (is_control(dest)) {
                            // add I, reg. I is the only control register this
                            // encoding applies to; other control registers
                            // must not silently assemble as "add I".
                            std::transform(dest.begin(), dest.end(),
                                           dest.begin(), ::tolower);
                            if (dest != "i") {
                                fprintf(stderr,
                                        "Unknown control register: %s\n",
                                        dest.c_str());
                                exit(1);
                            }
                            if (ops[1].type != ValueType::REGISTER ||
                                is_control(ops[1].value)) {
                                fprintf(stderr,
                                        "Expected register operand for add\n");
                                exit(1);
                            }
                            instruction =
                                0xF01E | (get_reg(ops[1].value) << 8);
                            break;
                        }

                        if (ops[1].type == ValueType::REGISTER) {
                            // add reg, reg
                            if (is_control(ops[1].value)) {
                                fprintf(stderr,
                                        "Expected register operand for add\n");
                                exit(1);
                            }
                            instruction = 0x8004 |
                                          (get_reg(ops[0].value) << 8) |
                                          (get_reg(ops[1].value) << 4);
                            break;
                        }

                        // add reg, nn: the immediate shares the word with the
                        // register and opcode nibbles, so it must fit a byte.
                        const auto imm = get_numeric(ops[1].value);
                        if (imm > 0xFF) {
                            fprintf(stderr,
                                    "Invalid immediate value for add\n");
                            exit(1);
                        }
                        instruction = 0x7000 | (get_reg(ops[0].value) << 8) |
                                      (imm & 0xFF);
                        break;
                    }
                    case hash("or"): {
                        // or Vx, Vy -> 0x8001 | (x << 8) | (y << 4).
                        // Both operands must be register-type values.
                        auto &ops = instruction_node.operands;
                        if (ops.size() != 2) {
                            // Newline-terminated like every other diagnostic
                            // in this switch.
                            fprintf(stderr,
                                    "Expected 2 operands for or, got %lu\n",
                                    ops.size());
                            exit(1);
                        }

                        if (ops[0].type != ValueType::REGISTER ||
                            ops[1].type != ValueType::REGISTER) {
                            fprintf(stderr, "Expected two registers for or "
                                            "instruction\n");
                            exit(1);
                        }

                        instruction = 0x8001 | (get_reg(ops[0].value) << 8) |
                                      (get_reg(ops[1].value) << 4);
                        break;
                    }
                    case hash("and"): {
                        // and Vx, Vy -> 0x8002 | (x << 8) | (y << 4).
                        // A wrong operand count and a non-register operand
                        // share one diagnostic, so both checks are folded
                        // into a single short-circuiting condition.
                        auto &ops = instruction_node.operands;
                        if (ops.size() != 2 ||
                            ops[0].type != ValueType::REGISTER ||
                            ops[1].type != ValueType::REGISTER) {
                            fprintf(
                                stderr,
                                "Expected two registers for and instruction\n");
                            exit(1);
                        }

                        const auto x_bits = get_reg(ops[0].value) << 8;
                        const auto y_bits = get_reg(ops[1].value) << 4;
                        instruction = 0x8002 | x_bits | y_bits;
                        break;
                    }
                    case hash("xor"): {
                        // xor Vx, Vy -> 0x8003 | (x << 8) | (y << 4).
                        // A wrong operand count and a non-register operand
                        // share one diagnostic, so both checks are folded
                        // into a single short-circuiting condition.
                        auto &ops = instruction_node.operands;
                        if (ops.size() != 2 ||
                            ops[0].type != ValueType::REGISTER ||
                            ops[1].type != ValueType::REGISTER) {
                            fprintf(
                                stderr,
                                "Expected two registers for xor instruction\n");
                            exit(1);
                        }

                        const auto x_bits = get_reg(ops[0].value) << 8;
                        const auto y_bits = get_reg(ops[1].value) << 4;
                        instruction = 0x8003 | x_bits | y_bits;
                        break;
                    }
                    case hash("sub"): {
                        // sub Vx, Vy -> 0x8005 | (x << 8) | (y << 4).
                        // A wrong operand count and a non-register operand
                        // share one diagnostic, so both checks are folded
                        // into a single short-circuiting condition.
                        auto &ops = instruction_node.operands;
                        if (ops.size() != 2 ||
                            ops[0].type != ValueType::REGISTER ||
                            ops[1].type != ValueType::REGISTER) {
                            fprintf(
                                stderr,
                                "Expected two registers for sub instruction\n");
                            exit(1);
                        }

                        const auto x_bits = get_reg(ops[0].value) << 8;
                        const auto y_bits = get_reg(ops[1].value) << 4;
                        instruction = 0x8005 | x_bits | y_bits;
                        break;
                    }
                    case hash("shr"): {
                        // shr Vx      -> 0x8006 | (x << 8) | (x << 4)
                        // shr Vx, Vy  -> 0x8006 | (x << 8) | (y << 4)
                        auto &ops = instruction_node.operands;

                        if (ops.size() == 1) {
                            // Single-operand form: the same register fills
                            // both register fields.
                            if (ops[0].type != ValueType::REGISTER) {
                                fprintf(stderr,
                                        "Expected register operand for shr\n");
                                exit(1);
                            }

                            instruction = 0x8006 |
                                          (get_reg(ops[0].value) << 8) |
                                          (get_reg(ops[0].value) << 4);
                            break;
                        }

                        // Two-operand form: count and type failures share
                        // one diagnostic.
                        if (ops.size() != 2 ||
                            ops[0].type != ValueType::REGISTER ||
                            ops[1].type != ValueType::REGISTER) {
                            fprintf(
                                stderr,
                                "Expected two registers for shr instruction\n");
                            exit(1);
                        }

                        const auto x_bits = get_reg(ops[0].value) << 8;
                        const auto y_bits = get_reg(ops[1].value) << 4;
                        instruction = 0x8006 | x_bits | y_bits;
                        break;
                    }
                    case hash("subn"): {
                        // subn Vx, Vy -> 0x8007 | (x << 8) | (y << 4).
                        // A wrong operand count and a non-register operand
                        // share one diagnostic, so both checks are folded
                        // into a single short-circuiting condition.
                        auto &ops = instruction_node.operands;
                        if (ops.size() != 2 ||
                            ops[0].type != ValueType::REGISTER ||
                            ops[1].type != ValueType::REGISTER) {
                            fprintf(stderr, "Expected two registers for subn "
                                            "instruction\n");
                            exit(1);
                        }

                        const auto x_bits = get_reg(ops[0].value) << 8;
                        const auto y_bits = get_reg(ops[1].value) << 4;
                        instruction = 0x8007 | x_bits | y_bits;
                        break;
                    }
                    case hash("shl"): {
                        // shl Vx      -> 0x800E | (x << 8) | (x << 4)
                        // shl Vx, Vy  -> 0x800E | (x << 8) | (y << 4)
                        auto &ops = instruction_node.operands;

                        if (ops.size() == 1) {
                            // Single-operand form: the same register fills
                            // both register fields.
                            if (ops[0].type != ValueType::REGISTER) {
                                fprintf(stderr,
                                        "Expected register operand for shl\n");
                                exit(1);
                            }

                            instruction = 0x800E |
                                          (get_reg(ops[0].value) << 8) |
                                          (get_reg(ops[0].value) << 4);
                            break;
                        }

                        // Two-operand form: count and type failures share
                        // one diagnostic.
                        if (ops.size() != 2 ||
                            ops[0].type != ValueType::REGISTER ||
                            ops[1].type != ValueType::REGISTER) {
                            fprintf(
                                stderr,
                                "Expected two registers for shl instruction\n");
                            exit(1);
                        }

                        const auto x_bits = get_reg(ops[0].value) << 8;
                        const auto y_bits = get_reg(ops[1].value) << 4;
                        instruction = 0x800E | x_bits | y_bits;
                        break;
                    }
                    case hash("rnd"): {
                        // rnd Vx, byte -> 0xC000 | (x << 8) | byte.
                        // The immediate is rejected when it exceeds 0xFF.
                        auto &ops = instruction_node.operands;
                        if (ops.size() != 2) {
                            fprintf(stderr, "Expected 2 operands for rnd\n");
                            exit(1);
                        }

                        auto &dest = ops[0];
                        auto &mask = ops[1];

                        if (dest.type != ValueType::REGISTER) {
                            fprintf(stderr,
                                    "Expected register operand for rnd\n");
                            exit(1);
                        }

                        if (mask.type != ValueType::IMMEDIATE) {
                            fprintf(stderr,
                                    "Expected immediate operand for rnd\n");
                            exit(1);
                        }

                        if (get_numeric(mask.value) > 0xFF) {
                            fprintf(stderr,
                                    "Invalid immediate value for rnd\n");
                            exit(1);
                        }

                        instruction = 0xC000 | (get_reg(dest.value) << 8) |
                                      (get_numeric(mask.value) & 0xFF);
                        break;
                    }
                    case hash("drw"): {
                        // drw Vx, Vy, n -> 0xD000 | (x << 8) | (y << 4) | n.
                        // The immediate is rejected when it exceeds 0xF.
                        auto &ops = instruction_node.operands;
                        if (ops.size() != 3) {
                            fprintf(stderr, "Expected 3 operands for drw\n");
                            exit(1);
                        }

                        // Both leading operands must be registers; either
                        // failure produces the same diagnostic.
                        if (ops[0].type != ValueType::REGISTER ||
                            ops[1].type != ValueType::REGISTER) {
                            fprintf(stderr,
                                    "Expected register operand for drw\n");
                            exit(1);
                        }

                        auto &rows = ops[2];
                        if (rows.type != ValueType::IMMEDIATE) {
                            fprintf(stderr,
                                    "Expected immediate operand for drw\n");
                            exit(1);
                        }

                        if (get_numeric(rows.value) > 0xF) {
                            fprintf(stderr,
                                    "Invalid immediate value for drw\n");
                            exit(1);
                        }

                        instruction = 0xD000 | (get_reg(ops[0].value) << 8) |
                                      (get_reg(ops[1].value) << 4) |
                                      (get_numeric(rows.value) & 0xF);
                        break;
                    }
                    case hash("skp"): {
                        // skp Vx -> 0xE09E | (x << 8).
                        auto &ops = instruction_node.operands;
                        if (ops.size() != 1) {
                            fprintf(stderr, "Expected 1 operand for skp\n");
                            exit(1);
                        }

                        auto &key_reg = ops[0];
                        if (key_reg.type != ValueType::REGISTER) {
                            fprintf(stderr,
                                    "Expected register operand for skp\n");
                            exit(1);
                        }

                        const auto x_bits = get_reg(key_reg.value) << 8;
                        instruction = 0xE09E | x_bits;
                        break;
                    }
                    case hash("sknp"): {
                        // sknp Vx -> 0xE0A1 | (x << 8).
                        auto &ops = instruction_node.operands;
                        if (ops.size() != 1) {
                            fprintf(stderr, "Expected 1 operand for sknp\n");
                            exit(1);
                        }

                        auto &key_reg = ops[0];
                        if (key_reg.type != ValueType::REGISTER) {
                            fprintf(stderr,
                                    "Expected register operand for sknp\n");
                            exit(1);
                        }

                        const auto x_bits = get_reg(key_reg.value) << 8;
                        instruction = 0xE0A1 | x_bits;
                        break;
                    }
                    case hash("hires"): {
                        // Pseudo-instruction with no operands: emits 0x1260,
                        // i.e. the same 0x1000 | address encoding the `jp`
                        // case produces, with the address fixed at 0x260
                        // (hexadecimal).
                        instruction = 0x1000 | 0x260;
                        break;
                    }
                    default: {
                        // The mnemonic matched none of the cases above:
                        // report it on stderr.
                        // NOTE(review): unlike the other error paths in this
                        // switch, this one does not exit(1). Control leaves
                        // the switch and whatever `instruction` currently
                        // holds is still written to rom_buf below — confirm
                        // that continuing is intended.
                        fprintf(stderr, "Unhandled instruction %s!\n",
                                instruction_node.opcode.c_str());
                        break;
                    }
                    }

                    // le to be
                    rom_buf[pc] = (instruction >> 8) & 0xFF;
                    rom_buf[pc + 1] = instruction & 0xFF;

                    pc += 2;
                    break;
                }
                case SectionElement::Type::Directive: {
                    DirectiveNode directive_node =
                        std::get<DirectiveNode>(element.element);
                    for (auto &operand : directive_node.operands) {
                        if (operand.type == ValueType::LABEL) {
                            // check if the label is really a reference
                            // to a constant (ie a value defined by a
                            // defines directive)
                            if (defines.contains(operand.value)) {
                                operand.type = defines[operand.value].type;
                                operand.value = defines[operand.value].value;
                            }
                        }
                    }

                    switch (directive_node.directive) {
                    case DirectiveType::DEFINE: {
                        // Defines are preprocessed, so there is nothing to
                        // emit here: no bytes are written and pc is left
                        // unchanged.
                        break;
                    }
                    case DirectiveType::TEXT: {
                        if (directive_node.operands.size() != 1) {
                            fprintf(stderr, "Expected 1 operand for text\n");
                            exit(1);
                        }

                        if (directive_node.operands[0].type ==
                            ValueType::LABEL) {
                            if (defines.contains(
                                    directive_node.operands[0].value)) {
                            }
                        }

                        if (directive_node.operands[0].type !=
                            ValueType::STRING) {
                            fprintf(stderr,
                                    "Expected string operand for text\n");
                            exit(1);
                        }

                        size_t len =
                            directive_node.operands[0].value.size() + 1;

                        pc += len;

                        // write string to rom including null terminator
                        memcpy(rom_buf + pc,
                               directive_node.operands[0].value.c_str(), len);
                        break;
                    }
                    case DirectiveType::DB: {
                        if (directive_node.operands.size() == 0) {
                            fprintf(stderr, "Expected operands for db\n");
                            exit(1);
                        }

                        for (auto &operand : directive_node.operands) {
                            uint8_t byte = get_numeric(operand.value);
                            if (operand.type != ValueType::IMMEDIATE) {
                                fprintf(stderr, "Expected immediate "
                                                "operand for db\n");
                                exit(1);
                            }

                            rom_buf[pc] = byte;
                            pc += 1;
                        }
                        break;
                    }
                    case DirectiveType::DW: {
                        if (directive_node.operands.size() == 0) {
                            fprintf(stderr, "Expected operands for dw\n");
                            exit(1);
                        }

                        for (auto &operand : directive_node.operands) {
                            uint16_t word = get_numeric(operand.value);
                            if (operand.type != ValueType::IMMEDIATE) {
                                fprintf(stderr, "Expected immediate "
                                                "operand for dw\n");
                                exit(1);
                            }

                            rom_buf[pc] = word & 0xFF;
                            rom_buf[pc + 1] = word >> 8;
                            pc += 2;
                        }
                        break;
                    }
                    case DirectiveType::OFFSET: {
                        if (directive_node.operands.size() != 1) {
                            fprintf(stderr, "Expected 1 operand for offset\n");
                            exit(1);
                        }

                        size_t offset =
                            get_numeric(directive_node.operands[0].value);
                        if (directive_node.operands[0].type !=
                            ValueType::IMMEDIATE) {
                            fprintf(stderr, "Expected immediate "
                                            "operand for offset\n");
                            exit(1);
                        }

                        pc += offset;
                        break;
                    }
                    case DirectiveType::INCLUDE: {
                        fprintf(stderr, "extraneous include directive! This is "
                                        "likely a compiler bug\n");
                        exit(1);
                    }
                    }

                    break;
                }
                }
            }
        }

        write(rom_fd, rom_buf, pc);

        return nullptr;
    }

  private:
};

/// Reads the whole file at `path` into a string.
///
/// Returns std::nullopt, after printing a diagnostic to stderr, when the file
/// cannot be opened or read. The descriptor is closed on every path.
static std::optional<std::string> read_include_file(const std::string &path) {
    const int fd = open(path.c_str(), O_RDONLY);
    if (fd < 0) {
        fprintf(stderr, "Failed to open include file: %s\n", path.c_str());
        return std::nullopt;
    }

    // Read until EOF instead of trusting a size from lseek(): this copes with
    // short reads and with empty files.
    std::string contents;
    char chunk[4096];
    for (;;) {
        const ssize_t got = read(fd, chunk, sizeof(chunk));
        if (got < 0) {
            fprintf(stderr, "Failed to read include file: %s\n", path.c_str());
            close(fd);
            return std::nullopt;
        }
        if (got == 0) {
            break;
        }
        contents.append(chunk, static_cast<size_t>(got));
    }

    close(fd);
    return contents;
}

/// Expands every `include "<file>"` line in the given source text.
///
/// Each line that starts with `include` is replaced by the (recursively
/// expanded) contents of `include_dir + <file>`; every other line is copied
/// through unchanged. Line terminators of the input are preserved, including
/// the absence of a trailing newline on the last line.
///
/// @param data          Source text; need not be NUL-terminated.
/// @param size          Number of bytes in `data`.
/// @param include_dir   Prefix prepended verbatim to every included file
///                      name. Nested includes use this same prefix.
/// @param seen_includes Paths already included. Every expanded path is added
///                      and never removed, so a given path can be included at
///                      most once across the whole expansion.
/// @return The expanded text, or std::nullopt (with a diagnostic on stderr)
///         when a directive is malformed, a path was already included, or a
///         file cannot be opened or read.
std::optional<std::string>
process_includes(char *data, size_t size, std::string include_dir,
                 std::unordered_set<std::string> &seen_includes) {
    const std::string source(data, size);

    // Build the result in a separate buffer. Editing the source string in
    // place would shift the offsets of every later include line.
    std::string expanded;
    expanded.reserve(source.size());

    std::stringstream lss(source);
    std::string line;
    while (std::getline(lss, line)) {
        // eofbit is set when getline stopped at end of input rather than at a
        // '\n', i.e. this line had no terminator in the original text.
        const bool had_newline = !lss.eof();

        if (line.compare(0, 7, "include") != 0) {
            expanded += line;
        } else {
            const size_t open_quote = line.find('"');
            const size_t close_quote = open_quote == std::string::npos
                                           ? std::string::npos
                                           : line.find('"', open_quote + 1);
            if (close_quote == std::string::npos) {
                fprintf(stderr, "Malformed include directive: %s\n",
                        line.c_str());
                return std::nullopt;
            }

            const std::string path =
                include_dir +
                line.substr(open_quote + 1, close_quote - open_quote - 1);

            // insert() reports whether the path was new, so one lookup both
            // detects a repeat and records the path.
            if (!seen_includes.insert(path).second) {
                fprintf(stderr, "Recursive include detected: %s\n",
                        path.c_str());
                return std::nullopt;
            }

            std::optional<std::string> contents = read_include_file(path);
            if (!contents.has_value()) {
                return std::nullopt;
            }

            // recursively process the included file
            const std::optional<std::string> nested = process_includes(
                &(*contents)[0], contents->size(), include_dir, seen_includes);
            if (!nested.has_value()) {
                return std::nullopt;
            }

            expanded += *nested;
        }

        if (had_newline) {
            expanded += '\n';
        }
    }

    return expanded;
}

/// Entry point: assembles `argv[1]` into the ROM file `argv[2]`.
///
/// The input is read in full, its `include` lines are expanded relative to
/// the directory of the input file, and the result is lexed, parsed and
/// handed to the assembler together with the output descriptor.
///
/// @return 0 on success, 1 on a usage error or when the input or output
///         cannot be opened or read, or when include expansion fails.
int main(int argc, char **argv) {
    if (argc < 3) {
        printf("Usage: %s <input> <output>\n", argv[0]);
        return 1;
    }

    const std::string file_name = argv[1];

    const int asm_fd = open(argv[1], O_RDONLY);
    if (asm_fd < 0) {
        fprintf(stderr, "Failed to open file: %s\n", argv[1]);
        return 1;
    }

    // Read until EOF so short reads and empty files are handled, and so no
    // manually managed buffer is needed.
    std::string source;
    char chunk[4096];
    for (;;) {
        const ssize_t got = read(asm_fd, chunk, sizeof(chunk));
        if (got < 0) {
            fprintf(stderr, "Failed to read file: %s\n", argv[1]);
            close(asm_fd);
            return 1;
        }
        if (got == 0) {
            break;
        }
        source.append(chunk, static_cast<size_t>(got));
    }
    close(asm_fd);

    // Includes are resolved relative to the input file's directory. A bare
    // file name has no '/', so the prefix is empty (current directory)
    // rather than "<file>/".
    const size_t last_slash = file_name.find_last_of('/');
    const std::string includes_dir =
        last_slash == std::string::npos ? std::string()
                                        : file_name.substr(0, last_slash + 1);

    // Seed with the input file so it cannot include itself.
    std::unordered_set<std::string> seen_includes = {file_name};
    std::optional<std::string> asm_str = process_includes(
        source.data(), source.size(), includes_dir, seen_includes);
    if (!asm_str.has_value()) {
        return 1;
    }

    // Open (and truncate) the output only once the input has been read and
    // expanded, so those failures leave an existing ROM untouched.
    const int rom_fd = open(argv[2], O_RDWR | O_CREAT | O_TRUNC, 0644);
    if (rom_fd < 0) {
        fprintf(stderr, "Failed to open file: %s\n", argv[2]);
        return 1;
    }

    Lexer lexer = Lexer(asm_str.value());
    Parser parser = Parser(lexer);

    Assembler assembler = Assembler();
    assembler.assembler(parser.parse(), rom_fd);

    close(rom_fd);
    return 0;
}