// NOTE(review): the header names and template arguments in this file were
// lost (everything between angle brackets was stripped). The list below is
// reconstructed from what the code uses -- confirm against the original.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <fcntl.h>
#include <format>
#include <optional>
#include <queue>
#include <stdexcept>
#include <string>
#include <unistd.h>
#include <unordered_map>
#include <unordered_set>

#define BYTECODE_READER_IMPLEMENTATION
#include "reader.hpp"

#include <vector>

// Operand addresses are 0x200 higher than ROM offsets: every place that turns
// an operand into a ROM offset subtracts this value.
constexpr uint16_t kRomBase = 0x200;

// All addresses are tracked as uint16_t, so a larger ROM cannot be indexed.
constexpr int kMaxRomSize = 0xFFFF;

// A named location in the ROM, keyed elsewhere by its ROM offset.
struct label {
    enum type {
        type_instruction,
        type_byte,
    } type;
    // Number of units covered by the label (see the length pass at the end of
    // disassemble()).
    uint16_t length;
    std::string name;
};

using label_map = std::unordered_map<uint16_t, label>;

/// Returns the greatest label offset that is <= target, or std::nullopt when
/// every label lies above target.
///
/// The map is unordered, so every key has to be inspected; stopping at the
/// first key above the target (as an ordered container would allow) can miss
/// the real answer.
std::optional<uint16_t> find_closest_label(const label_map &map,
                                           uint16_t target) {
    std::optional<uint16_t> closest;
    for (const auto &pair : map) {
        const uint16_t address = pair.first;
        if (address > target) {
            continue;
        }
        if (!closest.has_value() || address >= *closest) {
            closest = address;
        }
    }
    return closest;
}

/// Formats number as at least four lowercase hex digits (no "0x" prefix).
std::string to_hex(size_t number) { return std::format("{:04x}", number); }

/// Produces the textual operand for an address operand.
///
/// @param byte        the raw operand (an address, i.e. ROM offset + 0x200)
/// @param labels      all known labels, keyed by ROM offset
/// @param target_type label type accepted for the "label + offset" form
/// @return the label name on an exact match; "name + 0xNNNN" when the address
///         falls within the closest preceding label of target_type; otherwise
///         the raw address as "0xNNNN".
std::string get_label_for_byte(uint16_t byte, const label_map &labels,
                               enum label::type target_type) {
    const std::string raw_operand = "0x" + to_hex(byte);
    // The subtraction wraps for operands below the base, exactly like the map
    // key conversion did before; such keys simply never match a label.
    const uint16_t rom_offset = static_cast<uint16_t>(byte - kRomBase);

    if (const auto exact = labels.find(rom_offset); exact != labels.end()) {
        return exact->second.name;
    }

    // try to see if the operand is offset from a label
    if (byte <= kRomBase) {
        return raw_operand;
    }
    const std::optional<uint16_t> closest =
        find_closest_label(labels, rom_offset);
    if (!closest.has_value()) {
        return raw_operand;
    }
    const label &base = labels.at(*closest);
    if (base.type != target_type) {
        return raw_operand;
    }
    const uint16_t offset = static_cast<uint16_t>(rom_offset - *closest);
    // discard our result if the offset is greater than the length of the
    // label
    if (offset > base.length) {
        return raw_operand;
    }
    return base.name + " + 0x" + to_hex(offset);
}

// Prints "<mnemonic> <label>" for a branch whose target must already have a
// label; terminates the program when it does not.
static void print_labelled_branch(const char *mnemonic, uint16_t target,
                                  const label_map &labels) {
    const auto found = labels.find(static_cast<uint16_t>(target - kRomBase));
    if (found == labels.end()) {
        fprintf(stderr, "No label found for %04x\n", target - kRomBase);
        exit(1);
    }
    printf("%s %s\n", mnemonic, found->second.name.c_str());
}

// I could emit something like omni assembly, but that is significantly more
// complex than just emitting the assembly like this, so I am just going to
// emit the assembly like this for now

/// Prints one decoded instruction as a line of assembly on stdout.
///
/// @param bytecode the decoded instruction
/// @param pc       ROM offset of the instruction
/// @param labels   known labels, used to print symbolic branch targets
void print_instruction(Bytecode bytecode, uint16_t pc,
                       const label_map &labels) {
    switch (bytecode.instruction_type) {
    case HLT:
        printf("halt\n");
        break;
    case EXIT:
        printf("exit 0x%02x\n", bytecode.operand.byte);
        break;
    case SYS: {
        // disassemble() never creates labels for SYS targets, so a missing
        // label is the normal case here; fall back to the raw address
        // instead of aborting the whole listing.
        const auto found = labels.find(
            static_cast<uint16_t>(bytecode.operand.word - kRomBase));
        if (found != labels.end()) {
            printf("sys %s\n", found->second.name.c_str());
        } else {
            printf("sys 0x%04x\n", bytecode.operand.word);
        }
        break;
    }
    case CLS:
        printf("cls\n");
        break;
    case RET:
        printf("ret\n");
        break;
    case JP:
        // A jump to 0x260 as the very first instruction is printed as the
        // "hires" directive rather than as a jump.
        if (pc == 0 && bytecode.operand.word == 0x260) {
            printf("hires\n");
            break;
        }
        print_labelled_branch("jp", bytecode.operand.word, labels);
        break;
    case CALL:
        print_labelled_branch("call", bytecode.operand.word, labels);
        break;
    case SKIP_INSTRUCTION_BYTE:
        printf("se v%x, 0x%02x\n", bytecode.operand.byte_reg.reg,
               bytecode.operand.byte_reg.byte);
        break;
    case SKIP_INSTRUCTION_NE_BYTE:
        printf("sne v%x, 0x%02x\n", bytecode.operand.byte_reg.reg,
               bytecode.operand.byte_reg.byte);
        break;
    case SKIP_INSTRUCTION_REG:
        printf("se v%x, v%x\n", bytecode.operand.reg_reg.x,
               bytecode.operand.reg_reg.y);
        break;
    case SKIP_INSTRUCTION_NE_REG:
        printf("sne v%x, v%x\n", bytecode.operand.reg_reg.x,
               bytecode.operand.reg_reg.y);
        break;
    case LOAD_BYTE:
        printf("ld v%x, 0x%02x\n", bytecode.operand.byte_reg.reg,
               bytecode.operand.byte_reg.byte);
        break;
    case ADD_BYTE:
        printf("add v%x, 0x%02x\n", bytecode.operand.byte_reg.reg,
               bytecode.operand.byte_reg.byte);
        break;
    case LOAD_REG:
        printf("ld v%x, v%x\n", bytecode.operand.reg_reg.x,
               bytecode.operand.reg_reg.y);
        break;
    case ADD_REG:
        printf("add v%x, v%x\n", bytecode.operand.reg_reg.x,
               bytecode.operand.reg_reg.y);
        break;
    case OR_REG:
        printf("or v%x, v%x\n", bytecode.operand.reg_reg.x,
               bytecode.operand.reg_reg.y);
        break;
    case AND_REG:
        printf("and v%x, v%x\n", bytecode.operand.reg_reg.x,
               bytecode.operand.reg_reg.y);
        break;
    case XOR_REG:
        printf("xor v%x, v%x\n", bytecode.operand.reg_reg.x,
               bytecode.operand.reg_reg.y);
        break;
    case SHR_REG:
        printf("shr v%x\n", bytecode.operand.reg_reg.x);
        break;
    case SUB_REG:
        printf("sub v%x, v%x\n", bytecode.operand.reg_reg.x,
               bytecode.operand.reg_reg.y);
        break;
    case SUBN_REG:
        printf("subn v%x, v%x\n", bytecode.operand.reg_reg.x,
               bytecode.operand.reg_reg.y);
        break;
    case SHL_REG:
        printf("shl v%x\n", bytecode.operand.reg_reg.x);
        break;
    case LOAD_I_BYTE:
        printf("ld I, %s\n",
               get_label_for_byte(bytecode.operand.word, labels,
                                  label::type_byte)
                   .c_str());
        break;
    case JP_V0_BYTE:
        printf("jp v0, %s\n",
               get_label_for_byte(bytecode.operand.word, labels,
                                  label::type_instruction)
                   .c_str());
        break;
    case RND:
        printf("rnd v%x, 0x%02x\n", bytecode.operand.byte_reg.reg,
               bytecode.operand.byte_reg.byte);
        break;
    case DRW:
        printf("drw v%x, v%x, 0x%x\n", bytecode.operand.reg_reg_nibble.x,
               bytecode.operand.reg_reg_nibble.y,
               bytecode.operand.reg_reg_nibble.nibble);
        break;
    case SKIP_PRESSED_REG:
        printf("skp v%x\n", bytecode.operand.byte);
        break;
    case SKIP_NOT_PRESSED_REG:
        printf("sknp v%x\n", bytecode.operand.byte);
        break;
    case LD_REG_DT:
        printf("ld v%x, dt\n", bytecode.operand.byte);
        break;
    case LD_REG_K:
        printf("ld v%x, k\n", bytecode.operand.byte);
        break;
    case LD_DT_REG:
        printf("ld dt, v%x\n", bytecode.operand.byte);
        break;
    case LD_ST_REG:
        printf("ld st, v%x\n", bytecode.operand.byte);
        break;
    case ADD_I_REG:
        printf("add I, v%x\n", bytecode.operand.byte);
        break;
    case LD_F_REG:
        printf("ld f, v%x\n", bytecode.operand.byte);
        break;
    case LD_B_REG:
        printf("ld b, v%x\n", bytecode.operand.byte);
        break;
    case LD_PTR_I_REG:
        printf("ld [I], v%x\n", bytecode.operand.byte);
        break;
    case LD_REG_PTR_I:
        printf("ld v%x, [I]\n", bytecode.operand.byte);
        break;
    case UNKNOWN_INSTRUCTION:
        printf("?\n");
    }
}

// Currently unused.
struct hole {
    uint16_t start;
    uint16_t end;
};

// One unit of output: either a decoded instruction or a single raw data byte,
// together with the ROM offset it was read from. `type` selects the active
// union member.
struct assembly_node {
    uint16_t address;
    enum type {
        type_byte,
        type_instruction,
    } type;
    union {
        uint8_t byte;
        Bytecode instruction;
    };
};

/// Prints the listing to stdout in address order.
///
/// Consecutive data bytes are merged into a single "db" statement, labels are
/// printed on their own line before the node they name, and instructions are
/// printed through print_instruction().
///
/// @param assembly nodes to print (taken by value because it is sorted here)
/// @param labels   labels keyed by ROM offset
void write_assembly(std::vector<assembly_node> assembly,
                    const label_map &labels) {
    std::sort(assembly.begin(), assembly.end(),
              [](const assembly_node &a, const assembly_node &b) {
                  return a.address < b.address;
              });

    // True while a "db" statement has been started but its line has not been
    // terminated yet.
    bool byte_run_open = false;
    uint16_t last_byte_address = 0x0000;

    for (const auto &node : assembly) {
        const auto label_it = labels.find(node.address);
        const bool has_label = label_it != labels.end();
        const bool continues_run =
            byte_run_open && !has_label &&
            node.type == assembly_node::type_byte &&
            node.address == last_byte_address + 1;

        // Anything other than the next contiguous byte ends the current "db"
        // statement, so terminate its line first.
        if (byte_run_open && !continues_run) {
            printf("\n");
            byte_run_open = false;
        }

        if (has_label) {
            if (node.address != 0x0000) {
                // add whitespace between labels, but not for the _start label
                printf("\n");
            }
            printf("%s:\n", label_it->second.name.c_str());
        }

        switch (node.type) {
        case assembly_node::type_byte:
            printf(continues_run ? ",\n " : "db ");
            printf("0x%02x", node.byte);
            last_byte_address = node.address;
            byte_run_open = true;
            break;
        case assembly_node::type_instruction:
            print_instruction(node.instruction, node.address, labels);
            break;
        }
    }

    // Do not leave the last "db" statement without a line terminator.
    if (byte_run_open) {
        printf("\n");
    }
}

/// Disassembles a ROM image and prints the listing to stdout.
///
/// Code is discovered by following control flow from ROM offset 0 without
/// executing anything; every byte that is never reached as an instruction is
/// emitted as data.
///
/// @param rom      the ROM image
/// @param rom_size number of bytes in rom; must not exceed 0xFFFF because
///                 offsets are tracked as uint16_t
void disassemble(uint8_t *rom, int rom_size) {
    if (rom == nullptr || rom_size <= 0) {
        // Nothing to decode and nothing to print.
        return;
    }
    if (rom_size > kMaxRomSize) {
        fprintf(stderr, "ROM too large: %d bytes (maximum is %d)\n", rom_size,
                kMaxRomSize);
        exit(1);
    }

    // evaluate the bytecode, but dont actually execute it, just print it out,
    // and when we reach a branching instruction, follow it. Make sure that if
    // we enter an infinite loop we dont just loop forever, so make sure we
    // keep track of what we have already visited
    std::unordered_set<uint16_t> addresses_visited;
    std::queue<uint16_t> work_queue;
    label_map labels;
    size_t label_idx = 0;
    std::vector<assembly_node> assembly;

    // Queues a ROM offset for decoding; offsets whose two opcode bytes do not
    // both lie inside the ROM are dropped.
    const auto enqueue = [&](int address) {
        if (address >= 0 && address + 1 < rom_size) {
            work_queue.push(static_cast<uint16_t>(address));
        }
    };

    // Gives a branch target a generated name unless it already has a label.
    const auto ensure_code_label = [&](uint16_t address) {
        if (!labels.contains(address)) {
            labels.emplace(address,
                           label{.type = label::type_instruction,
                                 .length = 0,
                                 .name = "_" + std::to_string(label_idx++)});
        }
    };

    // start at the beginning of the rom
    enqueue(0x0000);
    labels.emplace(0x0000, label{.type = label::type_instruction,
                                 .length = 0,
                                 .name = "_start"});

    while (!work_queue.empty()) {
        const uint16_t pc = work_queue.front();
        work_queue.pop();

        // Skip (rather than stop on) anything already decoded so that the
        // remaining queued paths are still explored.
        if (!addresses_visited.insert(pc).second) {
            continue;
        }

        const uint16_t opcode =
            static_cast<uint16_t>((rom[pc] << 8) | rom[pc + 1]);
        Bytecode bytecode = parse(opcode);
        assembly.push_back({.address = pc,
                            .type = assembly_node::type_instruction,
                            .instruction = bytecode});

        switch (bytecode.instruction_type) {
        case JP: {
            if (pc == 0 && bytecode.operand.word == 0x260) {
                // NOTE(review): the original queued 0x2C0 directly, unlike
                // every other target, which is converted to a ROM offset
                // first. Treated as a missing conversion -- confirm.
                enqueue(0x2C0 - kRomBase);
                break;
            }
            const uint16_t target =
                static_cast<uint16_t>(bytecode.operand.word - kRomBase);
            ensure_code_label(target);
            enqueue(target);
            break;
        }
        case CALL: {
            const uint16_t target =
                static_cast<uint16_t>(bytecode.operand.word - kRomBase);
            ensure_code_label(target);
            // The callee is assumed to return, so the instruction after the
            // call is reachable too. Queuing both here replaces a simulated
            // call stack, which cannot pair calls with returns when paths are
            // explored breadth-first.
            enqueue(target);
            enqueue(pc + 2);
            break;
        }
        case SKIP_INSTRUCTION_BYTE:
        case SKIP_INSTRUCTION_NE_BYTE:
        case SKIP_INSTRUCTION_REG:
        case SKIP_INSTRUCTION_NE_REG:
        case SKIP_PRESSED_REG:
        case SKIP_NOT_PRESSED_REG: {
            // Both the next instruction and the one after it are reachable.
            enqueue(pc + 2);
            enqueue(pc + 4);
            break;
        }
        case RET: {
            // The return address was queued when the CALL was decoded.
            break;
        }
        case HLT: {
            // Stop following
            break;
        }
        case UNKNOWN_INSTRUCTION: {
            fprintf(stderr, "Unknown instruction: %04x\n", opcode);
            // we failed at disassembling smartly
            break;
        }
        default:
            enqueue(pc + 2);
        }
    }

    // Everything that was never decoded as an instruction is emitted as data.
    bool skip = false;
    std::optional<uint16_t> previous_byte;
    uint16_t start_of_last_contiguous_block = 0x0000;
    for (int offset = 0; offset < rom_size; offset++) {
        const uint16_t pc = static_cast<uint16_t>(offset);
        if (skip) {
            skip = false;
            continue;
        }

        if (addresses_visited.contains(pc)) {
            // when there is an instruction that we have already visited, we
            // want to skip this byte and the next byte, but we cant rely on
            // the instructions being aligned to 0x02 bytes, so instead we
            // tell the next run of the loop to skip
            skip = true;
            continue;
        }

        const bool extends_block =
            previous_byte.has_value() && *previous_byte + 1 == offset;
        if (!extends_block) {
            start_of_last_contiguous_block = pc;
            // we are not in a contiguous block of bytes, so we need to add a
            // label
            if (!labels.contains(pc)) {
                labels.emplace(pc,
                               label{.type = label::type_byte,
                                     .length = 1,
                                     .name = "_" + std::to_string(label_idx++)});
            }
        } else {
            // we are in a contiguous block of bytes, so we need to update the
            // label's length by one
            labels[start_of_last_contiguous_block].length++;
        }

        previous_byte = pc;
        assembly.push_back(
            {.address = pc, .type = assembly_node::type_byte, .byte = rom[pc]});
    }

    // Recompute every label's length as the number of steps until the next
    // label or the end of the ROM.
    // NOTE(review): a step is one byte for byte labels but two bytes for
    // instruction labels, while get_label_for_byte() compares the length
    // against a byte offset -- confirm that this is intended.
    for (auto &pair : labels) {
        uint16_t pc = pair.first;
        uint16_t label_length = 0;
        // while we havent reached the end of the rom, and we havent crossed
        // into a new label
        while (pc < rom_size) {
            label_length++;
            switch (pair.second.type) {
            case label::type_byte:
                pc++;
                break;
            case label::type_instruction:
                pc += 2;
                break;
            }
            if (labels.find(pc) != labels.end())
                break;
        }
        pair.second.length = label_length;
    }

    write_assembly(assembly, labels);
}

/// Entry point: reads the ROM named by the first argument and prints its
/// disassembly to stdout. Returns 1 on usage or I/O errors.
int main(int argc, char **argv) {
    if (argc < 2) {
        printf("Usage: %s <rom>\n", argv[0]);
        return 1;
    }

    const int rom_fd = open(argv[1], O_RDONLY);
    if (rom_fd < 0) {
        fprintf(stderr, "Failed to open file: %s\n", argv[1]);
        return 1;
    }

    const off_t file_size = lseek(rom_fd, 0, SEEK_END);
    if (file_size < 0 || lseek(rom_fd, 0, SEEK_SET) < 0) {
        fprintf(stderr, "Failed to determine size of file: %s\n", argv[1]);
        close(rom_fd);
        return 1;
    }
    if (file_size > kMaxRomSize) {
        fprintf(stderr, "File too large to be a ROM: %s\n", argv[1]);
        close(rom_fd);
        return 1;
    }

    std::vector<uint8_t> rom(static_cast<size_t>(file_size));

    // read() may return fewer bytes than requested, so keep going until the
    // buffer is full or the file ends early.
    size_t bytes_read = 0;
    while (bytes_read < rom.size()) {
        const ssize_t chunk =
            read(rom_fd, rom.data() + bytes_read, rom.size() - bytes_read);
        if (chunk < 0) {
            fprintf(stderr, "Failed to read file: %s\n", argv[1]);
            close(rom_fd);
            return 1;
        }
        if (chunk == 0) {
            break;
        }
        bytes_read += static_cast<size_t>(chunk);
    }
    close(rom_fd);

    disassemble(rom.data(), static_cast<int>(bytes_read));
    return 0;
}