disassembler, assembler, bug fixes, and more
This commit is contained in:
533
disassembler/disassembler.cpp
Normal file
533
disassembler/disassembler.cpp
Normal file
@@ -0,0 +1,533 @@
|
||||
#include <bits/stdc++.h>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <fcntl.h>
|
||||
#include <format>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
#define BYTECODE_READER_IMPLEMENTATION
|
||||
#include "reader.hpp"
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
struct label {
|
||||
enum type {
|
||||
type_instruction,
|
||||
type_byte,
|
||||
} type;
|
||||
uint16_t length;
|
||||
std::string name;
|
||||
};
|
||||
|
||||
uint16_t *find_closest_label(const std::unordered_map<uint16_t, label> &map,
|
||||
uint16_t target) {
|
||||
|
||||
uint16_t *closest_label = nullptr;
|
||||
|
||||
for (const auto &pair : map) {
|
||||
if (pair.first <= target) {
|
||||
if (closest_label != nullptr && pair.first < *closest_label) {
|
||||
continue;
|
||||
}
|
||||
if (closest_label == nullptr) {
|
||||
closest_label = (uint16_t *)calloc(1, sizeof(uint16_t));
|
||||
if (closest_label == NULL) {
|
||||
throw std::runtime_error("Failed to allocate memory");
|
||||
}
|
||||
}
|
||||
*closest_label = pair.first;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return closest_label;
|
||||
}
|
||||
|
||||
std::string to_hex(size_t number) { return std::format("{:04x}", number); }
|
||||
|
||||
std::string
|
||||
get_label_for_byte(uint16_t byte,
|
||||
std::unordered_map<uint16_t, struct label> labels,
|
||||
enum label::type target_type) {
|
||||
std::string operand_str;
|
||||
operand_str.append("0x");
|
||||
operand_str.append(to_hex(byte));
|
||||
if (labels.find(byte - 0x200) != labels.end()) {
|
||||
operand_str.clear();
|
||||
operand_str = labels.at(byte - 0x200).name;
|
||||
} else {
|
||||
// try to see if the operand is offset from a label
|
||||
if (byte > 0x200) {
|
||||
uint16_t *closest_label = find_closest_label(labels, byte - 0x200);
|
||||
if (closest_label != nullptr &&
|
||||
labels.at(*closest_label).type == target_type) {
|
||||
uint16_t offset = (byte - 0x200) - (*closest_label);
|
||||
|
||||
// discard our result if the offset is greater than the
|
||||
// length of the label
|
||||
if (offset > labels.at(*closest_label).length) {
|
||||
return operand_str;
|
||||
}
|
||||
operand_str.clear();
|
||||
operand_str = labels.at(*closest_label).name;
|
||||
operand_str.append(" + 0x");
|
||||
operand_str.append(to_hex(offset));
|
||||
operand_str.append("");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return operand_str;
|
||||
}
|
||||
|
||||
// I could emit something like omni assembly, nut that is significantly more
|
||||
// complex than just emitting the assembly like this, so I am just going to
|
||||
// emit the assembly like this for now
|
||||
void print_instruction(Bytecode bytecode, uint16_t pc,
|
||||
std::unordered_map<uint16_t, struct label> labels) {
|
||||
switch (bytecode.instruction_type) {
|
||||
case HLT:
|
||||
printf("halt\n");
|
||||
break;
|
||||
case EXIT:
|
||||
printf("exit 0x%02x\n", bytecode.operand.byte);
|
||||
break;
|
||||
case SYS:
|
||||
if (labels.find(bytecode.operand.word - 0x200) == labels.end()) {
|
||||
fprintf(stderr, "No label found for %04x\n",
|
||||
bytecode.operand.word - 0x200);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
printf("sys %s\n",
|
||||
labels.find(bytecode.operand.word - 0x200)->second.name.c_str());
|
||||
break;
|
||||
case CLS:
|
||||
printf("cls\n");
|
||||
break;
|
||||
case RET:
|
||||
printf("ret\n");
|
||||
break;
|
||||
case JP:
|
||||
if (pc == 0 && bytecode.operand.word == 0x260) {
|
||||
printf("hires\n");
|
||||
break;
|
||||
}
|
||||
if (labels.find(bytecode.operand.word - 0x200) == labels.end()) {
|
||||
fprintf(stderr, "No label found for %04x\n",
|
||||
bytecode.operand.word - 0x200);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
printf("jp %s\n",
|
||||
labels.find(bytecode.operand.word - 0x200)->second.name.c_str());
|
||||
break;
|
||||
case CALL:
|
||||
if (labels.find(bytecode.operand.word - 0x200) == labels.end()) {
|
||||
fprintf(stderr, "No label found for %04x\n",
|
||||
bytecode.operand.word - 0x200);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
printf("call %s\n",
|
||||
labels.find(bytecode.operand.word - 0x200)->second.name.c_str());
|
||||
break;
|
||||
case SKIP_INSTRUCTION_BYTE:
|
||||
printf("se v%x, 0x%02x\n", bytecode.operand.byte_reg.reg,
|
||||
bytecode.operand.byte_reg.byte);
|
||||
break;
|
||||
case SKIP_INSTRUCTION_NE_BYTE:
|
||||
printf("sne v%x, 0x%02x\n", bytecode.operand.byte_reg.reg,
|
||||
bytecode.operand.byte_reg.byte);
|
||||
break;
|
||||
case SKIP_INSTRUCTION_REG:
|
||||
printf("se v%x, v%x\n", bytecode.operand.reg_reg.x,
|
||||
bytecode.operand.reg_reg.y);
|
||||
break;
|
||||
case SKIP_INSTRUCTION_NE_REG:
|
||||
printf("sne v%x, v%x\n", bytecode.operand.reg_reg.x,
|
||||
bytecode.operand.reg_reg.y);
|
||||
break;
|
||||
case LOAD_BYTE: {
|
||||
printf("ld v%x, 0x%02x\n", bytecode.operand.byte_reg.reg,
|
||||
bytecode.operand.byte_reg.byte);
|
||||
break;
|
||||
}
|
||||
case ADD_BYTE:
|
||||
printf("add v%x, 0x%02x\n", bytecode.operand.byte_reg.reg,
|
||||
bytecode.operand.byte_reg.byte);
|
||||
break;
|
||||
case LOAD_REG:
|
||||
printf("ld v%x, v%x\n", bytecode.operand.reg_reg.x,
|
||||
bytecode.operand.reg_reg.y);
|
||||
break;
|
||||
case ADD_REG:
|
||||
printf("add v%x, v%x\n", bytecode.operand.reg_reg.x,
|
||||
bytecode.operand.reg_reg.y);
|
||||
break;
|
||||
case OR_REG:
|
||||
printf("or v%x, v%x\n", bytecode.operand.reg_reg.x,
|
||||
bytecode.operand.reg_reg.y);
|
||||
break;
|
||||
case AND_REG:
|
||||
printf("and v%x, v%x\n", bytecode.operand.reg_reg.x,
|
||||
bytecode.operand.reg_reg.y);
|
||||
break;
|
||||
case XOR_REG:
|
||||
printf("xor v%x, v%x\n", bytecode.operand.reg_reg.x,
|
||||
bytecode.operand.reg_reg.y);
|
||||
break;
|
||||
case SHR_REG:
|
||||
printf("shr v%x\n", bytecode.operand.reg_reg.x);
|
||||
break;
|
||||
case SUB_REG:
|
||||
printf("sub v%x, v%x\n", bytecode.operand.reg_reg.x,
|
||||
bytecode.operand.reg_reg.y);
|
||||
break;
|
||||
case SUBN_REG:
|
||||
printf("subn v%x, v%x\n", bytecode.operand.reg_reg.x,
|
||||
bytecode.operand.reg_reg.y);
|
||||
break;
|
||||
case SHL_REG:
|
||||
printf("shl v%x\n", bytecode.operand.reg_reg.x);
|
||||
break;
|
||||
case LOAD_I_BYTE: {
|
||||
printf("ld I, %s\n", get_label_for_byte(bytecode.operand.word, labels,
|
||||
label::type_byte)
|
||||
.c_str());
|
||||
break;
|
||||
}
|
||||
case JP_V0_BYTE: {
|
||||
printf("jp v0, %s\n", get_label_for_byte(bytecode.operand.word, labels,
|
||||
label::type_instruction)
|
||||
.c_str());
|
||||
break;
|
||||
}
|
||||
case RND:
|
||||
printf("rnd v%x, 0x%02x\n", bytecode.operand.byte_reg.reg,
|
||||
bytecode.operand.byte_reg.byte);
|
||||
break;
|
||||
case DRW:
|
||||
printf("drw v%x, v%x, 0x%x\n", bytecode.operand.reg_reg_nibble.x,
|
||||
bytecode.operand.reg_reg_nibble.y,
|
||||
bytecode.operand.reg_reg_nibble.nibble);
|
||||
break;
|
||||
case SKIP_PRESSED_REG:
|
||||
printf("skp v%x\n", bytecode.operand.byte);
|
||||
break;
|
||||
case SKIP_NOT_PRESSED_REG:
|
||||
printf("sknp v%x\n", bytecode.operand.byte);
|
||||
break;
|
||||
case LD_REG_DT:
|
||||
printf("ld v%x, dt\n", bytecode.operand.byte);
|
||||
break;
|
||||
case LD_REG_K:
|
||||
printf("ld v%x, k\n", bytecode.operand.byte);
|
||||
break;
|
||||
case LD_DT_REG:
|
||||
printf("ld dt, v%x\n", bytecode.operand.byte);
|
||||
break;
|
||||
case LD_ST_REG:
|
||||
printf("ld st, v%x\n", bytecode.operand.byte);
|
||||
break;
|
||||
case ADD_I_REG:
|
||||
printf("add I, v%x\n", bytecode.operand.byte);
|
||||
break;
|
||||
case LD_F_REG:
|
||||
printf("ld f, v%x\n", bytecode.operand.byte);
|
||||
break;
|
||||
case LD_B_REG:
|
||||
printf("ld b, v%x\n", bytecode.operand.byte);
|
||||
break;
|
||||
case LD_PTR_I_REG:
|
||||
printf("ld [I], v%x\n", bytecode.operand.byte);
|
||||
break;
|
||||
case LD_REG_PTR_I:
|
||||
printf("ld v%x, [I]\n", bytecode.operand.byte);
|
||||
break;
|
||||
case UNKNOWN_INSTRUCTION:
|
||||
printf("?\n");
|
||||
}
|
||||
}
|
||||
|
||||
struct hole {
|
||||
uint16_t start;
|
||||
uint16_t end;
|
||||
};
|
||||
|
||||
struct assembly_node {
|
||||
uint16_t address;
|
||||
enum type {
|
||||
type_byte,
|
||||
type_instruction,
|
||||
} type;
|
||||
union {
|
||||
uint8_t byte;
|
||||
Bytecode instruction;
|
||||
};
|
||||
};
|
||||
|
||||
void write_assembly(std::vector<struct assembly_node> assembly,
|
||||
std::unordered_map<uint16_t, struct label> labels) {
|
||||
std::sort(assembly.begin(), assembly.end(),
|
||||
[](const struct assembly_node &a, const struct assembly_node &b) {
|
||||
return a.address < b.address;
|
||||
});
|
||||
uint16_t last_byte_address = 0x0000;
|
||||
|
||||
for (auto &node : assembly) {
|
||||
if (node.type != assembly_node::type_byte && node.address != 0 &&
|
||||
node.address - 1 == last_byte_address) {
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
if (labels.find(node.address) != labels.end()) {
|
||||
if (node.address != 0x0000) {
|
||||
// add whitespacing between labels, but not for the _start label
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
printf("%s:\n", labels.at(node.address).name.c_str());
|
||||
}
|
||||
|
||||
switch (node.type) {
|
||||
case assembly_node::type_byte:
|
||||
if (node.address != last_byte_address + 1) {
|
||||
printf("db ");
|
||||
} else {
|
||||
printf(",\n ");
|
||||
}
|
||||
|
||||
printf("0x%02x", node.byte);
|
||||
last_byte_address = node.address;
|
||||
break;
|
||||
case assembly_node::type_instruction:
|
||||
// if previous assembly was a byte, then we need to emit a new line
|
||||
print_instruction(node.instruction, node.address, labels);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void disassemble(uint8_t *rom, int rom_size) {
|
||||
// evaluate the bytecode, but dont actually execute it, just print it out,
|
||||
// and when we reach a branching instruction, follow it. Make sure that if
|
||||
// we enter an inifinite loop we dont just loop forever, so make sure we
|
||||
// keep track of what we have already visited
|
||||
std::unordered_set<uint16_t> addresses_visited;
|
||||
std::queue<uint16_t> work_queue;
|
||||
std::vector<struct hole> holes;
|
||||
std::unordered_map<uint16_t, struct label> labels;
|
||||
size_t label_idx = 0;
|
||||
uint16_t *stack = (uint16_t *)calloc(16, sizeof(uint16_t));
|
||||
if (stack == NULL) {
|
||||
fprintf(stderr, "Failed to allocate stack!");
|
||||
exit(1);
|
||||
}
|
||||
// holds the start of a label
|
||||
size_t stack_idx = 0;
|
||||
|
||||
std::vector<struct assembly_node> assembly;
|
||||
|
||||
// start at the beginning of the rom
|
||||
work_queue.push(0x0000);
|
||||
labels.emplace(
|
||||
0x0000,
|
||||
label{.type = label::type_instruction, .length = 0, .name = "_start"});
|
||||
|
||||
while (!work_queue.empty()) {
|
||||
uint16_t pc = work_queue.front();
|
||||
work_queue.pop();
|
||||
|
||||
if (pc >= (uint16_t)rom_size) {
|
||||
// if we are reading past the end of the rom, we are done
|
||||
break;
|
||||
}
|
||||
|
||||
if (addresses_visited.find(pc) != addresses_visited.end())
|
||||
continue;
|
||||
addresses_visited.insert(pc);
|
||||
|
||||
uint16_t opcode = (rom[pc] << 8) | rom[pc + 1];
|
||||
Bytecode bytecode = parse(opcode);
|
||||
|
||||
assembly.push_back({.address = pc,
|
||||
.type = assembly_node::type_instruction,
|
||||
.instruction = bytecode});
|
||||
|
||||
switch (bytecode.instruction_type) {
|
||||
case JP: {
|
||||
if (pc == 0 && bytecode.operand.word == 0x260) {
|
||||
work_queue.push(0x2C0);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!labels.contains(bytecode.operand.word - 0x200)) {
|
||||
labels.emplace(
|
||||
bytecode.operand.word - 0x200,
|
||||
label{.type = label::type_instruction,
|
||||
.length = 0,
|
||||
.name = "_" + std::to_string(label_idx++)});
|
||||
}
|
||||
work_queue.push(bytecode.operand.word - 0x200);
|
||||
break;
|
||||
}
|
||||
case CALL: {
|
||||
if (stack_idx == 16) {
|
||||
fprintf(stderr, "Stack overflow!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (!labels.contains(bytecode.operand.word - 0x200)) {
|
||||
labels.emplace(
|
||||
bytecode.operand.word - 0x200,
|
||||
label{.type = label::type_instruction,
|
||||
.length = 0,
|
||||
.name = "_" + std::to_string(label_idx++)});
|
||||
}
|
||||
stack[stack_idx++] = pc + 2;
|
||||
|
||||
work_queue.push(bytecode.operand.word - 0x200);
|
||||
break;
|
||||
}
|
||||
case SKIP_INSTRUCTION_BYTE:
|
||||
case SKIP_INSTRUCTION_NE_BYTE:
|
||||
case SKIP_INSTRUCTION_REG:
|
||||
case SKIP_INSTRUCTION_NE_REG:
|
||||
case SKIP_PRESSED_REG:
|
||||
case SKIP_NOT_PRESSED_REG: {
|
||||
work_queue.push(pc + 2);
|
||||
work_queue.push(pc + 4);
|
||||
break;
|
||||
}
|
||||
case RET: {
|
||||
if (stack_idx == 0) {
|
||||
fprintf(stderr, "Stack underflow!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
uint16_t ret_pc = stack[--stack_idx];
|
||||
work_queue.push(ret_pc);
|
||||
break;
|
||||
}
|
||||
case HLT: { // Stop following
|
||||
break;
|
||||
}
|
||||
case UNKNOWN_INSTRUCTION: {
|
||||
fprintf(stderr, "Unknown instruction: %04x\n", opcode);
|
||||
// we failed at disassembling smartly
|
||||
break;
|
||||
}
|
||||
default:
|
||||
work_queue.push(pc + 2);
|
||||
}
|
||||
}
|
||||
|
||||
bool skip = false;
|
||||
uint16_t *last_seen_byte_array = nullptr;
|
||||
uint16_t start_of_last_contiguous_block = 0x0000;
|
||||
for (uint16_t pc = 0x00; pc < rom_size; pc++) {
|
||||
if (skip) {
|
||||
skip = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (addresses_visited.find(pc) != addresses_visited.end()) {
|
||||
// when there is an instruction that we have already visited, we
|
||||
// want to skip this byte and the next byte, but we cant rely of the
|
||||
// instructions being aligned to 0x02 bytes, so instead we tell the
|
||||
// next run of the loop to skip
|
||||
skip = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// this seems scary, but it's fine because the if block will jump down
|
||||
// if the first condition is met, so it will never dereference a null
|
||||
// pointer
|
||||
if (last_seen_byte_array == nullptr ||
|
||||
*last_seen_byte_array != pc - 1) {
|
||||
if (last_seen_byte_array == nullptr) {
|
||||
last_seen_byte_array = new uint16_t;
|
||||
}
|
||||
|
||||
start_of_last_contiguous_block = pc;
|
||||
|
||||
// we are not in a contiguous block of bytes, so we need to add a
|
||||
// label
|
||||
if (!labels.contains(pc)) {
|
||||
labels.emplace(
|
||||
pc, label{.type = label::type_byte,
|
||||
.length = 1,
|
||||
.name = "_" + std::to_string(label_idx++)});
|
||||
}
|
||||
} else {
|
||||
// we are in a contiguous block of bytes, so we need to update the
|
||||
// label's length by one
|
||||
labels[start_of_last_contiguous_block].length++;
|
||||
}
|
||||
|
||||
*last_seen_byte_array = pc;
|
||||
|
||||
assembly.push_back(
|
||||
{.address = pc, .type = assembly_node::type_byte, .byte = rom[pc]});
|
||||
}
|
||||
|
||||
for (auto &pair : labels) {
|
||||
uint16_t pc = pair.first;
|
||||
uint16_t label_length = 0;
|
||||
// while we havent reached the end of the rom, and we havent crossed
|
||||
// into a new label
|
||||
while (pc < rom_size) {
|
||||
label_length++;
|
||||
switch (pair.second.type) {
|
||||
case label::type_byte:
|
||||
pc++;
|
||||
break;
|
||||
case label::type_instruction:
|
||||
pc += 2;
|
||||
break;
|
||||
}
|
||||
|
||||
if (labels.find(pc) != labels.end())
|
||||
break;
|
||||
}
|
||||
|
||||
pair.second.length = label_length;
|
||||
}
|
||||
|
||||
write_assembly(assembly, labels);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (argc < 2) {
|
||||
printf("Usage: %s <file>\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int rom_fd = open(argv[1], O_RDONLY);
|
||||
if (rom_fd < 0) {
|
||||
fprintf(stderr, "Failed to open file: %s\n", argv[1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int rom_size = lseek(rom_fd, 0, SEEK_END);
|
||||
(void)lseek(rom_fd, 0, SEEK_SET);
|
||||
|
||||
uint8_t *rom = (uint8_t *)calloc(rom_size, sizeof(uint8_t));
|
||||
if (rom == NULL) {
|
||||
fprintf(stderr, "Failed to allocate memory!\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
read(rom_fd, rom, rom_size);
|
||||
|
||||
disassemble(rom, rom_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user