bus (feat): add cycle accuracy

Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
This commit is contained in:
2024-06-15 03:49:10 +05:30
parent cb75ebf8ef
commit c22333812e
9 changed files with 657 additions and 108 deletions

View File

@@ -5,6 +5,7 @@
#include <memory>
#include <optional>
#include <span>
#include <unordered_map>
#include <vector>
namespace matar {
@@ -22,14 +23,19 @@ class Bus {
static std::shared_ptr<Bus> init(std::array<uint8_t, BIOS_SIZE>&&,
std::vector<uint8_t>&&);
uint8_t read_byte(uint32_t);
void write_byte(uint32_t, uint8_t);
uint8_t read_byte(uint32_t, bool = true);
void write_byte(uint32_t, uint8_t, bool = true);
uint16_t read_halfword(uint32_t);
void write_halfword(uint32_t, uint16_t);
uint16_t read_halfword(uint32_t, bool = true);
void write_halfword(uint32_t, uint16_t, bool = true);
uint32_t read_word(uint32_t);
void write_word(uint32_t, uint32_t);
uint32_t read_word(uint32_t, bool = true);
void write_word(uint32_t, uint32_t, bool = true);
// Adds one internal (I) cycle to the running cycle counter.
inline void internal_cycle() { cycles += 1; }
inline uint32_t get_cycles() { return cycles; }
private:
template<unsigned int>
@@ -38,9 +44,21 @@ class Bus {
template<unsigned int>
std::optional<std::span<uint8_t>> write(uint32_t);
uint32_t cycles = 0;
// Per-region access cost in cycles, split by access kind (non sequential /
// sequential) and by access width (8/16 bit / 32 bit).
struct cycle_count {
uint8_t n16; // non sequential 8/16 bit width access
uint8_t n32; // non sequential 32 bit width access
uint8_t s16; // sequential 8/16 bit width access
uint8_t s32; // sequential 32 bit width access
};
std::array<cycle_count, 0x10> cycle_map;
static constexpr decltype(cycle_map) init_cycle_count();
std::unique_ptr<IoDevices> io;
#define MEMORY_REGION(name, start) \
static constexpr uint32_t name##_START = start; \
static constexpr uint8_t name##_REGION = start >> 24 & 0xFF;
static constexpr uint8_t name##_REGION = start >> 24 & 0xF;
#define DECL_MEMORY(name, ident, start, end) \
MEMORY_REGION(name, start) \
@@ -70,12 +88,12 @@ class Bus {
MEMORY_REGION(ROM_1, 0x0A000000)
MEMORY_REGION(ROM_2, 0x0C000000)
MEMORY_REGION(IO, 0x04000000)
static constexpr uint32_t IO_END = 0x040003FE;
#undef MEMORY_REGION
std::vector<uint8_t> rom;
std::unique_ptr<IoDevices> io;
Header header;
void parse_header();
};

View File

@@ -49,4 +49,7 @@ add(uint32_t a, uint32_t b, bool& carry, bool& overflow, bool c = 0);
uint32_t
sbc(uint32_t a, uint32_t b, bool& carry, bool& overflow, bool c);
uint8_t
multiplier_array_cycles(uint32_t x, bool zeroes_only = false);
}

View File

@@ -16,6 +16,23 @@ class Cpu {
void step();
void chg_mode(const Mode to);
bool is_flushed = false;
// Refills both pipeline slots starting at the current pc and marks the
// pipeline as flushed. The first fetch is issued as a non sequential access,
// the second one as a sequential access; pc is advanced by one instruction
// (ARM or Thumb sized, depending on the current state) after each fetch.
inline void flush_pipeline() {
    is_flushed = true;

    const bool in_arm_state = cpsr.state() == State::Arm;
    bool related_fetch      = false;

    for (auto& slot : opcodes) {
        if (in_arm_state) {
            slot = bus->read_word(pc, related_fetch);
            advance_pc_arm();
        } else {
            slot = bus->read_halfword(pc, related_fetch);
            advance_pc_thumb();
        }

        // every fetch after the first one follows its predecessor
        related_fetch = true;
    }

    // the next opcode fetch in step() continues from the refilled pipeline
    sequential = true;
}
private:
friend void arm::Instruction::exec(Cpu& cpu);
friend void thumb::Instruction::exec(Cpu& cpu);
@@ -66,26 +83,15 @@ class Cpu {
Psr und;
} spsr_banked = {}; // banked saved program status registers
inline void internal_cycle() { bus->internal_cycle(); }
// whether read is going to be sequential or not
bool sequential = true;
// raw instructions in the pipeline
std::array<uint32_t, 2> opcodes = {};
inline void advance_pc_arm() { pc += arm::INSTRUCTION_SIZE; };
inline void advance_pc_thumb() { pc += thumb::INSTRUCTION_SIZE; }
bool is_flushed = false;
inline void flush_pipeline() {
is_flushed = true;
if (cpsr.state() == State::Arm) {
opcodes[0] = bus->read_word(pc);
advance_pc_arm();
opcodes[1] = bus->read_word(pc);
advance_pc_arm();
} else {
opcodes[0] = bus->read_halfword(pc);
advance_pc_thumb();
opcodes[1] = bus->read_halfword(pc);
advance_pc_thumb();
}
};
};
}

View File

@@ -4,13 +4,11 @@
namespace matar {
static constexpr uint32_t IO_START = 0x4000000;
static constexpr uint32_t IO_END = 0x40003FE;
Bus::Bus(Private,
std::array<uint8_t, BIOS_SIZE>&& bios,
std::vector<uint8_t>&& rom)
: bios(std::move(bios))
: cycle_map(init_cycle_count())
, bios(std::move(bios))
, rom(std::move(rom)) {
std::string bios_hash = crypto::sha256(this->bios);
static constexpr std::string_view expected_hash =
@@ -38,11 +36,52 @@ Bus::init(std::array<uint8_t, BIOS_SIZE>&& bios, std::vector<uint8_t>&& rom) {
return self;
}
constexpr decltype(Bus::cycle_map)
Bus::init_cycle_count() {
/*
Region Bus Read Write Cycles
BIOS ROM 32 8/16/32 - 1/1/1
Work RAM 32K 32 8/16/32 8/16/32 1/1/1
I/O 32 8/16/32 8/16/32 1/1/1
OAM 32 8/16/32 16/32 1/1/1 *
Work RAM 256K 16 8/16/32 8/16/32 3/3/6 **
Palette RAM 16 8/16/32 16/32 1/1/2 *
VRAM 16 8/16/32 16/32 1/1/2 *
GamePak ROM 16 8/16/32 - 5/5/8 **|***
GamePak Flash 16 8/16/32 16/32 5/5/8 **|***
GamePak SRAM 8 8 8 5 **
Timing Notes:
* Plus 1 cycle if GBA accesses video memory at the same time.
** Default waitstate settings, see System Control chapter.
*** Separate timings for sequential, and non-sequential accesses.
One cycle equals approx. 59.59ns (ie. 16.78MHz clock).
*/
decltype(cycle_map) map;
map.fill({ 1, 1, 1, 1 });
/* used fill instead of this
map[BIOS_REGION] = { 1, 1, 1, 1 };
map[CHIP_WRAM_REGION] = { 1, 1, 1, 1 };
map[IO_REGION] = { 1, 1, 1, 1 };
map[OAM_REGION] = { 1, 1, 1, 1 };
*/
map[3] = { 1, 1, 1, 1 };
map[BOARD_WRAM_REGION] = { .n16 = 3, .n32 = 6, .s16 = 3, .s32 = 6 };
map[PALETTE_RAM_REGION] = { .n16 = 1, .n32 = 2, .s16 = 1, .s32 = 2 };
map[VRAM_REGION] = { .n16 = 1, .n32 = 2, .s16 = 1, .s32 = 2 };
// TODO: GamePak access cycles
return map;
}
template<unsigned int N>
std::optional<std::span<const uint8_t>>
Bus::read(uint32_t address) const {
switch (address >> 24 & 0xFF) {
switch (address >> 24 & 0xF) {
#define MATCHES(AREA, area) \
case AREA##_REGION: \
@@ -80,7 +119,7 @@ template<unsigned int N>
std::optional<std::span<uint8_t>>
Bus::write(uint32_t address) {
switch (address >> 24 & 0xFF) {
switch (address >> 24 & 0xF) {
#define MATCHES(AREA, area) \
case AREA##_REGION: \
@@ -97,12 +136,14 @@ Bus::write(uint32_t address) {
#undef MATCHES
}
glogger.error("Invalid memory region written");
return {};
}
uint8_t
Bus::read_byte(uint32_t address) {
Bus::read_byte(uint32_t address, bool sequential) {
auto cc = cycle_map[address >> 24 & 0xF];
cycles += sequential ? cc.s16 : cc.n16;
if (address >= IO_START && address <= IO_END)
return io->read_byte(address);
@@ -111,7 +152,10 @@ Bus::read_byte(uint32_t address) {
}
void
Bus::write_byte(uint32_t address, uint8_t byte) {
Bus::write_byte(uint32_t address, uint8_t byte, bool sequential) {
auto cc = cycle_map[address >> 24 & 0xF];
cycles += sequential ? cc.s16 : cc.n16;
if (address >= IO_START && address <= IO_END) {
io->write_byte(address, byte);
return;
@@ -124,10 +168,13 @@ Bus::write_byte(uint32_t address, uint8_t byte) {
}
uint16_t
Bus::read_halfword(uint32_t address) {
Bus::read_halfword(uint32_t address, bool sequential) {
if (address & 0b01)
glogger.warn("Reading a non aligned halfword address");
auto cc = cycle_map[address >> 24 & 0xF];
cycles += sequential ? cc.s16 : cc.n16;
if (address >= IO_START && address <= IO_END)
return io->read_halfword(address);
@@ -137,10 +184,13 @@ Bus::read_halfword(uint32_t address) {
}
void
Bus::write_halfword(uint32_t address, uint16_t halfword) {
Bus::write_halfword(uint32_t address, uint16_t halfword, bool sequential) {
if (address & 0b01)
glogger.warn("Writing to a non aligned halfword address");
auto cc = cycle_map[address >> 24 & 0xF];
cycles += sequential ? cc.s16 : cc.n16;
if (address >= IO_START && address <= IO_END) {
io->write_halfword(address, halfword);
return;
@@ -156,10 +206,13 @@ Bus::write_halfword(uint32_t address, uint16_t halfword) {
}
uint32_t
Bus::read_word(uint32_t address) {
Bus::read_word(uint32_t address, bool sequential) {
if (address & 0b11)
glogger.warn("Reading a non aligned word address");
auto cc = cycle_map[address >> 24 & 0xF];
cycles += sequential ? cc.s32 : cc.n32;
if (address >= IO_START && address <= IO_END)
return io->read_word(address);
@@ -171,10 +224,13 @@ Bus::read_word(uint32_t address) {
}
void
Bus::write_word(uint32_t address, uint32_t word) {
Bus::write_word(uint32_t address, uint32_t word, bool sequential) {
if (address & 0b11)
glogger.warn("Writing to a non aligned word address");
auto cc = cycle_map[address >> 24 & 0xF];
cycles += sequential ? cc.s32 : cc.n32;
if (address >= IO_START && address <= IO_END) {
io->write_word(address, word);
return;

View File

@@ -88,4 +88,21 @@ sbc(uint32_t a, uint32_t b, bool& carry, bool& overflow, bool c) {
return result & 0xFFFFFFFF;
}
// Number of multiplier array cycles (m) needed for the operand x.
//
// m is 1, 2 or 3 when the upper 24, 16 or 8 bits of x respectively are all
// zero - or all one, unless zeroes_only is set - and 4 otherwise.
uint8_t
multiplier_array_cycles(uint32_t x, bool zeroes_only) {
    uint8_t m = 1;

    // the mask walks 0xFFFFFF00 -> 0xFFFF0000 -> 0xFF000000 and then becomes 0
    for (uint32_t mask = 0xFFFFFF00; mask != 0; mask <<= 8, m++) {
        const uint32_t upper = x & mask;

        if (upper == 0 || (!zeroes_only && upper == mask))
            return m;
    }

    // none of the early-out conditions matched, m is 4 at this point
    return m;
}
}

View File

@@ -24,6 +24,14 @@ Instruction::exec(Cpu& cpu) {
std::visit(
overloaded{
[&cpu, pc_warn](BranchAndExchange& data) {
/*
S -> reading instruction in step()
N -> fetch from the new address in branch
S -> last opcode fetch at +L to refill the pipeline
Total = 2S + N cycles
1S done, S+N taken care of by flush_pipeline()
*/
uint32_t addr = cpu.gpr[data.rn];
State state = static_cast<State>(get_bit(addr, 0));
@@ -48,6 +56,14 @@ Instruction::exec(Cpu& cpu) {
cpu.is_flushed = true;
},
[&cpu](Branch& data) {
/*
S -> reading instruction in step()
N -> fetch from the new address in branch
S -> last opcode fetch at +L to refill the pipeline
Total = 2S + N cycles
1S done, S+N taken care of by flush_pipeline()
*/
if (data.link)
cpu.gpr[14] = cpu.pc - INSTRUCTION_SIZE;
@@ -57,6 +73,19 @@ Instruction::exec(Cpu& cpu) {
cpu.is_flushed = true;
},
[&cpu, pc_error](Multiply& data) {
/*
S -> reading instruction in step()
mI -> m internal cycles
I -> only when accumulating
let v = data at rs
m = 1 if bits [31:8] of v are all zero or all one
m = 2 [31:16]
m = 3 [31:24]
m = 4 otherwise
Total = S + mI or S + (m+1)I
*/
if (data.rd == data.rm)
glogger.error("rd and rm are not distinct in {}",
typeid(data).name());
@@ -65,8 +94,17 @@ Instruction::exec(Cpu& cpu) {
pc_error(data.rd);
pc_error(data.rd);
cpu.gpr[data.rd] = cpu.gpr[data.rm] * cpu.gpr[data.rs] +
(data.acc ? cpu.gpr[data.rn] : 0);
// mI
for (int i = 0; i < multiplier_array_cycles(cpu.gpr[data.rs]); i++)
cpu.internal_cycle();
cpu.gpr[data.rd] = cpu.gpr[data.rm] * cpu.gpr[data.rs];
if (data.acc) {
cpu.gpr[data.rd] += cpu.gpr[data.rn];
// 1I
cpu.internal_cycle();
}
if (data.set) {
cpu.cpsr.set_z(cpu.gpr[data.rd] == 0);
@@ -75,6 +113,21 @@ Instruction::exec(Cpu& cpu) {
}
},
[&cpu, pc_error](MultiplyLong& data) {
/*
S -> reading instruction in step()
(m+1)I -> m + 1 internal cycles
I -> only when accumulating
let v = data at rs
m = 1 if bits [31:8] of v are all zeroes (or all ones if signed)
m = 2 [31:16]
m = 3 [31:24]
m = 4 otherwise
Total = S + mI or S + (m+1)I
Total = S + (m+1)I or S + (m+2)I
*/
if (data.rdhi == data.rdlo || data.rdhi == data.rm ||
data.rdlo == data.rm)
glogger.error("rdhi, rdlo and rm are not distinct in {}",
@@ -85,6 +138,16 @@ Instruction::exec(Cpu& cpu) {
pc_error(data.rm);
pc_error(data.rs);
// 1I
if (data.acc)
cpu.internal_cycle();
// m+1 internal cycles
for (int i = 0;
i <= multiplier_array_cycles(cpu.gpr[data.rs], data.uns);
i++)
cpu.internal_cycle();
if (data.uns) {
auto cast = [](uint32_t x) -> uint64_t {
return static_cast<uint64_t>(x);
@@ -121,21 +184,53 @@ Instruction::exec(Cpu& cpu) {
cpu.cpsr.set_v(0);
}
},
[](Undefined) { glogger.warn("Undefined instruction"); },
[](Undefined) {
// this should be 2S + N + I, should i flush the pipeline? i dont
// know. TODO: study
glogger.warn("Undefined instruction");
},
[&cpu, pc_error](SingleDataSwap& data) {
/*
N -> reading instruction in step()
N -> unrelated read
S -> related write
I -> earlier read value is written to register
Total = S + 2N +I
*/
pc_error(data.rm);
pc_error(data.rn);
pc_error(data.rd);
if (data.byte) {
cpu.gpr[data.rd] = cpu.bus->read_byte(cpu.gpr[data.rn]);
cpu.bus->write_byte(cpu.gpr[data.rn], cpu.gpr[data.rm] & 0xFF);
cpu.gpr[data.rd] = cpu.bus->read_byte(cpu.gpr[data.rn], false);
cpu.bus->write_byte(
cpu.gpr[data.rn], cpu.gpr[data.rm] & 0xFF, true);
} else {
cpu.gpr[data.rd] = cpu.bus->read_word(cpu.gpr[data.rn]);
cpu.bus->write_word(cpu.gpr[data.rn], cpu.gpr[data.rm]);
cpu.gpr[data.rd] = cpu.bus->read_word(cpu.gpr[data.rn], false);
cpu.bus->write_word(cpu.gpr[data.rn], cpu.gpr[data.rm], true);
}
cpu.internal_cycle();
// last write address is unrelated to next
cpu.sequential = false;
},
[&cpu, pc_warn, pc_error](SingleDataTransfer& data) {
/*
Load
====
S -> reading instruction in step()
N -> read from target
I -> stored in register
N+S -> if PC is written - taken care of by flush_pipeline()
Total = S + N + I or 2S + 2N + I
Store
=====
N -> calculating memory address
N -> write at target
Total = 2N
*/
uint32_t offset = 0;
uint32_t address = cpu.gpr[data.rn];
@@ -178,10 +273,17 @@ Instruction::exec(Cpu& cpu) {
if (data.load) {
// byte
if (data.byte)
cpu.gpr[data.rd] = cpu.bus->read_byte(address);
cpu.gpr[data.rd] = cpu.bus->read_byte(address, false);
// word
else
cpu.gpr[data.rd] = cpu.bus->read_word(address);
cpu.gpr[data.rd] = cpu.bus->read_word(address, false);
// N + S
if (data.rd == cpu.PC_INDEX)
cpu.is_flushed = true;
// I
cpu.internal_cycle();
// store
} else {
// take PC into consideration
@@ -190,10 +292,11 @@ Instruction::exec(Cpu& cpu) {
// byte
if (data.byte)
cpu.bus->write_byte(address, cpu.gpr[data.rd] & 0xFF);
cpu.bus->write_byte(
address, cpu.gpr[data.rd] & 0xFF, false);
// word
else
cpu.bus->write_word(address, cpu.gpr[data.rd]);
cpu.bus->write_word(address, cpu.gpr[data.rd], false);
}
if (!data.pre)
@@ -202,10 +305,26 @@ Instruction::exec(Cpu& cpu) {
if (!data.pre || data.write)
cpu.gpr[data.rn] = address;
if (data.rd == cpu.PC_INDEX && data.load)
cpu.is_flushed = true;
// last read/write is unrelated, this will be overwritten if flushed
cpu.sequential = false;
},
[&cpu, pc_warn, pc_error](HalfwordTransfer& data) {
/*
Load
====
S -> reading instruction in step()
N -> read from target
I -> stored in register
N+S -> if PC is written - taken care of by flush_pipeline()
Total = S + N + I or 2S + 2N + I
Store
=====
N -> calculating memory address
N -> write at target
Total = 2N
*/
uint32_t address = cpu.gpr[data.rn];
uint32_t offset = 0;
@@ -240,7 +359,8 @@ Instruction::exec(Cpu& cpu) {
if (data.sign) {
// halfword
if (data.half) {
cpu.gpr[data.rd] = cpu.bus->read_halfword(address);
cpu.gpr[data.rd] =
cpu.bus->read_halfword(address, false);
// sign extend the halfword
cpu.gpr[data.rd] =
@@ -248,7 +368,7 @@ Instruction::exec(Cpu& cpu) {
// byte
} else {
cpu.gpr[data.rd] = cpu.bus->read_byte(address);
cpu.gpr[data.rd] = cpu.bus->read_byte(address, false);
// sign extend the byte
cpu.gpr[data.rd] =
@@ -256,8 +376,15 @@ Instruction::exec(Cpu& cpu) {
}
// unsigned halfword
} else if (data.half) {
cpu.gpr[data.rd] = cpu.bus->read_halfword(address);
cpu.gpr[data.rd] = cpu.bus->read_halfword(address, false);
}
// I
cpu.internal_cycle();
if (data.rd == cpu.PC_INDEX)
cpu.is_flushed = true;
// store
} else {
// take PC into consideration
@@ -266,7 +393,7 @@ Instruction::exec(Cpu& cpu) {
// halfword
if (data.half)
cpu.bus->write_halfword(address, cpu.gpr[data.rd]);
cpu.bus->write_halfword(address, cpu.gpr[data.rd], false);
}
if (!data.pre)
@@ -275,15 +402,34 @@ Instruction::exec(Cpu& cpu) {
if (!data.pre || data.write)
cpu.gpr[data.rn] = address;
if (data.rd == cpu.PC_INDEX && data.load)
cpu.is_flushed = true;
// last read/write is unrelated, this will be overwritten if flushed
cpu.sequential = false;
},
[&cpu, pc_error](BlockDataTransfer& data) {
/*
Load
====
S -> reading instruction in step()
N -> unrelated read from target
(n-1) S -> next n - 1 related reads from target
I -> stored in register
N+S -> if PC is written - taken care of by flush_pipeline()
Total = nS + N + I or (n+1)S + 2N + I
Store
=====
N -> calculating memory address
N -> unrelated write at target
(n-1) S -> next n - 1 related writes
Total = 2N + (n-1)S
*/
static constexpr uint8_t alignment = 4; // word
uint32_t address = cpu.gpr[data.rn];
Mode mode = cpu.cpsr.mode();
int8_t i = 0;
bool sequential = false;
pc_error(data.rn);
@@ -308,40 +454,54 @@ Instruction::exec(Cpu& cpu) {
address += (data.up ? alignment : -alignment);
if (data.load) {
if (get_bit(data.regs, cpu.PC_INDEX) && data.s && data.load) {
if (get_bit(data.regs, cpu.PC_INDEX)) {
cpu.is_flushed = true;
// current mode's cpu.spsr is already loaded when it was
// switched
cpu.spsr = cpu.cpsr;
if (data.s)
cpu.spsr = cpu.cpsr;
}
if (data.up) {
for (i = 0; i < cpu.GPR_COUNT; i++) {
if (get_bit(data.regs, i)) {
cpu.gpr[i] = cpu.bus->read_word(address);
cpu.gpr[i] =
cpu.bus->read_word(address, sequential);
address += alignment;
sequential = true;
}
}
} else {
for (i = cpu.GPR_COUNT - 1; i >= 0; i--) {
if (get_bit(data.regs, i)) {
cpu.gpr[i] = cpu.bus->read_word(address);
cpu.gpr[i] =
cpu.bus->read_word(address, sequential);
address -= alignment;
sequential = true;
}
}
}
// I
cpu.internal_cycle();
} else {
if (data.up) {
for (i = 0; i < cpu.GPR_COUNT; i++) {
if (get_bit(data.regs, i)) {
cpu.bus->write_word(address, cpu.gpr[i]);
cpu.bus->write_word(
address, cpu.gpr[i], sequential);
address += alignment;
sequential = true;
}
}
} else {
for (i = cpu.GPR_COUNT - 1; i >= 0; i--) {
if (get_bit(data.regs, i)) {
cpu.bus->write_word(address, cpu.gpr[i]);
cpu.bus->write_word(
address, cpu.gpr[i], sequential);
address -= alignment;
sequential = true;
}
}
}
@@ -354,13 +514,18 @@ Instruction::exec(Cpu& cpu) {
if (!data.pre || data.write)
cpu.gpr[data.rn] = address;
if (data.load && get_bit(data.regs, cpu.PC_INDEX))
cpu.is_flushed = true;
// load back the original mode registers
cpu.chg_mode(mode);
// last read/write is unrelated, this will be overwritten if flushed
cpu.sequential = false;
},
[&cpu, pc_error](PsrTransfer& data) {
/*
S -> prefetched instruction in step()
Total = 1S cycle
*/
if (data.spsr && cpu.cpsr.mode() == Mode::User) {
glogger.error("Accessing CPU.SPSR in User mode in {}",
typeid(data).name());
@@ -396,6 +561,24 @@ Instruction::exec(Cpu& cpu) {
}
},
[&cpu, pc_error](DataProcessing& data) {
/*
Always
======
S -> prefetched instruction in step()
With Register specified shift
=============================
I -> internal cycle
When PC is written
==================
N -> fetch from the new address in branch
S -> last opcode fetch at +L to refill the pipeline
S+N taken care of by flush_pipeline()
Total = S or S + I or 2S + N + I or 2S + N cycles
*/
using OpCode = DataProcessing::OpCode;
uint32_t op_1 = cpu.gpr[data.rn];
@@ -425,6 +608,10 @@ Instruction::exec(Cpu& cpu) {
// PC is 12 bytes ahead when shifting
if (data.rn == cpu.PC_INDEX)
op_1 += INSTRUCTION_SIZE;
// 1I when register specified shift
if (shift->data.operand)
cpu.internal_cycle();
}
bool overflow = cpu.cpsr.v();

View File

@@ -129,11 +129,10 @@ Cpu::step() {
// word align
rst_bit(pc, 1);
uint32_t next_opcode = bus->read_word(pc);
arm::Instruction instruction(opcodes[0]);
opcodes[0] = opcodes[1];
opcodes[1] = next_opcode;
opcodes[1] = bus->read_word(pc, sequential);
#ifdef DISASSEMBLER
glogger.info("0x{:08X} : {}",
@@ -149,11 +148,10 @@ Cpu::step() {
} else
advance_pc_arm();
} else {
uint32_t next_opcode = bus->read_halfword(pc);
thumb::Instruction instruction(opcodes[0]);
opcodes[0] = opcodes[1];
opcodes[1] = next_opcode;
opcodes[1] = bus->read_halfword(pc, sequential);
#ifdef DISASSEMBLER
glogger.info("0x{:08X} : {}",
@@ -162,6 +160,7 @@ Cpu::step() {
#endif
instruction.exec(*this);
if (is_flushed) {
flush_pipeline();
is_flushed = false;

View File

@@ -1,3 +1,4 @@
#include "cpu/alu.hh"
#include "cpu/cpu.hh"
#include "util/bits.hh"
#include "util/log.hh"
@@ -15,6 +16,11 @@ Instruction::exec(Cpu& cpu) {
std::visit(
overloaded{
[&cpu, set_cc](MoveShiftedRegister& data) {
/*
S -> prefetched instruction in step()
Total = S cycle
*/
if (data.opcode == ShiftType::ROR)
glogger.error("Invalid opcode in {}", typeid(data).name());
@@ -28,6 +34,11 @@ Instruction::exec(Cpu& cpu) {
set_cc(carry, cpu.cpsr.v(), get_bit(shifted, 31), shifted == 0);
},
[&cpu, set_cc](AddSubtract& data) {
/*
S -> prefetched instruction in step()
Total = S cycle
*/
uint32_t offset =
data.imm ? static_cast<uint32_t>(static_cast<int8_t>(data.offset))
: cpu.gpr[data.offset];
@@ -48,6 +59,11 @@ Instruction::exec(Cpu& cpu) {
set_cc(carry, overflow, get_bit(result, 31), result == 0);
},
[&cpu, set_cc](MovCmpAddSubImmediate& data) {
/*
S -> prefetched instruction in step()
Total = S cycle
*/
uint32_t result = 0;
bool carry = cpu.cpsr.c();
bool overflow = cpu.cpsr.v();
@@ -73,6 +89,25 @@ Instruction::exec(Cpu& cpu) {
cpu.gpr[data.rd] = result;
},
[&cpu, set_cc](AluOperations& data) {
/*
Data Processing
===============
S -> prefetched instruction in step()
I -> only when register specified shift
Total = S or S + I cycles
Multiply
========
S -> reading instruction in step()
mI -> m internal cycles
let v = data at rs
m = 1 if bits [31:8] of v are all zero or all one
m = 2 [31:16]
m = 3 [31:24]
m = 4 otherwise
Total = S + mI cycles
*/
uint32_t op_1 = cpu.gpr[data.rd];
uint32_t op_2 = cpu.gpr[data.rs];
uint32_t result = 0;
@@ -90,12 +125,15 @@ Instruction::exec(Cpu& cpu) {
break;
case AluOperations::OpCode::LSL:
result = eval_shift(ShiftType::LSL, op_1, op_2, carry);
cpu.internal_cycle();
break;
case AluOperations::OpCode::LSR:
result = eval_shift(ShiftType::LSR, op_1, op_2, carry);
cpu.internal_cycle();
break;
case AluOperations::OpCode::ASR:
result = eval_shift(ShiftType::ASR, op_1, op_2, carry);
cpu.internal_cycle();
break;
case AluOperations::OpCode::ADC:
result = add(op_1, op_2, carry, overflow, carry);
@@ -105,6 +143,7 @@ Instruction::exec(Cpu& cpu) {
break;
case AluOperations::OpCode::ROR:
result = eval_shift(ShiftType::ROR, op_1, op_2, carry);
cpu.internal_cycle();
break;
case AluOperations::OpCode::NEG:
result = -op_2;
@@ -120,6 +159,9 @@ Instruction::exec(Cpu& cpu) {
break;
case AluOperations::OpCode::MUL:
result = op_1 * op_2;
// mI cycles
for (int i = 0; i < multiplier_array_cycles(op_2); i++)
cpu.internal_cycle();
break;
case AluOperations::OpCode::BIC:
result = op_1 & ~op_2;
@@ -137,6 +179,20 @@ Instruction::exec(Cpu& cpu) {
set_cc(carry, overflow, get_bit(result, 31), result == 0);
},
[&cpu, set_cc](HiRegisterOperations& data) {
/*
Always
======
S -> prefetched instruction in step()
When PC is written
==================
N -> fetch from the new address in branch
S -> last opcode fetch at +L to refill the pipeline
S+N taken care of by flush_pipeline()
Total = S or 2S + N cycles
*/
uint32_t op_1 = cpu.gpr[data.rd];
uint32_t op_2 = cpu.gpr[data.rs];
@@ -191,95 +247,157 @@ Instruction::exec(Cpu& cpu) {
}
},
[&cpu](PcRelativeLoad& data) {
/*
S -> reading instruction in step()
N -> read from target
I -> stored in register
Total = S + N + I cycles
*/
uint32_t pc = cpu.pc;
rst_bit(pc, 0);
rst_bit(pc, 1);
cpu.gpr[data.rd] = cpu.bus->read_word(pc + data.word);
cpu.gpr[data.rd] = cpu.bus->read_word(pc + data.word, false);
cpu.internal_cycle();
// last read is unrelated
cpu.sequential = false;
},
[&cpu](LoadStoreRegisterOffset& data) {
/*
Load
====
S -> reading instruction in step()
N -> read from target
I -> stored in register
Total = S + N + I
Store
=====
N -> calculating memory address
N -> write at target
Total = 2N
*/
uint32_t address = cpu.gpr[data.rb] + cpu.gpr[data.ro];
if (data.load) {
if (data.byte) {
cpu.gpr[data.rd] = cpu.bus->read_byte(address);
cpu.gpr[data.rd] = cpu.bus->read_byte(address, false);
} else {
cpu.gpr[data.rd] = cpu.bus->read_word(address);
cpu.gpr[data.rd] = cpu.bus->read_word(address, false);
}
cpu.internal_cycle();
} else {
if (data.byte) {
cpu.bus->write_byte(address, cpu.gpr[data.rd] & 0xFF);
cpu.bus->write_byte(
address, cpu.gpr[data.rd] & 0xFF, false);
} else {
cpu.bus->write_word(address, cpu.gpr[data.rd]);
cpu.bus->write_word(address, cpu.gpr[data.rd], false);
}
}
// last read/write is unrelated
cpu.sequential = false;
},
[&cpu](LoadStoreSignExtendedHalfword& data) {
// Same cycles as above
uint32_t address = cpu.gpr[data.rb] + cpu.gpr[data.ro];
switch (data.s << 1 | data.h) {
case 0b00:
cpu.bus->write_halfword(address, cpu.gpr[data.rd] & 0xFFFF);
cpu.bus->write_halfword(
address, cpu.gpr[data.rd] & 0xFFFF, false);
break;
case 0b01:
cpu.gpr[data.rd] = cpu.bus->read_halfword(address);
cpu.gpr[data.rd] = cpu.bus->read_halfword(address, false);
cpu.internal_cycle();
break;
case 0b10:
// sign extend and load the byte
cpu.gpr[data.rd] =
(static_cast<int32_t>(cpu.bus->read_byte(address))
(static_cast<int32_t>(cpu.bus->read_byte(address, false))
<< 24) >>
24;
cpu.internal_cycle();
break;
case 0b11:
// sign extend the halfword
cpu.gpr[data.rd] =
(static_cast<int32_t>(cpu.bus->read_halfword(address))
(static_cast<int32_t>(
cpu.bus->read_halfword(address, false))
<< 16) >>
16;
cpu.internal_cycle();
break;
// unreachable
default: {
}
}
// last read/write is unrelated
cpu.sequential = false;
},
[&cpu](LoadStoreImmediateOffset& data) {
// Same cycles as above
uint32_t address = cpu.gpr[data.rb] + data.offset;
if (data.load) {
if (data.byte) {
cpu.gpr[data.rd] = cpu.bus->read_byte(address);
cpu.gpr[data.rd] = cpu.bus->read_byte(address, false);
} else {
cpu.gpr[data.rd] = cpu.bus->read_word(address);
cpu.gpr[data.rd] = cpu.bus->read_word(address, false);
}
cpu.internal_cycle();
} else {
if (data.byte) {
cpu.bus->write_byte(address, cpu.gpr[data.rd] & 0xFF);
cpu.bus->write_byte(
address, cpu.gpr[data.rd] & 0xFF, false);
} else {
cpu.bus->write_word(address, cpu.gpr[data.rd]);
cpu.bus->write_word(address, cpu.gpr[data.rd], false);
}
}
// last read/write is unrelated
cpu.sequential = false;
},
[&cpu](LoadStoreHalfword& data) {
    /*
      Load
      ====
      S -> reading instruction in step()
      N -> read from target
      I -> stored in register
      Total = S + N + I

      Store
      =====
      N -> calculating memory address
      N -> write at target
      Total = 2N
    */
    uint32_t address = cpu.gpr[data.rb] + data.offset;

    if (data.load) {
        // the data access does not follow the opcode fetch, so it has to
        // be counted as a non sequential (N) access
        cpu.gpr[data.rd] = cpu.bus->read_halfword(address, false);
        // I
        cpu.internal_cycle();
    } else {
        // N, same reasoning as for the load
        cpu.bus->write_halfword(
          address, cpu.gpr[data.rd] & 0xFFFF, false);
    }

    // last read/write is unrelated
    cpu.sequential = false;
},
[&cpu](SpRelativeLoad& data) {
    /*
      Load
      ====
      S -> reading instruction in step()
      N -> read from target
      I -> stored in register
      Total = S + N + I

      Store
      =====
      N -> calculating memory address
      N -> write at target
      Total = 2N
    */
    uint32_t address = cpu.sp + data.word;

    if (data.load) {
        // the data access does not follow the opcode fetch, so it has to
        // be counted as a non sequential (N) access
        cpu.gpr[data.rd] = cpu.bus->read_word(address, false);
        // I
        cpu.internal_cycle();
    } else {
        // N, same reasoning as for the load
        cpu.bus->write_word(address, cpu.gpr[data.rd], false);
    }

    // last read/write is unrelated
    cpu.sequential = false;
},
[&cpu](LoadAddress& data) {
// 1S cycle in step()
if (data.sp) {
cpu.gpr[data.rd] = cpu.sp + data.word;
} else {
@@ -288,15 +406,38 @@ Instruction::exec(Cpu& cpu) {
cpu.gpr[data.rd] = (cpu.pc & ~(1 << 1)) + data.word;
}
},
[&cpu](AddOffsetStackPointer& data) { cpu.sp += data.word; },
[&cpu](AddOffsetStackPointer& data) {
// 1S cycle in step()
cpu.sp += data.word;
},
[&cpu](PushPopRegister& data) {
/*
Load
====
S -> reading instruction in step()
N -> unrelated read from target
(n-1) S -> next n - 1 related reads from target
I -> stored in register
N+S -> if PC is written - taken care of by flush_pipeline()
Total = nS + N + I or (n+1)S + 2N + I
Store
=====
N -> calculating memory address
N -> unrelated write at target
(n-1) S -> next n - 1 related writes
Total = 2N + (n-1)S
*/
static constexpr uint8_t alignment = 4;
bool sequential = false;
if (data.load) {
for (uint8_t i = 0; i < 8; i++) {
if (get_bit(data.regs, i)) {
cpu.gpr[i] = cpu.bus->read_word(cpu.sp);
cpu.gpr[i] = cpu.bus->read_word(cpu.sp, sequential);
cpu.sp += alignment;
sequential = true;
}
}
@@ -305,6 +446,9 @@ Instruction::exec(Cpu& cpu) {
cpu.sp += alignment;
cpu.is_flushed = true;
}
// I
cpu.internal_cycle();
} else {
if (data.pclr) {
cpu.sp -= alignment;
@@ -314,35 +458,68 @@ Instruction::exec(Cpu& cpu) {
for (int8_t i = 7; i >= 0; i--) {
if (get_bit(data.regs, i)) {
cpu.sp -= alignment;
cpu.bus->write_word(cpu.sp, cpu.gpr[i]);
cpu.bus->write_word(cpu.sp, cpu.gpr[i], sequential);
sequential = true;
}
}
}
// last read/write is unrelated
cpu.sequential = false;
},
[&cpu](MultipleLoad& data) {
/*
Load
====
S -> reading instruction in step()
N -> unrelated read from target
(n-1) S -> next n - 1 related reads from target
I -> stored in register
Total = nS + N + I
Store
=====
N -> calculating memory address
N -> unrelated write at target
(n-1) S -> next n - 1 related writes
Total = 2N + (n-1)S
*/
static constexpr uint8_t alignment = 4;
uint32_t rb = cpu.gpr[data.rb];
uint32_t rb = cpu.gpr[data.rb];
bool sequential = false;
if (data.load) {
for (uint8_t i = 0; i < 8; i++) {
if (get_bit(data.regs, i)) {
cpu.gpr[i] = cpu.bus->read_word(rb);
cpu.gpr[i] = cpu.bus->read_word(rb, sequential);
rb += alignment;
sequential = true;
}
}
} else {
for (int8_t i = 7; i >= 0; i--) {
if (get_bit(data.regs, i)) {
rb -= alignment;
cpu.bus->write_word(rb, cpu.gpr[i]);
cpu.bus->write_word(rb, cpu.gpr[i], sequential);
sequential = true;
}
}
}
cpu.gpr[data.rb] = rb;
// last read/write is unrelated
cpu.sequential = false;
},
[&cpu](ConditionalBranch& data) {
/*
S -> reading instruction in step()
N+S -> if condition is true, branch and refill pipeline
Total = S or 2S + N
*/
if (data.condition == Condition::AL)
glogger.warn("Condition 1110 (AL) is undefined");
@@ -353,6 +530,12 @@ Instruction::exec(Cpu& cpu) {
cpu.is_flushed = true;
},
[&cpu](SoftwareInterrupt& data) {
/*
S -> reading instruction in step()
N+S -> refill pipeline
Total = 2S + N
*/
// next instruction is one instruction behind PC
cpu.lr = cpu.pc - INSTRUCTION_SIZE;
cpu.spsr = cpu.cpsr;
@@ -362,10 +545,24 @@ Instruction::exec(Cpu& cpu) {
cpu.is_flushed = true;
},
[&cpu](UnconditionalBranch& data) {
/*
S -> reading instruction in step()
N+S -> branch and refill pipeline
Total = 2S + N
*/
cpu.pc += data.offset;
cpu.is_flushed = true;
},
[&cpu](LongBranchWithLink& data) {
/*
S -> prefetched instruction in step()
N -> fetch from the new address in branch
S -> last opcode fetch at +L to refill the pipeline
Total = 2S + N cycles
1S done, S+N taken care of by flush_pipeline()
*/
// 12 bit integer
int32_t offset = data.offset;

View File

@@ -26,12 +26,18 @@ TEST_CASE("bios", TAG) {
auto bus =
Bus::init(std::move(bios), std::vector<uint8_t>(Header::HEADER_SIZE));
uint32_t cycles = bus->get_cycles();
CHECK(bus->read_byte(0) == 0xAC);
CHECK(bus->read_byte(0x3FFF) == 0x48);
CHECK(bus->read_byte(0x2A56) == 0x10);
CHECK(bus->get_cycles() == cycles + 3);
}
TEST_CASE_METHOD(BusFixture, "board wram", TAG) {
uint32_t cycles = bus->get_cycles();
bus->write_byte(0x2000000, 0xAC);
CHECK(bus->read_byte(0x2000000) == 0xAC);
@@ -40,9 +46,25 @@ TEST_CASE_METHOD(BusFixture, "board wram", TAG) {
bus->write_byte(0x2022A56, 0x10);
CHECK(bus->read_byte(0x2022A56) == 0x10);
CHECK(bus->get_cycles() == cycles + 2 * 9);
cycles = bus->get_cycles();
bus->write_halfword(0x2022A56, 0x1009);
CHECK(bus->read_halfword(0x2022A56) == 0x1009);
CHECK(bus->get_cycles() == cycles + 2 * 3);
cycles = bus->get_cycles();
bus->write_word(0x2022A56, 0x10FF9903);
CHECK(bus->read_word(0x2022A56) == 0x10FF9903);
CHECK(bus->get_cycles() == cycles + 2 * 6);
}
TEST_CASE_METHOD(BusFixture, "chip wram", TAG) {
uint32_t cycles = bus->get_cycles();
bus->write_byte(0x3000000, 0xAC);
CHECK(bus->read_byte(0x3000000) == 0xAC);
@@ -51,9 +73,25 @@ TEST_CASE_METHOD(BusFixture, "chip wram", TAG) {
bus->write_byte(0x3002A56, 0x10);
CHECK(bus->read_byte(0x3002A56) == 0x10);
CHECK(bus->get_cycles() == cycles + 2 * 3);
cycles = bus->get_cycles();
bus->write_halfword(0x3002A56, 0xF0F0);
CHECK(bus->read_halfword(0x3002A56) == 0xF0F0);
CHECK(bus->get_cycles() == cycles + 2);
cycles = bus->get_cycles();
bus->write_word(0x3002A56, 0xF9399010);
CHECK(bus->read_word(0x3002A56) == 0xF9399010);
CHECK(bus->get_cycles() == cycles + 2);
}
TEST_CASE_METHOD(BusFixture, "palette ram", TAG) {
uint32_t cycles = bus->get_cycles();
bus->write_byte(0x5000000, 0xAC);
CHECK(bus->read_byte(0x5000000) == 0xAC);
@@ -62,9 +100,25 @@ TEST_CASE_METHOD(BusFixture, "palette ram", TAG) {
bus->write_byte(0x5000156, 0x10);
CHECK(bus->read_byte(0x5000156) == 0x10);
CHECK(bus->get_cycles() == cycles + 2 * 3);
cycles = bus->get_cycles();
bus->write_halfword(0x5000156, 0xEEE1);
CHECK(bus->read_halfword(0x5000156) == 0xEEE1);
CHECK(bus->get_cycles() == cycles + 2);
cycles = bus->get_cycles();
bus->write_word(0x5000156, 0x938566E0);
CHECK(bus->read_word(0x5000156) == 0x938566E0);
CHECK(bus->get_cycles() == cycles + 2 * 2);
}
TEST_CASE_METHOD(BusFixture, "video ram", TAG) {
uint32_t cycles = bus->get_cycles();
bus->write_byte(0x6000000, 0xAC);
CHECK(bus->read_byte(0x6000000) == 0xAC);
@@ -73,9 +127,25 @@ TEST_CASE_METHOD(BusFixture, "video ram", TAG) {
bus->write_byte(0x6012A56, 0x10);
CHECK(bus->read_byte(0x6012A56) == 0x10);
CHECK(bus->get_cycles() == cycles + 2 * 3);
cycles = bus->get_cycles();
bus->write_halfword(0x6012A56, 0xB100);
CHECK(bus->read_halfword(0x6012A56) == 0xB100);
CHECK(bus->get_cycles() == cycles + 2);
cycles = bus->get_cycles();
bus->write_word(0x6012A56, 0x9322093E);
CHECK(bus->read_word(0x6012A56) == 0x9322093E);
CHECK(bus->get_cycles() == cycles + 2 * 2);
}
TEST_CASE_METHOD(BusFixture, "oam obj ram", TAG) {
uint32_t cycles = bus->get_cycles();
bus->write_byte(0x7000000, 0xAC);
CHECK(bus->read_byte(0x7000000) == 0xAC);
@@ -84,6 +154,20 @@ TEST_CASE_METHOD(BusFixture, "oam obj ram", TAG) {
bus->write_byte(0x7000156, 0x10);
CHECK(bus->read_byte(0x7000156) == 0x10);
CHECK(bus->get_cycles() == cycles + 2 * 3);
cycles = bus->get_cycles();
bus->write_halfword(0x7000156, 0x946C);
CHECK(bus->read_halfword(0x7000156) == 0x946C);
CHECK(bus->get_cycles() == cycles + 2);
cycles = bus->get_cycles();
bus->write_word(0x7000156, 0x93C5D1E0);
CHECK(bus->read_word(0x7000156) == 0x93C5D1E0);
CHECK(bus->get_cycles() == cycles + 2);
}
TEST_CASE("rom", TAG) {
@@ -116,22 +200,4 @@ TEST_CASE("rom", TAG) {
}
}
TEST_CASE_METHOD(BusFixture, "Halfword", TAG) {
CHECK(bus->read_halfword(0x202FED9) == 0);
bus->write_halfword(0x202FED9, 0x1A4A);
CHECK(bus->read_halfword(0x202FED9) == 0x1A4A);
CHECK(bus->read_word(0x202FED9) == 0x1A4A);
CHECK(bus->read_byte(0x202FED9) == 0x4A);
}
TEST_CASE_METHOD(BusFixture, "Word", TAG) {
CHECK(bus->read_word(0x600EE34) == 0);
bus->write_word(0x600EE34, 0x3ACC491D);
CHECK(bus->read_word(0x600EE34) == 0x3ACC491D);
CHECK(bus->read_halfword(0x600EE34) == 0x491D);
CHECK(bus->read_byte(0x600EE34) == 0x1D);
}
#undef TAG