From c22333812ec70dcead9cb50f4b72ffc86f08ef06 Mon Sep 17 00:00:00 2001 From: Amneesh Singh Date: Sat, 15 Jun 2024 03:49:10 +0530 Subject: [PATCH] bus (feat): add cycle accuracy Signed-off-by: Amneesh Singh --- include/bus.hh | 38 +++++-- include/cpu/alu.hh | 3 + include/cpu/cpu.hh | 38 ++++--- src/bus.cc | 82 +++++++++++--- src/cpu/alu.cc | 17 +++ src/cpu/arm/exec.cc | 243 +++++++++++++++++++++++++++++++++++++----- src/cpu/cpu.cc | 7 +- src/cpu/thumb/exec.cc | 235 ++++++++++++++++++++++++++++++++++++---- tests/bus.cc | 102 ++++++++++++++---- 9 files changed, 657 insertions(+), 108 deletions(-) diff --git a/include/bus.hh b/include/bus.hh index 6667b03..e2b7206 100644 --- a/include/bus.hh +++ b/include/bus.hh @@ -5,6 +5,7 @@ #include #include #include +#include #include namespace matar { @@ -22,14 +23,19 @@ class Bus { static std::shared_ptr init(std::array&&, std::vector&&); - uint8_t read_byte(uint32_t); - void write_byte(uint32_t, uint8_t); + uint8_t read_byte(uint32_t, bool = true); + void write_byte(uint32_t, uint8_t, bool = true); - uint16_t read_halfword(uint32_t); - void write_halfword(uint32_t, uint16_t); + uint16_t read_halfword(uint32_t, bool = true); + void write_halfword(uint32_t, uint16_t, bool = true); - uint32_t read_word(uint32_t); - void write_word(uint32_t, uint32_t); + uint32_t read_word(uint32_t, bool = true); + void write_word(uint32_t, uint32_t, bool = true); + + // not sure what else to do? 
+ inline void internal_cycle() { cycles++; } + + inline uint32_t get_cycles() { return cycles; } private: template @@ -38,9 +44,21 @@ class Bus { template std::optional> write(uint32_t); + uint32_t cycles = 0; + struct cycle_count { + uint8_t n16; // non sequential 8/16 bit width access + uint8_t n32; // non sequential 32 bit width access + uint8_t s16; // seuquential 8/16 bit width access + uint8_t s32; // sequential 32 bit width access + }; + std::array cycle_map; + static constexpr decltype(cycle_map) init_cycle_count(); + + std::unique_ptr io; + #define MEMORY_REGION(name, start) \ static constexpr uint32_t name##_START = start; \ - static constexpr uint8_t name##_REGION = start >> 24 & 0xFF; + static constexpr uint8_t name##_REGION = start >> 24 & 0xF; #define DECL_MEMORY(name, ident, start, end) \ MEMORY_REGION(name, start) \ @@ -70,12 +88,12 @@ class Bus { MEMORY_REGION(ROM_1, 0x0A000000) MEMORY_REGION(ROM_2, 0x0C000000) + MEMORY_REGION(IO, 0x04000000) + static constexpr uint32_t IO_END = 0x040003FE; + #undef MEMORY_REGION std::vector rom; - - std::unique_ptr io; - Header header; void parse_header(); }; diff --git a/include/cpu/alu.hh b/include/cpu/alu.hh index ce77f6f..f62ea84 100644 --- a/include/cpu/alu.hh +++ b/include/cpu/alu.hh @@ -49,4 +49,7 @@ add(uint32_t a, uint32_t b, bool& carry, bool& overflow, bool c = 0); uint32_t sbc(uint32_t a, uint32_t b, bool& carry, bool& overflow, bool c); + +uint8_t +multiplier_array_cycles(uint32_t x, bool zeroes_only = false); } diff --git a/include/cpu/cpu.hh b/include/cpu/cpu.hh index 07df5ca..a752f22 100644 --- a/include/cpu/cpu.hh +++ b/include/cpu/cpu.hh @@ -16,6 +16,23 @@ class Cpu { void step(); void chg_mode(const Mode to); + bool is_flushed = false; + inline void flush_pipeline() { + is_flushed = true; + if (cpsr.state() == State::Arm) { + opcodes[0] = bus->read_word(pc, false); + advance_pc_arm(); + opcodes[1] = bus->read_word(pc); + advance_pc_arm(); + } else { + opcodes[0] = bus->read_halfword(pc, false); 
+ advance_pc_thumb(); + opcodes[1] = bus->read_halfword(pc); + advance_pc_thumb(); + } + sequential = true; + }; + private: friend void arm::Instruction::exec(Cpu& cpu); friend void thumb::Instruction::exec(Cpu& cpu); @@ -66,26 +83,15 @@ class Cpu { Psr und; } spsr_banked = {}; // banked saved program status registers + inline void internal_cycle() { bus->internal_cycle(); } + + // whether read is going to be sequential or not + bool sequential = true; + // raw instructions in the pipeline std::array opcodes = {}; inline void advance_pc_arm() { pc += arm::INSTRUCTION_SIZE; }; inline void advance_pc_thumb() { pc += thumb::INSTRUCTION_SIZE; } - - bool is_flushed = false; - inline void flush_pipeline() { - is_flushed = true; - if (cpsr.state() == State::Arm) { - opcodes[0] = bus->read_word(pc); - advance_pc_arm(); - opcodes[1] = bus->read_word(pc); - advance_pc_arm(); - } else { - opcodes[0] = bus->read_halfword(pc); - advance_pc_thumb(); - opcodes[1] = bus->read_halfword(pc); - advance_pc_thumb(); - } - }; }; } diff --git a/src/bus.cc b/src/bus.cc index 727d883..83880cd 100644 --- a/src/bus.cc +++ b/src/bus.cc @@ -4,13 +4,11 @@ namespace matar { -static constexpr uint32_t IO_START = 0x4000000; -static constexpr uint32_t IO_END = 0x40003FE; - Bus::Bus(Private, std::array&& bios, std::vector&& rom) - : bios(std::move(bios)) + : cycle_map(init_cycle_count()) + , bios(std::move(bios)) , rom(std::move(rom)) { std::string bios_hash = crypto::sha256(this->bios); static constexpr std::string_view expected_hash = @@ -38,11 +36,52 @@ Bus::init(std::array&& bios, std::vector&& rom) { return self; } +constexpr decltype(Bus::cycle_map) +Bus::init_cycle_count() { + /* + Region Bus Read Write Cycles + BIOS ROM 32 8/16/32 - 1/1/1 + Work RAM 32K 32 8/16/32 8/16/32 1/1/1 + I/O 32 8/16/32 8/16/32 1/1/1 + OAM 32 8/16/32 16/32 1/1/1 * + Work RAM 256K 16 8/16/32 8/16/32 3/3/6 ** + Palette RAM 16 8/16/32 16/32 1/1/2 * + VRAM 16 8/16/32 16/32 1/1/2 * + GamePak ROM 16 8/16/32 - 5/5/8 **|*** 
+ GamePak Flash 16 8/16/32 16/32 5/5/8 **|*** + GamePak SRAM 8 8 8 5 ** + + Timing Notes: + + * Plus 1 cycle if GBA accesses video memory at the same time. + ** Default waitstate settings, see System Control chapter. + *** Separate timings for sequential, and non-sequential accesses. + One cycle equals approx. 59.59ns (ie. 16.78MHz clock). + */ + + decltype(cycle_map) map; + map.fill({ 1, 1, 1, 1 }); + + /* used fill instead of this + map[BIOS_REGION] = { 1, 1, 1, 1 }; + map[CHIP_WRAM_REGION] = { 1, 1, 1, 1 }; + map[IO_REGION] = { 1, 1, 1, 1 }; + map[OAM_REGION] = { 1, 1, 1, 1 }; + */ + map[3] = { 1, 1, 1, 1 }; + map[BOARD_WRAM_REGION] = { .n16 = 3, .n32 = 6, .s16 = 3, .s32 = 6 }; + map[PALETTE_RAM_REGION] = { .n16 = 1, .n32 = 2, .s16 = 1, .s32 = 2 }; + map[VRAM_REGION] = { .n16 = 1, .n32 = 2, .s16 = 1, .s32 = 2 }; + // TODO: GamePak access cycles + + return map; +} + template std::optional> Bus::read(uint32_t address) const { - switch (address >> 24 & 0xFF) { + switch (address >> 24 & 0xF) { #define MATCHES(AREA, area) \ case AREA##_REGION: \ @@ -80,7 +119,7 @@ template std::optional> Bus::write(uint32_t address) { - switch (address >> 24 & 0xFF) { + switch (address >> 24 & 0xF) { #define MATCHES(AREA, area) \ case AREA##_REGION: \ @@ -97,12 +136,14 @@ Bus::write(uint32_t address) { #undef MATCHES } - glogger.error("Invalid memory region written"); return {}; } uint8_t -Bus::read_byte(uint32_t address) { +Bus::read_byte(uint32_t address, bool sequential) { + auto cc = cycle_map[address >> 24 & 0xF]; + cycles += sequential ? cc.s16 : cc.n16; + if (address >= IO_START && address <= IO_END) return io->read_byte(address); @@ -111,7 +152,10 @@ Bus::read_byte(uint32_t address) { } void -Bus::write_byte(uint32_t address, uint8_t byte) { +Bus::write_byte(uint32_t address, uint8_t byte, bool sequential) { + auto cc = cycle_map[address >> 24 & 0xF]; + cycles += sequential ? 
cc.s16 : cc.n16; + if (address >= IO_START && address <= IO_END) { io->write_byte(address, byte); return; @@ -124,10 +168,13 @@ Bus::write_byte(uint32_t address, uint8_t byte) { } uint16_t -Bus::read_halfword(uint32_t address) { +Bus::read_halfword(uint32_t address, bool sequential) { if (address & 0b01) glogger.warn("Reading a non aligned halfword address"); + auto cc = cycle_map[address >> 24 & 0xF]; + cycles += sequential ? cc.s16 : cc.n16; + if (address >= IO_START && address <= IO_END) return io->read_halfword(address); @@ -137,10 +184,13 @@ Bus::read_halfword(uint32_t address) { } void -Bus::write_halfword(uint32_t address, uint16_t halfword) { +Bus::write_halfword(uint32_t address, uint16_t halfword, bool sequential) { if (address & 0b01) glogger.warn("Writing to a non aligned halfword address"); + auto cc = cycle_map[address >> 24 & 0xF]; + cycles += sequential ? cc.s16 : cc.n16; + if (address >= IO_START && address <= IO_END) { io->write_halfword(address, halfword); return; @@ -156,10 +206,13 @@ Bus::write_halfword(uint32_t address, uint16_t halfword) { } uint32_t -Bus::read_word(uint32_t address) { +Bus::read_word(uint32_t address, bool sequential) { if (address & 0b11) glogger.warn("Reading a non aligned word address"); + auto cc = cycle_map[address >> 24 & 0xF]; + cycles += sequential ? cc.s32 : cc.n32; + if (address >= IO_START && address <= IO_END) return io->read_word(address); @@ -171,10 +224,13 @@ Bus::read_word(uint32_t address) { } void -Bus::write_word(uint32_t address, uint32_t word) { +Bus::write_word(uint32_t address, uint32_t word, bool sequential) { if (address & 0b11) glogger.warn("Writing to a non aligned word address"); + auto cc = cycle_map[address >> 24 & 0xF]; + cycles += sequential ? 
cc.s32 : cc.n32; + if (address >= IO_START && address <= IO_END) { io->write_word(address, word); return; diff --git a/src/cpu/alu.cc b/src/cpu/alu.cc index 0422ace..52de7b9 100644 --- a/src/cpu/alu.cc +++ b/src/cpu/alu.cc @@ -88,4 +88,21 @@ sbc(uint32_t a, uint32_t b, bool& carry, bool& overflow, bool c) { return result & 0xFFFFFFFF; } + +uint8_t +multiplier_array_cycles(uint32_t x, bool zeroes_only) { + // when zeroes_only is set, the all-ones checks below are skipped + + if ((!zeroes_only && (x & 0xFFFFFF00) == 0xFFFFFF00) || + (x & 0xFFFFFF00) == 0) + return 1; + if ((!zeroes_only && (x & 0xFFFF0000) == 0xFFFF0000) || + (x & 0xFFFF0000) == 0) + return 2; + if ((!zeroes_only && (x & 0xFF000000) == 0xFF000000) || + (x & 0xFF000000) == 0) + return 3; + return 4; +}; + } diff --git a/src/cpu/arm/exec.cc b/src/cpu/arm/exec.cc index 326f53e..914683c 100644 --- a/src/cpu/arm/exec.cc +++ b/src/cpu/arm/exec.cc @@ -24,6 +24,14 @@ Instruction::exec(Cpu& cpu) { std::visit( overloaded{ [&cpu, pc_warn](BranchAndExchange& data) { + /* + S -> reading instruction in step() + N -> fetch from the new address in branch + S -> last opcode fetch at +L to refill the pipeline + Total = 2S + N cycles + 1S done, S+N taken care of by flush_pipeline() + */ + uint32_t addr = cpu.gpr[data.rn]; State state = static_cast(get_bit(addr, 0)); @@ -48,6 +56,14 @@ Instruction::exec(Cpu& cpu) { cpu.is_flushed = true; }, [&cpu](Branch& data) { + /* + S -> reading instruction in step() + N -> fetch from the new address in branch + S -> last opcode fetch at +L to refill the pipeline + Total = 2S + N cycles + 1S done, S+N taken care of by flush_pipeline() + */ + if (data.link) cpu.gpr[14] = cpu.pc - INSTRUCTION_SIZE; @@ -57,6 +73,19 @@ Instruction::exec(Cpu& cpu) { cpu.is_flushed = true; }, [&cpu, pc_error](Multiply& data) { + /* + S -> reading instruction in step() + mI -> m internal cycles + I -> only when accumulating + let v = data at rs + m = 1 if bits [32:8] of v are all zero or all one + m 
= 2 [32:16] + m = 3 [32:24] + m = 4 otherwise + + Total = S + mI or S + (m+1)I + */ + if (data.rd == data.rm) glogger.error("rd and rm are not distinct in {}", typeid(data).name()); @@ -65,8 +94,17 @@ Instruction::exec(Cpu& cpu) { pc_error(data.rd); pc_error(data.rd); - cpu.gpr[data.rd] = cpu.gpr[data.rm] * cpu.gpr[data.rs] + - (data.acc ? cpu.gpr[data.rn] : 0); + // mI + for (int i = 0; i < multiplier_array_cycles(cpu.gpr[data.rs]); i++) + cpu.internal_cycle(); + + cpu.gpr[data.rd] = cpu.gpr[data.rm] * cpu.gpr[data.rs]; + + if (data.acc) { + cpu.gpr[data.rd] += cpu.gpr[data.rn]; + // 1I + cpu.internal_cycle(); + } if (data.set) { cpu.cpsr.set_z(cpu.gpr[data.rd] == 0); @@ -75,6 +113,21 @@ Instruction::exec(Cpu& cpu) { } }, [&cpu, pc_error](MultiplyLong& data) { + /* + S -> reading instruction in step() + (m+1)I -> m + 1 internal cycles + I -> only when accumulating + let v = data at rs + m = 1 if bits [32:8] of v are all zeroes (or all ones if signed) + m = 2 [32:16] + m = 3 [32:24] + m = 4 otherwise + + Total = S + mI or S + (m+1)I + + Total = S + (m+1)I or S + (m+2)I + */ + if (data.rdhi == data.rdlo || data.rdhi == data.rm || data.rdlo == data.rm) glogger.error("rdhi, rdlo and rm are not distinct in {}", @@ -85,6 +138,16 @@ Instruction::exec(Cpu& cpu) { pc_error(data.rm); pc_error(data.rs); + // 1I + if (data.acc) + cpu.internal_cycle(); + + // m+1 internal cycles + for (int i = 0; + i <= multiplier_array_cycles(cpu.gpr[data.rs], data.uns); + i++) + cpu.internal_cycle(); + if (data.uns) { auto cast = [](uint32_t x) -> uint64_t { return static_cast(x); @@ -121,21 +184,53 @@ Instruction::exec(Cpu& cpu) { cpu.cpsr.set_v(0); } }, - [](Undefined) { glogger.warn("Undefined instruction"); }, + [](Undefined) { + // this should be 2S + N + I, should i flush the pipeline? i dont + // know. 
TODO: study + glogger.warn("Undefined instruction"); + }, [&cpu, pc_error](SingleDataSwap& data) { + /* + N -> reading instruction in step() + N -> unrelated read + S -> related write + I -> earlier read value is written to register + Total = S + 2N +I + */ + pc_error(data.rm); pc_error(data.rn); pc_error(data.rd); if (data.byte) { - cpu.gpr[data.rd] = cpu.bus->read_byte(cpu.gpr[data.rn]); - cpu.bus->write_byte(cpu.gpr[data.rn], cpu.gpr[data.rm] & 0xFF); + cpu.gpr[data.rd] = cpu.bus->read_byte(cpu.gpr[data.rn], false); + cpu.bus->write_byte( + cpu.gpr[data.rn], cpu.gpr[data.rm] & 0xFF, true); } else { - cpu.gpr[data.rd] = cpu.bus->read_word(cpu.gpr[data.rn]); - cpu.bus->write_word(cpu.gpr[data.rn], cpu.gpr[data.rm]); + cpu.gpr[data.rd] = cpu.bus->read_word(cpu.gpr[data.rn], false); + cpu.bus->write_word(cpu.gpr[data.rn], cpu.gpr[data.rm], true); } + + cpu.internal_cycle(); + // last write address is unrelated to next + cpu.sequential = false; }, [&cpu, pc_warn, pc_error](SingleDataTransfer& data) { + /* + Load + ==== + S -> reading instruction in step() + N -> read from target + I -> stored in register + N+S -> if PC is written - taken care of by flush_pipeline() + Total = S + N + I or 2S + 2N + I + + Store + ===== + N -> calculating memory address + N -> write at target + Total = 2N + */ uint32_t offset = 0; uint32_t address = cpu.gpr[data.rn]; @@ -178,10 +273,17 @@ Instruction::exec(Cpu& cpu) { if (data.load) { // byte if (data.byte) - cpu.gpr[data.rd] = cpu.bus->read_byte(address); + cpu.gpr[data.rd] = cpu.bus->read_byte(address, false); // word else - cpu.gpr[data.rd] = cpu.bus->read_word(address); + cpu.gpr[data.rd] = cpu.bus->read_word(address, false); + + // N + S + if (data.rd == cpu.PC_INDEX) + cpu.is_flushed = true; + + // I + cpu.internal_cycle(); // store } else { // take PC into consideration @@ -190,10 +292,11 @@ Instruction::exec(Cpu& cpu) { // byte if (data.byte) - cpu.bus->write_byte(address, cpu.gpr[data.rd] & 0xFF); + cpu.bus->write_byte( + 
address, cpu.gpr[data.rd] & 0xFF, false); // word else - cpu.bus->write_word(address, cpu.gpr[data.rd]); + cpu.bus->write_word(address, cpu.gpr[data.rd], false); } if (!data.pre) @@ -202,10 +305,26 @@ Instruction::exec(Cpu& cpu) { if (!data.pre || data.write) cpu.gpr[data.rn] = address; - if (data.rd == cpu.PC_INDEX && data.load) - cpu.is_flushed = true; + // last read/write is unrelated, this will be overwritten if flushed + cpu.sequential = false; }, [&cpu, pc_warn, pc_error](HalfwordTransfer& data) { + /* + Load + ==== + S -> reading instruction in step() + N -> read from target + I -> stored in register + N+S -> if PC is written - taken care of by flush_pipeline() + Total = S + N + I or 2S + 2N + I + + Store + ===== + N -> calculating memory address + N -> write at target + Total = 2N + */ + uint32_t address = cpu.gpr[data.rn]; uint32_t offset = 0; @@ -240,7 +359,8 @@ Instruction::exec(Cpu& cpu) { if (data.sign) { // halfword if (data.half) { - cpu.gpr[data.rd] = cpu.bus->read_halfword(address); + cpu.gpr[data.rd] = + cpu.bus->read_halfword(address, false); // sign extend the halfword cpu.gpr[data.rd] = @@ -248,7 +368,7 @@ Instruction::exec(Cpu& cpu) { // byte } else { - cpu.gpr[data.rd] = cpu.bus->read_byte(address); + cpu.gpr[data.rd] = cpu.bus->read_byte(address, false); // sign extend the byte cpu.gpr[data.rd] = @@ -256,8 +376,15 @@ Instruction::exec(Cpu& cpu) { } // unsigned halfword } else if (data.half) { - cpu.gpr[data.rd] = cpu.bus->read_halfword(address); + cpu.gpr[data.rd] = cpu.bus->read_halfword(address, false); } + + // I + cpu.internal_cycle(); + + if (data.rd == cpu.PC_INDEX) + cpu.is_flushed = true; + // store } else { // take PC into consideration @@ -266,7 +393,7 @@ Instruction::exec(Cpu& cpu) { // halfword if (data.half) - cpu.bus->write_halfword(address, cpu.gpr[data.rd]); + cpu.bus->write_halfword(address, cpu.gpr[data.rd], false); } if (!data.pre) @@ -275,15 +402,34 @@ Instruction::exec(Cpu& cpu) { if (!data.pre || data.write) 
cpu.gpr[data.rn] = address; - if (data.rd == cpu.PC_INDEX && data.load) - cpu.is_flushed = true; + // last read/write is unrelated, this will be overwritten if flushed + cpu.sequential = false; }, [&cpu, pc_error](BlockDataTransfer& data) { + /* + Load + ==== + S -> reading instruction in step() + N -> unrelated read from target + (n-1) S -> next n - 1 related reads from target + I -> stored in register + N+S -> if PC is written - taken care of by flush_pipeline() + Total = nS + N + I or (n+1)S + 2N + I + + Store + ===== + N -> calculating memory address + N -> unrelated write at target + (n-1) S -> next n - 1 related writes + Total = 2N + (n-1)S + */ + static constexpr uint8_t alignment = 4; // word uint32_t address = cpu.gpr[data.rn]; Mode mode = cpu.cpsr.mode(); int8_t i = 0; + bool sequential = false; pc_error(data.rn); @@ -308,40 +454,54 @@ Instruction::exec(Cpu& cpu) { address += (data.up ? alignment : -alignment); if (data.load) { - if (get_bit(data.regs, cpu.PC_INDEX) && data.s && data.load) { + if (get_bit(data.regs, cpu.PC_INDEX)) { + cpu.is_flushed = true; + // current mode's cpu.spsr is already loaded when it was // switched - cpu.spsr = cpu.cpsr; + if (data.s) + cpu.spsr = cpu.cpsr; } if (data.up) { for (i = 0; i < cpu.GPR_COUNT; i++) { if (get_bit(data.regs, i)) { - cpu.gpr[i] = cpu.bus->read_word(address); + cpu.gpr[i] = + cpu.bus->read_word(address, sequential); address += alignment; + sequential = true; } } } else { for (i = cpu.GPR_COUNT - 1; i >= 0; i--) { if (get_bit(data.regs, i)) { - cpu.gpr[i] = cpu.bus->read_word(address); + cpu.gpr[i] = + cpu.bus->read_word(address, sequential); address -= alignment; + sequential = true; } } } + + // I + cpu.internal_cycle(); } else { if (data.up) { for (i = 0; i < cpu.GPR_COUNT; i++) { if (get_bit(data.regs, i)) { - cpu.bus->write_word(address, cpu.gpr[i]); + cpu.bus->write_word( + address, cpu.gpr[i], sequential); address += alignment; + sequential = true; } } } else { for (i = cpu.GPR_COUNT - 1; i >= 0; 
i--) { if (get_bit(data.regs, i)) { - cpu.bus->write_word(address, cpu.gpr[i]); + cpu.bus->write_word( + address, cpu.gpr[i], sequential); address -= alignment; + sequential = true; } } } @@ -354,13 +514,18 @@ Instruction::exec(Cpu& cpu) { if (!data.pre || data.write) cpu.gpr[data.rn] = address; - if (data.load && get_bit(data.regs, cpu.PC_INDEX)) - cpu.is_flushed = true; - // load back the original mode registers cpu.chg_mode(mode); + + // last read/write is unrelated, this will be overwritten if flushed + cpu.sequential = false; }, [&cpu, pc_error](PsrTransfer& data) { + /* + S -> prefetched instruction in step() + Total = 1S cycle + */ + if (data.spsr && cpu.cpsr.mode() == Mode::User) { glogger.error("Accessing CPU.SPSR in User mode in {}", typeid(data).name()); @@ -396,6 +561,24 @@ Instruction::exec(Cpu& cpu) { } }, [&cpu, pc_error](DataProcessing& data) { + /* + Always + ====== + S -> prefetched instruction in step() + + With Register specified shift + ============================= + I -> internal cycle + + When PC is written + ================== + N -> fetch from the new address in branch + S -> last opcode fetch at +L to refill the pipeline + S+N taken care of by flush_pipeline() + + Total = S or S + I or 2S + N + I or 2S + N cycles + */ + using OpCode = DataProcessing::OpCode; uint32_t op_1 = cpu.gpr[data.rn]; @@ -425,6 +608,10 @@ Instruction::exec(Cpu& cpu) { // PC is 12 bytes ahead when shifting if (data.rn == cpu.PC_INDEX) op_1 += INSTRUCTION_SIZE; + + // 1I when register specified shift + if (shift->data.operand) + cpu.internal_cycle(); } bool overflow = cpu.cpsr.v(); diff --git a/src/cpu/cpu.cc b/src/cpu/cpu.cc index e68f51d..22620b8 100644 --- a/src/cpu/cpu.cc +++ b/src/cpu/cpu.cc @@ -129,11 +129,10 @@ Cpu::step() { // word align rst_bit(pc, 1); - uint32_t next_opcode = bus->read_word(pc); arm::Instruction instruction(opcodes[0]); opcodes[0] = opcodes[1]; - opcodes[1] = next_opcode; + opcodes[1] = bus->read_word(pc, sequential); #ifdef DISASSEMBLER 
glogger.info("0x{:08X} : {}", @@ -149,11 +148,10 @@ Cpu::step() { } else advance_pc_arm(); } else { - uint32_t next_opcode = bus->read_halfword(pc); thumb::Instruction instruction(opcodes[0]); opcodes[0] = opcodes[1]; - opcodes[1] = next_opcode; + opcodes[1] = bus->read_halfword(pc, sequential); #ifdef DISASSEMBLER glogger.info("0x{:08X} : {}", @@ -162,6 +160,7 @@ Cpu::step() { #endif instruction.exec(*this); + if (is_flushed) { flush_pipeline(); is_flushed = false; diff --git a/src/cpu/thumb/exec.cc b/src/cpu/thumb/exec.cc index a0ac3c9..4123316 100644 --- a/src/cpu/thumb/exec.cc +++ b/src/cpu/thumb/exec.cc @@ -1,3 +1,4 @@ +#include "cpu/alu.hh" #include "cpu/cpu.hh" #include "util/bits.hh" #include "util/log.hh" @@ -15,6 +16,11 @@ Instruction::exec(Cpu& cpu) { std::visit( overloaded{ [&cpu, set_cc](MoveShiftedRegister& data) { + /* + S -> prefetched instruction in step() + + Total = S cycle + */ if (data.opcode == ShiftType::ROR) glogger.error("Invalid opcode in {}", typeid(data).name()); @@ -28,6 +34,11 @@ Instruction::exec(Cpu& cpu) { set_cc(carry, cpu.cpsr.v(), get_bit(shifted, 31), shifted == 0); }, [&cpu, set_cc](AddSubtract& data) { + /* + S -> prefetched instruction in step() + + Total = S cycle + */ uint32_t offset = data.imm ? 
static_cast(static_cast(data.offset)) : cpu.gpr[data.offset]; @@ -48,6 +59,11 @@ Instruction::exec(Cpu& cpu) { set_cc(carry, overflow, get_bit(result, 31), result == 0); }, [&cpu, set_cc](MovCmpAddSubImmediate& data) { + /* + S -> prefetched instruction in step() + + Total = S cycle + */ uint32_t result = 0; bool carry = cpu.cpsr.c(); bool overflow = cpu.cpsr.v(); @@ -73,6 +89,25 @@ Instruction::exec(Cpu& cpu) { cpu.gpr[data.rd] = result; }, [&cpu, set_cc](AluOperations& data) { + /* + Data Processing + =============== + S -> prefetched instruction in step() + I -> only when register specified shift + Total = S or S + I cycles + + Multiply + ======== + S -> reading instruction in step() + mI -> m internal cycles + let v = data at rs + m = 1 if bits [32:8] of v are all zero or all one + m = 2 [32:16] + m = 3 [32:24] + m = 4 otherwise + + Total = S + mI cycles + */ uint32_t op_1 = cpu.gpr[data.rd]; uint32_t op_2 = cpu.gpr[data.rs]; uint32_t result = 0; @@ -90,12 +125,15 @@ Instruction::exec(Cpu& cpu) { break; case AluOperations::OpCode::LSL: result = eval_shift(ShiftType::LSL, op_1, op_2, carry); + cpu.internal_cycle(); break; case AluOperations::OpCode::LSR: result = eval_shift(ShiftType::LSR, op_1, op_2, carry); + cpu.internal_cycle(); break; case AluOperations::OpCode::ASR: result = eval_shift(ShiftType::ASR, op_1, op_2, carry); + cpu.internal_cycle(); break; case AluOperations::OpCode::ADC: result = add(op_1, op_2, carry, overflow, carry); @@ -105,6 +143,7 @@ Instruction::exec(Cpu& cpu) { break; case AluOperations::OpCode::ROR: result = eval_shift(ShiftType::ROR, op_1, op_2, carry); + cpu.internal_cycle(); break; case AluOperations::OpCode::NEG: result = -op_2; @@ -120,6 +159,9 @@ Instruction::exec(Cpu& cpu) { break; case AluOperations::OpCode::MUL: result = op_1 * op_2; + // mI cycles + for (int i = 0; i < multiplier_array_cycles(op_2); i++) + cpu.internal_cycle(); break; case AluOperations::OpCode::BIC: result = op_1 & ~op_2; @@ -137,6 +179,20 @@ 
Instruction::exec(Cpu& cpu) { set_cc(carry, overflow, get_bit(result, 31), result == 0); }, [&cpu, set_cc](HiRegisterOperations& data) { + /* + Always + ====== + S -> prefetched instruction in step() + + When PC is written + ================== + N -> fetch from the new address in branch + S -> last opcode fetch at +L to refill the pipeline + S+N taken care of by flush_pipeline() + + Total = S or 2S + N cycles + */ + uint32_t op_1 = cpu.gpr[data.rd]; uint32_t op_2 = cpu.gpr[data.rs]; @@ -191,95 +247,157 @@ Instruction::exec(Cpu& cpu) { } }, [&cpu](PcRelativeLoad& data) { + /* + S -> reading instruction in step() + N -> read from target + I -> stored in register + Total = S + N + I cycles + */ uint32_t pc = cpu.pc; rst_bit(pc, 0); rst_bit(pc, 1); - cpu.gpr[data.rd] = cpu.bus->read_word(pc + data.word); + cpu.gpr[data.rd] = cpu.bus->read_word(pc + data.word, false); + + cpu.internal_cycle(); + + // last read is unrelated + cpu.sequential = false; }, [&cpu](LoadStoreRegisterOffset& data) { + /* + Load + ==== + S -> reading instruction in step() + N -> read from target + I -> stored in register + Total = S + N + I + + Store + ===== + N -> calculating memory address + N -> write at target + Total = 2N + */ + uint32_t address = cpu.gpr[data.rb] + cpu.gpr[data.ro]; if (data.load) { if (data.byte) { - cpu.gpr[data.rd] = cpu.bus->read_byte(address); + cpu.gpr[data.rd] = cpu.bus->read_byte(address, false); } else { - cpu.gpr[data.rd] = cpu.bus->read_word(address); + cpu.gpr[data.rd] = cpu.bus->read_word(address, false); } + cpu.internal_cycle(); } else { if (data.byte) { - cpu.bus->write_byte(address, cpu.gpr[data.rd] & 0xFF); + cpu.bus->write_byte( + address, cpu.gpr[data.rd] & 0xFF, false); } else { - cpu.bus->write_word(address, cpu.gpr[data.rd]); + cpu.bus->write_word(address, cpu.gpr[data.rd], false); } } + + // last read/write is unrelated + cpu.sequential = false; }, [&cpu](LoadStoreSignExtendedHalfword& data) { + // Same cycles as above + uint32_t address = 
cpu.gpr[data.rb] + cpu.gpr[data.ro]; switch (data.s << 1 | data.h) { case 0b00: - cpu.bus->write_halfword(address, cpu.gpr[data.rd] & 0xFFFF); + cpu.bus->write_halfword( + address, cpu.gpr[data.rd] & 0xFFFF, false); break; case 0b01: - cpu.gpr[data.rd] = cpu.bus->read_halfword(address); + cpu.gpr[data.rd] = cpu.bus->read_halfword(address, false); + cpu.internal_cycle(); break; case 0b10: // sign extend and load the byte cpu.gpr[data.rd] = - (static_cast(cpu.bus->read_byte(address)) + (static_cast(cpu.bus->read_byte(address, false)) << 24) >> 24; + cpu.internal_cycle(); break; case 0b11: // sign extend the halfword cpu.gpr[data.rd] = - (static_cast(cpu.bus->read_halfword(address)) + (static_cast( + cpu.bus->read_halfword(address, false)) << 16) >> 16; + cpu.internal_cycle(); break; // unreachable default: { } } + + // last read/write is unrelated + cpu.sequential = false; }, [&cpu](LoadStoreImmediateOffset& data) { + // Same cycles as above + uint32_t address = cpu.gpr[data.rb] + data.offset; if (data.load) { if (data.byte) { - cpu.gpr[data.rd] = cpu.bus->read_byte(address); + cpu.gpr[data.rd] = cpu.bus->read_byte(address, false); } else { - cpu.gpr[data.rd] = cpu.bus->read_word(address); + cpu.gpr[data.rd] = cpu.bus->read_word(address, false); } + cpu.internal_cycle(); } else { if (data.byte) { - cpu.bus->write_byte(address, cpu.gpr[data.rd] & 0xFF); + cpu.bus->write_byte( + address, cpu.gpr[data.rd] & 0xFF, false); } else { - cpu.bus->write_word(address, cpu.gpr[data.rd]); + cpu.bus->write_word(address, cpu.gpr[data.rd], false); } } + + // last read/write is unrelated + cpu.sequential = false; }, [&cpu](LoadStoreHalfword& data) { + // Same cycles as above + uint32_t address = cpu.gpr[data.rb] + data.offset; if (data.load) { cpu.gpr[data.rd] = cpu.bus->read_halfword(address); + cpu.internal_cycle(); } else { cpu.bus->write_halfword(address, cpu.gpr[data.rd] & 0xFFFF); } + + // last read/write is unrelated + cpu.sequential = false; }, [&cpu](SpRelativeLoad& data) { 
+ // Same cycles as above + uint32_t address = cpu.sp + data.word; if (data.load) { cpu.gpr[data.rd] = cpu.bus->read_word(address); + cpu.internal_cycle(); } else { cpu.bus->write_word(address, cpu.gpr[data.rd]); } + + // last read/write is unrelated + cpu.sequential = false; }, [&cpu](LoadAddress& data) { + // 1S cycle in step() + if (data.sp) { cpu.gpr[data.rd] = cpu.sp + data.word; } else { @@ -288,15 +406,38 @@ Instruction::exec(Cpu& cpu) { cpu.gpr[data.rd] = (cpu.pc & ~(1 << 1)) + data.word; } }, - [&cpu](AddOffsetStackPointer& data) { cpu.sp += data.word; }, + [&cpu](AddOffsetStackPointer& data) { + // 1S cycle in step() + + cpu.sp += data.word; + }, [&cpu](PushPopRegister& data) { + /* + Load + ==== + S -> reading instruction in step() + N -> unrelated read from target + (n-1) S -> next n - 1 related reads from target + I -> stored in register + N+S -> if PC is written - taken care of by flush_pipeline() + Total = nS + N + I or (n+1)S + 2N + I + + Store + ===== + N -> calculating memory address + N -> unrelated write at target + (n-1) S -> next n - 1 related writes + Total = 2N + (n-1)S + */ static constexpr uint8_t alignment = 4; + bool sequential = false; if (data.load) { for (uint8_t i = 0; i < 8; i++) { if (get_bit(data.regs, i)) { - cpu.gpr[i] = cpu.bus->read_word(cpu.sp); + cpu.gpr[i] = cpu.bus->read_word(cpu.sp, sequential); cpu.sp += alignment; + sequential = true; } } @@ -305,6 +446,9 @@ Instruction::exec(Cpu& cpu) { cpu.sp += alignment; cpu.is_flushed = true; } + + // I + cpu.internal_cycle(); } else { if (data.pclr) { cpu.sp -= alignment; @@ -314,35 +458,68 @@ Instruction::exec(Cpu& cpu) { for (int8_t i = 7; i >= 0; i--) { if (get_bit(data.regs, i)) { cpu.sp -= alignment; - cpu.bus->write_word(cpu.sp, cpu.gpr[i]); + cpu.bus->write_word(cpu.sp, cpu.gpr[i], sequential); + sequential = true; } } } + + // last read/write is unrelated + cpu.sequential = false; }, [&cpu](MultipleLoad& data) { + /* + Load + ==== + S -> reading instruction in step() + N 
-> unrelated read from target + (n-1) S -> next n - 1 related reads from target + I -> stored in register + Total = nS + N + I + + Store + ===== + N -> calculating memory address + N -> unrelated write at target + (n-1) S -> next n - 1 related writes + Total = 2N + (n-1)S + */ + static constexpr uint8_t alignment = 4; - uint32_t rb = cpu.gpr[data.rb]; + uint32_t rb = cpu.gpr[data.rb]; + bool sequential = false; if (data.load) { for (uint8_t i = 0; i < 8; i++) { if (get_bit(data.regs, i)) { - cpu.gpr[i] = cpu.bus->read_word(rb); + cpu.gpr[i] = cpu.bus->read_word(rb, sequential); rb += alignment; + sequential = true; } } } else { for (int8_t i = 7; i >= 0; i--) { if (get_bit(data.regs, i)) { rb -= alignment; - cpu.bus->write_word(rb, cpu.gpr[i]); + cpu.bus->write_word(rb, cpu.gpr[i], sequential); + sequential = true; } } } cpu.gpr[data.rb] = rb; + + // last read/write is unrelated + cpu.sequential = false; }, [&cpu](ConditionalBranch& data) { + /* + S -> reading instruction in step() + N+S -> if condition is true, branch and refill pipeline + Total = S or 2S + N + */ + if (data.condition == Condition::AL) glogger.warn("Condition 1110 (AL) is undefined"); @@ -353,6 +530,12 @@ Instruction::exec(Cpu& cpu) { cpu.is_flushed = true; }, [&cpu](SoftwareInterrupt& data) { + /* + S -> reading instruction in step() + N+S -> refill pipeline + Total = 2S + N + */ + // next instruction is one instruction behind PC cpu.lr = cpu.pc - INSTRUCTION_SIZE; cpu.spsr = cpu.cpsr; @@ -362,10 +545,24 @@ Instruction::exec(Cpu& cpu) { cpu.is_flushed = true; }, [&cpu](UnconditionalBranch& data) { + /* + S -> reading instruction in step() + N+S -> branch and refill pipeline + Total = 2S + N + */ + cpu.pc += data.offset; cpu.is_flushed = true; }, [&cpu](LongBranchWithLink& data) { + /* + S -> prefetched instruction in step() + N -> fetch from the new address in branch + S -> last opcode fetch at +L to refill the pipeline + Total = 2S + N cycles + 1S done, S+N taken care of by flush_pipeline() + */ 
+ // 12 bit integer int32_t offset = data.offset; diff --git a/tests/bus.cc b/tests/bus.cc index 32cdcdb..8b41a01 100644 --- a/tests/bus.cc +++ b/tests/bus.cc @@ -26,12 +26,18 @@ TEST_CASE("bios", TAG) { auto bus = Bus::init(std::move(bios), std::vector(Header::HEADER_SIZE)); + uint32_t cycles = bus->get_cycles(); + CHECK(bus->read_byte(0) == 0xAC); CHECK(bus->read_byte(0x3FFF) == 0x48); CHECK(bus->read_byte(0x2A56) == 0x10); + + CHECK(bus->get_cycles() == cycles + 3); } TEST_CASE_METHOD(BusFixture, "board wram", TAG) { + uint32_t cycles = bus->get_cycles(); + bus->write_byte(0x2000000, 0xAC); CHECK(bus->read_byte(0x2000000) == 0xAC); @@ -40,9 +46,25 @@ TEST_CASE_METHOD(BusFixture, "board wram", TAG) { bus->write_byte(0x2022A56, 0x10); CHECK(bus->read_byte(0x2022A56) == 0x10); + + CHECK(bus->get_cycles() == cycles + 2 * 9); + cycles = bus->get_cycles(); + + bus->write_halfword(0x2022A56, 0x1009); + CHECK(bus->read_halfword(0x2022A56) == 0x1009); + + CHECK(bus->get_cycles() == cycles + 2 * 3); + cycles = bus->get_cycles(); + + bus->write_word(0x2022A56, 0x10FF9903); + CHECK(bus->read_word(0x2022A56) == 0x10FF9903); + + CHECK(bus->get_cycles() == cycles + 2 * 6); } TEST_CASE_METHOD(BusFixture, "chip wram", TAG) { + uint32_t cycles = bus->get_cycles(); + bus->write_byte(0x3000000, 0xAC); CHECK(bus->read_byte(0x3000000) == 0xAC); @@ -51,9 +73,25 @@ TEST_CASE_METHOD(BusFixture, "chip wram", TAG) { bus->write_byte(0x3002A56, 0x10); CHECK(bus->read_byte(0x3002A56) == 0x10); + + CHECK(bus->get_cycles() == cycles + 2 * 3); + cycles = bus->get_cycles(); + + bus->write_halfword(0x3002A56, 0xF0F0); + CHECK(bus->read_halfword(0x3002A56) == 0xF0F0); + + CHECK(bus->get_cycles() == cycles + 2); + cycles = bus->get_cycles(); + + bus->write_word(0x3002A56, 0xF9399010); + CHECK(bus->read_word(0x3002A56) == 0xF9399010); + + CHECK(bus->get_cycles() == cycles + 2); } TEST_CASE_METHOD(BusFixture, "palette ram", TAG) { + uint32_t cycles = bus->get_cycles(); + bus->write_byte(0x5000000, 
0xAC); CHECK(bus->read_byte(0x5000000) == 0xAC); @@ -62,9 +100,25 @@ TEST_CASE_METHOD(BusFixture, "palette ram", TAG) { bus->write_byte(0x5000156, 0x10); CHECK(bus->read_byte(0x5000156) == 0x10); + + CHECK(bus->get_cycles() == cycles + 2 * 3); + cycles = bus->get_cycles(); + + bus->write_halfword(0x5000156, 0xEEE1); + CHECK(bus->read_halfword(0x5000156) == 0xEEE1); + + CHECK(bus->get_cycles() == cycles + 2); + cycles = bus->get_cycles(); + + bus->write_word(0x5000156, 0x938566E0); + CHECK(bus->read_word(0x5000156) == 0x938566E0); + + CHECK(bus->get_cycles() == cycles + 2 * 2); } TEST_CASE_METHOD(BusFixture, "video ram", TAG) { + uint32_t cycles = bus->get_cycles(); + bus->write_byte(0x6000000, 0xAC); CHECK(bus->read_byte(0x6000000) == 0xAC); @@ -73,9 +127,25 @@ TEST_CASE_METHOD(BusFixture, "video ram", TAG) { bus->write_byte(0x6012A56, 0x10); CHECK(bus->read_byte(0x6012A56) == 0x10); + + CHECK(bus->get_cycles() == cycles + 2 * 3); + cycles = bus->get_cycles(); + + bus->write_halfword(0x6012A56, 0xB100); + CHECK(bus->read_halfword(0x6012A56) == 0xB100); + + CHECK(bus->get_cycles() == cycles + 2); + cycles = bus->get_cycles(); + + bus->write_word(0x6012A56, 0x9322093E); + CHECK(bus->read_word(0x6012A56) == 0x9322093E); + + CHECK(bus->get_cycles() == cycles + 2 * 2); } TEST_CASE_METHOD(BusFixture, "oam obj ram", TAG) { + uint32_t cycles = bus->get_cycles(); + bus->write_byte(0x7000000, 0xAC); CHECK(bus->read_byte(0x7000000) == 0xAC); @@ -84,6 +154,20 @@ TEST_CASE_METHOD(BusFixture, "oam obj ram", TAG) { bus->write_byte(0x7000156, 0x10); CHECK(bus->read_byte(0x7000156) == 0x10); + + CHECK(bus->get_cycles() == cycles + 2 * 3); + cycles = bus->get_cycles(); + + bus->write_halfword(0x7000156, 0x946C); + CHECK(bus->read_halfword(0x7000156) == 0x946C); + + CHECK(bus->get_cycles() == cycles + 2); + cycles = bus->get_cycles(); + + bus->write_word(0x7000156, 0x93C5D1E0); + CHECK(bus->read_word(0x7000156) == 0x93C5D1E0); + + CHECK(bus->get_cycles() == cycles + 2); } 
TEST_CASE("rom", TAG) { @@ -116,22 +200,4 @@ TEST_CASE("rom", TAG) { } } -TEST_CASE_METHOD(BusFixture, "Halfword", TAG) { - CHECK(bus->read_halfword(0x202FED9) == 0); - - bus->write_halfword(0x202FED9, 0x1A4A); - CHECK(bus->read_halfword(0x202FED9) == 0x1A4A); - CHECK(bus->read_word(0x202FED9) == 0x1A4A); - CHECK(bus->read_byte(0x202FED9) == 0x4A); -} - -TEST_CASE_METHOD(BusFixture, "Word", TAG) { - CHECK(bus->read_word(0x600EE34) == 0); - - bus->write_word(0x600EE34, 0x3ACC491D); - CHECK(bus->read_word(0x600EE34) == 0x3ACC491D); - CHECK(bus->read_halfword(0x600EE34) == 0x491D); - CHECK(bus->read_byte(0x600EE34) == 0x1D); -} - #undef TAG