bus (feat): add cycle accuracy

Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
2024-06-15 03:49:10 +05:30
parent cb75ebf8ef
commit c22333812e
9 changed files with 657 additions and 108 deletions
--- a/src/cpu/thumb/exec.cc
+++ b/src/cpu/thumb/exec.cc
@@ -1,3 +1,4 @@
+#include "cpu/alu.hh"
 #include "cpu/cpu.hh"
 #include "util/bits.hh"
 #include "util/log.hh"
@@ -15,6 +16,11 @@ Instruction::exec(Cpu& cpu) {
    std::visit(
      overloaded{
        [&cpu, set_cc](MoveShiftedRegister& data) {
+            /*
+              S -> prefetched instruction in step()
+
+              Total = S cycle
+            */
            if (data.opcode == ShiftType::ROR)
                glogger.error("Invalid opcode in {}", typeid(data).name());

@@ -28,6 +34,11 @@ Instruction::exec(Cpu& cpu) {
            set_cc(carry, cpu.cpsr.v(), get_bit(shifted, 31), shifted == 0);
        },
        [&cpu, set_cc](AddSubtract& data) {
+            /*
+              S -> prefetched instruction in step()
+
+              Total = S cycle
+            */
            uint32_t offset =
              data.imm ? static_cast<uint32_t>(static_cast<int8_t>(data.offset))
                       : cpu.gpr[data.offset];
@@ -48,6 +59,11 @@ Instruction::exec(Cpu& cpu) {
            set_cc(carry, overflow, get_bit(result, 31), result == 0);
        },
        [&cpu, set_cc](MovCmpAddSubImmediate& data) {
+            /*
+              S -> prefetched instruction in step()
+
+              Total = S cycle
+            */
            uint32_t result = 0;
            bool carry      = cpu.cpsr.c();
            bool overflow   = cpu.cpsr.v();
@@ -73,6 +89,25 @@ Instruction::exec(Cpu& cpu) {
                cpu.gpr[data.rd] = result;
        },
        [&cpu, set_cc](AluOperations& data) {
+            /*
+              Data Processing
+              ===============
+              S -> prefetched instruction in step()
+              I -> only when register specified shift
+              Total = S or S + I cycles
+
+              Multiply
+              ========
+              S -> reading instruction in step()
+              mI -> m internal cycles
+              let v = data at rs
+              m = 1 if bits [31:8] of v are all zero or all one
+              m = 2 if bits [31:16] of v are all zero or all one
+              m = 3 if bits [31:24] of v are all zero or all one
+              m = 4 otherwise
+
+              Total = S + mI cycles
+            */
            uint32_t op_1   = cpu.gpr[data.rd];
            uint32_t op_2   = cpu.gpr[data.rs];
            uint32_t result = 0;
@@ -90,12 +125,15 @@ Instruction::exec(Cpu& cpu) {
                    break;
                case AluOperations::OpCode::LSL:
                    result = eval_shift(ShiftType::LSL, op_1, op_2, carry);
+                    cpu.internal_cycle();
                    break;
                case AluOperations::OpCode::LSR:
                    result = eval_shift(ShiftType::LSR, op_1, op_2, carry);
+                    cpu.internal_cycle();
                    break;
                case AluOperations::OpCode::ASR:
                    result = eval_shift(ShiftType::ASR, op_1, op_2, carry);
+                    cpu.internal_cycle();
                    break;
                case AluOperations::OpCode::ADC:
                    result = add(op_1, op_2, carry, overflow, carry);
@@ -105,6 +143,7 @@ Instruction::exec(Cpu& cpu) {
                    break;
                case AluOperations::OpCode::ROR:
                    result = eval_shift(ShiftType::ROR, op_1, op_2, carry);
+                    cpu.internal_cycle();
                    break;
                case AluOperations::OpCode::NEG:
                    result = -op_2;
@@ -120,6 +159,9 @@ Instruction::exec(Cpu& cpu) {
                    break;
                case AluOperations::OpCode::MUL:
                    result = op_1 * op_2;
+                    // mI cycles
+                    for (int i = 0; i < multiplier_array_cycles(op_2); i++)
+                        cpu.internal_cycle();
                    break;
                case AluOperations::OpCode::BIC:
                    result = op_1 & ~op_2;
@@ -137,6 +179,20 @@ Instruction::exec(Cpu& cpu) {
            set_cc(carry, overflow, get_bit(result, 31), result == 0);
        },
        [&cpu, set_cc](HiRegisterOperations& data) {
+            /*
+              Always
+              ======
+              S -> prefetched instruction in step()
+
+              When PC is written
+              ==================
+              N -> fetch from the new address in branch
+              S -> last opcode fetch at +L to refill the pipeline
+              S+N taken care of by flush_pipeline()
+
+              Total = S or 2S + N cycles
+            */
+
            uint32_t op_1 = cpu.gpr[data.rd];
            uint32_t op_2 = cpu.gpr[data.rs];

@@ -191,95 +247,157 @@ Instruction::exec(Cpu& cpu) {
            }
        },
        [&cpu](PcRelativeLoad& data) {
+            /*
+              S   -> reading instruction in step()
+              N   -> read from target
+              I   -> stored in register
+              Total = S + N + I cycles
+            */
            uint32_t pc = cpu.pc;
            rst_bit(pc, 0);
            rst_bit(pc, 1);

-            cpu.gpr[data.rd] = cpu.bus->read_word(pc + data.word);
+            cpu.gpr[data.rd] = cpu.bus->read_word(pc + data.word, false);
+
+            cpu.internal_cycle();
+
+            // last read is unrelated
+            cpu.sequential = false;
        },
        [&cpu](LoadStoreRegisterOffset& data) {
+            /*
+              Load
+              ====
+              S   -> reading instruction in step()
+              N   -> read from target
+              I   -> stored in register
+              Total = S + N + I
+
+              Store
+              =====
+              N -> calculating memory address
+              N -> write at target
+              Total = 2N
+            */
+
            uint32_t address = cpu.gpr[data.rb] + cpu.gpr[data.ro];

            if (data.load) {
                if (data.byte) {
-                    cpu.gpr[data.rd] = cpu.bus->read_byte(address);
+                    cpu.gpr[data.rd] = cpu.bus->read_byte(address, false);
                } else {
-                    cpu.gpr[data.rd] = cpu.bus->read_word(address);
+                    cpu.gpr[data.rd] = cpu.bus->read_word(address, false);
                }
+                cpu.internal_cycle();
            } else {
                if (data.byte) {
-                    cpu.bus->write_byte(address, cpu.gpr[data.rd] & 0xFF);
+                    cpu.bus->write_byte(
+                      address, cpu.gpr[data.rd] & 0xFF, false);
                } else {
-                    cpu.bus->write_word(address, cpu.gpr[data.rd]);
+                    cpu.bus->write_word(address, cpu.gpr[data.rd], false);
                }
            }
+
+            // last read/write is unrelated
+            cpu.sequential = false;
        },
        [&cpu](LoadStoreSignExtendedHalfword& data) {
+            // Same cycles as above
+
            uint32_t address = cpu.gpr[data.rb] + cpu.gpr[data.ro];

            switch (data.s << 1 | data.h) {
                case 0b00:
-                    cpu.bus->write_halfword(address, cpu.gpr[data.rd] & 0xFFFF);
+                    cpu.bus->write_halfword(
+                      address, cpu.gpr[data.rd] & 0xFFFF, false);
                    break;
                case 0b01:
-                    cpu.gpr[data.rd] = cpu.bus->read_halfword(address);
+                    cpu.gpr[data.rd] = cpu.bus->read_halfword(address, false);
+                    cpu.internal_cycle();
                    break;
                case 0b10:
                    // sign extend and load the byte
                    cpu.gpr[data.rd] =
-                      (static_cast<int32_t>(cpu.bus->read_byte(address))
+                      (static_cast<int32_t>(cpu.bus->read_byte(address, false))
                       << 24) >>
                      24;
+                    cpu.internal_cycle();
                    break;
                case 0b11:
                    // sign extend the halfword
                    cpu.gpr[data.rd] =
-                      (static_cast<int32_t>(cpu.bus->read_halfword(address))
+                      (static_cast<int32_t>(
+                         cpu.bus->read_halfword(address, false))
                       << 16) >>
                      16;
+                    cpu.internal_cycle();
                    break;

                    // unreachable
                default: {
                }
            }
+
+            // last read/write is unrelated
+            cpu.sequential = false;
        },
        [&cpu](LoadStoreImmediateOffset& data) {
+            // Same cycles as above
+
            uint32_t address = cpu.gpr[data.rb] + data.offset;

            if (data.load) {
                if (data.byte) {
-                    cpu.gpr[data.rd] = cpu.bus->read_byte(address);
+                    cpu.gpr[data.rd] = cpu.bus->read_byte(address, false);
                } else {
-                    cpu.gpr[data.rd] = cpu.bus->read_word(address);
+                    cpu.gpr[data.rd] = cpu.bus->read_word(address, false);
                }
+                cpu.internal_cycle();
            } else {
                if (data.byte) {
-                    cpu.bus->write_byte(address, cpu.gpr[data.rd] & 0xFF);
+                    cpu.bus->write_byte(
+                      address, cpu.gpr[data.rd] & 0xFF, false);
                } else {
-                    cpu.bus->write_word(address, cpu.gpr[data.rd]);
+                    cpu.bus->write_word(address, cpu.gpr[data.rd], false);
                }
            }
+
+            // last read/write is unrelated
+            cpu.sequential = false;
        },
        [&cpu](LoadStoreHalfword& data) {
+            // Same cycles as above
+
            uint32_t address = cpu.gpr[data.rb] + data.offset;

            if (data.load) {
                cpu.gpr[data.rd] = cpu.bus->read_halfword(address);
+                cpu.internal_cycle();
            } else {
                cpu.bus->write_halfword(address, cpu.gpr[data.rd] & 0xFFFF);
            }
+
+            // last read/write is unrelated
+            cpu.sequential = false;
        },
        [&cpu](SpRelativeLoad& data) {
+            // Same cycles as above
+
            uint32_t address = cpu.sp + data.word;

            if (data.load) {
                cpu.gpr[data.rd] = cpu.bus->read_word(address);
+                cpu.internal_cycle();
            } else {
                cpu.bus->write_word(address, cpu.gpr[data.rd]);
            }
+
+            // last read/write is unrelated
+            cpu.sequential = false;
        },
        [&cpu](LoadAddress& data) {
+            // 1S cycle in step()
+
            if (data.sp) {
                cpu.gpr[data.rd] = cpu.sp + data.word;
            } else {
@@ -288,15 +406,38 @@ Instruction::exec(Cpu& cpu) {
                cpu.gpr[data.rd] = (cpu.pc & ~(1 << 1)) + data.word;
            }
        },
-        [&cpu](AddOffsetStackPointer& data) { cpu.sp += data.word; },
+        [&cpu](AddOffsetStackPointer& data) {
+            // 1S cycle in step()
+
+            cpu.sp += data.word;
+        },
        [&cpu](PushPopRegister& data) {
+            /*
+              Load
+              ====
+              S       -> reading instruction in step()
+              N       -> unrelated read from target
+              (n-1) S -> next n - 1 related reads from target
+              I       -> stored in register
+              N+S     -> if PC is written - taken care of by flush_pipeline()
+              Total = nS + N + I or (n+1)S + 2N + I
+
+              Store
+              =====
+              N       -> calculating memory address
+              N       -> unrelated write at target
+              (n-1) S -> next n - 1 related writes
+              Total = 2N + (n-1)S
+            */
            static constexpr uint8_t alignment = 4;
+            bool sequential                    = false;

            if (data.load) {
                for (uint8_t i = 0; i < 8; i++) {
                    if (get_bit(data.regs, i)) {
-                        cpu.gpr[i] = cpu.bus->read_word(cpu.sp);
+                        cpu.gpr[i] = cpu.bus->read_word(cpu.sp, sequential);
                        cpu.sp += alignment;
+                        sequential = true;
                    }
                }

@@ -305,6 +446,9 @@ Instruction::exec(Cpu& cpu) {
                    cpu.sp += alignment;
                    cpu.is_flushed = true;
                }
+
+                // I
+                cpu.internal_cycle();
            } else {
                if (data.pclr) {
                    cpu.sp -= alignment;
@@ -314,35 +458,68 @@ Instruction::exec(Cpu& cpu) {
                for (int8_t i = 7; i >= 0; i--) {
                    if (get_bit(data.regs, i)) {
                        cpu.sp -= alignment;
-                        cpu.bus->write_word(cpu.sp, cpu.gpr[i]);
+                        cpu.bus->write_word(cpu.sp, cpu.gpr[i], sequential);
+                        sequential = true;
                    }
                }
            }
+
+            // last read/write is unrelated
+            cpu.sequential = false;
        },
        [&cpu](MultipleLoad& data) {
+            /*
+              Load
+              ====
+              S       -> reading instruction in step()
+              N       -> unrelated read from target
+              (n-1) S -> next n - 1 related reads from target
+              I       -> stored in register (TODO: no internal_cycle() call below)
+              Total = nS + N + I
+
+              Store
+              =====
+              N       -> calculating memory address
+              N       -> unrelated write at target
+              (n-1) S -> next n - 1 related writes
+              Total = 2N + (n-1)S
+            */
+
            static constexpr uint8_t alignment = 4;

-            uint32_t rb = cpu.gpr[data.rb];
+            uint32_t rb     = cpu.gpr[data.rb];
+            bool sequential = false;

            if (data.load) {
                for (uint8_t i = 0; i < 8; i++) {
                    if (get_bit(data.regs, i)) {
-                        cpu.gpr[i] = cpu.bus->read_word(rb);
+                        cpu.gpr[i] = cpu.bus->read_word(rb, sequential);
                        rb += alignment;
+                        sequential = true;
                    }
                }
            } else {
                for (int8_t i = 7; i >= 0; i--) {
                    if (get_bit(data.regs, i)) {
                        rb -= alignment;
-                        cpu.bus->write_word(rb, cpu.gpr[i]);
+                        cpu.bus->write_word(rb, cpu.gpr[i], sequential);
+                        sequential = true;
                    }
                }
            }

            cpu.gpr[data.rb] = rb;
+
+            // last read/write is unrelated
+            cpu.sequential = false;
        },
        [&cpu](ConditionalBranch& data) {
+            /*
+              S   -> reading instruction in step()
+              N+S -> if condition is true, branch and refill pipeline
+              Total = S or 2S + N
+            */
+
            if (data.condition == Condition::AL)
                glogger.warn("Condition 1110 (AL) is undefined");

@@ -353,6 +530,12 @@ Instruction::exec(Cpu& cpu) {
            cpu.is_flushed = true;
        },
        [&cpu](SoftwareInterrupt& data) {
+            /*
+              S   -> reading instruction in step()
+              N+S -> refill pipeline
+              Total = 2S + N
+            */
+
            // next instruction is one instruction behind PC
            cpu.lr   = cpu.pc - INSTRUCTION_SIZE;
            cpu.spsr = cpu.cpsr;
@@ -362,10 +545,24 @@ Instruction::exec(Cpu& cpu) {
            cpu.is_flushed = true;
        },
        [&cpu](UnconditionalBranch& data) {
+            /*
+              S   -> reading instruction in step()
+              N+S -> branch and refill pipeline
+              Total = 2S + N
+            */
+
            cpu.pc += data.offset;
            cpu.is_flushed = true;
        },
        [&cpu](LongBranchWithLink& data) {
+            /*
+              S -> prefetched instruction in step()
+              N -> fetch from the new address in branch
+              S -> last opcode fetch at +L to refill the pipeline
+              Total = 2S + N cycles
+                      1S done, S+N taken care of by flush_pipeline()
+            */
+
            // 12 bit integer
            int32_t offset = data.offset;