Created
December 16, 2025 14:24
-
-
Save sylefeb/7c20ce7508c1e7f04bf496d5d29abf79 to your computer and use it in GitHub Desktop.
ice-v CPU fix
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // SL 2020-06-12 @sylefeb | |
| // | |
| // Fun with RISC-V! RV32I cpu, see README.md | |
| // Build option: when ICEV_FAST_SHIFT is set the ALU computes shifts in a single expression, otherwise it shifts one bit per clock | |
| // https://github.com/sylefeb/Silice | |
| // MIT license, see LICENSE_MIT in Silice repo root | |
| $$if ICEV_FAST_SHIFT then | |
| $$print("Ice-V configured for fast shift (barrel shifter)") | |
| $$end | |
| // -------------------------------------------------- | |
| // Processor | |
| // -------------------------------------------------- | |
| // bitfield for easier decoding of instructions: names the sign, rs2, rs1, op, rd and opcode fields (field widths sum to 32 bits); the declaration is skipped when ICEV_RTYPE is already set | |
| $$if not ICEV_RTYPE then | |
| bitfield Rtype { uint1 unused1, uint1 sign, uint5 unused2, uint5 rs2, | |
| uint5 rs1, uint3 op, uint5 rd, uint7 opcode} | |
| $$ICEV_RTYPE = 1 | |
| $$end | |
| // -------------------------------------------------- | |
| // execute: decoder + ALU, all logic lives in a single always block | |
| // - decodes instructions (immediates, opcode class, destination register, load/store flags) | |
| // - performs all integer computations (ALU result r, jump decision, next address n) | |
| unit execute( | |
| // instruction, program counter (a word address: two zero bits are appended below) and the two source register values | |
| input uint32 instr, input uint$addrW$ pc, input int32 xa, input int32 xb, | |
| // trigger: pulsed high when the decoder + ALU should start; inside this unit it is only read to start a one-bit-per-clock shift | |
| input uint1 trigger, | |
| // outputs all information the processor needs to decide what to do next | |
| output uint3 op, output uint5 write_rd, output uint1 no_rd, | |
| output uint1 jump, output uint1 load, output uint1 store, | |
| output int32 val, output uint1 storeVal, output uint1 working(0), | |
| output uint32 n, output uint1 storeAddr, // next address adder | |
| output uint1 intop, output int32 r, // integer operations | |
| ) { | |
| uint5 shamt(0); uint32 cycle(0); // remaining shift count and cycle counter | |
| always { | |
| // ==== decode immediates (one per instruction format: U, J, I, B, S) | |
| int32 imm_u = {instr[12,20],12b0}; | |
| int32 imm_j = {{12{instr[31,1]}},instr[12,8],instr[20,1],instr[21,10],1b0}; | |
| int32 imm_i = {{20{instr[31,1]}},instr[20,12]}; | |
| int32 imm_b = {{20{instr[31,1]}},instr[7,1],instr[25,6],instr[8,4],1b0}; | |
| int32 imm_s = {{20{instr[31,1]}},instr[25,7],instr[7,5]}; | |
| // ==== decode opcode (five bits starting at bit 2) // op: 3-bit operation selector, also used for load-store | |
| uint5 opcode = instr[ 2, 5]; op = Rtype(instr).op; | |
| uint1 AUIPC = opcode == 5b00101; uint1 LUI = opcode == 5b01101; | |
| uint1 JAL = opcode == 5b11011; uint1 JALR = opcode == 5b11001; | |
| uint1 IntImm = opcode == 5b00100; uint1 IntReg = opcode == 5b01100; | |
| uint1 Cycles = opcode == 5b11100; uint1 branch = opcode == 5b11000; | |
| uint1 regOrImm = IntReg | branch; // ALU second input: register (1) or immediate (0)? | |
| uint1 pcOrReg = AUIPC | JAL | branch; // address adder first input: pc (1) or register (0)? | |
| uint1 sub = IntReg & Rtype(instr).sign; // subtract | |
| uint1 aluShift = (IntImm | IntReg) & op[0,2] == 2b01; // shift requested | |
| // ==== select next address adder first input | |
| int32 addr_a = pcOrReg ? __signed({1b0,pc,2b0}) : xa; | |
| // ==== select ALU second input | |
| int32 b = regOrImm ? (xb) : imm_i; | |
| // ==== allows to do subtraction and all comparisons with a single adder | |
| // trick from femtorv32/swapforth/J1 | |
| int33 a_minus_b = {1b1,~b} + {1b0,xa} + 33b1; | |
| uint1 a_lt_b = (xa[31,1] ^ b[31,1]) ? xa[31,1] : a_minus_b[32,1]; | |
| uint1 a_lt_b_u = a_minus_b[32,1]; | |
| uint1 a_eq_b = a_minus_b[0,32] == 0; | |
| // ==== set decoder outputs depending on incoming instructions | |
| // register to write to? | |
| write_rd = Rtype(instr).rd; | |
| // load/store? | |
| load = opcode == 5b00000; store = opcode == 5b01000; | |
| // do we have to write a result to a register? (no for branches, stores and rd == 0) | |
| no_rd = branch | store | (Rtype(instr).rd == 5b0); | |
| // integer operations // store next address? | |
| intop = (IntImm | IntReg); storeAddr = AUIPC; | |
| // value to store directly // store value? | |
| val = LUI ? imm_u : cycle; storeVal = LUI | Cycles; | |
| // ==== select immediate for the next address computation | |
| // 'or trick' from femtorv32 | |
| int32 addr_imm = (AUIPC ? imm_u : 32b0) | (JAL ? imm_j : 32b0) | |
| | (branch ? imm_b : 32b0) | ((JALR|load) ? imm_i : 32b0) | |
| | (store ? imm_s : 32b0); | |
| // ==== increment cycle counter (its value is output on val when the instruction is not LUI) | |
| cycle = cycle + 1; | |
| int32 shift(0); uint1 j(0); // temp variables for shifter and comparator | |
| // ====================== ALU | |
| $$if not ICEV_FAST_SHIFT then | |
| // shift (one bit per clock) | |
| if (working) { | |
| // decrease shift size | |
| shamt = shamt - 1; | |
| // shift one bit | |
| shift = op[2,1] ? (Rtype(instr).sign ? {r[31,1],r[1,31]} | |
| : {__signed(1b0),r[1,31]}) : {r[0,31],__signed(1b0)}; | |
| } else { | |
| // start shifting? (the shift amount is loaded only when a shift is decoded and trigger is high) | |
| shamt = ((aluShift & trigger) ? __unsigned(b[0,5]) : 0); | |
| // store value to be shifted | |
| shift = xa; | |
| } | |
| // are we still shifting? | |
| working = (shamt != 0); | |
| $$end | |
| // all ALU operations | |
| switch (op) { | |
| case 3b000: { r = sub ? a_minus_b : xa + b; } // ADD / SUB | |
| case 3b010: { r = a_lt_b; } case 3b011: { r = a_lt_b_u; }// SLT(I) / SLT(I)U | |
| case 3b100: { r = xa ^ b; } case 3b110: { r = xa | b; }// XOR / OR | |
| $$if not ICEV_FAST_SHIFT then | |
| case 3b001: { r = shift; } case 3b101: { r = shift; }// SLLI/SRLI/SRAI | |
| $$else | |
| case 3b001: { r = (xa <<< b[0,5]); } | |
| case 3b101: { r = Rtype(instr).sign ? (xa >>> b[0,5]) : (xa >> b[0,5]); } | |
| $$end | |
| case 3b111: { r = xa & b; } // AND | |
| default: { r = {32{1bx}}; } // don't care | |
| } | |
| // ====================== Comparator for branching | |
| switch (op[1,2]) { | |
| case 2b00: { j = a_eq_b; } /*BEQ */ case 2b10: { j=a_lt_b;} /*BLT*/ | |
| case 2b11: { j = a_lt_b_u;} /*BLTU*/ default: { j = 1bx; } | |
| } | |
| jump = (JAL | JALR) | (branch & (j ^ op[0,1])); | |
| // ^^^^^^^ negates comparator result | |
| // ====================== Next address adder | |
| n = addr_a + addr_imm; | |
| } | |
| } | |
| // -------------------------------------------------- | |
| // The Risc-V RV32I CPU itself: fetches instructions through 'mem', decodes and computes with 'execute', writes results to the register BRAMs | |
| unit rv32i_cpu(bram_port mem) { | |
| // register file, uses two BRAMs to fetch two registers at once | |
| bram int32 xregsA[32] = {pad(0)}; bram int32 xregsB[32] = {pad(0)}; | |
| // current instruction | |
| uint32 instr(0); | |
| // program counter | |
| uint$addrW$ pc = uninitialized; | |
| uint$addrW$ next_pc <:: pc + 1; // next_pc tracks the expression 'pc + 1' (pc counts 32-bit words) | |
| // value that has been loaded from memory | |
| int32 loaded = uninitialized; | |
| // decoder + ALU, executes the instruction and tells processor what to do | |
| execute exec( | |
| instr <:: instr, pc <:: pc, xa <: xregsA.rdata, xb <: xregsB.rdata | |
| ); | |
| int32 sext_n <: {{$32-(addrW+2)${exec.n[$addrW+2$,1]}},exec.n[0,$addrW+2$]}; | |
| // what do we write in register? OR of gated terms: (next_pc<<2) when jump is set, sext_n when storeAddr is set, val, loaded value, ALU result; sext_n is the low addrW+2 bits of exec.n with bit addrW+2 replicated into the upper bits | |
| int32 write_back <: (exec.jump ? (next_pc<<2) : 32b0) | |
| | (exec.storeAddr ? sext_n : 32b0) | |
| | (exec.storeVal ? exec.val : 32b0) | |
| | (exec.load ? loaded : 32b0) | |
| | (exec.intop ? exec.r : 32b0); | |
| // The 'always_before' block is applied at the start of every cycle. | |
| // This is a good place to set default values, which also indicates | |
| // to Silice that some variables (e.g. xregsA.wdata) are fully set | |
| // every cycle, enabling further optimizations. | |
| // Default values are overridden from within the algorithm loop. | |
| always_before { | |
| // decodes values loaded from memory (used when exec.load == 1) | |
| uint32 aligned = mem.rdata >> {exec.n[0,2],3b000}; | |
| switch ( exec.op[0,2] ) { // LB / LBU, LH / LHU, LW | |
| case 2b00:{ loaded = {{24{(~exec.op[2,1])&aligned[ 7,1]}},aligned[ 0,8]}; } | |
| case 2b01:{ loaded = {{16{(~exec.op[2,1])&aligned[15,1]}},aligned[ 0,16]};} | |
| case 2b10:{ loaded = aligned; } | |
| default: { loaded = {32{1bx}}; } // don't care | |
| } | |
| // what to write on a store (used when exec.store == 1) | |
| mem.wdata = xregsB.rdata << {exec.n[0,2],3b000}; | |
| // maintain write enable low (pulses high when needed) | |
| mem.wenable = 4b0000; | |
| // maintain alu trigger low | |
| exec.trigger = 0; | |
| // maintain register wenable low | |
| // (pulsed when necessary) | |
| xregsA.wenable = 0; | |
| } | |
| algorithm <onehot,autorun> { | |
| // =========== CPU runs forever | |
| while (1) { | |
| // data is now available: latch the fetched instruction and its address | |
| instr = mem.rdata; | |
| pc = mem.addr; | |
| ++: // wait for register read (BRAM takes one cycle) | |
| exec.trigger = 1; | |
| while (1) { // decode + ALU refresh during the cycle entering the loop | |
| // this operations loop allows to wait for ALU when needed | |
| // it is built such that no cycles are wasted | |
| // load/store? | |
| if (exec.load | exec.store) { | |
| // memory address from which to load/store | |
| mem.addr = exec.n >> 2; | |
| // == Store (enabled if exec.store == 1) | |
| // build write mask depending on SB, SH, SW | |
| // assumes aligned accesses: the mask is shifted left by exec.n[0,2], so e.g. SW needs exec.n[0,2] == 0 to keep all four byte lanes | |
| mem.wenable = ({4{exec.store}} & { { 2{exec.op[0,2]==2b10} }, | |
| exec.op[0,1] | exec.op[1,1], 1b1 | |
| } ) << exec.n[0,2]; | |
| ++: // wait for data transaction | |
| // == Load (enabled if exec.load == 1) | |
| // commit result | |
| xregsA.wenable = ~exec.no_rd; | |
| // restore address to program counter | |
| mem.addr = next_pc; | |
| // exit the operations loop | |
| $$if ICEV_VERILATOR_TRACE then | |
| // this is used by SOCs/ice-v-cmp, to track retired instr. and compare CPUs | |
| __verilog("$c32(\"cpu_retires(1,\",%,\",\",%,\",\",%,\",\",%,\");\");", | |
| {pc,2b00},instr,xregsA.wenable?exec.write_rd:0,write_back); | |
| $$end | |
| break; | |
| // instruction read from BRAM and write to register | |
| // occurs as we jump back to loop start | |
| } else { | |
| // commit result | |
| xregsA.wenable = ~exec.no_rd; | |
| // next instruction address | |
| mem.addr = exec.jump ? (exec.n >> 2) : next_pc; | |
| // ALU done? | |
| if (exec.working == 0) { | |
| // yes: all is correct, stop here | |
| $$if ICEV_VERILATOR_TRACE then | |
| // this is used by SOCs/ice-v-cmp, to track retired instr. and compare CPUs | |
| __verilog("$c32(\"cpu_retires(1,\",%,\",\",%,\",\",%,\",\",%,\");\");", | |
| {pc,2b00},instr,xregsA.wenable?exec.write_rd:0,write_back); | |
| $$end | |
| break; | |
| // instruction read from BRAM and write to register | |
| // occurs as we jump back to loop start | |
| } | |
| } | |
| } | |
| } | |
| } | |
| // the 'always_after' block is executed at the end of every cycle | |
| always_after { | |
| // write back data to both register BRAMs | |
| xregsA.wdata = write_back; xregsB.wdata = write_back; | |
| // xregsB written when xregsA is | |
| xregsB.wenable = xregsA.wenable; | |
| // write to write_rd, else track instruction register | |
| xregsA.addr = xregsA.wenable ? exec.write_rd : Rtype(instr).rs1; | |
| xregsB.addr = xregsA.wenable ? exec.write_rd : Rtype(instr).rs2; | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment