sylefeb · December 16, 2025 14:24
diff --git a/ice-v.si b/ice-v.si
 // SL 2020-06-12 @sylefeb
 //
 // Fun with RISC-V!   RV32I cpu, see README.md
 //
 // https://github.com/sylefeb/Silice
 // MIT license, see LICENSE_MIT in Silice repo root

 $$if ICEV_FAST_SHIFT then
 $$print("Ice-V configured for fast shift (barrel shifter)")
 $$end

 // --------------------------------------------------
 // Processor
 // --------------------------------------------------

 // Rtype: names the fields of a 32-bit instruction word, so that the decoder
 // can write e.g. Rtype(instr).rd rather than slicing bits by hand.
 // The field widths add up to 32 bits.
 // The guard below declares the bitfield only while ICEV_RTYPE is not set, and
 // sets it right after (presumably so that a second inclusion is harmless).
 $$if not ICEV_RTYPE then
 bitfield Rtype {
   uint1 unused1,
   uint1 sign,     // selects subtraction, and arithmetic right shifts
   uint5 unused2,
   uint5 rs2,      // index of the second source register
   uint5 rs1,      // index of the first source register
   uint3 op,       // operation selector (ALU op, branch type, load/store size)
   uint5 rd,       // index of the destination register
   uint7 opcode
 }
 $$ICEV_RTYPE = 1
 $$end

 // --------------------------------------------------
 // execute: decoder + ALU
 // - decodes instructions
 // - performs all integer computations
 //
 // The whole unit is one always block: decoding, the ALU, the branch
 // comparator and the next address adder are evaluated every clock cycle
 // from the current instr, pc, xa and xb. State kept across cycles is limited
 // to the cycle counter and, without ICEV_FAST_SHIFT, the shifter (shamt,
 // working, and r which carries the partially shifted value).
 //
 // Outputs, as driven below:
 //   op         Rtype(instr).op; the CPU also uses it to size loads and stores
 //   write_rd   index of the destination register
 //   no_rd      1 when no register must be written (branch, store or rd == 0)
 //   jump       1 on JAL, JALR, or on a branch whose condition holds
 //   load       1 on a load opcode
 //   store      1 on a store opcode
 //   val        LUI immediate or cycle counter, meaningful when storeVal == 1
 //   storeVal   1 when val is the value to write (LUI or Cycles opcode)
 //   working    1 while the one-bit-per-clock shifter still has bits to shift
 //   n          result of the next address adder
 //   storeAddr  1 on AUIPC
 //   intop      1 on an integer operation (IntImm or IntReg opcode)
 //   r          result of the ALU

 unit execute(
  // instruction, program counter and registers
  input  uint32 instr, input  uint$addrW$ pc, input int32 xa, input int32 xb,
  // trigger: pulsed high when the decoder + ALU should start
  // (in this unit it only gates the start of a multi-cycle shift)
  input  uint1  trigger,
  // outputs all information the processor needs to decide what to do next
  output uint3  op,    output uint5  write_rd, output  uint1  no_rd,
  output uint1  jump,  output uint1  load,     output  uint1  store,
  output int32  val,   output uint1  storeVal, output  uint1  working(0),
  output uint32 n,     output uint1  storeAddr, // next address adder
  output uint1  intop, output int32  r,         // integer operations
 ) {
  uint5  shamt(0);  uint32 cycle(0); // shifter status and cycle counter
  always {
    // ==== decode immediates
    // (bit selects read [first bit, width]; every immediate except imm_u
    //  replicates instr bit 31 in its upper bits)
    int32 imm_u  = {instr[12,20],12b0};
    int32 imm_j  = {{12{instr[31,1]}},instr[12,8],instr[20,1],instr[21,10],1b0};
    int32 imm_i  = {{20{instr[31,1]}},instr[20,12]};
    int32 imm_b  = {{20{instr[31,1]}},instr[7,1],instr[25,6],instr[8,4],1b0};
    int32 imm_s  = {{20{instr[31,1]}},instr[25,7],instr[7,5]};
    // ==== decode opcode                 // load-store op
    // only instr bits 2 to 6 are compared, the two lowest bits are not examined
    uint5 opcode    = instr[ 2, 5];       op           = Rtype(instr).op;
    uint1 AUIPC     = opcode == 5b00101;  uint1 LUI    = opcode == 5b01101;
    uint1 JAL       = opcode == 5b11011;  uint1 JALR   = opcode == 5b11001;
    uint1 IntImm    = opcode == 5b00100;  uint1 IntReg = opcode == 5b01100;
    // Cycles: opcode for which the cycle counter is returned (see val below)
    uint1 Cycles    = opcode == 5b11100;  uint1 branch = opcode == 5b11000;
    uint1 regOrImm  = IntReg  | branch;                    // reg or imm in ALU?
    uint1 pcOrReg   = AUIPC   | JAL    | branch;           // pc or reg in addr?
    uint1 sub       = IntReg  & Rtype(instr).sign;         // subtract
    uint1 aluShift  = (IntImm | IntReg) & op[0,2] == 2b01; // shift requested
    // ==== select next address adder first input
    // pc is extended with two zero bits below (and one above) before being
    // used as an address, the alternative being register xa
    int32 addr_a    = pcOrReg ? __signed({1b0,pc,2b0}) : xa;
    // ==== select ALU second input
    int32 b         = regOrImm ? (xb) : imm_i;
    // ==== allows to do subtraction and all comparisons with a single adder
    // trick from femtorv32/swapforth/J1
    // 33-bit sum of {1,~b}, {0,xa} and 1: its low 32 bits are xa - b, and
    // bit 32 is set exactly when xa is below b as unsigned numbers
    int33 a_minus_b = {1b1,~b} + {1b0,xa} + 33b1;
    // signed comparison: when the sign bits differ the negative operand is the
    // smaller one, otherwise the unsigned outcome applies
    uint1 a_lt_b    = (xa[31,1] ^ b[31,1]) ? xa[31,1] : a_minus_b[32,1];
    uint1 a_lt_b_u  = a_minus_b[32,1];
    uint1 a_eq_b    = a_minus_b[0,32] == 0;
    // ==== set decoder outputs depending on incoming instructions
    // register to write to?
    write_rd     = Rtype(instr).rd;
    // load/store?
    load         = opcode == 5b00000;   store        = opcode == 5b01000;
    // do we have to write a result to a register?
    no_rd        = branch  | store  | (Rtype(instr).rd == 5b0);
    // integer operations                // store next address?
    intop        = (IntImm | IntReg);   storeAddr    = AUIPC;
    // value to store directly           // store value?
    val          = LUI ? imm_u : cycle; storeVal     = LUI     | Cycles;
    // ==== select immediate for the next address computation
    // 'or trick' from femtorv32
    // (the five conditions test distinct opcodes, so at most one term is
    //  non-zero and the OR acts as a selector)
    int32 addr_imm  =  (AUIPC  ? imm_u : 32b0) | (JAL         ? imm_j : 32b0)
                    |  (branch ? imm_b : 32b0) | ((JALR|load) ? imm_i : 32b0)
                    |  (store  ? imm_s : 32b0);
    // ==== increment cycle counter
    // (done after val was set above, and on every clock)
    cycle        = cycle + 1;
    int32 shift(0);  uint1 j(0); // temp variables for shifter and comparator
    // ====================== ALU
 $$if not ICEV_FAST_SHIFT then
    // shift (one bit per clock)
    if (working) {
      // decrease shift size
      shamt = shamt - 1;
      // shift one bit
      // op[2,1] set: right shift, arithmetic when the sign field is set,
      // logical otherwise; op[2,1] clear: left shift.
      // r is read here before the switch below assigns it.
      // NOTE(review): this relies on r still holding the value computed on
      // the previous cycle - confirm against Silice output semantics.
      shift = op[2,1] ? (Rtype(instr).sign ? {r[31,1],r[1,31]}
                          : {__signed(1b0),r[1,31]}) : {r[0,31],__signed(1b0)};
    } else {
      // start shifting?
      // the shift amount is the low five bits of the ALU second input, and is
      // only loaded when a shift is decoded while trigger is high
      shamt = ((aluShift & trigger) ? __unsigned(b[0,5]) : 0);
      // store value to be shifted
      shift = xa;
    }
    // are we still shifting?
    working = (shamt != 0);
 $$end
    // all ALU operations
    switch (op) {
      case 3b000: { r = sub ? a_minus_b : xa + b; }            // ADD / SUB
      case 3b010: { r = a_lt_b; } case 3b011: { r = a_lt_b_u; }// SLTI / SLTU
      case 3b100: { r = xa ^ b; } case 3b110: { r = xa | b;   }// XOR / OR
 $$if not ICEV_FAST_SHIFT then
      case 3b001: { r = shift;  } case 3b101: { r = shift;    }// SLLI/SRLI/SRAI
 $$else
      // fast shift: the full shift is computed in a single expression
      case 3b001: { r = (xa <<< b[0,5]); }
      case 3b101: { r = Rtype(instr).sign ? (xa >>> b[0,5]) : (xa >> b[0,5]); }
 $$end
      case 3b111: { r = xa & b; }     // AND
      default:    { r = {32{1bx}}; }  // don't care
    }
    // ====================== Comparator for branching
    // op[1,2] selects the comparison, op[0,1] asks for the opposite outcome
    switch (op[1,2]) {
      case 2b00:  { j = a_eq_b;  } /*BEQ */ case 2b10: { j=a_lt_b;} /*BLT*/
      case 2b11:  { j = a_lt_b_u;} /*BLTU*/ default:   { j = 1bx; }
    }
    jump = (JAL | JALR) | (branch & (j ^ op[0,1]));
    //                                   ^^^^^^^ negates comparator result
    // ====================== Next address adder
    // one adder serves AUIPC, jumps, branches, loads and stores: addr_a and
    // addr_imm were selected above according to the opcode
    n = addr_a + addr_imm;
  }

 }

 // --------------------------------------------------
 // The Risc-V RV32I CPU itself
 //
 // Fetches instructions through the 'mem' port, reads both source registers
 // from a pair of BRAMs, lets the 'execute' unit decode and compute, then
 // commits the result to the register file and/or to memory.
 // mem.addr is used as a word address: addresses coming out of the next
 // address adder are shifted right by two before being applied, and their two
 // low bits select the byte lanes (read shift, write shift and write mask).

 unit rv32i_cpu(bram_port mem) {

  // register file, uses two BRAMs to fetch two registers at once
  // (both always receive the same writes, see always_after)
  bram int32 xregsA[32] = {pad(0)}; bram int32 xregsB[32] = {pad(0)};

  // current instruction
  uint32 instr(0);

  // program counter
  // (a word address: it is copied from mem.addr, and two zero bits are
  //  appended wherever a byte address is needed)
  uint$addrW$ pc   = uninitialized;
  uint$addrW$ next_pc <:: pc + 1; // next_pc tracks the expression 'pc + 1'

  // value that has been loaded from memory
  int32 loaded     = uninitialized;

  // decoder + ALU, executes the instruction and tells processor what to do
  // exec.trigger is not bound here: it is driven low in always_before and
  // pulsed from the algorithm.
  // NOTE(review): per the Silice documentation '<:' tracks the bound value
  // within the cycle while '<::' uses its value at cycle start - confirm.
  execute exec(
    instr <:: instr, pc <:: pc, xa <: xregsA.rdata, xb <: xregsB.rdata
  );

  // exec.n rebuilt as a 32-bit signed value: its low addrW+2 bits, with
  // bit addrW+2 replicated above them (written to rd when exec.storeAddr)
  int32 sext_n <: {{$32-(addrW+2)${exec.n[$addrW+2$,1]}},exec.n[0,$addrW+2$]};
  // what do we write in register?
  // every source is gated by its own flag and contributes zero otherwise:
  // address of the following instruction (jump), next address adder result
  // (storeAddr), direct value (storeVal), loaded value (load), ALU (intop)
  int32 write_back <: (exec.jump      ? (next_pc<<2)        : 32b0)
                    | (exec.storeAddr ? sext_n              : 32b0)
                    | (exec.storeVal  ? exec.val            : 32b0)
                    | (exec.load      ? loaded              : 32b0)
                    | (exec.intop     ? exec.r              : 32b0);

  // The 'always_before' block is applied at the start of every cycle.
  // This is a good place to set default values, which also indicates
  // to Silice that some variables (e.g. xregsA.wdata) are fully set
  // every cycle, enabling further optimizations.
  // Default values are overridden from within the algorithm loop.
  always_before {
    // decodes values loaded from memory (used when exec.load == 1)
    // the word read is shifted right by 8 times exec.n[0,2], which brings the
    // addressed byte down to bit 0
    uint32 aligned = mem.rdata >> {exec.n[0,2],3b000};
    // exec.op[0,2] gives the size; the upper bits are filled with the sign bit
    // of the loaded value, or with zeros when exec.op[2,1] is set
    switch ( exec.op[0,2] ) { // LB / LBU, LH / LHU, LW
      case 2b00:{ loaded = {{24{(~exec.op[2,1])&aligned[ 7,1]}},aligned[ 0,8]}; }
      case 2b01:{ loaded = {{16{(~exec.op[2,1])&aligned[15,1]}},aligned[ 0,16]};}
      case 2b10:{ loaded = aligned;   }
      default:  { loaded = {32{1bx}}; } // don't care
    }
    // what to write on a store (used when exec.store == 1)
    // the register value is moved up to the byte lanes selected by exec.n[0,2]
    mem.wdata      = xregsB.rdata << {exec.n[0,2],3b000};
    // maintain write enable low (pulses high when needed)
    mem.wenable    = 4b0000;
    // maintain alu trigger low
    exec.trigger   = 0;
    // maintain register wenable low
    // (pulsed when necessary)
    xregsA.wenable = 0;
  }

  algorithm <onehot,autorun> {
    // =========== CPU runs forever
    while (1) {

      // data is now available
      // latch the instruction word, and the address it was read from
      instr           = mem.rdata;
      pc              = mem.addr;

  ++: // wait for register read (BRAM takes one cycle)

      exec.trigger    = 1;

      while (1) { // decode + ALU refresh during the cycle entering the loop

        // this operations loop allows to wait for ALU when needed
        // it is built such that no cycles are wasted

        // load/store?
        if (exec.load | exec.store) {
          // memory address from which to load/store
          mem.addr   = exec.n >> 2;
          // == Store (enabled if exec.store == 1)
          // build write mask depending on SB, SH, SW
          // before shifting the mask is 0001 (SB), 0011 (SH) or 1111 (SW); it
          // is then moved to the byte lanes selected by exec.n[0,2].
          // assumes aligned accesses.
          // NOTE(review): this comment used to read 'SW => next_addr[0,2] == 2',
          // yet the SW mask is only complete when exec.n[0,2] == 0 - confirm.
          mem.wenable = ({4{exec.store}} & { { 2{exec.op[0,2]==2b10} },
                                                exec.op[0,1] | exec.op[1,1], 1b1
                                          } ) << exec.n[0,2];

  ++: // wait for data transaction

          // == Load (enabled if exec.load == 1)
          // commit result
          // (exec.no_rd is high on a store, so a store never writes a register)
          xregsA.wenable = ~exec.no_rd;
          // restore address to program counter
          mem.addr       = next_pc;
          // exit the operations loop
 $$if ICEV_VERILATOR_TRACE then
          // this is used by SOCs/ice-v-cmp, to track retired instr. and compare CPUs
 					__verilog("$c32(\"cpu_retires(1,\",%,\",\",%,\",\",%,\",\",%,\");\");",
                    {pc,2b00},instr,xregsA.wenable?exec.write_rd:0,write_back);
 $$end
          break;
          //  instruction read from BRAM and write to register
          //  occurs as we jump back to loop start

        } else {
          // commit result
          // (set on every pass through this branch, including the passes made
          //  while exec.working is still high)
          xregsA.wenable = ~exec.no_rd;
          // next instruction address
          mem.addr       = exec.jump ? (exec.n >> 2) : next_pc;
          // ALU done?
          if (exec.working == 0) {
            // yes: all is correct, stop here
 $$if ICEV_VERILATOR_TRACE then
          // this is used by SOCs/ice-v-cmp, to track retired instr. and compare CPUs
 					__verilog("$c32(\"cpu_retires(1,\",%,\",\",%,\",\",%,\",\",%,\");\");",
                    {pc,2b00},instr,xregsA.wenable?exec.write_rd:0,write_back);
 $$end
            break;
            //  instruction read from BRAM and write to register
            //  occurs as we jump back to loop start
          }
        }
      }
    }
  }

  // the 'always_after' block is executed at the end of every cycle
  always_after {
    // write back data to both register BRAMs
    xregsA.wdata   = write_back;      xregsB.wdata   = write_back;
    // xregsB written when xregsA is
    xregsB.wenable = xregsA.wenable;
    // write to write_rd, else track instruction register
    // (each BRAM has a single address: it points at the destination register
    //  when writing, and at rs1 / rs2 of the current instruction otherwise)
    xregsA.addr    = xregsA.wenable ? exec.write_rd : Rtype(instr).rs1;
    xregsB.addr    = xregsA.wenable ? exec.write_rd : Rtype(instr).rs2;
  }
 }
	// SL 2020-06-12 @sylefeb
	//
	// Fun with RISC-V! RV32I cpu, see README.md
	//
	// https://github.com/sylefeb/Silice
	// MIT license, see LICENSE_MIT in Silice repo root

	$$if ICEV_FAST_SHIFT then
	$$print("Ice-V configured for fast shift (barrel shifter)")
	$$end

	// --------------------------------------------------
	// Processor
	// --------------------------------------------------

	// Rtype: names the fields of a 32-bit instruction word, so that the decoder
	// can write e.g. Rtype(instr).rd rather than slicing bits by hand.
	// The field widths add up to 32 bits.
	// The guard below declares the bitfield only while ICEV_RTYPE is not set, and
	// sets it right after (presumably so that a second inclusion is harmless).
	$$if not ICEV_RTYPE then
	bitfield Rtype {
	  uint1 unused1,
	  uint1 sign,     // selects subtraction, and arithmetic right shifts
	  uint5 unused2,
	  uint5 rs2,      // index of the second source register
	  uint5 rs1,      // index of the first source register
	  uint3 op,       // operation selector (ALU op, branch type, load/store size)
	  uint5 rd,       // index of the destination register
	  uint7 opcode
	}
	$$ICEV_RTYPE = 1
	$$end

	// --------------------------------------------------
	// execute: decoder + ALU
	// - decodes instructions
	// - performs all integer computations
	//
	// The whole unit is one always block: decoding, the ALU, the branch
	// comparator and the next address adder are evaluated every clock cycle
	// from the current instr, pc, xa and xb. State kept across cycles is limited
	// to the cycle counter and, without ICEV_FAST_SHIFT, the shifter (shamt,
	// working, and r which carries the partially shifted value).
	//
	// Outputs, as driven below:
	//   op         Rtype(instr).op; the CPU also uses it to size loads and stores
	//   write_rd   index of the destination register
	//   no_rd      1 when no register must be written (branch, store or rd == 0)
	//   jump       1 on JAL, JALR, or on a branch whose condition holds
	//   load       1 on a load opcode
	//   store      1 on a store opcode
	//   val        LUI immediate or cycle counter, meaningful when storeVal == 1
	//   storeVal   1 when val is the value to write (LUI or Cycles opcode)
	//   working    1 while the one-bit-per-clock shifter still has bits to shift
	//   n          result of the next address adder
	//   storeAddr  1 on AUIPC
	//   intop      1 on an integer operation (IntImm or IntReg opcode)
	//   r          result of the ALU

	unit execute(
	  // instruction, program counter and registers
	  input  uint32 instr, input  uint$addrW$ pc, input int32 xa, input int32 xb,
	  // trigger: pulsed high when the decoder + ALU should start
	  // (in this unit it only gates the start of a multi-cycle shift)
	  input  uint1  trigger,
	  // outputs all information the processor needs to decide what to do next
	  output uint3  op,    output uint5  write_rd, output  uint1  no_rd,
	  output uint1  jump,  output uint1  load,     output  uint1  store,
	  output int32  val,   output uint1  storeVal, output  uint1  working(0),
	  output uint32 n,     output uint1  storeAddr, // next address adder
	  output uint1  intop, output int32  r,         // integer operations
	) {
	  uint5  shamt(0);  uint32 cycle(0); // shifter status and cycle counter
	  always {
	    // ==== decode immediates
	    // (bit selects read [first bit, width]; every immediate except imm_u
	    //  replicates instr bit 31 in its upper bits)
	    int32 imm_u  = {instr[12,20],12b0};
	    int32 imm_j  = {{12{instr[31,1]}},instr[12,8],instr[20,1],instr[21,10],1b0};
	    int32 imm_i  = {{20{instr[31,1]}},instr[20,12]};
	    int32 imm_b  = {{20{instr[31,1]}},instr[7,1],instr[25,6],instr[8,4],1b0};
	    int32 imm_s  = {{20{instr[31,1]}},instr[25,7],instr[7,5]};
	    // ==== decode opcode                 // load-store op
	    // only instr bits 2 to 6 are compared, the two lowest bits are not examined
	    uint5 opcode    = instr[ 2, 5];       op           = Rtype(instr).op;
	    uint1 AUIPC     = opcode == 5b00101;  uint1 LUI    = opcode == 5b01101;
	    uint1 JAL       = opcode == 5b11011;  uint1 JALR   = opcode == 5b11001;
	    uint1 IntImm    = opcode == 5b00100;  uint1 IntReg = opcode == 5b01100;
	    // Cycles: opcode for which the cycle counter is returned (see val below)
	    uint1 Cycles    = opcode == 5b11100;  uint1 branch = opcode == 5b11000;
	    uint1 regOrImm  = IntReg  | branch;                    // reg or imm in ALU?
	    uint1 pcOrReg   = AUIPC   | JAL    | branch;           // pc or reg in addr?
	    uint1 sub       = IntReg  & Rtype(instr).sign;         // subtract
	    uint1 aluShift  = (IntImm | IntReg) & op[0,2] == 2b01; // shift requested
	    // ==== select next address adder first input
	    // pc is extended with two zero bits below (and one above) before being
	    // used as an address, the alternative being register xa
	    int32 addr_a    = pcOrReg ? __signed({1b0,pc,2b0}) : xa;
	    // ==== select ALU second input
	    int32 b         = regOrImm ? (xb) : imm_i;
	    // ==== allows to do subtraction and all comparisons with a single adder
	    // trick from femtorv32/swapforth/J1
	    // 33-bit sum of {1,~b}, {0,xa} and 1: its low 32 bits are xa - b, and
	    // bit 32 is set exactly when xa is below b as unsigned numbers
	    int33 a_minus_b = {1b1,~b} + {1b0,xa} + 33b1;
	    // signed comparison: when the sign bits differ the negative operand is the
	    // smaller one, otherwise the unsigned outcome applies
	    uint1 a_lt_b    = (xa[31,1] ^ b[31,1]) ? xa[31,1] : a_minus_b[32,1];
	    uint1 a_lt_b_u  = a_minus_b[32,1];
	    uint1 a_eq_b    = a_minus_b[0,32] == 0;
	    // ==== set decoder outputs depending on incoming instructions
	    // register to write to?
	    write_rd     = Rtype(instr).rd;
	    // load/store?
	    load         = opcode == 5b00000;   store        = opcode == 5b01000;
	    // do we have to write a result to a register?
	    no_rd        = branch  | store  | (Rtype(instr).rd == 5b0);
	    // integer operations                // store next address?
	    intop        = (IntImm | IntReg);   storeAddr    = AUIPC;
	    // value to store directly           // store value?
	    val          = LUI ? imm_u : cycle; storeVal     = LUI     | Cycles;
	    // ==== select immediate for the next address computation
	    // 'or trick' from femtorv32
	    // (the five conditions test distinct opcodes, so at most one term is
	    //  non-zero and the OR acts as a selector)
	    int32 addr_imm  =  (AUIPC  ? imm_u : 32b0) | (JAL         ? imm_j : 32b0)
	                    |  (branch ? imm_b : 32b0) | ((JALR|load) ? imm_i : 32b0)
	                    |  (store  ? imm_s : 32b0);
	    // ==== increment cycle counter
	    // (done after val was set above, and on every clock)
	    cycle        = cycle + 1;
	    int32 shift(0);  uint1 j(0); // temp variables for shifter and comparator
	    // ====================== ALU
	$$if not ICEV_FAST_SHIFT then
	    // shift (one bit per clock)
	    if (working) {
	      // decrease shift size
	      shamt = shamt - 1;
	      // shift one bit
	      // op[2,1] set: right shift, arithmetic when the sign field is set,
	      // logical otherwise; op[2,1] clear: left shift.
	      // r is read here before the switch below assigns it.
	      // NOTE(review): this relies on r still holding the value computed on
	      // the previous cycle - confirm against Silice output semantics.
	      shift = op[2,1] ? (Rtype(instr).sign ? {r[31,1],r[1,31]}
	                          : {__signed(1b0),r[1,31]}) : {r[0,31],__signed(1b0)};
	    } else {
	      // start shifting?
	      // the shift amount is the low five bits of the ALU second input, and is
	      // only loaded when a shift is decoded while trigger is high
	      shamt = ((aluShift & trigger) ? __unsigned(b[0,5]) : 0);
	      // store value to be shifted
	      shift = xa;
	    }
	    // are we still shifting?
	    working = (shamt != 0);
	$$end
	    // all ALU operations
	    switch (op) {
	      case 3b000: { r = sub ? a_minus_b : xa + b; }            // ADD / SUB
	      case 3b010: { r = a_lt_b; } case 3b011: { r = a_lt_b_u; }// SLTI / SLTU
	      case 3b100: { r = xa ^ b; } case 3b110: { r = xa | b;   }// XOR / OR
	$$if not ICEV_FAST_SHIFT then
	      case 3b001: { r = shift;  } case 3b101: { r = shift;    }// SLLI/SRLI/SRAI
	$$else
	      // fast shift: the full shift is computed in a single expression
	      case 3b001: { r = (xa <<< b[0,5]); }
	      case 3b101: { r = Rtype(instr).sign ? (xa >>> b[0,5]) : (xa >> b[0,5]); }
	$$end
	      case 3b111: { r = xa & b; }     // AND
	      default:    { r = {32{1bx}}; }  // don't care
	    }
	    // ====================== Comparator for branching
	    // op[1,2] selects the comparison, op[0,1] asks for the opposite outcome
	    switch (op[1,2]) {
	      case 2b00:  { j = a_eq_b;  } /*BEQ */ case 2b10: { j=a_lt_b;} /*BLT*/
	      case 2b11:  { j = a_lt_b_u;} /*BLTU*/ default:   { j = 1bx; }
	    }
	    jump = (JAL | JALR) | (branch & (j ^ op[0,1]));
	    //                                   ^^^^^^^ negates comparator result
	    // ====================== Next address adder
	    // one adder serves AUIPC, jumps, branches, loads and stores: addr_a and
	    // addr_imm were selected above according to the opcode
	    n = addr_a + addr_imm;
	  }

	}

	// --------------------------------------------------
	// The Risc-V RV32I CPU itself
	//
	// Fetches instructions through the 'mem' port, reads both source registers
	// from a pair of BRAMs, lets the 'execute' unit decode and compute, then
	// commits the result to the register file and/or to memory.
	// mem.addr is used as a word address: addresses coming out of the next
	// address adder are shifted right by two before being applied, and their two
	// low bits select the byte lanes (read shift, write shift and write mask).

	unit rv32i_cpu(bram_port mem) {

	  // register file, uses two BRAMs to fetch two registers at once
	  // (both always receive the same writes, see always_after)
	  bram int32 xregsA[32] = {pad(0)}; bram int32 xregsB[32] = {pad(0)};

	  // current instruction
	  uint32 instr(0);

	  // program counter
	  // (a word address: it is copied from mem.addr, and two zero bits are
	  //  appended wherever a byte address is needed)
	  uint$addrW$ pc   = uninitialized;
	  uint$addrW$ next_pc <:: pc + 1; // next_pc tracks the expression 'pc + 1'

	  // value that has been loaded from memory
	  int32 loaded     = uninitialized;

	  // decoder + ALU, executes the instruction and tells processor what to do
	  // exec.trigger is not bound here: it is driven low in always_before and
	  // pulsed from the algorithm.
	  // NOTE(review): per the Silice documentation '<:' tracks the bound value
	  // within the cycle while '<::' uses its value at cycle start - confirm.
	  execute exec(
	    instr <:: instr, pc <:: pc, xa <: xregsA.rdata, xb <: xregsB.rdata
	  );

	  // exec.n rebuilt as a 32-bit signed value: its low addrW+2 bits, with
	  // bit addrW+2 replicated above them (written to rd when exec.storeAddr)
	  int32 sext_n <: {{$32-(addrW+2)${exec.n[$addrW+2$,1]}},exec.n[0,$addrW+2$]};
	  // what do we write in register?
	  // every source is gated by its own flag and contributes zero otherwise:
	  // address of the following instruction (jump), next address adder result
	  // (storeAddr), direct value (storeVal), loaded value (load), ALU (intop)
	  int32 write_back <: (exec.jump      ? (next_pc<<2)        : 32b0)
	                    | (exec.storeAddr ? sext_n              : 32b0)
	                    | (exec.storeVal  ? exec.val            : 32b0)
	                    | (exec.load      ? loaded              : 32b0)
	                    | (exec.intop     ? exec.r              : 32b0);

	  // The 'always_before' block is applied at the start of every cycle.
	  // This is a good place to set default values, which also indicates
	  // to Silice that some variables (e.g. xregsA.wdata) are fully set
	  // every cycle, enabling further optimizations.
	  // Default values are overridden from within the algorithm loop.
	  always_before {
	    // decodes values loaded from memory (used when exec.load == 1)
	    // the word read is shifted right by 8 times exec.n[0,2], which brings the
	    // addressed byte down to bit 0
	    uint32 aligned = mem.rdata >> {exec.n[0,2],3b000};
	    // exec.op[0,2] gives the size; the upper bits are filled with the sign bit
	    // of the loaded value, or with zeros when exec.op[2,1] is set
	    switch ( exec.op[0,2] ) { // LB / LBU, LH / LHU, LW
	      case 2b00:{ loaded = {{24{(~exec.op[2,1])&aligned[ 7,1]}},aligned[ 0,8]}; }
	      case 2b01:{ loaded = {{16{(~exec.op[2,1])&aligned[15,1]}},aligned[ 0,16]};}
	      case 2b10:{ loaded = aligned;   }
	      default:  { loaded = {32{1bx}}; } // don't care
	    }
	    // what to write on a store (used when exec.store == 1)
	    // the register value is moved up to the byte lanes selected by exec.n[0,2]
	    mem.wdata      = xregsB.rdata << {exec.n[0,2],3b000};
	    // maintain write enable low (pulses high when needed)
	    mem.wenable    = 4b0000;
	    // maintain alu trigger low
	    exec.trigger   = 0;
	    // maintain register wenable low
	    // (pulsed when necessary)
	    xregsA.wenable = 0;
	  }

	  algorithm <onehot,autorun> {
	    // =========== CPU runs forever
	    while (1) {

	      // data is now available
	      // latch the instruction word, and the address it was read from
	      instr           = mem.rdata;
	      pc              = mem.addr;

	  ++: // wait for register read (BRAM takes one cycle)

	      exec.trigger    = 1;

	      while (1) { // decode + ALU refresh during the cycle entering the loop

	        // this operations loop allows to wait for ALU when needed
	        // it is built such that no cycles are wasted

	        // load/store?
	        if (exec.load | exec.store) {
	          // memory address from which to load/store
	          mem.addr   = exec.n >> 2;
	          // == Store (enabled if exec.store == 1)
	          // build write mask depending on SB, SH, SW
	          // before shifting the mask is 0001 (SB), 0011 (SH) or 1111 (SW); it
	          // is then moved to the byte lanes selected by exec.n[0,2].
	          // assumes aligned accesses.
	          // NOTE(review): this comment used to read 'SW => next_addr[0,2] == 2',
	          // yet the SW mask is only complete when exec.n[0,2] == 0 - confirm.
	          mem.wenable = ({4{exec.store}} & { { 2{exec.op[0,2]==2b10} },
	                                               exec.op[0,1] | exec.op[1,1], 1b1
	                                           } ) << exec.n[0,2];

	  ++: // wait for data transaction

	          // == Load (enabled if exec.load == 1)
	          // commit result
	          // (exec.no_rd is high on a store, so a store never writes a register)
	          xregsA.wenable = ~exec.no_rd;
	          // restore address to program counter
	          mem.addr       = next_pc;
	          // exit the operations loop
	$$if ICEV_VERILATOR_TRACE then
	          // this is used by SOCs/ice-v-cmp, to track retired instr. and compare CPUs
	          __verilog("$c32(\"cpu_retires(1,\",%,\",\",%,\",\",%,\",\",%,\");\");",
	                    {pc,2b00},instr,xregsA.wenable?exec.write_rd:0,write_back);
	$$end
	          break;
	          //  instruction read from BRAM and write to register
	          //  occurs as we jump back to loop start

	        } else {
	          // commit result
	          // (set on every pass through this branch, including the passes made
	          //  while exec.working is still high)
	          xregsA.wenable = ~exec.no_rd;
	          // next instruction address
	          mem.addr       = exec.jump ? (exec.n >> 2) : next_pc;
	          // ALU done?
	          if (exec.working == 0) {
	            // yes: all is correct, stop here
	$$if ICEV_VERILATOR_TRACE then
	            // this is used by SOCs/ice-v-cmp, to track retired instr. and compare CPUs
	            __verilog("$c32(\"cpu_retires(1,\",%,\",\",%,\",\",%,\",\",%,\");\");",
	                      {pc,2b00},instr,xregsA.wenable?exec.write_rd:0,write_back);
	$$end
	            break;
	            //  instruction read from BRAM and write to register
	            //  occurs as we jump back to loop start
	          }
	        }
	      }
	    }
	  }

	  // the 'always_after' block is executed at the end of every cycle
	  always_after {
	    // write back data to both register BRAMs
	    xregsA.wdata   = write_back;      xregsB.wdata   = write_back;
	    // xregsB written when xregsA is
	    xregsB.wenable = xregsA.wenable;
	    // write to write_rd, else track instruction register
	    // (each BRAM has a single address: it points at the destination register
	    //  when writing, and at rs1 / rs2 of the current instruction otherwise)
	    xregsA.addr    = xregsA.wenable ? exec.write_rd : Rtype(instr).rs1;
	    xregsB.addr    = xregsA.wenable ? exec.write_rd : Rtype(instr).rs2;
	  }
	}
No results found