diff --git a/FemtoRV/RTL/PROCESSOR/femtorv32_quark.v b/FemtoRV/RTL/PROCESSOR/femtorv32_quark.v index bd7079c5..2790c8a7 100644 --- a/FemtoRV/RTL/PROCESSOR/femtorv32_quark.v +++ b/FemtoRV/RTL/PROCESSOR/femtorv32_quark.v @@ -34,23 +34,22 @@ `define NRV_ABI "ilp32" `define NRV_OPTIMIZE "-Os" -module FemtoRV32( - input clk, - - output [31:0] mem_addr, // address bus - output [31:0] mem_wdata, // data to be written - output [3:0] mem_wmask, // write mask for the 4 bytes of each word - input [31:0] mem_rdata, // input lines for both data and instr - output mem_rstrb, // active to initiate memory read (used by IO) - input mem_rbusy, // asserted if memory is busy reading value - input mem_wbusy, // asserted if memory is busy writing value - - input reset // set to 0 to reset the processor +module FemtoRV32 #( + parameter RESET_ADDR = 32'h00000000, + parameter ADDR_WIDTH = 24 +)( + input clk, + input reset, // set to 0 to reset the processor + // system bus (shared by instruction fetch and data access) + output wire [31:0] mem_addr, // address bus + output reg [31:0] mem_wdata, // data to be written (reg: driven by a combinational always block) + output wire [3:0] mem_wmask, // write mask for the 4 bytes of each word + input wire [31:0] mem_rdata, // input lines for both data and instr + output wire mem_rstrb, // active to initiate memory read (used by IO) + input wire mem_rbusy, // asserted if memory is busy reading value + input wire mem_wbusy // asserted if memory is busy writing value ); - parameter RESET_ADDR = 32'h00000000; - parameter ADDR_WIDTH = 24; - /***************************************************************************/ // Instruction decoding. 
/***************************************************************************/ @@ -98,6 +97,7 @@ module FemtoRV32( reg [31:0] rs2; reg [31:0] registerFile [31:0]; + // write access (guarded by writeBack and rdId != 0) always @(posedge clk) begin if (writeBack) if (rdId != 0) @@ -132,36 +132,50 @@ module FemtoRV32( wire LTU = aluMinus[32]; wire EQ = (aluMinus[31:0] == 0); + // Logical operations (XOR/OR/AND), selected by funct3[1:0] = instr[13:12] + reg [32-1:0] aluLog; + + always @(*) + case (instr[13:12]) + 2'b00 : aluLog = aluIn1 ^ aluIn2; // XOR + 2'b10 : aluLog = aluIn1 | aluIn2; // OR + 2'b11 : aluLog = aluIn1 & aluIn2; // AND + default: aluLog = 32'hxxxxxxxx; // 2'b01 is never routed to aluOut: don't care + endcase + // Notes: // - instr[30] is 1 for SUB and 0 for ADD // - for SUB, need to test also instr[5] to discriminate ADDI: // (1 for ADD/SUB, 0 for ADDI, and Iimm used by ADDI overlaps bit 30 !) // - instr[30] is 1 for SRA (do sign extension) and 0 for SRL - - wire [31:0] aluOut = - (funct3Is[0] ? instr[30] & instr[5] ? aluMinus[31:0] : aluPlus : 32'b0) | - (funct3Is[2] ? {31'b0, LT} : 32'b0) | - (funct3Is[3] ? {31'b0, LTU} : 32'b0) | - (funct3Is[4] ? aluIn1 ^ aluIn2 : 32'b0) | - (funct3Is[6] ? aluIn1 | aluIn2 : 32'b0) | - (funct3Is[7] ? aluIn1 & aluIn2 : 32'b0) | - (funct3IsShift ? aluReg : 32'b0) ; + reg [32-1:0] aluOut; + always @(*) + case (instr[14:12]) + 3'b000: aluOut = instr[30] & instr[5] ? 
aluMinus[31:0] : aluPlus; // ADD/SUB + 3'b001: aluOut = aluReg; // SLL (result held in the shifter register) + 3'b010: aluOut = {31'b0, LT}; // SLT + 3'b011: aluOut = {31'b0, LTU}; // SLTU + 3'b100: aluOut = aluLog; // XOR + 3'b101: aluOut = aluReg; // SRL/SRA (result held in the shifter register) + 3'b110: aluOut = aluLog; // OR + 3'b111: aluOut = aluLog; // AND + endcase wire funct3IsShift = funct3Is[1] | funct3Is[5]; always @(posedge clk) begin if(aluWr) begin if (funct3IsShift) begin // SLL, SRA, SRL - aluReg <= aluIn1; - aluShamt <= aluIn2[4:0]; - end + aluReg <= aluIn1; + aluShamt <= aluIn2[4:0]; + end end `ifdef NRV_TWOLEVEL_SHIFTER else if(|aluShamt[4:2]) begin // Shift by 4 aluShamt <= aluShamt - 4; - aluReg <= funct3Is[1] ? 
aluReg << 4 : - {{4{instr[30] & aluReg[31]}}, aluReg[31:4]}; + aluReg <= funct3Is[1] ? aluReg << 4 : + {{4{instr[30] & aluReg[31]}}, aluReg[31:4]}; end else `endif // Compact form of: @@ -171,8 +185,8 @@ module FemtoRV32( if (|aluShamt) begin aluShamt <= aluShamt - 1; - aluReg <= funct3Is[1] ? aluReg << 1 : // SLL - {instr[30] & aluReg[31], aluReg[31:1]}; // SRA,SRL + aluReg <= funct3Is[1] ? aluReg << 1 : // SLL + {instr[30] & aluReg[31], aluReg[31:1]}; // SRA,SRL end end @@ -180,13 +194,19 @@ module FemtoRV32( // The predicate for conditional branches. /***************************************************************************/ - wire predicate = - funct3Is[0] & EQ | // BEQ - funct3Is[1] & !EQ | // BNE - funct3Is[4] & LT | // BLT - funct3Is[5] & !LT | // BGE - funct3Is[6] & LTU | // BLTU - funct3Is[7] & !LTU ; // BGEU + reg predicate; + + always @(*) + case (instr[14:12]) + 3'b000: predicate = EQ ; // BEQ + 3'b001: predicate = !EQ ; // BNE + 3'b010: predicate = 1'bx; // no branch uses this funct3: don't care + 3'b011: predicate = 1'bx; // no branch uses this funct3: don't care + 3'b100: predicate = LT ; // BLT + 3'b101: predicate = !LT ; // BGE + 3'b110: predicate = LTU; // BLTU + 3'b111: predicate = !LTU; // BGEU + endcase /***************************************************************************/ // Program counter and branch target computation. @@ -202,20 +222,20 @@ module FemtoRV32( // branch->PC+Bimm AUIPC->PC+Uimm JAL->PC+Jimm // Equivalent to PCplusImm = PC + (isJAL ? Jimm : isAUIPC ? Uimm : Bimm) wire [ADDR_WIDTH-1:0] PCplusImm = PC + ( instr[3] ? Jimm[ADDR_WIDTH-1:0] : - instr[4] ? Uimm[ADDR_WIDTH-1:0] : - Bimm[ADDR_WIDTH-1:0] ); + instr[4] ? Uimm[ADDR_WIDTH-1:0] : + Bimm[ADDR_WIDTH-1:0] ); // A separate adder to compute the destination of load/store. // testing instr[5] is equivalent to testing isStore in this context. wire [ADDR_WIDTH-1:0] loadstore_addr = rs1[ADDR_WIDTH-1:0] + - (instr[5] ? Simm[ADDR_WIDTH-1:0] : Iimm[ADDR_WIDTH-1:0]); + (instr[5] ? 
Simm[ADDR_WIDTH-1:0] : Iimm[ADDR_WIDTH-1:0]); /* verilator lint_off WIDTH */ // internal address registers and cycles counter may have less than // 32 bits, so we deactivate width test for mem_addr and writeBackData assign mem_addr = state[WAIT_INSTR_bit] | state[FETCH_INSTR_bit] ? - PC : loadstore_addr ; + PC : {loadstore_addr[ADDR_WIDTH-1:2], 2'b00} ; // data accesses drive the word-aligned address /***************************************************************************/ // The value written back to the register file. @@ -237,38 +257,47 @@ module FemtoRV32( /***************************************************************************/ // All memory accesses are aligned on 32 bits boundary. For this - // reason, we need some circuitry that does unaligned halfword + // reason, we need some circuitry that does unaligned half-word // and byte load/store, based on: - // - funct3[1:0]: 00->byte 01->halfword 10->word - // - mem_addr[1:0]: indicates which byte/halfword is accessed + // - funct3[1:0]: 00->byte 01->half 10->word + // - loadstore_addr[1:0]: indicates which byte/half is accessed - wire mem_byteAccess = instr[13:12] == 2'b00; // funct3[1:0] == 2'b00; - wire mem_halfwordAccess = instr[13:12] == 2'b01; // funct3[1:0] == 2'b01; + wire mem_byteAccess = instr[13:12] == 2'b00; // funct3[1:0] == 2'b00; + wire mem_halfAccess = instr[13:12] == 2'b01; // funct3[1:0] == 2'b01; // LOAD, in addition to funct3[1:0], LOAD depends on: // - funct3[2] (instr[14]): 0->do sign expansion 1->no sign expansion wire LOAD_sign = - !instr[14] & (mem_byteAccess ? LOAD_byte[7] : LOAD_halfword[15]); + !instr[14] & (mem_byteAccess ? LOAD_byte[7] : LOAD_half[15]); wire [31:0] LOAD_data = - mem_byteAccess ? {{24{LOAD_sign}}, LOAD_byte} : - mem_halfwordAccess ? {{16{LOAD_sign}}, LOAD_halfword} : - mem_rdata ; + mem_byteAccess ? {{24{LOAD_sign}}, LOAD_byte} : + mem_halfAccess ? {{16{LOAD_sign}}, LOAD_half} : + mem_rdata ; - wire [15:0] LOAD_halfword = - loadstore_addr[1] ? 
mem_rdata[31:16] : mem_rdata[15:0]; + wire [15:0] LOAD_half = + loadstore_addr[1] ? mem_rdata[31:16] : mem_rdata[15:0]; wire [7:0] LOAD_byte = - loadstore_addr[0] ? LOAD_halfword[15:8] : LOAD_halfword[7:0]; + loadstore_addr[0] ? LOAD_half[15:8] : LOAD_half[7:0]; // STORE - - assign mem_wdata[ 7: 0] = rs2[7:0]; - assign mem_wdata[15: 8] = loadstore_addr[0] ? rs2[7:0] : rs2[15: 8]; - assign mem_wdata[23:16] = loadstore_addr[1] ? rs2[7:0] : rs2[23:16]; - assign mem_wdata[31:24] = loadstore_addr[0] ? rs2[7:0] : - loadstore_addr[1] ? rs2[15:8] : rs2[31:24]; + always @(*) + case (instr[14:12]) + 3'b000 : case (loadstore_addr[1:0]) // byte store: rs2[7:0] placed in the lane given by address bits [1:0] + 2'b00: mem_wdata = {8'hxx , 8'hxx , 8'hxx , rs2[ 7: 0]}; + 2'b01: mem_wdata = {8'hxx , 8'hxx , rs2[ 7: 0], 8'hxx }; + 2'b10: mem_wdata = {8'hxx , rs2[ 7: 0], 8'hxx , 8'hxx }; + 2'b11: mem_wdata = {rs2[ 7: 0], 8'hxx , 8'hxx , 8'hxx }; + endcase + 3'b001 : casez (loadstore_addr[1]) // half store: rs2[15:0] placed in the half given by address bit 1 + 1'b0 : mem_wdata = {8'hxx , 8'hxx , rs2[15: 8], rs2[ 7: 0]}; + 1'b1 : mem_wdata = {rs2[15: 8], rs2[ 7: 0], 8'hxx , 8'hxx }; + endcase + 3'b010 : mem_wdata = {rs2[31:24], rs2[23:16], rs2[15: 8], rs2[ 7: 0]}; // word store + default: mem_wdata = {8'hxx , 8'hxx , 8'hxx , 8'hxx }; // any other funct3: don't care + endcase // The memory write mask: // 1111 if writing a word @@ -278,13 +307,13 @@ module FemtoRV32( // (depending on loadstore_addr[1:0]) wire [3:0] STORE_wmask = - mem_byteAccess ? - (loadstore_addr[1] ? - (loadstore_addr[0] ? 4'b1000 : 4'b0100) : - (loadstore_addr[0] ? 4'b0010 : 4'b0001) + mem_byteAccess ? + (loadstore_addr[1] ? + (loadstore_addr[0] ? 4'b1000 : 4'b0100) : + (loadstore_addr[0] ? 4'b0010 : 4'b0001) ) : - mem_halfwordAccess ? - (loadstore_addr[1] ? 4'b1100 : 4'b0011) : + mem_halfAccess ? + (loadstore_addr[1] ? 4'b1100 : 4'b0011) : 4'b1111; /*************************************************************************/ @@ -310,7 +339,7 @@ module FemtoRV32( // register write-back enable. 
wire writeBack = ~(isBranch | isStore ) & - (state[EXECUTE_bit] | state[WAIT_ALU_OR_MEM_bit]); + (state[EXECUTE_bit] | state[WAIT_ALU_OR_MEM_bit]); // The memory-read signal. assign mem_rstrb = state[EXECUTE_bit] & isLoad | state[FETCH_INSTR_bit]; @@ -324,8 +353,8 @@ module FemtoRV32( wire jumpToPCplusImm = isJAL | (isBranch & predicate); `ifdef NRV_IS_IO_ADDR wire needToWait = isLoad | - isStore & `NRV_IS_IO_ADDR(mem_addr) | - isALU & funct3IsShift; + isStore & `NRV_IS_IO_ADDR(mem_addr) | + isALU & funct3IsShift; `else wire needToWait = isLoad | isStore | isALU & funct3IsShift; `endif @@ -353,7 +382,7 @@ module FemtoRV32( PC <= isJALR ? {aluPlus[ADDR_WIDTH-1:1],1'b0} : jumpToPCplusImm ? PCplusImm : PCplus4; - state <= needToWait ? WAIT_ALU_OR_MEM : FETCH_INSTR; + state <= needToWait ? WAIT_ALU_OR_MEM : FETCH_INSTR; end state[WAIT_ALU_OR_MEM_bit]: begin @@ -361,7 +390,7 @@ module FemtoRV32( end default: begin // FETCH_INSTR - state <= WAIT_INSTR; + state <= WAIT_INSTR; end endcase @@ -415,4 +444,3 @@ endmodule // [2] state uses 1-hot encoding (at any time, state has only one bit set to 1). // It uses a larger number of bits (one bit per state), but often results in // a both more compact (fewer LUTs) and faster state machine. -