Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding infrastructure for RoCC interface attached to Spike processor. Memory interface to follow. #1839

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
185 changes: 184 additions & 1 deletion generators/chipyard/src/main/resources/csrc/spiketile.cc
Original file line number Diff line number Diff line change
@@ -10,6 +10,18 @@
#include <vpi_user.h>
#include <svdpi.h>

/* Includes for rocc support */
#include <riscv/extension.h>
#include <riscv/rocc.h>
#include <random>
#include <limits>
#include <riscv/mmu.h>
#include <riscv/trap.h>
#include <stdexcept>
#include <iostream>
#include <assert.h>
#include <math.h>

#if __has_include("spiketile_tsi.h")
#define SPIKETILE_HTIF_TSI
extern std::map<int, htif_t*> tsis;
@@ -89,6 +101,13 @@ class chipyard_simif_t : public simif_t
void tcm_a(uint64_t address, uint64_t data, uint32_t mask, uint32_t opcode, uint32_t size);
bool tcm_d(uint64_t *data);

bool rocc_handshake(rocc_insn_t *insn, reg_t* rs1, reg_t* rs2);
void push_rocc_insn(rocc_insn_t insn, reg_t rs1, reg_t rs2);
void push_rocc_result(long long int result);
long long int get_rocc_result();
void set_rocc_exists(bool exists);
bool get_rocc_exists();

void loadmem(size_t base, const char* fname);

void drain_stq();
@@ -155,6 +174,12 @@ class chipyard_simif_t : public simif_t
std::vector<writeback_t> wb_q;
std::vector<stq_entry_t> st_q;

std::vector<rocc_insn_t> rocc_insn_q;
std::vector<long long int> rocc_result_q;
std::vector<reg_t> rocc_rs1_q;
std::vector<reg_t> rocc_rs2_q;
bool rocc_exists;

std::map<std::pair<uint64_t, size_t>, uint64_t> readonly_cache;

bool mmio_valid;
@@ -181,6 +206,31 @@ class tile_t {
context_t stq_context;
};

/* Begin RoCC header file */
/**
 * Spike extension registered under the name "generic".
 *
 * It declares one handler per RoCC custom opcode (custom0..custom3) and keeps
 * a pointer to the chipyard_simif_t it was constructed with, which the
 * handlers use to reach the simulated accelerator.
 */
class generic_t : public extension_t
{
 public:
  /** @param s simulation interface the handlers talk to; stored, not owned. */
  generic_t(chipyard_simif_t* s) : simif(s) {}

  /** Name this extension is registered and looked up under. */
  const char* name() { return "generic"; }

  // One handler per custom opcode. Each receives the decoded instruction and
  // the two source operand values, and returns the value for the destination.
  reg_t custom0(rocc_insn_t insn, reg_t xs1, reg_t xs2);
  reg_t custom1(rocc_insn_t insn, reg_t xs1, reg_t xs2);
  reg_t custom2(rocc_insn_t insn, reg_t xs1, reg_t xs2);
  reg_t custom3(rocc_insn_t insn, reg_t xs1, reg_t xs2);

  /** Instruction descriptors for the custom opcodes. */
  virtual std::vector<insn_desc_t> get_instructions();
  /** Disassembler entries for this extension. */
  virtual std::vector<disasm_insn_t*> get_disasms();

  /** Nothing to reset: the extension holds no state besides `simif`. */
  void reset() {}

 protected:
  chipyard_simif_t* simif;
};
/* End RoCC header file */

context_t *host;
std::map<int, tile_t*> tiles;
std::ostream sout(nullptr);
@@ -204,6 +254,7 @@ extern "C" void spike_tile(int hartid, char* isa,
long long int ipc,
long long int cycle,
long long int* insns_retired,
unsigned char has_rocc,

char debug,
char mtip, char msip, char meip,
@@ -285,7 +336,16 @@ extern "C" void spike_tile(int hartid, char* isa,

unsigned char* tcm_d_valid,
unsigned char tcm_d_ready,
long long int* tcm_d_data
long long int* tcm_d_data,

unsigned char rocc_request_ready,
unsigned char* rocc_request_valid,
int* rocc_request_insn,
int* rocc_request_rs1,
int* rocc_request_rs2,
unsigned char rocc_response_valid,
long long int rocc_response_rd,
long long int rocc_response_result
)
{
if (!host) {
@@ -312,6 +372,11 @@ extern "C" void spike_tile(int hartid, char* isa,
sout);
simif->harts[hartid] = p;

std::function<extension_t*()> extension;
generic_t* my_generic_extension = new generic_t(simif);
p->register_extension(my_generic_extension);
simif->set_rocc_exists(has_rocc);

s_vpi_vlog_info vinfo;
if (!vpi_get_vlog_info(&vinfo))
abort();
@@ -425,8 +490,83 @@ extern "C" void spike_tile(int hartid, char* isa,
if (tcm_d_ready) {
*tcm_d_valid = simif->tcm_d((uint64_t*)tcm_d_data);
}

*rocc_request_valid = 0;
if (rocc_request_ready) {
*rocc_request_valid = simif->rocc_handshake((rocc_insn_t*) rocc_request_insn, (reg_t*) rocc_request_rs1, (reg_t*) rocc_request_rs2);
}

if (rocc_response_valid) {
simif->push_rocc_result(rocc_response_result);
}
}

/* Begin RoCC Section */
/**
 * Shared body of the four custom-opcode handlers below.
 *
 * If the simulation interface reports that no accelerator exists, prints a
 * diagnostic and terminates the process with exit code 1. Otherwise it queues
 * the instruction together with its two source operands and returns the next
 * accelerator result.
 *
 * @param simif simulation interface to issue the request through
 * @param insn  decoded RoCC instruction
 * @param xs1   value of the first source operand
 * @param xs2   value of the second source operand
 * @return value taken from the accelerator result queue
 */
static reg_t generic_issue_rocc(chipyard_simif_t* simif, rocc_insn_t insn, reg_t xs1, reg_t xs2) {
  if (!simif->get_rocc_exists()) {
    printf("Accelerator not instantiated, are you using the right config?\n");
    exit(1);
  }
  // Order matters: the request has to be queued before waiting on the result.
  simif->push_rocc_insn(insn, xs1, xs2);
  return simif->get_rocc_result();
}

/** Handler for the custom0 opcode; see generic_issue_rocc(). */
reg_t generic_t::custom0(rocc_insn_t insn, reg_t xs1, reg_t xs2) {
  return generic_issue_rocc(simif, insn, xs1, xs2);
}

/** Handler for the custom1 opcode; see generic_issue_rocc(). */
reg_t generic_t::custom1(rocc_insn_t insn, reg_t xs1, reg_t xs2) {
  return generic_issue_rocc(simif, insn, xs1, xs2);
}

/** Handler for the custom2 opcode; see generic_issue_rocc(). */
reg_t generic_t::custom2(rocc_insn_t insn, reg_t xs1, reg_t xs2) {
  return generic_issue_rocc(simif, insn, xs1, xs2);
}

/** Handler for the custom3 opcode; see generic_issue_rocc(). */
reg_t generic_t::custom3(rocc_insn_t insn, reg_t xs1, reg_t xs2) {
  return generic_issue_rocc(simif, insn, xs1, xs2);
}

// One define_custom_func instantiation per custom opcode. Each ties a
// generic_t handler method (last argument) to the identifier generic_customN
// (third argument); those identifiers are what generic_t::get_instructions()
// passes to push_custom_insn. The string "generic" matches generic_t::name().
// NOTE(review): the macro is presumably provided by <riscv/rocc.h> — confirm
// against the Spike version in use.
define_custom_func(generic_t, "generic", generic_custom0, custom0);
define_custom_func(generic_t, "generic", generic_custom1, custom1);
define_custom_func(generic_t, "generic", generic_custom2, custom2);
define_custom_func(generic_t, "generic", generic_custom3, custom3);

/**
 * Builds the instruction table for this extension: one entry per RoCC custom
 * opcode, each registered with ROCC_OPCODE_MASK, ILLEGAL_INSN_FUNC and the
 * matching generic_customN function.
 */
std::vector<insn_desc_t> generic_t::get_instructions()
{
  std::vector<insn_desc_t> table;

  push_custom_insn(table, ROCC_OPCODE0, ROCC_OPCODE_MASK, ILLEGAL_INSN_FUNC, generic_custom0);
  push_custom_insn(table, ROCC_OPCODE1, ROCC_OPCODE_MASK, ILLEGAL_INSN_FUNC, generic_custom1);
  push_custom_insn(table, ROCC_OPCODE2, ROCC_OPCODE_MASK, ILLEGAL_INSN_FUNC, generic_custom2);
  push_custom_insn(table, ROCC_OPCODE3, ROCC_OPCODE_MASK, ILLEGAL_INSN_FUNC, generic_custom3);

  return table;
}

/** This extension contributes no disassembler entries; always returns an empty list. */
std::vector<disasm_insn_t*> generic_t::get_disasms()
{
  return std::vector<disasm_insn_t*>();
}
/*End RoCC Section*/

chipyard_simif_t::chipyard_simif_t(size_t icache_ways,
size_t icache_sets,
@@ -1055,6 +1195,49 @@ bool chipyard_simif_t::tcm_d(uint64_t* data) {
return true;
}

/**
 * Hands the oldest queued RoCC request to the caller.
 *
 * @param insn receives the queued instruction
 * @param rs1  receives the queued first source operand
 * @param rs2  receives the queued second source operand
 * @return true if a request was dequeued into the outputs; false if the
 *         instruction queue was empty, in which case the outputs are untouched
 */
bool chipyard_simif_t::rocc_handshake(rocc_insn_t* insn, reg_t* rs1, reg_t* rs2) {
  const bool have_request = !rocc_insn_q.empty();

  if (have_request) {
    // The three queues are filled in lock-step by push_rocc_insn, so the head
    // of each one belongs to the same request.
    *insn = rocc_insn_q.front();
    rocc_insn_q.erase(rocc_insn_q.begin());

    *rs1 = rocc_rs1_q.front();
    rocc_rs1_q.erase(rocc_rs1_q.begin());

    *rs2 = rocc_rs2_q.front();
    rocc_rs2_q.erase(rocc_rs2_q.begin());
  }

  return have_request;
}

/**
 * Queues a RoCC request (instruction plus both source operands) and then
 * switches to the host context.
 *
 * @param insn instruction to queue
 * @param rs1  first source operand value
 * @param rs2  second source operand value
 */
void chipyard_simif_t::push_rocc_insn(rocc_insn_t insn, reg_t rs1, reg_t rs2) {
  // Keep the three queues in lock-step: one entry each per request.
  rocc_insn_q.emplace_back(insn);
  rocc_rs1_q.emplace_back(rs1);
  rocc_rs2_q.emplace_back(rs2);

  host->switch_to();
}

/** Appends an accelerator result to the queue drained by get_rocc_result(). */
void chipyard_simif_t::push_rocc_result(long long int result) {
  rocc_result_q.emplace_back(result);
}

/**
 * Returns the oldest accelerator result and removes it from the queue.
 *
 * While the result queue is empty this keeps switching to the host context,
 * so it only returns once push_rocc_result() has supplied a value.
 */
long long int chipyard_simif_t::get_rocc_result() {
  while (rocc_result_q.empty()) {
    host->switch_to();
  }

  const long long int oldest = rocc_result_q.front();
  rocc_result_q.erase(rocc_result_q.begin());
  return oldest;
}

void chipyard_simif_t::set_rocc_exists(bool exists) {
rocc_exists = exists;
}

bool chipyard_simif_t::get_rocc_exists() {
return rocc_exists;
}

void chipyard_simif_t::loadmem(size_t base, const char* fname) {
class loadmem_memif_t : public memif_t {
public:
62 changes: 59 additions & 3 deletions generators/chipyard/src/main/resources/vsrc/spiketile.v
Original file line number Diff line number Diff line change
@@ -19,6 +19,7 @@ import "DPI-C" function void spike_tile(input int hartid,
input longint ipc,
input longint cycle,
output longint insns_retired,
input bit has_rocc,

input bit debug,
input bit mtip,
@@ -102,7 +103,16 @@ import "DPI-C" function void spike_tile(input int hartid,

output bit tcm_d_valid,
input bit tcm_d_ready,
output longint tcm_d_data
output longint tcm_d_data,

input bit rocc_request_ready,
output bit rocc_request_valid,
output longint rocc_request_insn,
output longint rocc_request_rs1,
output longint rocc_request_rs2,
input bit rocc_response_valid,
input longint rocc_response_rd,
input longint rocc_response_result
);


@@ -128,6 +138,7 @@ module SpikeBlackBox #(
input [63:0] ipc,
input [63:0] cycle,
output [63:0] insns_retired,
input has_rocc,

input debug,
input mtip,
@@ -211,12 +222,25 @@ module SpikeBlackBox #(

output tcm_d_valid,
input tcm_d_ready,
output [63:0] tcm_d_data
output [63:0] tcm_d_data,


input rocc_request_ready,
output rocc_request_valid,
output [63:0] rocc_request_insn,
output [63:0] rocc_request_rs1,
output [63:0] rocc_request_rs2,

input rocc_response_valid,
input [63:0] rocc_response_rd,
input [63:0] rocc_response_result
);

longint __insns_retired;
reg [63:0] __insns_retired_reg;

wire __has_rocc;

wire __icache_a_ready;
bit __icache_a_valid;
longint __icache_a_address;
@@ -290,7 +314,19 @@ module SpikeBlackBox #(
reg __tcm_d_valid_reg;
reg [63:0] __tcm_d_data_reg;

wire __rocc_request_ready;
bit __rocc_request_valid;
longint __rocc_request_insn;
longint __rocc_request_rs1;
longint __rocc_request_rs2;
reg __rocc_request_valid_reg;
reg [63:0] __rocc_request_insn_reg;
reg [63:0] __rocc_request_rs1_reg;
reg [63:0] __rocc_request_rs2_reg;

wire __rocc_response_valid;
longint __rocc_response_rd;
longint __rocc_response_result;

always @(posedge clock) begin
if (reset) begin
@@ -359,6 +395,7 @@ module SpikeBlackBox #(
__tcm_d_valid_reg <= 1'b0;
__tcm_d_data = 64'h0;
__tcm_d_data_reg <= 64'h0;

spike_tile_reset(HARTID);
end else begin
spike_tile(HARTID, ISA, PMPREGIONS,
@@ -367,6 +404,7 @@ module SpikeBlackBox #(
ICACHE_SOURCEIDS, DCACHE_SOURCEIDS,
TCM_BASE, TCM_SIZE,
reset_vector, ipc, cycle, __insns_retired,
__has_rocc,
debug, mtip, msip, meip, seip,

__icache_a_ready, __icache_a_valid, __icache_a_address, __icache_a_sourceid,
@@ -391,7 +429,10 @@ module SpikeBlackBox #(
mmio_d_valid, mmio_d_data,

tcm_a_valid, tcm_a_address, tcm_a_data, tcm_a_mask, tcm_a_opcode, tcm_a_size,
__tcm_d_valid, __tcm_d_ready, __tcm_d_data
__tcm_d_valid, __tcm_d_ready, __tcm_d_data,

__rocc_request_ready, __rocc_request_valid, __rocc_request_insn, __rocc_request_rs1, __rocc_request_rs2,
__rocc_response_valid, rocc_response_rd, __rocc_response_result
);
__insns_retired_reg <= __insns_retired;

@@ -430,6 +471,11 @@ module SpikeBlackBox #(
__tcm_d_valid_reg <= __tcm_d_valid;
__tcm_d_data_reg <= __tcm_d_data;

__rocc_request_valid_reg <= __rocc_request_valid;
__rocc_request_insn_reg <= __rocc_request_insn;
__rocc_request_rs1_reg <= __rocc_request_rs1;
__rocc_request_rs2_reg <= __rocc_request_rs2;

end
end // always @ (posedge clock)
assign insns_retired = __insns_retired_reg;
@@ -473,4 +519,14 @@ module SpikeBlackBox #(
assign tcm_d_data = __tcm_d_data_reg;
assign __tcm_d_ready = tcm_d_ready;

assign __has_rocc = has_rocc;
assign rocc_request_valid = __rocc_request_valid_reg;
assign rocc_request_insn = __rocc_request_insn_reg;
assign rocc_request_rs1 = __rocc_request_rs1_reg;
assign rocc_request_rs2 = __rocc_request_rs2_reg;
assign __rocc_request_ready = rocc_request_ready;
assign __rocc_response_valid = rocc_response_valid;
assign __rocc_response_rd = rocc_response_rd;
assign __rocc_response_result = rocc_response_result;

endmodule;
140 changes: 138 additions & 2 deletions generators/chipyard/src/main/scala/SpikeTile.scala
Original file line number Diff line number Diff line change
@@ -65,7 +65,7 @@ case class SpikeCoreParams() extends CoreParams {
val useConditionalZero = false

override def vLen = 128
override def vMemDataBits = 128
override def vMemDataBits = 64 //128
}

case class SpikeTileAttachParams(
@@ -176,6 +176,9 @@ class SpikeTile(
tlMasterXbar.node := TLWidthWidget(8) := TLBuffer() := mmioNode

override lazy val module = new SpikeTileModuleImp(this)
val rocc_sequence = p(BuildRoCC).map(_(p))
val has_rocc = rocc_sequence.nonEmpty
val rocc_module = if (has_rocc) rocc_sequence.head else null
}

class SpikeBlackBox(
@@ -193,7 +196,8 @@ class SpikeBlackBox(
executable_regions: String,
tcm_base: BigInt,
tcm_size: BigInt,
use_dtm: Boolean) extends BlackBox(Map(
use_dtm: Boolean,
) extends BlackBox(Map(
"HARTID" -> IntParam(hartId),
"ISA" -> StringParam(isa),
"PMPREGIONS" -> IntParam(pmpregions),
@@ -218,6 +222,7 @@ class SpikeBlackBox(
val ipc = Input(UInt(64.W))
val cycle = Input(UInt(64.W))
val insns_retired = Output(UInt(64.W))
val has_rocc = Input(Bool())

val debug = Input(Bool())
val mtip = Input(Bool())
@@ -303,6 +308,21 @@ class SpikeBlackBox(
val data = Output(UInt(64.W))
}
}

val rocc = new Bundle {
val request = new Bundle {
val ready = Input(Bool())
val valid = Output(Bool())
val insn = Output(UInt(64.W))
val rs1 = Output(UInt(64.W))
val rs2 = Output(UInt(64.W))
}
val response = new Bundle {
val valid = Input(Bool())
val rd = Input(UInt(64.W))
val result = Input(UInt(64.W))
}
}
})
addResource("/vsrc/spiketile.v")
addResource("/csrc/spiketile.cc")
@@ -347,6 +367,7 @@ class SpikeTileModuleImp(outer: SpikeTile) extends BaseTileModuleImp(outer) {
outer.spikeTileParams.tcmParams.map(_.size).getOrElse(0),
useDTM
))
spike.io.has_rocc := outer.has_rocc.asBool
spike.io.clock := clock.asBool
val cycle = RegInit(0.U(64.W))
cycle := cycle + 1.U
@@ -466,6 +487,109 @@ class SpikeTileModuleImp(outer: SpikeTile) extends BaseTileModuleImp(outer) {
tcm_tl.d.valid := spike.io.tcm.d.valid
tcm_tl.d.bits.data := spike.io.tcm.d.data
}

/* Begin RoCC Section */
// Connects the Spike black box's RoCC request/response ports to the first
// accelerator built from BuildRoCC, or ties the ports off when none exists.
if (outer.has_rocc) {
  // Internal staging bundle used only to pack the three request fields into a
  // single UInt for the queue. This is a Wire, not an IO: declaring it as IO
  // would add unused ports to the tile module.
  // The unpacking below reads insn from bits 31:0, rs1 from bits 127:64 and
  // rs2 from bits 191:128 of the packed value.
  val to_rocc_enq_bits = Wire(new Bundle {
    val rs2 = UInt(64.W)
    val rs1 = UInt(64.W)
    val insn = UInt(64.W)
  })

  val to_rocc_q = Module(new Queue(UInt(192.W), 1, flow=true, pipe=true))
  // Spike is told "ready" only while the queue holds nothing.
  spike.io.rocc.request.ready := to_rocc_q.io.enq.ready && to_rocc_q.io.count === 0.U
  to_rocc_q.io.enq.valid := spike.io.rocc.request.valid
  to_rocc_enq_bits.insn := spike.io.rocc.request.insn
  to_rocc_enq_bits.rs1 := spike.io.rocc.request.rs1
  to_rocc_enq_bits.rs2 := spike.io.rocc.request.rs2
  to_rocc_q.io.enq.bits := to_rocc_enq_bits.asUInt

  outer.rocc_module.module.io.cmd.valid := to_rocc_q.io.deq.valid
  to_rocc_q.io.deq.ready := outer.rocc_module.module.io.cmd.ready

  // Split the low 32 bits of the packed word into the RoCC instruction fields.
  val inst = Wire(new RoCCInstruction())
  inst.funct := to_rocc_q.io.deq.bits(31,25)
  inst.rs2 := to_rocc_q.io.deq.bits(24,20)
  inst.rs1 := to_rocc_q.io.deq.bits(19,15)
  inst.xd := to_rocc_q.io.deq.bits(14)
  inst.xs1 := to_rocc_q.io.deq.bits(13)
  inst.xs2 := to_rocc_q.io.deq.bits(12)
  inst.rd := to_rocc_q.io.deq.bits(11,7)
  inst.opcode := to_rocc_q.io.deq.bits(6,0)

  val cmd = Wire(new RoCCCommand())
  cmd.inst := inst
  cmd.rs1 := to_rocc_q.io.deq.bits(127,64)
  cmd.rs2 := to_rocc_q.io.deq.bits(191,128)
  cmd.status := DontCare
  outer.rocc_module.module.io.cmd.bits := cmd
  dontTouch(outer.rocc_module.module.io)

  //Instantiate unused signals, will probably be used as interface develops further.
  outer.rocc_module.module.io.mem.req.ready := false.B
  outer.rocc_module.module.io.mem.s2_nack := false.B
  outer.rocc_module.module.io.mem.s2_uncached := false.B
  outer.rocc_module.module.io.mem.s2_paddr := 0.U
  outer.rocc_module.module.io.mem.resp.valid := false.B
  outer.rocc_module.module.io.mem.resp.bits := DontCare
  outer.rocc_module.module.io.mem.replay_next := false.B
  outer.rocc_module.module.io.mem.s2_xcpt.ma.ld := false.B
  outer.rocc_module.module.io.mem.s2_xcpt.ma.st := false.B
  outer.rocc_module.module.io.mem.s2_xcpt.pf.ld := false.B
  outer.rocc_module.module.io.mem.s2_xcpt.pf.st := false.B
  outer.rocc_module.module.io.mem.s2_xcpt.ae.ld := false.B
  outer.rocc_module.module.io.mem.s2_xcpt.ae.st := false.B
  outer.rocc_module.module.io.mem.s2_xcpt.gf.ld := false.B
  outer.rocc_module.module.io.mem.s2_xcpt.gf.st := false.B
  outer.rocc_module.module.io.mem.s2_gpa := 0.U
  outer.rocc_module.module.io.mem.ordered := false.B
  outer.rocc_module.module.io.mem.perf.acquire := false.B
  outer.rocc_module.module.io.mem.perf.release := false.B
  outer.rocc_module.module.io.mem.perf.grant := false.B
  outer.rocc_module.module.io.exception := false.B
  outer.rocc_module.module.io.mem.clock_enabled := true.B
  outer.rocc_module.module.io.mem.perf.storeBufferEmptyAfterStore := false.B
  outer.rocc_module.module.io.mem.perf.storeBufferEmptyAfterLoad := false.B
  outer.rocc_module.module.io.mem.perf.canAcceptLoadThenLoad := false.B
  outer.rocc_module.module.io.mem.perf.canAcceptStoreThenLoad := false.B
  outer.rocc_module.module.io.mem.perf.canAcceptStoreThenRMW := false.B
  outer.rocc_module.module.io.mem.s2_nack_cause_raw := 0.U
  outer.rocc_module.module.io.mem.s2_gpa_is_pte := false.B
  outer.rocc_module.module.io.mem.perf.tlbMiss := false.B
  outer.rocc_module.module.io.mem.perf.blocked := false.B

  outer.rocc_module.module.io.fpu_req.ready := false.B
  outer.rocc_module.module.io.fpu_resp.valid := false.B
  outer.rocc_module.module.io.fpu_resp.bits := DontCare

  // Internal staging bundle for the response; a Wire for the same reason as
  // to_rocc_enq_bits above. Unpacked below as rd = bits 127:64 and
  // resp = bits 63:0.
  val from_rocc_enq_bits = Wire(new Bundle {
    val rd = UInt(64.W)
    val resp = UInt(64.W)
  })

  val from_rocc_q = Module(new Queue(UInt(128.W), 1, flow=true, pipe=true)) //rd and result stitched together
  outer.rocc_module.module.io.resp.ready := from_rocc_q.io.enq.ready && from_rocc_q.io.count === 0.U
  from_rocc_q.io.enq.valid := outer.rocc_module.module.io.resp.valid

  from_rocc_enq_bits.rd := outer.rocc_module.module.io.resp.bits.rd
  from_rocc_enq_bits.resp := outer.rocc_module.module.io.resp.bits.data
  from_rocc_q.io.enq.bits := from_rocc_enq_bits.asUInt
  // Defaults; valid and result are overridden only on a dequeue.
  spike.io.rocc.response.valid := false.B
  from_rocc_q.io.deq.ready := true.B
  spike.io.rocc.response.rd := from_rocc_q.io.deq.bits(127,64)
  spike.io.rocc.response.result := 0.U

  when (from_rocc_q.io.deq.fire) {
    spike.io.rocc.response.valid := true.B
    spike.io.rocc.response.result := from_rocc_q.io.deq.bits(63,0)
  }
} else {
  // No accelerator: never accept a request and never present a response.
  spike.io.rocc.request.ready := false.B
  spike.io.rocc.response.valid := false.B
  spike.io.rocc.response.result := 0.U
  spike.io.rocc.response.rd := 0.U
}
/* End RoCC Section */
}

class WithNSpikeCores(n: Int = 1, tileParams: SpikeTileParams = SpikeTileParams()
@@ -497,3 +621,15 @@ class WithSpikeTCM extends Config((site, here, up) => {
case ExtMem => None
case SubsystemBankedCoherenceKey => up(SubsystemBankedCoherenceKey).copy(nBanks = 0)
})

/**
* Config fragment that sets BuildRoCC to a single-element list whose builder
* creates an AccumulatorExample on OpcodeSet.custom0 with n = 4.
* Work in progress: intended as the starting point for selecting different RoCCs.
*/
class WithAccumRoCC extends Config((site, here, up) => {
case BuildRoCC => List(
(p: Parameters) => {
// NOTE(review): LazyModule presumably takes its instance name from the
// enclosing val — confirm before inlining this expression.
val accumulator = LazyModule(new AccumulatorExample(OpcodeSet.custom0, n = 4)(p))
accumulator
}
)
})