Skip to content

Commit

Permalink
Implemented bursting in devicemem reads/writes - faster sim
Browse files Browse the repository at this point in the history
  • Loading branch information
quetric committed Feb 20, 2024
1 parent 8001071 commit a9c1c89
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 51 deletions.
152 changes: 127 additions & 25 deletions test/model/simulator/cclo_sim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,24 +94,55 @@ void control_read_fsm(XSI_DUT *dut, Stream<unsigned int> &addr, Stream<unsigned
}
}

void data_read_fsm(XSI_DUT *dut, Stream<ap_uint<64> > &addr, Stream<ap_uint<512> > &ret){
void data_read_fsm(XSI_DUT *dut, Stream<ap_uint<64> > &addr, Stream<ap_uint<32> > &len, Stream<ap_uint<512> > &ret){
static axi_fsm_state state = VALID_ADDR;
static ap_uint<64> curr_addr = 0;
static unsigned int curr_nbytes = 0;
static ap_uint<8> nbeats = 0;
unsigned int nbytes_to_4k_boundary, nbytes_this_transfer;
switch(state){
case VALID_ADDR:
//set up bus to transfer one 64-byte word at a time
dut->write(datamem.arsize(), 6);//64B
dut->write(datamem.arlen(), 0);//one word
dut->write(datamem.arburst(), 1);//INCR
if(!addr.IsEmpty()){
dut->write<64>(datamem.araddr(), addr.Pop());
curr_addr = addr.Pop();
curr_nbytes = len.Pop();
nbytes_to_4k_boundary = (curr_addr/4096+1)*4096 - curr_addr;
nbytes_this_transfer = std::min(nbytes_to_4k_boundary, curr_nbytes);
nbeats = (nbytes_this_transfer+63)/64;//number of 64B beats in transfer
logger << log_level::debug << "Read start addr=" << curr_addr << " len=" << nbytes_this_transfer << " (" << nbeats << ")" << endl;
//set up bus for an INCR burst of nbeats beats, 64 bytes each (arlen = nbeats-1)
dut->write(datamem.arsize(), 6);//64B
dut->write(datamem.arlen(), nbeats-1);
dut->write(datamem.arburst(), 1);//INCR
dut->write<64>(datamem.araddr(), curr_addr);
dut->set(datamem.arvalid());
if(dut->test(datamem.arready())){
state = CLEAR_ADDR;
} else {
state = READY_ADDR;
}
curr_addr += nbytes_this_transfer;
curr_nbytes -= nbytes_this_transfer;
}
return;
case CONTINUE_ADDR:
nbytes_to_4k_boundary = (curr_addr/4096+1)*4096 - curr_addr;
nbytes_this_transfer = std::min(nbytes_to_4k_boundary, curr_nbytes);
nbeats = (nbytes_this_transfer+63)/64;//number of 64B beats in transfer
logger << log_level::debug << "Read continue addr=" << curr_addr << " len=" << nbytes_this_transfer << " (" << nbeats << ")" << endl;
//set up bus for the next INCR burst of nbeats beats, 64 bytes each (arlen = nbeats-1)
dut->write(datamem.arsize(), 6);//64B
dut->write(datamem.arlen(), nbeats-1);
dut->write(datamem.arburst(), 1);//INCR
dut->write<64>(datamem.araddr(), curr_addr);
dut->set(datamem.arvalid());
if(dut->test(datamem.arready())){
state = CLEAR_ADDR;
} else {
state = READY_ADDR;
}
curr_addr += nbytes_this_transfer;
curr_nbytes -= nbytes_this_transfer;
return;
case READY_ADDR:
if(dut->test(datamem.arready())){
state = CLEAR_ADDR;
Expand All @@ -124,13 +155,20 @@ void data_read_fsm(XSI_DUT *dut, Stream<ap_uint<64> > &addr, Stream<ap_uint<512>
return;
case READY_DATA:
if(dut->test(datamem.rvalid())){
nbeats--;
ret.Push(dut->read<512>(datamem.rdata()));
state = CLEAR_DATA;
if(nbeats == 0){
state = CLEAR_DATA;
}
}
return;
case CLEAR_DATA:
dut->clear(datamem.rready());
state = VALID_ADDR;
if(curr_nbytes == 0){
state = VALID_ADDR;
} else {
state = CONTINUE_ADDR;
}
return;
}
}
Expand Down Expand Up @@ -201,25 +239,55 @@ void control_write_fsm(XSI_DUT *dut, Stream<unsigned int> &addr, Stream<unsigned
}
}

void data_write_fsm(XSI_DUT *dut, Stream<ap_uint<64> > &addr, Stream<ap_uint<512> > &val, Stream<ap_uint<64> > &strb){
void data_write_fsm(XSI_DUT *dut, Stream<ap_uint<64> > &addr, Stream<ap_uint<32> > &len, Stream<ap_uint<512> > &val, Stream<ap_uint<64> > &strb){
static axi_fsm_state state = VALID_ADDR;
static ap_uint<64> curr_addr = 0;
static unsigned int curr_nbytes = 0;
static ap_uint<8> nbeats = 0;
unsigned int nbytes_to_4k_boundary, nbytes_this_transfer;
switch(state){
case VALID_ADDR:
dut->write(datamem.awsize(), 6);//64B width
dut->write(datamem.awlen(), 0);//one word
dut->write(datamem.awburst(), 1);//INCR
dut->write(datamem.wlast(), 1);//always last transfer
//set awaddr = addr
if(!addr.IsEmpty()){
dut->write<64>(datamem.awaddr(), addr.Pop());
curr_addr = addr.Pop();
curr_nbytes = len.Pop();
nbytes_to_4k_boundary = (curr_addr/4096+1)*4096 - curr_addr;
nbytes_this_transfer = std::min(nbytes_to_4k_boundary, curr_nbytes);
nbeats = (nbytes_this_transfer+63)/64;//number of 64B beats in transfer
logger << log_level::debug << "Write start addr=" << curr_addr << " len=" << nbytes_this_transfer << " (" << nbeats << ")" << endl;
dut->write(datamem.awsize(), 6);//64B width
dut->write(datamem.awlen(), nbeats-1);
dut->write(datamem.awburst(), 1);//INCR
dut->write<64>(datamem.awaddr(), curr_addr);
//set awvalid = 1
dut->set(datamem.awvalid());
if(dut->test(datamem.awready())){
state = CLEAR_ADDR;
} else{
state = READY_ADDR;
}
curr_addr += nbytes_this_transfer;
curr_nbytes -= nbytes_this_transfer;
}
return;
case CONTINUE_ADDR:
nbytes_to_4k_boundary = (curr_addr/4096+1)*4096 - curr_addr;
nbytes_this_transfer = std::min(nbytes_to_4k_boundary, curr_nbytes);
nbeats = (nbytes_this_transfer+63)/64;//number of 64B beats in transfer
logger << log_level::debug << "Write continue addr=" << curr_addr << " len=" << nbytes_this_transfer << " (" << nbeats << ")" << endl;
dut->write(datamem.awsize(), 6);//64B width
dut->write(datamem.awlen(), nbeats-1);
dut->write(datamem.awburst(), 1);//INCR
dut->write<64>(datamem.awaddr(), curr_addr);
//set awvalid = 1
dut->set(datamem.awvalid());
if(dut->test(datamem.awready())){
state = CLEAR_ADDR;
} else{
state = READY_ADDR;
}
curr_addr += nbytes_this_transfer;
curr_nbytes -= nbytes_this_transfer;
return;
case READY_ADDR:
if(dut->test(datamem.awready())){
Expand All @@ -234,18 +302,46 @@ void data_write_fsm(XSI_DUT *dut, Stream<ap_uint<64> > &addr, Stream<ap_uint<512
if(!val.IsEmpty() && !strb.IsEmpty()){
dut->write<512>(datamem.wdata(), val.Pop());
dut->write<64>(datamem.wstrb(), strb.Pop());
dut->write(datamem.wlast(), nbeats==1);
//set wvalid = 1
dut->set(datamem.wvalid());
if(dut->test(datamem.wready())){
state = CLEAR_DATA;
nbeats--;
if(nbeats != 0){
state = UPDATE_DATA;
} else {
state = CLEAR_DATA;
}
} else{
state = READY_DATA;
}
}
return;
case READY_DATA:
if(dut->test(datamem.wready())){
state = CLEAR_DATA;
nbeats--;
if(nbeats != 0){
state = UPDATE_DATA;
} else {
state = CLEAR_DATA;
}
}
return;
case UPDATE_DATA:
if(!val.IsEmpty() && !strb.IsEmpty()){
dut->write<512>(datamem.wdata(), val.Pop());
dut->write<64>(datamem.wstrb(), strb.Pop());
dut->write(datamem.wlast(), nbeats==1);
if(dut->test(datamem.wready())){
nbeats--;
if(nbeats != 0){
state = UPDATE_DATA;
} else {
state = CLEAR_DATA;
}
} else{
state = READY_DATA;
}
}
return;
case CLEAR_DATA:
Expand All @@ -267,7 +363,11 @@ void data_write_fsm(XSI_DUT *dut, Stream<ap_uint<64> > &addr, Stream<ap_uint<512
return;
case CLEAR_ACK:
dut->clear(datamem.bready());
state = VALID_ADDR;
if(curr_nbytes == 0){
state = VALID_ADDR;
} else {
state = CONTINUE_ADDR;
}
return;
}
}
Expand Down Expand Up @@ -420,8 +520,8 @@ void krnl_egress_fsm(XSI_DUT *dut, Stream<stream_word> &val){

void interface_handler(XSI_DUT *dut, Stream<unsigned int> &axilite_rd_addr, Stream<unsigned int> &axilite_rd_data,
Stream<unsigned int> &axilite_wr_addr, Stream<unsigned int> &axilite_wr_data,
Stream<ap_uint<64> > &aximm_rd_addr, Stream<ap_uint<512> > &aximm_rd_data,
Stream<ap_uint<64> > &aximm_wr_addr, Stream<ap_uint<512> > &aximm_wr_data, Stream<ap_uint<64> > &aximm_wr_strb,
Stream<ap_uint<64> > &aximm_rd_addr, Stream<ap_uint<32> > &aximm_rd_len, Stream<ap_uint<512> > &aximm_rd_data,
Stream<ap_uint<64> > &aximm_wr_addr, Stream<ap_uint<32> > &aximm_wr_len, Stream<ap_uint<512> > &aximm_wr_data, Stream<ap_uint<64> > &aximm_wr_strb,
Stream<unsigned int> &callreq, Stream<unsigned int> &callack,
Stream<stream_word> &eth_tx_data, Stream<stream_word> &eth_rx_data,
Stream<stream_word> &cclo_to_krnl_data, Stream<stream_word> &krnl_to_cclo_data){
Expand All @@ -430,8 +530,8 @@ void interface_handler(XSI_DUT *dut, Stream<unsigned int> &axilite_rd_addr, Stre
dut->run_ncycles(1);
control_read_fsm(dut, axilite_rd_addr, axilite_rd_data);
control_write_fsm(dut, axilite_wr_addr, axilite_wr_data);
data_read_fsm(dut, aximm_rd_addr, aximm_rd_data);
data_write_fsm(dut, aximm_wr_addr, aximm_wr_data, aximm_wr_strb);
data_read_fsm(dut, aximm_rd_addr, aximm_rd_len, aximm_rd_data);
data_write_fsm(dut, aximm_wr_addr, aximm_wr_len, aximm_wr_data, aximm_wr_strb);
call_req_fsm(dut, callreq);
call_ack_fsm(dut, callack);
eth_ingress_fsm(dut, eth_rx_data);
Expand Down Expand Up @@ -522,8 +622,10 @@ int main(int argc, char **argv)
Stream<unsigned int> axilite_wr_addr;
Stream<unsigned int> axilite_wr_data;
Stream<ap_uint<64> > aximm_rd_addr;
Stream<ap_uint<32> > aximm_rd_len;
Stream<ap_uint<512> > aximm_rd_data;
Stream<ap_uint<64> > aximm_wr_addr;
Stream<ap_uint<32> > aximm_wr_len;
Stream<ap_uint<512> > aximm_wr_data;
Stream<ap_uint<64> > aximm_wr_strb;
Stream<unsigned int, 16> callreq; //need some capacity for all args
Expand Down Expand Up @@ -552,16 +654,16 @@ int main(int argc, char **argv)
HLSLIB_DATAFLOW_FUNCTION(interface_handler, &dut,
axilite_rd_addr, axilite_rd_data,
axilite_wr_addr, axilite_wr_data,
aximm_rd_addr, aximm_rd_data,
aximm_wr_addr, aximm_wr_data, aximm_wr_strb,
aximm_rd_addr, aximm_rd_len, aximm_rd_data,
aximm_wr_addr, aximm_wr_len, aximm_wr_data, aximm_wr_strb,
callreq, callack,
eth_tx_data, eth_rx_data,
cclo_to_krnl_data, krnl_to_cclo_data);
HLSLIB_DATAFLOW_FUNCTION(zmq_cmd_server, &ctx,
axilite_rd_addr, axilite_rd_data,
axilite_wr_addr, axilite_wr_data,
aximm_rd_addr, aximm_rd_data,
aximm_wr_addr, aximm_wr_data, aximm_wr_strb,
aximm_rd_addr, aximm_rd_len, aximm_rd_data,
aximm_wr_addr, aximm_wr_len, aximm_wr_data, aximm_wr_strb,
callreq, callack);
//ZMQ to other nodes process(es)
HLSLIB_DATAFLOW_FUNCTION(zmq_eth_egress_server, &ctx, eth_tx_data, local_rank);
Expand Down
2 changes: 1 addition & 1 deletion test/model/simulator/cclo_sim.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#
# *******************************************************************************/

enum axi_fsm_state {VALID_ADDR, READY_ADDR, CLEAR_ADDR, VALID_DATA, READY_DATA, CLEAR_DATA, VALID_ACK, READY_ACK, CLEAR_ACK};
// States used by the AXI state machines of the simulator.
// Enumerator order is significant only in that it fixes the numeric values (0..10).
enum axi_fsm_state {
    VALID_ADDR,     // wait for a new request, then drive the address channel
    CONTINUE_ADDR,  // issue the next burst of a request that still has bytes left
    READY_ADDR,     // address is on the bus, wait for the ready handshake
    CLEAR_ADDR,
    VALID_DATA,
    READY_DATA,     // wait for the data channel handshake
    UPDATE_DATA,    // write path: put the next beat of the current burst on the bus
    CLEAR_DATA,
    VALID_ACK,
    READY_ACK,
    CLEAR_ACK
};

struct axilite{
std::string basename = "";
Expand Down
40 changes: 19 additions & 21 deletions test/model/zmq/zmq_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -414,8 +414,8 @@ void serve_zmq(zmq_intf_context *ctx, uint32_t *cfgmem, vector<char> &devicemem,
void serve_zmq(zmq_intf_context *ctx,
Stream<unsigned int> &axilite_rd_addr, Stream<unsigned int> &axilite_rd_data,
Stream<unsigned int> &axilite_wr_addr, Stream<unsigned int> &axilite_wr_data,
Stream<ap_uint<64> > &aximm_rd_addr, Stream<ap_uint<512> > &aximm_rd_data,
Stream<ap_uint<64> > &aximm_wr_addr, Stream<ap_uint<512> > &aximm_wr_data, Stream<ap_uint<64> > &aximm_wr_strb,
Stream<ap_uint<64> > &aximm_rd_addr, Stream<ap_uint<32> > &aximm_rd_len, Stream<ap_uint<512> > &aximm_rd_data,
Stream<ap_uint<64> > &aximm_wr_addr, Stream<ap_uint<32> > &aximm_wr_len, Stream<ap_uint<512> > &aximm_wr_data, Stream<ap_uint<64> > &aximm_wr_strb,
Stream<unsigned int> &callreq, Stream<unsigned int> &callack){

Json::Reader reader;
Expand Down Expand Up @@ -498,19 +498,17 @@ void serve_zmq(zmq_intf_context *ctx,
response["rdata"][0] = 0;
*logger << log_level::error << "Mem read outside available range ("<< ACCL_SIM_MEM_SIZE_KB << "KB) at addr: " << adr << " len: " << len << endl;
} else {
for(int i=0; i<len; i+=64){
mem_addr = adr+i;
aximm_rd_addr.Push(mem_addr);
while(!ctx->stop){
if(!aximm_rd_data.IsEmpty()){
mem_data = aximm_rd_data.Pop();
break;
} else{
this_thread::sleep_for(chrono::milliseconds(1));
aximm_rd_addr.Push(adr);
aximm_rd_len.Push(len);
unsigned int idx = 0;
while(!ctx->stop && len>idx){
if(!aximm_rd_data.IsEmpty()){
mem_data = aximm_rd_data.Pop();
for(int j=0; j<64 && len>idx; j++, idx++){
response["rdata"][idx] = (unsigned int)mem_data(8*(j+1)-1, 8*j);
}
}
for(int j=0; j<64 && (i+j)<len; j++){
response["rdata"][i+j] = (unsigned int)mem_data(8*(j+1)-1, 8*j);
} else{
this_thread::sleep_for(chrono::milliseconds(1));
}
}
}
Expand All @@ -526,16 +524,16 @@ void serve_zmq(zmq_intf_context *ctx,
response["status"] = 1;
*logger << log_level::error << "Mem write outside available range ("<< ACCL_SIM_MEM_SIZE_KB << "KB) at addr: " << adr << " len: " << len << endl;
} else{
aximm_wr_addr.Push(adr);
aximm_wr_len.Push(len);
for(int i=0; i<len; i+=64){
mem_strb = 0;
mem_addr = adr+i;
for(int j=0; j<64 && (i+j)<len; j++){
mem_data(8*(j+1)-1, 8*j) = dma_wdata[i+j].asUInt();
mem_strb(j,j) = 1;
}
while(!ctx->stop){
if(!aximm_wr_addr.IsFull() && !aximm_wr_data.IsFull() && !aximm_wr_strb.IsFull()){
aximm_wr_addr.Push(mem_addr);
if(!aximm_wr_data.IsFull() && !aximm_wr_strb.IsFull()){
aximm_wr_data.Push(mem_data);
aximm_wr_strb.Push(mem_strb);
break;
Expand Down Expand Up @@ -681,16 +679,16 @@ void serve_zmq(zmq_intf_context *ctx,
void zmq_cmd_server(zmq_intf_context *ctx,
Stream<unsigned int> &axilite_rd_addr, Stream<unsigned int> &axilite_rd_data,
Stream<unsigned int> &axilite_wr_addr, Stream<unsigned int> &axilite_wr_data,
Stream<ap_uint<64> > &aximm_rd_addr, Stream<ap_uint<512> > &aximm_rd_data,
Stream<ap_uint<64> > &aximm_wr_addr, Stream<ap_uint<512> > &aximm_wr_data, Stream<ap_uint<64> > &aximm_wr_strb,
Stream<ap_uint<64> > &aximm_rd_addr, Stream<ap_uint<32> > &aximm_rd_len, Stream<ap_uint<512> > &aximm_rd_data,
Stream<ap_uint<64> > &aximm_wr_addr, Stream<ap_uint<32> > &aximm_wr_len, Stream<ap_uint<512> > &aximm_wr_data, Stream<ap_uint<64> > &aximm_wr_strb,
Stream<unsigned int> &callreq, Stream<unsigned int> &callack){
(*logger)("Starting ZMQ server\n", log_level::verbose);
while(!ctx->stop){
serve_zmq(ctx,
axilite_rd_addr, axilite_rd_data,
axilite_wr_addr, axilite_wr_data,
aximm_rd_addr, aximm_rd_data,
aximm_wr_addr, aximm_wr_data, aximm_wr_strb,
aximm_rd_addr, aximm_rd_len, aximm_rd_data,
aximm_wr_addr, aximm_wr_len, aximm_wr_data, aximm_wr_strb,
callreq, callack
);
this_thread::sleep_for(chrono::milliseconds(10));
Expand Down
10 changes: 6 additions & 4 deletions test/model/zmq/zmq_server.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,11 @@ void krnl_endpoint_egress_port(zmq_intf_context *ctx, hlslib::Stream<stream_word
* @param axilite_rd_data
* @param axilite_wr_addr
* @param axilite_wr_data
* @param aximm_rd_addr
* @param aximm_rd_addr
* @param aximm_rd_len
* @param aximm_rd_data
* @param aximm_wr_addr
* @param aximm_wr_addr
* @param aximm_wr_len
* @param aximm_wr_data
* @param aximm_wr_strb
* @param callreq
Expand All @@ -115,8 +117,8 @@ void krnl_endpoint_egress_port(zmq_intf_context *ctx, hlslib::Stream<stream_word
void zmq_cmd_server(zmq_intf_context *ctx,
hlslib::Stream<unsigned int> &axilite_rd_addr, hlslib::Stream<unsigned int> &axilite_rd_data,
hlslib::Stream<unsigned int> &axilite_wr_addr, hlslib::Stream<unsigned int> &axilite_wr_data,
hlslib::Stream<ap_uint<64> > &aximm_rd_addr, hlslib::Stream<ap_uint<512> > &aximm_rd_data,
hlslib::Stream<ap_uint<64> > &aximm_wr_addr, hlslib::Stream<ap_uint<512> > &aximm_wr_data, hlslib::Stream<ap_uint<64> > &aximm_wr_strb,
hlslib::Stream<ap_uint<64> > &aximm_rd_addr, hlslib::Stream<ap_uint<32> > &aximm_rd_len, hlslib::Stream<ap_uint<512> > &aximm_rd_data,
hlslib::Stream<ap_uint<64> > &aximm_wr_addr, hlslib::Stream<ap_uint<32> > &aximm_wr_len, hlslib::Stream<ap_uint<512> > &aximm_wr_data, hlslib::Stream<ap_uint<64> > &aximm_wr_strb,
hlslib::Stream<unsigned int> &callreq, hlslib::Stream<unsigned int> &callack);

/**
Expand Down

0 comments on commit a9c1c89

Please sign in to comment.