From 75e1ea84be77314575dc50d68d6472742f445ad9 Mon Sep 17 00:00:00 2001 From: George Trieu Date: Thu, 16 Nov 2023 17:44:36 -0500 Subject: [PATCH 01/11] Updated port mapping and wrapper generation scripts, updated RTL_Add example, added module instantiation check to RAD-SIM --- docs/rad-sim-rtl-code.rst | 61 +++++++------- rad-sim/example-designs/add/add_driver.cpp | 14 +++- rad-sim/example-designs/add/add_driver.hpp | 2 + rad-sim/example-designs/add/modules/adder.cpp | 8 +- .../example-designs/add/modules/client.cpp | 10 +-- .../example-designs/add/modules/client.hpp | 2 +- .../example-designs/rtl_add/modules/adder.cpp | 15 +++- .../example-designs/rtl_add/modules/adder.hpp | 4 + .../rtl_add/modules/client.cpp | 15 +++- .../rtl_add/modules/client.hpp | 4 + .../rtl_add/modules/rtl/CMakeLists.txt | 17 +++- .../rtl_add/modules/rtl/adder.v | 2 +- .../rtl_add/modules/rtl/client.v | 8 +- .../rtl_add/modules/rtl/port.map | 26 +++--- .../rtl_add/modules/rtl/static_params.vh | 4 +- .../rtl_add/rtl_add_driver.cpp | 14 +++- .../rtl_add/rtl_add_driver.hpp | 2 + .../example-designs/rtl_add/rtl_add_top.hpp | 2 + rad-sim/scripts/generate_port_mappings.py | 46 ++++++---- rad-sim/scripts/generate_wrapper.py | 84 ++++++++++++------- rad-sim/scripts/verilog_parser.py | 24 ++++-- rad-sim/sim/design_context.cpp | 5 ++ rad-sim/test/wrapper-scripts/mock_port.map | 3 +- .../wrapper-scripts/mock_port_incomplete.map | 2 +- .../mock_port_inconsistent.map | 3 + .../test_generate_port_mappings.py | 65 ++++++++++---- .../wrapper-scripts/test_generate_wrapper.py | 22 ++++- 27 files changed, 315 insertions(+), 149 deletions(-) create mode 100644 rad-sim/test/wrapper-scripts/mock_port_inconsistent.map diff --git a/docs/rad-sim-rtl-code.rst b/docs/rad-sim-rtl-code.rst index f0b3da4..cd3b920 100644 --- a/docs/rad-sim-rtl-code.rst +++ b/docs/rad-sim-rtl-code.rst @@ -81,7 +81,7 @@ Automatic wrapper generation follows the workflow: #. 
Run ``generate_port_mappings.py`` with the design path and RTL design files as arguments. - * ex. ``python generate_port_mappings.py adder.v client.v`` + * ex. ``python generate_port_mappings.py example-designs/rtl_add adder.v client.v`` #. Check the console for inference warnings. @@ -90,7 +90,7 @@ Automatic wrapper generation follows the workflow: #. Run ``generate_wrapper.py`` with the design path and module names as arguments. - * ex. ``python generate_wrapper.py adder client`` + * ex. ``python generate_wrapper.py example-designs/rtl_add adder client`` * Note: these are modules connected to the NoC. These scripts produce basic source and header wrapper files for the specified RTL modules. @@ -98,7 +98,7 @@ Advanced users may edit these files to add additional functionality. AXI-S Formatting Requirement ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Ports in RTL modules using AXI-S must be specified in the format ``axis_{name}_interface_{signal}`` to be recognized by the port mappings script. +Ports in RTL modules using AXI-S must be specified in the format ``axis_{name}_{signal}`` to be recognized by the port mappings script. Port Map File Format ^^^^^^^^^^^^^^^^^^^^^ @@ -107,35 +107,36 @@ The port map file is a blank-space delimited file used to specify the connection * All inputs, outputs, and AXI-S ports must be under a ``module``. * Modules are defined by ``module {name}``. * Input and Output ports are defined by ``{input/output} {port_width} {rtl_port} {radsim_port}``. -* AXI-S ports are defined by ``axis {master/slave} {rtl_port} {radsim_port}``. +* AXI-S ports are defined by ``axis {master/slave} {rtl_port} {axis_interface} {axis_port}``. An example port map file from the ``rtl_add`` example is shown below: .. 
code-block:: - module adder - input 1 clk clk - input 1 rst rst - axis slave axis_adder_interface_tvalid axis_adder_interface.tvalid - axis slave axis_adder_interface_tlast axis_adder_interface.tlast - axis slave axis_adder_interface_tdata axis_adder_interface.tdata - axis slave axis_adder_interface_tready axis_adder_interface.tready - output 128 response response - output 1 response_valid response_valid - - module client - input 1 clk clk - input 1 rst rst - input 128 client_tdata client_tdata - input 1 client_tlast client_tlast - input 1 client_valid client_valid - axis master axis_client_interface_tready axis_client_interface.tready - output 1 client_ready client_ready - axis master axis_client_interface_tvalid axis_client_interface.tvalid - axis master axis_client_interface_tlast axis_client_interface.tlast - axis master axis_client_interface_tdest axis_client_interface.tdest - axis master axis_client_interface_tid axis_client_interface.tid - axis master axis_client_interface_tstrb axis_client_interface.tstrb - axis master axis_client_interface_tkeep axis_client_interface.tkeep - axis master axis_client_interface_tuser axis_client_interface.tuser - axis master axis_client_interface_tdata axis_client_interface.tdata + module adder + input 1 clk clk + input 1 rst rst + axis slave axis_adder_interface_tvalid axis_adder_interface tvalid + axis slave axis_adder_interface_tlast axis_adder_interface tlast + axis slave axis_adder_interface_tdata axis_adder_interface tdata + axis slave axis_adder_interface_tready axis_adder_interface tready + output 128 response response + output 1 response_valid response_valid + + module client + input 1 clk clk + input 1 rst rst + input 128 client_tdata client_tdata + input 1 client_tlast client_tlast + input 1 client_valid client_valid + axis master axis_client_interface_tready axis_client_interface tready + output 1 client_ready client_ready + axis master axis_client_interface_tvalid axis_client_interface tvalid + axis master 
axis_client_interface_tlast axis_client_interface tlast + axis master axis_client_interface_tdest axis_client_interface tdest + axis master axis_client_interface_tid axis_client_interface tid + axis master axis_client_interface_tstrb axis_client_interface tstrb + axis master axis_client_interface_tkeep axis_client_interface tkeep + axis master axis_client_interface_tuser axis_client_interface tuser + axis master axis_client_interface_tdata axis_client_interface tdata + diff --git a/rad-sim/example-designs/add/add_driver.cpp b/rad-sim/example-designs/add/add_driver.cpp index 388f289..70b4405 100644 --- a/rad-sim/example-designs/add/add_driver.cpp +++ b/rad-sim/example-designs/add/add_driver.cpp @@ -31,6 +31,8 @@ void add_driver::source() { client_valid.write(false); wait(); rst.write(false); + start_cycle = GetSimulationCycle(1.0); + start_time = std::chrono::steady_clock::now(); wait(); while (!numbers_to_send.empty()) { @@ -45,7 +47,7 @@ void add_driver::source() { } } client_valid.write(false); - std::cout << "Finished sending all numbers to client module!" << std::endl; + //std::cout << "Finished sending all numbers to client module!" << std::endl; wait(); } @@ -53,11 +55,17 @@ void add_driver::sink() { while (!response_valid.read()) { wait(); } - std::cout << "Received " << response.read().to_uint64() << " sum from the adder!" << std::endl; - std::cout << "The actual sum is " << actual_sum << std::endl; + //std::cout << "Received " << response.read().to_uint64() << " sum from the adder!" << std::endl; + //std::cout << "The actual sum is " << actual_sum << std::endl; if (response.read() != actual_sum) std::cout << "FAILURE - Output is not matching!" << std::endl; else std::cout << "SUCCESS - Output is matching!" 
<< std::endl; + end_cycle = GetSimulationCycle(1.0); + end_time = std::chrono::steady_clock::now(); + std::cout << "Simulation Cycles = " << end_cycle - start_cycle << std::endl; + std::cout << "Simulation Time = " << std::chrono::duration_cast (end_time - start_time).count() << " us" << std::endl; + NoCTransactionTelemetry::DumpStatsToFile("stats.csv"); + sc_stop(); } \ No newline at end of file diff --git a/rad-sim/example-designs/add/add_driver.hpp b/rad-sim/example-designs/add/add_driver.hpp index baae941..5bbef5a 100644 --- a/rad-sim/example-designs/add/add_driver.hpp +++ b/rad-sim/example-designs/add/add_driver.hpp @@ -12,6 +12,8 @@ class add_driver : public sc_module { private: std::queue numbers_to_send; int actual_sum; + int start_cycle, end_cycle; + std::chrono::steady_clock::time_point start_time, end_time; public: sc_in clk; diff --git a/rad-sim/example-designs/add/modules/adder.cpp b/rad-sim/example-designs/add/modules/adder.cpp index 0a8ecf6..a1acbdb 100644 --- a/rad-sim/example-designs/add/modules/adder.cpp +++ b/rad-sim/example-designs/add/modules/adder.cpp @@ -40,10 +40,10 @@ void adder::Tick() { uint64_t current_sum = adder_rolling_sum.to_uint64(); adder_rolling_sum = current_sum + axis_adder_interface.tdata.read().to_uint64(); t_finished.write(axis_adder_interface.tlast.read()); - std::cout << module_name << ": Got Transaction (user = " - << axis_adder_interface.tuser.read().to_uint64() << ") (addend = " - << axis_adder_interface.tdata.read().to_uint64() << ")!" - << std::endl; + //std::cout << module_name << ": Got Transaction (user = " + // << axis_adder_interface.tuser.read().to_uint64() << ") (addend = " + // << axis_adder_interface.tdata.read().to_uint64() << ")!" 
+ // << std::endl; } // Print Sum and Exit diff --git a/rad-sim/example-designs/add/modules/client.cpp b/rad-sim/example-designs/add/modules/client.cpp index 24c13e2..cdea703 100644 --- a/rad-sim/example-designs/add/modules/client.cpp +++ b/rad-sim/example-designs/add/modules/client.cpp @@ -21,7 +21,7 @@ client::~client() {} void client::Assign() { if (rst) { - client_ready.write(true); // ready to accept requests from driver testbench + client_ready.write(false); } else { // Ready to accept new addend from driver testbench as long as the addend // FIFO is not full @@ -45,8 +45,8 @@ void client::Tick() { // Interface with testbench driver if (client_ready.read() && client_valid.read()) { client_tdata_fifo.push(client_tdata); - testbench_tlast = client_tlast.read(); - std::cout << module_name << ": Pushed request to FIFO" << std::endl; + r_client_tlast = client_tlast.read(); + //std::cout << module_name << ": Pushed request to FIFO" << std::endl; } client_fifo_full.write(client_tdata_fifo.size() >= client_fifo_depth); @@ -62,7 +62,7 @@ void client::Tick() { axis_client_interface.tstrb.write(0); axis_client_interface.tkeep.write(0); axis_client_interface.tuser.write(src_addr); - axis_client_interface.tlast.write(testbench_tlast && (client_tdata_fifo.size() == 1)); + axis_client_interface.tlast.write(r_client_tlast && (client_tdata_fifo.size() == 1)); axis_client_interface.tdata.write(tdata); axis_client_interface.tvalid.write(true); @@ -73,7 +73,7 @@ void client::Tick() { if (axis_client_interface.tvalid.read() && axis_client_interface.tready.read()) { client_tdata_fifo.pop(); - std::cout << module_name << ": Sent Transaction!" << std::endl; + //std::cout << module_name << ": Sent Transaction!" 
<< std::endl; } wait(); } diff --git a/rad-sim/example-designs/add/modules/client.hpp b/rad-sim/example-designs/add/modules/client.hpp index c08e0cb..4b4eec7 100644 --- a/rad-sim/example-designs/add/modules/client.hpp +++ b/rad-sim/example-designs/add/modules/client.hpp @@ -16,7 +16,7 @@ class client : public RADSimModule { std::queue> client_tdata_fifo; // FIFO to store numbers unsigned int client_fifo_depth; // MAXIMUM number of addends to store in FIFO sc_signal client_fifo_full; // Signal flagging addend FIFO is full - bool testbench_tlast; + bool r_client_tlast; public: sc_in rst; diff --git a/rad-sim/example-designs/rtl_add/modules/adder.cpp b/rad-sim/example-designs/rtl_add/modules/adder.cpp index 23529c1..a46c840 100644 --- a/rad-sim/example-designs/rtl_add/modules/adder.cpp +++ b/rad-sim/example-designs/rtl_add/modules/adder.cpp @@ -1,8 +1,11 @@ -#include #include adder::adder(const sc_module_name &name) : RADSimModule(name) { - Vadder* vadder = new Vadder{"vadder"}; + char vadder_name[25]; + std::string vadder_name_str = std::string(name) + "_vmvm"; + std::strcpy(vadder_name, vadder_name_str.c_str()); + + vadder = new Vadder{vadder_name}; vadder->clk(clk); vadder->rst(rst); vadder->axis_adder_interface_tvalid(axis_adder_interface.tvalid); @@ -15,7 +18,9 @@ adder::adder(const sc_module_name &name) : RADSimModule(name) { this->RegisterModuleInfo(); } -adder::~adder() {} +adder::~adder() { + delete vadder; +} void adder::RegisterModuleInfo() { std::string port_name; @@ -23,6 +28,8 @@ void adder::RegisterModuleInfo() { _num_noc_axis_master_ports = 0; _num_noc_aximm_slave_ports = 0; _num_noc_aximm_master_ports = 0; + port_name = module_name + ".axis_adder_interface"; - RegisterAxisSlavePort(port_name, &axis_adder_interface, 128, 0); + RegisterAxisSlavePort(port_name, &axis_adder_interface, 512, 0); + } diff --git a/rad-sim/example-designs/rtl_add/modules/adder.hpp b/rad-sim/example-designs/rtl_add/modules/adder.hpp index d4b0201..67eea31 100644 --- 
a/rad-sim/example-designs/rtl_add/modules/adder.hpp +++ b/rad-sim/example-designs/rtl_add/modules/adder.hpp @@ -8,7 +8,11 @@ #include #include +#include class adder : public RADSimModule { +private: + Vadder* vadder; + public: sc_in rst; sc_out> response; diff --git a/rad-sim/example-designs/rtl_add/modules/client.cpp b/rad-sim/example-designs/rtl_add/modules/client.cpp index 41596f9..feb3bc7 100644 --- a/rad-sim/example-designs/rtl_add/modules/client.cpp +++ b/rad-sim/example-designs/rtl_add/modules/client.cpp @@ -1,8 +1,11 @@ -#include #include client::client(const sc_module_name &name) : RADSimModule(name) { - Vclient* vclient = new Vclient{"vclient"}; + char vclient_name[25]; + std::string vclient_name_str = std::string(name) + "_vmvm"; + std::strcpy(vclient_name, vclient_name_str.c_str()); + + vclient = new Vclient{vclient_name}; vclient->clk(clk); vclient->rst(rst); vclient->client_tdata(client_tdata); @@ -22,7 +25,9 @@ client::client(const sc_module_name &name) : RADSimModule(name) { this->RegisterModuleInfo(); } -client::~client() {} +client::~client() { + delete vclient; +} void client::RegisterModuleInfo() { std::string port_name; @@ -30,6 +35,8 @@ void client::RegisterModuleInfo() { _num_noc_axis_master_ports = 0; _num_noc_aximm_slave_ports = 0; _num_noc_aximm_master_ports = 0; + port_name = module_name + ".axis_client_interface"; - RegisterAxisMasterPort(port_name, &axis_client_interface, 128, 0); + RegisterAxisMasterPort(port_name, &axis_client_interface, 512, 0); + } diff --git a/rad-sim/example-designs/rtl_add/modules/client.hpp b/rad-sim/example-designs/rtl_add/modules/client.hpp index 56c394e..1f32d96 100644 --- a/rad-sim/example-designs/rtl_add/modules/client.hpp +++ b/rad-sim/example-designs/rtl_add/modules/client.hpp @@ -8,7 +8,11 @@ #include #include +#include class client : public RADSimModule { +private: + Vclient* vclient; + public: sc_in rst; sc_in> client_tdata; diff --git a/rad-sim/example-designs/rtl_add/modules/rtl/CMakeLists.txt 
b/rad-sim/example-designs/rtl_add/modules/rtl/CMakeLists.txt index 1757d1f..c602b9c 100644 --- a/rad-sim/example-designs/rtl_add/modules/rtl/CMakeLists.txt +++ b/rad-sim/example-designs/rtl_add/modules/rtl/CMakeLists.txt @@ -6,18 +6,29 @@ if (NOT verilator_FOUND) endif() find_package(SystemCLanguage CONFIG REQUIRED) -set(rtlmodules +set(verilator_options --pins-bv 2 -Wno-fatal -Wall) + +set(rtl_top_modules adder.v client.v ) +set(rtl_libraries + fifo.v +) + add_library(rtl_designs STATIC) target_link_libraries(rtl_designs PUBLIC SystemC::systemc) -foreach(module IN LISTS rtlmodules) +foreach(module IN LISTS rtl_top_modules) + foreach(rtl_library IN LISTS rtl_libraries) + list(APPEND VERILOG_LIBRARIES "-v") + list(APPEND VERILOG_LIBRARIES ${rtl_library}) + endforeach() + verilate(rtl_designs SOURCES ${module} SYSTEMC - VERILATOR_ARGS --pins-bv 2 -Wno-fatal -Wall + VERILATOR_ARGS ${verilator_options} ${VERILOG_LIBRARIES} ) endforeach() \ No newline at end of file diff --git a/rad-sim/example-designs/rtl_add/modules/rtl/adder.v b/rad-sim/example-designs/rtl_add/modules/rtl/adder.v index a28536d..0604ef6 100644 --- a/rad-sim/example-designs/rtl_add/modules/rtl/adder.v +++ b/rad-sim/example-designs/rtl_add/modules/rtl/adder.v @@ -33,7 +33,7 @@ module adder (clk, rst, axis_adder_interface_tvalid, axis_adder_interface_tlast, response_valid = 1'b0; end else begin if (axis_adder_interface_tready && axis_adder_interface_tvalid) begin - $display("Adder: Received %d!", axis_adder_interface_tdata[63:0]); + //$display("Adder: Received %d!", axis_adder_interface_tdata[63:0]); adder_rolling_sum = adder_rolling_sum + axis_adder_interface_tdata[`DATAW-1:0]; t_finished = axis_adder_interface_tlast; end diff --git a/rad-sim/example-designs/rtl_add/modules/rtl/client.v b/rad-sim/example-designs/rtl_add/modules/rtl/client.v index 66922d3..cb5a991 100644 --- a/rad-sim/example-designs/rtl_add/modules/rtl/client.v +++ b/rad-sim/example-designs/rtl_add/modules/rtl/client.v @@ -68,15 +68,15 
@@ module client ( end else begin if (client_ready && client_valid && axis_client_interface_tvalid && axis_client_interface_tready) begin // push data onto the FIFO - $display("Client: Added %d onto the FIFO!", client_tdata[63:0]); - $display("Client: Sent %d to Adder!", axis_client_interface_tdata[63:0]); + //$display("Client: Added %d onto the FIFO!", client_tdata[63:0]); + //$display("Client: Sent %d to Adder!", axis_client_interface_tdata[63:0]); end else if (client_ready && client_valid) begin // push data onto the FIFO item_count <= item_count + 1; - $display("Client: Added %d onto the FIFO!", client_tdata[63:0]); + //$display("Client: Added %d onto the FIFO!", client_tdata[63:0]); end else if (axis_client_interface_tvalid && axis_client_interface_tready) begin item_count <= item_count - 1; - $display("Client: Sent %d to Adder!", axis_client_interface_tdata[63:0]); + //$display("Client: Sent %d to Adder!", axis_client_interface_tdata[63:0]); end if (client_tlast) begin diff --git a/rad-sim/example-designs/rtl_add/modules/rtl/port.map b/rad-sim/example-designs/rtl_add/modules/rtl/port.map index 3682a24..ad8df43 100644 --- a/rad-sim/example-designs/rtl_add/modules/rtl/port.map +++ b/rad-sim/example-designs/rtl_add/modules/rtl/port.map @@ -1,10 +1,10 @@ module adder input 1 clk clk input 1 rst rst -axis slave axis_adder_interface_tvalid axis_adder_interface.tvalid -axis slave axis_adder_interface_tlast axis_adder_interface.tlast -axis slave axis_adder_interface_tdata axis_adder_interface.tdata -axis slave axis_adder_interface_tready axis_adder_interface.tready +axis slave axis_adder_interface_tvalid axis_adder_interface tvalid +axis slave axis_adder_interface_tlast axis_adder_interface tlast +axis slave axis_adder_interface_tdata axis_adder_interface tdata +axis slave axis_adder_interface_tready axis_adder_interface tready output 128 response response output 1 response_valid response_valid @@ -14,14 +14,14 @@ input 1 rst rst input 128 client_tdata client_tdata 
input 1 client_tlast client_tlast input 1 client_valid client_valid -axis master axis_client_interface_tready axis_client_interface.tready +axis master axis_client_interface_tready axis_client_interface tready output 1 client_ready client_ready -axis master axis_client_interface_tvalid axis_client_interface.tvalid -axis master axis_client_interface_tlast axis_client_interface.tlast -axis master axis_client_interface_tdest axis_client_interface.tdest -axis master axis_client_interface_tid axis_client_interface.tid -axis master axis_client_interface_tstrb axis_client_interface.tstrb -axis master axis_client_interface_tkeep axis_client_interface.tkeep -axis master axis_client_interface_tuser axis_client_interface.tuser -axis master axis_client_interface_tdata axis_client_interface.tdata +axis master axis_client_interface_tvalid axis_client_interface tvalid +axis master axis_client_interface_tlast axis_client_interface tlast +axis master axis_client_interface_tdest axis_client_interface tdest +axis master axis_client_interface_tid axis_client_interface tid +axis master axis_client_interface_tstrb axis_client_interface tstrb +axis master axis_client_interface_tkeep axis_client_interface tkeep +axis master axis_client_interface_tuser axis_client_interface tuser +axis master axis_client_interface_tdata axis_client_interface tdata diff --git a/rad-sim/example-designs/rtl_add/modules/rtl/static_params.vh b/rad-sim/example-designs/rtl_add/modules/rtl/static_params.vh index 868e511..fa577f4 100644 --- a/rad-sim/example-designs/rtl_add/modules/rtl/static_params.vh +++ b/rad-sim/example-designs/rtl_add/modules/rtl/static_params.vh @@ -13,7 +13,7 @@ `define AXIS_KEEPW 8 `define AXIS_IDW `NOC_LINKS_PACKETID_WIDTH `define AXIS_DESTW `NOC_LINKS_DEST_WIDTH -`define AXIS_USERW 66 -`define AXIS_MAX_DATAW 1024 +`define AXIS_USERW 75 +`define AXIS_MAX_DATAW 512 `endif //_static_params_vh_ diff --git a/rad-sim/example-designs/rtl_add/rtl_add_driver.cpp 
b/rad-sim/example-designs/rtl_add/rtl_add_driver.cpp index 294a59c..490ff25 100644 --- a/rad-sim/example-designs/rtl_add/rtl_add_driver.cpp +++ b/rad-sim/example-designs/rtl_add/rtl_add_driver.cpp @@ -31,6 +31,8 @@ void rtl_add_driver::source() { client_valid.write(false); wait(); rst.write(false); + start_cycle = GetSimulationCycle(1.0); + start_time = std::chrono::steady_clock::now(); wait(); while (!numbers_to_send.empty()) { @@ -45,7 +47,7 @@ void rtl_add_driver::source() { } } client_valid.write(false); - std::cout << "Finished sending all numbers to client module!" << std::endl; + //std::cout << "Finished sending all numbers to client module!" << std::endl; wait(); } @@ -53,11 +55,17 @@ void rtl_add_driver::sink() { while (!response_valid.read()) { wait(); } - std::cout << "Received " << response.read().to_uint64() << " sum from the adder!" << std::endl; - std::cout << "The actual sum is " << actual_sum << std::endl; + //std::cout << "Received " << response.read().to_uint64() << " sum from the adder!" << std::endl; + //std::cout << "The actual sum is " << actual_sum << std::endl; if (response.read() != actual_sum) std::cout << "FAILURE - Output is not matching!" << std::endl; else std::cout << "SUCCESS - Output is matching!" 
<< std::endl; + end_cycle = GetSimulationCycle(1.0); + end_time = std::chrono::steady_clock::now(); + std::cout << "Simulation Cycles = " << end_cycle - start_cycle << std::endl; + std::cout << "Simulation Time = " << std::chrono::duration_cast (end_time - start_time).count() << " us" << std::endl; + NoCTransactionTelemetry::DumpStatsToFile("stats.csv"); + sc_stop(); } \ No newline at end of file diff --git a/rad-sim/example-designs/rtl_add/rtl_add_driver.hpp b/rad-sim/example-designs/rtl_add/rtl_add_driver.hpp index 06470d3..283a032 100644 --- a/rad-sim/example-designs/rtl_add/rtl_add_driver.hpp +++ b/rad-sim/example-designs/rtl_add/rtl_add_driver.hpp @@ -12,6 +12,8 @@ class rtl_add_driver : public sc_module { private: std::queue numbers_to_send; int actual_sum; + int start_cycle, end_cycle; + std::chrono::steady_clock::time_point start_time, end_time; public: sc_in clk; diff --git a/rad-sim/example-designs/rtl_add/rtl_add_top.hpp b/rad-sim/example-designs/rtl_add/rtl_add_top.hpp index b2cb899..e6a1613 100644 --- a/rad-sim/example-designs/rtl_add/rtl_add_top.hpp +++ b/rad-sim/example-designs/rtl_add/rtl_add_top.hpp @@ -20,6 +20,8 @@ class rtl_add_top : public sc_module { sc_in client_tlast; sc_in client_valid; sc_out client_ready; + + // Adder's interface sc_out> response; sc_out response_valid; diff --git a/rad-sim/scripts/generate_port_mappings.py b/rad-sim/scripts/generate_port_mappings.py index e84c318..6980cde 100644 --- a/rad-sim/scripts/generate_port_mappings.py +++ b/rad-sim/scripts/generate_port_mappings.py @@ -5,7 +5,9 @@ from pathlib import Path verilog_range_regex = "\[(\d*):(\d*)\]" -verilog_axis_regex = "axis_{0}_interface_(.*)" +verilog_axis_regex = "axis_(.*)_(.*)" + +axis_slave_input_ports = ["tvalid", "tdata", "tstrb", "tkeep", "tlast", "tid", "tdest", "tuser"] def determine_port_width(port): if not port.data_type: @@ -26,32 +28,43 @@ def determine_port_width(port): return "?" 
-def is_axis_port(port, module): - if re.match(verilog_axis_regex.format(module), port.name): +def match_axis_regex(port): + signal = re.match(verilog_axis_regex, port.name) + return signal.groups() + +def is_axis_port(port): + if re.match(verilog_axis_regex, port.name): + return True + return False + +def is_axis_role_found(axis_roles, port): + (axis_interface, axis_port) = match_axis_regex(port) + if axis_interface in axis_roles: return True return False -def is_axis_role_master(module): +# Determines the AXI-S roles for a given module. +def determine_axis_roles(module): + axis_roles = {} for p in module.ports: - if is_axis_port(p, module.name): - signal = re.match(verilog_axis_regex.format(module.name), p.name) - if signal.groups()[0] == "tdata": - return 0 if p.mode == "input" else 1 - return None # If no AXI-S data port is found + if is_axis_port(p) and not is_axis_role_found(axis_roles, p): + (axis_interface, axis_port) = match_axis_regex(p) + if axis_port in axis_slave_input_ports: + axis_roles[axis_interface] = "slave" if p.mode == "input" else "master" + return axis_roles def get_modules_from_verilog_file(verilog_file_path): vlog_ex = vlog.VerilogExtractor() return vlog_ex.extract_objects(verilog_file_path) -def generate_port_mappings_for_module(port_mapping_file, module): +def generate_port_mappings_for_module(port_mapping_file, module, axis_roles): warnings = False port_mapping_file.write("module {0}\n".format(module.name)) - is_master = is_axis_role_master(module) # Scan all ports to determine whether AXI-S interface is master or slave for p in module.ports: - if is_axis_port(p, module.name): - signal = re.match(verilog_axis_regex.format(module.name), p.name) - axis_role = "master" if is_master else "slave" - port_mapping_file.write("axis {0} {1} axis_{2}_interface.{3}\n".format(axis_role, p.name, module.name, signal.groups()[0])) + if is_axis_port(p) and is_axis_role_found(axis_roles, p): + (axis_interface, axis_port) = match_axis_regex(p) + axis_role 
= axis_roles[axis_interface] + port_mapping_file.write("axis {0} {1} axis_{2} {3}\n".format(axis_role, p.name, axis_interface, axis_port)) else: port_size = determine_port_width(p) if port_size == "?": @@ -91,7 +104,8 @@ def generate(design_folder, rtl_files, cmd_overwrite): print("ERROR: File {0} is not supported. Only Verilog/SystemVerilog files are supported.".format(rtl_file)) exit() for m in modules: - warnings = True if generate_port_mappings_for_module(port_mapping_file, m) else warnings + axis_roles = determine_axis_roles(m) + warnings = True if generate_port_mappings_for_module(port_mapping_file, m, axis_roles) else warnings if warnings: print("WARNING: Successfully generated port mapping file with manual input required.") print("Please manually replace '?' with the correct values before running the wrapper generation script.") diff --git a/rad-sim/scripts/generate_wrapper.py b/rad-sim/scripts/generate_wrapper.py index cac453e..443857d 100644 --- a/rad-sim/scripts/generate_wrapper.py +++ b/rad-sim/scripts/generate_wrapper.py @@ -6,22 +6,27 @@ import argparse from pathlib import Path +DEFAULT_PORT_WIDTH = 1024 + port_type_translation = { "input": "sc_in", "output": "sc_out", "inout": "sc_inout" } -def generate_source_wrapper(design_name, modules_folder, dataw, mappings, axis_role): +def generate_source_wrapper(design_name, modules_folder, dataw, mappings, axis_roles): verilated_design = "V" + design_name design_inst = "v" + design_name with open(modules_folder / (design_name + ".cpp"), "w") as wrapper_cpp_file: - wrapper_cpp_file.write("#include <" + verilated_design + ".h>\n") wrapper_cpp_file.write("#include <" + design_name + ".hpp>\n\n") wrapper_cpp_file.write(design_name + "::" + design_name + "(const sc_module_name &name) : RADSimModule(name) {\n") - wrapper_cpp_file.write("\t" + verilated_design + "* " + design_inst + " = new " + verilated_design + "{\"" + design_inst + "\"};\n") + + wrapper_cpp_file.write("\t" + "char " + design_inst + 
"_name[25];\n") + wrapper_cpp_file.write("\tstd::string " + design_inst + "_name_str = std::string(name) + \"_vmvm\";\n") + wrapper_cpp_file.write("\tstd::strcpy(" + design_inst + "_name, " + design_inst + "_name_str.c_str());\n\n") + wrapper_cpp_file.write("\t" + design_inst + " = new " + verilated_design + "{" + design_inst + "_name};\n") #inputs and outputs connections if not design_name in mappings: @@ -32,28 +37,34 @@ def generate_source_wrapper(design_name, modules_folder, dataw, mappings, axis_r for port in mappings[design_name]: wrapper_cpp_file.write("\t" + design_inst + "->" + port[2] + "(" + port[3] + ");\n") - wrapper_cpp_file.write("\n\tthis->RegisterModuleInfo();\n") + if axis_roles != None: + wrapper_cpp_file.write("\n\tthis->RegisterModuleInfo();\n") wrapper_cpp_file.write("}\n\n") - wrapper_cpp_file.write(design_name + "::~" + design_name + "() {}\n\n") + wrapper_cpp_file.write(design_name + "::~" + design_name + "() {\n") + wrapper_cpp_file.write("\tdelete " + design_inst + ";\n") + wrapper_cpp_file.write("}\n\n") - if axis_role != None: + if axis_roles != None: wrapper_cpp_file.write("void " + design_name + "::RegisterModuleInfo() {\n") wrapper_cpp_file.write("\tstd::string port_name;\n") wrapper_cpp_file.write("\t_num_noc_axis_slave_ports = 0;\n") wrapper_cpp_file.write("\t_num_noc_axis_master_ports = 0;\n") wrapper_cpp_file.write("\t_num_noc_aximm_slave_ports = 0;\n") - wrapper_cpp_file.write("\t_num_noc_aximm_master_ports = 0;\n") - wrapper_cpp_file.write("\tport_name = module_name + \".axis_" + design_name + "_interface\";\n") - if axis_role == "master": - wrapper_cpp_file.write("\tRegisterAxisMasterPort(port_name, &axis_" + design_name + "_interface, " + dataw + ", 0);\n") - else: - wrapper_cpp_file.write("\tRegisterAxisSlavePort(port_name, &axis_" + design_name + "_interface, " + dataw + ", 0);\n") + wrapper_cpp_file.write("\t_num_noc_aximm_master_ports = 0;\n\n") + + for axis_interface, axis_role in axis_roles.items(): + 
wrapper_cpp_file.write("\tport_name = module_name + \"." + axis_interface + "\";\n") + if axis_role == "master": + wrapper_cpp_file.write("\tRegisterAxisMasterPort(port_name, &" + axis_interface + ", " + dataw + ", 0);\n\n") + else: + wrapper_cpp_file.write("\tRegisterAxisSlavePort(port_name, &" + axis_interface + ", " + dataw + ", 0);\n\n") + wrapper_cpp_file.write("}\n") else: - print("WARNING: Module {0} is not connected to the NOC via AXI-S.", design_name) + print("WARNING: Module {0} is not connected to the NOC via AXI-S.".format(design_name)) -def generate_header_wrapper(design_name, modules_folder, mappings, axis_role): +def generate_header_wrapper(design_name, modules_folder, mappings, axis_roles): verilated_design = "V" + design_name design_inst = "v" + design_name @@ -66,8 +77,11 @@ def generate_header_wrapper(design_name, modules_folder, mappings, axis_role): wrapper_hpp_file.write("#include \n") wrapper_hpp_file.write("#include \n") wrapper_hpp_file.write("#include \n\n") + wrapper_hpp_file.write("#include <" + verilated_design + ".h>\n") wrapper_hpp_file.write("class " + design_name + " : " + "public RADSimModule {\n") + wrapper_hpp_file.write("private:\n") + wrapper_hpp_file.write("\t" + verilated_design + "* " + design_inst + ";\n\n") wrapper_hpp_file.write("public:\n") #inputs and outputs @@ -83,19 +97,22 @@ def generate_header_wrapper(design_name, modules_folder, mappings, axis_role): wrapper_hpp_file.write("\t" + port[0] + "<" + port_size_type + "> " + port[3] + ";\n") #NoC connection, TODO: add support for AXI-MM - if axis_role == "master": - wrapper_hpp_file.write("\n\taxis_master_port axis_" + design_name + "_interface;\n\n") - else: - wrapper_hpp_file.write("\n\taxis_slave_port axis_" + design_name + "_interface;\n\n") - - wrapper_hpp_file.write("\t" + design_name + "(const sc_module_name &name);\n") + if axis_roles != None: + wrapper_hpp_file.write("\n") + for axis_interface, axis_role in axis_roles.items(): + if axis_role == "master": + 
wrapper_hpp_file.write("\taxis_master_port " + axis_interface + ";\n") + else: + wrapper_hpp_file.write("\taxis_slave_port " + axis_interface + ";\n") + + wrapper_hpp_file.write("\n\t" + design_name + "(const sc_module_name &name);\n") wrapper_hpp_file.write("\t~" + design_name + "();\n\n") wrapper_hpp_file.write("\tSC_HAS_PROCESS(" + design_name + ");\n") - if axis_role != None: + if axis_roles != None: wrapper_hpp_file.write("\tvoid RegisterModuleInfo();\n") else: - print("WARNING: Module {0} is not connected to the NOC via AXI-S.", design_name) + print("WARNING: Module {0} is not connected to the NOC via AXI-S.".format(design_name)) wrapper_hpp_file.write("};\n") def read_port_mappings(port_mapping_file): @@ -115,10 +132,18 @@ def read_port_mappings(port_mapping_file): print("Found port mappings for module", current_module) elif components[0] == "axis": if not current_module: raise ValueError("A module must be specified before mappings for the module.") - if len(components) != 4: raise ValueError("Each line specifying an AXI-S port can only contain 4 parameters separated by a whitespace.") - - mappings[current_module].append((components[0], components[1], components[2], components[3])) - axis_roles[current_module] = components[1] + if len(components) != 5: raise ValueError("Each line specifying an AXI-S port must contain 5 parameters separated by a whitespace.") + + (keyword, axis_role, rtl_port, axis_interface, axis_port) = components + radsim_port = axis_interface + "." + axis_port + mappings[current_module].append((keyword, axis_role, rtl_port, radsim_port)) + if current_module not in axis_roles: + axis_roles[current_module] = {} + if axis_interface not in axis_roles[current_module]: + axis_roles[current_module][axis_interface] = axis_role + else: + if axis_roles[current_module][axis_interface] != axis_role: + raise ValueError("Inconsistent AXI-S role for interface " + axis_interface + ". 
Each interface can either be master or slave.") else: port_mode = components[0] port_width = components[1] @@ -147,10 +172,11 @@ def generate(design_folder, design_modules): print("Reading Port Mappings...") mappings, axis_roles = read_port_mappings(port_map_file_path) print("Read Port Mappings Sucessfully!") - for i in range(design_modules): + for i in range(len(design_modules)): design_name = design_modules[i] - dataw = input("Enter the AXI-S data width for module " + design_name + " (default: 1024): ") - dataw = dataw if dataw else 1024 + #TODO: only ask for AXI-S data width if module contains AXI port. + dataw = input("Enter the AXI-S data width for module " + design_name + " (default: " + str(DEFAULT_PORT_WIDTH) + "): ") + dataw = dataw if dataw else str(DEFAULT_PORT_WIDTH) generate_source_wrapper(design_name, modules_folder, dataw, mappings, axis_roles.get(design_name)) print("Generated Source Wrapper for module", design_name) generate_header_wrapper(design_name, modules_folder, mappings, axis_roles.get(design_name)) diff --git a/rad-sim/scripts/verilog_parser.py b/rad-sim/scripts/verilog_parser.py index db93469..8538e43 100644 --- a/rad-sim/scripts/verilog_parser.py +++ b/rad-sim/scripts/verilog_parser.py @@ -8,6 +8,9 @@ '''Verilog documentation parser''' +#TODO: There may exist a bug with signal names that include reserved keywords. As of presently, input, inout, output, and parameter is patched. +#TODO: The `function_start` and `function_arg` tokens are not implemented. There are bugs when parsing functions from parameters. 
+ verilog_tokens = { 'root': [ (r'\bmodule\s+(\w+)\s*', 'module', 'module'), @@ -16,21 +19,30 @@ (r'//.*\n', None), ], 'module': [ - (r'parameter\s*(signed|integer|realtime|real|time)?\s*(\[[^]]+\])?', 'parameter_start', 'parameters'), - (r'(input|inout|output)\s*(logic|reg|supply0|supply1|tri|triand|trior|tri0|tri1|wire|wand|wor)?\s*(signed)?\s*(\[[^]]+\])?', 'module_port_start', 'module_port'), + (r'parameter\s+(signed|integer|realtime|real|time)?\s*(\[[^]]+\])?', 'parameter_start', 'parameters'), + (r'(input|inout|output)\s+(logic|reg|supply0|supply1|tri|triand|trior|tri0|tri1|wire|wand|wor)?\s*(signed)?\s*(\[[^]]+\])?', 'module_port_start', 'module_port'), (r'endmodule', 'end_module', '#pop'), (r'/\*', 'block_comment', 'block_comment'), (r'//#\s*{{(.*)}}\n', 'section_meta'), (r'//.*\n', None), ], 'parameters': [ - (r'\s*parameter\s*(signed|integer|realtime|real|time)?\s*(\[[^]]+\])?', 'parameter_start'), - (r'\s*(\w+)[^),;]*', 'param_item'), - (r',', None), + (r'\s*parameter\s+(signed|integer|realtime|real|time)?\s*(\[[^]]+\])?', 'parameter_start'), + (r'(\w+)\s*=\s*\$*(\w+)\s*\(\w+', 'function_start', 'function'), + (r'\s*(\w+)[^),;\/]*', 'param_item'), + (r'\s*,', None), + (r'//.*\n', None), (r'[);]', None, '#pop'), ], + 'function': [ + (r'\s*\$*(\w+)\s*\(\w+', 'function_start', 'function'), + (r'\s*(\w+)[^),;\/]*', 'function_arg'), + (r'\s*,', None), + (r'//.*\n', None), + (r'\s*\)', None, '#pop'), + ], 'module_port': [ - (r'\s*(input|inout|output)\s*(logic|reg|supply0|supply1|tri|triand|trior|tri0|tri1|wire|wand|wor)?\s*(signed)?\s*(\[[^]]+\])?', 'module_port_start'), + (r'\s*(input|inout|output)\s+(logic|reg|supply0|supply1|tri|triand|trior|tri0|tri1|wire|wand|wor)?\s*(signed)?\s*(\[[^]]+\])?', 'module_port_start'), (r'\s*(\w+)\s*,?', 'port_param'), (r'[);]', None, '#pop'), (r'//#\s*{{(.*)}}\n', 'section_meta'), diff --git a/rad-sim/sim/design_context.cpp b/rad-sim/sim/design_context.cpp index 47e77f3..34aaed5 100644 --- a/rad-sim/sim/design_context.cpp 
+++ b/rad-sim/sim/design_context.cpp @@ -165,6 +165,11 @@ void RADSimDesignContext::ParseNoCPlacement( int port_noc_placement = std::stoi(port_noc_placement_str); int port_node_placement = std::stoi(port_node_placement_str); + + // Ensure the module has been instantiated + if (_design_modules.find(module_name) == _design_modules.end()) + sim_log.log(error, "NoC module " + module_name + " is not defined!"); + RADSimModule *module_ptr = _design_modules[module_name]; if (port_axi_type == "axis") { diff --git a/rad-sim/test/wrapper-scripts/mock_port.map b/rad-sim/test/wrapper-scripts/mock_port.map index 9a9a808..67cbd1b 100644 --- a/rad-sim/test/wrapper-scripts/mock_port.map +++ b/rad-sim/test/wrapper-scripts/mock_port.map @@ -1,5 +1,6 @@ module mock_module input 1 i1 i1 inout 64 io64 io64 -axis master axis_mock_module_interface_tdata axis_mock_module_interface.tdata +axis master axis_mock_master_tdata axis_mock_master tdata +axis slave axis_mock_slave_tdata axis_mock_slave tdata output 32 o32 o32 \ No newline at end of file diff --git a/rad-sim/test/wrapper-scripts/mock_port_incomplete.map b/rad-sim/test/wrapper-scripts/mock_port_incomplete.map index d9921f8..f3e9b9e 100644 --- a/rad-sim/test/wrapper-scripts/mock_port_incomplete.map +++ b/rad-sim/test/wrapper-scripts/mock_port_incomplete.map @@ -1,5 +1,5 @@ module mock_module input 1 i1 i1 inout 64 io64 io64 -axis master axis_mock_module_interface_tdata axis_mock_module_interface.tdata +axis master axis_mock_module_tdata axis_mock_module tdata output ? 
o32 o32 \ No newline at end of file diff --git a/rad-sim/test/wrapper-scripts/mock_port_inconsistent.map b/rad-sim/test/wrapper-scripts/mock_port_inconsistent.map new file mode 100644 index 0000000..d8bee91 --- /dev/null +++ b/rad-sim/test/wrapper-scripts/mock_port_inconsistent.map @@ -0,0 +1,3 @@ +module mock_module +axis master axis_mock_module_tdata axis_mock_module tdata +axis slave axis_mock_module_tid axis_mock_module tid \ No newline at end of file diff --git a/rad-sim/test/wrapper-scripts/test_generate_port_mappings.py b/rad-sim/test/wrapper-scripts/test_generate_port_mappings.py index b375502..32b3320 100644 --- a/rad-sim/test/wrapper-scripts/test_generate_port_mappings.py +++ b/rad-sim/test/wrapper-scripts/test_generate_port_mappings.py @@ -9,29 +9,47 @@ VerilogParameter("output_w3", "output", "wire [5:3]"), # Port 2: Output with width 3 VerilogParameter("inout_w2_reg", "inout", "reg [1:0]"), # Port 3: Inout reg with width 2 VerilogParameter("output_w1_logic", "output", "logic"), # Port 4: Output logic with width 1 (bool) - VerilogParameter("axis_mock_master_module_interface_tdata", "output", "logic [`DEF1:`DEF2]") # Port 5: Output AXI-S logic with unknown width + VerilogParameter("axis_mock_master_module_tdata", "output", "logic [`DEF1:`DEF2]"), # Port 5: Output AXI-S logic with unknown width + VerilogParameter("axis_mock_master_module_unknown_tunknown", "output") # Port 6: (unknown) Output AXI-S logic with width 1 ]) -mock_slave_module = VerilogModule("mock_slave_module", [ - VerilogParameter("axis_mock_slave_module_interface_tdata", "input", "logic [`DEF1:`DEF2]") # Port 0: Input AXI-S logic with unknown width +mock_mixed_module = VerilogModule("mock_mixed_module", [ + VerilogParameter("axis_mock_master_module_tstrb", "output"), # Port 0: Output AXI-S logic with width 1 + VerilogParameter("axis_mock_slave_module_tid", "input", "logic [3:0]"), # Port 1: Input AXI-S logic with width 4 + VerilogParameter("axis_mock_unknown_module_tunknown", "input", 
"logic [511:0]") # Port 2: Input unknown AXI-S logic with width 512 ]) +mock_mixed_module_axis_roles = { + "mock_master_module": "master", + "mock_slave_module": "slave" +} + mock_generate_module = VerilogModule("mock_generate_module", [ - VerilogParameter("axis_mock_generate_module_interface_tdata", "input", "logic [`DEF1:`DEF2]"), # Port 0: Input AXI-S logic with unknown width - VerilogParameter("i64", "input", "[63:0]"), # Port 1: Input 64-bit bit vector - VerilogParameter("o1", "output"), # Port 2: Output 1-bit boolean - VerilogParameter("io32", "inout", "[31:0]"), # Port 3: Inout 32-bit bit-vector - VerilogParameter("iox", "inout", "[`DEF1:`DEF2]") # Port 4: Inout with unknown width bit-vector + VerilogParameter("axis_mock_generate_module_0_tdata", "input", "logic [`DEF1:`DEF2]"), # Port 0: Input AXI-S logic with unknown width + VerilogParameter("axis_mock_generate_module_1_tdata", "output", "logic [`DEF1:`DEF2]"), # Port 1: Output AXI-S logic with unknown width + VerilogParameter("axis_mock_generate_module_2_tunknown", "output", "logic [`DEF1:`DEF2]"), # Port 2: Output unknown AXI-S logic with unknown width + VerilogParameter("i64", "input", "[63:0]"), # Port 3: Input 64-bit bit vector + VerilogParameter("o1", "output"), # Port 4: Output 1-bit boolean + VerilogParameter("io32", "inout", "[31:0]"), # Port 5: Inout 32-bit bit-vector + VerilogParameter("iox", "inout", "[`DEF1:`DEF2]") # Port 6: Inout with unknown width bit-vector ]) + mock_generate_module_port_mappings = [ call("module mock_generate_module\n"), - call("axis slave axis_mock_generate_module_interface_tdata axis_mock_generate_module_interface.tdata\n"), + call("axis slave axis_mock_generate_module_0_tdata axis_mock_generate_module_0 tdata\n"), + call("axis master axis_mock_generate_module_1_tdata axis_mock_generate_module_1 tdata\n"), + call("output ? 
axis_mock_generate_module_2_tunknown axis_mock_generate_module_2_tunknown\n"), call("input 64 i64 i64\n"), call("output 1 o1 o1\n"), call("inout 32 io32 io32\n"), call("inout ? iox iox\n") ] +mock_generate_module_axis_roles = { + "mock_generate_module_0": "slave", + "mock_generate_module_1": "master", +} + class GeneratePortMappingsTest(unittest.TestCase): """ GeneratePortMappingsTest class to test the functions in generate_port_mappings.py @@ -47,20 +65,35 @@ def test_determine_port_width(self): self.assertEqual(gpm.determine_port_width(mock_master_module.ports[3]), 2) self.assertEqual(gpm.determine_port_width(mock_master_module.ports[4]), 1) self.assertEqual(gpm.determine_port_width(mock_master_module.ports[5]), "?") + self.assertEqual(gpm.determine_port_width(mock_master_module.ports[6]), 1) def test_is_axis_port(self): """ Tests the is_axis_port function for correct output """ - self.assertFalse(gpm.is_axis_port(mock_master_module.ports[0], mock_master_module.name)) - self.assertTrue(gpm.is_axis_port(mock_master_module.ports[5], mock_master_module.name)) + self.assertFalse(gpm.is_axis_port(mock_master_module.ports[0])) + self.assertTrue(gpm.is_axis_port(mock_master_module.ports[5])) + + def test_is_axis_role_found(self): + """ + Tests the is_axis_port function for correct output + """ + self.assertTrue(gpm.is_axis_role_found(mock_mixed_module_axis_roles, mock_mixed_module.ports[0])) + self.assertFalse(gpm.is_axis_role_found(mock_mixed_module_axis_roles, mock_mixed_module.ports[2])) - def test_is_axis_role_master(self): + def test_determine_axis_roles(self): """ - Tests the is_axis_role_master function for correct output + Tests the determine_axis_roles function for correct output """ - self.assertTrue(gpm.is_axis_role_master(mock_master_module)) - self.assertFalse(gpm.is_axis_role_master(mock_slave_module)) + mixed_roles = gpm.determine_axis_roles(mock_mixed_module) + # test a: slave port is detected correctly + self.assertIn("mock_slave_module", mixed_roles) + 
self.assertEqual(mixed_roles["mock_slave_module"], "slave") + # test b: master port is detected correctly + self.assertIn("mock_master_module", mixed_roles) + self.assertEqual(mixed_roles["mock_master_module"], "master") + # test c: unknown AXIS port is not detected + self.assertNotIn("mock_unknown_module", mixed_roles) def test_generate_port_mappings_for_module(self): """ @@ -68,7 +101,7 @@ def test_generate_port_mappings_for_module(self): """ mock_file = Mock() mock_file.write = Mock() - warnings = gpm.generate_port_mappings_for_module(mock_file, mock_generate_module) + warnings = gpm.generate_port_mappings_for_module(mock_file, mock_generate_module, mock_generate_module_axis_roles) mock_file.write.assert_has_calls(mock_generate_module_port_mappings) self.assertTrue(warnings, "Warnings should be generated when there exists a value that cannot be inferred automatically.") diff --git a/rad-sim/test/wrapper-scripts/test_generate_wrapper.py b/rad-sim/test/wrapper-scripts/test_generate_wrapper.py index 5c54aa8..767bbaf 100644 --- a/rad-sim/test/wrapper-scripts/test_generate_wrapper.py +++ b/rad-sim/test/wrapper-scripts/test_generate_wrapper.py @@ -15,15 +15,23 @@ def test_read_port_mappings(self): """ mock_port_map_file = PROJECT_PATH + "/test/wrapper-scripts/mock_port.map" - (mappings, axis_role) = gw.read_port_mappings(mock_port_map_file) + (mappings, axis_roles) = gw.read_port_mappings(mock_port_map_file) self.assertEqual(len(mappings), 1, "There should only be one module in the port map file") moduleMappings = mappings["mock_module"] - self.assertEqual(len(moduleMappings), 4) + self.assertEqual(len(moduleMappings), 5) self.assertIn(("sc_in", "1", "i1", "i1"), moduleMappings) self.assertIn(("sc_inout", "64", "io64", "io64"), moduleMappings) self.assertIn(("sc_out", "32", "o32", "o32"), moduleMappings) - self.assertIn(("axis", "master", "axis_mock_module_interface_tdata", "axis_mock_module_interface.tdata"), moduleMappings) + self.assertIn(("axis", "master", 
"axis_mock_master_tdata", "axis_mock_master.tdata"), moduleMappings) + self.assertIn(("axis", "slave", "axis_mock_slave_tdata", "axis_mock_slave.tdata"), moduleMappings) + + moduleAxisRoles = axis_roles["mock_module"] + self.assertEqual(len(moduleAxisRoles), 2) + self.assertIn("axis_mock_master", moduleAxisRoles) + self.assertEqual(moduleAxisRoles["axis_mock_master"], "master") + self.assertIn("axis_mock_slave", moduleAxisRoles) + self.assertEqual(moduleAxisRoles["axis_mock_slave"], "slave") def test_read_port_mappings_incomplete(self): """ @@ -33,5 +41,13 @@ def test_read_port_mappings_incomplete(self): mock_port_map_file = PROJECT_PATH + "/test/wrapper-scripts/mock_port_incomplete.map" self.assertRaises(ValueError, gw.read_port_mappings, mock_port_map_file) + def test_read_port_mappings_inconsistent(self): + """ + Tests the read_port_mappings function when a single AXI-S interface is defined as master and slave + """ + + mock_port_map_file = PROJECT_PATH + "/test/wrapper-scripts/mock_port_incomplete.map" + self.assertRaises(ValueError, gw.read_port_mappings, mock_port_map_file) + if __name__ == "__main__": unittest.main() \ No newline at end of file From 12f3fb33665bb804e6633b6ce50d8e8da8c15b4e Mon Sep 17 00:00:00 2001 From: George Trieu Date: Thu, 16 Nov 2023 19:03:13 -0500 Subject: [PATCH 02/11] MLP Co-Simulated Design with both native SystemC modules and RTL verilated modules --- .../example-designs/mlp_int8/CMakeLists.txt | 6 +- .../mlp_int8/compiler/gen_testcase.py | 2 +- rad-sim/example-designs/mlp_int8/mlp.place | 18 +- .../example-designs/mlp_int8/mlp_driver.cpp | 43 +- .../example-designs/mlp_int8/mlp_driver.hpp | 3 + .../mlp_int8/mlp_int8_system.cpp | 24 +- .../mlp_int8/mlp_int8_system.hpp | 4 + rad-sim/example-designs/mlp_int8/mlp_top.cpp | 65 ++- rad-sim/example-designs/mlp_int8/mlp_top.hpp | 5 + .../mlp_int8/modules/dispatcher.cpp | 2 +- .../mlp_int8/modules/inst_loader.cpp | 2 +- .../example-designs/mlp_int8/modules/mvm.cpp | 4 +- 
.../mlp_int8/modules/rtl/CMakeLists.txt | 37 ++ .../mlp_int8/modules/rtl/accum.v | 79 ++++ .../mlp_int8/modules/rtl/components.v | 133 ++++++ .../mlp_int8/modules/rtl/datapath.v | 95 ++++ .../mlp_int8/modules/rtl/dpe.v | 97 ++++ .../mlp_int8/modules/rtl/port.map | 22 + .../mlp_int8/modules/rtl/reduce.v | 38 ++ .../mlp_int8/modules/rtl/rtl_mvm.v | 416 ++++++++++++++++++ .../mlp_int8/modules/rtl_mvm.cpp | 50 +++ .../mlp_int8/modules/rtl_mvm.hpp | 27 ++ .../mlp_int8/modules/weight_loader.cpp | 2 +- 23 files changed, 1113 insertions(+), 61 deletions(-) create mode 100644 rad-sim/example-designs/mlp_int8/modules/rtl/CMakeLists.txt create mode 100644 rad-sim/example-designs/mlp_int8/modules/rtl/accum.v create mode 100644 rad-sim/example-designs/mlp_int8/modules/rtl/components.v create mode 100644 rad-sim/example-designs/mlp_int8/modules/rtl/datapath.v create mode 100644 rad-sim/example-designs/mlp_int8/modules/rtl/dpe.v create mode 100644 rad-sim/example-designs/mlp_int8/modules/rtl/port.map create mode 100644 rad-sim/example-designs/mlp_int8/modules/rtl/reduce.v create mode 100644 rad-sim/example-designs/mlp_int8/modules/rtl/rtl_mvm.v create mode 100644 rad-sim/example-designs/mlp_int8/modules/rtl_mvm.cpp create mode 100644 rad-sim/example-designs/mlp_int8/modules/rtl_mvm.hpp diff --git a/rad-sim/example-designs/mlp_int8/CMakeLists.txt b/rad-sim/example-designs/mlp_int8/CMakeLists.txt index 25c9753..15def09 100644 --- a/rad-sim/example-designs/mlp_int8/CMakeLists.txt +++ b/rad-sim/example-designs/mlp_int8/CMakeLists.txt @@ -1,6 +1,8 @@ cmake_minimum_required(VERSION 3.16) find_package(SystemCLanguage CONFIG REQUIRED) +add_subdirectory(modules/rtl) + include_directories( ./ modules @@ -21,6 +23,7 @@ set(srcfiles modules/datapath.cpp modules/pipeline.cpp modules/mvm.cpp + modules/rtl_mvm.cpp modules/weight_loader.cpp modules/inst_loader.cpp mlp_top.cpp @@ -38,6 +41,7 @@ set(hdrfiles modules/datapath.hpp modules/pipeline.hpp modules/mvm.hpp + modules/rtl_mvm.hpp 
modules/weight_loader.hpp modules/inst_loader.hpp modules/config.hpp @@ -49,4 +53,4 @@ set(hdrfiles add_compile_options(-Wall -Wextra -pedantic) add_library(design STATIC ${srcfiles} ${hdrfiles}) -target_link_libraries(design PUBLIC SystemC::systemc booksim noc) +target_link_libraries(design PUBLIC SystemC::systemc booksim noc rtl_designs) diff --git a/rad-sim/example-designs/mlp_int8/compiler/gen_testcase.py b/rad-sim/example-designs/mlp_int8/compiler/gen_testcase.py index ef36cce..9281263 100644 --- a/rad-sim/example-designs/mlp_int8/compiler/gen_testcase.py +++ b/rad-sim/example-designs/mlp_int8/compiler/gen_testcase.py @@ -240,5 +240,5 @@ config_file = open('./layer_mvm_config', 'w') config_file.write(str(num_layers) + ' ') for mvm_count in num_mvms: - config_file.write(str(mvm_count) + ' ') + config_file.write(str(mvm_count) + ',0 ') # by default, initialize all MVM instances to be all native SystemC modules config_file.close() \ No newline at end of file diff --git a/rad-sim/example-designs/mlp_int8/mlp.place b/rad-sim/example-designs/mlp_int8/mlp.place index 5c82054..59a9fe3 100644 --- a/rad-sim/example-designs/mlp_int8/mlp.place +++ b/rad-sim/example-designs/mlp_int8/mlp.place @@ -1,10 +1,10 @@ -layer0_mvm0 0 5 axis -layer0_mvm1 0 13 axis -layer1_mvm0 0 15 axis -layer1_mvm1 0 7 axis -layer2_mvm0 0 2 axis -input_dispatcher0 0 14 axis -input_dispatcher1 0 8 axis -output_collector 0 9 axis -weight_loader 0 10 axis +layer0_mvm0 0 12 axis +layer0_mvm1 0 2 axis +layer1_mvm0 0 10 axis +layer1_mvm1 0 14 axis +layer2_mvm0 0 8 axis +input_dispatcher0 0 0 axis +input_dispatcher1 0 13 axis +output_collector 0 7 axis +weight_loader 0 6 axis inst_loader 0 4 axis diff --git a/rad-sim/example-designs/mlp_int8/mlp_driver.cpp b/rad-sim/example-designs/mlp_int8/mlp_driver.cpp index b172b8c..1d702e1 100644 --- a/rad-sim/example-designs/mlp_int8/mlp_driver.cpp +++ b/rad-sim/example-designs/mlp_int8/mlp_driver.cpp @@ -114,39 +114,47 @@ mlp_driver::mlp_driver(const 
sc_module_name& name) : sc_module(name) { std::string design_root_dir = radsim_config.GetStringKnob("radsim_user_design_root_dir"); std::string design_config_filename = design_root_dir + "/compiler/layer_mvm_config"; std::ifstream design_config_file(design_config_filename); - if(!design_config_file) { + if (!design_config_file) { std::cerr << "Cannot read MLP design configuration file!" << std::endl; exit(1); } std::string line; std::getline(design_config_file, line); std::stringstream line_stream(line); - unsigned int tmp; + std::string num_mvms_layer, num_mvms_rtl_layer; + std::string layer_mvms; line_stream >> num_layers; num_mvms.resize(num_layers); + num_mvms_rtl.resize(num_layers); + num_mvms_total.resize(num_layers); for (unsigned int layer_id = 0; layer_id < num_layers; layer_id++) { - line_stream >> tmp; - num_mvms[layer_id] = tmp; + line_stream >> layer_mvms; + std::stringstream layer_mvms_stream(layer_mvms); + std::getline(layer_mvms_stream, num_mvms_layer, ','); + std::getline(layer_mvms_stream, num_mvms_rtl_layer, ','); + num_mvms[layer_id] = std::stoi(num_mvms_layer); + num_mvms_rtl[layer_id] = std::stoi(num_mvms_rtl_layer); + num_mvms_total[layer_id] = num_mvms[layer_id] + num_mvms_rtl[layer_id]; } // Intialize input/output interface vectors - init_vector>::init_sc_vector(dispatcher_fifo_rdy, num_mvms[0]); - init_vector>::init_sc_vector(dispatcher_fifo_wen, num_mvms[0]); - init_vector>>>::init_sc_vector(dispatcher_fifo_wdata, num_mvms[0]); + init_vector>::init_sc_vector(dispatcher_fifo_rdy, num_mvms_total[0]); + init_vector>::init_sc_vector(dispatcher_fifo_wen, num_mvms_total[0]); + init_vector>>>::init_sc_vector(dispatcher_fifo_wdata, num_mvms_total[0]); // Parse weights ParseWeights(weight_data, weight_rf_id, weight_rf_addr, weight_layer_id, - weight_mvm_id, num_layers, num_mvms); + weight_mvm_id, num_layers, num_mvms_total); std::cout << "# Weight vectors = " << weight_data.size() << std::endl; // Parse instructions - ParseInstructions(inst_data, 
inst_layer_id, inst_mvm_id, num_layers, num_mvms); + ParseInstructions(inst_data, inst_layer_id, inst_mvm_id, num_layers, num_mvms_total); std::cout << "# Instructions = " << inst_data.size() << std::endl; // Parse test inputs - test_inputs.resize(num_mvms[0]); + test_inputs.resize(num_mvms_total[0]); std::string filename; - for (unsigned int dispatcher_id = 0; dispatcher_id < num_mvms[0]; dispatcher_id++) { + for (unsigned int dispatcher_id = 0; dispatcher_id < num_mvms_total[0]; dispatcher_id++) { filename = design_root_dir + "/compiler/input_mifs/inputs_mvm" + std::to_string(dispatcher_id) + ".mif"; if (!ParseIO(test_inputs[dispatcher_id], filename)) { std::cout << "Failed to parse test inputs file: " << filename << std::endl; @@ -182,7 +190,7 @@ void mlp_driver::source() { inst_loader_inst_fifo_wen.write(false); inst_loader_layer_id_fifo_wen.write(false); inst_loader_mvm_id_fifo_wen.write(false); - for (unsigned int dispatcher_id = 0; dispatcher_id < num_mvms[0]; dispatcher_id++) + for (unsigned int dispatcher_id = 0; dispatcher_id < num_mvms_total[0]; dispatcher_id++) dispatcher_fifo_wen[dispatcher_id].write(false); wait(); rst.write(false); @@ -251,12 +259,13 @@ void mlp_driver::source() { } start_cycle = GetSimulationCycle(1.0); + start_time = std::chrono::steady_clock::now(); wait(); - std::vector written_inputs(num_mvms[0], 0); + std::vector written_inputs(num_mvms_total[0], 0); bool still_have_inputs_to_feed = true; while (still_have_inputs_to_feed) { - for (unsigned int dispatcher_id = 0; dispatcher_id < num_mvms[0]; dispatcher_id++) { + for (unsigned int dispatcher_id = 0; dispatcher_id < num_mvms_total[0]; dispatcher_id++) { if (dispatcher_fifo_rdy[dispatcher_id].read() && written_inputs[dispatcher_id] < test_inputs[dispatcher_id].size()) { data_vector> tmp(test_inputs[dispatcher_id][written_inputs[dispatcher_id]]); @@ -268,13 +277,13 @@ void mlp_driver::source() { } } still_have_inputs_to_feed = false; - for (unsigned int i = 0; i < num_mvms[0]; i++) 
+ for (unsigned int i = 0; i < num_mvms_total[0]; i++) still_have_inputs_to_feed = still_have_inputs_to_feed || (written_inputs[i] < test_inputs[i].size()); wait(); } std::cout << "Finished writing all test inputs!" << std::endl; - for (unsigned int dispatcher_id = 0; dispatcher_id < num_mvms[0]; dispatcher_id++) + for (unsigned int dispatcher_id = 0; dispatcher_id < num_mvms_total[0]; dispatcher_id++) dispatcher_fifo_wen[dispatcher_id].write(false); wait(); } @@ -302,7 +311,9 @@ void mlp_driver::sink() { else std::cout << "SUCCESS - All outputs are matching!" << std::endl; end_cycle = GetSimulationCycle(1.0); + end_time = std::chrono::steady_clock::now(); std::cout << "Simulation Cycles = " << end_cycle - start_cycle << std::endl; + std::cout << "Simulation Time = " << std::chrono::duration_cast (end_time - start_time).count() << " us" << std::endl; NoCTransactionTelemetry::DumpStatsToFile("stats.csv"); std::vector aggregate_bandwidths = NoCTransactionTelemetry::DumpTrafficFlows("traffic_flows", diff --git a/rad-sim/example-designs/mlp_int8/mlp_driver.hpp b/rad-sim/example-designs/mlp_int8/mlp_driver.hpp index 73f9e3f..9c206ea 100644 --- a/rad-sim/example-designs/mlp_int8/mlp_driver.hpp +++ b/rad-sim/example-designs/mlp_int8/mlp_driver.hpp @@ -14,8 +14,11 @@ class mlp_driver : public sc_module { private: int start_cycle, end_cycle; + std::chrono::steady_clock::time_point start_time, end_time; unsigned int num_layers; std::vector num_mvms; + std::vector num_mvms_rtl; + std::vector num_mvms_total; std::vector>> test_inputs; std::vector> golden_outputs; diff --git a/rad-sim/example-designs/mlp_int8/mlp_int8_system.cpp b/rad-sim/example-designs/mlp_int8/mlp_int8_system.cpp index 88a8ad4..bab51ff 100644 --- a/rad-sim/example-designs/mlp_int8/mlp_int8_system.cpp +++ b/rad-sim/example-designs/mlp_int8/mlp_int8_system.cpp @@ -7,26 +7,34 @@ mlp_int8_system::mlp_int8_system(const sc_module_name& name, sc_clock* driver_cl std::string design_root_dir = 
radsim_config.GetStringKnob("radsim_user_design_root_dir"); std::string design_config_filename = design_root_dir + "/compiler/layer_mvm_config"; std::ifstream design_config_file(design_config_filename); - if(!design_config_file) { + if (!design_config_file) { std::cerr << "Cannot read MLP design configuration file!" << std::endl; exit(1); } std::string line; std::getline(design_config_file, line); std::stringstream line_stream(line); - unsigned int num_layers, tmp; - std::vector num_mvms; + unsigned int num_layers; + std::string num_mvms_layer, num_mvms_rtl_layer; + std::string layer_mvms; line_stream >> num_layers; num_mvms.resize(num_layers); + num_mvms_rtl.resize(num_layers); + num_mvms_total.resize(num_layers); for (unsigned int layer_id = 0; layer_id < num_layers; layer_id++) { - line_stream >> tmp; - num_mvms[layer_id] = tmp; + line_stream >> layer_mvms; + std::stringstream layer_mvms_stream(layer_mvms); + std::getline(layer_mvms_stream, num_mvms_layer, ','); + std::getline(layer_mvms_stream, num_mvms_rtl_layer, ','); + num_mvms[layer_id] = std::stoi(num_mvms_layer); + num_mvms_rtl[layer_id] = std::stoi(num_mvms_rtl_layer); + num_mvms_total[layer_id] = num_mvms[layer_id] + num_mvms_rtl[layer_id]; } // Initialize signal vectors - init_vector>::init_sc_vector(dispatcher_fifo_rdy_signal, num_mvms[0]); - init_vector>::init_sc_vector(dispatcher_fifo_wen_signal, num_mvms[0]); - init_vector>>>::init_sc_vector(dispatcher_fifo_wdata_signal, num_mvms[0]); + init_vector>::init_sc_vector(dispatcher_fifo_rdy_signal, num_mvms_total[0]); + init_vector>::init_sc_vector(dispatcher_fifo_wen_signal, num_mvms_total[0]); + init_vector>>>::init_sc_vector(dispatcher_fifo_wdata_signal, num_mvms_total[0]); // Instantiate driver mlp_driver_inst = new mlp_driver("mlp_driver"); diff --git a/rad-sim/example-designs/mlp_int8/mlp_int8_system.hpp b/rad-sim/example-designs/mlp_int8/mlp_int8_system.hpp index ad2b586..3e8b5c2 100644 --- a/rad-sim/example-designs/mlp_int8/mlp_int8_system.hpp 
+++ b/rad-sim/example-designs/mlp_int8/mlp_int8_system.hpp @@ -9,6 +9,10 @@ class mlp_int8_system : public sc_module { private: + std::vector num_mvms; + std::vector num_mvms_rtl; + std::vector num_mvms_total; + sc_signal weight_loader_weight_fifo_rdy_signal; sc_signal weight_loader_weight_fifo_wen_signal; sc_signal>> weight_loader_weight_fifo_wdata_signal; diff --git a/rad-sim/example-designs/mlp_int8/mlp_top.cpp b/rad-sim/example-designs/mlp_int8/mlp_top.cpp index fff8d57..05d3bb6 100644 --- a/rad-sim/example-designs/mlp_int8/mlp_top.cpp +++ b/rad-sim/example-designs/mlp_int8/mlp_top.cpp @@ -15,26 +15,36 @@ mlp_top::mlp_top(const sc_module_name &name) : sc_module(name) { std::string line; std::getline(design_config_file, line); std::stringstream line_stream(line); - unsigned int num_layers, tmp; - std::vector num_mvms; + unsigned int num_layers; + std::string num_mvms_layer, num_mvms_rtl_layer; + std::string layer_mvms; line_stream >> num_layers; num_mvms.resize(num_layers); + num_mvms_rtl.resize(num_layers); + num_mvms_total.resize(num_layers); for (unsigned int layer_id = 0; layer_id < num_layers; layer_id++) { - line_stream >> tmp; - num_mvms[layer_id] = tmp; + line_stream >> layer_mvms; + std::stringstream layer_mvms_stream(layer_mvms); + std::getline(layer_mvms_stream, num_mvms_layer, ','); + std::getline(layer_mvms_stream, num_mvms_rtl_layer, ','); + num_mvms[layer_id] = std::stoi(num_mvms_layer); + num_mvms_rtl[layer_id] = std::stoi(num_mvms_rtl_layer); + num_mvms_total[layer_id] = num_mvms[layer_id] + num_mvms_rtl[layer_id]; } - init_vector>::init_sc_vector(dispatcher_fifo_rdy, num_mvms[0]); - init_vector>::init_sc_vector(dispatcher_fifo_wen, num_mvms[0]); + init_vector>::init_sc_vector(dispatcher_fifo_rdy, num_mvms_total[0]); + init_vector>::init_sc_vector(dispatcher_fifo_wen, num_mvms_total[0]); init_vector>>>::init_sc_vector( - dispatcher_fifo_wdata, num_mvms[0]); + dispatcher_fifo_wdata, num_mvms_total[0]); matrix_vector_engines.resize(num_layers); - 
input_dispatchers.resize(num_mvms[0]); + rtl_matrix_vector_engines.resize(num_layers); + input_dispatchers.resize(num_mvms_total[0]); char module_name[25]; std::string module_name_str; for (unsigned int layer_id = 0; layer_id < num_layers; layer_id++) { matrix_vector_engines[layer_id].resize(num_mvms[layer_id]); + rtl_matrix_vector_engines[layer_id].resize(num_mvms_rtl[layer_id]); for (unsigned int mvm_id = 0; mvm_id < num_mvms[layer_id]; mvm_id++) { module_name_str = "layer" + std::to_string(layer_id) + "_mvm" + std::to_string(mvm_id); @@ -42,18 +52,26 @@ mlp_top::mlp_top(const sc_module_name &name) : sc_module(name) { matrix_vector_engines[layer_id][mvm_id] = new mvm(module_name, mvm_id, layer_id); matrix_vector_engines[layer_id][mvm_id]->rst(rst); - - if (layer_id == 0) { - module_name_str = "input_dispatcher" + std::to_string(mvm_id); - std::strcpy(module_name, module_name_str.c_str()); - input_dispatchers[mvm_id] = new dispatcher(module_name, mvm_id); - input_dispatchers[mvm_id]->rst(rst); - input_dispatchers[mvm_id]->data_fifo_rdy(dispatcher_fifo_rdy[mvm_id]); - input_dispatchers[mvm_id]->data_fifo_wen(dispatcher_fifo_wen[mvm_id]); - input_dispatchers[mvm_id]->data_fifo_wdata( - dispatcher_fifo_wdata[mvm_id]); - } } + for (unsigned int mvm_id = 0; mvm_id < num_mvms_rtl[layer_id]; mvm_id++) { + module_name_str = + "layer" + std::to_string(layer_id) + "_mvm" + std::to_string(mvm_id + num_mvms[layer_id]); + std::strcpy(module_name, module_name_str.c_str()); + rtl_matrix_vector_engines[layer_id][mvm_id] = + new rtl_mvm(module_name); + rtl_matrix_vector_engines[layer_id][mvm_id]->rst(rst); + } + } + + for (unsigned int mvm_id = 0; mvm_id < num_mvms_total[0]; mvm_id++) { + module_name_str = "input_dispatcher" + std::to_string(mvm_id); + std::strcpy(module_name, module_name_str.c_str()); + input_dispatchers[mvm_id] = new dispatcher(module_name, mvm_id); + input_dispatchers[mvm_id]->rst(rst); + input_dispatchers[mvm_id]->data_fifo_rdy(dispatcher_fifo_rdy[mvm_id]); + 
input_dispatchers[mvm_id]->data_fifo_wen(dispatcher_fifo_wen[mvm_id]); + input_dispatchers[mvm_id]->data_fifo_wdata( + dispatcher_fifo_wdata[mvm_id]); } module_name_str = "output_collector"; @@ -109,9 +127,14 @@ mlp_top::~mlp_top() { for (unsigned int mvm_id = 0; mvm_id < matrix_vector_engines[layer_id].size(); mvm_id++) { delete matrix_vector_engines[layer_id][mvm_id]; - if (layer_id == 0) - delete input_dispatchers[mvm_id]; } + for (unsigned int mvm_id = 0; + mvm_id < rtl_matrix_vector_engines[layer_id].size(); mvm_id++) { + delete rtl_matrix_vector_engines[layer_id][mvm_id]; + } + } + for (unsigned int mvm_id = 0; mvm_id < num_mvms_total[0]; mvm_id++) { + delete input_dispatchers[mvm_id]; } delete output_collector; delete wloader; diff --git a/rad-sim/example-designs/mlp_int8/mlp_top.hpp b/rad-sim/example-designs/mlp_int8/mlp_top.hpp index 23ea033..c8d92b7 100644 --- a/rad-sim/example-designs/mlp_int8/mlp_top.hpp +++ b/rad-sim/example-designs/mlp_int8/mlp_top.hpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -14,7 +15,11 @@ class mlp_top : public sc_module { private: + std::vector> rtl_matrix_vector_engines; std::vector> matrix_vector_engines; + std::vector num_mvms; + std::vector num_mvms_rtl; + std::vector num_mvms_total; std::vector input_dispatchers; collector* output_collector; weight_loader* wloader; diff --git a/rad-sim/example-designs/mlp_int8/modules/dispatcher.cpp b/rad-sim/example-designs/mlp_int8/modules/dispatcher.cpp index 7b78f66..90382a7 100644 --- a/rad-sim/example-designs/mlp_int8/modules/dispatcher.cpp +++ b/rad-sim/example-designs/mlp_int8/modules/dispatcher.cpp @@ -49,7 +49,7 @@ void dispatcher::Assign() { tx_interface.tdata.write(tx_tdata_bv); tx_interface.tuser.write(2 << 9); tx_interface.tid.write(0); - std::string dest_name = "layer0_mvm" + std::to_string(dispatcher_id) + ".rx_interface"; + std::string dest_name = "layer0_mvm" + std::to_string(dispatcher_id) + ".axis_rx"; 
tx_interface.tdest.write(radsim_design.GetPortDestinationID(dest_name)); } else { tx_interface.tvalid.write(false); diff --git a/rad-sim/example-designs/mlp_int8/modules/inst_loader.cpp b/rad-sim/example-designs/mlp_int8/modules/inst_loader.cpp index 77cd479..91a88e4 100644 --- a/rad-sim/example-designs/mlp_int8/modules/inst_loader.cpp +++ b/rad-sim/example-designs/mlp_int8/modules/inst_loader.cpp @@ -88,7 +88,7 @@ void inst_loader::Assign() { tx_interface.tid.write(0); std::string dest_name = "layer" + std::to_string(layer_id_fifo_odata.read()) + - "_mvm" + std::to_string(mvm_id_fifo_odata.read()) + ".rx_interface"; + "_mvm" + std::to_string(mvm_id_fifo_odata.read()) + ".axis_rx"; tx_interface.tdest.write(radsim_design.GetPortDestinationID(dest_name)); } tx_interface.tvalid.write(!inst_fifo_empty.read()); diff --git a/rad-sim/example-designs/mlp_int8/modules/mvm.cpp b/rad-sim/example-designs/mlp_int8/modules/mvm.cpp index 7fc8967..709c8a1 100644 --- a/rad-sim/example-designs/mlp_int8/modules/mvm.cpp +++ b/rad-sim/example-designs/mlp_int8/modules/mvm.cpp @@ -397,9 +397,9 @@ void mvm::RegisterModuleInfo() { _num_noc_axis_slave_ports = 0; _num_noc_axis_master_ports = 0; - port_name = module_name + ".tx_interface"; + port_name = module_name + ".axis_tx"; RegisterAxisMasterPort(port_name, &tx_interface, DATAW, 0); - port_name = module_name + ".rx_interface"; + port_name = module_name + ".axis_rx"; RegisterAxisSlavePort(port_name, &rx_interface, DATAW, 0); } diff --git a/rad-sim/example-designs/mlp_int8/modules/rtl/CMakeLists.txt b/rad-sim/example-designs/mlp_int8/modules/rtl/CMakeLists.txt new file mode 100644 index 0000000..01db0b1 --- /dev/null +++ b/rad-sim/example-designs/mlp_int8/modules/rtl/CMakeLists.txt @@ -0,0 +1,37 @@ +cmake_minimum_required(VERSION 3.16) + +find_package(verilator HINTS $ENV{VERILATOR_ROOT}) +if (NOT verilator_FOUND) + message(FATAL_ERROR "Verilator was not found. 
Either install it, or set the VERILATOR_ROOT environment variable") +endif() +find_package(SystemCLanguage CONFIG REQUIRED) + +set(verilator_options --pins-bv 2 -Wno-fatal -Wall) + +set(rtl_top_modules + rtl_mvm.v +) + +set(rtl_libraries + accum.v + components.v + datapath.v + dpe.v + reduce.v +) + +add_library(rtl_designs STATIC) +target_link_libraries(rtl_designs PUBLIC SystemC::systemc) + +foreach(module IN LISTS rtl_top_modules) + foreach(rtl_library IN LISTS rtl_libraries) + list(APPEND VERILOG_LIBRARIES "-v") + list(APPEND VERILOG_LIBRARIES ${rtl_library}) + endforeach() + + verilate(rtl_designs + SOURCES ${module} + SYSTEMC + VERILATOR_ARGS ${verilator_options} ${VERILOG_LIBRARIES} + ) +endforeach() \ No newline at end of file diff --git a/rad-sim/example-designs/mlp_int8/modules/rtl/accum.v b/rad-sim/example-designs/mlp_int8/modules/rtl/accum.v new file mode 100644 index 0000000..4c4ce16 --- /dev/null +++ b/rad-sim/example-designs/mlp_int8/modules/rtl/accum.v @@ -0,0 +1,79 @@ +module accum # ( + parameter DATAW = 32, + parameter DEPTH = 512, + parameter ADDRW = $clog2(DEPTH) +)( + input clk, + input rst, + input i_valid, + input [DATAW-1:0] i_data, + input [ADDRW-1:0] i_addr, + input i_accum, + input i_last, + output o_valid, + output [DATAW-1:0] o_result +); + +reg [ADDRW-1:0] accum_mem_waddr; +reg [DATAW-1:0] accum_mem_wdata; +wire [DATAW-1:0] accum_mem_rdata; +reg accum_mem_wen; + +reg r_accum, rr_accum, r_valid, rr_valid, rrr_valid, r_last, rr_last; +reg [ADDRW-1:0] r_addr, rr_addr; +reg [DATAW-1:0] r_data, rr_data, r_result; + +memory_block # ( + .DATAW(DATAW), + .DEPTH(DEPTH) +) accum_mem ( + .clk(clk), + .rst(rst), + .waddr(accum_mem_waddr), + .wen(accum_mem_wen), + .wdata(r_result), + .raddr(i_addr), + .rdata(accum_mem_rdata) +); + +always @ (posedge clk) begin + if (rst) begin + r_accum <= 1'b0; rr_accum <= 1'b0; + r_valid <= 1'b0; rr_valid <= 1'b0; rrr_valid <= 1'b0; + r_addr <= 'd0; rr_addr <= 'd0; + r_data <= 'd0; rr_data <= 'd0; + r_last <= 
1'b0; rr_last <= 1'b0; + accum_mem_wen <= 1'b0; + accum_mem_waddr <= 'd0; + end else begin + // Pipeline inputs to align with memory output + r_accum <= i_accum; + rr_accum <= r_accum; + r_addr <= i_addr; + rr_addr <= r_addr; + r_valid <= i_valid; + rr_valid <= r_valid; + r_last <= i_last; + rr_last <= r_last; + r_data <= i_data; + rr_data <= r_data; + + // Perform accumulation + if (rr_valid && rr_accum) begin + r_result <= rr_data + accum_mem_rdata; + accum_mem_wen <= 1'b1; + end else if (rr_valid) begin + r_result <= rr_data; + accum_mem_wen <= 1'b1; + end else begin + accum_mem_wen <= 1'b0; + end + accum_mem_waddr <= rr_addr; + rrr_valid <= rr_last && rr_valid; + end +end + +assign o_valid = rrr_valid; +assign o_result = r_result; + +endmodule \ No newline at end of file diff --git a/rad-sim/example-designs/mlp_int8/modules/rtl/components.v b/rad-sim/example-designs/mlp_int8/modules/rtl/components.v new file mode 100644 index 0000000..a6b5546 --- /dev/null +++ b/rad-sim/example-designs/mlp_int8/modules/rtl/components.v @@ -0,0 +1,133 @@ +module pipeline # ( + parameter DELAY = 1, + parameter WIDTH = 32 +)( + input clk, + input rst, + input [WIDTH-1:0] data_in, + output [WIDTH-1:0] data_out +); + +reg [WIDTH-1:0] r_pipeline [0:DELAY-1]; + +integer i; +always @ (posedge clk) begin + if (rst) begin + for (i = 0; i < DELAY; i = i + 1) begin + r_pipeline[i] <= 0; + end + end else begin + r_pipeline[0] <= data_in; + for (i = 1; i < DELAY; i = i + 1) begin + r_pipeline[i] <= r_pipeline[i-1]; + end + end +end + +assign data_out = r_pipeline[DELAY-1]; + +endmodule + +module memory_block # ( + parameter DATAW = 8, + parameter DEPTH = 512, + parameter ADDRW = $clog2(DEPTH) +)( + input clk, + input rst, + input [ADDRW-1:0] waddr, + input wen, + input [DATAW-1:0] wdata, + input [ADDRW-1:0] raddr, + output [DATAW-1:0] rdata +); + +reg [DATAW-1:0] mem [0:DEPTH-1]; + +reg [ADDRW-1:0] r_raddr, r_waddr; +reg [DATAW-1:0] r_rdata, r_wdata; +reg r_wen; + +integer i; + +initial 
begin + for (i = 0; i < DEPTH; i = i + 1) begin + mem[i] = 0; + end +end + +always @ (posedge clk) begin + if (rst) begin + r_raddr <= 0; + r_waddr <= 0; + r_wdata <= 0; + r_rdata <= 0; + end else begin + r_raddr <= raddr; + r_wen <= wen; + r_waddr <= waddr; + r_wdata <= wdata; + r_rdata <= mem[r_raddr]; + + if (r_wen) mem[r_waddr] <= r_wdata; + end +end + +assign rdata = r_rdata; + +endmodule + +module fifo # ( + parameter DATAW = 64, + parameter DEPTH = 128, + parameter ADDRW = $clog2(DEPTH), + parameter ALMOST_FULL_DEPTH = DEPTH +)( + input clk, + input rst, + input push, + input [DATAW-1:0] idata, + input pop, + output [DATAW-1:0] odata, + output empty, + output full, + output almost_full +); + +reg [DATAW-1:0] mem [0:DEPTH-1]; +reg [ADDRW-1:0] head_ptr, tail_ptr; +reg [ADDRW:0] remaining; + +always @ (posedge clk) begin + if (rst) begin + head_ptr <= 0; + tail_ptr <= 0; + remaining <= DEPTH; + end else begin + if (!full && push) begin + mem[tail_ptr] <= idata; + tail_ptr <= tail_ptr + 1'b1; + end + + if (!empty && pop) begin + head_ptr <= head_ptr + 1'b1; + end + + if (!empty && pop && !full && push) begin + remaining <= remaining; + end else if (!empty && pop) begin + remaining <= remaining + 1'b1; + end else if (!full && push) begin + remaining <= remaining - 1'b1; + end else begin + remaining <= remaining; + end + end +end + +assign empty = (tail_ptr == head_ptr); +assign full = (tail_ptr + 1'b1 == head_ptr); +assign odata = mem[head_ptr]; +assign almost_full = (remaining < (DEPTH - ALMOST_FULL_DEPTH)); + +endmodule \ No newline at end of file diff --git a/rad-sim/example-designs/mlp_int8/modules/rtl/datapath.v b/rad-sim/example-designs/mlp_int8/modules/rtl/datapath.v new file mode 100644 index 0000000..fffdb8e --- /dev/null +++ b/rad-sim/example-designs/mlp_int8/modules/rtl/datapath.v @@ -0,0 +1,95 @@ +module datapath # ( + parameter LANES = 64, + parameter DATAW = 512, + parameter IPREC = 8, + parameter OPREC = 32, + parameter MEM_DEPTH = 512, + parameter 
ADDRW = $clog2(MEM_DEPTH) +)( + input clk, + input rst, + input i_valid, + input [DATAW-1:0] i_dataa, + input [DATAW-1:0] i_datab, + input [IPREC-1:0] i_datac, + input [ADDRW-1:0] i_accum_addr, + input i_accum, + input i_last, + input i_reduce, + output o_valid, + output [OPREC-1:0] o_result +); + +localparam DPE_LATENCY = 8; +localparam ACCUM_LATENCY = 3; + +wire dpe_valid, dpe_accum, dpe_last, accum_valid, accum_reduce; +wire [OPREC-1:0] dpe_result, accum_result; +wire [IPREC-1:0] accum_datac; +wire [ADDRW-1:0] dpe_accum_addr; + +pipeline # ( + .DELAY(DPE_LATENCY), + .WIDTH(ADDRW+2) +) dpe_pipeline ( + .clk(clk), + .rst(rst), + .data_in({i_accum_addr, i_accum, i_last}), + .data_out({dpe_accum_addr, dpe_accum, dpe_last}) +); + +pipeline # ( + .DELAY(DPE_LATENCY+ACCUM_LATENCY), + .WIDTH(IPREC+1) +) accum_pipeline ( + .clk(clk), + .rst(rst), + .data_in({i_datac, i_reduce}), + .data_out({accum_datac, accum_reduce}) +); + +dpe # ( + .LANES(LANES), + .DATAW(DATAW), + .IPREC(IPREC), + .OPREC(OPREC) +) dpe_inst ( + .clk(clk), + .rst(rst), + .i_valid(i_valid), + .i_dataa(i_dataa), + .i_datab(i_datab), + .o_valid(dpe_valid), + .o_result(dpe_result) +); + +accum # ( + .DATAW(OPREC), + .DEPTH(MEM_DEPTH) +) accum_inst ( + .clk(clk), + .rst(rst), + .i_valid(dpe_valid), + .i_data(dpe_result), + .i_addr(dpe_accum_addr), + .i_accum(dpe_accum), + .i_last(dpe_last), + .o_valid(accum_valid), + .o_result(accum_result) +); + +reduce # ( + .IPREC(IPREC), + .OPREC(OPREC) +) reduce_inst ( + .clk(clk), + .rst(rst), + .i_valid(accum_valid), + .i_dataa(accum_result), + .i_datab(accum_datac), + .i_reduce(accum_reduce), + .o_valid(o_valid), + .o_result(o_result) +); + +endmodule \ No newline at end of file diff --git a/rad-sim/example-designs/mlp_int8/modules/rtl/dpe.v b/rad-sim/example-designs/mlp_int8/modules/rtl/dpe.v new file mode 100644 index 0000000..20f9792 --- /dev/null +++ b/rad-sim/example-designs/mlp_int8/modules/rtl/dpe.v @@ -0,0 +1,97 @@ +module dpe # ( + parameter LANES = 64, + 
parameter DATAW = 512, + parameter IPREC = 8, + parameter MPREC = 2 * IPREC, + parameter NUM_MULT = DATAW / IPREC, + parameter OPREC = 32, + parameter ADDER_STAGES = $clog2(LANES) +)( + input clk, + input rst, + input i_valid, + input [DATAW-1:0] i_dataa, + input [DATAW-1:0] i_datab, + output o_valid, + output [OPREC-1:0] o_result +); + +// Input registers +wire signed [IPREC-1:0] dataa [0:NUM_MULT-1]; +wire signed [IPREC-1:0] datab [0:NUM_MULT-1]; +reg signed [IPREC-1:0] r_dataa [0:NUM_MULT-1]; +reg signed [IPREC-1:0] r_datab [0:NUM_MULT-1]; +reg r_ivalid; + +// Multiplication registers +reg signed [MPREC-1:0] r_mrslt [0:NUM_MULT-1]; +reg r_mvalid; + +// Adder tree registers +reg signed [OPREC-1:0] r_arslt_s1 [0:31]; +reg signed [OPREC-1:0] r_arslt_s2 [0:15]; +reg signed [OPREC-1:0] r_arslt_s3 [0:7]; +reg signed [OPREC-1:0] r_arslt_s4 [0:3]; +reg signed [OPREC-1:0] r_arslt_s5 [0:1]; +reg signed [OPREC-1:0] r_arslt_s6; +reg r_avalid [0:ADDER_STAGES-1]; + +genvar j; +generate +for (j = 0; j < NUM_MULT; j = j + 1) begin: split_input + assign dataa[j] = i_dataa[(j+1)*IPREC-1:j*IPREC]; + assign datab[j] = i_datab[(j+1)*IPREC-1:j*IPREC]; +end +endgenerate + +integer i; +always @ (posedge clk) begin + if (rst) begin + for (i = 0; i < NUM_MULT; i = i + 1) begin + r_mrslt[i] <= 'd0; + r_dataa[i] <= 'd0; + r_datab[i] <= 'd0; + end + r_ivalid <= 1'b0; + r_mvalid <= 1'b0; + for (i = 0; i < 32; i = i + 1) r_arslt_s1[i] <= 'd0; + for (i = 0; i < 16; i = i + 1) r_arslt_s2[i] <= 'd0; + for (i = 0; i < 8; i = i + 1) r_arslt_s3[i] <= 'd0; + for (i = 0; i < 4; i = i + 1) r_arslt_s4[i] <= 'd0; + for (i = 0; i < 2; i = i + 1) r_arslt_s5[i] <= 'd0; + r_arslt_s6 <= 'd0; + for (i = 0; i < ADDER_STAGES; i = i + 1) begin + r_avalid[i] <= 1'b0; + end + end else begin + // Register inputs + for (i = 0; i < NUM_MULT; i = i + 1) begin + r_dataa[i] <= dataa[i]; + r_datab[i] <= datab[i]; + end + r_ivalid <= i_valid; + + // Perform multiplication + for (i = 0; i < NUM_MULT; i = i + 1) begin + 
r_mrslt[i] <= r_dataa[i] * r_datab[i]; + end + r_mvalid <= r_ivalid; + + // Register adder tree + for (i = 0; i < 32; i = i + 1) r_arslt_s1[i] <= r_mrslt[i] + r_mrslt[32+i]; + for (i = 0; i < 16; i = i + 1) r_arslt_s2[i] <= r_arslt_s1[i] + r_arslt_s1[16+i]; + for (i = 0; i < 8; i = i + 1) r_arslt_s3[i] <= r_arslt_s2[i] + r_arslt_s2[ 8+i]; + for (i = 0; i < 4; i = i + 1) r_arslt_s4[i] <= r_arslt_s3[i] + r_arslt_s3[ 4+i]; + for (i = 0; i < 2; i = i + 1) r_arslt_s5[i] <= r_arslt_s4[i] + r_arslt_s4[ 2+i]; + r_arslt_s6 <= r_arslt_s5[0] + r_arslt_s5[1]; + r_avalid[0] <= r_mvalid; + for (i = 1; i < ADDER_STAGES; i = i + 1) begin + r_avalid[i] <= r_avalid[i-1]; + end + end +end + +assign o_result = r_arslt_s6; +assign o_valid = r_avalid[ADDER_STAGES-1]; + +endmodule \ No newline at end of file diff --git a/rad-sim/example-designs/mlp_int8/modules/rtl/port.map b/rad-sim/example-designs/mlp_int8/modules/rtl/port.map new file mode 100644 index 0000000..4723403 --- /dev/null +++ b/rad-sim/example-designs/mlp_int8/modules/rtl/port.map @@ -0,0 +1,22 @@ +module rtl_mvm +input 1 clk clk +input 1 rst rst +axis slave axis_rx_tvalid axis_rx tvalid +axis slave axis_rx_tdata axis_rx tdata +axis slave axis_rx_tstrb axis_rx tstrb +axis slave axis_rx_tkeep axis_rx tkeep +axis slave axis_rx_tid axis_rx tid +axis slave axis_rx_tdest axis_rx tdest +axis slave axis_rx_tuser axis_rx tuser +axis slave axis_rx_tlast axis_rx tlast +axis slave axis_rx_tready axis_rx tready +axis master axis_tx_tvalid axis_tx tvalid +axis master axis_tx_tdata axis_tx tdata +axis master axis_tx_tstrb axis_tx tstrb +axis master axis_tx_tkeep axis_tx tkeep +axis master axis_tx_tid axis_tx tid +axis master axis_tx_tdest axis_tx tdest +axis master axis_tx_tuser axis_tx tuser +axis master axis_tx_tlast axis_tx tlast +axis master axis_tx_tready axis_tx tready + diff --git a/rad-sim/example-designs/mlp_int8/modules/rtl/reduce.v b/rad-sim/example-designs/mlp_int8/modules/rtl/reduce.v new file mode 100644 index 
0000000..1498627 --- /dev/null +++ b/rad-sim/example-designs/mlp_int8/modules/rtl/reduce.v @@ -0,0 +1,38 @@ +module reduce # ( + parameter IPREC = 8, + parameter OPREC = 32 +)( + input clk, + input rst, + input i_valid, + input [OPREC-1:0] i_dataa, + input [IPREC-1:0] i_datab, + input i_reduce, + output o_valid, + output [OPREC-1:0] o_result +); + +reg [OPREC-1:0] r_result; +reg r_valid; + +always @ (posedge clk) begin + if (rst) begin + r_result <= 'd0; + r_valid <= 1'b0; + end else begin + if (i_valid && i_reduce) begin + r_result <= i_dataa + i_datab; + r_valid <= 1'b1; + end else if (i_valid) begin + r_result <= i_dataa; + r_valid <= 1'b1; + end else begin + r_valid <= 1'b0; + end + end +end + +assign o_valid = r_valid; +assign o_result = r_result; + +endmodule \ No newline at end of file diff --git a/rad-sim/example-designs/mlp_int8/modules/rtl/rtl_mvm.v b/rad-sim/example-designs/mlp_int8/modules/rtl/rtl_mvm.v new file mode 100644 index 0000000..de367e7 --- /dev/null +++ b/rad-sim/example-designs/mlp_int8/modules/rtl/rtl_mvm.v @@ -0,0 +1,416 @@ +// Instruction Macros for cleaner code. 
Only supported in Quartus Prime Pro Edition +//`define inst_reduce(inst) ``inst``[0] +//`define inst_accum_en(inst) ``inst``[1] +//`define inst_release(inst) ``inst``[2] +//`define inst_last(inst) ``inst``[3] +//`define inst_accum(inst) ``inst``[12:4] +//`define inst_raddr(inst) ``inst``[21:13] +//`define inst_release_dest(inst) ``inst``[30:22] +//`define inst_release_op(inst) ``inst``[31] + +module rtl_mvm # ( + parameter DATAW = 512, // Bitwidth of axi-s tdata + parameter BYTEW = 8, // Bitwidth of axi-s tkeep, tstrb + parameter IDW = 32, // Bitwidth of axi-s tid + parameter DESTW = 4, // Bitwidth of axi-s tdest + parameter USERW = 75, // Bitwidth of axi-s tuser + parameter IPRECISION = 8, // Input precision in bits + parameter OPRECISION = 32, // Output precision in bits + parameter LANES = DATAW / IPRECISION, // Number of dot-product INT8 lanes + parameter DPES = LANES, // Number of dot-product engines + parameter NODES = 512, // Max number of nodes in each NoC + parameter NODESW = $clog2(NODES), //Bitwidth of store node ID + parameter RFDEPTH = 512, // Depth of register files (RFs) + parameter RFADDRW = $clog2(RFDEPTH), // Bitwidth of RF address + parameter INSTW = 1 + NODESW + 2 * RFADDRW + 4, // Instruction bitwidth {release_op, release_dest, rf_raddr, accum_raddr, last, release, accum_en, reduce, jump, en} + parameter INSTD = 512, // Depth of instruction FIFO + parameter INSTADDRW = $clog2(INSTD), // Bitwidth of instruction memory address + parameter AXIS_OPS = 4, // Number of AXI-S operations (max 4) {instruction, reduction vector, input vector, matrix} + parameter AXIS_OPSW = $clog2(AXIS_OPS), + parameter FIFOD = 64, // Depth of input, accumulation, and output FIFOs + parameter DATAPATH_DELAY = 12 // Delay of datpath (inputs -> result) +)( + input clk, + input rst, + // Rx interface + input axis_rx_tvalid, + input [DATAW-1:0] axis_rx_tdata, + input [BYTEW-1:0] axis_rx_tstrb, + input [BYTEW-1:0] axis_rx_tkeep, + input [IDW-1:0] axis_rx_tid, + input 
[DESTW-1:0] axis_rx_tdest, + input [USERW-1:0] axis_rx_tuser, + input axis_rx_tlast, + output axis_rx_tready, + // Tx interface + output axis_tx_tvalid, + output [DATAW-1:0] axis_tx_tdata, + output [BYTEW-1:0] axis_tx_tstrb, + output [BYTEW-1:0] axis_tx_tkeep, + output [IDW-1:0] axis_tx_tid, + output [DESTW-1:0] axis_tx_tdest, + output [USERW-1:0] axis_tx_tuser, + output axis_tx_tlast, + input axis_tx_tready +); + +// Hook up unused Rx signals to dummy registers to avoid being synthesized away +(*noprune*) reg [BYTEW-1:0] dummy_axis_rx_tstrb; +(*noprune*) reg [BYTEW-1:0] dummy_axis_rx_tkeep; +(*noprune*) reg [DESTW-1:0] dummy_axis_rx_tdest; +(*noprune*) reg [IDW-1:0] dummy_axis_rx_tid; +always @ (posedge clk) begin + dummy_axis_rx_tstrb <= axis_rx_tstrb; + dummy_axis_rx_tkeep <= axis_rx_tkeep; + dummy_axis_rx_tdest <= axis_rx_tdest; + dummy_axis_rx_tid <= axis_rx_tid; +end + +reg [AXIS_OPSW-1:0] r_tuser_op; + +reg inst_fifo_push, inst_init_fifo_push; +reg [INSTW-1:0] inst_fifo_idata, inst_init_idata; +wire [INSTW-1:0] inst_fifo_odata; +wire inst_fifo_full, inst_fifo_empty; +wire inst_fifo_pop; + +wire input_fifo_empty, input_fifo_full; +reg [DATAW-1:0] input_fifo_idata; +wire [DATAW-1:0] input_fifo_odata; +reg input_fifo_push; +wire input_fifo_pop; + +wire reduction_fifo_empty, reduction_fifo_full; +reg [DATAW-1:0] reduction_fifo_idata; +wire [DATAW-1:0] reduction_fifo_odata; +reg reduction_fifo_push; +wire reduction_fifo_pop; + +wire output_fifo_empty, output_fifo_full, output_fifo_almost_full; +wire [NODESW-1:0] output_fifo_odest; +wire output_fifo_oop; +wire [DATAW-1:0] output_fifo_odata; +reg output_fifo_pop; + +reg [RFADDRW-1:0] rf_waddr; +reg rf_wen [0:DPES-1]; +reg [DATAW-1:0] rf_wdata; +wire [DATAW-1:0] rf_rdata [0:DPES-1]; + +wire [RFADDRW-1:0] accum_mem_waddr; +wire [DATAW-1:0] accum_mem_rdata; + +wire [OPRECISION*DPES-1:0] datapath_results; +wire [DATAW-1:0] truncated_datapath_results; +wire [DPES-1:0] datapath_ovalid; +wire [NODESW-1:0] datapath_dest; 
+wire datapath_op; + +wire [NODESW-1:0] inst_release_dest; +wire [RFADDRW-1:0] inst_rf_raddr, inst_accum_raddr; +wire inst_reduce, inst_accum_en, inst_release_op, inst_release, inst_jump, inst_en, inst_last; +wire [RFADDRW-1:0] tuser_rf_addr; +wire [AXIS_OPSW-1:0] tuser_op; +wire [DPES-1:0] tuser_rf_en; + +reg rxtready, txtvalid; +reg [AXIS_OPSW:0] tx_tuser_op; +reg [INSTW-1:0] r_inst, rr_inst; +reg r_inst_valid, r_inst_accum_en, r_inst_release, r_inst_reduce, r_inst_release_op; +reg rr_inst_valid, rr_inst_accum_en, rr_inst_release, rr_inst_reduce, rr_inst_release_op; +reg [DATAW-1:0] r_input_operands, rr_input_operands; +reg [DATAW-1:0] r_reduction_operands, rr_reduction_operands; +reg [RFADDRW-1:0] r_inst_accum_raddr, rr_inst_accum_raddr; +reg [NODESW-1:0] r_inst_release_dest, rr_inst_release_dest; + +// FIFO to store instructions +fifo # ( + .DATAW(INSTW), + .DEPTH(INSTD) +) instruction_fifo ( + .clk(clk), + .rst(rst), + .push(inst_fifo_push), + .pop(inst_fifo_pop), + .idata(inst_fifo_idata), + .odata(inst_fifo_odata), + .empty(inst_fifo_empty), + .full(inst_fifo_full), + .almost_full(inst_fifo_almost_full) +); + +// Split the instructions into fields for ease-of-use later +assign inst_release_op = inst_fifo_odata[31]; //`inst_release_op(inst_rdata); +assign inst_release_dest = inst_fifo_odata[30:22]; //`inst_release_dest(inst_rdata); +assign inst_rf_raddr = inst_fifo_odata[21:13]; //`inst_raddr(inst_rdata); +assign inst_accum_raddr = inst_fifo_odata[12:4]; //`inst_accum(inst_rdata); +assign inst_last = inst_fifo_odata[3]; //`inst_last(inst_rdata); +assign inst_reduce = inst_fifo_odata[0]; //`inst_reduce(inst_rdata); +assign inst_accum_en = inst_fifo_odata[1]; //`inst_accum_en(inst_rdata); +assign inst_release = inst_fifo_odata[2]; //`inst_release(inst_rdata); + +// Split the tuser field for ease-of-use later +assign tuser_rf_addr = axis_rx_tuser[8:0]; +assign tuser_op = axis_rx_tuser[10:9]; +assign tuser_rf_en = axis_rx_tuser[74:11]; + +// FIFO for input 
vectors sent to the MVM +fifo # ( + .DATAW(DATAW), + .DEPTH(FIFOD) +) input_fifo ( + .clk(clk), + .rst(rst), + .push(input_fifo_push), + .idata(input_fifo_idata), + .pop(input_fifo_pop), + .odata(input_fifo_odata), + .empty(input_fifo_empty), + .full(input_fifo_full), + .almost_full(input_fifo_almost_full) +); + +// FIFO for accumulation vectors sent to the MVM +fifo # ( + .DATAW(DATAW), + .DEPTH(FIFOD) +) reduction_fifo ( + .clk(clk), + .rst(rst), + .push(reduction_fifo_push), + .idata(reduction_fifo_idata), + .pop(reduction_fifo_pop), + .odata(reduction_fifo_odata), + .empty(reduction_fifo_empty), + .full(reduction_fifo_full), + .almost_full(reduction_fifo_almost_full) +); + +// Pipeline to pass release_dest and release_op alongside datapath +pipeline # ( + .DELAY(DATAPATH_DELAY), + .WIDTH(NODESW+1) +) release_pipeline ( + .clk(clk), + .rst(rst), + .data_in({rr_inst_release_op, rr_inst_release_dest}), + .data_out({datapath_op, datapath_dest}) +); + +genvar dpe_id; +generate +for (dpe_id = 0; dpe_id < DPES; dpe_id = dpe_id + 1) begin: generate_datapath + memory_block # ( + .DATAW(DATAW), + .DEPTH(RFDEPTH) + ) rf ( + .clk(clk), + .rst(rst), + .waddr(rf_waddr), + .wen(rf_wen[dpe_id]), + .wdata(rf_wdata), + .raddr(inst_rf_raddr), + .rdata(rf_rdata[dpe_id]) + ); + + datapath # ( + .LANES(LANES), + .DATAW(DATAW), + .IPREC(IPRECISION), + .OPREC(OPRECISION), + .MEM_DEPTH(RFDEPTH) + ) datapath_inst ( + .clk(clk), + .rst(rst), + .i_valid(rr_inst_valid), + .i_dataa(rr_input_operands), + .i_datab(rf_rdata[dpe_id]), + .i_datac(rr_reduction_operands[(dpe_id+1)*IPRECISION-1:dpe_id*IPRECISION]), + .i_accum_addr(rr_inst_accum_raddr), + .i_accum(rr_inst_accum_en), + .i_last(rr_inst_release), + .i_reduce(rr_inst_reduce), + .o_valid(datapath_ovalid[dpe_id]), + .o_result(datapath_results[(dpe_id+1)*OPRECISION-1:dpe_id*OPRECISION]) + ); + + assign truncated_datapath_results[(dpe_id+1)*IPRECISION-1:dpe_id*IPRECISION] = datapath_results[dpe_id*OPRECISION+IPRECISION-1:dpe_id*OPRECISION]; 
+end +endgenerate + +// Specify if ready to accept input +always @ (*) begin + if (axis_rx_tvalid && tuser_op == 0) begin + rxtready <= !inst_fifo_full; + end else if (axis_rx_tvalid && tuser_op == 1) begin + rxtready <= !reduction_fifo_full; + end else if (axis_rx_tvalid && tuser_op == 2) begin + rxtready <= !input_fifo_full; + end else if (axis_rx_tvalid && tuser_op == 3) begin + rxtready <= 1'b1; + end else begin + rxtready <= 1'b0; + end +end + +// Read from input interface and steer to destination mem/FIFO +integer i; +always @ (posedge clk) begin + if (axis_rx_tvalid && axis_rx_tready) begin + if (tuser_op == 0) begin + inst_init_idata <= axis_rx_tdata[INSTW-1:0]; + inst_init_fifo_push <= 1'b1; + reduction_fifo_push <= 1'b0; + input_fifo_push <= 1'b0; + for (i = 0; i < DPES; i = i + 1) rf_wen[i] <= 1'b0; + end else if (tuser_op == 1) begin + reduction_fifo_idata <= axis_rx_tdata[DATAW-1:0]; + inst_init_fifo_push <= 1'b0; + reduction_fifo_push <= 1'b1; + input_fifo_push <= 1'b0; + for (i = 0; i < DPES; i = i + 1) rf_wen[i] <= 1'b0; + end else if (tuser_op == 2) begin + input_fifo_idata <= axis_rx_tdata[DATAW-1:0]; + input_fifo_push <= 1'b1; + inst_init_fifo_push <= 1'b0; + reduction_fifo_push <= 1'b0; + for (i = 0; i < DPES; i = i + 1) rf_wen[i] <= 1'b0; + end else if (tuser_op == 3) begin + for (i = 0; i < DPES; i = i + 1) rf_wen[i] <= tuser_rf_en[i]; + rf_wdata <= axis_rx_tdata[DATAW-1:0]; + rf_waddr <= tuser_rf_addr; + inst_init_fifo_push <= 1'b0; + reduction_fifo_push <= 1'b0; + input_fifo_push <= 1'b0; + end + + r_tuser_op <= tuser_op; + end else begin + inst_init_fifo_push <= 1'b0; + reduction_fifo_push <= 1'b0; + input_fifo_push <= 1'b0; + for (i = 0; i < DPES; i = i + 1) rf_wen[i] <= 1'b0; + end +end + +// Multiplexer logic to switch between initial instruction writes, and looping the instructions +always @ (*) begin + if (r_tuser_op == 0) begin + inst_fifo_push = inst_init_fifo_push; + inst_fifo_idata = inst_init_idata; + end else begin + 
inst_fifo_push = inst_fifo_pop; + inst_fifo_idata = inst_fifo_odata; + end +end + +// Combinatory logic for tx_tuser_op +always @ (*) begin + if (output_fifo_oop) begin + tx_tuser_op = 2'h2; + end else begin + tx_tuser_op = 2'h1; + end +end + +// Process next instruction if there is an instruction and input vector available, and the output FIFO is able to take outputs +//assign inst_fifo_pop = ~inst_fifo_empty && !input_fifo_empty && !output_fifo_almost_full; +assign inst_fifo_pop = ~inst_fifo_empty && !input_fifo_empty && !output_fifo_almost_full && (!inst_reduce || !reduction_fifo_empty); +// Pop reduction vector if a request to reduce is made, the reduction vector is available, and the next instruction is able to be processed +assign reduction_fifo_pop = inst_reduce && !reduction_fifo_empty && inst_fifo_pop; +// Pop input vector if this is the last chunk for the input vector, and the next instruction is able to be processed +assign input_fifo_pop = inst_last && inst_fifo_pop; + +// Issue instruction and advance instruction raddr, pop inputs +always @ (posedge clk) begin + if (rst) begin + r_inst_valid <= 1'b0; + r_inst_reduce <= 1'b0; + r_inst_accum_en <= 1'b0; + r_inst_release <= 1'b0; + r_inst <= 0; + r_input_operands <= {(DATAW){1'b0}}; + r_reduction_operands <= {(DATAW){1'b0}}; + r_inst_accum_raddr <= {(RFADDRW){1'b0}}; + rr_inst_valid <= 1'b0; + rr_inst_reduce <= 1'b0; + rr_inst_accum_en <= 1'b0; + rr_inst_release <= 1'b0; + rr_inst <= 0; + rr_input_operands <= {(DATAW){1'b0}}; + rr_reduction_operands <= {(DATAW){1'b0}}; + rr_inst_accum_raddr <= {(RFADDRW){1'b0}}; + end else begin + if (!inst_fifo_empty) begin + if (inst_reduce) begin + // TODO: Is this a good implementation? 
Wait until reduction vector arrives to do anything + // If there are input and reduction vectors available and output is able to take on new outputs + if (!input_fifo_empty && !reduction_fifo_empty && !output_fifo_almost_full) begin + r_inst_valid <= 1'b1; + end else begin + r_inst_valid <= 1'b0; + end + end else begin + // If there are inputs available and output is able to take on new outputs + if (!input_fifo_empty && !output_fifo_almost_full) begin + r_inst_valid <= 1'b1; + end else begin + r_inst_valid <= 1'b0; + end + end + end else begin + r_inst_valid <= 1'b0; + end + end + r_inst_release_op <= inst_release_op; + r_inst_release_dest <= inst_release_dest; + r_inst_reduce <= inst_reduce; + r_inst_accum_en <= inst_accum_en; + r_inst_release <= inst_release; + r_inst <= inst_fifo_odata; + r_input_operands <= input_fifo_odata; + r_reduction_operands <= reduction_fifo_odata; + r_inst_accum_raddr <= inst_accum_raddr; + + rr_inst_release_op <= r_inst_release_op; + rr_inst_release_dest <= r_inst_release_dest; + rr_inst_reduce <= r_inst_reduce; + rr_inst_accum_en <= r_inst_accum_en; + rr_inst_release <= r_inst_release; + rr_inst <= r_inst; + rr_input_operands <= r_input_operands; + rr_reduction_operands <= r_reduction_operands; + rr_inst_accum_raddr <= r_inst_accum_raddr; + rr_inst_valid <= r_inst_valid; +end + +// MVM output FIFO +fifo # ( + .DATAW(1 + NODESW + DATAW), + .DEPTH(FIFOD), + .ALMOST_FULL_DEPTH(FIFOD-13) +) output_data_fifo ( + .clk(clk), + .rst(rst), + .push(datapath_ovalid[0]), + .idata({datapath_op, datapath_dest, truncated_datapath_results}), + .pop(axis_tx_tready && !output_fifo_empty), + .odata({output_fifo_oop, output_fifo_odest, output_fifo_odata}), + .empty(output_fifo_empty), + .full(output_fifo_full), + .almost_full(output_fifo_almost_full) +); + +assign axis_rx_tready = rxtready; +assign axis_tx_tvalid = !output_fifo_empty; +assign axis_tx_tdata = output_fifo_odata; +assign axis_tx_tdest = output_fifo_odest; +assign axis_tx_tid = 0; +assign 
axis_tx_tuser = {64'h0, tx_tuser_op, 9'h0}; // Send tuser field as either input or reduction vector + +// Hook up rest of Tx signals to dummy values to avoid optimizing them out +assign axis_tx_tstrb = output_fifo_odata[BYTEW-1:0]; +assign axis_tx_tkeep = output_fifo_odata[2*BYTEW-1:BYTEW]; +assign axis_tx_tlast = output_fifo_odata[31]; + +endmodule \ No newline at end of file diff --git a/rad-sim/example-designs/mlp_int8/modules/rtl_mvm.cpp b/rad-sim/example-designs/mlp_int8/modules/rtl_mvm.cpp new file mode 100644 index 0000000..72910ca --- /dev/null +++ b/rad-sim/example-designs/mlp_int8/modules/rtl_mvm.cpp @@ -0,0 +1,50 @@ +#include + +rtl_mvm::rtl_mvm(const sc_module_name &name) : RADSimModule(name) { + char vrtl_mvm_name[25]; + std::string vrtl_mvm_name_str = std::string(name) + "_vmvm"; + std::strcpy(vrtl_mvm_name, vrtl_mvm_name_str.c_str()); + + vrtl_mvm = new Vrtl_mvm{vrtl_mvm_name}; + vrtl_mvm->clk(clk); + vrtl_mvm->rst(rst); + vrtl_mvm->axis_rx_tvalid(axis_rx.tvalid); + vrtl_mvm->axis_rx_tdata(axis_rx.tdata); + vrtl_mvm->axis_rx_tstrb(axis_rx.tstrb); + vrtl_mvm->axis_rx_tkeep(axis_rx.tkeep); + vrtl_mvm->axis_rx_tid(axis_rx.tid); + vrtl_mvm->axis_rx_tdest(axis_rx.tdest); + vrtl_mvm->axis_rx_tuser(axis_rx.tuser); + vrtl_mvm->axis_rx_tlast(axis_rx.tlast); + vrtl_mvm->axis_rx_tready(axis_rx.tready); + vrtl_mvm->axis_tx_tvalid(axis_tx.tvalid); + vrtl_mvm->axis_tx_tdata(axis_tx.tdata); + vrtl_mvm->axis_tx_tstrb(axis_tx.tstrb); + vrtl_mvm->axis_tx_tkeep(axis_tx.tkeep); + vrtl_mvm->axis_tx_tid(axis_tx.tid); + vrtl_mvm->axis_tx_tdest(axis_tx.tdest); + vrtl_mvm->axis_tx_tuser(axis_tx.tuser); + vrtl_mvm->axis_tx_tlast(axis_tx.tlast); + vrtl_mvm->axis_tx_tready(axis_tx.tready); + + this->RegisterModuleInfo(); +} + +rtl_mvm::~rtl_mvm() { + delete vrtl_mvm; +} + +void rtl_mvm::RegisterModuleInfo() { + std::string port_name; + _num_noc_axis_slave_ports = 0; + _num_noc_axis_master_ports = 0; + _num_noc_aximm_slave_ports = 0; + _num_noc_aximm_master_ports = 0; + + 
port_name = module_name + ".axis_rx"; + RegisterAxisSlavePort(port_name, &axis_rx, 512, 0); + + port_name = module_name + ".axis_tx"; + RegisterAxisMasterPort(port_name, &axis_tx, 512, 0); + +} diff --git a/rad-sim/example-designs/mlp_int8/modules/rtl_mvm.hpp b/rad-sim/example-designs/mlp_int8/modules/rtl_mvm.hpp new file mode 100644 index 0000000..4fc26d8 --- /dev/null +++ b/rad-sim/example-designs/mlp_int8/modules/rtl_mvm.hpp @@ -0,0 +1,27 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +class rtl_mvm : public RADSimModule { +private: + Vrtl_mvm* vrtl_mvm; + +public: + sc_in rst; + + axis_slave_port axis_rx; + axis_master_port axis_tx; + + rtl_mvm(const sc_module_name &name); + ~rtl_mvm(); + + SC_HAS_PROCESS(rtl_mvm); + void RegisterModuleInfo(); +}; diff --git a/rad-sim/example-designs/mlp_int8/modules/weight_loader.cpp b/rad-sim/example-designs/mlp_int8/modules/weight_loader.cpp index 7466208..831aaaa 100644 --- a/rad-sim/example-designs/mlp_int8/modules/weight_loader.cpp +++ b/rad-sim/example-designs/mlp_int8/modules/weight_loader.cpp @@ -135,7 +135,7 @@ void weight_loader::Assign() { tx_interface.tuser.write(tuser_value); std::string dest_name = "layer" + std::to_string(layer_id_fifo_odata.read()) + - "_mvm" + std::to_string(mvm_id_fifo_odata.read()) + ".rx_interface"; + "_mvm" + std::to_string(mvm_id_fifo_odata.read()) + ".axis_rx"; tx_interface.tdest.write(radsim_design.GetPortDestinationID(dest_name)); } else { tx_interface.tvalid.write(false); From 9d87842f036f1370a660dfa450f2cdd98c897bfe Mon Sep 17 00:00:00 2001 From: George Trieu Date: Tue, 21 Nov 2023 21:26:12 -0500 Subject: [PATCH 03/11] Resolved Verilator Warnings --- rad-sim/example-designs/mlp_int8/CMakeLists.txt | 2 ++ rad-sim/example-designs/mlp_int8/mlp_driver.cpp | 2 +- rad-sim/example-designs/mlp_int8/modules/rtl/CMakeLists.txt | 2 +- rad-sim/sim/CMakeLists.txt | 5 +++++ 4 files changed, 9 insertions(+), 2 deletions(-) diff --git 
a/rad-sim/example-designs/mlp_int8/CMakeLists.txt b/rad-sim/example-designs/mlp_int8/CMakeLists.txt index 15def09..7c88f28 100644 --- a/rad-sim/example-designs/mlp_int8/CMakeLists.txt +++ b/rad-sim/example-designs/mlp_int8/CMakeLists.txt @@ -13,6 +13,8 @@ include_directories( ../../sim/noc/booksim/routers ) +include_directories(SYSTEM "${VERILATOR_ROOT}/include") + set(srcfiles modules/fifo.cpp modules/register_file.cpp diff --git a/rad-sim/example-designs/mlp_int8/mlp_driver.cpp b/rad-sim/example-designs/mlp_int8/mlp_driver.cpp index 1d702e1..a867eb6 100644 --- a/rad-sim/example-designs/mlp_int8/mlp_driver.cpp +++ b/rad-sim/example-designs/mlp_int8/mlp_driver.cpp @@ -313,7 +313,7 @@ void mlp_driver::sink() { end_cycle = GetSimulationCycle(1.0); end_time = std::chrono::steady_clock::now(); std::cout << "Simulation Cycles = " << end_cycle - start_cycle << std::endl; - std::cout << "Simulation Time = " << std::chrono::duration_cast (end_time - start_time).count() << " us" << std::endl; + std::cout << "Simulation Time = " << std::chrono::duration_cast (end_time - start_time).count() << " ms" << std::endl; NoCTransactionTelemetry::DumpStatsToFile("stats.csv"); std::vector aggregate_bandwidths = NoCTransactionTelemetry::DumpTrafficFlows("traffic_flows", diff --git a/rad-sim/example-designs/mlp_int8/modules/rtl/CMakeLists.txt b/rad-sim/example-designs/mlp_int8/modules/rtl/CMakeLists.txt index 01db0b1..c857449 100644 --- a/rad-sim/example-designs/mlp_int8/modules/rtl/CMakeLists.txt +++ b/rad-sim/example-designs/mlp_int8/modules/rtl/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.16) -find_package(verilator HINTS $ENV{VERILATOR_ROOT}) +find_package(verilator CONFIG REQUIRED) if (NOT verilator_FOUND) message(FATAL_ERROR "Verilator was not found. 
Either install it, or set the VERILATOR_ROOT environment variable") endif() diff --git a/rad-sim/sim/CMakeLists.txt b/rad-sim/sim/CMakeLists.txt index 9492556..8ba2ac6 100644 --- a/rad-sim/sim/CMakeLists.txt +++ b/rad-sim/sim/CMakeLists.txt @@ -22,6 +22,11 @@ include_directories( ../example-designs/${DESIGN}/modules ) +find_package(verilator CONFIG REQUIRED) +if (verilator_FOUND) + include_directories(SYSTEM "${VERILATOR_ROOT}/include") +endif() + set(srcfiles design_context.cpp radsim_config.cpp From f75d3e9a79cc9266140ea842438583146b1cc7e9 Mon Sep 17 00:00:00 2001 From: George Trieu Date: Tue, 21 Nov 2023 22:17:17 -0500 Subject: [PATCH 04/11] Fixed erroneous requirement of Verilator for RAD-Sim --- rad-sim/sim/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rad-sim/sim/CMakeLists.txt b/rad-sim/sim/CMakeLists.txt index 8ba2ac6..b7e4bfb 100644 --- a/rad-sim/sim/CMakeLists.txt +++ b/rad-sim/sim/CMakeLists.txt @@ -22,7 +22,7 @@ include_directories( ../example-designs/${DESIGN}/modules ) -find_package(verilator CONFIG REQUIRED) +find_package(verilator CONFIG) if (verilator_FOUND) include_directories(SYSTEM "${VERILATOR_ROOT}/include") endif() From 5b12363cd09f01fad7cf5b87e2bb1f3241698a70 Mon Sep 17 00:00:00 2001 From: George Trieu Date: Tue, 21 Nov 2023 23:17:06 -0500 Subject: [PATCH 05/11] Added CI Test for Co-Simulation --- .github/scripts/setup_verilator.sh | 15 +++++++++++++++ .github/workflows/rad_sim_ci.yml | 6 +++++- rad-sim/test/mlp_int8_test.sh | 10 ++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) create mode 100755 .github/scripts/setup_verilator.sh create mode 100755 rad-sim/test/mlp_int8_test.sh diff --git a/.github/scripts/setup_verilator.sh b/.github/scripts/setup_verilator.sh new file mode 100755 index 0000000..095f33b --- /dev/null +++ b/.github/scripts/setup_verilator.sh @@ -0,0 +1,15 @@ +#!/bin/bash +sudo apt-get update && sudo apt-get upgrade -y # upgrade needs root as well as update +sudo apt install -y git help2man perl python3 make autoconf g++ flex
bison ccache # -y: the CI runner has no TTY to answer apt's confirmation prompt +sudo apt install -y libgoogle-perftools-dev numactl perl-doc +sudo apt install -y libfl2 +sudo apt install -y libfl-dev +sudo apt install -y zlibc zlib1g zlib1g-dev + +git clone https://github.com/verilator/verilator +unset VERILATOR_ROOT # For bash +cd verilator +autoconf # Create ./configure script +./configure # Configure and create Makefile +make -j `nproc` # Build Verilator itself (if error, try just 'make') +sudo make install \ No newline at end of file diff --git a/.github/workflows/rad_sim_ci.yml b/.github/workflows/rad_sim_ci.yml index 33c9563..158831e 100644 --- a/.github/workflows/rad_sim_ci.yml +++ b/.github/workflows/rad_sim_ci.yml @@ -47,7 +47,7 @@ jobs: strategy: fail-fast: false matrix: - test-script: [mlp_test.sh, dlrm_test.sh, npu_test.sh] + test-script: [mlp_test.sh, dlrm_test.sh, npu_test.sh, mlp_int8_test.sh] test: [""] exclude: - test-script: npu_test.sh # Don't run all the NPU tests in the same runner @@ -76,6 +76,10 @@ jobs: - name: Setup SystemC run: ./.github/scripts/setup_system_c.sh -v ${{ env.SYSTEMC_VERSION }} + - name: Setup Verilator IFF test requires it + if: matrix.test-script == 'mlp_int8_test.sh' + run: ./.github/scripts/setup_verilator.sh + - name: Setup Mambaforge (Conda) uses: conda-incubator/setup-miniconda@v2 with: diff --git a/rad-sim/test/mlp_int8_test.sh b/rad-sim/test/mlp_int8_test.sh new file mode 100755 index 0000000..a2b8209 --- /dev/null +++ b/rad-sim/test/mlp_int8_test.sh @@ -0,0 +1,10 @@ +#!/bin/bash +test_path=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P ) +cd $test_path + +(cd ../; python config.py mlp_int8) + +# python gen_testcase.py {} {} +(cd ../example-designs/mlp_int8/compiler; python gen_testcase.py 4 512 512 512 256 128 3 3 2 2) +(cd ../example-designs/mlp_int8/compiler; sed -Ei "s/([0-9]*),([0-9]*)/\2,\1/g" layer_mvm_config) # Changes all instances to verilator instances +(cd ../build; make run) From b8d180fd51a6d6f82490a9e8bca7c0a10f1dd6f3 Mon Sep 17 00:00:00 2001 From: George Trieu Date: Fri, 24
Nov 2023 19:24:01 -0500 Subject: [PATCH 06/11] Added built-in cleanup call for verilated modules --- rad-sim/scripts/generate_wrapper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rad-sim/scripts/generate_wrapper.py b/rad-sim/scripts/generate_wrapper.py index 443857d..ff1dfda 100644 --- a/rad-sim/scripts/generate_wrapper.py +++ b/rad-sim/scripts/generate_wrapper.py @@ -42,6 +42,7 @@ def generate_source_wrapper(design_name, modules_folder, dataw, mappings, axis_r wrapper_cpp_file.write("}\n\n") wrapper_cpp_file.write(design_name + "::~" + design_name + "() {\n") + wrapper_cpp_file.write("\t" + design_inst + "->final();\n") wrapper_cpp_file.write("\tdelete " + design_inst + ";\n") wrapper_cpp_file.write("}\n\n") From b625f850a84b5035c6b271a69e4540e163b10b4e Mon Sep 17 00:00:00 2001 From: George Trieu Date: Fri, 24 Nov 2023 22:19:19 -0500 Subject: [PATCH 07/11] Generated MVM file after built-in clean up change --- rad-sim/example-designs/mlp_int8/modules/rtl_mvm.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/rad-sim/example-designs/mlp_int8/modules/rtl_mvm.cpp b/rad-sim/example-designs/mlp_int8/modules/rtl_mvm.cpp index 72910ca..a69d6b5 100644 --- a/rad-sim/example-designs/mlp_int8/modules/rtl_mvm.cpp +++ b/rad-sim/example-designs/mlp_int8/modules/rtl_mvm.cpp @@ -31,6 +31,7 @@ rtl_mvm::rtl_mvm(const sc_module_name &name) : RADSimModule(name) { } rtl_mvm::~rtl_mvm() { + vrtl_mvm->final(); delete vrtl_mvm; } From df12a77218c9547bb5db5f23706c8ebaa8967263 Mon Sep 17 00:00:00 2001 From: George Trieu Date: Wed, 29 Nov 2023 13:27:23 -0500 Subject: [PATCH 08/11] Updated adder example to use new AXI-S format --- docs/rad-sim-rtl-code.rst | 27 ++++++------ .../example-designs/rtl_add/modules/adder.cpp | 13 +++--- .../example-designs/rtl_add/modules/adder.hpp | 2 +- .../rtl_add/modules/client.cpp | 23 +++++----- .../rtl_add/modules/client.hpp | 2 +- .../rtl_add/modules/rtl/adder.v | 22 +++++----- .../rtl_add/modules/rtl/client.v | 44 +++++++++---------- 
.../rtl_add/modules/rtl/port.map | 26 +++++------ 8 files changed, 80 insertions(+), 79 deletions(-) diff --git a/docs/rad-sim-rtl-code.rst b/docs/rad-sim-rtl-code.rst index cd3b920..91d1761 100644 --- a/docs/rad-sim-rtl-code.rst +++ b/docs/rad-sim-rtl-code.rst @@ -116,10 +116,10 @@ An example port map file from the ``rtl_add`` example is shown below: module adder input 1 clk clk input 1 rst rst - axis slave axis_adder_interface_tvalid axis_adder_interface tvalid - axis slave axis_adder_interface_tlast axis_adder_interface tlast - axis slave axis_adder_interface_tdata axis_adder_interface tdata - axis slave axis_adder_interface_tready axis_adder_interface tready + axis slave axis_adder_tvalid axis_adder tvalid + axis slave axis_adder_tlast axis_adder tlast + axis slave axis_adder_tdata axis_adder tdata + axis slave axis_adder_tready axis_adder tready output 128 response response output 1 response_valid response_valid @@ -129,14 +129,13 @@ An example port map file from the ``rtl_add`` example is shown below: input 128 client_tdata client_tdata input 1 client_tlast client_tlast input 1 client_valid client_valid - axis master axis_client_interface_tready axis_client_interface tready + axis master axis_client_tready axis_client tready output 1 client_ready client_ready - axis master axis_client_interface_tvalid axis_client_interface tvalid - axis master axis_client_interface_tlast axis_client_interface tlast - axis master axis_client_interface_tdest axis_client_interface tdest - axis master axis_client_interface_tid axis_client_interface tid - axis master axis_client_interface_tstrb axis_client_interface tstrb - axis master axis_client_interface_tkeep axis_client_interface tkeep - axis master axis_client_interface_tuser axis_client_interface tuser - axis master axis_client_interface_tdata axis_client_interface tdata - + axis master axis_client_tvalid axis_client tvalid + axis master axis_client_tlast axis_client tlast + axis master axis_client_tdest axis_client tdest + 
axis master axis_client_tid axis_client tid + axis master axis_client_tstrb axis_client tstrb + axis master axis_client_tkeep axis_client tkeep + axis master axis_client_tuser axis_client tuser + axis master axis_client_tdata axis_client tdata diff --git a/rad-sim/example-designs/rtl_add/modules/adder.cpp b/rad-sim/example-designs/rtl_add/modules/adder.cpp index a46c840..73d5488 100644 --- a/rad-sim/example-designs/rtl_add/modules/adder.cpp +++ b/rad-sim/example-designs/rtl_add/modules/adder.cpp @@ -8,10 +8,10 @@ adder::adder(const sc_module_name &name) : RADSimModule(name) { vadder = new Vadder{vadder_name}; vadder->clk(clk); vadder->rst(rst); - vadder->axis_adder_interface_tvalid(axis_adder_interface.tvalid); - vadder->axis_adder_interface_tlast(axis_adder_interface.tlast); - vadder->axis_adder_interface_tdata(axis_adder_interface.tdata); - vadder->axis_adder_interface_tready(axis_adder_interface.tready); + vadder->axis_adder_tvalid(axis_adder.tvalid); + vadder->axis_adder_tlast(axis_adder.tlast); + vadder->axis_adder_tdata(axis_adder.tdata); + vadder->axis_adder_tready(axis_adder.tready); vadder->response(response); vadder->response_valid(response_valid); @@ -19,6 +19,7 @@ adder::adder(const sc_module_name &name) : RADSimModule(name) { } adder::~adder() { + vadder->final(); delete vadder; } @@ -29,7 +30,7 @@ void adder::RegisterModuleInfo() { _num_noc_aximm_slave_ports = 0; _num_noc_aximm_master_ports = 0; - port_name = module_name + ".axis_adder_interface"; - RegisterAxisSlavePort(port_name, &axis_adder_interface, 512, 0); + port_name = module_name + ".axis_adder"; + RegisterAxisSlavePort(port_name, &axis_adder, 512, 0); } diff --git a/rad-sim/example-designs/rtl_add/modules/adder.hpp b/rad-sim/example-designs/rtl_add/modules/adder.hpp index 67eea31..06f8efc 100644 --- a/rad-sim/example-designs/rtl_add/modules/adder.hpp +++ b/rad-sim/example-designs/rtl_add/modules/adder.hpp @@ -18,7 +18,7 @@ class adder : public RADSimModule { sc_out> response; sc_out 
response_valid; - axis_slave_port axis_adder_interface; + axis_slave_port axis_adder; adder(const sc_module_name &name); ~adder(); diff --git a/rad-sim/example-designs/rtl_add/modules/client.cpp b/rad-sim/example-designs/rtl_add/modules/client.cpp index feb3bc7..3ba12c7 100644 --- a/rad-sim/example-designs/rtl_add/modules/client.cpp +++ b/rad-sim/example-designs/rtl_add/modules/client.cpp @@ -11,21 +11,22 @@ client::client(const sc_module_name &name) : RADSimModule(name) { vclient->client_tdata(client_tdata); vclient->client_tlast(client_tlast); vclient->client_valid(client_valid); - vclient->axis_client_interface_tready(axis_client_interface.tready); + vclient->axis_client_tready(axis_client.tready); vclient->client_ready(client_ready); - vclient->axis_client_interface_tvalid(axis_client_interface.tvalid); - vclient->axis_client_interface_tlast(axis_client_interface.tlast); - vclient->axis_client_interface_tdest(axis_client_interface.tdest); - vclient->axis_client_interface_tid(axis_client_interface.tid); - vclient->axis_client_interface_tstrb(axis_client_interface.tstrb); - vclient->axis_client_interface_tkeep(axis_client_interface.tkeep); - vclient->axis_client_interface_tuser(axis_client_interface.tuser); - vclient->axis_client_interface_tdata(axis_client_interface.tdata); + vclient->axis_client_tvalid(axis_client.tvalid); + vclient->axis_client_tlast(axis_client.tlast); + vclient->axis_client_tdest(axis_client.tdest); + vclient->axis_client_tid(axis_client.tid); + vclient->axis_client_tstrb(axis_client.tstrb); + vclient->axis_client_tkeep(axis_client.tkeep); + vclient->axis_client_tuser(axis_client.tuser); + vclient->axis_client_tdata(axis_client.tdata); this->RegisterModuleInfo(); } client::~client() { + vclient->final(); delete vclient; } @@ -36,7 +37,7 @@ void client::RegisterModuleInfo() { _num_noc_aximm_slave_ports = 0; _num_noc_aximm_master_ports = 0; - port_name = module_name + ".axis_client_interface"; - RegisterAxisMasterPort(port_name, 
&axis_client_interface, 512, 0); + port_name = module_name + ".axis_client"; + RegisterAxisMasterPort(port_name, &axis_client, 512, 0); } diff --git a/rad-sim/example-designs/rtl_add/modules/client.hpp b/rad-sim/example-designs/rtl_add/modules/client.hpp index 1f32d96..048982a 100644 --- a/rad-sim/example-designs/rtl_add/modules/client.hpp +++ b/rad-sim/example-designs/rtl_add/modules/client.hpp @@ -20,7 +20,7 @@ class client : public RADSimModule { sc_in client_valid; sc_out client_ready; - axis_master_port axis_client_interface; + axis_master_port axis_client; client(const sc_module_name &name); ~client(); diff --git a/rad-sim/example-designs/rtl_add/modules/rtl/adder.v b/rad-sim/example-designs/rtl_add/modules/rtl/adder.v index 0604ef6..7d835f1 100644 --- a/rad-sim/example-designs/rtl_add/modules/rtl/adder.v +++ b/rad-sim/example-designs/rtl_add/modules/rtl/adder.v @@ -3,14 +3,14 @@ `include "static_params.vh" -module adder (clk, rst, axis_adder_interface_tvalid, axis_adder_interface_tlast, axis_adder_interface_tdata, axis_adder_interface_tready, response, response_valid); +module adder (clk, rst, axis_adder_tvalid, axis_adder_tlast, axis_adder_tdata, axis_adder_tready, response, response_valid); input clk; input rst; - input axis_adder_interface_tvalid; - input axis_adder_interface_tlast; - input [`AXIS_MAX_DATAW-1:0] axis_adder_interface_tdata; + input axis_adder_tvalid; + input axis_adder_tlast; + input [`AXIS_MAX_DATAW-1:0] axis_adder_tdata; - output reg axis_adder_interface_tready; + output reg axis_adder_tready; output reg [`DATAW-1:0] response; output reg response_valid; @@ -21,9 +21,9 @@ module adder (clk, rst, axis_adder_interface_tvalid, axis_adder_interface_tlast, if (rst) begin adder_rolling_sum = {`DATAW{1'b0}}; t_finished = 1'b0; - axis_adder_interface_tready = 1'b0; + axis_adder_tready = 1'b0; end else begin - axis_adder_interface_tready = 1'b1; + axis_adder_tready = 1'b1; end end @@ -32,10 +32,10 @@ module adder (clk, rst, 
axis_adder_interface_tvalid, axis_adder_interface_tlast, response = {`DATAW{1'b0}}; response_valid = 1'b0; end else begin - if (axis_adder_interface_tready && axis_adder_interface_tvalid) begin - //$display("Adder: Received %d!", axis_adder_interface_tdata[63:0]); - adder_rolling_sum = adder_rolling_sum + axis_adder_interface_tdata[`DATAW-1:0]; - t_finished = axis_adder_interface_tlast; + if (axis_adder_tready && axis_adder_tvalid) begin + //$display("Adder: Received %d!", axis_adder_tdata[63:0]); + adder_rolling_sum = adder_rolling_sum + axis_adder_tdata[`DATAW-1:0]; + t_finished = axis_adder_tlast; end if (t_finished) begin diff --git a/rad-sim/example-designs/rtl_add/modules/rtl/client.v b/rad-sim/example-designs/rtl_add/modules/rtl/client.v index cb5a991..f39f5c7 100644 --- a/rad-sim/example-designs/rtl_add/modules/rtl/client.v +++ b/rad-sim/example-designs/rtl_add/modules/rtl/client.v @@ -12,16 +12,16 @@ module client ( input [`DATAW-1:0] client_tdata, input client_tlast, input client_valid, - input axis_client_interface_tready, + input axis_client_tready, output client_ready, - output axis_client_interface_tvalid, - output axis_client_interface_tlast, - output [`AXIS_DESTW-1:0] axis_client_interface_tdest, - output [`AXIS_IDW-1:0] axis_client_interface_tid, - output [`AXIS_STRBW-1:0] axis_client_interface_tstrb, - output [`AXIS_KEEPW-1:0] axis_client_interface_tkeep, - output [`AXIS_USERW-1:0] axis_client_interface_tuser, - output [`AXIS_MAX_DATAW-1:0] axis_client_interface_tdata + output axis_client_tvalid, + output axis_client_tlast, + output [`AXIS_DESTW-1:0] axis_client_tdest, + output [`AXIS_IDW-1:0] axis_client_tid, + output [`AXIS_STRBW-1:0] axis_client_tstrb, + output [`AXIS_KEEPW-1:0] axis_client_tkeep, + output [`AXIS_USERW-1:0] axis_client_tuser, + output [`AXIS_MAX_DATAW-1:0] axis_client_tdata ); wire fifo_w_en, fifo_r_en; @@ -48,35 +48,35 @@ module client ( ); assign client_ready = ~fifo_full; - assign fifo_r_en = axis_client_interface_tvalid && 
axis_client_interface_tready; + assign fifo_r_en = axis_client_tvalid && axis_client_tready; assign fifo_w_en = client_ready && client_valid; assign fifo_data_in = client_tdata; - assign axis_client_interface_tdest = `DEST_ADDR; - assign axis_client_interface_tuser = `SRC_ADDR; - assign axis_client_interface_tid = {`AXIS_IDW{1'b0}}; - assign axis_client_interface_tstrb = {`AXIS_STRBW{1'b0}}; - assign axis_client_interface_tkeep = {`AXIS_KEEPW{1'b0}}; - assign axis_client_interface_tvalid = ~fifo_empty; - assign axis_client_interface_tdata = fifo_data_out; - assign axis_client_interface_tlast = last_item_latch && item_count == 1; + assign axis_client_tdest = `DEST_ADDR; + assign axis_client_tuser = `SRC_ADDR; + assign axis_client_tid = {`AXIS_IDW{1'b0}}; + assign axis_client_tstrb = {`AXIS_STRBW{1'b0}}; + assign axis_client_tkeep = {`AXIS_KEEPW{1'b0}}; + assign axis_client_tvalid = ~fifo_empty; + assign axis_client_tdata = fifo_data_out; + assign axis_client_tlast = last_item_latch && item_count == 1; always @(posedge clk) begin if (rst) begin item_count <= 0; last_item_latch <= 1'b0; end else begin - if (client_ready && client_valid && axis_client_interface_tvalid && axis_client_interface_tready) begin + if (client_ready && client_valid && axis_client_tvalid && axis_client_tready) begin // push data onto the FIFO //$display("Client: Added %d onto the FIFO!", client_tdata[63:0]); - //$display("Client: Sent %d to Adder!", axis_client_interface_tdata[63:0]); + //$display("Client: Sent %d to Adder!", axis_client_tdata[63:0]); end else if (client_ready && client_valid) begin // push data onto the FIFO item_count <= item_count + 1; //$display("Client: Added %d onto the FIFO!", client_tdata[63:0]); - end else if (axis_client_interface_tvalid && axis_client_interface_tready) begin + end else if (axis_client_tvalid && axis_client_tready) begin item_count <= item_count - 1; - //$display("Client: Sent %d to Adder!", axis_client_interface_tdata[63:0]); + //$display("Client: 
Sent %d to Adder!", axis_client_tdata[63:0]); end if (client_tlast) begin diff --git a/rad-sim/example-designs/rtl_add/modules/rtl/port.map b/rad-sim/example-designs/rtl_add/modules/rtl/port.map index ad8df43..3791853 100644 --- a/rad-sim/example-designs/rtl_add/modules/rtl/port.map +++ b/rad-sim/example-designs/rtl_add/modules/rtl/port.map @@ -1,10 +1,10 @@ module adder input 1 clk clk input 1 rst rst -axis slave axis_adder_interface_tvalid axis_adder_interface tvalid -axis slave axis_adder_interface_tlast axis_adder_interface tlast -axis slave axis_adder_interface_tdata axis_adder_interface tdata -axis slave axis_adder_interface_tready axis_adder_interface tready +axis slave axis_adder_tvalid axis_adder tvalid +axis slave axis_adder_tlast axis_adder tlast +axis slave axis_adder_tdata axis_adder tdata +axis slave axis_adder_tready axis_adder tready output 128 response response output 1 response_valid response_valid @@ -14,14 +14,14 @@ input 1 rst rst input 128 client_tdata client_tdata input 1 client_tlast client_tlast input 1 client_valid client_valid -axis master axis_client_interface_tready axis_client_interface tready +axis master axis_client_tready axis_client tready output 1 client_ready client_ready -axis master axis_client_interface_tvalid axis_client_interface tvalid -axis master axis_client_interface_tlast axis_client_interface tlast -axis master axis_client_interface_tdest axis_client_interface tdest -axis master axis_client_interface_tid axis_client_interface tid -axis master axis_client_interface_tstrb axis_client_interface tstrb -axis master axis_client_interface_tkeep axis_client_interface tkeep -axis master axis_client_interface_tuser axis_client_interface tuser -axis master axis_client_interface_tdata axis_client_interface tdata +axis master axis_client_tvalid axis_client tvalid +axis master axis_client_tlast axis_client tlast +axis master axis_client_tdest axis_client tdest +axis master axis_client_tid axis_client tid +axis master 
axis_client_tstrb axis_client tstrb +axis master axis_client_tkeep axis_client tkeep +axis master axis_client_tuser axis_client tuser +axis master axis_client_tdata axis_client tdata From e74b3fca675cbbc3018cbfb3173d4c95aa9d5660 Mon Sep 17 00:00:00 2001 From: George Trieu Date: Wed, 29 Nov 2023 13:45:19 -0500 Subject: [PATCH 09/11] Eliminated requirement to enter AXI-S width for non AXI-S modules --- rad-sim/scripts/generate_wrapper.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/rad-sim/scripts/generate_wrapper.py b/rad-sim/scripts/generate_wrapper.py index ff1dfda..4805d08 100644 --- a/rad-sim/scripts/generate_wrapper.py +++ b/rad-sim/scripts/generate_wrapper.py @@ -62,8 +62,6 @@ def generate_source_wrapper(design_name, modules_folder, dataw, mappings, axis_r wrapper_cpp_file.write("\tRegisterAxisSlavePort(port_name, &" + axis_interface + ", " + dataw + ", 0);\n\n") wrapper_cpp_file.write("}\n") - else: - print("WARNING: Module {0} is not connected to the NOC via AXI-S.".format(design_name)) def generate_header_wrapper(design_name, modules_folder, mappings, axis_roles): verilated_design = "V" + design_name @@ -112,8 +110,6 @@ def generate_header_wrapper(design_name, modules_folder, mappings, axis_roles): wrapper_hpp_file.write("\tSC_HAS_PROCESS(" + design_name + ");\n") if axis_roles != None: wrapper_hpp_file.write("\tvoid RegisterModuleInfo();\n") - else: - print("WARNING: Module {0} is not connected to the NOC via AXI-S.".format(design_name)) wrapper_hpp_file.write("};\n") def read_port_mappings(port_mapping_file): @@ -175,9 +171,11 @@ def generate(design_folder, design_modules): print("Read Port Mappings Sucessfully!") for i in range(len(design_modules)): design_name = design_modules[i] - #TODO: only ask for AXI-S data width if module contains AXI port. 
- dataw = input("Enter the AXI-S data width for module " + design_name + " (default: " + str(DEFAULT_PORT_WIDTH) + "): ") - dataw = dataw if dataw else str(DEFAULT_PORT_WIDTH) + dataw = str(DEFAULT_PORT_WIDTH) # default width; keeps dataw bound for modules without AXI-S ports + if axis_roles.get(design_name) is not None: + dataw = input("Enter the AXI-S data width for module " + design_name + " (default: " + str(DEFAULT_PORT_WIDTH) + "): ") or dataw + else: + print("WARNING: Module {0} is not connected to the NOC via AXI-S.".format(design_name)) generate_source_wrapper(design_name, modules_folder, dataw, mappings, axis_roles.get(design_name)) print("Generated Source Wrapper for module", design_name) generate_header_wrapper(design_name, modules_folder, mappings, axis_roles.get(design_name)) From 03680c2bf6576bd9aec966cc6b04db9723603b8f Mon Sep 17 00:00:00 2001 From: George Trieu Date: Wed, 29 Nov 2023 13:57:51 -0500 Subject: [PATCH 10/11] RTL Code Documentation CMakeLists improvement --- docs/rad-sim-rtl-code.rst | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/rad-sim-rtl-code.rst b/docs/rad-sim-rtl-code.rst index 91d1761..deded79 100644 --- a/docs/rad-sim-rtl-code.rst +++ b/docs/rad-sim-rtl-code.rst @@ -55,11 +55,21 @@ RAD-Sim has a pre-defined file structure for supporting RTL code. All RTL code m ├── CMakeLists.txt └── config.yml +An example design that utilizes RTL modules can be found in the ``rad-sim/example-designs/rtl_add`` folder. + +RTL CMakeLists +--------------- The RTL source folder additionally contains a CMakeLists script, and an optional port mapping file used for :ref:`automatic wrapper generation `. The CMakeLists script imports the required libraries and verilates the RTL designs to SystemC modules. These objects are linked in the design CMakeLists script. -An example design that utilizes RTL modules can be found in the ``rad-sim/example-designs/rtl_add`` folder. +Under ``rtl_top_modules``, a list of all top-level RTL modules should be placed.
Each top-level module will be verilated to become a C++ SystemC module. +All modules instantiated by a top-level module are merged into the SystemC module of the top-level design automatically. + +Verilator only finds a module automatically when its file is named after the module and has a .v/.sv extension. +It is therefore recommended that all non-top-level modules be listed under ``rtl_libraries``. + +A sample CMakeLists file can be found in the ``rad-sim/example-designs/rtl_add/modules/rtl`` folder. Wrapper Files ------------- From df50a775f0b8f11e70065d0a0b190c4cc14b45e3 Mon Sep 17 00:00:00 2001 From: George Trieu Date: Thu, 30 Nov 2023 16:21:51 -0500 Subject: [PATCH 11/11] Added comments and renamed some variables to make code appear cleaner --- rad-sim/example-designs/add/add_driver.cpp | 4 +- rad-sim/example-designs/mlp/mlp_driver.cpp | 4 +- .../example-designs/mlp_int8/CMakeLists.txt | 4 +- .../example-designs/mlp_int8/mlp_driver.cpp | 14 ++--- .../example-designs/mlp_int8/mlp_driver.hpp | 5 +- .../mlp_int8/mlp_int8_system.cpp | 10 ++-- .../mlp_int8/mlp_int8_system.hpp | 3 +- rad-sim/example-designs/mlp_int8/mlp_top.cpp | 30 +++++------ rad-sim/example-designs/mlp_int8/mlp_top.hpp | 6 +-- .../mlp_int8/modules/collector.hpp | 1 - .../mlp_int8/modules/dispatcher.hpp | 3 +- .../mlp_int8/modules/rtl/accum.v | 15 ++++++ .../mlp_int8/modules/rtl/components.v | 54 +++++++++++++++++++ .../mlp_int8/modules/rtl/datapath.v | 26 +++++++++ .../mlp_int8/modules/rtl/dpe.v | 40 ++++++++++---- .../mlp_int8/modules/rtl/reduce.v | 18 +++++++ .../mlp_int8/modules/rtl/rtl_mvm.v | 4 ++ .../modules/{mvm.cpp => sysc_mvm.cpp} | 12 ++--- .../modules/{mvm.hpp => sysc_mvm.hpp} | 8 +-- .../rtl_add/rtl_add_driver.cpp | 4 +- rad-sim/scripts/generate_port_mappings.py | 17 +++++- rad-sim/scripts/generate_wrapper.py | 28 +++++++--- rad-sim/scripts/verilog_parser.py | 3 +- 23 files changed, 240 insertions(+), 73 deletions(-) rename
rad-sim/example-designs/mlp_int8/modules/{mvm.cpp => sysc_mvm.cpp} (98%) rename rad-sim/example-designs/mlp_int8/modules/{mvm.hpp => sysc_mvm.hpp} (94%) diff --git a/rad-sim/example-designs/add/add_driver.cpp b/rad-sim/example-designs/add/add_driver.cpp index 70b4405..62ea073 100644 --- a/rad-sim/example-designs/add/add_driver.cpp +++ b/rad-sim/example-designs/add/add_driver.cpp @@ -31,7 +31,7 @@ void add_driver::source() { client_valid.write(false); wait(); rst.write(false); - start_cycle = GetSimulationCycle(1.0); + start_cycle = GetSimulationCycle(radsim_config.GetDoubleKnob("sim_driver_period")); start_time = std::chrono::steady_clock::now(); wait(); @@ -61,7 +61,7 @@ void add_driver::sink() { if (response.read() != actual_sum) std::cout << "FAILURE - Output is not matching!" << std::endl; else std::cout << "SUCCESS - Output is matching!" << std::endl; - end_cycle = GetSimulationCycle(1.0); + end_cycle = GetSimulationCycle(radsim_config.GetDoubleKnob("sim_driver_period")); end_time = std::chrono::steady_clock::now(); std::cout << "Simulation Cycles = " << end_cycle - start_cycle << std::endl; std::cout << "Simulation Time = " << std::chrono::duration_cast (end_time - start_time).count() << " us" << std::endl; diff --git a/rad-sim/example-designs/mlp/mlp_driver.cpp b/rad-sim/example-designs/mlp/mlp_driver.cpp index 1069425..a71ccd7 100644 --- a/rad-sim/example-designs/mlp/mlp_driver.cpp +++ b/rad-sim/example-designs/mlp/mlp_driver.cpp @@ -82,7 +82,7 @@ void mlp_driver::source() { dispatcher_fifo_wen[dispatcher_id].write(false); wait(); rst.write(false); - start_cycle = GetSimulationCycle(1.0); + start_cycle = GetSimulationCycle(radsim_config.GetDoubleKnob("sim_driver_period")); wait(); std::vector written_inputs(num_mvms[0], 0); @@ -129,7 +129,7 @@ void mlp_driver::sink() { if (mistake) std::cout << "FAILURE - Some outputs NOT matching!" << std::endl; else std::cout << "SUCCESS - All outputs are matching!" 
<< std::endl; - end_cycle = GetSimulationCycle(1.0); + end_cycle = GetSimulationCycle(radsim_config.GetDoubleKnob("sim_driver_period")); std::cout << "Simulation Cycles = " << end_cycle - start_cycle << std::endl; NoCTransactionTelemetry::DumpStatsToFile("stats.csv"); NoCFlitTelemetry::DumpNoCFlitTracesToFile("flit_traces.csv"); diff --git a/rad-sim/example-designs/mlp_int8/CMakeLists.txt b/rad-sim/example-designs/mlp_int8/CMakeLists.txt index 7c88f28..99593ca 100644 --- a/rad-sim/example-designs/mlp_int8/CMakeLists.txt +++ b/rad-sim/example-designs/mlp_int8/CMakeLists.txt @@ -24,7 +24,7 @@ set(srcfiles modules/collector.cpp modules/datapath.cpp modules/pipeline.cpp - modules/mvm.cpp + modules/sysc_mvm.cpp modules/rtl_mvm.cpp modules/weight_loader.cpp modules/inst_loader.cpp @@ -42,7 +42,7 @@ set(hdrfiles modules/collector.hpp modules/datapath.hpp modules/pipeline.hpp - modules/mvm.hpp + modules/sysc_mvm.hpp modules/rtl_mvm.hpp modules/weight_loader.hpp modules/inst_loader.hpp diff --git a/rad-sim/example-designs/mlp_int8/mlp_driver.cpp b/rad-sim/example-designs/mlp_int8/mlp_driver.cpp index a867eb6..382bc70 100644 --- a/rad-sim/example-designs/mlp_int8/mlp_driver.cpp +++ b/rad-sim/example-designs/mlp_int8/mlp_driver.cpp @@ -121,20 +121,20 @@ mlp_driver::mlp_driver(const sc_module_name& name) : sc_module(name) { std::string line; std::getline(design_config_file, line); std::stringstream line_stream(line); - std::string num_mvms_layer, num_mvms_rtl_layer; + std::string num_mvms_sysc_layer, num_mvms_rtl_layer; std::string layer_mvms; line_stream >> num_layers; - num_mvms.resize(num_layers); + num_mvms_sysc.resize(num_layers); num_mvms_rtl.resize(num_layers); num_mvms_total.resize(num_layers); for (unsigned int layer_id = 0; layer_id < num_layers; layer_id++) { line_stream >> layer_mvms; std::stringstream layer_mvms_stream(layer_mvms); - std::getline(layer_mvms_stream, num_mvms_layer, ','); + std::getline(layer_mvms_stream, num_mvms_sysc_layer, ','); 
std::getline(layer_mvms_stream, num_mvms_rtl_layer, ','); - num_mvms[layer_id] = std::stoi(num_mvms_layer); + num_mvms_sysc[layer_id] = std::stoi(num_mvms_sysc_layer); num_mvms_rtl[layer_id] = std::stoi(num_mvms_rtl_layer); - num_mvms_total[layer_id] = num_mvms[layer_id] + num_mvms_rtl[layer_id]; + num_mvms_total[layer_id] = num_mvms_sysc[layer_id] + num_mvms_rtl[layer_id]; } // Intialize input/output interface vectors @@ -258,7 +258,7 @@ void mlp_driver::source() { wait(); } - start_cycle = GetSimulationCycle(1.0); + start_cycle = GetSimulationCycle(radsim_config.GetDoubleKnob("sim_driver_period")); start_time = std::chrono::steady_clock::now(); wait(); @@ -310,7 +310,7 @@ void mlp_driver::sink() { if (mistake) std::cout << "FAILURE - Some outputs NOT matching!" << std::endl; else std::cout << "SUCCESS - All outputs are matching!" << std::endl; - end_cycle = GetSimulationCycle(1.0); + end_cycle = GetSimulationCycle(radsim_config.GetDoubleKnob("sim_driver_period")); end_time = std::chrono::steady_clock::now(); std::cout << "Simulation Cycles = " << end_cycle - start_cycle << std::endl; std::cout << "Simulation Time = " << std::chrono::duration_cast (end_time - start_time).count() << " ms" << std::endl; diff --git a/rad-sim/example-designs/mlp_int8/mlp_driver.hpp b/rad-sim/example-designs/mlp_int8/mlp_driver.hpp index 9c206ea..fd5b4d0 100644 --- a/rad-sim/example-designs/mlp_int8/mlp_driver.hpp +++ b/rad-sim/example-designs/mlp_int8/mlp_driver.hpp @@ -6,17 +6,18 @@ #include #include -#include "mvm.hpp" #include "sim_utils.hpp" #include "instructions.hpp" #include "radsim_config.hpp" +#include + class mlp_driver : public sc_module { private: int start_cycle, end_cycle; std::chrono::steady_clock::time_point start_time, end_time; unsigned int num_layers; - std::vector num_mvms; + std::vector num_mvms_sysc; std::vector num_mvms_rtl; std::vector num_mvms_total; diff --git a/rad-sim/example-designs/mlp_int8/mlp_int8_system.cpp 
b/rad-sim/example-designs/mlp_int8/mlp_int8_system.cpp index bab51ff..bfd5286 100644 --- a/rad-sim/example-designs/mlp_int8/mlp_int8_system.cpp +++ b/rad-sim/example-designs/mlp_int8/mlp_int8_system.cpp @@ -15,20 +15,20 @@ mlp_int8_system::mlp_int8_system(const sc_module_name& name, sc_clock* driver_cl std::getline(design_config_file, line); std::stringstream line_stream(line); unsigned int num_layers; - std::string num_mvms_layer, num_mvms_rtl_layer; + std::string num_mvms_sysc_layer, num_mvms_rtl_layer; std::string layer_mvms; line_stream >> num_layers; - num_mvms.resize(num_layers); + num_mvms_sysc.resize(num_layers); num_mvms_rtl.resize(num_layers); num_mvms_total.resize(num_layers); for (unsigned int layer_id = 0; layer_id < num_layers; layer_id++) { line_stream >> layer_mvms; std::stringstream layer_mvms_stream(layer_mvms); - std::getline(layer_mvms_stream, num_mvms_layer, ','); + std::getline(layer_mvms_stream, num_mvms_sysc_layer, ','); std::getline(layer_mvms_stream, num_mvms_rtl_layer, ','); - num_mvms[layer_id] = std::stoi(num_mvms_layer); + num_mvms_sysc[layer_id] = std::stoi(num_mvms_sysc_layer); num_mvms_rtl[layer_id] = std::stoi(num_mvms_rtl_layer); - num_mvms_total[layer_id] = num_mvms[layer_id] + num_mvms_rtl[layer_id]; + num_mvms_total[layer_id] = num_mvms_sysc[layer_id] + num_mvms_rtl[layer_id]; } // Initialize signal vectors diff --git a/rad-sim/example-designs/mlp_int8/mlp_int8_system.hpp b/rad-sim/example-designs/mlp_int8/mlp_int8_system.hpp index 3e8b5c2..5e020a9 100644 --- a/rad-sim/example-designs/mlp_int8/mlp_int8_system.hpp +++ b/rad-sim/example-designs/mlp_int8/mlp_int8_system.hpp @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include @@ -9,7 +8,7 @@ class mlp_int8_system : public sc_module { private: - std::vector num_mvms; + std::vector num_mvms_sysc; std::vector num_mvms_rtl; std::vector num_mvms_total; diff --git a/rad-sim/example-designs/mlp_int8/mlp_top.cpp b/rad-sim/example-designs/mlp_int8/mlp_top.cpp index 
05d3bb6..8b3bd16 100644 --- a/rad-sim/example-designs/mlp_int8/mlp_top.cpp +++ b/rad-sim/example-designs/mlp_int8/mlp_top.cpp @@ -16,20 +16,20 @@ mlp_top::mlp_top(const sc_module_name &name) : sc_module(name) { std::getline(design_config_file, line); std::stringstream line_stream(line); unsigned int num_layers; - std::string num_mvms_layer, num_mvms_rtl_layer; + std::string num_mvms_sysc_layer, num_mvms_rtl_layer; std::string layer_mvms; line_stream >> num_layers; - num_mvms.resize(num_layers); + num_mvms_sysc.resize(num_layers); num_mvms_rtl.resize(num_layers); num_mvms_total.resize(num_layers); for (unsigned int layer_id = 0; layer_id < num_layers; layer_id++) { line_stream >> layer_mvms; std::stringstream layer_mvms_stream(layer_mvms); - std::getline(layer_mvms_stream, num_mvms_layer, ','); + std::getline(layer_mvms_stream, num_mvms_sysc_layer, ','); std::getline(layer_mvms_stream, num_mvms_rtl_layer, ','); - num_mvms[layer_id] = std::stoi(num_mvms_layer); + num_mvms_sysc[layer_id] = std::stoi(num_mvms_sysc_layer); num_mvms_rtl[layer_id] = std::stoi(num_mvms_rtl_layer); - num_mvms_total[layer_id] = num_mvms[layer_id] + num_mvms_rtl[layer_id]; + num_mvms_total[layer_id] = num_mvms_sysc[layer_id] + num_mvms_rtl[layer_id]; } init_vector>::init_sc_vector(dispatcher_fifo_rdy, num_mvms_total[0]); @@ -37,25 +37,25 @@ mlp_top::mlp_top(const sc_module_name &name) : sc_module(name) { init_vector>>>::init_sc_vector( dispatcher_fifo_wdata, num_mvms_total[0]); - matrix_vector_engines.resize(num_layers); + sysc_matrix_vector_engines.resize(num_layers); rtl_matrix_vector_engines.resize(num_layers); input_dispatchers.resize(num_mvms_total[0]); char module_name[25]; std::string module_name_str; for (unsigned int layer_id = 0; layer_id < num_layers; layer_id++) { - matrix_vector_engines[layer_id].resize(num_mvms[layer_id]); + sysc_matrix_vector_engines[layer_id].resize(num_mvms_sysc[layer_id]); rtl_matrix_vector_engines[layer_id].resize(num_mvms_rtl[layer_id]); - for (unsigned 
int mvm_id = 0; mvm_id < num_mvms[layer_id]; mvm_id++) { + for (unsigned int mvm_id = 0; mvm_id < num_mvms_sysc[layer_id]; mvm_id++) { module_name_str = "layer" + std::to_string(layer_id) + "_mvm" + std::to_string(mvm_id); std::strcpy(module_name, module_name_str.c_str()); - matrix_vector_engines[layer_id][mvm_id] = - new mvm(module_name, mvm_id, layer_id); - matrix_vector_engines[layer_id][mvm_id]->rst(rst); + sysc_matrix_vector_engines[layer_id][mvm_id] = + new sysc_mvm(module_name, mvm_id, layer_id); + sysc_matrix_vector_engines[layer_id][mvm_id]->rst(rst); } for (unsigned int mvm_id = 0; mvm_id < num_mvms_rtl[layer_id]; mvm_id++) { module_name_str = - "layer" + std::to_string(layer_id) + "_mvm" + std::to_string(mvm_id + num_mvms[layer_id]); + "layer" + std::to_string(layer_id) + "_mvm" + std::to_string(mvm_id + num_mvms_sysc[layer_id]); std::strcpy(module_name, module_name_str.c_str()); rtl_matrix_vector_engines[layer_id][mvm_id] = new rtl_mvm(module_name); @@ -122,11 +122,11 @@ mlp_top::mlp_top(const sc_module_name &name) : sc_module(name) { } mlp_top::~mlp_top() { - for (unsigned int layer_id = 0; layer_id < matrix_vector_engines.size(); + for (unsigned int layer_id = 0; layer_id < num_mvms_total.size(); layer_id++) { for (unsigned int mvm_id = 0; - mvm_id < matrix_vector_engines[layer_id].size(); mvm_id++) { - delete matrix_vector_engines[layer_id][mvm_id]; + mvm_id < sysc_matrix_vector_engines[layer_id].size(); mvm_id++) { + delete sysc_matrix_vector_engines[layer_id][mvm_id]; } for (unsigned int mvm_id = 0; mvm_id < rtl_matrix_vector_engines[layer_id].size(); mvm_id++) { diff --git a/rad-sim/example-designs/mlp_int8/mlp_top.hpp b/rad-sim/example-designs/mlp_int8/mlp_top.hpp index c8d92b7..00704d5 100644 --- a/rad-sim/example-designs/mlp_int8/mlp_top.hpp +++ b/rad-sim/example-designs/mlp_int8/mlp_top.hpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include @@ -16,8 +16,8 @@ class mlp_top : public sc_module { private: std::vector> 
rtl_matrix_vector_engines; - std::vector> matrix_vector_engines; - std::vector num_mvms; + std::vector> sysc_matrix_vector_engines; + std::vector num_mvms_sysc; std::vector num_mvms_rtl; std::vector num_mvms_total; std::vector input_dispatchers; diff --git a/rad-sim/example-designs/mlp_int8/modules/collector.hpp b/rad-sim/example-designs/mlp_int8/modules/collector.hpp index ba67219..91c402b 100644 --- a/rad-sim/example-designs/mlp_int8/modules/collector.hpp +++ b/rad-sim/example-designs/mlp_int8/modules/collector.hpp @@ -7,7 +7,6 @@ #include "radsim_defines.hpp" #include "axis_interface.hpp" #include "fifo.hpp" -#include "mvm.hpp" #include "radsim_module.hpp" class collector : public RADSimModule { diff --git a/rad-sim/example-designs/mlp_int8/modules/dispatcher.hpp b/rad-sim/example-designs/mlp_int8/modules/dispatcher.hpp index e339496..a706a9e 100644 --- a/rad-sim/example-designs/mlp_int8/modules/dispatcher.hpp +++ b/rad-sim/example-designs/mlp_int8/modules/dispatcher.hpp @@ -7,9 +7,10 @@ #include "radsim_defines.hpp" #include "axis_interface.hpp" #include "fifo.hpp" -#include "mvm.hpp" #include "radsim_module.hpp" +#include + class dispatcher : public RADSimModule { private: std::string module_name; diff --git a/rad-sim/example-designs/mlp_int8/modules/rtl/accum.v b/rad-sim/example-designs/mlp_int8/modules/rtl/accum.v index 4c4ce16..3eabfb7 100644 --- a/rad-sim/example-designs/mlp_int8/modules/rtl/accum.v +++ b/rad-sim/example-designs/mlp_int8/modules/rtl/accum.v @@ -1,3 +1,18 @@ +/** +Accumulator Module +Responsible for accumulating sums from previous subset vectors the MVM processed as part of one input vector. Independent of each DPE. + +Inputs: +i_valid: The data in is valid +i_data: The data +i_addr: The address in the accum_mem to write/read to +i_last: The last subset in the input vector. Required for o_valid to be propagated as true. 
+ +Outputs: +o_valid: The result out is valid +o_result: The result +**/ + module accum # ( parameter DATAW = 32, parameter DEPTH = 512, diff --git a/rad-sim/example-designs/mlp_int8/modules/rtl/components.v b/rad-sim/example-designs/mlp_int8/modules/rtl/components.v index a6b5546..df80b2f 100644 --- a/rad-sim/example-designs/mlp_int8/modules/rtl/components.v +++ b/rad-sim/example-designs/mlp_int8/modules/rtl/components.v @@ -1,3 +1,18 @@ +/** +Pipeline Module +Convenient module to specify register delays + +Parameters: +DELAY: Number of Registers to delay data_in by +WIDTH: Width of data + +Inputs: +data_in: The data to be delayed + +Outputs: +data_out: The data after the pipelined delay +**/ + module pipeline # ( parameter DELAY = 1, parameter WIDTH = 32 @@ -28,6 +43,24 @@ assign data_out = r_pipeline[DELAY-1]; endmodule +/** +Memory Block Module +A dual port memory with 1 Write and 1 Read. Can read and write simultaneously if not on the same address. + +Parameters: +DATAW: Width of data word +DEPTH: Depth of the memory +ADDRW: Width of memory address + +Inputs: +waddr: Write Address +wen: Write Enable +wdata: Write Data +raddr: Read Address + +Outputs: +rdata: Read Data +**/ module memory_block # ( parameter DATAW = 8, parameter DEPTH = 512, @@ -77,6 +110,27 @@ assign rdata = r_rdata; endmodule +/** +FIFO Module +A peek FIFO implementation + +Parameters: +DATAW: Width of data word +DEPTH: Depth of the FIFO +ADDRW: Width of FIFO address for pointer use +ALMOST_FULL_DEPTH: Depth of the FIFO at which the almost_full signal is asserted + +Inputs: +push: Signal to push data to the FIFO +idata: Input Data +pop: Signal to pop data from the FIFO + +Outputs: +odata: Output Data +empty: Signal asserted when FIFO is empty +full: Signal asserted when FIFO is full +almost_full: Signal asserted when FIFO is almost full defined by ALMOST_FULL_DEPTH +**/ module fifo # ( parameter DATAW = 64, parameter DEPTH = 128, diff --git
a/rad-sim/example-designs/mlp_int8/modules/rtl/datapath.v b/rad-sim/example-designs/mlp_int8/modules/rtl/datapath.v index fffdb8e..8edd627 100644 --- a/rad-sim/example-designs/mlp_int8/modules/rtl/datapath.v +++ b/rad-sim/example-designs/mlp_int8/modules/rtl/datapath.v @@ -1,3 +1,29 @@ +/** +Datapath Module +Facilitates the workflow of data for a singular DPE of the MVM module + +Parameters: +LANES: The number of elements each DPE can handle at once (Max number of elements in a subset of the input vector) +DATAW: Bit width of data +IPREC: Precision of elements in reduction vector +OPREC: Precision of elements in output vector +MEM_DEPTH: Depth of the accumulation memory +ADDRW: Width of the memory address used for accumulation + +Inputs: +i_valid: Valid signal for all data +i_dataa: Vector data +i_datab: Weight data +i_datac: Reduce data +i_accum_addr: Accumulation memory address +i_accum: Enable signal for accumulation +i_last: The last subset in the input vector. (Release) +i_reduce: Enable signal for reduction + +Outputs: +o_valid: Valid signal for output result +o_result: Result +**/ module datapath # ( parameter LANES = 64, parameter DATAW = 512, diff --git a/rad-sim/example-designs/mlp_int8/modules/rtl/dpe.v b/rad-sim/example-designs/mlp_int8/modules/rtl/dpe.v index 20f9792..cb6ebe7 100644 --- a/rad-sim/example-designs/mlp_int8/modules/rtl/dpe.v +++ b/rad-sim/example-designs/mlp_int8/modules/rtl/dpe.v @@ -1,9 +1,29 @@ +/** +Dot Product Engine Module +Performs a dot product calculation for two vectors + +Parameters: +LANES: The number of elements the DPE can handle at once (Max number of elements in a subset of the input vector) +DATAW: Bit width of data +IPREC: Precision of elements in inputs to DPE +MPREC: Precision after multiplication +OPREC: Precision of elements in outputs from DPE +ADDER_STAGES: Number of stages required for hierarchical adding + +Inputs: +i_valid: Valid signal for all data +i_dataa: Vector data +i_datab: Vector data + +Outputs: 
+o_valid: Valid signal for output result +o_result: Result +**/ module dpe # ( parameter LANES = 64, parameter DATAW = 512, parameter IPREC = 8, parameter MPREC = 2 * IPREC, - parameter NUM_MULT = DATAW / IPREC, parameter OPREC = 32, parameter ADDER_STAGES = $clog2(LANES) )( @@ -17,14 +37,14 @@ module dpe # ( ); // Input registers -wire signed [IPREC-1:0] dataa [0:NUM_MULT-1]; -wire signed [IPREC-1:0] datab [0:NUM_MULT-1]; -reg signed [IPREC-1:0] r_dataa [0:NUM_MULT-1]; -reg signed [IPREC-1:0] r_datab [0:NUM_MULT-1]; +wire signed [IPREC-1:0] dataa [0:LANES-1]; +wire signed [IPREC-1:0] datab [0:LANES-1]; +reg signed [IPREC-1:0] r_dataa [0:LANES-1]; +reg signed [IPREC-1:0] r_datab [0:LANES-1]; reg r_ivalid; // Multiplication registers -reg signed [MPREC-1:0] r_mrslt [0:NUM_MULT-1]; +reg signed [MPREC-1:0] r_mrslt [0:LANES-1]; reg r_mvalid; // Adder tree registers @@ -38,7 +58,7 @@ reg r_avalid [0:ADDER_STAGES-1]; genvar j; generate -for (j = 0; j < NUM_MULT; j = j + 1) begin: split_input +for (j = 0; j < LANES; j = j + 1) begin: split_input assign dataa[j] = i_dataa[(j+1)*IPREC-1:j*IPREC]; assign datab[j] = i_datab[(j+1)*IPREC-1:j*IPREC]; end @@ -47,7 +67,7 @@ endgenerate integer i; always @ (posedge clk) begin if (rst) begin - for (i = 0; i < NUM_MULT; i = i + 1) begin + for (i = 0; i < LANES; i = i + 1) begin r_mrslt[i] <= 'd0; r_dataa[i] <= 'd0; r_datab[i] <= 'd0; @@ -65,14 +85,14 @@ always @ (posedge clk) begin end end else begin // Register inputs - for (i = 0; i < NUM_MULT; i = i + 1) begin + for (i = 0; i < LANES; i = i + 1) begin r_dataa[i] <= dataa[i]; r_datab[i] <= datab[i]; end r_ivalid <= i_valid; // Perform multiplication - for (i = 0; i < NUM_MULT; i = i + 1) begin + for (i = 0; i < LANES; i = i + 1) begin r_mrslt[i] <= r_dataa[i] * r_datab[i]; end r_mvalid <= r_ivalid; diff --git a/rad-sim/example-designs/mlp_int8/modules/rtl/reduce.v b/rad-sim/example-designs/mlp_int8/modules/rtl/reduce.v index 1498627..93c4a31 100644 --- 
a/rad-sim/example-designs/mlp_int8/modules/rtl/reduce.v +++ b/rad-sim/example-designs/mlp_int8/modules/rtl/reduce.v @@ -1,3 +1,21 @@ +/** +Reduction Module +Reduce vectors from one MVM to another + +Parameters: +IPREC: Precision of elements in reduction vector +OPREC: Precision of elements in output vector + +Inputs: +i_valid: Valid signal for all data +i_dataa: Vector data from host MVM +i_datab: Reduction vector from other MVM on same layer +i_reduce: Enable signal for reduction + +Outputs: +o_valid: Valid signal for output result +o_result: Result +**/ module reduce # ( parameter IPREC = 8, parameter OPREC = 32 diff --git a/rad-sim/example-designs/mlp_int8/modules/rtl/rtl_mvm.v b/rad-sim/example-designs/mlp_int8/modules/rtl/rtl_mvm.v index de367e7..2468c18 100644 --- a/rad-sim/example-designs/mlp_int8/modules/rtl/rtl_mvm.v +++ b/rad-sim/example-designs/mlp_int8/modules/rtl/rtl_mvm.v @@ -8,6 +8,10 @@ //`define inst_release_dest(inst) ``inst``[30:22] //`define inst_release_op(inst) ``inst``[31] +/** +RTL MVM Module +Scalable Matrix Vector Multiplication implementation +**/ module rtl_mvm # ( parameter DATAW = 512, // Bitwidth of axi-s tdata parameter BYTEW = 8, // Bitwidth of axi-s tkeep, tstrb diff --git a/rad-sim/example-designs/mlp_int8/modules/mvm.cpp b/rad-sim/example-designs/mlp_int8/modules/sysc_mvm.cpp similarity index 98% rename from rad-sim/example-designs/mlp_int8/modules/mvm.cpp rename to rad-sim/example-designs/mlp_int8/modules/sysc_mvm.cpp index 709c8a1..3674a83 100644 --- a/rad-sim/example-designs/mlp_int8/modules/mvm.cpp +++ b/rad-sim/example-designs/mlp_int8/modules/sysc_mvm.cpp @@ -1,6 +1,6 @@ -#include "mvm.hpp" +#include "sysc_mvm.hpp" -mvm::mvm(const sc_module_name &name, unsigned int id_mvm, unsigned int id_layer) +sysc_mvm::sysc_mvm(const sc_module_name &name, unsigned int id_mvm, unsigned int id_layer) : RADSimModule(name), rf_rdata("rf_rdata", DPES), rf_wdata("rf_wdata"), @@ -180,7 +180,7 @@ mvm::mvm(const sc_module_name &name, unsigned 
int id_mvm, unsigned int id_layer) this->RegisterModuleInfo(); } -mvm::~mvm() { +sysc_mvm::~sysc_mvm() { delete instruction_fifo; delete inst_rf_pipeline; delete inst_valid_rf_pipeline; @@ -196,7 +196,7 @@ mvm::~mvm() { delete output_data_fifo; } -void mvm::Tick() { +void sysc_mvm::Tick() { // Reset logic input_fifo_push.write(false); reduction_fifo_push.write(false); @@ -284,7 +284,7 @@ void mvm::Tick() { } } -void mvm::Assign() { +void sysc_mvm::Assign() { if (rst.read()) { // Module signals inst_rf_raddr.write(0); @@ -392,7 +392,7 @@ void mvm::Assign() { } } -void mvm::RegisterModuleInfo() { +void sysc_mvm::RegisterModuleInfo() { std::string port_name; _num_noc_axis_slave_ports = 0; _num_noc_axis_master_ports = 0; diff --git a/rad-sim/example-designs/mlp_int8/modules/mvm.hpp b/rad-sim/example-designs/mlp_int8/modules/sysc_mvm.hpp similarity index 94% rename from rad-sim/example-designs/mlp_int8/modules/mvm.hpp rename to rad-sim/example-designs/mlp_int8/modules/sysc_mvm.hpp index 1a6b002..6b9c9fb 100644 --- a/rad-sim/example-designs/mlp_int8/modules/mvm.hpp +++ b/rad-sim/example-designs/mlp_int8/modules/sysc_mvm.hpp @@ -15,7 +15,7 @@ #include -class mvm : public RADSimModule { +class sysc_mvm : public RADSimModule { private: std::string module_name; unsigned int mvm_id; @@ -78,11 +78,11 @@ class mvm : public RADSimModule { axis_slave_port rx_interface; axis_master_port tx_interface; - mvm(const sc_module_name& name, unsigned int id_mvm, unsigned int id_layer); - ~mvm(); + sysc_mvm(const sc_module_name& name, unsigned int id_mvm, unsigned int id_layer); + ~sysc_mvm(); void Assign(); void Tick(); - SC_HAS_PROCESS(mvm); + SC_HAS_PROCESS(sysc_mvm); void RegisterModuleInfo(); }; \ No newline at end of file diff --git a/rad-sim/example-designs/rtl_add/rtl_add_driver.cpp b/rad-sim/example-designs/rtl_add/rtl_add_driver.cpp index 490ff25..1605a50 100644 --- a/rad-sim/example-designs/rtl_add/rtl_add_driver.cpp +++ b/rad-sim/example-designs/rtl_add/rtl_add_driver.cpp @@ 
-31,7 +31,7 @@ void rtl_add_driver::source() { client_valid.write(false); wait(); rst.write(false); - start_cycle = GetSimulationCycle(1.0); + start_cycle = GetSimulationCycle(radsim_config.GetDoubleKnob("sim_driver_period")); start_time = std::chrono::steady_clock::now(); wait(); @@ -61,7 +61,7 @@ void rtl_add_driver::sink() { if (response.read() != actual_sum) std::cout << "FAILURE - Output is not matching!" << std::endl; else std::cout << "SUCCESS - Output is matching!" << std::endl; - end_cycle = GetSimulationCycle(1.0); + end_cycle = GetSimulationCycle(radsim_config.GetDoubleKnob("sim_driver_period")); end_time = std::chrono::steady_clock::now(); std::cout << "Simulation Cycles = " << end_cycle - start_cycle << std::endl; std::cout << "Simulation Time = " << std::chrono::duration_cast (end_time - start_time).count() << " us" << std::endl; diff --git a/rad-sim/scripts/generate_port_mappings.py b/rad-sim/scripts/generate_port_mappings.py index 6980cde..60f39d0 100644 --- a/rad-sim/scripts/generate_port_mappings.py +++ b/rad-sim/scripts/generate_port_mappings.py @@ -1,4 +1,10 @@ -# Port Mapping Automated Parser +# Port Mapping Parser Script +# Parses inputs and outputs from top-level Verilog designs and maps it to the port.map format used by Wrapper Generation +# Support for AXI-S interfaces parsing +# TODO: add support for AXI-MM +# Arguments: +# [1] => Path to the design folder +# [2...] => Verilog files for top-level designs import argparse import re import verilog_parser as vlog @@ -9,6 +15,7 @@ axis_slave_input_ports = ["tvalid", "tdata", "tstrb", "tkeep", "tlast", "tid", "tdest", "tuser"] +# Determines the width of the port. Outputs the width if successful, "?" if failure def determine_port_width(port): if not port.data_type: return 1 @@ -28,15 +35,18 @@ def determine_port_width(port): return "?" +# RegEx pattern matching for AXI-S ports. Returns capture groups with information on the AXI-S interface name, and the AXI-S port. 
def match_axis_regex(port): signal = re.match(verilog_axis_regex, port.name) return signal.groups() +# Determines if a port is an AXI-S port def is_axis_port(port): if re.match(verilog_axis_regex, port.name): return True return False +# Determines if the role for an AXI-S port has already been saved def is_axis_role_found(axis_roles, port): (axis_interface, axis_port) = match_axis_regex(port) if axis_interface in axis_roles: @@ -53,10 +63,12 @@ def determine_axis_roles(module): axis_roles[axis_interface] = "slave" if p.mode == "input" else "master" return axis_roles +# Parses modules from a Verilog File def get_modules_from_verilog_file(verilog_file_path): vlog_ex = vlog.VerilogExtractor() return vlog_ex.extract_objects(verilog_file_path) +# Main function for a single module def generate_port_mappings_for_module(port_mapping_file, module, axis_roles): warnings = False port_mapping_file.write("module {0}\n".format(module.name)) @@ -75,6 +87,7 @@ def generate_port_mappings_for_module(port_mapping_file, module, axis_roles): print("Port Mappings for {0} has been added to the port map file.".format(module.name)) return warnings +# Determines the modules in each file, and calls generate_port_mappings_for_module def generate(design_folder, rtl_files, cmd_overwrite): modules_folder = design_folder / "modules" rtl_folder = modules_folder / "rtl" @@ -103,7 +116,9 @@ def generate(design_folder, rtl_files, cmd_overwrite): else: print("ERROR: File {0} is not supported. 
Only Verilog/SystemVerilog files are supported.".format(rtl_file)) exit() + for m in modules: + # Finds all the AXI-S roles (master/slave) for each AXI-S interface axis_roles = determine_axis_roles(m) warnings = True if generate_port_mappings_for_module(port_mapping_file, m, axis_roles) else warnings if warnings: diff --git a/rad-sim/scripts/generate_wrapper.py b/rad-sim/scripts/generate_wrapper.py index 4805d08..39ca169 100644 --- a/rad-sim/scripts/generate_wrapper.py +++ b/rad-sim/scripts/generate_wrapper.py @@ -1,19 +1,23 @@ # Generate Wrapper Code for RTL Support +# Creates RAD-Sim modules that instantiates SystemC modules under the hood. # Current version only supports AXI-S +# TODO: add support for AXI-MM # Arguments: # [1] => Path to the design folder # [2...] => Modules to generate wrapper code for import argparse from pathlib import Path -DEFAULT_PORT_WIDTH = 1024 +DEFAULT_PORT_WIDTH = 1024 # AXI-S data width +# Verilog-style -> SystemC port type translations port_type_translation = { "input": "sc_in", "output": "sc_out", "inout": "sc_inout" } +# Generates the C++ wrapper file def generate_source_wrapper(design_name, modules_folder, dataw, mappings, axis_roles): verilated_design = "V" + design_name design_inst = "v" + design_name @@ -28,7 +32,7 @@ def generate_source_wrapper(design_name, modules_folder, dataw, mappings, axis_r wrapper_cpp_file.write("\tstd::strcpy(" + design_inst + "_name, " + design_inst + "_name_str.c_str());\n\n") wrapper_cpp_file.write("\t" + design_inst + " = new " + verilated_design + "{" + design_inst + "_name};\n") - #inputs and outputs connections + # inputs and outputs connections if not design_name in mappings: print("WARNING: No mappings declared for the module", design_name) elif len(mappings[design_name]) == 0: @@ -63,6 +67,7 @@ def generate_source_wrapper(design_name, modules_folder, dataw, mappings, axis_r wrapper_cpp_file.write("}\n") +# Generates the accompanying C++ header wrapper file def 
generate_header_wrapper(design_name, modules_folder, mappings, axis_roles): verilated_design = "V" + design_name design_inst = "v" + design_name @@ -83,7 +88,7 @@ def generate_header_wrapper(design_name, modules_folder, mappings, axis_roles): wrapper_hpp_file.write("\t" + verilated_design + "* " + design_inst + ";\n\n") wrapper_hpp_file.write("public:\n") - #inputs and outputs + # inputs and outputs if not design_name in mappings: print("WARNING: No mappings declared for the module", design_name) elif len(mappings[design_name]) == 0: @@ -95,7 +100,7 @@ def generate_header_wrapper(design_name, modules_folder, mappings, axis_roles): port_size_type = "bool" if port[1] == "1" else "sc_bv<" + port[1] + ">" wrapper_hpp_file.write("\t" + port[0] + "<" + port_size_type + "> " + port[3] + ";\n") - #NoC connection, TODO: add support for AXI-MM + # NoC connection if axis_roles != None: wrapper_hpp_file.write("\n") for axis_interface, axis_role in axis_roles.items(): @@ -112,6 +117,7 @@ def generate_header_wrapper(design_name, modules_folder, mappings, axis_roles): wrapper_hpp_file.write("\tvoid RegisterModuleInfo();\n") wrapper_hpp_file.write("};\n") +# Parses the port mappings file def read_port_mappings(port_mapping_file): current_module = "" mappings = {} @@ -122,15 +128,19 @@ def read_port_mappings(port_mapping_file): if not components: continue - if components[0] == "module": + if components[0] == "module": # The port mapping line specifies a module + # Validity Checks if len(components) != 2: raise ValueError("A line specifying a module can only contain 2 parameters separated by a whitespace.") + current_module = components[1] mappings[current_module] = [] # insert new dictionary entry print("Found port mappings for module", current_module) - elif components[0] == "axis": + elif components[0] == "axis": # The port mapping line specifies an AXI-S port + # Validity Checks if not current_module: raise ValueError("A module must be specified before mappings for the module.") 
if len(components) != 5: raise ValueError("Each line specifying an AXI-S port must contain 5 parameters separated by a whitespace.") + # Add the parsed data to data structures to be used during wrapper generation (keyword, axis_role, rtl_port, axis_interface, axis_port) = components radsim_port = axis_interface + "." + axis_port mappings[current_module].append((keyword, axis_role, rtl_port, radsim_port)) @@ -139,11 +149,14 @@ def read_port_mappings(port_mapping_file): if axis_interface not in axis_roles[current_module]: axis_roles[current_module][axis_interface] = axis_role else: + # verify there are no inconsistencies if axis_roles[current_module][axis_interface] != axis_role: raise ValueError("Inconsistent AXI-S role for interface " + axis_interface + ". Each interface can either be master or slave.") - else: + else: # The port mapping line specifies any other port port_mode = components[0] port_width = components[1] + + # Validity Checks if not current_module: raise ValueError("A module must be specified before mappings for the module.") if len(components) != 4: raise ValueError("Each line specifying a port can only contain 4 parameters separated by a whitespace.") if port_mode != "input" and port_mode != "output" and port_mode != "inout": raise ValueError("The first argument of each port must be either axis/input/output/inout.") @@ -154,6 +167,7 @@ def read_port_mappings(port_mapping_file): mappings[current_module].append((port_mode, port_width, components[2], components[3])) return (mappings, axis_roles) +# Main function to generate wrapper files def generate(design_folder, design_modules): modules_folder = design_folder / "modules" rtl_folder = modules_folder / "rtl" diff --git a/rad-sim/scripts/verilog_parser.py b/rad-sim/scripts/verilog_parser.py index 8538e43..ee7bfc4 100644 --- a/rad-sim/scripts/verilog_parser.py +++ b/rad-sim/scripts/verilog_parser.py @@ -1,4 +1,5 @@ -# -*- coding: utf-8 -*- +# Verilog Parser +# RegEx and push down automata to parse
symbols from Verilog and SystemVerilog files # Copyright © 2017 Kevin Thibedeau # Distributed under the terms of the MIT license from __future__ import print_function