Skip to content

Commit

Permalink
Increased sim mem capacity
Browse files Browse the repository at this point in the history
  • Loading branch information
quetric committed Feb 21, 2024
1 parent a9c1c89 commit 271a075
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 11 deletions.
4 changes: 2 additions & 2 deletions kernels/cclo/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ EN_ARITH ?= 1
EN_COMPRESS ?= 1
EN_EXT_KRNL ?= 1
MB_DEBUG_LEVEL ?= 0
SIM_MEM_SIZE ?= 16M
SIM_MEM_SIZE_LOG ?= 28
SIM_MEM_LATENCY ?= 50
CCLO_ELF=vitis_ws/ccl_offload_control/Debug/ccl_offload_control.elf
CCLO_SIMDLL=ccl_offload_ex/ccl_offload_ex.sim/sim_1/behav/xsim/xsim.dir/ccl_offload_behav/xsimk.so
Expand Down Expand Up @@ -88,7 +88,7 @@ simdll: $(CCLO_SIMDLL)

$(CCLO_SIMDLL): tcl/generate_sim.tcl elf
$(MAKE) -C ../plugins PLATFORM=$(PLATFORM)
vivado -mode batch -source $< -tclargs $(STACK_TYPE) $(EN_DMA) $(EN_ARITH) $(EN_COMPRESS) $(EN_EXT_KRNL) $(SIM_MEM_SIZE) $(SIM_MEM_LATENCY)
vivado -mode batch -source $< -tclargs $(STACK_TYPE) $(EN_DMA) $(EN_ARITH) $(EN_COMPRESS) $(EN_EXT_KRNL) $(SIM_MEM_SIZE_LOG) $(SIM_MEM_LATENCY)
cd ccl_offload_ex/ccl_offload_ex.sim/sim_1/behav/xsim/ && ./compile.sh && ./elaborate.sh

dcp: $(CCLO_DCP)
Expand Down
102 changes: 102 additions & 0 deletions kernels/cclo/hdl/sim_mem.v
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*******************************************************************************
# Copyright (C) 2024 Advanced Micro Devices, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# *******************************************************************************/

`timescale 1ns / 1ps

// -----------------------------------------------------------------------------
// sim_mem: behavioural true-dual-port memory with a configurable read latency.
//
// Parameters
//   MEM_WIDTH     - word width in bits. Writes are byte-granular and the write
//                   loop covers MEM_WIDTH/8 byte lanes, so MEM_WIDTH is
//                   expected to be a multiple of 8.
//   MEM_DEPTH_LOG - log2 of the number of MEM_WIDTH-bit words; also the width
//                   of addr_a / addr_b.
//   READ_LATENCY  - number of clock edges between a read being sampled and its
//                   data appearing on dout_*. Must be >= 1.
//
// Behaviour (identical for port A and port B, each on its own clock)
//   * Write: on a rising edge with en_* high, every byte lane whose we_* bit
//     is set is copied from din_* into mem[addr_*].
//   * Read: on a rising edge with en_* high (independent of we_*), mem[addr_*]
//     is captured into stage 0 of the port's delay line. The capture uses the
//     word as it was before any write performed on the same edge.
//   * Stages 1..READ_LATENCY-1 shift on every rising edge regardless of en_*;
//     dout_* is the last stage.
//   * rst_* synchronously clears the port's delay line only; the memory array
//     itself is never cleared or initialised, so words that were never written
//     read back as X in simulation.
//   * There is no arbitration between the ports.
//
// NOTE(review): addr_* is used directly as a word index into the array. If the
// attached controller drives byte addresses, the low address bits must be
// dropped outside this module - confirm against the block design.
// -----------------------------------------------------------------------------
module sim_mem
#(
    parameter MEM_WIDTH     = 512,
    parameter MEM_DEPTH_LOG = 22,
    parameter READ_LATENCY  = 50
)(

    // The X_INTERFACE_* attributes group the ports below into two BRAM
    // interfaces named MEM_PORT_A and MEM_PORT_B.
    (* X_INTERFACE_PARAMETER = "MODE Slave, MASTER_TYPE BRAM_CTRL, MEM_ECC NONE, READ_WRITE_MODE READ_WRITE" *)
    (* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_A CLK" *)
    input                     clk_a,
    (* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_A RST" *)
    input                     rst_a,
    (* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_A EN" *)
    input                     en_a,
    (* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_A ADDR" *)
    input [MEM_DEPTH_LOG-1:0] addr_a,
    (* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_A WE" *)
    input [MEM_WIDTH/8-1:0]   we_a,
    (* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_A DIN" *)
    input [MEM_WIDTH-1:0]     din_a,
    (* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_A DOUT" *)
    output [MEM_WIDTH-1:0]    dout_a,

    (* X_INTERFACE_PARAMETER = "MODE Slave, MASTER_TYPE BRAM_CTRL, MEM_ECC NONE, READ_WRITE_MODE READ_WRITE" *)
    (* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_B CLK" *)
    input                     clk_b,
    (* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_B RST" *)
    input                     rst_b,
    (* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_B EN" *)
    input                     en_b,
    (* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_B ADDR" *)
    input [MEM_DEPTH_LOG-1:0] addr_b,
    (* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_B WE" *)
    input [MEM_WIDTH/8-1:0]   we_b,
    (* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_B DIN" *)
    input [MEM_WIDTH-1:0]     din_b,
    (* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_B DOUT" *)
    output [MEM_WIDTH-1:0]    dout_b
);

    // Parameter sanity check. With READ_LATENCY < 1 the delay-line arrays
    // would be declared with a negative upper bound and dout_* would read an
    // element that no process ever writes, i.e. a permanent X with no
    // diagnostic. Stop the simulation with a clear message instead.
    generate if (READ_LATENCY < 1) begin: invalid_read_latency
        initial begin
            $display("ERROR: sim_mem: READ_LATENCY must be >= 1 (got %0d)", READ_LATENCY);
            $finish;
        end
    end
    endgenerate

    // Storage: 2**MEM_DEPTH_LOG words of MEM_WIDTH bits, shared by both ports.
    reg [MEM_WIDTH-1:0] mem[2**MEM_DEPTH_LOG-1:0];

    // Byte-enable writes: one pair of processes (port A, port B) per byte lane.
    genvar byte_idx;
    generate for (byte_idx = 0; byte_idx < MEM_WIDTH/8; byte_idx = byte_idx + 1) begin: byte_write
        always @(posedge clk_a)
            if (en_a && we_a[byte_idx])
                mem[addr_a][8*byte_idx +: 8] <= din_a[8*byte_idx +: 8];

        always @(posedge clk_b)
            if (en_b && we_b[byte_idx])
                mem[addr_b][8*byte_idx +: 8] <= din_b[8*byte_idx +: 8];
    end
    endgenerate

    // Per-port read pipelines; element [READ_LATENCY-1] drives dout_*.
    reg [MEM_WIDTH-1:0] delayline_a[READ_LATENCY-1:0];
    reg [MEM_WIDTH-1:0] delayline_b[READ_LATENCY-1:0];

    // Stage 0: sample the addressed word while the port is enabled; hold the
    // previous sample otherwise.
    always @(posedge clk_a) begin
        if (rst_a)
            delayline_a[0] <= 0;
        else if (en_a)
            delayline_a[0] <= mem[addr_a];
    end

    always @(posedge clk_b) begin
        if (rst_b)
            delayline_b[0] <= 0;
        else if (en_b)
            delayline_b[0] <= mem[addr_b];
    end

    // Stages 1..READ_LATENCY-1: free-running shift, not gated by en_*.
    genvar stage;
    generate for (stage = 1; stage < READ_LATENCY; stage = stage + 1) begin: read_delay
        always @(posedge clk_a) begin
            if (rst_a)
                delayline_a[stage] <= 0;
            else
                delayline_a[stage] <= delayline_a[stage-1];
        end

        always @(posedge clk_b) begin
            if (rst_b)
                delayline_b[stage] <= 0;
            else
                delayline_b[stage] <= delayline_b[stage-1];
        end
    end
    endgenerate

    assign dout_a = delayline_a[READ_LATENCY-1];
    assign dout_b = delayline_b[READ_LATENCY-1];

endmodule
24 changes: 16 additions & 8 deletions kernels/cclo/tcl/generate_sim.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,18 @@
# en_arith - 0/1 - enables arithmetic, providing support for reduction collectives and combine primitive
# en_compress - 0/1 - enables compression feature
# en_extkrnl - 0/1 - enables PL stream attachments, providing support for non-memory send/recv
# memsize - size of simulated memory, up to 16M
# memsize_log - log2 of the simulated memory size in bytes (e.g. 28 selects 256 MiB)
# latency - read latency of simulated memory, up to 128 cycles
set stacktype [lindex $::argv 0]
set en_dma [lindex $::argv 1]
set en_arith [lindex $::argv 2]
set en_compress [lindex $::argv 3]
set en_extkrnl [lindex $::argv 4]
set memsize [lindex $::argv 5]
set memsize_log [lindex $::argv 5]
set latency [lindex $::argv 6]
puts "$stacktype $en_dma $memsize $latency"
set memsize [expr { 2 ** $memsize_log }]
set mem_addr_bits [expr { $memsize_log - 6 }]
puts "$stacktype $en_dma $memsize $mem_addr_bits $latency"

# open project
open_project ./ccl_offload_ex/ccl_offload_ex.xpr
Expand All @@ -39,6 +41,11 @@ update_compile_order -fileset sim_1
set_property ip_repo_paths { ./hls ./../plugins } [current_project]
update_ip_catalog

# add the simulation memory to the project
add_files -norecurse ./hdl/sim_mem.v
update_compile_order -fileset sources_1
update_compile_order -fileset sim_1

# open the block design
open_bd_design {./ccl_offload_ex/ccl_offload_ex.srcs/sources_1/bd/ccl_offload_bd/ccl_offload_bd.bd}

Expand Down Expand Up @@ -99,11 +106,12 @@ assign_bd_address -offset 0x4000 -range 8K -target_address_space [get_bd_addr_sp
group_bd_cells control [get_bd_cells hostctrl_0] [get_bd_cells hostctrl_1] [get_bd_cells client_arbiter_0] [get_bd_cells smartconnect_0]

if { $en_dma != 0 } {
create_bd_cell -type ip -vlnv xilinx.com:ip:blk_mem_gen:8.4 blk_mem_gen_0
create_bd_cell -type ip -vlnv xilinx.com:ip:axi_bram_ctrl:4.1 axi_bram_ctrl_0
set_property -dict [list CONFIG.SINGLE_PORT_BRAM {1} CONFIG.DATA_WIDTH {512} CONFIG.ECC_TYPE {0}] [get_bd_cells axi_bram_ctrl_0]
set_property -dict [list CONFIG.PRIM_type_to_Implement {URAM} CONFIG.READ_LATENCY_A $latency] [get_bd_cells blk_mem_gen_0]
connect_bd_intf_net [get_bd_intf_pins axi_bram_ctrl_0/BRAM_PORTA] [get_bd_intf_pins blk_mem_gen_0/BRAM_PORTA]
set_property -dict [list CONFIG.SINGLE_PORT_BRAM {0} CONFIG.DATA_WIDTH {512} CONFIG.ECC_TYPE {0} CONFIG.READ_LATENCY $latency] [get_bd_cells axi_bram_ctrl_0]
create_bd_cell -type module -reference sim_mem sim_mem_0
set_property -dict [list CONFIG.MEM_DEPTH_LOG $mem_addr_bits CONFIG.MEM_WIDTH {512} CONFIG.READ_LATENCY $latency] [get_bd_cells sim_mem_0]
connect_bd_intf_net [get_bd_intf_pins axi_bram_ctrl_0/BRAM_PORTA] [get_bd_intf_pins sim_mem_0/MEM_PORT_A]
connect_bd_intf_net [get_bd_intf_pins axi_bram_ctrl_0/BRAM_PORTB] [get_bd_intf_pins sim_mem_0/MEM_PORT_B]

create_bd_cell -type ip -vlnv xilinx.com:ip:axi_crossbar:2.1 axi_crossbar_0
set_property -dict [list CONFIG.NUM_SI {3} CONFIG.NUM_MI {1}] [get_bd_cells axi_crossbar_0]
Expand All @@ -129,7 +137,7 @@ if { $en_dma != 0 } {
set_property offset 0x0000000000000000 [get_bd_addr_segs {cclo/dma_1/Data/SEG_axi_bram_ctrl_0_Mem0}]
set_property offset 0x0000000000000000 [get_bd_addr_segs {cclo/dma_0/Data/SEG_axi_bram_ctrl_0_Mem0}]

group_bd_cells external_memory [get_bd_cells axi_bram_ctrl_0] [get_bd_cells blk_mem_gen_0] [get_bd_cells axi_crossbar_0]
group_bd_cells external_memory [get_bd_cells axi_bram_ctrl_0] [get_bd_cells sim_mem_0] [get_bd_cells axi_crossbar_0]

}

Expand Down
2 changes: 1 addition & 1 deletion test/model/simulator/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ cmake_minimum_required(VERSION 3.9)
project(accl_simulation)

set(CMAKE_CXX_STANDARD 17)
set(ACCL_SIM_MEM_SIZE_KB 16384 CACHE STRING "Size of ACCL simulator memory, in KB (must correspond to simdll)")
set(ACCL_SIM_MEM_SIZE_KB 262144 CACHE STRING "Size of ACCL simulator memory, in KB (must correspond to simdll)")

# Consider using cmake_path from CMake 3.20 (2021) for better error messages:
# https://cmake.org/cmake/help/latest/command/cmake_path.html
Expand Down

0 comments on commit 271a075

Please sign in to comment.