diff --git a/kernels/cclo/Makefile b/kernels/cclo/Makefile index c04fdaeb..dc76095c 100644 --- a/kernels/cclo/Makefile +++ b/kernels/cclo/Makefile @@ -18,6 +18,7 @@ PLATFORM ?= xilinx_u280_xdma_201920_3 HW_DEBUG ?= none STACK_TYPE ?= UDP +MODE ?= xo EN_DMA ?= 1 EN_ARITH ?= 1 EN_COMPRESS ?= 1 @@ -25,24 +26,32 @@ EN_EXT_KRNL ?= 1 MB_DEBUG_LEVEL ?= 0 SIM_MEM_SIZE_LOG ?= 28 SIM_MEM_LATENCY ?= 50 -CCLO_ELF=vitis_ws/ccl_offload_control/Debug/ccl_offload_control.elf -CCLO_SIMDLL=ccl_offload_ex/ccl_offload_ex.sim/sim_1/behav/xsim/xsim.dir/ccl_offload_behav/xsimk.so -CCLO_XSA=ccl_offload_ex/ccl_offload.xsa -CCLO_DCP=ccl_offload_ex/ccl_offload_ex.runs/synth_1/packaged.dcp -CCLO_XO=ccl_offload.xo FW_SOURCES = $(shell find fw -name '*.c') $(shell find fw -name '*.h') $(shell find fw -name '*.tcl') +ifeq ($(MODE), simdll) + EN_DMA=1 + EN_ARITH=1 + EN_COMPRESS=1 + EN_EXT_KRNL=1 + MB_DEBUG_LEVEL=0 +endif + ifeq (u250,$(findstring u250, $(PLATFORM))) FPGAPART=xcu250-figd2104-2L-e + BOARD=u250 else ifeq (u280,$(findstring u280, $(PLATFORM))) FPGAPART=xcu280-fsvh2892-2L-e + BOARD=u280 else ifeq (u55c,$(findstring u55c, $(PLATFORM))) FPGAPART=xcu55c-fsvh2892-2L-e + BOARD=u55c else ifeq (u200,$(findstring u200, $(PLATFORM))) FPGAPART=xcu200-fsgd2104-2-e + BOARD=u200 else ifeq (u50,$(findstring u50, $(PLATFORM))) FPGAPART=xcu50-fsvh2104-2-e + BOARD=u50 else $(error Unsupported PLATFORM) endif @@ -51,14 +60,34 @@ COMMIT_HASH := 0x$(shell git rev-parse HEAD | cut -c 1-6) GEN_KERNEL_TCL := tcl/generate_kernel.tcl REBUILD_BD_TCL := tcl/rebuild_bd.tcl tcl/control_bd.tcl tcl/rx_bd.tcl tcl/tx_bd.tcl -all: $(CCLO_XO) +ifeq ($(MODE), simdll) + FPGAPART=xcu280-fsvh2892-2L-e + BOARD=u280 + BUILD_FOLDER = $(STACK_TYPE)_sim +else + BUILD_FOLDER = $(STACK_TYPE)_$(EN_DMA)$(EN_ARITH)$(EN_COMPRESS)$(EN_EXT_KRNL)$(MB_DEBUG_LEVEL)_$(FPGAPART) +endif + +CCLO_ELF=$(BUILD_FOLDER)/vitis_ws/ccl_offload_control/Debug/ccl_offload_control.elf +CCLO_SIMDLL=$(BUILD_FOLDER)/ccl_offload_ex/ccl_offload_ex.sim/sim_1/behav/xsim/xsim.dir/ccl_offload_behav/xsimk.so +CCLO_XSA=$(BUILD_FOLDER)/ccl_offload.xsa +CCLO_DCP=$(BUILD_FOLDER)/ccl_offload_ex/ccl_offload_ex.runs/synth_1/packaged.dcp +CCLO_XO=$(BUILD_FOLDER)/ccl_offload.xo + +OUTPUT_PRODUCT=$(CCLO_XO) +ifeq ($(MODE), simdll) + OUTPUT_PRODUCT=$(CCLO_SIMDLL) +endif + +all: $(OUTPUT_PRODUCT) .PHONY: xsa xsa: $(CCLO_XSA) $(CCLO_XSA): $(GEN_KERNEL_TCL) $(REBUILD_BD_TCL) $(MAKE) -C hls/ DEVICE=$(FPGAPART) - vivado -mode batch -source $< -tclargs $(FPGAPART) $(HW_DEBUG) $(CCLO_XSA) $(STACK_TYPE) $(EN_DMA) $(EN_ARITH) $(EN_COMPRESS) $(EN_EXT_KRNL) $(MB_DEBUG_LEVEL) $(COMMIT_HASH) + mkdir -p $(BUILD_FOLDER) + cd $(BUILD_FOLDER) && vivado -mode batch -source ../$< -tclargs $(FPGAPART) $(HW_DEBUG) ./ccl_offload.xsa $(STACK_TYPE) $(EN_DMA) $(EN_ARITH) $(EN_COMPRESS) $(EN_EXT_KRNL) $(MB_DEBUG_LEVEL) $(COMMIT_HASH) OPTIMIZATION=3 #valid values 0,1,2,3,s,g @@ -74,36 +103,34 @@ else ifeq ($(OPTIMIZATION),s) OPTIMIZATION_STRING = "Optimize for size (-Os)" endif -.PHONY: elf simdll xo clean dcp +.PHONY: elf xo clean dcp elf: $(CCLO_ELF) $(CCLO_ELF): tcl/generate_sw.tcl tcl/associate_elf.tcl $(FW_SOURCES) $(CCLO_XSA) - rm -rf vitis_ws && mkdir vitis_ws - xsct $< ccl_offload $(CCLO_XSA) ./fw $(OPTIMIZATION_STRING) - vivado -mode batch -source tcl/associate_elf.tcl -tclargs $(CCLO_ELF) - -simdll: $(CCLO_SIMDLL) + cd $(BUILD_FOLDER) && xsct ../tcl/generate_sw.tcl ccl_offload ./ccl_offload.xsa ../fw $(OPTIMIZATION_STRING) + cd $(BUILD_FOLDER) && vivado -mode batch -source ../tcl/associate_elf.tcl -tclargs vitis_ws/ccl_offload_control/Debug/ccl_offload_control.elf $(CCLO_SIMDLL): tcl/generate_sim.tcl elf - $(MAKE) -C ../plugins PLATFORM=$(PLATFORM) NUM_EXTDMA_AXI=2 - vivado -mode batch -source $< -tclargs $(STACK_TYPE) $(EN_DMA) $(EN_ARITH) $(EN_COMPRESS) $(EN_EXT_KRNL) $(SIM_MEM_SIZE_LOG) $(SIM_MEM_LATENCY) - cd ccl_offload_ex/ccl_offload_ex.sim/sim_1/behav/xsim/ && ./compile.sh && ./elaborate.sh + $(MAKE) -C ../plugins DEVICE=$(FPGAPART) + cd $(BUILD_FOLDER) && vivado -mode batch -source ../tcl/generate_sim.tcl -tclargs $(STACK_TYPE) $(EN_DMA) $(EN_ARITH) $(EN_COMPRESS) $(EN_EXT_KRNL) $(SIM_MEM_SIZE_LOG) $(SIM_MEM_LATENCY) + cd $(BUILD_FOLDER)/ccl_offload_ex/ccl_offload_ex.sim/sim_1/behav/xsim/ && ./compile.sh && ./elaborate.sh dcp: $(CCLO_DCP) $(CCLO_DCP): tcl/generate_dcp.tcl $(CCLO_ELF) - vivado -mode batch -source $< -tclargs $(STACK_TYPE) $(EN_DMA) $(EN_ARITH) $(EN_COMPRESS) $(EN_EXT_KRNL) $(MB_DEBUG_LEVEL) + cd $(BUILD_FOLDER) && vivado -mode batch -source ../tcl/generate_dcp.tcl -tclargs $(STACK_TYPE) $(EN_DMA) $(EN_ARITH) $(EN_COMPRESS) $(EN_EXT_KRNL) $(MB_DEBUG_LEVEL) xo: $(CCLO_XO) -ccl_offload.xml: +$(BUILD_FOLDER)/ccl_offload.xml: python3 gen_xml.py $(STACK_TYPE) $(EN_DMA) $(EN_ARITH) $(EN_COMPRESS) $(EN_EXT_KRNL) + mkdir -p $(BUILD_FOLDER) + mv ccl_offload.xml $@ -$(CCLO_XO): tcl/generate_xo.tcl ccl_offload.xml $(CCLO_DCP) - rm -rf ccl_offload_ex/ccl_offload - vivado -mode batch -source $< -tclargs $(STACK_TYPE) $(EN_DMA) $(EN_ARITH) $(EN_COMPRESS) $(EN_EXT_KRNL) $(MB_DEBUG_LEVEL) +$(CCLO_XO): tcl/generate_xo.tcl $(BUILD_FOLDER)/ccl_offload.xml $(CCLO_DCP) + rm -rf $(BUILD_FOLDER)/ccl_offload_ex/ccl_offload + cd $(BUILD_FOLDER) && vivado -mode batch -source ../tcl/generate_xo.tcl -tclargs $(STACK_TYPE) $(EN_DMA) $(EN_ARITH) $(EN_COMPRESS) $(EN_EXT_KRNL) $(MB_DEBUG_LEVEL) clean: - $(MAKE) -C hls/ clean - rm -rf ccl_offload_ex ccl_offload.xml ccl_offload.xo vitis_ws .Xil *.jou *.log + git clean -xfd diff --git a/kernels/cclo/hdl/ccl_offload.v b/kernels/cclo/hdl/ccl_offload.v index 426434b0..8fbea965 100644 --- a/kernels/cclo/hdl/ccl_offload.v +++ b/kernels/cclo/hdl/ccl_offload.v @@ -340,70 +340,6 @@ module ccl_offload `endif `ifdef DMA_ENABLE - .m_axi_0_araddr(m_axi_0_araddr), - .m_axi_0_arburst(m_axi_0_arburst), - .m_axi_0_arcache(m_axi_0_arcache), - .m_axi_0_arlen(m_axi_0_arlen), - .m_axi_0_arprot(m_axi_0_arprot), - .m_axi_0_arready(m_axi_0_arready), - .m_axi_0_arsize(m_axi_0_arsize), - .m_axi_0_aruser(m_axi_0_aruser), - .m_axi_0_arvalid(m_axi_0_arvalid), - .m_axi_0_awaddr(m_axi_0_awaddr), - .m_axi_0_awburst(m_axi_0_awburst), - .m_axi_0_awcache(m_axi_0_awcache), - .m_axi_0_awlen(m_axi_0_awlen), - .m_axi_0_awprot(m_axi_0_awprot), - .m_axi_0_awready(m_axi_0_awready), - .m_axi_0_awsize(m_axi_0_awsize), - .m_axi_0_awuser(m_axi_0_awuser), - .m_axi_0_awvalid(m_axi_0_awvalid), - .m_axi_0_bready(m_axi_0_bready), - .m_axi_0_bresp(m_axi_0_bresp), - .m_axi_0_bvalid(m_axi_0_bvalid), - .m_axi_0_rdata(m_axi_0_rdata), - .m_axi_0_rlast(m_axi_0_rlast), - .m_axi_0_rready(m_axi_0_rready), - .m_axi_0_rresp(m_axi_0_rresp), - .m_axi_0_rvalid(m_axi_0_rvalid), - .m_axi_0_wdata(m_axi_0_wdata), - .m_axi_0_wlast(m_axi_0_wlast), - .m_axi_0_wready(m_axi_0_wready), - .m_axi_0_wstrb(m_axi_0_wstrb), - .m_axi_0_wvalid(m_axi_0_wvalid), - - .m_axi_1_araddr(m_axi_1_araddr), - .m_axi_1_arburst(m_axi_1_arburst), - .m_axi_1_arcache(m_axi_1_arcache), - .m_axi_1_arlen(m_axi_1_arlen), - .m_axi_1_arprot(m_axi_1_arprot), - .m_axi_1_arready(m_axi_1_arready), - .m_axi_1_arsize(m_axi_1_arsize), - .m_axi_1_aruser(m_axi_1_aruser), - .m_axi_1_arvalid(m_axi_1_arvalid), - .m_axi_1_awaddr(m_axi_1_awaddr), - .m_axi_1_awburst(m_axi_1_awburst), - .m_axi_1_awcache(m_axi_1_awcache), - .m_axi_1_awlen(m_axi_1_awlen), - .m_axi_1_awprot(m_axi_1_awprot), - .m_axi_1_awready(m_axi_1_awready), - .m_axi_1_awsize(m_axi_1_awsize), - .m_axi_1_awuser(m_axi_1_awuser), - .m_axi_1_awvalid(m_axi_1_awvalid), - .m_axi_1_bready(m_axi_1_bready), - .m_axi_1_bresp(m_axi_1_bresp), - .m_axi_1_bvalid(m_axi_1_bvalid), - .m_axi_1_rdata(m_axi_1_rdata), - .m_axi_1_rlast(m_axi_1_rlast), - .m_axi_1_rready(m_axi_1_rready), - .m_axi_1_rresp(m_axi_1_rresp), - .m_axi_1_rvalid(m_axi_1_rvalid), - .m_axi_1_wdata(m_axi_1_wdata), - .m_axi_1_wlast(m_axi_1_wlast), - .m_axi_1_wready(m_axi_1_wready), - .m_axi_1_wstrb(m_axi_1_wstrb), - .m_axi_1_wvalid(m_axi_1_wvalid), -`elsif DMA_EXTERNAL .m_axis_dma0_s2mm_tdata(m_axis_dma0_s2mm_tdata), .m_axis_dma0_s2mm_tkeep(m_axis_dma0_s2mm_tkeep), .m_axis_dma0_s2mm_tdest(m_axis_dma0_s2mm_tdest), diff --git a/kernels/cclo/hls/build.tcl b/kernels/cclo/hls/build.tcl index dde09e0a..ea2a921f 100644 --- a/kernels/cclo/hls/build.tcl +++ b/kernels/cclo/hls/build.tcl @@ -58,7 +58,7 @@ set seg_dir "[pwd]/../segmenter/" set rx_dir "[pwd]/../rxbuf_offload/" set drv_dir "[pwd]/../../../../driver/hls/" -open_project build_$ipname +open_project build_${ipname}.${device} add_files $ipname.cpp -cflags "-std=c++14 -I. -I../ -I$drv_dir -I$hlslib_dir -I$fw_dir -I$eth_dir -I$seg_dir -I$rx_dir -DACCL_SYNTHESIS" if {$do_sim || $do_cosim} { diff --git a/kernels/cclo/hls/dma_mover/Makefile b/kernels/cclo/hls/dma_mover/Makefile index 2d804673..fe7a524e 100644 --- a/kernels/cclo/hls/dma_mover/Makefile +++ b/kernels/cclo/hls/dma_mover/Makefile @@ -17,7 +17,7 @@ TARGET=ip DEVICE=xcu280-fsvh2892-2L-e -DMA_MOVER_IP=build_dma_mover/sol1/impl/ip/xilinx_com_hls_dma_mover_1_0.zip +DMA_MOVER_IP=build_dma_mover.$(DEVICE)/sol1/impl/ip/xilinx_com_hls_dma_mover_1_0.zip all: $(DMA_MOVER_IP) diff --git a/kernels/cclo/hls/dma_mover/build_dma_mover.tcl b/kernels/cclo/hls/dma_mover/build_dma_mover.tcl index 4372816d..9c7307d3 100644 --- a/kernels/cclo/hls/dma_mover/build_dma_mover.tcl +++ b/kernels/cclo/hls/dma_mover/build_dma_mover.tcl @@ -50,7 +50,7 @@ switch $command { } } -open_project build_dma_mover +open_project build_dma_mover.${device} add_files dma_mover.cpp -cflags "-std=c++14 -I[pwd]/../../../../driver/hls -I[pwd]/../eth_intf/ -I[pwd]/../../../../hlslib/include/hlslib/xilinx -I[pwd]/../segmenter -I[pwd]/../../fw/sw_apps/ccl_offload_control/src -DHLSLIB_SYNTHESIS" add_files -tb tb_dma_mover.cpp -cflags "-std=c++14 -I[pwd]/../../../../driver/hls -I[pwd]/../eth_intf/ -I[pwd]/../../../../hlslib/include/hlslib/xilinx -I[pwd]/../segmenter -I[pwd]/../../fw/sw_apps/ccl_offload_control/src -DHLSLIB_SYNTHESIS" diff --git a/kernels/cclo/hls/eth_intf/Makefile b/kernels/cclo/hls/eth_intf/Makefile index d4537a85..f1de0a95 100644 --- a/kernels/cclo/hls/eth_intf/Makefile +++ b/kernels/cclo/hls/eth_intf/Makefile @@ -16,15 +16,15 @@ # *******************************************************************************/ DEVICE=xcu280-fsvh2892-2L-e -TCP_PACKETIZER_IP=build_tcp_packetizer/sol1/impl/ip/xilinx_com_hls_tcp_packetizer_1_0.zip -TCP_DEPACKETIZER_IP=build_tcp_depacketizer/sol1/impl/ip/xilinx_com_hls_tcp_depacketizer_1_0.zip -TCP_TXHANDLER_IP=build_tcp_txHandler//sol1/impl/ip/xilinx_com_hls_tcp_txHandler_1_0.zip -TCP_RXHANDLER_IP=build_tcp_rxHandler//sol1/impl/ip/xilinx_com_hls_tcp_rxHandler_1_0.zip -UDP_PACKETIZER_IP=build_udp_packetizer/sol1/impl/ip/xilinx_com_hls_udp_packetizer_1_0.zip -UDP_DEPACKETIZER_IP=build_udp_depacketizer/sol1/impl/ip/xilinx_com_hls_udp_depacketizer_1_0.zip -RDMA_SQ_HANDLER_IP=build_rdma_sq_handler/sol1/impl/ip/xilinx_com_hls_rdma_sq_handler_1_0.zip -RDMA_PACKETIZER_IP=build_rdma_packetizer/sol1/impl/ip/xilinx_com_hls_rdma_packetizer_1_0.zip -RDMA_DEPACKETIZER_IP=build_rdma_depacketizer/sol1/impl/ip/xilinx_com_hls_rdma_depacketizer_1_0.zip +TCP_PACKETIZER_IP=build_tcp_packetizer.$(DEVICE)/sol1/impl/ip/xilinx_com_hls_tcp_packetizer_1_0.zip +TCP_DEPACKETIZER_IP=build_tcp_depacketizer.$(DEVICE)/sol1/impl/ip/xilinx_com_hls_tcp_depacketizer_1_0.zip +TCP_TXHANDLER_IP=build_tcp_txHandler.$(DEVICE)/sol1/impl/ip/xilinx_com_hls_tcp_txHandler_1_0.zip +TCP_RXHANDLER_IP=build_tcp_rxHandler.$(DEVICE)/sol1/impl/ip/xilinx_com_hls_tcp_rxHandler_1_0.zip +UDP_PACKETIZER_IP=build_udp_packetizer.$(DEVICE)/sol1/impl/ip/xilinx_com_hls_udp_packetizer_1_0.zip +UDP_DEPACKETIZER_IP=build_udp_depacketizer.$(DEVICE)/sol1/impl/ip/xilinx_com_hls_udp_depacketizer_1_0.zip +RDMA_SQ_HANDLER_IP=build_rdma_sq_handler.$(DEVICE)/sol1/impl/ip/xilinx_com_hls_rdma_sq_handler_1_0.zip +RDMA_PACKETIZER_IP=build_rdma_packetizer.$(DEVICE)/sol1/impl/ip/xilinx_com_hls_rdma_packetizer_1_0.zip +RDMA_DEPACKETIZER_IP=build_rdma_depacketizer.$(DEVICE)/sol1/impl/ip/xilinx_com_hls_rdma_depacketizer_1_0.zip TARGET=ip diff --git a/kernels/cclo/hls/rxbuf_offload/Makefile b/kernels/cclo/hls/rxbuf_offload/Makefile index d8fdb3fe..9282cffa 100644 --- a/kernels/cclo/hls/rxbuf_offload/Makefile +++ b/kernels/cclo/hls/rxbuf_offload/Makefile @@ -17,10 +17,10 @@ TARGET=ip DEVICE=xcu250-figd2104-2L-e -RXBUF_DEQUEUE_IP=build_rxbuf_dequeue/sol1/impl/ip/xilinx_com_hls_rxbuf_dequeue_1_0.zip -RXBUF_ENQUEUE_IP=build_rxbuf_enqueue/sol1/impl/ip/xilinx_com_hls_rxbuf_enqueue_1_0.zip -RXBUF_SEEK_IP=build_rxbuf_seek/sol1/impl/ip/xilinx_com_hls_rxbuf_seek_1_0.zip -RXBUF_SESSION_IP=build_rxbuf_session/sol1/impl/ip/xilinx_com_hls_rxbuf_session_1_0.zip +RXBUF_DEQUEUE_IP=build_rxbuf_dequeue.$(DEVICE)/sol1/impl/ip/xilinx_com_hls_rxbuf_dequeue_1_0.zip +RXBUF_ENQUEUE_IP=build_rxbuf_enqueue.$(DEVICE)/sol1/impl/ip/xilinx_com_hls_rxbuf_enqueue_1_0.zip +RXBUF_SEEK_IP=build_rxbuf_seek.$(DEVICE)/sol1/impl/ip/xilinx_com_hls_rxbuf_seek_1_0.zip +RXBUF_SESSION_IP=build_rxbuf_session.$(DEVICE)/sol1/impl/ip/xilinx_com_hls_rxbuf_session_1_0.zip all: $(RXBUF_DEQUEUE_IP) $(RXBUF_ENQUEUE_IP) $(RXBUF_SEEK_IP) $(RXBUF_SESSION_IP) diff --git a/kernels/cclo/hls/segmenter/Makefile b/kernels/cclo/hls/segmenter/Makefile index aa06d397..4055fe2a 100644 --- a/kernels/cclo/hls/segmenter/Makefile +++ b/kernels/cclo/hls/segmenter/Makefile @@ -16,8 +16,8 @@ # *******************************************************************************/ DEVICE=xcu250-figd2104-2L-e -SEGMENTER_IP=build_stream_segmenter/sol1/impl/ip/xilinx_com_hls_stream_segmenter_1_0.zip -DMA2SEGCMD_IP=build_dma2seg_cmd/sol1/impl/ip/xilinx_com_hls_dma2seg_cmd_1_0.zip +SEGMENTER_IP=build_stream_segmenter.$(DEVICE)/sol1/impl/ip/xilinx_com_hls_stream_segmenter_1_0.zip +DMA2SEGCMD_IP=build_dma2seg_cmd.$(DEVICE)/sol1/impl/ip/xilinx_com_hls_dma2seg_cmd_1_0.zip TARGET=ip all: $(SEGMENTER_IP) $(DMA2SEGCMD_IP) diff --git a/kernels/cclo/tcl/generate_kernel.tcl b/kernels/cclo/tcl/generate_kernel.tcl index 07972299..0ccd5835 100644 --- a/kernels/cclo/tcl/generate_kernel.tcl +++ b/kernels/cclo/tcl/generate_kernel.tcl @@ -34,36 +34,36 @@ update_compile_order -fileset sources_1 create_bd_design ccl_offload_bd # add our own ip to the repo -set_property ip_repo_paths {./hls/} [current_project] +set_property ip_repo_paths {../hls/} [current_project] update_ip_catalog #rebuild bd -source -notrace tcl/rebuild_bd.tcl +source -notrace ../tcl/rebuild_bd.tcl create_root_design $stacktype $en_dma $en_arith $en_compress $en_extkrnl $mb_debug_level $commit_hash #add debug if requested if [string equal $hw_debug_level "dma"] { puts "Adding DMA debug to block design" - source -notrace tcl/debug_dma.tcl + source -notrace ../tcl/debug_dma.tcl } elseif [string equal $hw_debug_level "pkt"] { puts "Adding (de)packetizer debug to block design" - source -notrace tcl/debug_pkt.tcl + source -notrace ../tcl/debug_pkt.tcl } elseif [string equal $hw_debug_level "arith"] { puts "Adding arithmetic debug to block design" - source -notrace tcl/debug_arith.tcl + source -notrace ../tcl/debug_arith.tcl } elseif [string equal $hw_debug_level "control"] { puts "Adding control debug to block design" - source -notrace tcl/debug_control.tcl + source -notrace ../tcl/debug_control.tcl } elseif [string equal $hw_debug_level "all"] { puts "Adding all debug cores to block design" - source -notrace tcl/debug_dma.tcl - source -notrace tcl/debug_pkt.tcl - source -notrace tcl/debug_arith.tcl - source -notrace tcl/debug_control.tcl + source -notrace ../tcl/debug_dma.tcl + source -notrace ../tcl/debug_pkt.tcl + source -notrace ../tcl/debug_arith.tcl + source -notrace ../tcl/debug_control.tcl } # add wrapper -add_files -norecurse ./hdl/ccl_offload.v +add_files -norecurse ../hdl/ccl_offload.v update_compile_order -fileset sources_1 update_compile_order -fileset sim_1 generate_target all [get_files ./ccl_offload_ex/ccl_offload_ex.srcs/sources_1/bd/ccl_offload_bd/ccl_offload_bd.bd] diff --git a/kernels/cclo/tcl/generate_sim.tcl b/kernels/cclo/tcl/generate_sim.tcl index c1a51760..185cc990 100644 --- a/kernels/cclo/tcl/generate_sim.tcl +++ b/kernels/cclo/tcl/generate_sim.tcl @@ -38,11 +38,11 @@ open_project ./ccl_offload_ex/ccl_offload_ex.xpr update_compile_order -fileset sim_1 # add plugins to the catalog -set_property ip_repo_paths { ./hls ./../plugins } [current_project] +set_property ip_repo_paths { ../hls ../../plugins } [current_project] update_ip_catalog # add the simulation memory to the project -add_files -norecurse ./hdl/sim_mem.v +add_files -norecurse ../hdl/sim_mem.v update_compile_order -fileset sources_1 update_compile_order -fileset sim_1 @@ -141,7 +141,7 @@ if { $en_dma != 0 } { connect_bd_intf_net [get_bd_intf_pins axi_crossbar_2/M00_AXI] [get_bd_intf_pins axi_crossbar_0/S02_AXI] connect_bd_intf_net [get_bd_intf_pins axi_crossbar_2/M01_AXI] [get_bd_intf_pins axi_crossbar_1/S02_AXI] - create_bd_cell -type ip -vlnv Xilinx:ACCL:external_dma:1.0 external_dma_0 + create_bd_cell -type ip -vlnv Xilinx:ACCL:external_dma_2port:1.0 external_dma_0 connect_bd_net [get_bd_ports ap_clk] [get_bd_pins external_dma_0/ap_clk] connect_bd_net [get_bd_ports ap_rst_n] [get_bd_pins external_dma_0/ap_rst_n] connect_bd_intf_net [get_bd_intf_pins external_dma_0/m_axi_0] [get_bd_intf_pins axi_crossbar_0/S00_AXI] @@ -149,7 +149,7 @@ if { $en_dma != 0 } { connect_bd_intf_net [get_bd_intf_pins external_dma_0/s_axis_s2mm] [get_bd_intf_pins cclo/m_axis_dma0_s2mm] connect_bd_intf_net [get_bd_intf_pins external_dma_0/m_axis_mm2s] [get_bd_intf_pins cclo/s_axis_dma0_mm2s] - create_bd_cell -type ip -vlnv Xilinx:ACCL:external_dma:1.0 external_dma_1 + create_bd_cell -type ip -vlnv Xilinx:ACCL:external_dma_2port:1.0 external_dma_1 connect_bd_net [get_bd_ports ap_clk] [get_bd_pins external_dma_1/ap_clk] connect_bd_net [get_bd_ports ap_rst_n] [get_bd_pins external_dma_1/ap_rst_n] connect_bd_intf_net [get_bd_intf_pins external_dma_1/m_axi_0] [get_bd_intf_pins axi_crossbar_0/S01_AXI] @@ -203,6 +203,8 @@ if { $en_dma != 0 } { set s_axi [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:aximm_rtl:1.0 s_axi_data ] set_property -dict [ list CONFIG.ADDR_WIDTH {64} CONFIG.DATA_WIDTH {512} CONFIG.FREQ_HZ {250000000} CONFIG.HAS_BRESP {0} CONFIG.HAS_BURST {0} CONFIG.HAS_CACHE {0} CONFIG.HAS_LOCK {0} CONFIG.HAS_PROT {0} CONFIG.HAS_QOS {0} CONFIG.HAS_REGION {0} CONFIG.HAS_WSTRB {1} CONFIG.NUM_READ_OUTSTANDING {1} CONFIG.NUM_WRITE_OUTSTANDING {1} CONFIG.PROTOCOL {AXI4} CONFIG.READ_WRITE_MODE {READ_WRITE} ] $s_axi + set_property -dict [list CONFIG.ID_WIDTH.VALUE_SRC USER CONFIG.AWUSER_WIDTH.VALUE_SRC USER CONFIG.ARUSER_WIDTH.VALUE_SRC USER] $s_axi + set_property -dict [list CONFIG.ARUSER_WIDTH {4} CONFIG.AWUSER_WIDTH {4} CONFIG.ID_WIDTH {4} ] $s_axi connect_bd_intf_net [get_bd_intf_ports s_axi_data] [get_bd_intf_pins axi_crossbar_2/S00_AXI] connect_bd_net [get_bd_ports ap_clk] [get_bd_pins axi_crossbar_0/aclk] diff --git a/kernels/cclo/tcl/rebuild_bd.tcl b/kernels/cclo/tcl/rebuild_bd.tcl index 559b28c5..259d0145 100644 --- a/kernels/cclo/tcl/rebuild_bd.tcl +++ b/kernels/cclo/tcl/rebuild_bd.tcl @@ -110,7 +110,7 @@ proc create_root_design { netStackType enableDMA enableArithmetic enableCompress set control_xbar [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 control_xbar ] set_property -dict [ list CONFIG.NUM_MI {2} ] $control_xbar - source -notrace ./tcl/control_bd.tcl + source -notrace ../tcl/control_bd.tcl set idcode [expr {$commitHash<<8 | $debugLevel<<6 | $enableExtKrnlStream<<5 | $enableCompression<<4 | $enableArithmetic<<3 | $enableDMA<<2 | ($netStackType == "RDMA" ? 2 : $netStackType == "TCP" ? 1 : 0) }] create_hier_cell_control [current_bd_instance .] control $debugLevel $idcode @@ -232,8 +232,8 @@ proc create_root_design { netStackType enableDMA enableArithmetic enableCompress save_bd_design # Create network (de)packetizer - source -notrace ./tcl/rx_bd.tcl - source -notrace ./tcl/tx_bd.tcl + source -notrace ../tcl/rx_bd.tcl + source -notrace ../tcl/tx_bd.tcl if { $netStackType == "TCP" } { # TCP interfaces diff --git a/kernels/plugins/Makefile b/kernels/plugins/Makefile index 1c1df661..da39ea18 100644 --- a/kernels/plugins/Makefile +++ b/kernels/plugins/Makefile @@ -17,30 +17,12 @@ # *******************************************************************************/ PERIPHERAL_IPS = hostctrl loopback reduce_ops hp_compression dummy_tcp_stack client_arbiter vadd_put cyt_adapter external_dma dummy_cyt_rdma_stack dummy_cyt_dma tcp_session_handler +DEVICE=xcu280-fsvh2892-2L-e TARGET=ip -PLATFORM ?= xilinx_u280_xdma_201920_3 -DEBUG ?= none -STACK_TYPE ?= UDP -NUM_EXTDMA_AXI ?= 1 - -ifeq (u250,$(findstring u250, $(PLATFORM))) - FPGAPART=xcu250-figd2104-2L-e -else ifeq (u280,$(findstring u280, $(PLATFORM))) - FPGAPART=xcu280-fsvh2892-2L-e -else ifeq (u55c,$(findstring u55c, $(PLATFORM))) - FPGAPART=xcu55c-fsvh2892-2L-e -else ifeq (u200,$(findstring u200, $(PLATFORM))) - FPGAPART=xcu200-fsgd2104-2-e -else ifeq (u50,$(findstring u50, $(PLATFORM))) - FPGAPART=xcu50-fsvh2104-2-e -else - $(error Unsupported PLATFORM) -endif all: $(PERIPHERAL_IPS) - .PHONY: hostctrl loopback reduce_ops hp_compression dummy_tcp_stack client_arbiter vadd_put cyt_adapter external_dma dummy_cyt_rdma_stack dummy_cyt_dma tcp_session_handler $(PERIPHERAL_IPS): - $(MAKE) -C $@ DEVICE=$(FPGAPART) TARGET=$(TARGET) STACK_TYPE=$(STACK_TYPE) + $(MAKE) -C $@ DEVICE=$(DEVICE) TARGET=$(TARGET) diff --git a/kernels/plugins/client_arbiter/Makefile b/kernels/plugins/client_arbiter/Makefile index 54c94cc2..da1fa056 100644 --- a/kernels/plugins/client_arbiter/Makefile +++ b/kernels/plugins/client_arbiter/Makefile @@ -16,9 +16,9 @@ # *******************************************************************************/ DEVICE=xcu250-figd2104-2L-e -ARBITER_IP=client_arbiter.xo +ARBITER_IP=client_arbiter_$(DEVICE).xo TARGET=ip -NCLIENTS=3 +NCLIENTS=2 all: $(ARBITER_IP) diff --git a/kernels/plugins/client_arbiter/build_client_arbiter.tcl b/kernels/plugins/client_arbiter/build_client_arbiter.tcl index 7b81765a..c23bb3f8 100644 --- a/kernels/plugins/client_arbiter/build_client_arbiter.tcl +++ b/kernels/plugins/client_arbiter/build_client_arbiter.tcl @@ -41,14 +41,14 @@ switch $command { } -open_project build_client_arbiter +open_project build_client_arbiter.$device add_files client_arbiter.cpp -cflags "-std=c++14 -I../../../driver/hls/ -DNUM_CTRL_STREAMS=$nclients -DACCL_SYNTHESIS" set_top client_arbiter open_solution sol1 -config_export -format xo -library ACCL -output [pwd]/client_arbiter.xo +config_export -format xo -library ACCL -output [pwd]/client_arbiter_${device}.xo if {$do_syn} { set_part $device diff --git a/kernels/plugins/cyt_adapter/Makefile b/kernels/plugins/cyt_adapter/Makefile index d435f07c..be1a85c2 100644 --- a/kernels/plugins/cyt_adapter/Makefile +++ b/kernels/plugins/cyt_adapter/Makefile @@ -17,15 +17,14 @@ TARGET=ip DEVICE=xcu250-figd2104-2L-e -CYT_DMA_ADAPTER=cyt_dma_adapter.xo -CYT_RDMA_ARBITER=cyt_rdma_arbiter.xo -CYT_RDMA_MUX=cyt_rdma_mux.xo -STACK_TYPE ?= UDP +CYT_DMA_ADAPTER=cyt_dma_adapter_$(DEVICE).xo +CYT_RDMA_ARBITER=cyt_rdma_arbiter_$(DEVICE).xo +CYT_RDMA_MUX=cyt_rdma_mux_$(DEVICE).xo all: $(CYT_DMA_ADAPTER) $(CYT_RDMA_ARBITER) $(CYT_RDMA_MUX) $(CYT_DMA_ADAPTER): build_cyt_dma_adapter.tcl cyt_dma_adapter.cpp - vitis_hls $< -tclargs $(TARGET) $(DEVICE) $(STACK_TYPE) + vitis_hls $< -tclargs $(TARGET) $(DEVICE) $(CYT_RDMA_ARBITER): build_cyt_rdma_arbiter.tcl cyt_rdma_arbiter.cpp vitis_hls $< -tclargs $(TARGET) $(DEVICE) diff --git a/kernels/plugins/cyt_adapter/build_cyt_dma_adapter.tcl b/kernels/plugins/cyt_adapter/build_cyt_dma_adapter.tcl index 2de071ec..342b3400 100644 --- a/kernels/plugins/cyt_adapter/build_cyt_dma_adapter.tcl +++ b/kernels/plugins/cyt_adapter/build_cyt_dma_adapter.tcl @@ -52,7 +52,7 @@ switch $command { } -open_project build_cyt_dma_adapter +open_project build_cyt_dma_adapter.${device} if {$stack eq "RDMA"} { add_files cyt_dma_adapter.cpp -cflags "-std=c++14 -I. -I../../../driver/hls/ -DACCL_SYNTHESIS -DACCL_RDMA" @@ -64,7 +64,7 @@ if {$stack eq "RDMA"} { set_top cyt_dma_adapter open_solution sol1 -config_export -format xo -library ACCL -output [pwd]/cyt_dma_adapter.xo +config_export -format xo -library ACCL -output [pwd]/cyt_dma_adapter_$device.xo if {$do_sim} { csim_design -clean diff --git a/kernels/plugins/cyt_adapter/build_cyt_rdma_arbiter.tcl b/kernels/plugins/cyt_adapter/build_cyt_rdma_arbiter.tcl index 9309459b..d8534621 100644 --- a/kernels/plugins/cyt_adapter/build_cyt_rdma_arbiter.tcl +++ b/kernels/plugins/cyt_adapter/build_cyt_rdma_arbiter.tcl @@ -51,7 +51,7 @@ switch $command { } -open_project build_cyt_rdma_arbiter +open_project build_cyt_rdma_arbiter.${device} add_files cyt_rdma_arbiter.cpp -cflags "-std=c++14 -I. -I../../cclo/hls/eth_intf -I../../../driver/hls/ -DACCL_SYNTHESIS" @@ -60,7 +60,7 @@ add_files cyt_rdma_arbiter.cpp -cflags "-std=c++14 -I. -I../../cclo/hls/eth_intf set_top cyt_rdma_arbiter open_solution sol1 -config_export -format xo -library ACCL -output [pwd]/cyt_rdma_arbiter.xo +config_export -format xo -library ACCL -output [pwd]/cyt_rdma_arbiter_${device}.xo if {$do_sim} { csim_design -clean diff --git a/kernels/plugins/cyt_adapter/build_cyt_rdma_mux.tcl b/kernels/plugins/cyt_adapter/build_cyt_rdma_mux.tcl index df908845..ac0e6dde 100644 --- a/kernels/plugins/cyt_adapter/build_cyt_rdma_mux.tcl +++ b/kernels/plugins/cyt_adapter/build_cyt_rdma_mux.tcl @@ -51,7 +51,7 @@ switch $command { } -open_project build_cyt_rdma_mux +open_project build_cyt_rdma_mux.${device} add_files cyt_rdma_mux.cpp -cflags "-std=c++14 -I. -I../../cclo/hls/eth_intf -I../../../driver/hls/ -DACCL_SYNTHESIS" @@ -60,7 +60,7 @@ add_files cyt_rdma_mux.cpp -cflags "-std=c++14 -I. -I../../cclo/hls/eth_intf -I. set_top cyt_rdma_mux open_solution sol1 -config_export -format xo -library ACCL -output [pwd]/cyt_rdma_mux.xo +config_export -format xo -library ACCL -output [pwd]/cyt_rdma_mux_${device}.xo if {$do_sim} { csim_design -clean diff --git a/kernels/plugins/cyt_adapter/cyt_dma_adapter.cpp b/kernels/plugins/cyt_adapter/cyt_dma_adapter.cpp index b0767dad..300e991a 100644 --- a/kernels/plugins/cyt_adapter/cyt_dma_adapter.cpp +++ b/kernels/plugins/cyt_adapter/cyt_dma_adapter.cpp @@ -71,27 +71,6 @@ void rdma_req_byp_cmd_converter( } -void multiplexor(hls::stream& in0, - hls::stream& in1, - hls::stream& out) -{ -#pragma HLS inline off -#pragma HLS pipeline II=1 - - cyt_req_t currWord; - - if (!STREAM_IS_EMPTY(in0)) - { - currWord = STREAM_READ(in0); - STREAM_WRITE(out, currWord); - } - else if(!STREAM_IS_EMPTY(in1)) - { - currWord = STREAM_READ(in1); - STREAM_WRITE(out, currWord); - } -} - void multiplexor(hls::stream& in0, hls::stream& in1, hls::stream& in2, @@ -184,11 +163,11 @@ void cyt_dma_adapter( hls::stream> &dma1_s2mm_sts, hls::stream> &dma0_mm2s_sts, hls::stream> &dma1_mm2s_sts, -#ifdef ACCL_RDMA + //RDMA rd_req and wr_req hls::stream & rdma_wr_req, hls::stream & rdma_rd_req, -#endif + //Coyote Bypass interface command and status hls::stream &cyt_byp_wr_cmd, hls::stream> &cyt_byp_wr_sts, @@ -213,13 +192,10 @@ void cyt_dma_adapter( #pragma HLS aggregate variable=cyt_byp_wr_cmd compact=bit #pragma HLS aggregate variable=cyt_byp_rd_cmd compact=bit -#ifdef ACCL_RDMA #pragma HLS INTERFACE axis port=rdma_wr_req #pragma HLS INTERFACE axis port=rdma_rd_req #pragma HLS aggregate variable=rdma_wr_req compact=bit #pragma HLS aggregate variable=rdma_rd_req compact=bit -#endif - static hls::stream byp_wr_cmd_0; #pragma HLS stream variable=byp_wr_cmd_0 depth=16 @@ -239,36 +215,21 @@ void cyt_dma_adapter( static hls::stream> dma1_s2mm_meta; #pragma HLS stream variable=dma1_s2mm_meta depth=16 -#ifdef ACCL_RDMA static hls::stream byp_wr_cmd_2; #pragma HLS stream variable=byp_wr_cmd_2 depth=16 static hls::stream byp_rd_cmd_2; #pragma HLS stream variable=byp_rd_cmd_2 depth=16 -#endif dm_byp_cmd_converter<0>(dma0_s2mm_cmd, byp_wr_cmd_0, dma0_s2mm_meta); dm_byp_cmd_converter<1>(dma1_s2mm_cmd, byp_wr_cmd_1, dma1_s2mm_meta); -#ifdef ACCL_RDMA rdma_req_byp_cmd_converter<2>(rdma_wr_req, byp_wr_cmd_2); -#endif - -#ifdef ACCL_RDMA multiplexor(byp_wr_cmd_0,byp_wr_cmd_1,byp_wr_cmd_2,cyt_byp_wr_cmd); -#else - multiplexor(byp_wr_cmd_0,byp_wr_cmd_1,cyt_byp_wr_cmd); -#endif + dm_byp_cmd_converter<0>(dma0_mm2s_cmd,byp_rd_cmd_0, dma0_mm2s_meta); dm_byp_cmd_converter<1>(dma1_mm2s_cmd,byp_rd_cmd_1, dma1_mm2s_meta); -#ifdef ACCL_RDMA rdma_req_byp_cmd_converter<2>(rdma_rd_req, byp_rd_cmd_2); -#endif - -#ifdef ACCL_RDMA multiplexor(byp_rd_cmd_0,byp_rd_cmd_1,byp_rd_cmd_2,cyt_byp_rd_cmd); -#else - multiplexor(byp_rd_cmd_0,byp_rd_cmd_1,cyt_byp_rd_cmd); -#endif byp_dm_sts_converter(cyt_byp_wr_sts, dma0_s2mm_sts, dma1_s2mm_sts, dma0_s2mm_meta, dma1_s2mm_meta); byp_dm_sts_converter(cyt_byp_rd_sts, dma0_mm2s_sts, dma1_mm2s_sts, dma0_mm2s_meta, dma1_mm2s_meta); diff --git a/kernels/plugins/dummy_cyt_dma/Makefile b/kernels/plugins/dummy_cyt_dma/Makefile index 7302142f..549ee6aa 100644 --- a/kernels/plugins/dummy_cyt_dma/Makefile +++ b/kernels/plugins/dummy_cyt_dma/Makefile @@ -15,7 +15,7 @@ # # *******************************************************************************/ DEVICE=xcu280-fsvh2892-2L-e -DUMMY_DMA_IP=dummy_cyt_dma.xo +DUMMY_DMA_IP=dummy_cyt_dma_$(DEVICE).xo TARGET=ip diff --git a/kernels/plugins/dummy_cyt_dma/build_dummy_cyt_dma.tcl b/kernels/plugins/dummy_cyt_dma/build_dummy_cyt_dma.tcl index 9abb37cc..b8d1d8e5 100644 --- a/kernels/plugins/dummy_cyt_dma/build_dummy_cyt_dma.tcl +++ b/kernels/plugins/dummy_cyt_dma/build_dummy_cyt_dma.tcl @@ -51,14 +51,14 @@ switch $command { } -open_project dummy_cyt_dma +open_project dummy_cyt_dma.${device} add_files dummy_cyt_dma.cpp -cflags "-std=c++14 -I../../../driver/hls/ -I../cyt_adapter -I../../../hlslib/include/hlslib/xilinx -DACCL_SYNTHESIS" set_top cyt_dma open_solution sol1 -config_export -format xo -library ACCL -output [pwd]/dummy_cyt_dma.xo +config_export -format xo -library ACCL -output [pwd]/dummy_cyt_dma_${device}.xo if {$do_sim} { csim_design -clean diff --git a/kernels/plugins/dummy_cyt_rdma_stack/Makefile b/kernels/plugins/dummy_cyt_rdma_stack/Makefile index 2f1b5ab8..237dddfb 100644 --- a/kernels/plugins/dummy_cyt_rdma_stack/Makefile +++ b/kernels/plugins/dummy_cyt_rdma_stack/Makefile @@ -15,7 +15,7 @@ # # *******************************************************************************/ DEVICE=xcu280-fsvh2892-2L-e -TCP_STACK_IP=dummy_cyt_rdma_stack.xo +TCP_STACK_IP=dummy_cyt_rdma_stack_$(DEVICE).xo TARGET=ip diff --git a/kernels/plugins/dummy_cyt_rdma_stack/build_cyt_rdma_stack.tcl b/kernels/plugins/dummy_cyt_rdma_stack/build_cyt_rdma_stack.tcl index 7f21e873..02f1d882 100644 --- a/kernels/plugins/dummy_cyt_rdma_stack/build_cyt_rdma_stack.tcl +++ b/kernels/plugins/dummy_cyt_rdma_stack/build_cyt_rdma_stack.tcl @@ -51,14 +51,14 @@ switch $command { } -open_project dummy_cyt_rdma_stack +open_project dummy_cyt_rdma_stack.${device} add_files dummy_cyt_rdma_stack.cpp -cflags "-std=c++14 -I../../../driver/hls/ -I../../cclo/hls/eth_intf/ -I../../../hlslib/include/hlslib/xilinx -DACCL_SYNTHESIS" set_top cyt_rdma open_solution sol1 -config_export -format xo -library ACCL -output [pwd]/dummy_cyt_rdma_stack.xo +config_export -format xo -library ACCL -output [pwd]/dummy_cyt_rdma_stack_${device}.xo if {$do_sim} { csim_design -clean diff --git a/kernels/plugins/dummy_tcp_stack/Makefile b/kernels/plugins/dummy_tcp_stack/Makefile index 933dcb08..b577ffe4 100644 --- a/kernels/plugins/dummy_tcp_stack/Makefile +++ b/kernels/plugins/dummy_tcp_stack/Makefile @@ -15,7 +15,7 @@ # # *******************************************************************************/ DEVICE=xcu280-fsvh2892-2L-e -TCP_STACK_IP=dummy_tcp_stack.xo +TCP_STACK_IP=dummy_tcp_stack_$(DEVICE).xo TARGET=ip diff --git a/kernels/plugins/dummy_tcp_stack/build_tcp_stack.tcl b/kernels/plugins/dummy_tcp_stack/build_tcp_stack.tcl index 1c04a7ee..6f3af94f 100644 --- a/kernels/plugins/dummy_tcp_stack/build_tcp_stack.tcl +++ b/kernels/plugins/dummy_tcp_stack/build_tcp_stack.tcl @@ -51,14 +51,14 @@ switch $command { } -open_project build_tcp_stack +open_project build_tcp_stack.${device} add_files dummy_tcp_stack.cpp -cflags "-std=c++14 -I../../../driver/hls/ -I../../cclo/hls/eth_intf/ -I../../../hlslib/include/hlslib/xilinx -DACCL_SYNTHESIS" set_top network_krnl open_solution sol1 -config_export -format xo -library ACCL -output [pwd]/dummy_tcp_stack.xo +config_export -format xo -library ACCL -output [pwd]/dummy_tcp_stack_${device}.xo if {$do_sim} { csim_design -clean diff --git a/kernels/plugins/external_dma/Makefile b/kernels/plugins/external_dma/Makefile index 2c4b8b6c..174d5753 100755 --- a/kernels/plugins/external_dma/Makefile +++ b/kernels/plugins/external_dma/Makefile @@ -16,12 +16,9 @@ # *******************************************************************************/ DEVICE ?= xcu280-fsvh2892-2L-e -NUM_EXTDMA_AXI ?= 1 -all: external_dma.xo +all: external_dma_1port.xo external_dma_2port.xo external_dma_3port.xo external_dma_4port.xo -external_dma.v kernel.xml &: gen_files.py - python3 $< -n $(NUM_EXTDMA_AXI) - -external_dma.xo: bd.tcl kernel.xml external_dma.v - vivado -mode batch -source $< -notrace -tclargs $(DEVICE) +external_dma_%port.xo: bd.tcl + python3 gen_files.py -n $* + vivado -mode batch -source $< -notrace -tclargs $(DEVICE) $* diff --git a/kernels/plugins/external_dma/bd.tcl b/kernels/plugins/external_dma/bd.tcl index cc06c074..0d417b7c 100644 --- a/kernels/plugins/external_dma/bd.tcl +++ b/kernels/plugins/external_dma/bd.tcl @@ -15,10 +15,11 @@ # # *******************************************************************************/ set fpgapart [lindex $::argv 0] -set num_dma 2 +set num_dma [lindex $::argv 1] +set ipname external_dma_${num_dma}port # create project with correct target -create_project -force external_dma ./external_dma -part $fpgapart +create_project -force external_dma ./${ipname} -part $fpgapart set_property target_language verilog [current_project] set_property simulator_language MIXED [current_project] set_property coreContainer.enable false [current_project] @@ -314,23 +315,23 @@ set_property -dict [ list CONFIG.ASSOCIATED_BUSIF $interfaces ] [get_bd_ports ap validate_bd_design save_bd_design -add_files -norecurse ./external_dma.v +add_files -norecurse ./${ipname}.v update_compile_order -fileset sources_1 update_compile_order -fileset sim_1 -set bdfile [get_files ./external_dma/external_dma.srcs/sources_1/bd/external_dma_bd/external_dma_bd.bd] +set bdfile [get_files ./${ipname}/external_dma.srcs/sources_1/bd/external_dma_bd/external_dma_bd.bd] generate_target all $bdfile export_ip_user_files -of_objects $bdfile -no_script -sync -force -quiet create_ip_run $bdfile update_compile_order -fileset sources_1 -set_property top external_dma [current_fileset] +set_property top ${ipname} [current_fileset] # Package IP -ipx::package_project -root_dir ./packaged_kernel -vendor Xilinx -library ACCL -taxonomy /KernelIP -import_files -set_current false -ipx::unload_core ./packaged_kernel/component.xml +ipx::package_project -root_dir ./${ipname}/packaged_kernel -vendor Xilinx -library ACCL -taxonomy /KernelIP -import_files -set_current false +ipx::unload_core ./${ipname}/packaged_kernel/component.xml -ipx::edit_ip_in_project -upgrade true -name tmp_edit_project -directory ./package ./packaged_kernel/component.xml +ipx::edit_ip_in_project -upgrade true -name tmp_edit_project -directory ./${ipname}/package ./${ipname}/packaged_kernel/component.xml set_property core_revision 1 [ipx::current_core] foreach up [ipx::get_user_parameters] { @@ -365,10 +366,10 @@ ipx::update_checksums [ipx::current_core] ipx::save_core [ipx::current_core] ## Generate XO -if {[file exists "external_dma.xo"]} { - file delete -force "external_dma.xo" +if {[file exists "${ipname}.xo"]} { + file delete -force "${ipname}.xo" } -package_xo -xo_path external_dma.xo -kernel_name external_dma -ip_directory ./packaged_kernel -kernel_xml ./kernel.xml +package_xo -xo_path ${ipname}.xo -kernel_name ${ipname} -ip_directory ./${ipname}/packaged_kernel -kernel_xml ./${ipname}.xml close_project -delete diff --git a/kernels/plugins/external_dma/gen_files.py b/kernels/plugins/external_dma/gen_files.py index 260c5e59..999ac90c 100644 --- a/kernels/plugins/external_dma/gen_files.py +++ b/kernels/plugins/external_dma/gen_files.py @@ -25,7 +25,7 @@ `timescale 1 ns / 1 ps -module external_dma +module external_dma_{}port ( input ap_clk, input ap_rst_n, @@ -110,7 +110,7 @@ .s_axis_s2mm_tdata(s_axis_s2mm_tdata), .s_axis_s2mm_tkeep(s_axis_s2mm_tkeep), - .s_axis_s2mm_tdest(s_axis_s2mm_tdest), + {} .s_axis_s2mm_tlast(s_axis_s2mm_tlast), .s_axis_s2mm_tready(s_axis_s2mm_tready), .s_axis_s2mm_tvalid(s_axis_s2mm_tvalid), @@ -124,7 +124,7 @@ .s_axis_mm2s_cmd_tdata(s_axis_mm2s_cmd_tdata), .s_axis_mm2s_cmd_tready(s_axis_mm2s_cmd_tready), .s_axis_mm2s_cmd_tvalid(s_axis_mm2s_cmd_tvalid), - .s_axis_mm2s_cmd_tdest(s_axis_mm2s_cmd_tdest), + {} .m_axis_mm2s_sts_tdata(m_axis_mm2s_sts_tdata), .m_axis_mm2s_sts_tready(m_axis_mm2s_sts_tready), @@ -135,7 +135,7 @@ .s_axis_s2mm_cmd_tdata(s_axis_s2mm_cmd_tdata), .s_axis_s2mm_cmd_tready(s_axis_s2mm_cmd_tready), .s_axis_s2mm_cmd_tvalid(s_axis_s2mm_cmd_tvalid), - .s_axis_s2mm_cmd_tdest(s_axis_s2mm_cmd_tdest), + {} .m_axis_s2mm_sts_tdata(m_axis_s2mm_sts_tdata), .m_axis_s2mm_sts_tready(m_axis_s2mm_sts_tready), @@ -226,13 +226,16 @@ all_axi_declarations += axi_intf_declaration.format(i) all_axi_connections += axi_intf_connection.format(i) -with open("external_dma.v", "w") as f: - f.write(verilog_wrapper.format(all_axi_declarations, all_axi_connections)) +with open("external_dma_%dport.v" % args.numdma, "w") as f: + tdest_1 = '\t\t\t\t.s_axis_s2mm_tdest(s_axis_s2mm_tdest),\n' if args.numdma > 1 else '\n' + tdest_2 = '\t\t\t\t.s_axis_mm2s_cmd_tdest(s_axis_mm2s_cmd_tdest),\n' if args.numdma > 1 else '\n' + tdest_3 = '\t\t\t\t.s_axis_s2mm_cmd_tdest(s_axis_s2mm_cmd_tdest),\n' if args.numdma > 1 else '\n' + f.write(verilog_wrapper.format(args.numdma, all_axi_declarations, all_axi_connections, tdest_1, tdest_2, tdest_3)) kernel_xml = """ - + @@ -241,7 +244,7 @@ -{} +{1} @@ -249,7 +252,7 @@ -{} +{2} """ @@ -265,5 +268,5 @@ all_xml_ports += xml_axi_port.format(i) all_xml_args += xml_axi_arg.format(i,i+6) -with open("kernel.xml", "w") as f: - f.write(kernel_xml.format(all_xml_ports, all_xml_args)) +with open("external_dma_%dport.xml" % args.numdma, "w") as f: + f.write(kernel_xml.format(args.numdma, all_xml_ports, all_xml_args)) diff --git a/kernels/plugins/hostctrl/Makefile b/kernels/plugins/hostctrl/Makefile index febf9f68..3d0cb406 100644 --- a/kernels/plugins/hostctrl/Makefile +++ b/kernels/plugins/hostctrl/Makefile @@ -17,7 +17,7 @@ TARGET=ip DEVICE=xcu250-figd2104-2L-e -HOSTCTRL_IP=hostctrl.xo +HOSTCTRL_IP=hostctrl_$(DEVICE).xo all: $(HOSTCTRL_IP) diff --git a/kernels/plugins/hostctrl/build_hostctrl.tcl b/kernels/plugins/hostctrl/build_hostctrl.tcl index 53effa44..96b128ad 100644 --- a/kernels/plugins/hostctrl/build_hostctrl.tcl +++ b/kernels/plugins/hostctrl/build_hostctrl.tcl @@ -51,14 +51,14 @@ switch $command { } -open_project build_hostctrl +open_project build_hostctrl.${device} add_files hostctrl.cpp -cflags "-std=c++14 -I. -I../../../driver/hls/ -DACCL_SYNTHESIS" set_top hostctrl open_solution sol1 -config_export -format xo -library ACCL -output [pwd]/hostctrl.xo +config_export -format xo -library ACCL -output [pwd]/hostctrl_${device}.xo if {$do_sim} { csim_design -clean diff --git a/kernels/plugins/hp_compression/Makefile b/kernels/plugins/hp_compression/Makefile index f5b26434..a57630ec 100644 --- a/kernels/plugins/hp_compression/Makefile +++ b/kernels/plugins/hp_compression/Makefile @@ -17,7 +17,7 @@ TARGET=ip DEVICE=xcu250-figd2104-2L-e -REDUCE_IP=hp_compression.xo +REDUCE_IP=hp_compression_$(DEVICE).xo all: $(REDUCE_IP) diff --git a/kernels/plugins/hp_compression/build.tcl b/kernels/plugins/hp_compression/build.tcl index 30338e9c..49af1fb4 100644 --- a/kernels/plugins/hp_compression/build.tcl +++ b/kernels/plugins/hp_compression/build.tcl @@ -50,14 +50,14 @@ switch $command { } } -open_project build_hp_compression +open_project build_hp_compression.${device} add_files hp_compression.cpp -cflags "-std=c++14 -I[pwd]/../../../driver/hls/ -DACCL_SYNTHESIS" set_top hp_compression open_solution sol1 -config_export -format xo -library ACCL -output [pwd]/hp_compression.xo +config_export -format xo -library ACCL -output [pwd]/hp_compression_${device}.xo if {$do_sim} { csim_design -clean diff --git a/kernels/plugins/loopback/Makefile b/kernels/plugins/loopback/Makefile index 60a35b0a..c3ad9015 100644 --- a/kernels/plugins/loopback/Makefile +++ b/kernels/plugins/loopback/Makefile @@ -17,7 +17,7 @@ # *******************************************************************************/ DEVICE=xcu250-figd2104-2L-e -LOOPBACK_IP=loopback.xo +LOOPBACK_IP=loopback_$(DEVICE).xo TARGET=ip all: $(LOOPBACK_IP) diff --git a/kernels/plugins/loopback/build_loopback.tcl b/kernels/plugins/loopback/build_loopback.tcl index 3b9dbcfc..797b7c83 100644 --- a/kernels/plugins/loopback/build_loopback.tcl +++ b/kernels/plugins/loopback/build_loopback.tcl @@ -50,14 +50,14 @@ switch $command { } } -open_project build_loopback +open_project build_loopback.${device} add_files loopback.cpp -cflags "-std=c++14 -I[pwd]/../../../driver/hls/ -DACCL_SYNTHESIS" set_top loopback open_solution sol1 -config_export -format xo -library ACCL -output [pwd]/loopback.xo +config_export -format xo -library ACCL -output [pwd]/loopback_${device}.xo if {$do_sim} { csim_design -clean diff --git a/kernels/plugins/reduce_ops/Makefile b/kernels/plugins/reduce_ops/Makefile index 7c941058..0817e372 100644 --- a/kernels/plugins/reduce_ops/Makefile +++ b/kernels/plugins/reduce_ops/Makefile @@ -19,11 +19,11 @@ TARGET=ip DEVICE=xcu250-figd2104-2L-e DTYPES=float half double int32_t int64_t DWIDTH=512 -REDUCE_IP = reduce_ops.xo +REDUCE_IP = reduce_ops_$(DEVICE).xo all: $(REDUCE_IP) -reduce_ops.xo: build.tcl reduce_ops.cpp +$(REDUCE_IP): build.tcl reduce_ops.cpp vitis_hls $< -tclargs $(TARGET) $(DEVICE) diff --git a/kernels/plugins/reduce_ops/build.tcl b/kernels/plugins/reduce_ops/build.tcl index 47e8eba0..5d8bbf17 100644 --- a/kernels/plugins/reduce_ops/build.tcl +++ b/kernels/plugins/reduce_ops/build.tcl @@ -53,14 +53,14 @@ switch $command { } -open_project build_${ipname} +open_project build_${ipname}.${device} add_files reduce_ops.cpp -cflags "-std=c++14 -I[pwd]/ -I[pwd]/../../../driver/hls/ -DACCL_SYNTHESIS" set_top ${ipname} open_solution sol1 -config_export -format xo -library ACCL -output [pwd]/${ipname}.xo +config_export -format xo -library ACCL -output [pwd]/${ipname}_${device}.xo if {$do_sim} { csim_design -clean diff --git a/kernels/plugins/tcp_session_handler/Makefile b/kernels/plugins/tcp_session_handler/Makefile index 2ac5aa80..db730ad7 100644 --- a/kernels/plugins/tcp_session_handler/Makefile +++ b/kernels/plugins/tcp_session_handler/Makefile @@ -15,7 +15,7 @@ # # *******************************************************************************/ DEVICE=xcu280-fsvh2892-2L-e -TCP_STACK_IP=tcp_session_handler.xo +TCP_STACK_IP=tcp_session_handler_$(DEVICE).xo TARGET=ip diff --git a/kernels/plugins/tcp_session_handler/build_tcp_session_handler.tcl b/kernels/plugins/tcp_session_handler/build_tcp_session_handler.tcl index f59ee89c..28902435 100644 --- a/kernels/plugins/tcp_session_handler/build_tcp_session_handler.tcl +++ b/kernels/plugins/tcp_session_handler/build_tcp_session_handler.tcl @@ -51,14 +51,14 @@ switch $command { } -open_project tcp_session_handler +open_project tcp_session_handler.${device} add_files tcp_session_handler.cpp -cflags "-std=c++14 -I../../../driver/hls/ -I../../../hlslib/include/hlslib/xilinx -DACCL_SYNTHESIS" set_top tcp_session_handler open_solution sol1 -config_export -format xo -library ACCL -output [pwd]/tcp_session_handler.xo +config_export -format xo -library ACCL -output [pwd]/tcp_session_handler_${device}.xo if {$do_sim} { csim_design -clean diff --git a/kernels/plugins/vadd_put/Makefile b/kernels/plugins/vadd_put/Makefile index 395ca0a8..61d9d6b5 100644 --- a/kernels/plugins/vadd_put/Makefile +++ b/kernels/plugins/vadd_put/Makefile @@ -16,7 +16,7 @@ # *******************************************************************************/ DEVICE=xcu250-figd2104-2L-e -VADD_IP=vadd_put.xo +VADD_IP=vadd_put_$(DEVICE).xo TARGET=ip all: $(VADD_IP) diff --git a/kernels/plugins/vadd_put/build_vadd_put.tcl b/kernels/plugins/vadd_put/build_vadd_put.tcl index 5ff3c6b4..829df0ef 100644 --- a/kernels/plugins/vadd_put/build_vadd_put.tcl +++ b/kernels/plugins/vadd_put/build_vadd_put.tcl @@ -40,14 +40,14 @@ switch $command { } -open_project build_vadd_put +open_project build_vadd_put.${device} add_files vadd_put.cpp -cflags "-std=c++14 -I../../../driver/hls/ -I. -DACCL_SYNTHESIS" set_top vadd_put open_solution sol1 -config_export -format xo -library ACCL -output [pwd]/vadd_put.xo +config_export -format xo -library ACCL -output [pwd]/vadd_put_${device}.xo if {$do_syn} { set_part $device diff --git a/test/model/simulator/CMakeLists.txt b/test/model/simulator/CMakeLists.txt index 9f02a71a..4175d9ab 100644 --- a/test/model/simulator/CMakeLists.txt +++ b/test/model/simulator/CMakeLists.txt @@ -19,6 +19,7 @@ cmake_minimum_required(VERSION 3.9) project(accl_simulation) set(CMAKE_CXX_STANDARD 17) +set(ACCL_SIM_NUM_BANKS 2 CACHE STRING "Number of ACCL simulator memory banks (must correspond to simdll)") set(ACCL_SIM_MEM_SIZE_KB 262144 CACHE STRING "Size of ACCL simulator memory, in KB (must correspond to simdll)") # Consider using cmake_path from CMake 3.20 (2021) for better error messages: @@ -51,10 +52,14 @@ target_link_libraries(cclo_sim PUBLIC zmq zmqpp pthread jsoncpp_lib dl rt) target_compile_definitions(cclo_sim PUBLIC ZMQ_CALL_VERBOSE NUM_CTRL_STREAMS=3 ACCL_SIM_MEM_SIZE_KB=${ACCL_SIM_MEM_SIZE_KB}) target_compile_options(cclo_sim PRIVATE -Wno-attributes -fdiagnostics-color=always -g -fmax-errors=3) -set(XSIM_COMPILE_FOLDER ${ACCL_REPO_ROOT}/kernels/cclo/ccl_offload_ex/ccl_offload_ex.sim/sim_1/behav/xsim/) +set(STACK_TYPE "TCP" CACHE STRING "Type of POE") +set_property(CACHE STACK_TYPE PROPERTY STRINGS "TCP" "UDP" "RDMA") + +set(BUILD_FOLDER ${STACK_TYPE}_sim) +set(XSIM_COMPILE_FOLDER ${ACCL_REPO_ROOT}/kernels/cclo/${BUILD_FOLDER}/ccl_offload_ex/ccl_offload_ex.sim/sim_1/behav/xsim/) if (NOT EXISTS ${XSIM_COMPILE_FOLDER}) - message(FATAL_ERROR "Simulation kernel not build. Run make simdll in ${ACCL_REPO_ROOT}/kernels/cclo") + message(FATAL_ERROR "Simulation kernel not built. Run make STACK_TYPE=${STACK_TYPE} MODE=simdll in ${ACCL_REPO_ROOT}/kernels/cclo") endif () set(SYMLINKS xsim.dir diff --git a/test/refdesigns/Makefile b/test/refdesigns/Makefile index 1494493a..60a760dc 100644 --- a/test/refdesigns/Makefile +++ b/test/refdesigns/Makefile @@ -23,6 +23,25 @@ PLATFORM ?= xilinx_u55c_gen3x16_xdma_3_202210_1 # U280: xilinx_u280_gen3x16_xdma_1_202211_1 # U250: xilinx_u250_gen3x16_xdma_4_1_202210_1 +ifeq (u250,$(findstring u250, $(PLATFORM))) + FPGAPART=xcu250-figd2104-2L-e + BOARD=u250 +else ifeq (u280,$(findstring u280, $(PLATFORM))) + FPGAPART=xcu280-fsvh2892-2L-e + BOARD=u280 +else ifeq (u55c,$(findstring u55c, $(PLATFORM))) + FPGAPART=xcu55c-fsvh2892-2L-e + BOARD=u55c +else ifeq (u200,$(findstring u200, $(PLATFORM))) + FPGAPART=xcu200-fsgd2104-2-e + BOARD=u200 +else ifeq (u50,$(findstring u50, $(PLATFORM))) + FPGAPART=xcu50-fsvh2104-2-e + BOARD=u50 +else + $(error Unsupported PLATFORM) +endif + XSA := $(strip $(patsubst %.xpfm, % , $(shell basename $(PLATFORM)))) DEBUG ?= none PROFILE ?= none @@ -31,7 +50,6 @@ USE_HOSTMEM ?= 0 VPP_TARGET ?= hw USER_KERNEL ?= none FREQUENCY = 250 -CCLO_XO = ../../kernels/cclo/ccl_offload.xo CCLO_MB_DEBUG_LEVEL ?= 0 VNX=xup_vitis_network_example @@ -45,22 +63,20 @@ FW_SOURCES = $(shell find fw -name '*.c') $(shell find fw -name '*.h') $(shell f ETH_IF=0 CMAC_UDP_XO=$(VNX)/Ethernet/_x.$(PLATFORM)/cmac_$(ETH_IF).xo UDP_XO=$(VNX)/NetLayers/_x.$(PLATFORM)/networklayer.xo -TCP_DUMMY_XO=../../kernels/plugins/dummy_tcp_stack/dummy_tcp_stack.xo -TCP_XO=Vitis_with_100Gbps_TCP-IP/_x.hw.$(XSA)/network_krnl.xo +TCP_DUMMY_XO=../../kernels/plugins/dummy_tcp_stack/dummy_tcp_stack_$(FPGAPART).xo +TCP_XO=tcp_stack.$(XSA)/network_krnl.xo TCP_VIVADO_ROOTDIR ?= $$XILINX_VIVADO TCP_HLS_ROOTDIR ?= $$XILINX_HLS TCP_RX_BYPASS ?= 0 -CMAC_TCP_XO=Vitis_with_100Gbps_TCP-IP/_x.hw.$(XSA)/cmac_krnl.xo -EXTERNAL_DMA_XO=../../kernels/plugins/external_dma/external_dma.xo -CYT_BIT=Coyote/hw/build_$(CCLO_STACK_TYPE)/bitstreams/cyt_top.bit - -HOSTCTRL_XO=../../kernels/plugins/hostctrl/hostctrl.xo -CLIENT_ARB_XO=../../kernels/plugins/client_arbiter/client_arbiter.xo -SUM_XO=../../kernels/plugins/reduce_ops/reduce_ops.xo -COMPRESSION_XO=../../kernels/plugins/hp_compression/hp_compression.xo -LOOPBACK_XO=../../kernels/plugins/loopback/loopback.xo -TCP_SESS_XO=../../kernels/plugins/tcp_session_handler/tcp_session_handler.xo -VADD_XO=../../kernels/plugins/vadd_put/vadd_put.xo +CMAC_TCP_XO=tcp_stack.$(XSA)/cmac_krnl.xo + +HOSTCTRL_XO=../../kernels/plugins/hostctrl/hostctrl_$(FPGAPART).xo +CLIENT_ARB_XO=../../kernels/plugins/client_arbiter/client_arbiter_$(FPGAPART).xo +SUM_XO=../../kernels/plugins/reduce_ops/reduce_ops_$(FPGAPART).xo +COMPRESSION_XO=../../kernels/plugins/hp_compression/hp_compression_$(FPGAPART).xo +LOOPBACK_XO=../../kernels/plugins/loopback/loopback_$(FPGAPART).xo +TCP_SESS_XO=../../kernels/plugins/tcp_session_handler/tcp_session_handler_$(FPGAPART).xo +VADD_XO=../../kernels/plugins/vadd_put/vadd_put_$(FPGAPART).xo HWEMU_MST_XO=$$XILINX_VITIS/data/emulation/XO/sim_ipc_axis_master_512.xo HWEMU_SLV_XO=$$XILINX_VITIS/data/emulation/XO/sim_ipc_axis_slave_512.xo @@ -82,27 +98,12 @@ else $(error Unsupported PROFILE setting) endif -ifeq (u250,$(findstring u250, $(PLATFORM))) - FPGAPART=xcu250-figd2104-2L-e - BOARD=u250 -else ifeq (u280,$(findstring u280, $(PLATFORM))) - FPGAPART=xcu280-fsvh2892-2L-e - BOARD=u280 -else ifeq (u55c,$(findstring u55c, $(PLATFORM))) - FPGAPART=xcu55c-fsvh2892-2L-e - BOARD=u55c -else ifeq (u200,$(findstring u200, $(PLATFORM))) - FPGAPART=xcu200-fsgd2104-2-e - BOARD=u200 -else ifeq (u50,$(findstring u50, $(PLATFORM))) - FPGAPART=xcu50-fsvh2104-2-e - BOARD=u50 -else - $(error Unsupported PLATFORM) -endif - -BUILD_DIR := link_$(MODE)_eth_$(ETH_IF)_debug_$(DEBUG)_$(XSA) +BUILD_DIR := link_$(MODE)_$(XSA) XCLBIN=$(BUILD_DIR)/ccl_offload.xclbin + +CYT_BUILD_DIR=coyote_build_$(CCLO_STACK_TYPE)_$(BOARD) +CYT_BIT=$(CYT_BUILD_DIR)/bitstreams/cyt_top.bit + OUTPUT_PRODUCT := $(XCLBIN) CONFIGFILE := link_config.ini @@ -110,7 +111,7 @@ VPP_CONFIG := -t $(VPP_TARGET) --config $(CONFIGFILE) ADV_CMD := IPREPO_CMD := -OTHER_XO := $(CCLO_XO) $(HOSTCTRL_XO) $(CLIENT_ARB_XO) $(SUM_XO) $(COMPRESSION_XO) $(EXTERNAL_DMA_XO) +OTHER_XO := $(HOSTCTRL_XO) $(CLIENT_ARB_XO) $(SUM_XO) $(COMPRESSION_XO) $(EXTERNAL_DMA_XO) GEN_CONFIG_FLAGS := ifeq (vadd, $(USER_KERNEL)) @@ -158,14 +159,13 @@ else ifeq (axis3x,$(MODE)) CCLO_STACK_TYPE = TCP CCLO_MB_DEBUG_LEVEL = 2 else ifeq (tcp, $(MODE)) - CMAC_TCP_XO=Vitis_with_100Gbps_TCP-IP/_x.hw.$(XSA)/cmac_krnl.xo NET_XO = $(TCP_XO) ifneq (hw_emu, $(VPP_TARGET)) NET_XO += $(CMAC_TCP_XO) - ADV_CMD += --advanced.param compiler.userPostSysLinkOverlayTcl=$(shell pwd)/Vitis_with_100Gbps_TCP-IP/scripts/post_sys_link.tcl + ADV_CMD += --advanced.param compiler.userPostSysLinkOverlayTcl=$(shell pwd)/tcp_stack.$(XSA)/scripts/post_sys_link.tcl endif OTHER_XO += $(LOOPBACK_XO) $(TCP_SESS_XO) - IPREPO_CMD += --user_ip_repo_paths Vitis_with_100Gbps_TCP-IP/build/fpga-network-stack/iprepo + IPREPO_CMD += --user_ip_repo_paths tcp_stack.$(XSA)/build/fpga-network-stack/iprepo CCLO_STACK_TYPE = TCP else ifeq (coyote_tcp, $(MODE)) N_DDR_CHAN = 0 @@ -197,11 +197,19 @@ else $(error Unsupported MODE) endif +CCLO_XO = ../../kernels/cclo/$(CCLO_STACK_TYPE)_1111$(CCLO_MB_DEBUG_LEVEL)_$(FPGAPART)/ccl_offload.xo +OTHER_XO += $(CCLO_XO) + ifeq (1, $(USE_HOSTMEM)) GEN_CONFIG_FLAGS += --host NUM_EXTDMA_AXI = 2 +else + NUM_EXTDMA_AXI = 1 endif +EXTERNAL_DMA_XO=../../kernels/plugins/external_dma/external_dma_$(NUM_EXTDMA_AXI)port.xo +OTHER_XO += $(EXTERNAL_DMA_XO) + ifneq (none, $(ETH_IF)) GEN_CONFIG_FLAGS += --ethif $(ETH_IF) endif @@ -211,53 +219,56 @@ all: $(OUTPUT_PRODUCT) .PHONY: vnx vnx: $(CMAC_UDP_XO) $(UDP_XO) -$(CMAC_UDP_XO) &: +$(VNX)/Ethernet/_x.%/cmac_$(ETH_IF).xo: git submodule update --init --recursive xup_vitis_network_example - $(MAKE) -C xup_vitis_network_example/Ethernet DEVICE=$(PLATFORM) INTERFACE=$(ETH_IF) all + $(MAKE) -C xup_vitis_network_example/Ethernet DEVICE=$* INTERFACE=$(ETH_IF) all -$(UDP_XO): +$(VNX)/NetLayers/_x.%/networklayer.xo: git submodule update --init --recursive xup_vitis_network_example - $(MAKE) -C xup_vitis_network_example/NetLayers DEVICE=$(PLATFORM) all - -$(CMAC_TCP_XO): - git submodule update --init --recursive Vitis_with_100Gbps_TCP-IP - $(MAKE) -C Vitis_with_100Gbps_TCP-IP/ cmac_krnl DEVICE=$(PLATFORM) TEMP_DIR=_x.hw.$(XSA)/ XSA=$(XSA) - -$(TCP_DUMMY_XO): - $(MAKE) -C ../../kernels/plugins/dummy_tcp_stack DEVICE=$(FPGAPART) all + $(MAKE) -C xup_vitis_network_example/NetLayers DEVICE=$* all .PHONY: coyote_shell -coyote_shell: Coyote/hw/build_$(CCLO_STACK_TYPE)/lynx/lynx.xpr +coyote_shell: $(CYT_BUILD_DIR)/lynx/lynx.xpr -Coyote/hw/build_$(CCLO_STACK_TYPE)/lynx/lynx.xpr: - cd Coyote/hw && mkdir build_$(CCLO_STACK_TYPE) && cd build_$(CCLO_STACK_TYPE) && cmake .. $(COYOTE_CONFIG) - $(MAKE) -C Coyote/hw/build_$(CCLO_STACK_TYPE)/ shell +$(CYT_BUILD_DIR)/lynx/lynx.xpr: + mkdir $(CYT_BUILD_DIR) && cd $(CYT_BUILD_DIR) && cmake ../Coyote/hw $(COYOTE_CONFIG) + $(MAKE) -C $(CYT_BUILD_DIR) shell $(CYT_BIT): coyote_shell - $(MAKE) -C ../../kernels/cclo PLATFORM=$(PLATFORM) STACK_TYPE=$(CCLO_STACK_TYPE) MB_DEBUG_LEVEL=$(CCLO_MB_DEBUG_LEVEL) EN_DMA=0 EN_EXT_DMA=1 - $(MAKE) -C ../../kernels/plugins PLATFORM=$(PLATFORM) DEBUG=$(DEBUG) STACK_TYPE=$(CCLO_STACK_TYPE) NUM_EXTDMA_AXI=$(NUM_EXTDMA_AXI) - vivado -mode tcl -source tcl/coyote.tcl -tclargs $(CCLO_STACK_TYPE) Coyote/hw/build_$(CCLO_STACK_TYPE) - cp hdl/$(MODE)_top.sv Coyote/hw/build_$(CCLO_STACK_TYPE)/lynx/hdl/config_0/user_logic_c0_0.sv - $(MAKE) -C Coyote/hw/build_$(CCLO_STACK_TYPE)/ compile + $(MAKE) -C ../../kernels/cclo PLATFORM=$(PLATFORM) STACK_TYPE=$(CCLO_STACK_TYPE) MB_DEBUG_LEVEL=$(CCLO_MB_DEBUG_LEVEL) + $(MAKE) -C ../../kernels/plugins DEVICE=$(FPGAPART) + cp -rf $(CCLO_XO) $(CYT_BUILD_DIR)/iprepo + vivado -mode tcl -source tcl/coyote.tcl -tclargs $(CCLO_STACK_TYPE) $(CYT_BUILD_DIR) + cp hdl/$(MODE)_top.sv $(CYT_BUILD_DIR)/lynx/hdl/config_0/user_logic_c0_0.sv + $(MAKE) -C $(CYT_BUILD_DIR) compile .PHONY: tcp_stack tcp_stack: $(TCP_XO) $(CMAC_TCP_XO) -$(TCP_XO): +$(CMAC_TCP_XO): git submodule update --init --recursive Vitis_with_100Gbps_TCP-IP - mkdir -p Vitis_with_100Gbps_TCP-IP/build && \ - cd Vitis_with_100Gbps_TCP-IP/build && \ + mkdir -p tcp_stack.$(XSA) + cp -r Vitis_with_100Gbps_TCP-IP/* tcp_stack.$(XSA) + mkdir -p tcp_stack.$(XSA)/build && \ + cd tcp_stack.$(XSA)/build && \ cmake ../ -DFDEV_NAME=$(BOARD) -DVIVADO_HLS_ROOT_DIR=$(TCP_HLS_ROOTDIR) -DVIVADO_ROOT_DIR=$(TCP_VIVADO_ROOTDIR) -DTCP_STACK_EN=1 -DTCP_STACK_RX_DDR_BYPASS_EN=$(TCP_RX_BYPASS) -DDTCP_STACK_WINDOW_SCALING_EN=0 - $(MAKE) -C Vitis_with_100Gbps_TCP-IP/build installip - $(MAKE) -C Vitis_with_100Gbps_TCP-IP/ network_krnl DEVICE=$(PLATFORM) TEMP_DIR=_x.hw.$(XSA)/ XSA=$(XSA) + $(MAKE) -C tcp_stack.$(XSA)/build installip + $(MAKE) -C tcp_stack.$(XSA)/ cmac_krnl DEVICE=$(PLATFORM) TEMP_DIR=. XSA=$(XSA) + +$(TCP_XO): $(CMAC_TCP_XO) + $(MAKE) -C tcp_stack.$(XSA)/ network_krnl DEVICE=$(PLATFORM) TEMP_DIR=. XSA=$(XSA) + +$(TCP_DUMMY_XO): + $(MAKE) -C ../../kernels/plugins/dummy_tcp_stack DEVICE=$(FPGAPART) all $(CCLO_XO): $(MAKE) -C ../../kernels/cclo PLATFORM=$(PLATFORM) STACK_TYPE=$(CCLO_STACK_TYPE) MB_DEBUG_LEVEL=$(CCLO_MB_DEBUG_LEVEL) $(XCLBIN): $(NET_XO) $(CCLO_XO) python3 gen_config.py --board $(BOARD) --poe $(MODE) $(GEN_CONFIG_FLAGS) -o $(CONFIGFILE) - $(MAKE) -C ../../kernels/plugins PLATFORM=$(PLATFORM) DEBUG=$(DEBUG) STACK_TYPE=$(CCLO_STACK_TYPE) NUM_EXTDMA_AXI=$(NUM_EXTDMA_AXI) - v++ --link --platform $(PLATFORM) --kernel_frequency $(FREQUENCY) --save-temps --temp_dir $(BUILD_DIR) $(VPP_CONFIG) $(ADV_CMD) $(IPREPO_CMD) -o $@ $(NET_XO) $(OTHER_XO) + mkdir -p kernels/$(FPGAPART) + $(MAKE) -C ../../kernels/plugins DEVICE=$(FPGAPART) + v++ --link --platform $(PLATFORM) $(PROFILE_OPTS) --kernel_frequency $(FREQUENCY) --save-temps --temp_dir $(BUILD_DIR) $(VPP_CONFIG) $(ADV_CMD) $(IPREPO_CMD) -o $@ $(NET_XO) $(OTHER_XO) .PHONY: distclean distclean: diff --git a/test/refdesigns/build_all.sh b/test/refdesigns/build_all.sh new file mode 100755 index 00000000..23354c4b --- /dev/null +++ b/test/refdesigns/build_all.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# /******************************************************************************* +# Copyright (C) 2024 Advanced Micro Devices, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# *******************************************************************************/ + +MODES=( + axis3x + udp + tcp + coyote_tcp + coyote_rdma +) + +PLATFORMS=( + xilinx_u55c_gen3x16_xdma_3_202210_1 + xilinx_u50_gen3x16_xdma_5_202210_1 + xilinx_u200_gen3x16_xdma_2_202110_1 + xilinx_u280_gen3x16_xdma_1_202211_1 + xilinx_u250_gen3x16_xdma_4_1_202210_1 +) + +# build for any combination of mode and platform +for mode in ${MODES[@]}; do + for platform in ${PLATFORMS[@]}; do + make -j$(nproc) MODE=$mode PLATFORM=$platform > build_${mode}_${platform}.log + done +done diff --git a/test/refdesigns/gen_config.py b/test/refdesigns/gen_config.py index ca910c70..2bd2bcfd 100755 --- a/test/refdesigns/gen_config.py +++ b/test/refdesigns/gen_config.py @@ -31,7 +31,9 @@ if args.board == "u50" and args.ethif != 0: raise "U50 has a single Ethernet port" +num_extdma_ports = 1 if args.host: + num_extdma_ports = 2 if args.board == "u280" or args.board == "u50": raise "Host memory only supported on U55C/U200/U250" @@ -68,16 +70,16 @@ cclo_instantiation = "nk=ccl_offload:{num_inst}:".format(num_inst=num_cclo) arb_instantiation = "nk=client_arbiter:{num_inst}:".format(num_inst=num_cclo) -hc_instantiation = "nk=hostctrl:{num_inst}:".format(num_inst=2*num_cclo) +hc_instantiation = "nk=hostctrl:{num_inst}:".format(num_inst=num_cclo) reduce_instantiation = "nk=reduce_ops:{num_inst}:".format(num_inst=num_cclo) cast_instantiation = "nk=hp_compression:{num_inst}:".format(num_inst=3*num_cclo) -extdma_instantiation = "nk=external_dma:{num_inst}:".format(num_inst=2*num_cclo) +extdma_instantiation = "nk=external_dma_{num_ports}port:{num_inst}:".format(num_inst=2*num_cclo, num_ports=num_extdma_ports) for i in range(num_cclo): endch = "" if i == num_cclo-1 else "." cclo_instantiation += "ccl_offload_{inst_nr}".format(inst_nr=i) + endch arb_instantiation += "arb_{inst_nr}".format(inst_nr=i) + endch - hc_instantiation += "hostctrl_{inst_nr}_0.hostctrl_{inst_nr}_1".format(inst_nr=i) + endch + hc_instantiation += "hostctrl_{inst_nr}_0".format(inst_nr=i) + endch reduce_instantiation += "arith_{inst_nr}".format(inst_nr=i) + endch cast_instantiation += "compression_{inst_nr}_0.compression_{inst_nr}_1.compression_{inst_nr}_2".format(inst_nr=i) + endch extdma_instantiation += "extdma_{num_inst}_0.extdma_{num_inst}_1".format(num_inst=i) + endch @@ -121,8 +123,7 @@ slr_constraints += "slr=arb_{inst_nr}:SLR{slr_nr}\nslr=arith_{inst_nr}:SLR{slr_nr}\nslr=ccl_offload_{inst_nr}:SLR{slr_nr}\n".format(inst_nr=i, slr_nr=target_slr) for j in range(3): slr_constraints += "slr=compression_{inst_nr}_{dp_nr}:SLR{slr_nr}\n".format(inst_nr=i, dp_nr=j, slr_nr=target_slr) - for j in range(2): - slr_constraints += "slr=hostctrl_{inst_nr}_{dp_nr}:SLR{slr_nr}\n".format(inst_nr=i, dp_nr=j, slr_nr=target_slr) + slr_constraints += "slr=hostctrl_{inst_nr}_0:SLR{slr_nr}\n".format(inst_nr=i, slr_nr=target_slr) if args.axis3x: slr_constraints += "slr=poe_{inst_nr}:SLR{slr_nr}\n".format(inst_nr=i, slr_nr=target_slr) else: @@ -201,11 +202,9 @@ for i in range(num_cclo): # Command interfaces stream_connections += "stream_connect=hostctrl_{inst_nr}_0.cmd:arb_{inst_nr}.cmd_clients_0\n".format(inst_nr=i) - stream_connections += "stream_connect=hostctrl_{inst_nr}_1.cmd:arb_{inst_nr}.cmd_clients_1\n".format(inst_nr=i) stream_connections += "stream_connect=arb_{inst_nr}.cmd_cclo:ccl_offload_{inst_nr}.s_axis_call_req\n".format(inst_nr=i) stream_connections += "stream_connect=ccl_offload_{inst_nr}.m_axis_call_ack:arb_{inst_nr}.ack_cclo\n".format(inst_nr=i) stream_connections += "stream_connect=arb_{inst_nr}.ack_clients_0:hostctrl_{inst_nr}_0.sts\n".format(inst_nr=i) - stream_connections += "stream_connect=arb_{inst_nr}.ack_clients_1:hostctrl_{inst_nr}_1.sts\n".format(inst_nr=i) # Plugin interfaces stream_connections += "stream_connect=ccl_offload_{inst_nr}.m_axis_arith_op0:arith_{inst_nr}.in0\n".format(inst_nr=i) stream_connections += "stream_connect=ccl_offload_{inst_nr}.m_axis_arith_op1:arith_{inst_nr}.in1\n".format(inst_nr=i) @@ -220,9 +219,13 @@ if args.vadd: stream_connections += "stream_connect=ccl_offload_{inst_nr}.m_axis_krnl:vadd_{inst_nr}_0.data_from_cclo\n".format(inst_nr=i) stream_connections += "stream_connect=vadd_{inst_nr}_0.data_to_cclo:ccl_offload_{inst_nr}.s_axis_krnl\n".format(inst_nr=i) + stream_connections += "stream_connect=arb_0.ack_clients_1:vadd_{inst_nr}_0.sts_from_cclo:512\n".format(inst_nr=i) + stream_connections += "stream_connect=vadd_{inst_nr}_0.cmd_to_cclo:arb_0.cmd_clients_1:512\n".format(inst_nr=i) else: stream_connections += "stream_connect=ccl_offload_{inst_nr}.m_axis_krnl:lb_user_krnl_{inst_nr}.in\n".format(inst_nr=i) stream_connections += "stream_connect=lb_user_krnl_{inst_nr}.out:ccl_offload_{inst_nr}.s_axis_krnl\n".format(inst_nr=i) + stream_connections += "stream_connect=arb_0.ack_clients_1:lb_user_krnl_{inst_nr}.sts_from_cclo:512\n".format(inst_nr=i) + stream_connections += "stream_connect=lb_user_krnl_{inst_nr}.cmd_to_cclo:arb_0.cmd_clients_1:512\n".format(inst_nr=i) # External DMA interface stream_connections += "stream_connect=ccl_offload_{inst_nr}.m_axis_dma0_s2mm:extdma_{inst_nr}_0.s_axis_s2mm\n".format(inst_nr=i) stream_connections += "stream_connect=ccl_offload_{inst_nr}.m_axis_dma0_mm2s_cmd:extdma_{inst_nr}_0.s_axis_mm2s_cmd\n".format(inst_nr=i) diff --git a/test/refdesigns/tcl/coyote.tcl b/test/refdesigns/tcl/coyote.tcl index 3381867d..5225362d 100644 --- a/test/refdesigns/tcl/coyote.tcl +++ b/test/refdesigns/tcl/coyote.tcl @@ -21,7 +21,6 @@ open_project "$build_dir/lynx/lynx.xpr" update_compile_order -fileset sources_1 create_bd_design "accl_bd" update_compile_order -fileset sources_1 -set_property ip_repo_paths "$build_dir ../../kernels" [current_project] update_ip_catalog create_bd_cell -type ip -vlnv Xilinx:ACCL:ccl_offload:1.0 ccl_offload_0