Skip to content

Commit 7702798

Browse files
committed
[hardware] Parametrize DMA for DAS
[software] Add DMA with DAS test
1 parent 737d106 commit 7702798

File tree

15 files changed

+636
-906
lines changed

15 files changed

+636
-906
lines changed

Bender.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ sources:
4444
# Level 3
4545
- hardware/src/mempool_group.sv
4646
# Level 4
47-
- hardware/src/idma_partition_midend.sv
4847
- hardware/src/mempool_cluster.sv
4948
# Level 5
5049
- hardware/src/ctrl_registers.sv

config/config.mk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ zquarterinx ?= 0
7474
xDivSqrt ?= 0
7575

7676
# Enable configurable addressing scheme in the heap
77-
das ?= 1
77+
das ?= 0
7878
num_das_partitions ?= 4
7979
# Size of DAS-heap per core
8080
das_mem_size ?= 2048

hardware/deps/idma/Bender.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@ sources:
2525
- src/midends/idma_split_midend.sv
2626
- src/midends/idma_distributed_midend.sv
2727
# If DAS is enabled
28-
- src/midends/idma_split_midend_v2.sv
29-
- src/midends/idma_distributed_midend_v2.sv
3028
- src/frontends/mempool/mempool_dma_frontend_reg_pkg.sv
3129
- src/frontends/mempool/mempool_dma_frontend_reg_top.sv
3230
- src/frontends/mempool/mempool_dma.sv

hardware/deps/idma/src/midends/idma_address_scrambler.sv

Lines changed: 58 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -7,168 +7,91 @@
77
// Current constraints:
88

99
// Author: Bowen Wang <[email protected]>
10+
// Author: Marco Bertuletti <[email protected]>
1011

1112
module idma_address_scrambler #(
1213
parameter int unsigned AddrWidth = 32,
1314
parameter int unsigned DataWidth = 32,
1415
parameter int unsigned ByteOffset = 2,
16+
parameter bit Bypass = 0,
1517
parameter int unsigned NumTiles = 128,
1618
parameter int unsigned NumBanksPerTile = 32,
17-
parameter bit Bypass = 0,
18-
parameter int unsigned SeqMemSizePerTile = 4*1024,
19-
parameter int unsigned HeapSeqMemSizePerTile = 8*2048,
20-
parameter int unsigned MemSizePerTile = 8*4*1024,
21-
parameter int unsigned MemSizePerRow = 4*4*1024, // 4bytes * 4096 banks
22-
parameter int unsigned TCDMSize = 1024*1024
19+
parameter int unsigned TCDMSizePerBank = 1024,
20+
parameter int unsigned NumDASPartitions = 4,
21+
parameter int unsigned MemSizePerTile = NumBanksPerTile*TCDMSizePerBank,
22+
parameter int unsigned MemSizePerRow = (1 << ByteOffset)*NumBanksPerTile*NumTiles
2323
) (
24-
input logic [AddrWidth-1:0] address_i,
25-
input logic [31:0] num_bytes_i,
26-
input logic [3:0][7:0] group_factor_i,
27-
// For each allocation, the maximum number of rows assigned can be 128 rows
28-
input logic [3:0][7:0] allocated_size_i,
29-
input logic [3:0][DataWidth-1:0] start_addr_scheme_i,
30-
output logic [7:0] group_factor_o,
31-
output logic [7:0] allocated_size_o,
32-
output logic [AddrWidth-1:0] address_o
24+
input logic [AddrWidth-1:0] address_i,
25+
input logic [31:0] num_bytes_i,
26+
input logic [NumDASPartitions-1:0][$clog2(NumTiles):0] group_factor_i,
27+
input logic [NumDASPartitions-1:0][$clog2(NumTiles):0] allocated_size_i,
28+
input logic [NumDASPartitions-1:0][DataWidth-1:0] start_addr_scheme_i,
29+
output logic [$clog2(NumTiles):0] group_factor_o,
30+
output logic [$clog2(NumTiles):0] allocated_size_o,
31+
output logic [AddrWidth-1:0] address_o
3332
);
3433
// Basic Settings
3534
localparam int unsigned BankOffsetBits = $clog2(NumBanksPerTile);
3635
localparam int unsigned TileIdBits = $clog2(NumTiles);
3736
localparam int unsigned ConstantBitsLSB = ByteOffset + BankOffsetBits;
3837

39-
// Heap Sequential Settings
40-
localparam int unsigned HeapSeqPerTileBits = $clog2(MemSizePerTile); // log2(8*4096) = 15 | RowIndexBits + ConstBits
41-
localparam int unsigned HeapSeqTotalBits = HeapSeqPerTileBits+TileIdBits; // 15+7=22 | used for address_o assignment
42-
localparam int unsigned RowIndexBits = HeapSeqPerTileBits-ConstantBitsLSB; // 15-7=8 | RowIndex
43-
4438
if (Bypass || NumTiles < 2) begin
4539
assign address_o = address_i;
4640
end else begin
47-
// ------ Heap Sequential Signals ------ //
48-
// `shift_index` : how many bits to shift for TileID bits in each partition
49-
// `shift_index_sc`: how many bits need to swap within Row Index
50-
logic [3:0][2:0] shift_index;
51-
logic [3:0][2:0] shift_index_sc;
52-
for (genvar i = 0; i < 4; i++) begin : gen_shift_index
53-
always_comb begin
54-
case(group_factor_i[i])
55-
128: shift_index[i] = 7;
56-
64: shift_index[i] = 6;
57-
32: shift_index[i] = 5;
58-
16: shift_index[i] = 4;
59-
8: shift_index[i] = 3;
60-
4: shift_index[i] = 2;
61-
2: shift_index[i] = 1;
62-
default: shift_index[i] = 0;
63-
endcase
64-
65-
case(allocated_size_i[i])
66-
128: shift_index_sc[i] = 7;
67-
64: shift_index_sc[i] = 6;
68-
32: shift_index_sc[i] = 5;
69-
16: shift_index_sc[i] = 4;
70-
8: shift_index_sc[i] = 3;
71-
4: shift_index_sc[i] = 2;
72-
2: shift_index_sc[i] = 1;
73-
default: shift_index_sc[i] = 0;
74-
endcase
75-
end
76-
end
77-
78-
// post-scramble row index
79-
logic [RowIndexBits-1:0] post_scramble_row_index;
80-
logic [TileIdBits-1:0] post_scramble_tile_id;
81-
82-
logic [3:0][RowIndexBits-1:0] mask_row_index, mask_row_index_n;
83-
logic [3:0][TileIdBits-1:0] mask_tile_id, mask_tile_id_n;
8441

85-
logic [TileIdBits-1:0] heap_tile_id;
42+
// ------ Heap Sequential Signals ------ //
8643

87-
for (genvar j = 0; j < 4; j++) begin : gen_mask
88-
assign mask_row_index[j] = (shift_index_sc[j] == 0) ? {RowIndexBits{1'b0}} : ({RowIndexBits{1'b1}} >> (RowIndexBits-shift_index_sc[j]));
89-
assign mask_tile_id[j] = (shift_index[j] == 0) ? {TileIdBits{1'b0}} : ({TileIdBits{1'b1}} >> (TileIdBits -shift_index[j]));
90-
91-
assign mask_row_index_n[j] = ~mask_row_index[j];
92-
assign mask_tile_id_n[j] = ~mask_tile_id[j];
44+
// `tile_index`: log2 of the group factor (for power-of-two group factors), i.e. how many low TileID bits are kept in place in each partition
45+
// `row_index`: log2 of the allocated size (for power-of-two sizes), i.e. how many bits need to be swapped within the Row Index
46+
logic [NumDASPartitions-1:0][$clog2($clog2(NumTiles)+1)-1:0] tile_index;
47+
logic [NumDASPartitions-1:0][$clog2($clog2(NumTiles)+1)-1:0] row_index;
48+
49+
for (genvar i = 0; i < NumDASPartitions; i++) begin : gen_shift_index
50+
lzc #(
51+
.WIDTH ($clog2(NumTiles)+1),
52+
.MODE (1'b0 )
53+
) i_log_tile_index (
54+
.in_i (group_factor_i[i]),
55+
.cnt_o (tile_index[i] ),
56+
.empty_o (/* Unused */ )
57+
);
58+
lzc #(
59+
.WIDTH ($clog2(NumTiles)+1),
60+
.MODE (1'b0 )
61+
) i_log_row_index (
62+
.in_i (allocated_size_i[i][$clog2(NumTiles):0]),
63+
.cnt_o (row_index[i] ),
64+
.empty_o (/* Unused */ )
65+
);
9366
end
9467

95-
assign heap_tile_id = address_i[(TileIdBits+ConstantBitsLSB-1):ConstantBitsLSB];
96-
9768
always_comb begin
69+
9870
// Default: Unscrambled
9971
address_o = address_i;
10072
group_factor_o = '0;
10173
allocated_size_o = '0;
102-
post_scramble_row_index = 'b0;
103-
post_scramble_tile_id = 'b0;
104-
// Need one more logic for interleaved heap region
105-
// group_factor_o = {7{1'b1}};
106-
// Sequential Heap Region
74+
10775
// TODO (bowwang): add a new register holding the start address of the sequential heap region; it is currently hard-coded (32'h00120000)
108-
// if (address_i < start_addr_scheme_i[0]) begin
10976
if (address_i < 32'h00120000) begin
110-
group_factor_o = 128; // fully interleaved
111-
allocated_size_o = num_bytes_i/(4*4096);
112-
end else if ( (address_i >= start_addr_scheme_i[0]) && (address_i < start_addr_scheme_i[0]+MemSizePerRow*allocated_size_i[0]) ) begin
113-
114-
post_scramble_row_index = 'b0;
115-
post_scramble_tile_id = 'b0;
116-
// 1. `post_scramble_row_index` generation
117-
post_scramble_row_index |= (address_i >> (ConstantBitsLSB + shift_index[0])) & mask_row_index[0];
118-
post_scramble_row_index |= (address_i >> (ConstantBitsLSB + TileIdBits )) & mask_row_index_n[0];
119-
120-
// 2. `post_scramble_tile_id` generation
121-
post_scramble_tile_id |= heap_tile_id & mask_tile_id[0];
122-
post_scramble_tile_id |= (address_i >> (ConstantBitsLSB + shift_index_sc[0])) & mask_tile_id_n[0];
123-
124-
address_o[HeapSeqTotalBits-1:ConstantBitsLSB] = {post_scramble_row_index, post_scramble_tile_id};
125-
group_factor_o = group_factor_i[0];
126-
allocated_size_o = allocated_size_i[0];
127-
end else if ( (address_i >= start_addr_scheme_i[1]) && (address_i < start_addr_scheme_i[1]+MemSizePerRow*allocated_size_i[1]) ) begin
128-
129-
post_scramble_row_index = 'b0;
130-
post_scramble_tile_id = 'b0;
131-
// 1. `post_scramble_row_index` generation
132-
post_scramble_row_index |= (address_i >> (ConstantBitsLSB + shift_index[1])) & mask_row_index[1];
133-
post_scramble_row_index |= (address_i >> (ConstantBitsLSB + TileIdBits )) & mask_row_index_n[1];
134-
135-
// 2. `post_scramble_tile_id` generation
136-
post_scramble_tile_id |= heap_tile_id & mask_tile_id[1];
137-
post_scramble_tile_id |= (address_i >> (ConstantBitsLSB + shift_index_sc[1])) & mask_tile_id_n[1];
138-
139-
address_o[HeapSeqTotalBits-1:ConstantBitsLSB] = {post_scramble_row_index, post_scramble_tile_id};
140-
group_factor_o = group_factor_i[1];
141-
allocated_size_o = allocated_size_i[1];
142-
end else if ( (address_i >= start_addr_scheme_i[2]) && (address_i < start_addr_scheme_i[2]+MemSizePerRow*allocated_size_i[2]) ) begin
143-
144-
post_scramble_row_index = 'b0;
145-
post_scramble_tile_id = 'b0;
146-
// 1. `post_scramble_row_index` generation
147-
post_scramble_row_index |= (address_i >> (ConstantBitsLSB + shift_index[2])) & mask_row_index[2];
148-
post_scramble_row_index |= (address_i >> (ConstantBitsLSB + TileIdBits )) & mask_row_index_n[2];
149-
150-
// 2. `post_scramble_tile_id` generation
151-
post_scramble_tile_id |= heap_tile_id & mask_tile_id[2];
152-
post_scramble_tile_id |= (address_i >> (ConstantBitsLSB + shift_index_sc[2])) & mask_tile_id_n[2];
153-
154-
address_o[HeapSeqTotalBits-1:ConstantBitsLSB] = {post_scramble_row_index, post_scramble_tile_id};
155-
group_factor_o = group_factor_i[2];
156-
allocated_size_o = allocated_size_i[2];
157-
end else if ( (address_i >= start_addr_scheme_i[3]) && (address_i < start_addr_scheme_i[3]+MemSizePerRow*allocated_size_i[3]) ) begin
158-
159-
post_scramble_row_index = 'b0;
160-
post_scramble_tile_id = 'b0;
161-
// 1. `post_scramble_row_index` generation
162-
post_scramble_row_index |= (address_i >> (ConstantBitsLSB + shift_index[3])) & mask_row_index[3];
163-
post_scramble_row_index |= (address_i >> (ConstantBitsLSB + TileIdBits )) & mask_row_index_n[3];
164-
165-
// 2. `post_scramble_tile_id` generation
166-
post_scramble_tile_id |= heap_tile_id & mask_tile_id[3];
167-
post_scramble_tile_id |= (address_i >> (ConstantBitsLSB + shift_index_sc[3])) & mask_tile_id_n[3];
168-
169-
address_o[HeapSeqTotalBits-1:ConstantBitsLSB] = {post_scramble_row_index, post_scramble_tile_id};
170-
group_factor_o = group_factor_i[3];
171-
allocated_size_o = allocated_size_i[3];
77+
group_factor_o = NumTiles; // fully interleaved
78+
allocated_size_o = num_bytes_i / MemSizePerRow;
79+
80+
// DAS address scrambling
81+
end else begin
82+
83+
for (int p = 0; p < NumDASPartitions; p++) begin
84+
if ( (address_i >= start_addr_scheme_i[p]) && (address_i < start_addr_scheme_i[p]+MemSizePerRow*allocated_size_i[p]) ) begin
85+
address_o = '0;
86+
address_o |= address_i & ((1 << (tile_index[p]+ConstantBitsLSB)) - 1);
87+
address_o |= ((address_i >> (row_index[p]+tile_index[p]+ConstantBitsLSB)) << (tile_index[p]+ConstantBitsLSB)) & ((1 << (TileIdBits+ConstantBitsLSB)) - 1);
88+
address_o |= ((address_i >> (tile_index[p]+ConstantBitsLSB)) << (TileIdBits + ConstantBitsLSB)) & ((1 << (row_index[p]+TileIdBits+ConstantBitsLSB)) - 1);
89+
address_o |= address_i & ~((1 << (row_index[p]+TileIdBits+ConstantBitsLSB)) - 1);
90+
group_factor_o = group_factor_i[p];
91+
allocated_size_o = allocated_size_i[p];
92+
end
93+
end
94+
17295
end
17396
end
17497

@@ -177,6 +100,5 @@ module idma_address_scrambler #(
177100
// Check for unsupported configurations
178101
if (NumBanksPerTile < 2)
179102
$fatal(1, "NumBanksPerTile must be greater than 2. The special case '1' is currently not supported!");
180-
if (HeapSeqMemSizePerTile % (2**ByteOffset*NumBanksPerTile) != 0)
181-
$fatal(1, "HeapSeqMemSizePerTile must be a multiple of BankWidth*NumBanksPerTile!");
103+
182104
endmodule : idma_address_scrambler

0 commit comments

Comments
 (0)