77// Current constraints:
88
99// Author: Bowen Wang <[email protected]>
10+ // Author: Marco Bertuletti <[email protected]>
1011
1112module idma_address_scrambler # (
1213 parameter int unsigned AddrWidth = 32 ,
1314 parameter int unsigned DataWidth = 32 ,
1415 parameter int unsigned ByteOffset = 2 ,
16+ parameter bit Bypass = 0 ,
1517 parameter int unsigned NumTiles = 128 ,
1618 parameter int unsigned NumBanksPerTile = 32 ,
17- parameter bit Bypass = 0 ,
18- parameter int unsigned SeqMemSizePerTile = 4 * 1024 ,
19- parameter int unsigned HeapSeqMemSizePerTile = 8 * 2048 ,
20- parameter int unsigned MemSizePerTile = 8 * 4 * 1024 ,
21- parameter int unsigned MemSizePerRow = 4 * 4 * 1024 , // 4bytes * 4096 banks
22- parameter int unsigned TCDMSize = 1024 * 1024
19+ parameter int unsigned TCDMSizePerBank = 1024 ,
20+ parameter int unsigned NumDASPartitions = 4 ,
21+ parameter int unsigned MemSizePerTile = NumBanksPerTile* TCDMSizePerBank,
22+ parameter int unsigned MemSizePerRow = (1 << ByteOffset)* NumBanksPerTile* NumTiles
2323) (
24- input logic [AddrWidth- 1 : 0 ] address_i,
25- input logic [31 : 0 ] num_bytes_i,
26- input logic [3 : 0 ][7 : 0 ] group_factor_i,
27- // For each allocation, the maximum number of rows assigned can be 128 rows
28- input logic [3 : 0 ][7 : 0 ] allocated_size_i,
29- input logic [3 : 0 ][DataWidth- 1 : 0 ] start_addr_scheme_i,
30- output logic [7 : 0 ] group_factor_o,
31- output logic [7 : 0 ] allocated_size_o,
32- output logic [AddrWidth- 1 : 0 ] address_o
24+ input logic [AddrWidth- 1 : 0 ] address_i,
25+ input logic [31 : 0 ] num_bytes_i,
26+ input logic [NumDASPartitions- 1 : 0 ][$clog2(NumTiles): 0 ] group_factor_i,
27+ input logic [NumDASPartitions- 1 : 0 ][$clog2(NumTiles): 0 ] allocated_size_i,
28+ input logic [NumDASPartitions- 1 : 0 ][DataWidth- 1 : 0 ] start_addr_scheme_i,
29+ output logic [$clog2(NumTiles): 0 ] group_factor_o,
30+ output logic [$clog2(NumTiles): 0 ] allocated_size_o,
31+ output logic [AddrWidth- 1 : 0 ] address_o
3332);
3433 // Basic Settings
3534 localparam int unsigned BankOffsetBits = $clog2 (NumBanksPerTile);
3635 localparam int unsigned TileIdBits = $clog2 (NumTiles);
3736 localparam int unsigned ConstantBitsLSB = ByteOffset + BankOffsetBits;
3837
39- // Heap Sequential Settings
40- localparam int unsigned HeapSeqPerTileBits = $clog2 (MemSizePerTile); // log2(8*4096) = 15 | RowIndexBits + ConstBits
41- localparam int unsigned HeapSeqTotalBits = HeapSeqPerTileBits+ TileIdBits; // 15+7=22 | used for address_o assignment
42- localparam int unsigned RowIndexBits = HeapSeqPerTileBits- ConstantBitsLSB; // 15-7=8 | RowIndex
43-
4438 if (Bypass || NumTiles < 2 ) begin
4539 assign address_o = address_i;
// Bypass path: the address is forwarded unscrambled and no DAS partition is
// ever selected. Drive the side-band outputs to the same '0 default used by
// the scrambling branch so these output ports are never left undriven.
assign group_factor_o = '0;
assign allocated_size_o = '0;
4640 end else begin
47- // ------ Heap Sequential Signals ------ //
48- // `shift_index` : how many bits to shift for TileID bits in each partition
49- // `shift_index_sc`: how many bits need to swap within Row Index
50- logic [3 : 0 ][2 : 0 ] shift_index;
51- logic [3 : 0 ][2 : 0 ] shift_index_sc;
52- for (genvar i = 0 ; i < 4 ; i++ ) begin : gen_shift_index
53- always_comb begin
54- case (group_factor_i[i])
55- 128 : shift_index[i] = 7 ;
56- 64 : shift_index[i] = 6 ;
57- 32 : shift_index[i] = 5 ;
58- 16 : shift_index[i] = 4 ;
59- 8 : shift_index[i] = 3 ;
60- 4 : shift_index[i] = 2 ;
61- 2 : shift_index[i] = 1 ;
62- default : shift_index[i] = 0 ;
63- endcase
64-
65- case (allocated_size_i[i])
66- 128 : shift_index_sc[i] = 7 ;
67- 64 : shift_index_sc[i] = 6 ;
68- 32 : shift_index_sc[i] = 5 ;
69- 16 : shift_index_sc[i] = 4 ;
70- 8 : shift_index_sc[i] = 3 ;
71- 4 : shift_index_sc[i] = 2 ;
72- 2 : shift_index_sc[i] = 1 ;
73- default : shift_index_sc[i] = 0 ;
74- endcase
75- end
76- end
77-
78- // post-scramble row index
79- logic [RowIndexBits- 1 : 0 ] post_scramble_row_index;
80- logic [TileIdBits- 1 : 0 ] post_scramble_tile_id;
81-
82- logic [3 : 0 ][RowIndexBits- 1 : 0 ] mask_row_index, mask_row_index_n;
83- logic [3 : 0 ][TileIdBits- 1 : 0 ] mask_tile_id, mask_tile_id_n;
8441
85- logic [TileIdBits - 1 : 0 ] heap_tile_id;
42+ // ------ Heap Sequential Signals ------ //
8643
87- for (genvar j = 0 ; j < 4 ; j++ ) begin : gen_mask
88- assign mask_row_index[j] = (shift_index_sc[j] == 0 ) ? { RowIndexBits{ 1'b0 }} : ({ RowIndexBits{ 1'b1 }} >> (RowIndexBits- shift_index_sc[j]));
89- assign mask_tile_id[j] = (shift_index[j] == 0 ) ? { TileIdBits{ 1'b0 }} : ({ TileIdBits{ 1'b1 }} >> (TileIdBits - shift_index[j]));
90-
91- assign mask_row_index_n[j] = ~ mask_row_index[j];
92- assign mask_tile_id_n[j] = ~ mask_tile_id[j];
44+ // `tile_index`: per-partition `cnt_o` of the lzc fed with `group_factor_i`; used below as the shift amount for the TileID bits
45+ // `row_index`: per-partition `cnt_o` of the lzc fed with `allocated_size_i`; used below as the number of Row Index bits to swap
46+ logic [NumDASPartitions- 1 : 0 ][$clog2 ($clog2 (NumTiles)+ 1 )- 1 : 0 ] tile_index;
47+ logic [NumDASPartitions- 1 : 0 ][$clog2 ($clog2 (NumTiles)+ 1 )- 1 : 0 ] row_index;
48+
49+ for (genvar i = 0 ; i < NumDASPartitions; i++ ) begin : gen_shift_index // one lzc pair per DAS partition
50+ lzc # (
51+ .WIDTH ($clog2 (NumTiles)+ 1 ),
52+ .MODE (1'b0 ) // NOTE(review): presumably trailing-zero count, i.e. log2 of a power-of-two input - confirm against the lzc cell
53+ ) i_log_tile_index (
54+ .in_i (group_factor_i[i]),
55+ .cnt_o (tile_index[i] ),
56+ .empty_o (/* Unused */ ) // NOTE(review): an all-zero input is not flagged here - confirm cnt_o is acceptable in that case
57+ );
58+ lzc # (
59+ .WIDTH ($clog2 (NumTiles)+ 1 ),
60+ .MODE (1'b0 ) // same mode as i_log_tile_index
61+ ) i_log_row_index (
62+ .in_i (allocated_size_i[i][$clog2 (NumTiles): 0 ]),
63+ .cnt_o (row_index[i] ),
64+ .empty_o (/* Unused */ ) // NOTE(review): an all-zero input is not flagged here - confirm cnt_o is acceptable in that case
65+ );
9366 end
9467
95- assign heap_tile_id = address_i[(TileIdBits+ ConstantBitsLSB- 1 ): ConstantBitsLSB];
96-
9768 always_comb begin
69+
9870 // Default: Unscrambled
9971 address_o = address_i;
10072 group_factor_o = '0 ;
10173 allocated_size_o = '0 ;
102- post_scramble_row_index = 'b0 ;
103- post_scramble_tile_id = 'b0 ;
104- // Need one more logic for interleaved heap region
105- // group_factor_o = {7{1'b1}};
106- // Sequential Heap Region
74+
10775 // TODO (bowwang): add a new register to indicate the start addr of sequential heap region, currently hard coded
108- // if (address_i < start_addr_scheme_i[0]) begin
10976 if (address_i < 32'h00120000 ) begin
110- group_factor_o = 128 ; // fully interleaved
111- allocated_size_o = num_bytes_i/ (4 * 4096 );
112- end else if ( (address_i >= start_addr_scheme_i[0 ]) && (address_i < start_addr_scheme_i[0 ]+ MemSizePerRow* allocated_size_i[0 ]) ) begin
113-
114- post_scramble_row_index = 'b0 ;
115- post_scramble_tile_id = 'b0 ;
116- // 1. `post_scramble_row_index` generation
117- post_scramble_row_index | = (address_i >> (ConstantBitsLSB + shift_index[0 ])) & mask_row_index[0 ];
118- post_scramble_row_index | = (address_i >> (ConstantBitsLSB + TileIdBits )) & mask_row_index_n[0 ];
119-
120- // 2. `post_scramble_tile_id` generation
121- post_scramble_tile_id | = heap_tile_id & mask_tile_id[0 ];
122- post_scramble_tile_id | = (address_i >> (ConstantBitsLSB + shift_index_sc[0 ])) & mask_tile_id_n[0 ];
123-
124- address_o[HeapSeqTotalBits- 1 : ConstantBitsLSB] = { post_scramble_row_index, post_scramble_tile_id} ;
125- group_factor_o = group_factor_i[0 ];
126- allocated_size_o = allocated_size_i[0 ];
127- end else if ( (address_i >= start_addr_scheme_i[1 ]) && (address_i < start_addr_scheme_i[1 ]+ MemSizePerRow* allocated_size_i[1 ]) ) begin
128-
129- post_scramble_row_index = 'b0 ;
130- post_scramble_tile_id = 'b0 ;
131- // 1. `post_scramble_row_index` generation
132- post_scramble_row_index | = (address_i >> (ConstantBitsLSB + shift_index[1 ])) & mask_row_index[1 ];
133- post_scramble_row_index | = (address_i >> (ConstantBitsLSB + TileIdBits )) & mask_row_index_n[1 ];
134-
135- // 2. `post_scramble_tile_id` generation
136- post_scramble_tile_id | = heap_tile_id & mask_tile_id[1 ];
137- post_scramble_tile_id | = (address_i >> (ConstantBitsLSB + shift_index_sc[1 ])) & mask_tile_id_n[1 ];
138-
139- address_o[HeapSeqTotalBits- 1 : ConstantBitsLSB] = { post_scramble_row_index, post_scramble_tile_id} ;
140- group_factor_o = group_factor_i[1 ];
141- allocated_size_o = allocated_size_i[1 ];
142- end else if ( (address_i >= start_addr_scheme_i[2 ]) && (address_i < start_addr_scheme_i[2 ]+ MemSizePerRow* allocated_size_i[2 ]) ) begin
143-
144- post_scramble_row_index = 'b0 ;
145- post_scramble_tile_id = 'b0 ;
146- // 1. `post_scramble_row_index` generation
147- post_scramble_row_index | = (address_i >> (ConstantBitsLSB + shift_index[2 ])) & mask_row_index[2 ];
148- post_scramble_row_index | = (address_i >> (ConstantBitsLSB + TileIdBits )) & mask_row_index_n[2 ];
149-
150- // 2. `post_scramble_tile_id` generation
151- post_scramble_tile_id | = heap_tile_id & mask_tile_id[2 ];
152- post_scramble_tile_id | = (address_i >> (ConstantBitsLSB + shift_index_sc[2 ])) & mask_tile_id_n[2 ];
153-
154- address_o[HeapSeqTotalBits- 1 : ConstantBitsLSB] = { post_scramble_row_index, post_scramble_tile_id} ;
155- group_factor_o = group_factor_i[2 ];
156- allocated_size_o = allocated_size_i[2 ];
157- end else if ( (address_i >= start_addr_scheme_i[3 ]) && (address_i < start_addr_scheme_i[3 ]+ MemSizePerRow* allocated_size_i[3 ]) ) begin
158-
159- post_scramble_row_index = 'b0 ;
160- post_scramble_tile_id = 'b0 ;
161- // 1. `post_scramble_row_index` generation
162- post_scramble_row_index | = (address_i >> (ConstantBitsLSB + shift_index[3 ])) & mask_row_index[3 ];
163- post_scramble_row_index | = (address_i >> (ConstantBitsLSB + TileIdBits )) & mask_row_index_n[3 ];
164-
165- // 2. `post_scramble_tile_id` generation
166- post_scramble_tile_id | = heap_tile_id & mask_tile_id[3 ];
167- post_scramble_tile_id | = (address_i >> (ConstantBitsLSB + shift_index_sc[3 ])) & mask_tile_id_n[3 ];
168-
169- address_o[HeapSeqTotalBits- 1 : ConstantBitsLSB] = { post_scramble_row_index, post_scramble_tile_id} ;
170- group_factor_o = group_factor_i[3 ];
171- allocated_size_o = allocated_size_i[3 ];
77+ group_factor_o = NumTiles; // fully interleaved
78+ allocated_size_o = num_bytes_i / MemSizePerRow;
79+
80+ // DAS address scrambling
81+ end else begin
82+
83+ for (int p = 0 ; p < NumDASPartitions; p++ ) begin // scan every DAS partition; there is no early exit, so if several ranges match, the highest p wins
84+ if ( (address_i >= start_addr_scheme_i[p]) && (address_i < start_addr_scheme_i[p]+ MemSizePerRow* allocated_size_i[p]) ) begin // hit when address_i lies in [start, start + MemSizePerRow * allocated_size)
85+ address_o = '0 ; // rebuild the address from four bit-fields OR-ed together
86+ address_o | = address_i & ((1 << (tile_index[p]+ ConstantBitsLSB)) - 1 ); // (1) bits below tile_index+ConstantBitsLSB pass through unchanged
87+ address_o | = ((address_i >> (row_index[p]+ tile_index[p]+ ConstantBitsLSB)) << (tile_index[p]+ ConstantBitsLSB)) & ((1 << (TileIdBits+ ConstantBitsLSB)) - 1 ); // (2) bits from row_index+tile_index+ConstantBitsLSB upward move down to tile_index+ConstantBitsLSB; only those below TileIdBits+ConstantBitsLSB are kept
88+ address_o | = ((address_i >> (tile_index[p]+ ConstantBitsLSB)) << (TileIdBits + ConstantBitsLSB)) & ((1 << (row_index[p]+ TileIdBits+ ConstantBitsLSB)) - 1 ); // (3) bits from tile_index+ConstantBitsLSB upward move up to TileIdBits+ConstantBitsLSB; only those below row_index+TileIdBits+ConstantBitsLSB are kept
89+ address_o | = address_i & ~ ((1 << (row_index[p]+ TileIdBits+ ConstantBitsLSB)) - 1 ); // (4) bits at and above row_index+TileIdBits+ConstantBitsLSB pass through unchanged
90+ group_factor_o = group_factor_i[p]; // report the matching partition's group factor
91+ allocated_size_o = allocated_size_i[p]; // report the matching partition's allocated size
92+ end
93+ end
94+
17295 end
17396 end
17497
@@ -177,6 +100,5 @@ module idma_address_scrambler #(
177100 // Check for unsupported configurations (elaboration-time guard: at least two banks per tile are required)
178101 if (NumBanksPerTile < 2 )
179102 $fatal (1 , " NumBanksPerTile must be at least 2. The special case '1' is currently not supported!" );
180- if (HeapSeqMemSizePerTile % (2 ** ByteOffset* NumBanksPerTile) != 0 )
181- $fatal (1 , " HeapSeqMemSizePerTile must be a multiple of BankWidth*NumBanksPerTile!" );
103+
182104endmodule : idma_address_scrambler
0 commit comments