Tue, 06 Apr 2010 18:27:55 +0100
Make cache 2-way associative
Switched from Direct Mapped to 2-Way Set Associative caches. Should boost speed
a bit.
1 // =============================================================================
2 // COPYRIGHT NOTICE
3 // Copyright 2006 (c) Lattice Semiconductor Corporation
4 // ALL RIGHTS RESERVED
5 // This confidential and proprietary software may be used only as authorised by
6 // a licensing agreement from Lattice Semiconductor Corporation.
7 // The entire notice above must be reproduced on all authorized copies and
8 // copies may only be made to the extent permitted by a licensing agreement from
9 // Lattice Semiconductor Corporation.
10 //
11 // Lattice Semiconductor Corporation TEL : 1-800-Lattice (USA and Canada)
12 // 5555 NE Moore Court 408-826-6000 (other locations)
13 // Hillsboro, OR 97124 web : http://www.latticesemi.com/
14 // U.S.A email: techsupport@latticesemi.com
15 // =============================================================================/
16 // FILE DETAILS
17 // Project : LatticeMico32
18 // File : lm32_dcache.v
19 // Title : Data cache
20 // Dependencies : lm32_include.v
21 // Version : 6.1.17
22 // : Initial Release
23 // Version : 7.0SP2, 3.0
24 // : No Change
25 // Version : 3.1
26 // : Support for user-selected resource usage when implementing
27 // : cache memory. Additional parameters must be defined when
28 // : invoking lm32_ram.v
29 // =============================================================================
31 `include "lm32_include.v"
33 `ifdef CFG_DCACHE_ENABLED
// Bit ranges that split a load/store address into cache fields. The msb/lsb
// names are localparams declared inside lm32_dcache, so these macros only
// expand to something meaningful inside that module.
35 `define LM32_DC_ADDR_OFFSET_RNG addr_offset_msb:addr_offset_lsb
36 `define LM32_DC_ADDR_SET_RNG addr_set_msb:addr_set_lsb
37 `define LM32_DC_ADDR_TAG_RNG addr_tag_msb:addr_tag_lsb
// Set and offset fields taken together: the index of one word in a way's data memory
38 `define LM32_DC_ADDR_IDX_RNG addr_set_msb:addr_offset_lsb
// Tag memories are addressed by set only; data memories by set and word offset
40 `define LM32_DC_TMEM_ADDR_WIDTH addr_set_width
41 `define LM32_DC_TMEM_ADDR_RNG (`LM32_DC_TMEM_ADDR_WIDTH-1):0
42 `define LM32_DC_DMEM_ADDR_WIDTH (addr_offset_width+addr_set_width)
43 `define LM32_DC_DMEM_ADDR_RNG (`LM32_DC_DMEM_ADDR_WIDTH-1):0
// A tag memory entry is the address tag (upper bits) plus a valid flag in bit 0
45 `define LM32_DC_TAGS_WIDTH (addr_tag_width+1)
46 `define LM32_DC_TAGS_RNG (`LM32_DC_TAGS_WIDTH-1):0
47 `define LM32_DC_TAGS_TAG_RNG (`LM32_DC_TAGS_WIDTH-1):1
48 `define LM32_DC_TAGS_VALID_RNG 0
// Control FSM state encoding: one bit per state, so each state is decoded
// from a single bit of the state register (bit 0 flush, bit 1 check, bit 2 refill)
50 `define LM32_DC_STATE_RNG 2:0
51 `define LM32_DC_STATE_FLUSH 3'b001
52 `define LM32_DC_STATE_CHECK 3'b010
53 `define LM32_DC_STATE_REFILL 3'b100
55 /////////////////////////////////////////////////////
56 // Module interface
57 /////////////////////////////////////////////////////
// lm32_dcache: data cache with per-way tag and data memories (lm32_ram).
//
// A three-state FSM controls the cache:
//   FLUSH  - writes an invalid tag entry to every set, one set per clock.
//            Entered on reset, and from CHECK when dflush is seen.
//   CHECK  - normal operation; M-stage loads are compared against the tags.
//   REFILL - entered on a load miss; one word is written per refill_ready
//            until the line is complete, then restart_request is raised.
// Only loads can miss. A store writes the data memory only when it hits a
// valid line. stall_request is asserted whenever the FSM is not in CHECK.
59 module lm32_dcache (
60 // ----- Inputs -----
61 clk_i,
62 rst_i,
63 stall_a,
64 stall_x,
65 stall_m,
66 address_x,
67 address_m,
68 load_q_m,
69 store_q_m,
70 store_data,
71 store_byte_select,
72 refill_ready,
73 refill_data,
74 dflush,
75 // ----- Outputs -----
76 stall_request,
77 restart_request,
78 refill_request,
79 refill_address,
80 refilling,
81 load_data
82 );
84 /////////////////////////////////////////////////////
85 // Parameters
86 /////////////////////////////////////////////////////
// NOTE(review): the generate blocks further down only produce logic for
// 1 or 2 ways (load_data select, write enables, way rotation).
88 parameter associativity = 1; // Associativity of the cache (Number of ways)
89 parameter sets = 512; // Number of sets
90 parameter bytes_per_line = 16; // Number of bytes per cache line
// NOTE(review): base_address and limit are declared but never referenced in
// this module; the tag width below comes from the CFG_DCACHE_* macros instead.
91 parameter base_address = 0; // Base address of cachable memory
92 parameter limit = 0; // Limit (highest address) of cachable memory
// Address field geometry. clogb2 comes from lm32_functions.v (not shown here).
// The offset field starts at address bit 2 and its width has 2 subtracted, so
// it selects a 32-bit word within a line; address bits 1:0 are not used.
94 localparam addr_offset_width = clogb2(bytes_per_line)-1-2;
95 localparam addr_set_width = clogb2(sets)-1;
96 localparam addr_offset_lsb = 2;
97 localparam addr_offset_msb = (addr_offset_lsb+addr_offset_width-1);
// The set field sits directly above the offset field, the tag directly above the set
98 localparam addr_set_lsb = (addr_offset_msb+1);
99 localparam addr_set_msb = (addr_set_lsb+addr_set_width-1);
100 localparam addr_tag_lsb = (addr_set_msb+1);
// Top tag bit is sized from the extent of the cachable region (macro values)
101 localparam addr_tag_msb = clogb2(`CFG_DCACHE_LIMIT-`CFG_DCACHE_BASE_ADDRESS)-1;
102 localparam addr_tag_width = (addr_tag_msb-addr_tag_lsb+1);
104 /////////////////////////////////////////////////////
105 // Inputs
106 /////////////////////////////////////////////////////
108 input clk_i; // Clock
109 input rst_i; // Reset
// Pipeline stall inputs: stall_a gates the clearing of restart_request,
// stall_x gates the memory read ports, stall_m gates miss detection and
// the data memory write port.
111 input stall_a; // Stall A stage
112 input stall_x; // Stall X stage
113 input stall_m; // Stall M stage
// address_x drives the tag/data memory read addresses; address_m is what the
// tags are compared against and supplies the store and refill addresses.
115 input [`LM32_WORD_RNG] address_x; // X stage load/store address
116 input [`LM32_WORD_RNG] address_m; // M stage load/store address
117 input load_q_m; // Load instruction in M stage
118 input store_q_m; // Store instruction in M stage
119 input [`LM32_WORD_RNG] store_data; // Data to store
120 input [`LM32_BYTE_SELECT_RNG] store_byte_select; // Which bytes in store data should be modified
122 input refill_ready; // Indicates next word of refill data is ready
123 input [`LM32_WORD_RNG] refill_data; // Refill data
// dflush is only acted on while the FSM is in the check state and no miss is
// being taken in the same cycle.
125 input dflush; // Indicates cache should be flushed
127 /////////////////////////////////////////////////////
128 // Outputs
129 /////////////////////////////////////////////////////
// High whenever the FSM is flushing or refilling (i.e. not in the check state)
131 output stall_request; // Request pipeline be stalled because cache is busy
132 wire stall_request;
// Set when the last word of a refill is written; cleared in the check state
// once stall_a is low.
133 output restart_request; // Request to restart instruction that caused the cache miss
134 reg restart_request;
// Set on the clock a miss is detected and cleared on the first clock spent in
// the refill state, so it is high for a single cycle per miss.
135 output refill_request; // Request a refill
136 reg refill_request;
// Captured from address_m when the miss is detected
137 output [`LM32_WORD_RNG] refill_address; // Address to refill from
138 reg [`LM32_WORD_RNG] refill_address;
// Registered copy of the refill state bit (lags the FSM by one clock)
139 output refilling; // Indicates if the cache is currently refilling
140 reg refilling;
141 output [`LM32_WORD_RNG] load_data; // Data read from cache
142 wire [`LM32_WORD_RNG] load_data;
144 /////////////////////////////////////////////////////
145 // Internal nets and registers
146 /////////////////////////////////////////////////////
148 wire read_port_enable; // Cache memory read port clock enable
149 wire write_port_enable; // Cache memory write port clock enable
// Per-way vectors below are indexed [0:associativity-1], one bit or word per way
150 wire [0:associativity-1] way_tmem_we; // Tag memory write enable
151 wire [0:associativity-1] way_dmem_we; // Data memory write enable
152 wire [`LM32_WORD_RNG] way_data[0:associativity-1]; // Data read from data memory
153 wire [`LM32_DC_TAGS_TAG_RNG] way_tag[0:associativity-1];// Tag read from tag memory
154 wire [0:associativity-1] way_valid; // Indicates which ways are valid
155 wire [0:associativity-1] way_match; // Indicates which ways matched
156 wire miss; // Indicates no ways matched
158 wire [`LM32_DC_TMEM_ADDR_RNG] tmem_read_address; // Tag memory read address
159 wire [`LM32_DC_TMEM_ADDR_RNG] tmem_write_address; // Tag memory write address
160 wire [`LM32_DC_DMEM_ADDR_RNG] dmem_read_address; // Data memory read address
161 wire [`LM32_DC_DMEM_ADDR_RNG] dmem_write_address; // Data memory write address
162 wire [`LM32_DC_TAGS_RNG] tmem_write_data; // Tag memory write data
163 reg [`LM32_DC_DMEM_ADDR_RNG] dmem_write_data; // Data memory write data
165 reg [`LM32_DC_STATE_RNG] state; // Current state of FSM
// flushing / check / refill are single-bit decodes of the one-hot state register
166 wire flushing; // Indicates if cache is currently flushing
167 wire check; // Indicates if cache is currently checking for hits/misses
168 wire refill; // Indicates if cache is currently refilling
// A store only counts as valid while the FSM is in the check state
170 wire valid_store; // Indicates if there is a valid store instruction
// One-hot, one bit per way. Declared [associativity-1:0], i.e. the opposite
// bit order to the way_* vectors above; it is only ever indexed bit-by-bit.
171 reg [associativity-1:0] refill_way_select; // Which way should be refilled
172 reg [`LM32_DC_ADDR_OFFSET_RNG] refill_offset; // Which word in cache line should be refilled
173 wire last_refill; // Indicates when on last cycle of cache refill
// Counts down through the sets while flushing
174 reg [`LM32_DC_TMEM_ADDR_RNG] flush_set; // Which set is currently being flushed
// Generate loop indices: i iterates over ways, j over byte lanes
176 genvar i, j;
178 /////////////////////////////////////////////////////
179 // Functions
180 /////////////////////////////////////////////////////
182 `include "lm32_functions.v"
184 /////////////////////////////////////////////////////
185 // Instantiations
186 /////////////////////////////////////////////////////
// One group of memories per way. The generate block is named "memories", so
// the RAMs of way i sit under memories[i] in the hierarchy even though the
// instance names themselves all read way_0_*.
188 generate
189 for (i = 0; i < associativity; i = i + 1)
190 begin : memories
191 // Way data
// Data memories with fewer than 11 address bits use one 32-bit wide RAM per
// way. That RAM has a single write enable, so partial-word stores depend on
// the byte merge performed where dmem_write_data is computed.
192 if (`LM32_DC_DMEM_ADDR_WIDTH < 11)
193 begin : data_memories
194 lm32_ram
195 #(
196 // ----- Parameters -------
197 .data_width (32),
198 .address_width (`LM32_DC_DMEM_ADDR_WIDTH)
// Extra RAM implementation parameters are only passed on Lattice platforms
199 `ifdef PLATFORM_LATTICE
200 ,
201 `ifdef CFG_DCACHE_DAT_USE_DP_TRUE
202 .RAM_IMPLEMENTATION ("EBR"),
203 .RAM_TYPE ("RAM_DP_TRUE")
204 `else
205 `ifdef CFG_DCACHE_DAT_USE_SLICE
206 .RAM_IMPLEMENTATION ("SLICE")
207 `else
208 .RAM_IMPLEMENTATION ("AUTO")
209 `endif
210 `endif
211 `endif
212 ) way_0_data_ram
213 (
214 // ----- Inputs -------
215 .read_clk (clk_i),
216 .write_clk (clk_i),
217 .reset (rst_i),
218 .read_address (dmem_read_address),
219 .enable_read (read_port_enable),
220 .write_address (dmem_write_address),
221 .enable_write (write_port_enable),
222 .write_enable (way_dmem_we[i]),
223 .write_data (dmem_write_data),
224 // ----- Outputs -------
225 .read_data (way_data[i])
226 );
227 end
228 else
// Larger data memories are built from four byte-wide RAMs per way so that
// every byte lane has its own write enable.
229 begin
230 for (j = 0; j < 4; j = j + 1)
231 begin : byte_memories
232 lm32_ram
233 #(
234 // ----- Parameters -------
235 .data_width (8),
236 .address_width (`LM32_DC_DMEM_ADDR_WIDTH)
237 `ifdef PLATFORM_LATTICE
238 ,
239 `ifdef CFG_DCACHE_DAT_USE_DP_TRUE
240 .RAM_IMPLEMENTATION ("EBR"),
241 .RAM_TYPE ("RAM_DP_TRUE")
242 `else
243 `ifdef CFG_DCACHE_DAT_USE_SLICE
244 .RAM_IMPLEMENTATION ("SLICE")
245 `else
246 .RAM_IMPLEMENTATION ("AUTO")
247 `endif
248 `endif
249 `endif
250 ) way_0_data_ram
251 (
252 // ----- Inputs -------
253 .read_clk (clk_i),
254 .write_clk (clk_i),
255 .reset (rst_i),
256 .read_address (dmem_read_address),
257 .enable_read (read_port_enable),
258 .write_address (dmem_write_address),
259 .enable_write (write_port_enable),
// A lane is written when its byte is selected by the store, or unconditionally
// while refilling (a refill replaces the whole word).
260 .write_enable (way_dmem_we[i] & (store_byte_select[j] | refill)),
261 .write_data (dmem_write_data[(j+1)*8-1:j*8]),
262 // ----- Outputs -------
263 .read_data (way_data[i][(j+1)*8-1:j*8])
264 );
265 end
266 end
268 // Way tags
// One tag entry per set: the address tag plus a valid flag in bit 0.
// NOTE(review): the tag RAM implementation is chosen by the same
// CFG_DCACHE_DAT_* macros as the data RAMs; confirm that is intended.
269 lm32_ram
270 #(
271 // ----- Parameters -------
272 .data_width (`LM32_DC_TAGS_WIDTH),
273 .address_width (`LM32_DC_TMEM_ADDR_WIDTH)
274 `ifdef PLATFORM_LATTICE
275 ,
276 `ifdef CFG_DCACHE_DAT_USE_DP_TRUE
277 .RAM_IMPLEMENTATION ("EBR"),
278 .RAM_TYPE ("RAM_DP_TRUE")
279 `else
280 `ifdef CFG_DCACHE_DAT_USE_SLICE
281 .RAM_IMPLEMENTATION ("SLICE")
282 `else
283 .RAM_IMPLEMENTATION ("AUTO")
284 `endif
285 `endif
286 `endif
287 ) way_0_tag_ram
288 (
289 // ----- Inputs -------
290 .read_clk (clk_i),
291 .write_clk (clk_i),
292 .reset (rst_i),
293 .read_address (tmem_read_address),
294 .enable_read (read_port_enable),
295 .write_address (tmem_write_address),
// The tag write port enable is tied to `TRUE, unlike the data RAMs which use
// write_port_enable; tag writes are qualified by way_tmem_we alone.
296 .enable_write (`TRUE),
297 .write_enable (way_tmem_we[i]),
298 .write_data (tmem_write_data),
299 // ----- Outputs -------
// Entry is unpacked as {tag, valid}: valid is the least significant bit
300 .read_data ({way_tag[i], way_valid[i]})
301 );
302 end
304 endgenerate
306 /////////////////////////////////////////////////////
307 // Combinational logic
308 /////////////////////////////////////////////////////
310 // Compute which ways in the cache match the address being read
// A way hits when its stored tag equals the tag field of the M-stage address
// and its valid flag is set; both are checked in one concatenated compare.
311 generate
312 for (i = 0; i < associativity; i = i + 1)
313 begin : match
314 assign way_match[i] = ({way_tag[i], way_valid[i]} == {address_m[`LM32_DC_ADDR_TAG_RNG], `TRUE});
315 end
316 endgenerate
318 // Select data from way that matched the address being read
// With two ways, way 1 is the fall-through choice whenever way 0 does not
// match, so load_data is only meaningful when one of the ways matched.
// No assignment is generated for any associativity other than 1 or 2.
319 generate
320 if (associativity == 1)
321 begin : data_1
322 assign load_data = way_data[0];
323 end
324 else if (associativity == 2)
325 begin : data_2
326 assign load_data = way_match[0] ? way_data[0] : way_data[1];
327 end
328 endgenerate
// Data memory write data.
// While refilling, the incoming refill word is always what gets written.
// Otherwise the write is a store, and what is presented to the memories
// depends on how the data memory is organised.
generate
    if (`LM32_DC_DMEM_ADDR_WIDTH < 11)
    begin
        // Word-wide data RAM: build the store word lane by lane, taking each
        // byte from store_data when it is selected and from the word currently
        // read out of the cache when it is not, then let a refill override the
        // complete word.
        always @(*)
        begin
            dmem_write_data[`LM32_BYTE_0_RNG] = store_byte_select[0] ? store_data[`LM32_BYTE_0_RNG] : load_data[`LM32_BYTE_0_RNG];
            dmem_write_data[`LM32_BYTE_1_RNG] = store_byte_select[1] ? store_data[`LM32_BYTE_1_RNG] : load_data[`LM32_BYTE_1_RNG];
            dmem_write_data[`LM32_BYTE_2_RNG] = store_byte_select[2] ? store_data[`LM32_BYTE_2_RNG] : load_data[`LM32_BYTE_2_RNG];
            dmem_write_data[`LM32_BYTE_3_RNG] = store_byte_select[3] ? store_data[`LM32_BYTE_3_RNG] : load_data[`LM32_BYTE_3_RNG];
            if (refill == `TRUE)
                dmem_write_data = refill_data;
        end
    end
    else
    begin
        // Byte-wide data RAMs: each lane has its own write enable, so the store
        // word is passed through unmerged and a refill overrides it.
        // FIXME: Should use different write ports on dual port RAMs, but they don't work
        always @(*)
        begin
            dmem_write_data = store_data;
            if (refill == `TRUE)
                dmem_write_data = refill_data;
        end
    end
endgenerate
360 // Compute address to use to index into the data memories
// Write side: during a refill the word is placed at {set of the missing
// address, current refill offset}; otherwise the M-stage (store) address is
// used. Lines of 4 bytes or fewer have no offset field, so the refill
// address is just the set.
361 generate
362 if (bytes_per_line > 4)
363 assign dmem_write_address = (refill == `TRUE)
364 ? {refill_address[`LM32_DC_ADDR_SET_RNG], refill_offset}
365 : address_m[`LM32_DC_ADDR_IDX_RNG];
366 else
367 assign dmem_write_address = (refill == `TRUE)
368 ? refill_address[`LM32_DC_ADDR_SET_RNG]
369 : address_m[`LM32_DC_ADDR_IDX_RNG];
370 endgenerate
// Read side is addressed from the X stage, one stage ahead of the tag compare
371 assign dmem_read_address = address_x[`LM32_DC_ADDR_IDX_RNG];
372 // Compute address to use to index into the tag memories
// While flushing the set counter walks the tag memory; otherwise tag writes
// go to the set of the line being refilled.
373 assign tmem_write_address = (flushing == `TRUE)
374 ? flush_set
375 : refill_address[`LM32_DC_ADDR_SET_RNG];
376 assign tmem_read_address = address_x[`LM32_DC_ADDR_SET_RNG];
378 // Compute signal to indicate when we are on the last refill accesses
// The last word is reached when the refill offset counter is all ones; a
// line of 4 bytes or fewer is a single word, so every refill word is the last.
379 generate
380 if (bytes_per_line > 4)
381 assign last_refill = refill_offset == {addr_offset_width{1'b1}};
382 else
383 assign last_refill = `TRUE;
384 endgenerate
386 // Compute data and tag memory access enable
// Reads advance only when the X stage is not stalled. The data write port is
// open when a refill word arrives or the M stage is not stalled.
387 assign read_port_enable = (stall_x == `FALSE);
388 assign write_port_enable = (refill_ready == `TRUE) || !stall_m;
390 // Determine when we have a valid store
// Stores are only honoured while the FSM is in the check state
391 assign valid_store = (store_q_m == `TRUE) && (check == `TRUE);
393 // Compute data and tag memory write enables
// Data memory: written by a refill word (into the selected way when there are
// two ways) or by a store that hits that way.
// Tag memory: written by a refill word for the selected way, and for every
// way while flushing.
394 generate
395 if (associativity == 1)
396 begin : we_1
397 assign way_dmem_we[0] = (refill_ready == `TRUE) || ((valid_store == `TRUE) && (way_match[0] == `TRUE));
398 assign way_tmem_we[0] = (refill_ready == `TRUE) || (flushing == `TRUE);
399 end
400 else
// This branch is taken for any associativity other than 1 and is written
// for exactly two ways.
401 begin : we_2
402 assign way_dmem_we[0] = ((refill_ready == `TRUE) && (refill_way_select[0] == `TRUE)) || ((valid_store == `TRUE) && (way_match[0] == `TRUE));
403 assign way_dmem_we[1] = ((refill_ready == `TRUE) && (refill_way_select[1] == `TRUE)) || ((valid_store == `TRUE) && (way_match[1] == `TRUE));
404 assign way_tmem_we[0] = ((refill_ready == `TRUE) && (refill_way_select[0] == `TRUE)) || (flushing == `TRUE);
405 assign way_tmem_we[1] = ((refill_ready == `TRUE) && (refill_way_select[1] == `TRUE)) || (flushing == `TRUE);
406 end
407 endgenerate
409 // Valid bit written to the tag memory: set on the last refill word (or when
// valid_store is true), and always cleared while flushing. Earlier words of
// a multi-word refill therefore write the tag with the valid bit clear.
410 assign tmem_write_data[`LM32_DC_TAGS_VALID_RNG] = ((last_refill == `TRUE) || (valid_store == `TRUE)) && (flushing == `FALSE);
// The tag written is always the tag field of the refill address
411 assign tmem_write_data[`LM32_DC_TAGS_TAG_RNG] = refill_address[`LM32_DC_ADDR_TAG_RNG];
413 // Signals that indicate which state we are in
// One-hot decode: bit 0 = flush, bit 1 = check, bit 2 = refill
414 assign flushing = state[0];
415 assign check = state[1];
416 assign refill = state[2];
// A miss is a load in an unstalled M stage for which no way matched; stores
// never raise a miss.
418 assign miss = (~(|way_match)) && (load_q_m == `TRUE) && (stall_m == `FALSE);
// The pipeline is asked to stall whenever the cache is flushing or refilling
419 assign stall_request = (check == `FALSE);
421 /////////////////////////////////////////////////////
422 // Sequential logic
423 /////////////////////////////////////////////////////
425 // Record way selected for replacement on a cache miss
// refill_way_select is a one-hot mask that resets to way 0 and swaps its two
// low bits each time a refill is requested, so consecutive refills alternate
// between the two ways. The swap is hard-coded for two ways even though the
// generate condition accepts any associativity >= 2.
// For a single way the register is not generated at all.
426 generate
427 if (associativity >= 2)
428 begin : way_select
429 always @(posedge clk_i `CFG_RESET_SENSITIVITY)
430 begin
431 if (rst_i == `TRUE)
432 refill_way_select <= {{associativity-1{1'b0}}, 1'b1};
433 else
434 begin
435 if (refill_request == `TRUE)
436 refill_way_select <= {refill_way_select[0], refill_way_select[1]};
437 end
438 end
439 end
440 endgenerate
442 // Record whether we are currently refilling
// refilling is the refill state bit delayed by one clock; it is cleared on reset.
443 always @(posedge clk_i `CFG_RESET_SENSITIVITY)
444 begin
445 if (rst_i == `TRUE)
446 refilling <= `FALSE;
447 else
448 refilling <= refill;
449 end
451 // Data cache control FSM
// Reset starts a full flush. Afterwards the FSM sits in the check state and
// leaves it only for a refill (load miss) or another flush (dflush).
// NOTE(review): the case statement has no default arm, so a state value that
// is not one of the three one-hot encodings is held indefinitely.
452 always @(posedge clk_i `CFG_RESET_SENSITIVITY)
453 begin
454 if (rst_i == `TRUE)
455 begin
456 state <= `LM32_DC_STATE_FLUSH;
// Start at the highest set; the flush state counts down to set 0
457 flush_set <= {`LM32_DC_TMEM_ADDR_WIDTH{1'b1}};
458 refill_request <= `FALSE;
// No meaningful refill address exists until the first miss
459 refill_address <= {`LM32_WORD_WIDTH{1'bx}};
460 restart_request <= `FALSE;
461 end
462 else
463 begin
464 case (state)
466 // Flush the cache
// One set per clock. The counter is decremented on the final cycle as well,
// so it wraps back to all ones and is ready for the next flush.
467 `LM32_DC_STATE_FLUSH:
468 begin
469 if (flush_set == {`LM32_DC_TMEM_ADDR_WIDTH{1'b0}})
470 state <= `LM32_DC_STATE_CHECK;
471 flush_set <= flush_set - 1'b1;
472 end
474 // Check for cache misses
475 `LM32_DC_STATE_CHECK:
476 begin
// Drop the restart request once the A stage is no longer stalled
477 if (stall_a == `FALSE)
478 restart_request <= `FALSE;
// A miss takes priority over a flush request seen in the same cycle
479 if (miss == `TRUE)
480 begin
481 refill_request <= `TRUE;
482 refill_address <= address_m;
483 state <= `LM32_DC_STATE_REFILL;
484 end
485 else if (dflush == `TRUE)
486 state <= `LM32_DC_STATE_FLUSH;
487 end
489 // Refill a cache line
490 `LM32_DC_STATE_REFILL:
491 begin
// refill_request was raised on entry; clearing it here makes it a one-clock pulse
492 refill_request <= `FALSE;
493 if (refill_ready == `TRUE)
494 begin
// Last word written: ask for the missing instruction to be restarted
495 if (last_refill == `TRUE)
496 begin
497 restart_request <= `TRUE;
498 state <= `LM32_DC_STATE_CHECK;
499 end
500 end
501 end
503 endcase
504 end
505 end
// Refill word counter. Only generated when a cache line holds more than one
// 32-bit word; for single-word lines no counter exists.
generate
    if (bytes_per_line > 4)
    begin
        always @(posedge clk_i `CFG_RESET_SENSITIVITY)
        begin
            if (rst_i == `TRUE)
                refill_offset <= {addr_offset_width{1'b0}};
            // Check state: a miss restarts the count at the first word of the line
            else if ((state == `LM32_DC_STATE_CHECK) && (miss == `TRUE))
                refill_offset <= {addr_offset_width{1'b0}};
            // Refill state: step to the next word each time one has been delivered
            else if ((state == `LM32_DC_STATE_REFILL) && (refill_ready == `TRUE))
                refill_offset <= refill_offset + 1'b1;
            // Any other state or condition: hold the current offset
        end
    end
endgenerate
539 endmodule
541 `endif