Sun, 06 Mar 2011 21:03:32 +0000
Commit GSI patches from Wesley Terpstra
- Add JTAG capture pin
==> allows removing sensitivity to reg_update which caused clocking problems making JTAG unstable
- Use register file backed by RAM blocks
==> saves considerable area and improves speed on Altera
... be sure to enable it using `define CFG_EBR_POSEDGE_REGISTER_FILE
- Fix a minor problem where compilation fails when interrupts are not supported
- Add support for flushing the icache and dcache via JTAG
- Fix wrong width assignments for PC
Multiplier patch has been left out for now; don't the design synthesizers (Quartus / Xst) split the multiply automatically?
Original-Author: Wesley Terpstra <w.terpsta gsi.de>
Original-Source: Milkymist mailing list postings, 2011-02-28 (11:19 and 13:32) and 2011-03-01
Original-Message-Ids: <4D6B84B5.9040604@gsi.de> <4D6BA3E4.3020609@gsi.de> <4D6CFFF2.6030703@gsi.de>
1 // =============================================================================
2 // COPYRIGHT NOTICE
3 // Copyright 2006 (c) Lattice Semiconductor Corporation
4 // ALL RIGHTS RESERVED
5 // This confidential and proprietary software may be used only as authorised by
6 // a licensing agreement from Lattice Semiconductor Corporation.
7 // The entire notice above must be reproduced on all authorized copies and
8 // copies may only be made to the extent permitted by a licensing agreement from
9 // Lattice Semiconductor Corporation.
10 //
11 // Lattice Semiconductor Corporation TEL : 1-800-Lattice (USA and Canada)
12 // 5555 NE Moore Court 408-826-6000 (other locations)
13 // Hillsboro, OR 97124 web : http://www.latticesemi.com/
14 // U.S.A email: techsupport@latticesemi.com
15 // =============================================================================/
16 // FILE DETAILS
17 // Project : LatticeMico32
18 // File : lm32_load_store_unit.v
19 // Title : Load and store unit
20 // Dependencies : lm32_include.v
21 // Version : 6.1.17
22 // : Initial Release
23 // Version : 7.0SP2, 3.0
24 // : No Change
25 // Version : 3.1
26 // : Instead of disallowing an instruction cache miss on a data cache
27 // : miss, both can now occur at the same time. If both occur at same
28 // : time, then restart address is the address of instruction that
29 // : caused data cache miss.
30 // Version : 3.2
31 // : EBRs use SYNC resets instead of ASYNC resets.
32 // Version : 3.3
33 // : Support for new non-cacheable Data Memory that is accessible by
34 // : the data port and has a one cycle access latency.
35 // Version : 3.4
36 // : No change
37 // Version : 3.5
38 // : Bug fix: Inline memory is correctly generated if it is not a
39 // : power-of-two
40 // =============================================================================
42 `include "lm32_include.v"
44 /////////////////////////////////////////////////////
45 // Module interface
46 /////////////////////////////////////////////////////
48 module lm32_load_store_unit (
49 // ----- Inputs -------
50 clk_i,
51 rst_i,
52 // From pipeline
53 stall_a,
54 stall_x,
55 stall_m,
56 kill_m,
57 exception_m,
58 store_operand_x,
59 load_store_address_x,
60 load_store_address_m,
61 load_store_address_w,
62 load_x,
63 store_x,
64 load_q_x,
65 store_q_x,
66 load_q_m,
67 store_q_m,
68 sign_extend_x,
69 size_x,
70 `ifdef CFG_DCACHE_ENABLED
71 dflush,
72 `endif
73 `ifdef CFG_IROM_ENABLED
74 irom_data_m,
75 `endif
76 // From Wishbone
77 d_dat_i,
78 d_ack_i,
79 d_err_i,
80 d_rty_i,
81 // ----- Outputs -------
82 // To pipeline
83 `ifdef CFG_DCACHE_ENABLED
84 dcache_refill_request,
85 dcache_restart_request,
86 dcache_stall_request,
87 dcache_refilling,
88 `endif
89 `ifdef CFG_IROM_ENABLED
90 irom_store_data_m,
91 irom_address_xm,
92 irom_we_xm,
93 irom_stall_request_x,
94 `endif
95 load_data_w,
96 stall_wb_load,
97 // To Wishbone
98 d_dat_o,
99 d_adr_o,
100 d_cyc_o,
101 d_sel_o,
102 d_stb_o,
103 d_we_o,
104 d_cti_o,
105 d_lock_o,
106 d_bte_o
107 );
109 /////////////////////////////////////////////////////
110 // Parameters
111 /////////////////////////////////////////////////////
113 parameter associativity = 1; // Associativity of the cache (Number of ways)
114 parameter sets = 512; // Number of sets
115 parameter bytes_per_line = 16; // Number of bytes per cache line
116 parameter base_address = 0; // Base address of cachable memory
117 parameter limit = 0; // Limit (highest address) of cachable memory
119 // For bytes_per_line == 4, we set 1 so part-select range isn't reversed, even though not really used
120 localparam addr_offset_width = bytes_per_line == 4 ? 1 : clogb2(bytes_per_line)-1-2;
121 localparam addr_offset_lsb = 2;
122 localparam addr_offset_msb = (addr_offset_lsb+addr_offset_width-1);
124 /////////////////////////////////////////////////////
125 // Inputs
126 /////////////////////////////////////////////////////
128 input clk_i; // Clock
129 input rst_i; // Reset
131 input stall_a; // A stage stall (in this module only forwarded to the data cache)
132 input stall_x; // X stage stall
133 input stall_m; // M stage stall
134 input kill_m; // Kill instruction in M stage
135 input exception_m; // An exception occurred in the M stage
137 input [`LM32_WORD_RNG] store_operand_x; // Data read from register to store
138 input [`LM32_WORD_RNG] load_store_address_x; // X stage load/store address
139 input [`LM32_WORD_RNG] load_store_address_m; // M stage load/store address
140 input [1:0] load_store_address_w; // W stage load/store address (only the two least significant bits are needed)
141 input load_x; // Load instruction in X stage (used here only by the data RAM bypass logic)
142 input store_x; // Store instruction in X stage (used here only by the data RAM bypass logic)
143 input load_q_x; // Load instruction in X stage; presumably the qualified form of load_x - TODO confirm against the pipeline
144 input store_q_x; // Store instruction in X stage; presumably the qualified form of store_x - TODO confirm against the pipeline
145 input load_q_m; // Load instruction in M stage
146 input store_q_m; // Store instruction in M stage
147 input sign_extend_x; // Whether load instruction in X stage should sign extend or zero extend
148 input [`LM32_SIZE_RNG] size_x; // Size of load or store (byte, hword, word)
150 `ifdef CFG_DCACHE_ENABLED
151 input dflush; // Flush the data cache
152 `endif
154 `ifdef CFG_IROM_ENABLED
155 input [`LM32_WORD_RNG] irom_data_m; // Data from Instruction-ROM
156 `endif
158 input [`LM32_WORD_RNG] d_dat_i; // Data Wishbone interface read data
159 input d_ack_i; // Data Wishbone interface acknowledgement
160 input d_err_i; // Data Wishbone interface error (terminates a cycle the same way as an acknowledgement)
161 input d_rty_i; // Data Wishbone interface retry (declared but not examined anywhere in this module)
163 /////////////////////////////////////////////////////
164 // Outputs
165 /////////////////////////////////////////////////////
167 `ifdef CFG_DCACHE_ENABLED
168 output dcache_refill_request; // Request to refill data cache
169 wire dcache_refill_request;
170 output dcache_restart_request; // Request to restart the instruction that caused a data cache miss
171 wire dcache_restart_request;
172 output dcache_stall_request; // Data cache stall request
173 wire dcache_stall_request;
174 output dcache_refilling; // Driven by the data cache's "refilling" port; also used below to sequence multi-word refills
175 wire dcache_refilling;
176 `endif
178 `ifdef CFG_IROM_ENABLED
179 output [`LM32_WORD_RNG] irom_store_data_m; // Store data to Instruction ROM (word wide; port range must match the wire declaration)
180 wire [`LM32_WORD_RNG] irom_store_data_m;
181 output [`LM32_WORD_RNG] irom_address_xm; // Load/store address to Instruction ROM: M stage address while an IROM store is in M, otherwise X stage address
182 wire [`LM32_WORD_RNG] irom_address_xm;
183 output irom_we_xm; // Write-enable of 2nd port of Instruction ROM
184 wire irom_we_xm;
185 output irom_stall_request_x; // Stall instruction in D stage (asserted while a store to the IROM is in the X stage)
186 wire irom_stall_request_x;
187 `endif
189 output [`LM32_WORD_RNG] load_data_w; // Result of a load instruction (sub-word selected and sign/zero extended)
190 reg [`LM32_WORD_RNG] load_data_w;
191 output stall_wb_load; // Request to stall pipeline due to a load from the Wishbone interface
192 reg stall_wb_load;
194 output [`LM32_WORD_RNG] d_dat_o; // Data Wishbone interface write data
195 reg [`LM32_WORD_RNG] d_dat_o;
196 output [`LM32_WORD_RNG] d_adr_o; // Data Wishbone interface address
197 reg [`LM32_WORD_RNG] d_adr_o;
198 output d_cyc_o; // Data Wishbone interface cycle
199 reg d_cyc_o;
200 output [`LM32_BYTE_SELECT_RNG] d_sel_o; // Data Wishbone interface byte select
201 reg [`LM32_BYTE_SELECT_RNG] d_sel_o;
202 output d_stb_o; // Data Wishbone interface strobe
203 reg d_stb_o;
204 output d_we_o; // Data Wishbone interface write enable
205 reg d_we_o;
206 output [`LM32_CTYPE_RNG] d_cti_o; // Data Wishbone interface cycle type
207 reg [`LM32_CTYPE_RNG] d_cti_o;
208 output d_lock_o; // Data Wishbone interface lock bus (only ever driven FALSE by this module)
209 reg d_lock_o;
210 output [`LM32_BTYPE_RNG] d_bte_o; // Data Wishbone interface burst type (tied to the linear burst constant)
211 wire [`LM32_BTYPE_RNG] d_bte_o;
213 /////////////////////////////////////////////////////
214 // Internal nets and registers
215 /////////////////////////////////////////////////////
217 // Microcode pipeline registers - See inputs for description
218 reg [`LM32_SIZE_RNG] size_m;
219 reg [`LM32_SIZE_RNG] size_w;
220 reg sign_extend_m;
221 reg sign_extend_w;
222 reg [`LM32_WORD_RNG] store_data_x;
223 reg [`LM32_WORD_RNG] store_data_m;
224 reg [`LM32_BYTE_SELECT_RNG] byte_enable_x;
225 reg [`LM32_BYTE_SELECT_RNG] byte_enable_m;
226 wire [`LM32_WORD_RNG] data_m;
227 reg [`LM32_WORD_RNG] data_w;
229 `ifdef CFG_DCACHE_ENABLED
230 wire dcache_select_x; // Select data cache to load from / store to
231 reg dcache_select_m;
232 wire [`LM32_WORD_RNG] dcache_data_m; // Data read from cache
233 wire [`LM32_WORD_RNG] dcache_refill_address; // Address to refill data cache from
234 reg dcache_refill_ready; // Indicates the next word of refill data is ready
235 wire [`LM32_CTYPE_RNG] first_cycle_type; // First Wishbone cycle type
236 wire [`LM32_CTYPE_RNG] next_cycle_type; // Next Wishbone cycle type
237 wire last_word; // Indicates if this is the last word in the cache line
238 wire [`LM32_WORD_RNG] first_address; // First cache refill address
239 `endif
240 `ifdef CFG_DRAM_ENABLED
241 wire dram_select_x; // Select data RAM to load from / store to
242 reg dram_select_m;
243 reg dram_bypass_en; // RAW in data RAM; read latched (bypass) value rather than value from memory
244 reg [`LM32_WORD_RNG] dram_bypass_data; // Latched value of store'd data to data RAM
245 wire [`LM32_WORD_RNG] dram_data_out; // Data read from data RAM
246 wire [`LM32_WORD_RNG] dram_data_m; // Data read from data RAM: bypass value or value from memory
247 wire [`LM32_WORD_RNG] dram_store_data_m; // Data to write to RAM
248 `endif
249 wire wb_select_x; // Select Wishbone to load from / store to
250 `ifdef CFG_IROM_ENABLED
251 wire irom_select_x; // Select instruction ROM to load from / store to
252 reg irom_select_m;
253 `endif
254 reg wb_select_m;
255 reg [`LM32_WORD_RNG] wb_data_m; // Data read from Wishbone
256 reg wb_load_complete; // Indicates when a Wishbone load is complete
258 /////////////////////////////////////////////////////
259 // Functions
260 /////////////////////////////////////////////////////
262 `include "lm32_functions.v"
264 /////////////////////////////////////////////////////
265 // Instantiations
266 /////////////////////////////////////////////////////
268 `ifdef CFG_DRAM_ENABLED
269 // Data RAM
270 pmi_ram_dp_true
271 #(
272 // ----- Parameters -------
273 .pmi_family (`LATTICE_FAMILY),
275 //.pmi_addr_depth_a (1 << (clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
276 //.pmi_addr_width_a ((clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
277 //.pmi_data_width_a (`LM32_WORD_WIDTH),
278 //.pmi_addr_depth_b (1 << (clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
279 //.pmi_addr_width_b ((clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
280 //.pmi_data_width_b (`LM32_WORD_WIDTH),
282 .pmi_addr_depth_a (`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1),
283 .pmi_addr_width_a (clogb2_v1(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)),
284 .pmi_data_width_a (`LM32_WORD_WIDTH),
285 .pmi_addr_depth_b (`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1),
286 .pmi_addr_width_b (clogb2_v1(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)),
287 .pmi_data_width_b (`LM32_WORD_WIDTH),
289 .pmi_regmode_a ("noreg"),
290 .pmi_regmode_b ("noreg"),
291 .pmi_gsr ("enable"),
292 .pmi_resetmode ("sync"),
293 .pmi_init_file (`CFG_DRAM_INIT_FILE),
294 .pmi_init_file_format (`CFG_DRAM_INIT_FILE_FORMAT),
295 .module_type ("pmi_ram_dp_true")
296 )
297 ram (
298 // ----- Inputs -------
299 .ClockA (clk_i),
300 .ClockB (clk_i),
301 .ResetA (rst_i),
302 .ResetB (rst_i),
303 .DataInA ({32{1'b0}}),
304 .DataInB (dram_store_data_m),
305 .AddressA (load_store_address_x[(clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)+2-1:2]),
306 .AddressB (load_store_address_m[(clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)+2-1:2]),
307 // .ClockEnA (!stall_x & (load_x | store_x)),
308 .ClockEnA (!stall_x),
309 .ClockEnB (!stall_m),
310 .WrA (`FALSE),
311 .WrB (store_q_m & dram_select_m),
312 // ----- Outputs -------
313 .QA (dram_data_out),
314 .QB ()
315 );
317 /*----------------------------------------------------------------------
318 EBRs cannot perform reads from location 'written to' on the same clock
319 edge. Therefore bypass logic is required to latch the store'd value
320 and use it for the load (instead of value from memory).
321 ----------------------------------------------------------------------*/
322 always @(posedge clk_i `CFG_RESET_SENSITIVITY)
323 if (rst_i == `TRUE)
324 begin
325 dram_bypass_en <= `FALSE;
326 dram_bypass_data <= 0;
327 end
328 else
329 begin
330 if (stall_x == `FALSE) // capture the merged M stage write data whenever the X stage advances
331 dram_bypass_data <= dram_store_data_m;
333 if ( (stall_m == `FALSE) // both stages advance this cycle ...
334 && (stall_x == `FALSE)
335 && (store_q_m == `TRUE) // ... a store is in M ...
336 && ( (load_x == `TRUE) // ... and the X stage instruction reads the RAM (a store reads too, for the byte merge)
337 || (store_x == `TRUE)
338 )
339 && (load_store_address_x[(`LM32_WORD_WIDTH-1):2] == load_store_address_m[(`LM32_WORD_WIDTH-1):2]) // same word address
340 ) // NOTE(review): match is not qualified by dram_select_x/m - presumably harmless since dram_data_m only matters for data RAM accesses; confirm
341 dram_bypass_en <= `TRUE;
342 else
343 if ( (dram_bypass_en == `TRUE) // otherwise drop the bypass the next time the X stage advances
344 && (stall_x == `FALSE)
345 )
346 dram_bypass_en <= `FALSE;
347 end
349 assign dram_data_m = dram_bypass_en ? dram_bypass_data : dram_data_out; // bypass value wins over the RAM read port
350 `endif
352 `ifdef CFG_DCACHE_ENABLED
353 // Data cache
354 lm32_dcache #(
355 .associativity (associativity),
356 .sets (sets),
357 .bytes_per_line (bytes_per_line),
358 .base_address (base_address),
359 .limit (limit)
360 ) dcache (
361 // ----- Inputs -----
362 .clk_i (clk_i),
363 .rst_i (rst_i),
364 .stall_a (stall_a),
365 .stall_x (stall_x),
366 .stall_m (stall_m),
367 .address_x (load_store_address_x),
368 .address_m (load_store_address_m),
369 .load_q_m (load_q_m & dcache_select_m),
370 .store_q_m (store_q_m & dcache_select_m),
371 .store_data (store_data_m),
372 .store_byte_select (byte_enable_m & {4{dcache_select_m}}),
373 .refill_ready (dcache_refill_ready),
374 .refill_data (wb_data_m),
375 .dflush (dflush),
376 // ----- Outputs -----
377 .stall_request (dcache_stall_request),
378 .restart_request (dcache_restart_request),
379 .refill_request (dcache_refill_request),
380 .refill_address (dcache_refill_address),
381 .refilling (dcache_refilling),
382 .load_data (dcache_data_m)
383 );
384 `endif
386 /////////////////////////////////////////////////////
387 // Combinational Logic
388 /////////////////////////////////////////////////////
390 // Select where data should be loaded from / stored to, by decoding the X stage address against inclusive [base, limit] ranges
391 `ifdef CFG_DRAM_ENABLED
392 assign dram_select_x = (load_store_address_x >= `CFG_DRAM_BASE_ADDRESS)
393 && (load_store_address_x <= `CFG_DRAM_LIMIT);
394 `endif
396 `ifdef CFG_IROM_ENABLED
397 assign irom_select_x = (load_store_address_x >= `CFG_IROM_BASE_ADDRESS)
398 && (load_store_address_x <= `CFG_IROM_LIMIT);
399 `endif
401 `ifdef CFG_DCACHE_ENABLED
402 assign dcache_select_x = (load_store_address_x >= `CFG_DCACHE_BASE_ADDRESS)
403 && (load_store_address_x <= `CFG_DCACHE_LIMIT)
404 `ifdef CFG_DRAM_ENABLED
405 && (dram_select_x == `FALSE) // data RAM takes precedence over the cache if the ranges overlap
406 `endif
407 `ifdef CFG_IROM_ENABLED
408 && (irom_select_x == `FALSE) // instruction ROM takes precedence over the cache if the ranges overlap
409 `endif
410 ;
411 `endif
413 assign wb_select_x = `TRUE // Wishbone is selected when no enabled local memory claims the address
414 `ifdef CFG_DCACHE_ENABLED
415 && !dcache_select_x
416 `endif
417 `ifdef CFG_DRAM_ENABLED
418 && !dram_select_x
419 `endif
420 `ifdef CFG_IROM_ENABLED
421 && !irom_select_x
422 `endif
423 ;
425 // Make sure data to store is in correct byte lane: the operand is replicated across the word so any enabled lane carries it
426 always @(*)
427 begin
428 case (size_x)
429 `LM32_SIZE_BYTE: store_data_x = {4{store_operand_x[7:0]}}; // low byte copied into all four lanes
430 `LM32_SIZE_HWORD: store_data_x = {2{store_operand_x[15:0]}}; // low halfword copied into both halves
431 `LM32_SIZE_WORD: store_data_x = store_operand_x;
432 default: store_data_x = {`LM32_WORD_WIDTH{1'bx}}; // any other size encoding yields X
433 endcase
434 end
436 // Generate byte enable according to size of load or store and address being accessed (lowest byte address maps to enable bit 3)
437 always @(*)
438 begin
439 casez ({size_x, load_store_address_x[1:0]})
440 {`LM32_SIZE_BYTE, 2'b11}: byte_enable_x = 4'b0001;
441 {`LM32_SIZE_BYTE, 2'b10}: byte_enable_x = 4'b0010;
442 {`LM32_SIZE_BYTE, 2'b01}: byte_enable_x = 4'b0100;
443 {`LM32_SIZE_BYTE, 2'b00}: byte_enable_x = 4'b1000;
444 {`LM32_SIZE_HWORD, 2'b1?}: byte_enable_x = 4'b0011; // address bit 0 is ignored for halfwords
445 {`LM32_SIZE_HWORD, 2'b0?}: byte_enable_x = 4'b1100;
446 {`LM32_SIZE_WORD, 2'b??}: byte_enable_x = 4'b1111; // both low address bits are ignored for words
447 default: byte_enable_x = 4'bxxxx; // any other size encoding yields X
448 endcase
449 end
451 `ifdef CFG_DRAM_ENABLED
452 // Only replace selected bytes
453 assign dram_store_data_m[`LM32_BYTE_0_RNG] = byte_enable_m[0] ? store_data_m[`LM32_BYTE_0_RNG] : dram_data_m[`LM32_BYTE_0_RNG];
454 assign dram_store_data_m[`LM32_BYTE_1_RNG] = byte_enable_m[1] ? store_data_m[`LM32_BYTE_1_RNG] : dram_data_m[`LM32_BYTE_1_RNG];
455 assign dram_store_data_m[`LM32_BYTE_2_RNG] = byte_enable_m[2] ? store_data_m[`LM32_BYTE_2_RNG] : dram_data_m[`LM32_BYTE_2_RNG];
456 assign dram_store_data_m[`LM32_BYTE_3_RNG] = byte_enable_m[3] ? store_data_m[`LM32_BYTE_3_RNG] : dram_data_m[`LM32_BYTE_3_RNG];
457 `endif
459 `ifdef CFG_IROM_ENABLED
460 // Only replace selected bytes
461 assign irom_store_data_m[`LM32_BYTE_0_RNG] = byte_enable_m[0] ? store_data_m[`LM32_BYTE_0_RNG] : irom_data_m[`LM32_BYTE_0_RNG];
462 assign irom_store_data_m[`LM32_BYTE_1_RNG] = byte_enable_m[1] ? store_data_m[`LM32_BYTE_1_RNG] : irom_data_m[`LM32_BYTE_1_RNG];
463 assign irom_store_data_m[`LM32_BYTE_2_RNG] = byte_enable_m[2] ? store_data_m[`LM32_BYTE_2_RNG] : irom_data_m[`LM32_BYTE_2_RNG];
464 assign irom_store_data_m[`LM32_BYTE_3_RNG] = byte_enable_m[3] ? store_data_m[`LM32_BYTE_3_RNG] : irom_data_m[`LM32_BYTE_3_RNG];
465 `endif
467 `ifdef CFG_IROM_ENABLED
468 // Instead of implementing a byte-addressable instruction ROM (for store byte instruction),
469 // a load-and-store architecture is used wherein a 32-bit value is loaded, the requisite
470 // byte is replaced, and the whole 32-bit value is written back
472 assign irom_address_xm = ((irom_select_m == `TRUE) && (store_q_m == `TRUE))
473 ? load_store_address_m
474 : load_store_address_x;
476 // All store instructions perform a write operation in the M stage
477 assign irom_we_xm = (irom_select_m == `TRUE)
478 && (store_q_m == `TRUE);
480 // A single port in instruction ROM is available to load-store unit for doing loads/stores.
481 // Since every store requires a load (in X stage) and then a store (in M stage), we cannot
482 // allow load (or store) instructions sequentially after the store instructions to proceed
483 // until the store instruction has vacated M stage (i.e., completed the store operation)
484 assign irom_stall_request_x = (irom_select_x == `TRUE)
485 && (store_q_x == `TRUE);
486 `endif
487 // M stage load data mux, one variant per configuration: Wishbone is tested first, then data RAM, then instruction ROM; the last enabled source is the fall-through
488 `ifdef CFG_DCACHE_ENABLED
489 `ifdef CFG_DRAM_ENABLED
490 `ifdef CFG_IROM_ENABLED
491 // WB + DC + DRAM + IROM
492 assign data_m = wb_select_m == `TRUE
493 ? wb_data_m
494 : dram_select_m == `TRUE
495 ? dram_data_m
496 : irom_select_m == `TRUE
497 ? irom_data_m
498 : dcache_data_m;
499 `else
500 // WB + DC + DRAM
501 assign data_m = wb_select_m == `TRUE
502 ? wb_data_m
503 : dram_select_m == `TRUE
504 ? dram_data_m
505 : dcache_data_m;
506 `endif
507 `else
508 `ifdef CFG_IROM_ENABLED
509 // WB + DC + IROM
510 assign data_m = wb_select_m == `TRUE
511 ? wb_data_m
512 : irom_select_m == `TRUE
513 ? irom_data_m
514 : dcache_data_m;
515 `else
516 // WB + DC
517 assign data_m = wb_select_m == `TRUE
518 ? wb_data_m
519 : dcache_data_m;
520 `endif
521 `endif
522 `else
523 `ifdef CFG_DRAM_ENABLED
524 `ifdef CFG_IROM_ENABLED
525 // WB + DRAM + IROM
526 assign data_m = wb_select_m == `TRUE
527 ? wb_data_m
528 : dram_select_m == `TRUE
529 ? dram_data_m
530 : irom_data_m;
531 `else
532 // WB + DRAM
533 assign data_m = wb_select_m == `TRUE
534 ? wb_data_m
535 : dram_data_m;
536 `endif
537 `else
538 `ifdef CFG_IROM_ENABLED
539 // WB + IROM
540 assign data_m = wb_select_m == `TRUE
541 ? wb_data_m
542 : irom_data_m;
543 `else
544 // WB
545 assign data_m = wb_data_m;
546 `endif
547 `endif
548 `endif
550 // Sub-word selection and sign/zero-extension for loads (byte at address offset 0 is taken from data_w[31:24])
551 always @(*)
552 begin
553 casez ({size_w, load_store_address_w[1:0]})
554 {`LM32_SIZE_BYTE, 2'b11}: load_data_w = {{24{sign_extend_w & data_w[7]}}, data_w[7:0]}; // upper bits replicate the sign bit only when sign_extend_w is set, else zero
555 {`LM32_SIZE_BYTE, 2'b10}: load_data_w = {{24{sign_extend_w & data_w[15]}}, data_w[15:8]};
556 {`LM32_SIZE_BYTE, 2'b01}: load_data_w = {{24{sign_extend_w & data_w[23]}}, data_w[23:16]};
557 {`LM32_SIZE_BYTE, 2'b00}: load_data_w = {{24{sign_extend_w & data_w[31]}}, data_w[31:24]};
558 {`LM32_SIZE_HWORD, 2'b1?}: load_data_w = {{16{sign_extend_w & data_w[15]}}, data_w[15:0]}; // address bit 0 is ignored for halfwords
559 {`LM32_SIZE_HWORD, 2'b0?}: load_data_w = {{16{sign_extend_w & data_w[31]}}, data_w[31:16]};
560 {`LM32_SIZE_WORD, 2'b??}: load_data_w = data_w;
561 default: load_data_w = {`LM32_WORD_WIDTH{1'bx}}; // any other size encoding yields X
562 endcase
563 end
565 // Unused/constant Wishbone signals
566 assign d_bte_o = `LM32_BTYPE_LINEAR;
568 `ifdef CFG_DCACHE_ENABLED
569 // Generate refill sequencing signals per line size: first/next Wishbone cycle type, last-word flag and line start address
570 generate
571 case (bytes_per_line) // only 4, 8 and 16 are handled; there is no default arm, so any other value leaves these four nets undriven
572 4:
573 begin
574 assign first_cycle_type = `LM32_CTYPE_END; // single-word line: the first access is also the last
575 assign next_cycle_type = `LM32_CTYPE_END;
576 assign last_word = `TRUE;
577 assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:2], 2'b00};
578 end
579 8:
580 begin
581 assign first_cycle_type = `LM32_CTYPE_INCREMENTING;
582 assign next_cycle_type = `LM32_CTYPE_END;
583 assign last_word = (&d_adr_o[addr_offset_msb:addr_offset_lsb]) == 1'b1; // all word-offset bits set
584 assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:addr_offset_msb+1], {addr_offset_width{1'b0}}, 2'b00}; // refill address with word offset and byte bits cleared
585 end
586 16:
587 begin
588 assign first_cycle_type = `LM32_CTYPE_INCREMENTING;
589 assign next_cycle_type = d_adr_o[addr_offset_msb] == 1'b1 ? `LM32_CTYPE_END : `LM32_CTYPE_INCREMENTING; // END once the top word-offset bit of the current address is set
590 assign last_word = (&d_adr_o[addr_offset_msb:addr_offset_lsb]) == 1'b1; // all word-offset bits set
591 assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:addr_offset_msb+1], {addr_offset_width{1'b0}}, 2'b00}; // refill address with word offset and byte bits cleared
592 end
593 endcase
594 endgenerate
595 `endif
597 /////////////////////////////////////////////////////
598 // Sequential Logic
599 /////////////////////////////////////////////////////
601 // Data Wishbone interface: bus master sequencing. While idle (d_cyc_o low) it starts, in priority order, a cache refill, a store, or a Wishbone load; while active it waits for ack/err
602 always @(posedge clk_i `CFG_RESET_SENSITIVITY)
603 begin
604 if (rst_i == `TRUE)
605 begin
606 d_cyc_o <= `FALSE;
607 d_stb_o <= `FALSE;
608 d_dat_o <= {`LM32_WORD_WIDTH{1'b0}};
609 d_adr_o <= {`LM32_WORD_WIDTH{1'b0}};
610 d_sel_o <= {`LM32_BYTE_SELECT_WIDTH{`FALSE}};
611 d_we_o <= `FALSE;
612 d_cti_o <= `LM32_CTYPE_END;
613 d_lock_o <= `FALSE;
614 wb_data_m <= {`LM32_WORD_WIDTH{1'b0}};
615 wb_load_complete <= `FALSE;
616 stall_wb_load <= `FALSE;
617 `ifdef CFG_DCACHE_ENABLED
618 dcache_refill_ready <= `FALSE;
619 `endif
620 end
621 else
622 begin
623 `ifdef CFG_DCACHE_ENABLED
624 // Refill ready should only be asserted for a single cycle (default; overridden below when a refill word arrives)
625 dcache_refill_ready <= `FALSE;
626 `endif
627 // Is a Wishbone cycle already in progress?
628 if (d_cyc_o == `TRUE)
629 begin
630 // Is the cycle complete? (an error terminates the access exactly like an acknowledge; d_rty_i is not examined)
631 if ((d_ack_i == `TRUE) || (d_err_i == `TRUE))
632 begin
633 `ifdef CFG_DCACHE_ENABLED
634 if ((dcache_refilling == `TRUE) && (!last_word))
635 begin
636 // Fetch next word of cache line (only the word-offset bits of the address increment; the cycle stays open)
637 d_adr_o[addr_offset_msb:addr_offset_lsb] <= d_adr_o[addr_offset_msb:addr_offset_lsb] + 1'b1;
638 end
639 else
640 `endif
641 begin
642 // Refill/access complete
643 d_cyc_o <= `FALSE;
644 d_stb_o <= `FALSE;
645 d_lock_o <= `FALSE;
646 end
647 `ifdef CFG_DCACHE_ENABLED
648 d_cti_o <= next_cycle_type;
649 // If we are performing a refill, indicate to cache next word of data is ready
650 dcache_refill_ready <= dcache_refilling;
651 `endif
652 // Register data read from Wishbone interface
653 wb_data_m <= d_dat_i;
654 // Don't set when stores complete - otherwise we'll deadlock if load in m stage (the stall_m clear further down overrides this assignment)
655 wb_load_complete <= !d_we_o;
656 end
657 // synthesis translate_off
658 if (d_err_i == `TRUE)
659 $display ("Data bus error. Address: %x", d_adr_o);
660 // synthesis translate_on
661 end
662 else
663 begin
664 `ifdef CFG_DCACHE_ENABLED
665 if (dcache_refill_request == `TRUE)
666 begin
667 // Start cache refill (full-word read from the start of the line)
668 d_adr_o <= first_address;
669 d_cyc_o <= `TRUE;
670 d_sel_o <= {`LM32_WORD_WIDTH/8{`TRUE}};
671 d_stb_o <= `TRUE;
672 d_we_o <= `FALSE;
673 d_cti_o <= first_cycle_type;
674 //d_lock_o <= `TRUE;
675 end
676 else
677 `endif
678 if ( (store_q_m == `TRUE)
679 && (stall_m == `FALSE)
680 `ifdef CFG_DRAM_ENABLED
681 && (dram_select_m == `FALSE)
682 `endif
683 `ifdef CFG_IROM_ENABLED
684 && (irom_select_m == `FALSE)
685 `endif
686 )
687 begin
688 // Data cache is write through, so all stores go to memory (only data RAM / instruction ROM stores are excluded by the condition above)
689 d_dat_o <= store_data_m;
690 d_adr_o <= load_store_address_m;
691 d_cyc_o <= `TRUE;
692 d_sel_o <= byte_enable_m;
693 d_stb_o <= `TRUE;
694 d_we_o <= `TRUE;
695 d_cti_o <= `LM32_CTYPE_END;
696 end
697 else if ( (load_q_m == `TRUE)
698 && (wb_select_m == `TRUE)
699 && (wb_load_complete == `FALSE)
700 // stall_m will be TRUE, because stall_wb_load will be TRUE
701 )
702 begin
703 // Read requested address; the stall request is dropped as the bus cycle is started (later assignments below may still override it)
704 stall_wb_load <= `FALSE;
705 d_adr_o <= load_store_address_m;
706 d_cyc_o <= `TRUE;
707 d_sel_o <= byte_enable_m;
708 d_stb_o <= `TRUE;
709 d_we_o <= `FALSE;
710 d_cti_o <= `LM32_CTYPE_END;
711 end
712 end
713 // Clear load/store complete flag when instruction leaves M stage
714 if (stall_m == `FALSE)
715 wb_load_complete <= `FALSE;
716 // When a Wishbone load first enters the M stage, we need to stall it (takes precedence over the clear in the load-start branch)
717 if ((load_q_x == `TRUE) && (wb_select_x == `TRUE) && (stall_x == `FALSE))
718 stall_wb_load <= `TRUE;
719 // Clear stall request if load instruction is killed (last assignment in the block, so it has the highest priority)
720 if ((kill_m == `TRUE) || (exception_m == `TRUE))
721 stall_wb_load <= `FALSE;
722 end
723 end
725 // Pipeline registers
727 // X/M stage pipeline registers: carry the X stage access attributes and memory-select decode into the M stage
728 always @(posedge clk_i `CFG_RESET_SENSITIVITY)
729 begin
730 if (rst_i == `TRUE)
731 begin
732 sign_extend_m <= `FALSE;
733 size_m <= 2'b00;
734 byte_enable_m <= {`LM32_BYTE_SELECT_WIDTH{`FALSE}}; // full-width reset value, same form as the d_sel_o reset
735 store_data_m <= {`LM32_WORD_WIDTH{1'b0}};
736 `ifdef CFG_DCACHE_ENABLED
737 dcache_select_m <= `FALSE;
738 `endif
739 `ifdef CFG_DRAM_ENABLED
740 dram_select_m <= `FALSE;
741 `endif
742 `ifdef CFG_IROM_ENABLED
743 irom_select_m <= `FALSE;
744 `endif
745 wb_select_m <= `FALSE;
746 end
747 else
748 begin
749 if (stall_m == `FALSE) // registers hold their value while the M stage is stalled
750 begin
751 sign_extend_m <= sign_extend_x;
752 size_m <= size_x;
753 byte_enable_m <= byte_enable_x;
754 store_data_m <= store_data_x;
755 `ifdef CFG_DCACHE_ENABLED
756 dcache_select_m <= dcache_select_x;
757 `endif
758 `ifdef CFG_DRAM_ENABLED
759 dram_select_m <= dram_select_x;
760 `endif
761 `ifdef CFG_IROM_ENABLED
762 irom_select_m <= irom_select_x;
763 `endif
764 wb_select_m <= wb_select_x;
765 end
766 end
767 end
769 // M/W stage pipeline registers: capture the selected M stage load data and its size/extension attributes (updated every clock; there is no stall qualifier)
770 always @(posedge clk_i `CFG_RESET_SENSITIVITY)
771 begin
772 if (rst_i == `TRUE)
773 begin
774 size_w <= 2'b00;
775 data_w <= {`LM32_WORD_WIDTH{1'b0}};
776 sign_extend_w <= `FALSE;
777 end
778 else
779 begin
780 size_w <= size_m;
781 data_w <= data_m;
782 sign_extend_w <= sign_extend_m;
783 end
784 end
786 /////////////////////////////////////////////////////
787 // Behavioural Logic
788 /////////////////////////////////////////////////////
790 // synthesis translate_off
792 // Check for non-aligned loads or stores (simulation-only warning; nothing is driven from here)
793 always @(posedge clk_i)
794 begin
795 if (((load_q_m == `TRUE) || (store_q_m == `TRUE)) && (stall_m == `FALSE)) // only when a load/store in M is advancing
796 begin
797 if ((size_m === `LM32_SIZE_HWORD) && (load_store_address_m[0] !== 1'b0)) // case inequality: an X/Z address bit is reported too
798 $display ("Warning: Non-aligned halfword access. Address: 0x%0x Time: %0t.", load_store_address_m, $time);
799 if ((size_m === `LM32_SIZE_WORD) && (load_store_address_m[1:0] !== 2'b00)) // case inequality: X/Z address bits are reported too
800 $display ("Warning: Non-aligned word access. Address: 0x%0x Time: %0t.", load_store_address_m, $time);
801 end
802 end
804 // synthesis translate_on
806 endmodule