1.1 diff -r 000000000000 -r cd0b58aa6f83 lm32_load_store_unit.v 1.2 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.3 +++ b/lm32_load_store_unit.v Sun Apr 04 20:40:03 2010 +0100 1.4 @@ -0,0 +1,808 @@ 1.5 +// ============================================================================= 1.6 +// COPYRIGHT NOTICE 1.7 +// Copyright 2006 (c) Lattice Semiconductor Corporation 1.8 +// ALL RIGHTS RESERVED 1.9 +// This confidential and proprietary software may be used only as authorised by 1.10 +// a licensing agreement from Lattice Semiconductor Corporation. 1.11 +// The entire notice above must be reproduced on all authorized copies and 1.12 +// copies may only be made to the extent permitted by a licensing agreement from 1.13 +// Lattice Semiconductor Corporation. 1.14 +// 1.15 +// Lattice Semiconductor Corporation TEL : 1-800-Lattice (USA and Canada) 1.16 +// 5555 NE Moore Court 408-826-6000 (other locations) 1.17 +// Hillsboro, OR 97124 web : http://www.latticesemi.com/ 1.18 +// U.S.A email: techsupport@latticesemi.com 1.19 +// =============================================================================/ 1.20 +// FILE DETAILS 1.21 +// Project : LatticeMico32 1.22 +// File : lm32_load_store_unit.v 1.23 +// Title : Load and store unit 1.24 +// Dependencies : lm32_include.v 1.25 +// Version : 6.1.17 1.26 +// : Initial Release 1.27 +// Version : 7.0SP2, 3.0 1.28 +// : No Change 1.29 +// Version : 3.1 1.30 +// : Instead of disallowing an instruction cache miss on a data cache 1.31 +// : miss, both can now occur at the same time. If both occur at same 1.32 +// : time, then restart address is the address of instruction that 1.33 +// : caused data cache miss. 1.34 +// Version : 3.2 1.35 +// : EBRs use SYNC resets instead of ASYNC resets. 1.36 +// Version : 3.3 1.37 +// : Support for new non-cacheable Data Memory that is accessible by 1.38 +// : the data port and has a one cycle access latency. 
// Version         : 3.4
//                 : No change
// Version         : 3.5
//                 : Bug fix: Inline memory is correctly generated if it is not a
//                 : power-of-two
// =============================================================================

`include "lm32_include.v"

/////////////////////////////////////////////////////
// Module interface
/////////////////////////////////////////////////////

// Load/store unit.
//
// Overview (all statements below describe logic visible in this module):
//  * In the X stage the access address is compared against the configured
//    address windows to pick one target: inline data RAM (CFG_DRAM_ENABLED),
//    instruction ROM (CFG_IROM_ENABLED), data cache (CFG_DCACHE_ENABLED) or,
//    when none of those match, the data Wishbone port.
//  * Store data is replicated across byte lanes and a 4-bit byte enable is
//    derived from the access size and address[1:0]. Address offset 2'b00
//    selects byte-enable bit 3 / load data bits [31:24].
//  * In the M stage the read data from the selected target is muxed into
//    data_m; in the W stage it is sub-word selected and sign/zero-extended
//    into load_data_w.
//  * A single sequential block drives the Wishbone master signals for cache
//    refills, stores, and Wishbone loads, and raises stall_wb_load while a
//    Wishbone load is outstanding.
module lm32_load_store_unit (
    // ----- Inputs -------
    clk_i,
    rst_i,
    // From pipeline
    stall_a,
    stall_x,
    stall_m,
    kill_x,
    kill_m,
    exception_m,
    store_operand_x,
    load_store_address_x,
    load_store_address_m,
    load_store_address_w,
    load_x,
    store_x,
    load_q_x,
    store_q_x,
    load_q_m,
    store_q_m,
    sign_extend_x,
    size_x,
`ifdef CFG_DCACHE_ENABLED
    dflush,
`endif
`ifdef CFG_IROM_ENABLED
    irom_data_m,
`endif
    // From Wishbone
    d_dat_i,
    d_ack_i,
    d_err_i,
    d_rty_i,
    // ----- Outputs -------
    // To pipeline
`ifdef CFG_DCACHE_ENABLED
    dcache_refill_request,
    dcache_restart_request,
    dcache_stall_request,
    dcache_refilling,
`endif
`ifdef CFG_IROM_ENABLED
    irom_store_data_m,
    irom_address_xm,
    irom_we_xm,
    irom_stall_request_x,
`endif
    load_data_w,
    stall_wb_load,
    // To Wishbone
    d_dat_o,
    d_adr_o,
    d_cyc_o,
    d_sel_o,
    d_stb_o,
    d_we_o,
    d_cti_o,
    d_lock_o,
    d_bte_o
    );

/////////////////////////////////////////////////////
// Parameters
/////////////////////////////////////////////////////

parameter associativity = 1;                            // Associativity of the cache (Number of ways)
parameter sets = 512;                                   // Number of sets
parameter bytes_per_line = 16;                          // Number of bytes per cache line
parameter base_address = 0;                             // Base address of cachable memory
parameter limit = 0;                                    // Limit (highest address) of cachable memory

// For bytes_per_line == 4, we set 1 so part-select range isn't reversed, even though not really used
localparam addr_offset_width = bytes_per_line == 4 ? 1 : clogb2(bytes_per_line)-1-2;
localparam addr_offset_lsb = 2;
localparam addr_offset_msb = (addr_offset_lsb+addr_offset_width-1);

/////////////////////////////////////////////////////
// Inputs
/////////////////////////////////////////////////////

input clk_i;                                            // Clock
input rst_i;                                            // Reset

input stall_a;                                          // A stage stall
input stall_x;                                          // X stage stall
input stall_m;                                          // M stage stall
input kill_x;                                           // Kill instruction in X stage
input kill_m;                                           // Kill instruction in M stage
input exception_m;                                      // An exception occurred in the M stage

input [`LM32_WORD_RNG] store_operand_x;                 // Data read from register to store
input [`LM32_WORD_RNG] load_store_address_x;            // X stage load/store address
input [`LM32_WORD_RNG] load_store_address_m;            // M stage load/store address
input [1:0] load_store_address_w;                       // W stage load/store address (only least two significant bits are needed)
input load_x;                                           // Load instruction in X stage
input store_x;                                          // Store instruction in X stage
input load_q_x;                                         // Load instruction in X stage
input store_q_x;                                        // Store instruction in X stage
input load_q_m;                                         // Load instruction in M stage
input store_q_m;                                        // Store instruction in M stage
input sign_extend_x;                                    // Whether load instruction in X stage should sign extend or zero extend
input [`LM32_SIZE_RNG] size_x;                          // Size of load or store (byte, hword, word)

`ifdef CFG_DCACHE_ENABLED
input dflush;                                           // Flush the data cache
`endif

`ifdef CFG_IROM_ENABLED
input [`LM32_WORD_RNG] irom_data_m;                     // Data from Instruction-ROM
`endif

input [`LM32_WORD_RNG] d_dat_i;                         // Data Wishbone interface read data
input d_ack_i;                                          // Data Wishbone interface acknowledgement
input d_err_i;                                          // Data Wishbone interface error
input d_rty_i;                                          // Data Wishbone interface retry

/////////////////////////////////////////////////////
// Outputs
/////////////////////////////////////////////////////

`ifdef CFG_DCACHE_ENABLED
output dcache_refill_request;                           // Request to refill data cache
wire   dcache_refill_request;
output dcache_restart_request;                          // Request to restart the instruction that caused a data cache miss
wire   dcache_restart_request;
output dcache_stall_request;                            // Data cache stall request
wire   dcache_stall_request;
output dcache_refilling;
wire   dcache_refilling;
`endif

`ifdef CFG_IROM_ENABLED
// The port range must match the word-wide net declared for it below; the
// byte-lane assigns further down drive all four bytes of this signal.
output [`LM32_WORD_RNG] irom_store_data_m;              // Store data to Instruction ROM
wire   [`LM32_WORD_RNG] irom_store_data_m;
output [`LM32_WORD_RNG] irom_address_xm;                // Load/store address to Instruction ROM
wire   [`LM32_WORD_RNG] irom_address_xm;
output irom_we_xm;                                      // Write-enable of 2nd port of Instruction ROM
wire   irom_we_xm;
output irom_stall_request_x;                            // Stall instruction in D stage
wire   irom_stall_request_x;
`endif

output [`LM32_WORD_RNG] load_data_w;                    // Result of a load instruction
reg    [`LM32_WORD_RNG] load_data_w;
output stall_wb_load;                                   // Request to stall pipeline due to a load from the Wishbone interface
reg    stall_wb_load;

output [`LM32_WORD_RNG] d_dat_o;                        // Data Wishbone interface write data
reg    [`LM32_WORD_RNG] d_dat_o;
output [`LM32_WORD_RNG] d_adr_o;                        // Data Wishbone interface address
reg    [`LM32_WORD_RNG] d_adr_o;
output d_cyc_o;                                         // Data Wishbone interface cycle
reg    d_cyc_o;
output [`LM32_BYTE_SELECT_RNG] d_sel_o;                 // Data Wishbone interface byte select
reg    [`LM32_BYTE_SELECT_RNG] d_sel_o;
output d_stb_o;                                         // Data Wishbone interface strobe
reg    d_stb_o;
output d_we_o;                                          // Data Wishbone interface write enable
reg    d_we_o;
output [`LM32_CTYPE_RNG] d_cti_o;                       // Data Wishbone interface cycle type
reg    [`LM32_CTYPE_RNG] d_cti_o;
output d_lock_o;                                        // Data Wishbone interface lock bus
reg    d_lock_o;
output [`LM32_BTYPE_RNG] d_bte_o;                       // Data Wishbone interface burst type
wire   [`LM32_BTYPE_RNG] d_bte_o;

/////////////////////////////////////////////////////
// Internal nets and registers
/////////////////////////////////////////////////////

// Microcode pipeline registers - See inputs for description
reg [`LM32_SIZE_RNG] size_m;
reg [`LM32_SIZE_RNG] size_w;
reg sign_extend_m;
reg sign_extend_w;
reg [`LM32_WORD_RNG] store_data_x;
reg [`LM32_WORD_RNG] store_data_m;
reg [`LM32_BYTE_SELECT_RNG] byte_enable_x;
reg [`LM32_BYTE_SELECT_RNG] byte_enable_m;
wire [`LM32_WORD_RNG] data_m;
reg [`LM32_WORD_RNG] data_w;

`ifdef CFG_DCACHE_ENABLED
wire dcache_select_x;                                   // Select data cache to load from / store to
reg dcache_select_m;
wire [`LM32_WORD_RNG] dcache_data_m;                    // Data read from cache
wire [`LM32_WORD_RNG] dcache_refill_address;            // Address to refill data cache from
reg dcache_refill_ready;                                // Indicates the next word of refill data is ready
wire [`LM32_CTYPE_RNG] first_cycle_type;                // First Wishbone cycle type
wire [`LM32_CTYPE_RNG] next_cycle_type;                 // Next Wishbone cycle type
wire last_word;                                         // Indicates if this is the last word in the cache line
wire [`LM32_WORD_RNG] first_address;                    // First cache refill address
`endif
`ifdef CFG_DRAM_ENABLED
wire dram_select_x;                                     // Select data RAM to load from / store to
reg dram_select_m;
reg dram_bypass_en;                                     // RAW in data RAM; read latched (bypass) value rather than value from memory
reg [`LM32_WORD_RNG] dram_bypass_data;                  // Latched value of store'd data to data RAM
wire [`LM32_WORD_RNG] dram_data_out;                    // Data read from data RAM
wire [`LM32_WORD_RNG] dram_data_m;                      // Data read from data RAM: bypass value or value from memory
wire [`LM32_WORD_RNG] dram_store_data_m;                // Data to write to RAM
`endif
wire wb_select_x;                                       // Select Wishbone to load from / store to
`ifdef CFG_IROM_ENABLED
wire irom_select_x;                                     // Select instruction ROM to load from / store to
reg  irom_select_m;
`endif
reg wb_select_m;
reg [`LM32_WORD_RNG] wb_data_m;                         // Data read from Wishbone
reg wb_load_complete;                                   // Indicates when a Wishbone load is complete

/////////////////////////////////////////////////////
// Functions
/////////////////////////////////////////////////////

`include "lm32_functions.v"

/////////////////////////////////////////////////////
// Instantiations
/////////////////////////////////////////////////////

`ifdef CFG_DRAM_ENABLED
    // Data RAM
    // Port A is read-only (WrA tied FALSE) and addressed from the X stage;
    // port B performs the M-stage write of the byte-merged store word.
    pmi_ram_dp_true
      #(
        // ----- Parameters -------
        .pmi_family             (`LATTICE_FAMILY),

        //.pmi_addr_depth_a       (1 << (clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
        //.pmi_addr_width_a       ((clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
        //.pmi_data_width_a       (`LM32_WORD_WIDTH),
        //.pmi_addr_depth_b       (1 << (clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
        //.pmi_addr_width_b       ((clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
        //.pmi_data_width_b       (`LM32_WORD_WIDTH),

        .pmi_addr_depth_a       (`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1),
        .pmi_addr_width_a       (clogb2_v1(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)),
        .pmi_data_width_a       (`LM32_WORD_WIDTH),
        .pmi_addr_depth_b       (`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1),
        .pmi_addr_width_b       (clogb2_v1(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)),
        .pmi_data_width_b       (`LM32_WORD_WIDTH),

        .pmi_regmode_a          ("noreg"),
        .pmi_regmode_b          ("noreg"),
        .pmi_gsr                ("enable"),
        .pmi_resetmode          ("sync"),
        .pmi_init_file          (`CFG_DRAM_INIT_FILE),
        .pmi_init_file_format   (`CFG_DRAM_INIT_FILE_FORMAT),
        .module_type            ("pmi_ram_dp_true")
        )
    ram (
        // ----- Inputs -------
        .ClockA                 (clk_i),
        .ClockB                 (clk_i),
        .ResetA                 (rst_i),
        .ResetB                 (rst_i),
        // Port A never writes; tie its data input to a word of zeros
        .DataInA                ({`LM32_WORD_WIDTH{1'b0}}),
        .DataInB                (dram_store_data_m),
        // NOTE(review): the address part-selects below are sized with
        // clogb2()-1 while pmi_addr_width_a/b use clogb2_v1() - confirm the
        // two agree when the RAM depth is not a power of two.
        .AddressA               (load_store_address_x[(clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)+2-1:2]),
        .AddressB               (load_store_address_m[(clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)+2-1:2]),
        // .ClockEnA               (!stall_x & (load_x | store_x)),
        .ClockEnA               (!stall_x),
        .ClockEnB               (!stall_m),
        .WrA                    (`FALSE),
        .WrB                    (store_q_m & dram_select_m),
        // ----- Outputs -------
        .QA                     (dram_data_out),
        .QB                     ()
        );

    /*----------------------------------------------------------------------
     EBRs cannot perform reads from location 'written to' on the same clock
     edge. Therefore bypass logic is required to latch the store'd value
     and use it for the load (instead of value from memory).
     ----------------------------------------------------------------------*/
    always @(posedge clk_i `CFG_RESET_SENSITIVITY)
        if (rst_i == `TRUE)
        begin
            dram_bypass_en <= `FALSE;
            dram_bypass_data <= 0;
        end
        else
        begin
            // Capture the word being written whenever the X stage advances
            if (stall_x == `FALSE)
                dram_bypass_data <= dram_store_data_m;

            // Enable the bypass when a store in M and a load/store in X
            // target the same word and both stages advance this cycle
            if (   (stall_m == `FALSE)
                && (stall_x == `FALSE)
                && (store_q_m == `TRUE)
                && (   (load_x == `TRUE)
                    || (store_x == `TRUE)
                   )
                && (load_store_address_x[(`LM32_WORD_WIDTH-1):2] == load_store_address_m[(`LM32_WORD_WIDTH-1):2])
               )
                dram_bypass_en <= `TRUE;
            else
                if (   (dram_bypass_en == `TRUE)
                    && (stall_x == `FALSE)
                   )
                    dram_bypass_en <= `FALSE;
        end

    assign dram_data_m = dram_bypass_en ? dram_bypass_data : dram_data_out;
`endif

`ifdef CFG_DCACHE_ENABLED
// Data cache
lm32_dcache #(
    .associativity          (associativity),
    .sets                   (sets),
    .bytes_per_line         (bytes_per_line),
    .base_address           (base_address),
    .limit                  (limit)
    ) dcache (
    // ----- Inputs -----
    .clk_i                  (clk_i),
    .rst_i                  (rst_i),
    .stall_a                (stall_a),
    .stall_x                (stall_x),
    .stall_m                (stall_m),
    .address_x              (load_store_address_x),
    .address_m              (load_store_address_m),
    .load_q_m               (load_q_m & dcache_select_m),
    .store_q_m              (store_q_m & dcache_select_m),
    .store_data             (store_data_m),
    .store_byte_select      (byte_enable_m & {4{dcache_select_m}}),
    .refill_ready           (dcache_refill_ready),
    .refill_data            (wb_data_m),
    .dflush                 (dflush),
    // ----- Outputs -----
    .stall_request          (dcache_stall_request),
    .restart_request        (dcache_restart_request),
    .refill_request         (dcache_refill_request),
    .refill_address         (dcache_refill_address),
    .refilling              (dcache_refilling),
    .load_data              (dcache_data_m)
    );
`endif

/////////////////////////////////////////////////////
// Combinational Logic
/////////////////////////////////////////////////////

// Select where data should be loaded from / stored to
`ifdef CFG_DRAM_ENABLED
    assign dram_select_x =    (load_store_address_x >= `CFG_DRAM_BASE_ADDRESS)
                           && (load_store_address_x <= `CFG_DRAM_LIMIT);
`endif

`ifdef CFG_IROM_ENABLED
    assign irom_select_x =    (load_store_address_x >= `CFG_IROM_BASE_ADDRESS)
                           && (load_store_address_x <= `CFG_IROM_LIMIT);
`endif

`ifdef CFG_DCACHE_ENABLED
    // The cache is only selected when neither inline memory claims the address
    assign dcache_select_x =    (load_store_address_x >= `CFG_DCACHE_BASE_ADDRESS)
                             && (load_store_address_x <= `CFG_DCACHE_LIMIT)
`ifdef CFG_DRAM_ENABLED
                             && (dram_select_x == `FALSE)
`endif
`ifdef CFG_IROM_ENABLED
                             && (irom_select_x == `FALSE)
`endif
                     ;
`endif

    // Wishbone is the fall-back target when no other memory is selected
    assign wb_select_x =    `TRUE
`ifdef CFG_DCACHE_ENABLED
                         && !dcache_select_x
`endif
`ifdef CFG_DRAM_ENABLED
                         && !dram_select_x
`endif
`ifdef CFG_IROM_ENABLED
                         && !irom_select_x
`endif
                     ;

// Make sure data to store is in correct byte lane
// (sub-word data is replicated on every lane; the byte enable picks the lane)
always @(*)
begin
    case (size_x)
    `LM32_SIZE_BYTE:  store_data_x = {4{store_operand_x[7:0]}};
    `LM32_SIZE_HWORD: store_data_x = {2{store_operand_x[15:0]}};
    `LM32_SIZE_WORD:  store_data_x = store_operand_x;
    default:          store_data_x = {`LM32_WORD_WIDTH{1'bx}};
    endcase
end

// Generate byte enable according to size of load or store and address being accessed
always @(*)
begin
    casez ({size_x, load_store_address_x[1:0]})
    {`LM32_SIZE_BYTE, 2'b11}:  byte_enable_x = 4'b0001;
    {`LM32_SIZE_BYTE, 2'b10}:  byte_enable_x = 4'b0010;
    {`LM32_SIZE_BYTE, 2'b01}:  byte_enable_x = 4'b0100;
    {`LM32_SIZE_BYTE, 2'b00}:  byte_enable_x = 4'b1000;
    {`LM32_SIZE_HWORD, 2'b1?}: byte_enable_x = 4'b0011;
    {`LM32_SIZE_HWORD, 2'b0?}: byte_enable_x = 4'b1100;
    {`LM32_SIZE_WORD, 2'b??}:  byte_enable_x = 4'b1111;
    default:                   byte_enable_x = 4'bxxxx;
    endcase
end

`ifdef CFG_DRAM_ENABLED
// Only replace selected bytes
assign dram_store_data_m[`LM32_BYTE_0_RNG] = byte_enable_m[0] ? store_data_m[`LM32_BYTE_0_RNG] : dram_data_m[`LM32_BYTE_0_RNG];
assign dram_store_data_m[`LM32_BYTE_1_RNG] = byte_enable_m[1] ? store_data_m[`LM32_BYTE_1_RNG] : dram_data_m[`LM32_BYTE_1_RNG];
assign dram_store_data_m[`LM32_BYTE_2_RNG] = byte_enable_m[2] ? store_data_m[`LM32_BYTE_2_RNG] : dram_data_m[`LM32_BYTE_2_RNG];
assign dram_store_data_m[`LM32_BYTE_3_RNG] = byte_enable_m[3] ? store_data_m[`LM32_BYTE_3_RNG] : dram_data_m[`LM32_BYTE_3_RNG];
`endif

`ifdef CFG_IROM_ENABLED
// Only replace selected bytes
assign irom_store_data_m[`LM32_BYTE_0_RNG] = byte_enable_m[0] ? store_data_m[`LM32_BYTE_0_RNG] : irom_data_m[`LM32_BYTE_0_RNG];
assign irom_store_data_m[`LM32_BYTE_1_RNG] = byte_enable_m[1] ? store_data_m[`LM32_BYTE_1_RNG] : irom_data_m[`LM32_BYTE_1_RNG];
assign irom_store_data_m[`LM32_BYTE_2_RNG] = byte_enable_m[2] ? store_data_m[`LM32_BYTE_2_RNG] : irom_data_m[`LM32_BYTE_2_RNG];
assign irom_store_data_m[`LM32_BYTE_3_RNG] = byte_enable_m[3] ? store_data_m[`LM32_BYTE_3_RNG] : irom_data_m[`LM32_BYTE_3_RNG];
`endif

`ifdef CFG_IROM_ENABLED
    // Instead of implementing a byte-addressable instruction ROM (for store byte instruction),
    // a load-and-store architecture is used wherein a 32-bit value is loaded, the requisite
    // byte is replaced, and the whole 32-bit value is written back

    assign irom_address_xm = ((irom_select_m == `TRUE) && (store_q_m == `TRUE))
                             ? load_store_address_m
                             : load_store_address_x;

    // All store instructions perform a write operation in the M stage
    assign irom_we_xm =    (irom_select_m == `TRUE)
                        && (store_q_m == `TRUE);

    // A single port in instruction ROM is available to load-store unit for doing loads/stores.
    // Since every store requires a load (in X stage) and then a store (in M stage), we cannot
    // allow load (or store) instructions sequentially after the store instructions to proceed
    // until the store instruction has vacated M stage (i.e., completed the store operation)
    assign irom_stall_request_x =    (irom_select_x == `TRUE)
                                  && (store_q_x == `TRUE);
`endif

// M-stage read-data mux: one variant per combination of enabled memories
`ifdef CFG_DCACHE_ENABLED
 `ifdef CFG_DRAM_ENABLED
  `ifdef CFG_IROM_ENABLED
    // WB + DC + DRAM + IROM
    assign data_m = wb_select_m == `TRUE
                    ? wb_data_m
                    : dram_select_m == `TRUE
                      ? dram_data_m
                      : irom_select_m == `TRUE
                        ? irom_data_m
                        : dcache_data_m;
  `else
    // WB + DC + DRAM
    assign data_m = wb_select_m == `TRUE
                    ? wb_data_m
                    : dram_select_m == `TRUE
                      ? dram_data_m
                      : dcache_data_m;
  `endif
 `else
  `ifdef CFG_IROM_ENABLED
    // WB + DC + IROM
    assign data_m = wb_select_m == `TRUE
                    ? wb_data_m
                    : irom_select_m == `TRUE
                      ? irom_data_m
                      : dcache_data_m;
  `else
    // WB + DC
    assign data_m = wb_select_m == `TRUE
                    ? wb_data_m
                    : dcache_data_m;
  `endif
 `endif
`else
 `ifdef CFG_DRAM_ENABLED
  `ifdef CFG_IROM_ENABLED
    // WB + DRAM + IROM
    assign data_m = wb_select_m == `TRUE
                    ? wb_data_m
                    : dram_select_m == `TRUE
                      ? dram_data_m
                      : irom_data_m;
  `else
    // WB + DRAM
    assign data_m = wb_select_m == `TRUE
                    ? wb_data_m
                    : dram_data_m;
  `endif
 `else
  `ifdef CFG_IROM_ENABLED
    // WB + IROM
    assign data_m = wb_select_m == `TRUE
                    ? wb_data_m
                    : irom_data_m;
  `else
    // WB
    assign data_m = wb_data_m;
  `endif
 `endif
`endif

// Sub-word selection and sign/zero-extension for loads
always @(*)
begin
    casez ({size_w, load_store_address_w[1:0]})
    {`LM32_SIZE_BYTE, 2'b11}:  load_data_w = {{24{sign_extend_w & data_w[7]}}, data_w[7:0]};
    {`LM32_SIZE_BYTE, 2'b10}:  load_data_w = {{24{sign_extend_w & data_w[15]}}, data_w[15:8]};
    {`LM32_SIZE_BYTE, 2'b01}:  load_data_w = {{24{sign_extend_w & data_w[23]}}, data_w[23:16]};
    {`LM32_SIZE_BYTE, 2'b00}:  load_data_w = {{24{sign_extend_w & data_w[31]}}, data_w[31:24]};
    {`LM32_SIZE_HWORD, 2'b1?}: load_data_w = {{16{sign_extend_w & data_w[15]}}, data_w[15:0]};
    {`LM32_SIZE_HWORD, 2'b0?}: load_data_w = {{16{sign_extend_w & data_w[31]}}, data_w[31:16]};
    {`LM32_SIZE_WORD, 2'b??}:  load_data_w = data_w;
    default:                   load_data_w = {`LM32_WORD_WIDTH{1'bx}};
    endcase
end

// Unused/constant Wishbone signals
assign d_bte_o = `LM32_BTYPE_LINEAR;

`ifdef CFG_DCACHE_ENABLED
// Generate signal to indicate last word in cache line
// Only bytes_per_line values of 4, 8 and 16 have a branch here; any other
// value leaves first_cycle_type, next_cycle_type, last_word and
// first_address undriven.
generate
    case (bytes_per_line)
    4:
    begin
assign first_cycle_type = `LM32_CTYPE_END;
assign next_cycle_type = `LM32_CTYPE_END;
assign last_word = `TRUE;
assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:2], 2'b00};
    end
    8:
    begin
assign first_cycle_type = `LM32_CTYPE_INCREMENTING;
assign next_cycle_type = `LM32_CTYPE_END;
assign last_word = (&d_adr_o[addr_offset_msb:addr_offset_lsb]) == 1'b1;
assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:addr_offset_msb+1], {addr_offset_width{1'b0}}, 2'b00};
    end
    16:
    begin
assign first_cycle_type = `LM32_CTYPE_INCREMENTING;
assign next_cycle_type = d_adr_o[addr_offset_msb] == 1'b1 ? `LM32_CTYPE_END : `LM32_CTYPE_INCREMENTING;
assign last_word = (&d_adr_o[addr_offset_msb:addr_offset_lsb]) == 1'b1;
assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:addr_offset_msb+1], {addr_offset_width{1'b0}}, 2'b00};
    end
    endcase
endgenerate
`endif

/////////////////////////////////////////////////////
// Sequential Logic
/////////////////////////////////////////////////////

// Data Wishbone interface
always @(posedge clk_i `CFG_RESET_SENSITIVITY)
begin
    if (rst_i == `TRUE)
    begin
        d_cyc_o <= `FALSE;
        d_stb_o <= `FALSE;
        d_dat_o <= {`LM32_WORD_WIDTH{1'b0}};
        d_adr_o <= {`LM32_WORD_WIDTH{1'b0}};
        d_sel_o <= {`LM32_BYTE_SELECT_WIDTH{`FALSE}};
        d_we_o <= `FALSE;
        d_cti_o <= `LM32_CTYPE_END;
        d_lock_o <= `FALSE;
        wb_data_m <= {`LM32_WORD_WIDTH{1'b0}};
        wb_load_complete <= `FALSE;
        stall_wb_load <= `FALSE;
`ifdef CFG_DCACHE_ENABLED
        dcache_refill_ready <= `FALSE;
`endif
    end
    else
    begin
`ifdef CFG_DCACHE_ENABLED
        // Refill ready should only be asserted for a single cycle
        dcache_refill_ready <= `FALSE;
`endif
        // Is a Wishbone cycle already in progress?
        if (d_cyc_o == `TRUE)
        begin
            // Is the cycle complete?
            if ((d_ack_i == `TRUE) || (d_err_i == `TRUE))
            begin
`ifdef CFG_DCACHE_ENABLED
                if ((dcache_refilling == `TRUE) && (!last_word))
                begin
                    // Fetch next word of cache line
                    d_adr_o[addr_offset_msb:addr_offset_lsb] <= d_adr_o[addr_offset_msb:addr_offset_lsb] + 1'b1;
                end
                else
`endif
                begin
                    // Refill/access complete
                    d_cyc_o <= `FALSE;
                    d_stb_o <= `FALSE;
                    d_lock_o <= `FALSE;
                end
`ifdef CFG_DCACHE_ENABLED
                d_cti_o <= next_cycle_type;
                // If we are performing a refill, indicate to cache next word of data is ready
                dcache_refill_ready <= dcache_refilling;
`endif
                // Register data read from Wishbone interface
                wb_data_m <= d_dat_i;
                // Don't set when stores complete - otherwise we'll deadlock if load in m stage
                wb_load_complete <= !d_we_o;
            end
            // synthesis translate_off
            if (d_err_i == `TRUE)
                $display ("Data bus error. Address: %x", d_adr_o);
            // synthesis translate_on
        end
        else
        begin
`ifdef CFG_DCACHE_ENABLED
            if (dcache_refill_request == `TRUE)
            begin
                // Start cache refill
                d_adr_o <= first_address;
                d_cyc_o <= `TRUE;
                d_sel_o <= {`LM32_WORD_WIDTH/8{`TRUE}};
                d_stb_o <= `TRUE;
                d_we_o <= `FALSE;
                d_cti_o <= first_cycle_type;
                //d_lock_o <= `TRUE;
            end
            else
`endif
                 if (   (store_q_m == `TRUE)
                     && (stall_m == `FALSE)
`ifdef CFG_DRAM_ENABLED
                     && (dram_select_m == `FALSE)
`endif
`ifdef CFG_IROM_ENABLED
                     && (irom_select_m == `FALSE)
`endif
                    )
            begin
                // Data cache is write through, so all stores go to memory
                d_dat_o <= store_data_m;
                d_adr_o <= load_store_address_m;
                d_cyc_o <= `TRUE;
                d_sel_o <= byte_enable_m;
                d_stb_o <= `TRUE;
                d_we_o <= `TRUE;
                d_cti_o <= `LM32_CTYPE_END;
            end
            else if (   (load_q_m == `TRUE)
                     && (wb_select_m == `TRUE)
                     && (wb_load_complete == `FALSE)
                     // stall_m will be TRUE, because stall_wb_load will be TRUE
                    )
            begin
                // Read requested address
                stall_wb_load <= `FALSE;
                d_adr_o <= load_store_address_m;
                d_cyc_o <= `TRUE;
                d_sel_o <= byte_enable_m;
                d_stb_o <= `TRUE;
                d_we_o <= `FALSE;
                d_cti_o <= `LM32_CTYPE_END;
            end
        end
        // The assignments below come last in the block, so they take
        // priority over the assignments made to the same registers above.
        // Clear load/store complete flag when instruction leaves M stage
        if (stall_m == `FALSE)
            wb_load_complete <= `FALSE;
        // When a Wishbone load first enters the M stage, we need to stall it
        if ((load_q_x == `TRUE) && (wb_select_x == `TRUE) && (stall_x == `FALSE))
            stall_wb_load <= `TRUE;
        // Clear stall request if load instruction is killed
        if ((kill_m == `TRUE) || (exception_m == `TRUE))
            stall_wb_load <= `FALSE;
    end
end

// Pipeline registers

// X/M stage pipeline registers
always @(posedge clk_i `CFG_RESET_SENSITIVITY)
begin
    if (rst_i == `TRUE)
    begin
        sign_extend_m <= `FALSE;
        size_m <= 2'b00;
        // Full-width reset value, consistent with the d_sel_o reset
        byte_enable_m <= {`LM32_BYTE_SELECT_WIDTH{`FALSE}};
        store_data_m <= {`LM32_WORD_WIDTH{1'b0}};
`ifdef CFG_DCACHE_ENABLED
        dcache_select_m <= `FALSE;
`endif
`ifdef CFG_DRAM_ENABLED
        dram_select_m <= `FALSE;
`endif
`ifdef CFG_IROM_ENABLED
        irom_select_m <= `FALSE;
`endif
        wb_select_m <= `FALSE;
    end
    else
    begin
        if (stall_m == `FALSE)
        begin
            sign_extend_m <= sign_extend_x;
            size_m <= size_x;
            byte_enable_m <= byte_enable_x;
            store_data_m <= store_data_x;
`ifdef CFG_DCACHE_ENABLED
            dcache_select_m <= dcache_select_x;
`endif
`ifdef CFG_DRAM_ENABLED
            dram_select_m <= dram_select_x;
`endif
`ifdef CFG_IROM_ENABLED
            irom_select_m <= irom_select_x;
`endif
            wb_select_m <= wb_select_x;
        end
    end
end

// M/W stage pipeline registers
// (updated every clock; there is no stall qualifier on this stage)
always @(posedge clk_i `CFG_RESET_SENSITIVITY)
begin
    if (rst_i == `TRUE)
    begin
        size_w <= 2'b00;
        data_w <= {`LM32_WORD_WIDTH{1'b0}};
        sign_extend_w <= `FALSE;
    end
    else
    begin
        size_w <= size_m;
        data_w <= data_m;
        sign_extend_w <= sign_extend_m;
    end
end

/////////////////////////////////////////////////////
// Behavioural Logic
/////////////////////////////////////////////////////

// synthesis translate_off

// Check for non-aligned loads or stores
always @(posedge clk_i)
begin
    if (((load_q_m == `TRUE) || (store_q_m == `TRUE)) && (stall_m == `FALSE))
    begin
        if ((size_m === `LM32_SIZE_HWORD) && (load_store_address_m[0] !== 1'b0))
            $display ("Warning: Non-aligned halfword access. Address: 0x%0x Time: %0t.", load_store_address_m, $time);
        if ((size_m === `LM32_SIZE_WORD) && (load_store_address_m[1:0] !== 2'b00))
            $display ("Warning: Non-aligned word access. Address: 0x%0x Time: %0t.", load_store_address_m, $time);
    end
end

// synthesis translate_on

endmodule