// Recovered from changeset view: new file b/lm32_load_store_unit.v
// (Sun Apr 04 20:40:03 2010 +0100).
// =============================================================================
//                           COPYRIGHT NOTICE
// Copyright 2006 (c) Lattice Semiconductor Corporation
// ALL RIGHTS RESERVED
// This confidential and proprietary software may be used only as authorised by
// a licensing agreement from Lattice Semiconductor Corporation.
// The entire notice above must be reproduced on all authorized copies and
// copies may only be made to the extent permitted by a licensing agreement from
// Lattice Semiconductor Corporation.
//
// Lattice Semiconductor Corporation        TEL : 1-800-Lattice (USA and Canada)
// 5555 NE Moore Court                            408-826-6000 (other locations)
// Hillsboro, OR 97124                     web  : http://www.latticesemi.com/
// U.S.A                                   email: techsupport@latticesemi.com
// =============================================================================/
//                         FILE DETAILS
// Project          : LatticeMico32
// File             : lm32_load_store_unit.v
// Title            : Load and store unit
// Dependencies     : lm32_include.v
// Version          : 6.1.17
//                  : Initial Release
// Version          : 7.0SP2, 3.0
//                  : No Change
// Version          : 3.1
//                  : Instead of disallowing an instruction cache miss on a data cache
//                  : miss, both can now occur at the same time. If both occur at same
//                  : time, then restart address is the address of instruction that
//                  : caused data cache miss.
// Version          : 3.2
//                  : EBRs use SYNC resets instead of ASYNC resets.
// Version          : 3.3
//                  : Support for new non-cacheable Data Memory that is accessible by
//                  : the data port and has a one cycle access latency.
// Version          : 3.4
//                  : No change
// Version          : 3.5
//                  : Bug fix: Inline memory is correctly generated if it is not a
//                  : power-of-two
// =============================================================================

`include "lm32_include.v"

// -----------------------------------------------------------------------------
// Overview
//
// The load/store unit steers each load or store to exactly one target, chosen
// in the X stage by comparing load_store_address_x against configured ranges:
//   * inline data RAM            (only when CFG_DRAM_ENABLED is defined)
//   * instruction ROM data port  (only when CFG_IROM_ENABLED is defined)
//   * data cache                 (only when CFG_DCACHE_ENABLED is defined)
//   * the data Wishbone bus      (everything not claimed by the above)
//
// Store data is replicated across the byte lanes and a byte-enable mask is
// derived from the access size and address[1:0]; address offset 2'b00 maps to
// the most significant byte lane. Load data is registered into the W stage,
// where the addressed byte/half-word is extracted and sign- or zero-extended.
//
// Review changes relative to the original text:
//   * irom_store_data_m is now declared as a word-wide output so that the port
//     declaration matches its wire declaration.
//   * byte_enable_m is reset with an explicit full-width zero.
// -----------------------------------------------------------------------------

/////////////////////////////////////////////////////
// Module interface
/////////////////////////////////////////////////////

module lm32_load_store_unit (
    // ----- Inputs -------
    clk_i,
    rst_i,
    // From pipeline
    stall_a,
    stall_x,
    stall_m,
    kill_x,
    kill_m,
    exception_m,
    store_operand_x,
    load_store_address_x,
    load_store_address_m,
    load_store_address_w,
    load_x,
    store_x,
    load_q_x,
    store_q_x,
    load_q_m,
    store_q_m,
    sign_extend_x,
    size_x,
`ifdef CFG_DCACHE_ENABLED
    dflush,
`endif
`ifdef CFG_IROM_ENABLED
    irom_data_m,
`endif
    // From Wishbone
    d_dat_i,
    d_ack_i,
    d_err_i,
    d_rty_i,
    // ----- Outputs -------
    // To pipeline
`ifdef CFG_DCACHE_ENABLED
    dcache_refill_request,
    dcache_restart_request,
    dcache_stall_request,
    dcache_refilling,
`endif
`ifdef CFG_IROM_ENABLED
    irom_store_data_m,
    irom_address_xm,
    irom_we_xm,
    irom_stall_request_x,
`endif
    load_data_w,
    stall_wb_load,
    // To Wishbone
    d_dat_o,
    d_adr_o,
    d_cyc_o,
    d_sel_o,
    d_stb_o,
    d_we_o,
    d_cti_o,
    d_lock_o,
    d_bte_o
    );

/////////////////////////////////////////////////////
// Parameters
/////////////////////////////////////////////////////

parameter associativity = 1;                            // Associativity of the cache (Number of ways)
parameter sets = 512;                                   // Number of sets
parameter bytes_per_line = 16;                          // Number of bytes per cache line
parameter base_address = 0;                             // Base address of cacheable memory
parameter limit = 0;                                    // Limit (highest address) of cacheable memory

// For bytes_per_line == 4, we set 1 so part-select range isn't reversed, even though not really used
localparam addr_offset_width = bytes_per_line == 4 ? 1 : clogb2(bytes_per_line)-1-2;
localparam addr_offset_lsb = 2;
localparam addr_offset_msb = (addr_offset_lsb+addr_offset_width-1);

/////////////////////////////////////////////////////
// Inputs
/////////////////////////////////////////////////////

input clk_i;                                            // Clock
input rst_i;                                            // Reset

input stall_a;                                          // A stage stall
input stall_x;                                          // X stage stall
input stall_m;                                          // M stage stall
input kill_x;                                           // Kill instruction in X stage
input kill_m;                                           // Kill instruction in M stage
input exception_m;                                      // An exception occurred in the M stage

input [`LM32_WORD_RNG] store_operand_x;                 // Data read from register to store
input [`LM32_WORD_RNG] load_store_address_x;            // X stage load/store address
input [`LM32_WORD_RNG] load_store_address_m;            // M stage load/store address
input [1:0] load_store_address_w;                       // W stage load/store address (only least two significant bits are needed)
input load_x;                                           // Load instruction in X stage
input store_x;                                          // Store instruction in X stage
input load_q_x;                                         // Load instruction in X stage
input store_q_x;                                        // Store instruction in X stage
input load_q_m;                                         // Load instruction in M stage
input store_q_m;                                        // Store instruction in M stage
input sign_extend_x;                                    // Whether load instruction in X stage should sign extend or zero extend
input [`LM32_SIZE_RNG] size_x;                          // Size of load or store (byte, hword, word)

`ifdef CFG_DCACHE_ENABLED
input dflush;                                           // Flush the data cache
`endif

`ifdef CFG_IROM_ENABLED
input [`LM32_WORD_RNG] irom_data_m;                     // Data from Instruction-ROM
`endif

input [`LM32_WORD_RNG] d_dat_i;                         // Data Wishbone interface read data
input d_ack_i;                                          // Data Wishbone interface acknowledgement
input d_err_i;                                          // Data Wishbone interface error
input d_rty_i;                                          // Data Wishbone interface retry

/////////////////////////////////////////////////////
// Outputs
/////////////////////////////////////////////////////

`ifdef CFG_DCACHE_ENABLED
output dcache_refill_request;                           // Request to refill data cache
wire   dcache_refill_request;
output dcache_restart_request;                          // Request to restart the instruction that caused a data cache miss
wire   dcache_restart_request;
output dcache_stall_request;                            // Data cache stall request
wire   dcache_stall_request;
output dcache_refilling;                                // Driven by the data cache's .refilling output
wire   dcache_refilling;
`endif

`ifdef CFG_IROM_ENABLED
// The port range must match the wire range below; the original declared this
// port as a scalar, which conflicts with the 32-bit net of the same name.
output [`LM32_WORD_RNG] irom_store_data_m;              // Store data to Instruction ROM
wire   [`LM32_WORD_RNG] irom_store_data_m;
output [`LM32_WORD_RNG] irom_address_xm;                // Load/store address to Instruction ROM
wire   [`LM32_WORD_RNG] irom_address_xm;
output irom_we_xm;                                      // Write-enable of 2nd port of Instruction ROM
wire   irom_we_xm;
output irom_stall_request_x;                            // Stall instruction in D stage
wire   irom_stall_request_x;
`endif

output [`LM32_WORD_RNG] load_data_w;                    // Result of a load instruction
reg    [`LM32_WORD_RNG] load_data_w;
output stall_wb_load;                                   // Request to stall pipeline due to a load from the Wishbone interface
reg    stall_wb_load;

output [`LM32_WORD_RNG] d_dat_o;                        // Data Wishbone interface write data
reg    [`LM32_WORD_RNG] d_dat_o;
output [`LM32_WORD_RNG] d_adr_o;                        // Data Wishbone interface address
reg    [`LM32_WORD_RNG] d_adr_o;
output d_cyc_o;                                         // Data Wishbone interface cycle
reg    d_cyc_o;
output [`LM32_BYTE_SELECT_RNG] d_sel_o;                 // Data Wishbone interface byte select
reg    [`LM32_BYTE_SELECT_RNG] d_sel_o;
output d_stb_o;                                         // Data Wishbone interface strobe
reg    d_stb_o;
output d_we_o;                                          // Data Wishbone interface write enable
reg    d_we_o;
output [`LM32_CTYPE_RNG] d_cti_o;                       // Data Wishbone interface cycle type
reg    [`LM32_CTYPE_RNG] d_cti_o;
output d_lock_o;                                        // Data Wishbone interface lock bus
reg    d_lock_o;
output [`LM32_BTYPE_RNG] d_bte_o;                       // Data Wishbone interface burst type
wire   [`LM32_BTYPE_RNG] d_bte_o;

/////////////////////////////////////////////////////
// Internal nets and registers
/////////////////////////////////////////////////////

// Microcode pipeline registers - See inputs for description
reg [`LM32_SIZE_RNG] size_m;
reg [`LM32_SIZE_RNG] size_w;
reg sign_extend_m;
reg sign_extend_w;
reg [`LM32_WORD_RNG] store_data_x;
reg [`LM32_WORD_RNG] store_data_m;
reg [`LM32_BYTE_SELECT_RNG] byte_enable_x;
reg [`LM32_BYTE_SELECT_RNG] byte_enable_m;
wire [`LM32_WORD_RNG] data_m;
reg [`LM32_WORD_RNG] data_w;

`ifdef CFG_DCACHE_ENABLED
wire dcache_select_x;                                   // Select data cache to load from / store to
reg dcache_select_m;
wire [`LM32_WORD_RNG] dcache_data_m;                    // Data read from cache
wire [`LM32_WORD_RNG] dcache_refill_address;            // Address to refill data cache from
reg dcache_refill_ready;                                // Indicates the next word of refill data is ready
wire [`LM32_CTYPE_RNG] first_cycle_type;                // First Wishbone cycle type
wire [`LM32_CTYPE_RNG] next_cycle_type;                 // Next Wishbone cycle type
wire last_word;                                         // Indicates if this is the last word in the cache line
wire [`LM32_WORD_RNG] first_address;                    // First cache refill address
`endif
`ifdef CFG_DRAM_ENABLED
wire dram_select_x;                                     // Select data RAM to load from / store to
reg dram_select_m;
reg dram_bypass_en;                                     // RAW in data RAM; read latched (bypass) value rather than value from memory
reg [`LM32_WORD_RNG] dram_bypass_data;                  // Latched value of store'd data to data RAM
wire [`LM32_WORD_RNG] dram_data_out;                    // Data read from data RAM
wire [`LM32_WORD_RNG] dram_data_m;                      // Data read from data RAM: bypass value or value from memory
wire [`LM32_WORD_RNG] dram_store_data_m;                // Data to write to RAM
`endif
wire wb_select_x;                                       // Select Wishbone to load from / store to
`ifdef CFG_IROM_ENABLED
wire irom_select_x;                                     // Select instruction ROM to load from / store to
reg  irom_select_m;
`endif
reg wb_select_m;
reg [`LM32_WORD_RNG] wb_data_m;                         // Data read from Wishbone
reg wb_load_complete;                                   // Indicates when a Wishbone load is complete

/////////////////////////////////////////////////////
// Functions
/////////////////////////////////////////////////////

`include "lm32_functions.v"

/////////////////////////////////////////////////////
// Instantiations
/////////////////////////////////////////////////////

`ifdef CFG_DRAM_ENABLED
   // Data RAM
   // Port A is read-only (WrA tied FALSE) and is addressed from the X stage;
   // port B is write-only in practice (QB unconnected) and is addressed from
   // the M stage.
   // NOTE(review): the address part-selects below size themselves with
   // clogb2(...)-1 while pmi_addr_width_* uses clogb2_v1(...). Both functions
   // come from lm32_functions.v, which is not visible here - confirm the two
   // widths agree when the RAM depth is not a power of two.
   pmi_ram_dp_true
     #(
       // ----- Parameters -------
       .pmi_family             (`LATTICE_FAMILY),

       //.pmi_addr_depth_a       (1 << (clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
       //.pmi_addr_width_a       ((clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
       //.pmi_data_width_a       (`LM32_WORD_WIDTH),
       //.pmi_addr_depth_b       (1 << (clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
       //.pmi_addr_width_b       ((clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
       //.pmi_data_width_b       (`LM32_WORD_WIDTH),

       .pmi_addr_depth_a       (`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1),
       .pmi_addr_width_a       (clogb2_v1(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)),
       .pmi_data_width_a       (`LM32_WORD_WIDTH),
       .pmi_addr_depth_b       (`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1),
       .pmi_addr_width_b       (clogb2_v1(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)),
       .pmi_data_width_b       (`LM32_WORD_WIDTH),

       .pmi_regmode_a          ("noreg"),
       .pmi_regmode_b          ("noreg"),
       .pmi_gsr                ("enable"),
       .pmi_resetmode          ("sync"),
       .pmi_init_file          (`CFG_DRAM_INIT_FILE),
       .pmi_init_file_format   (`CFG_DRAM_INIT_FILE_FORMAT),
       .module_type            ("pmi_ram_dp_true")
       )
   ram (
        // ----- Inputs -------
        .ClockA                 (clk_i),
        .ClockB                 (clk_i),
        .ResetA                 (rst_i),
        .ResetB                 (rst_i),
        .DataInA                ({32{1'b0}}),
        .DataInB                (dram_store_data_m),
        .AddressA               (load_store_address_x[(clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)+2-1:2]),
        .AddressB               (load_store_address_m[(clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)+2-1:2]),
        // .ClockEnA               (!stall_x & (load_x | store_x)),
        .ClockEnA               (!stall_x),
        .ClockEnB               (!stall_m),
        .WrA                    (`FALSE),
        .WrB                    (store_q_m & dram_select_m),
        // ----- Outputs -------
        .QA                     (dram_data_out),
        .QB                     ()
        );

   /*----------------------------------------------------------------------
    EBRs cannot perform reads from location 'written to' on the same clock
    edge. Therefore bypass logic is required to latch the store'd value
    and use it for the load (instead of value from memory).
    ----------------------------------------------------------------------*/
   always @(posedge clk_i `CFG_RESET_SENSITIVITY)
     if (rst_i == `TRUE)
       begin
          dram_bypass_en <= `FALSE;
          dram_bypass_data <= 0;
       end
     else
       begin
          // Capture the word being written whenever the X stage advances
          if (stall_x == `FALSE)
            dram_bypass_data <= dram_store_data_m;

          // Enable the bypass when a store in M and a load/store in X hit the
          // same word address while both stages advance ...
          if (   (stall_m == `FALSE)
              && (stall_x == `FALSE)
              && (store_q_m == `TRUE)
              && (   (load_x == `TRUE)
                  || (store_x == `TRUE)
                 )
              && (load_store_address_x[(`LM32_WORD_WIDTH-1):2] == load_store_address_m[(`LM32_WORD_WIDTH-1):2])
             )
            dram_bypass_en <= `TRUE;
          else
            // ... and drop it again the next time the X stage advances
            if (   (dram_bypass_en == `TRUE)
                && (stall_x == `FALSE)
               )
              dram_bypass_en <= `FALSE;
       end

   assign dram_data_m = dram_bypass_en ? dram_bypass_data : dram_data_out;
`endif

`ifdef CFG_DCACHE_ENABLED
// Data cache
lm32_dcache #(
    .associativity          (associativity),
    .sets                   (sets),
    .bytes_per_line         (bytes_per_line),
    .base_address           (base_address),
    .limit                  (limit)
    ) dcache (
    // ----- Inputs -----
    .clk_i                  (clk_i),
    .rst_i                  (rst_i),
    .stall_a                (stall_a),
    .stall_x                (stall_x),
    .stall_m                (stall_m),
    .address_x              (load_store_address_x),
    .address_m              (load_store_address_m),
    .load_q_m               (load_q_m & dcache_select_m),
    .store_q_m              (store_q_m & dcache_select_m),
    .store_data             (store_data_m),
    .store_byte_select      (byte_enable_m & {4{dcache_select_m}}),
    .refill_ready           (dcache_refill_ready),
    .refill_data            (wb_data_m),
    .dflush                 (dflush),
    // ----- Outputs -----
    .stall_request          (dcache_stall_request),
    .restart_request        (dcache_restart_request),
    .refill_request         (dcache_refill_request),
    .refill_address         (dcache_refill_address),
    .refilling              (dcache_refilling),
    .load_data              (dcache_data_m)
    );
`endif

/////////////////////////////////////////////////////
// Combinational Logic
/////////////////////////////////////////////////////

// Select where data should be loaded from / stored to
`ifdef CFG_DRAM_ENABLED
   assign dram_select_x =    (load_store_address_x >= `CFG_DRAM_BASE_ADDRESS)
                          && (load_store_address_x <= `CFG_DRAM_LIMIT);
`endif

`ifdef CFG_IROM_ENABLED
   assign irom_select_x =    (load_store_address_x >= `CFG_IROM_BASE_ADDRESS)
                          && (load_store_address_x <= `CFG_IROM_LIMIT);
`endif

`ifdef CFG_DCACHE_ENABLED
   // The cache only claims addresses that neither the data RAM nor the IROM claim
   assign dcache_select_x =    (load_store_address_x >= `CFG_DCACHE_BASE_ADDRESS)
                            && (load_store_address_x <= `CFG_DCACHE_LIMIT)
`ifdef CFG_DRAM_ENABLED
                            && (dram_select_x == `FALSE)
`endif
`ifdef CFG_IROM_ENABLED
                            && (irom_select_x == `FALSE)
`endif
                     ;
`endif

   // Wishbone is the fall-through target: selected when nothing else is
   assign wb_select_x =    `TRUE
`ifdef CFG_DCACHE_ENABLED
                        && !dcache_select_x
`endif
`ifdef CFG_DRAM_ENABLED
                        && !dram_select_x
`endif
`ifdef CFG_IROM_ENABLED
                        && !irom_select_x
`endif
                     ;

// Make sure data to store is in correct byte lane
// (the operand is replicated so that every lane of the chosen size carries it;
// byte_enable_x then picks the lane that is actually written)
always @(*)
begin
    case (size_x)
    `LM32_SIZE_BYTE:  store_data_x = {4{store_operand_x[7:0]}};
    `LM32_SIZE_HWORD: store_data_x = {2{store_operand_x[15:0]}};
    `LM32_SIZE_WORD:  store_data_x = store_operand_x;
    default:          store_data_x = {`LM32_WORD_WIDTH{1'bx}};
    endcase
end

// Generate byte enable according to size of load or store and address being accessed
// (address offset 2'b00 selects the most significant byte lane)
always @(*)
begin
    casez ({size_x, load_store_address_x[1:0]})
    {`LM32_SIZE_BYTE, 2'b11}:  byte_enable_x = 4'b0001;
    {`LM32_SIZE_BYTE, 2'b10}:  byte_enable_x = 4'b0010;
    {`LM32_SIZE_BYTE, 2'b01}:  byte_enable_x = 4'b0100;
    {`LM32_SIZE_BYTE, 2'b00}:  byte_enable_x = 4'b1000;
    {`LM32_SIZE_HWORD, 2'b1?}: byte_enable_x = 4'b0011;
    {`LM32_SIZE_HWORD, 2'b0?}: byte_enable_x = 4'b1100;
    {`LM32_SIZE_WORD, 2'b??}:  byte_enable_x = 4'b1111;
    default:                   byte_enable_x = 4'bxxxx;
    endcase
end

`ifdef CFG_DRAM_ENABLED
// Only replace selected bytes
// (read-modify-write: unselected bytes are refilled from the current RAM/bypass word)
assign dram_store_data_m[`LM32_BYTE_0_RNG] = byte_enable_m[0] ? store_data_m[`LM32_BYTE_0_RNG] : dram_data_m[`LM32_BYTE_0_RNG];
assign dram_store_data_m[`LM32_BYTE_1_RNG] = byte_enable_m[1] ? store_data_m[`LM32_BYTE_1_RNG] : dram_data_m[`LM32_BYTE_1_RNG];
assign dram_store_data_m[`LM32_BYTE_2_RNG] = byte_enable_m[2] ? store_data_m[`LM32_BYTE_2_RNG] : dram_data_m[`LM32_BYTE_2_RNG];
assign dram_store_data_m[`LM32_BYTE_3_RNG] = byte_enable_m[3] ? store_data_m[`LM32_BYTE_3_RNG] : dram_data_m[`LM32_BYTE_3_RNG];
`endif

`ifdef CFG_IROM_ENABLED
// Only replace selected bytes
assign irom_store_data_m[`LM32_BYTE_0_RNG] = byte_enable_m[0] ? store_data_m[`LM32_BYTE_0_RNG] : irom_data_m[`LM32_BYTE_0_RNG];
assign irom_store_data_m[`LM32_BYTE_1_RNG] = byte_enable_m[1] ? store_data_m[`LM32_BYTE_1_RNG] : irom_data_m[`LM32_BYTE_1_RNG];
assign irom_store_data_m[`LM32_BYTE_2_RNG] = byte_enable_m[2] ? store_data_m[`LM32_BYTE_2_RNG] : irom_data_m[`LM32_BYTE_2_RNG];
assign irom_store_data_m[`LM32_BYTE_3_RNG] = byte_enable_m[3] ? store_data_m[`LM32_BYTE_3_RNG] : irom_data_m[`LM32_BYTE_3_RNG];
`endif

`ifdef CFG_IROM_ENABLED
   // Instead of implementing a byte-addressable instruction ROM (for store byte instruction),
   // a load-and-store architecture is used wherein a 32-bit value is loaded, the requisite
   // byte is replaced, and the whole 32-bit value is written back

   assign irom_address_xm = ((irom_select_m == `TRUE) && (store_q_m == `TRUE))
                            ? load_store_address_m
                            : load_store_address_x;

   // All store instructions perform a write operation in the M stage
   assign irom_we_xm =    (irom_select_m == `TRUE)
                       && (store_q_m == `TRUE);

   // A single port in instruction ROM is available to load-store unit for doing loads/stores.
   // Since every store requires a load (in X stage) and then a store (in M stage), we cannot
   // allow load (or store) instructions sequentially after the store instructions to proceed
   // until the store instruction has vacated M stage (i.e., completed the store operation)
   assign irom_stall_request_x =    (irom_select_x == `TRUE)
                                 && (store_q_x == `TRUE);
`endif

// M stage load-data multiplexer. One variant is compiled per combination of
// enabled memories; in each, the last alternative is the implicit default.
`ifdef CFG_DCACHE_ENABLED
 `ifdef CFG_DRAM_ENABLED
  `ifdef CFG_IROM_ENABLED
   // WB + DC + DRAM + IROM
   assign data_m = wb_select_m == `TRUE
                   ? wb_data_m
                   : dram_select_m == `TRUE
                     ? dram_data_m
                     : irom_select_m == `TRUE
                       ? irom_data_m
                       : dcache_data_m;
  `else
   // WB + DC + DRAM
   assign data_m = wb_select_m == `TRUE
                   ? wb_data_m
                   : dram_select_m == `TRUE
                     ? dram_data_m
                     : dcache_data_m;
  `endif
 `else
  `ifdef CFG_IROM_ENABLED
   // WB + DC + IROM
   assign data_m = wb_select_m == `TRUE
                   ? wb_data_m
                   : irom_select_m == `TRUE
                     ? irom_data_m
                     : dcache_data_m;
  `else
   // WB + DC
   assign data_m = wb_select_m == `TRUE
                   ? wb_data_m
                   : dcache_data_m;
  `endif
 `endif
`else
 `ifdef CFG_DRAM_ENABLED
  `ifdef CFG_IROM_ENABLED
   // WB + DRAM + IROM
   assign data_m = wb_select_m == `TRUE
                   ? wb_data_m
                   : dram_select_m == `TRUE
                     ? dram_data_m
                     : irom_data_m;
  `else
   // WB + DRAM
   assign data_m = wb_select_m == `TRUE
                   ? wb_data_m
                   : dram_data_m;
  `endif
 `else
  `ifdef CFG_IROM_ENABLED
   // WB + IROM
   assign data_m = wb_select_m == `TRUE
                   ? wb_data_m
                   : irom_data_m;
  `else
   // WB
   assign data_m = wb_data_m;
  `endif
 `endif
`endif

// Sub-word selection and sign/zero-extension for loads
// (sign_extend_w gates the sign bit, so a FALSE value yields zero-extension)
always @(*)
begin
    casez ({size_w, load_store_address_w[1:0]})
    {`LM32_SIZE_BYTE, 2'b11}:  load_data_w = {{24{sign_extend_w & data_w[7]}}, data_w[7:0]};
    {`LM32_SIZE_BYTE, 2'b10}:  load_data_w = {{24{sign_extend_w & data_w[15]}}, data_w[15:8]};
    {`LM32_SIZE_BYTE, 2'b01}:  load_data_w = {{24{sign_extend_w & data_w[23]}}, data_w[23:16]};
    {`LM32_SIZE_BYTE, 2'b00}:  load_data_w = {{24{sign_extend_w & data_w[31]}}, data_w[31:24]};
    {`LM32_SIZE_HWORD, 2'b1?}: load_data_w = {{16{sign_extend_w & data_w[15]}}, data_w[15:0]};
    {`LM32_SIZE_HWORD, 2'b0?}: load_data_w = {{16{sign_extend_w & data_w[31]}}, data_w[31:16]};
    {`LM32_SIZE_WORD, 2'b??}:  load_data_w = data_w;
    default:                   load_data_w = {`LM32_WORD_WIDTH{1'bx}};
    endcase
end

// Unused/constant Wishbone signals
assign d_bte_o = `LM32_BTYPE_LINEAR;

`ifdef CFG_DCACHE_ENABLED
// Generate signal to indicate last word in cache line
// NOTE(review): only bytes_per_line values of 4, 8 and 16 have an arm here;
// any other value leaves these four wires undriven.
generate
    case (bytes_per_line)
    4:
    begin
assign first_cycle_type = `LM32_CTYPE_END;
assign next_cycle_type = `LM32_CTYPE_END;
assign last_word = `TRUE;
assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:2], 2'b00};
    end
    8:
    begin
assign first_cycle_type = `LM32_CTYPE_INCREMENTING;
assign next_cycle_type = `LM32_CTYPE_END;
assign last_word = (&d_adr_o[addr_offset_msb:addr_offset_lsb]) == 1'b1;
assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:addr_offset_msb+1], {addr_offset_width{1'b0}}, 2'b00};
    end
    16:
    begin
assign first_cycle_type = `LM32_CTYPE_INCREMENTING;
assign next_cycle_type = d_adr_o[addr_offset_msb] == 1'b1 ? `LM32_CTYPE_END : `LM32_CTYPE_INCREMENTING;
assign last_word = (&d_adr_o[addr_offset_msb:addr_offset_lsb]) == 1'b1;
assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:addr_offset_msb+1], {addr_offset_width{1'b0}}, 2'b00};
    end
    endcase
endgenerate
`endif

/////////////////////////////////////////////////////
// Sequential Logic
/////////////////////////////////////////////////////

// Data Wishbone interface
// Statement order matters in this block: where several non-blocking
// assignments target the same register in one clock, the textually last one
// takes effect (e.g. the kill/exception clear of stall_wb_load at the bottom
// overrides the set just above it).
always @(posedge clk_i `CFG_RESET_SENSITIVITY)
begin
    if (rst_i == `TRUE)
    begin
        d_cyc_o <= `FALSE;
        d_stb_o <= `FALSE;
        d_dat_o <= {`LM32_WORD_WIDTH{1'b0}};
        d_adr_o <= {`LM32_WORD_WIDTH{1'b0}};
        d_sel_o <= {`LM32_BYTE_SELECT_WIDTH{`FALSE}};
        d_we_o <= `FALSE;
        d_cti_o <= `LM32_CTYPE_END;
        d_lock_o <= `FALSE;
        wb_data_m <= {`LM32_WORD_WIDTH{1'b0}};
        wb_load_complete <= `FALSE;
        stall_wb_load <= `FALSE;
`ifdef CFG_DCACHE_ENABLED
        dcache_refill_ready <= `FALSE;
`endif
    end
    else
    begin
`ifdef CFG_DCACHE_ENABLED
        // Refill ready should only be asserted for a single cycle
        dcache_refill_ready <= `FALSE;
`endif
        // Is a Wishbone cycle already in progress?
        if (d_cyc_o == `TRUE)
        begin
            // Is the cycle complete?
            if ((d_ack_i == `TRUE) || (d_err_i == `TRUE))
            begin
`ifdef CFG_DCACHE_ENABLED
                if ((dcache_refilling == `TRUE) && (!last_word))
                begin
                    // Fetch next word of cache line
                    d_adr_o[addr_offset_msb:addr_offset_lsb] <= d_adr_o[addr_offset_msb:addr_offset_lsb] + 1'b1;
                end
                else
`endif
                begin
                    // Refill/access complete
                    d_cyc_o <= `FALSE;
                    d_stb_o <= `FALSE;
                    d_lock_o <= `FALSE;
                end
`ifdef CFG_DCACHE_ENABLED
                d_cti_o <= next_cycle_type;
                // If we are performing a refill, indicate to cache next word of data is ready
                dcache_refill_ready <= dcache_refilling;
`endif
                // Register data read from Wishbone interface
                wb_data_m <= d_dat_i;
                // Don't set when stores complete - otherwise we'll deadlock if load in m stage
                wb_load_complete <= !d_we_o;
            end
            // synthesis translate_off
            if (d_err_i == `TRUE)
                $display ("Data bus error. Address: %x", d_adr_o);
            // synthesis translate_on
        end
        else
        begin
            // Bus idle: start a new cycle. Priority is cache refill, then
            // store, then Wishbone load.
`ifdef CFG_DCACHE_ENABLED
            if (dcache_refill_request == `TRUE)
            begin
                // Start cache refill
                d_adr_o <= first_address;
                d_cyc_o <= `TRUE;
                d_sel_o <= {`LM32_WORD_WIDTH/8{`TRUE}};
                d_stb_o <= `TRUE;
                d_we_o <= `FALSE;
                d_cti_o <= first_cycle_type;
                //d_lock_o <= `TRUE;
            end
            else
`endif
                 if (   (store_q_m == `TRUE)
                     && (stall_m == `FALSE)
`ifdef CFG_DRAM_ENABLED
                     && (dram_select_m == `FALSE)
`endif
`ifdef CFG_IROM_ENABLED
                     && (irom_select_m == `FALSE)
`endif
                    )
            begin
                // Data cache is write through, so all stores go to memory
                d_dat_o <= store_data_m;
                d_adr_o <= load_store_address_m;
                d_cyc_o <= `TRUE;
                d_sel_o <= byte_enable_m;
                d_stb_o <= `TRUE;
                d_we_o <= `TRUE;
                d_cti_o <= `LM32_CTYPE_END;
            end
            else if (   (load_q_m == `TRUE)
                     && (wb_select_m == `TRUE)
                     && (wb_load_complete == `FALSE)
                     // stall_m will be TRUE, because stall_wb_load will be TRUE
                    )
            begin
                // Read requested address
                stall_wb_load <= `FALSE;
                d_adr_o <= load_store_address_m;
                d_cyc_o <= `TRUE;
                d_sel_o <= byte_enable_m;
                d_stb_o <= `TRUE;
                d_we_o <= `FALSE;
                d_cti_o <= `LM32_CTYPE_END;
            end
        end
        // Clear load/store complete flag when instruction leaves M stage
        if (stall_m == `FALSE)
            wb_load_complete <= `FALSE;
        // When a Wishbone load first enters the M stage, we need to stall it
        if ((load_q_x == `TRUE) && (wb_select_x == `TRUE) && (stall_x == `FALSE))
            stall_wb_load <= `TRUE;
        // Clear stall request if load instruction is killed
        if ((kill_m == `TRUE) || (exception_m == `TRUE))
            stall_wb_load <= `FALSE;
    end
end

// Pipeline registers

// X/M stage pipeline registers
always @(posedge clk_i `CFG_RESET_SENSITIVITY)
begin
    if (rst_i == `TRUE)
    begin
        sign_extend_m <= `FALSE;
        size_m <= 2'b00;
        // Explicit full-width zero, matching the d_sel_o reset above
        byte_enable_m <= {`LM32_BYTE_SELECT_WIDTH{`FALSE}};
        store_data_m <= {`LM32_WORD_WIDTH{1'b0}};
`ifdef CFG_DCACHE_ENABLED
        dcache_select_m <= `FALSE;
`endif
`ifdef CFG_DRAM_ENABLED
        dram_select_m <= `FALSE;
`endif
`ifdef CFG_IROM_ENABLED
        irom_select_m <= `FALSE;
`endif
        wb_select_m <= `FALSE;
    end
    else
    begin
        // Advance X-stage values into M only when the M stage is not stalled
        if (stall_m == `FALSE)
        begin
            sign_extend_m <= sign_extend_x;
            size_m <= size_x;
            byte_enable_m <= byte_enable_x;
            store_data_m <= store_data_x;
`ifdef CFG_DCACHE_ENABLED
            dcache_select_m <= dcache_select_x;
`endif
`ifdef CFG_DRAM_ENABLED
            dram_select_m <= dram_select_x;
`endif
`ifdef CFG_IROM_ENABLED
            irom_select_m <= irom_select_x;
`endif
            wb_select_m <= wb_select_x;
        end
    end
end

// M/W stage pipeline registers
// (updated every clock; there is no stall qualifier on this stage)
always @(posedge clk_i `CFG_RESET_SENSITIVITY)
begin
    if (rst_i == `TRUE)
    begin
        size_w <= 2'b00;
        data_w <= {`LM32_WORD_WIDTH{1'b0}};
        sign_extend_w <= `FALSE;
    end
    else
    begin
        size_w <= size_m;
        data_w <= data_m;
        sign_extend_w <= sign_extend_m;
    end
end

/////////////////////////////////////////////////////
// Behavioural Logic
/////////////////////////////////////////////////////

// synthesis translate_off

// Check for non-aligned loads or stores
// (simulation-only warning; case-equality operators are used so that X/Z
// address bits are reported rather than silently ignored)
always @(posedge clk_i)
begin
    if (((load_q_m == `TRUE) || (store_q_m == `TRUE)) && (stall_m == `FALSE))
    begin
        if ((size_m === `LM32_SIZE_HWORD) && (load_store_address_m[0] !== 1'b0))
            $display ("Warning: Non-aligned halfword access. Address: 0x%0x Time: %0t.", load_store_address_m, $time);
        if ((size_m === `LM32_SIZE_WORD) && (load_store_address_m[1:0] !== 2'b00))
            $display ("Warning: Non-aligned word access. Address: 0x%0x Time: %0t.", load_store_address_m, $time);
    end
end

// synthesis translate_on

endmodule