lm32_load_store_unit.v

changeset 24
c336e674a37e
parent 23
252df75c8f67
child 25
7422134cbfea
     1.1 --- a/lm32_load_store_unit.v	Sun Mar 06 21:17:31 2011 +0000
     1.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.3 @@ -1,806 +0,0 @@
     1.4 -// =============================================================================
     1.5 -//                           COPYRIGHT NOTICE
     1.6 -// Copyright 2006 (c) Lattice Semiconductor Corporation
     1.7 -// ALL RIGHTS RESERVED
     1.8 -// This confidential and proprietary software may be used only as authorised by
     1.9 -// a licensing agreement from Lattice Semiconductor Corporation.
    1.10 -// The entire notice above must be reproduced on all authorized copies and
    1.11 -// copies may only be made to the extent permitted by a licensing agreement from
    1.12 -// Lattice Semiconductor Corporation.
    1.13 -//
    1.14 -// Lattice Semiconductor Corporation        TEL : 1-800-Lattice (USA and Canada)
    1.15 -// 5555 NE Moore Court                            408-826-6000 (other locations)
    1.16 -// Hillsboro, OR 97124                     web  : http://www.latticesemi.com/
    1.17 -// U.S.A                                   email: techsupport@latticesemi.com
    1.18 -// =============================================================================/
    1.19 -//                         FILE DETAILS
    1.20 -// Project      : LatticeMico32
    1.21 -// File         : lm32_load_store_unit.v
    1.22 -// Title        : Load and store unit
    1.23 -// Dependencies : lm32_include.v
    1.24 -// Version      : 6.1.17
    1.25 -//              : Initial Release
    1.26 -// Version      : 7.0SP2, 3.0
    1.27 -//              : No Change
    1.28 -// Version      : 3.1
    1.29 -//              : Instead of disallowing an instruction cache miss on a data cache 
    1.30 -//              : miss, both can now occur at the same time. If both occur at same 
    1.31 -//              : time, then restart address is the address of instruction that 
    1.32 -//              : caused data cache miss.
    1.33 -// Version      : 3.2
    1.34 -//              : EBRs use SYNC resets instead of ASYNC resets.
    1.35 -// Version      : 3.3
    1.36 -//              : Support for new non-cacheable Data Memory that is accessible by 
    1.37 -//              : the data port and has a one cycle access latency.
    1.38 -// Version      : 3.4
    1.39 -//              : No change
    1.40 -// Version      : 3.5
    1.41 -//              : Bug fix: Inline memory is correctly generated if it is not a
    1.42 -//              : power-of-two
    1.43 -// =============================================================================
    1.44 -
    1.45 -`include "lm32_include.v"
    1.46 -
    1.47 -/////////////////////////////////////////////////////
    1.48 -// Module interface
    1.49 -/////////////////////////////////////////////////////
    1.50 -
    1.51 -module lm32_load_store_unit (
    1.52 -    // ----- Inputs -------
    1.53 -    clk_i,
    1.54 -    rst_i,
    1.55 -    // From pipeline
    1.56 -    stall_a,
    1.57 -    stall_x,
    1.58 -    stall_m,
    1.59 -    kill_m,
    1.60 -    exception_m,
    1.61 -    store_operand_x,
    1.62 -    load_store_address_x,
    1.63 -    load_store_address_m,
    1.64 -    load_store_address_w,
    1.65 -    load_x,
    1.66 -    store_x,
    1.67 -    load_q_x,
    1.68 -    store_q_x,
    1.69 -    load_q_m,
    1.70 -    store_q_m,
    1.71 -    sign_extend_x,
    1.72 -    size_x,
    1.73 -`ifdef CFG_DCACHE_ENABLED
    1.74 -    dflush,
    1.75 -`endif
    1.76 -`ifdef CFG_IROM_ENABLED
    1.77 -    irom_data_m,
    1.78 -`endif
    1.79 -    // From Wishbone
    1.80 -    d_dat_i,
    1.81 -    d_ack_i,
    1.82 -    d_err_i,
    1.83 -    d_rty_i,
    1.84 -    // ----- Outputs -------
    1.85 -    // To pipeline
    1.86 -`ifdef CFG_DCACHE_ENABLED
    1.87 -    dcache_refill_request,
    1.88 -    dcache_restart_request,
    1.89 -    dcache_stall_request,
    1.90 -    dcache_refilling,
    1.91 -`endif    
    1.92 -`ifdef CFG_IROM_ENABLED
    1.93 -    irom_store_data_m,
    1.94 -    irom_address_xm,
    1.95 -    irom_we_xm,
    1.96 -    irom_stall_request_x,
    1.97 -`endif			     
    1.98 -    load_data_w,
    1.99 -    stall_wb_load,
   1.100 -    // To Wishbone
   1.101 -    d_dat_o,
   1.102 -    d_adr_o,
   1.103 -    d_cyc_o,
   1.104 -    d_sel_o,
   1.105 -    d_stb_o,
   1.106 -    d_we_o,
   1.107 -    d_cti_o,
   1.108 -    d_lock_o,
   1.109 -    d_bte_o
   1.110 -    );
   1.111 -
   1.112 -/////////////////////////////////////////////////////
   1.113 -// Parameters
   1.114 -/////////////////////////////////////////////////////
   1.115 -
   1.116 -parameter associativity = 1;                            // Associativity of the cache (Number of ways)
   1.117 -parameter sets = 512;                                   // Number of sets
   1.118 -parameter bytes_per_line = 16;                          // Number of bytes per cache line
   1.119 -parameter base_address = 0;                             // Base address of cacheable memory
   1.120 -parameter limit = 0;                                    // Limit (highest address) of cacheable memory
   1.121 -
   1.122 -// For bytes_per_line == 4, we set 1 so part-select range isn't reversed, even though not really used 
   1.123 -localparam addr_offset_width = bytes_per_line == 4 ? 1 : clogb2(bytes_per_line)-1-2;
   1.124 -localparam addr_offset_lsb = 2;
   1.125 -localparam addr_offset_msb = (addr_offset_lsb+addr_offset_width-1);
   1.126 -
   1.127 -/////////////////////////////////////////////////////
   1.128 -// Inputs
   1.129 -/////////////////////////////////////////////////////
   1.130 -
   1.131 -input clk_i;                                            // Clock 
   1.132 -input rst_i;                                            // Reset
   1.133 -
   1.134 -input stall_a;                                          // A stage stall 
   1.135 -input stall_x;                                          // X stage stall        
   1.136 -input stall_m;                                          // M stage stall
   1.137 -input kill_m;                                           // Kill instruction in M stage
   1.138 -input exception_m;                                      // An exception occurred in the M stage
   1.139 -
   1.140 -input [`LM32_WORD_RNG] store_operand_x;                 // Data read from register to store
   1.141 -input [`LM32_WORD_RNG] load_store_address_x;            // X stage load/store address
   1.142 -input [`LM32_WORD_RNG] load_store_address_m;            // M stage load/store address
   1.143 -input [1:0] load_store_address_w;                       // W stage load/store address (only the two least-significant bits are needed)
   1.144 -input load_x;                                           // Load instruction in X stage
   1.145 -input store_x;                                          // Store instruction in X stage
   1.146 -input load_q_x;                                         // Load instruction in X stage
   1.147 -input store_q_x;                                        // Store instruction in X stage
   1.148 -input load_q_m;                                         // Load instruction in M stage
   1.149 -input store_q_m;                                        // Store instruction in M stage
   1.150 -input sign_extend_x;                                    // Whether load instruction in X stage should sign extend or zero extend
   1.151 -input [`LM32_SIZE_RNG] size_x;                          // Size of load or store (byte, hword, word)
   1.152 -
   1.153 -`ifdef CFG_DCACHE_ENABLED
   1.154 -input dflush;                                           // Flush the data cache
   1.155 -`endif
   1.156 -
   1.157 -`ifdef CFG_IROM_ENABLED   
   1.158 -input [`LM32_WORD_RNG] irom_data_m;                     // Data from Instruction-ROM
   1.159 -`endif
   1.160 -
   1.161 -input [`LM32_WORD_RNG] d_dat_i;                         // Data Wishbone interface read data
   1.162 -input d_ack_i;                                          // Data Wishbone interface acknowledgement
   1.163 -input d_err_i;                                          // Data Wishbone interface error
   1.164 -input d_rty_i;                                          // Data Wishbone interface retry
   1.165 -
   1.166 -/////////////////////////////////////////////////////
   1.167 -// Outputs
   1.168 -/////////////////////////////////////////////////////
   1.169 -
   1.170 -`ifdef CFG_DCACHE_ENABLED
   1.171 -output dcache_refill_request;                           // Request to refill data cache
   1.172 -wire   dcache_refill_request;
   1.173 -output dcache_restart_request;                          // Request to restart the instruction that caused a data cache miss
   1.174 -wire   dcache_restart_request;
   1.175 -output dcache_stall_request;                            // Data cache stall request
   1.176 -wire   dcache_stall_request;
   1.177 -output dcache_refilling;
   1.178 -wire   dcache_refilling;
   1.179 -`endif
   1.180 -
   1.181 -`ifdef CFG_IROM_ENABLED   
   1.182 -output [`LM32_WORD_RNG] irom_store_data_m;              // Store data to Instruction ROM (full word; range must match the wire declaration below)
   1.183 -wire   [`LM32_WORD_RNG] irom_store_data_m;
   1.184 -output [`LM32_WORD_RNG] irom_address_xm;                // Load/store address to Instruction ROM
   1.185 -wire   [`LM32_WORD_RNG] irom_address_xm;
   1.186 -output irom_we_xm;                                      // Write-enable of 2nd port of Instruction ROM
   1.187 -wire   irom_we_xm;
   1.188 -output irom_stall_request_x;                            // Stall instruction in D stage  
   1.189 -wire   irom_stall_request_x;                            
   1.190 -`endif
   1.191 -   
   1.192 -output [`LM32_WORD_RNG] load_data_w;                    // Result of a load instruction
   1.193 -reg    [`LM32_WORD_RNG] load_data_w;
   1.194 -output stall_wb_load;                                   // Request to stall pipeline due to a load from the Wishbone interface
   1.195 -reg    stall_wb_load;
   1.196 -
   1.197 -output [`LM32_WORD_RNG] d_dat_o;                        // Data Wishbone interface write data
   1.198 -reg    [`LM32_WORD_RNG] d_dat_o;
   1.199 -output [`LM32_WORD_RNG] d_adr_o;                        // Data Wishbone interface address
   1.200 -reg    [`LM32_WORD_RNG] d_adr_o;
   1.201 -output d_cyc_o;                                         // Data Wishbone interface cycle
   1.202 -reg    d_cyc_o;
   1.203 -output [`LM32_BYTE_SELECT_RNG] d_sel_o;                 // Data Wishbone interface byte select
   1.204 -reg    [`LM32_BYTE_SELECT_RNG] d_sel_o;
   1.205 -output d_stb_o;                                         // Data Wishbone interface strobe
   1.206 -reg    d_stb_o; 
   1.207 -output d_we_o;                                          // Data Wishbone interface write enable
   1.208 -reg    d_we_o;
   1.209 -output [`LM32_CTYPE_RNG] d_cti_o;                       // Data Wishbone interface cycle type 
   1.210 -reg    [`LM32_CTYPE_RNG] d_cti_o;
   1.211 -output d_lock_o;                                        // Data Wishbone interface lock bus
   1.212 -reg    d_lock_o;
   1.213 -output [`LM32_BTYPE_RNG] d_bte_o;                       // Data Wishbone interface burst type 
   1.214 -wire   [`LM32_BTYPE_RNG] d_bte_o;
   1.215 -
   1.216 -/////////////////////////////////////////////////////
   1.217 -// Internal nets and registers 
   1.218 -/////////////////////////////////////////////////////
   1.219 -
   1.220 -// Microcode pipeline registers - See inputs for description
   1.221 -reg [`LM32_SIZE_RNG] size_m;
   1.222 -reg [`LM32_SIZE_RNG] size_w;
   1.223 -reg sign_extend_m;
   1.224 -reg sign_extend_w;
   1.225 -reg [`LM32_WORD_RNG] store_data_x;       
   1.226 -reg [`LM32_WORD_RNG] store_data_m;       
   1.227 -reg [`LM32_BYTE_SELECT_RNG] byte_enable_x;
   1.228 -reg [`LM32_BYTE_SELECT_RNG] byte_enable_m;
   1.229 -wire [`LM32_WORD_RNG] data_m;
   1.230 -reg [`LM32_WORD_RNG] data_w;
   1.231 -
   1.232 -`ifdef CFG_DCACHE_ENABLED
   1.233 -wire dcache_select_x;                                   // Select data cache to load from / store to
   1.234 -reg dcache_select_m;
   1.235 -wire [`LM32_WORD_RNG] dcache_data_m;                    // Data read from cache
   1.236 -wire [`LM32_WORD_RNG] dcache_refill_address;            // Address to refill data cache from
   1.237 -reg dcache_refill_ready;                                // Indicates the next word of refill data is ready
   1.238 -wire [`LM32_CTYPE_RNG] first_cycle_type;                // First Wishbone cycle type
   1.239 -wire [`LM32_CTYPE_RNG] next_cycle_type;                 // Next Wishbone cycle type
   1.240 -wire last_word;                                         // Indicates if this is the last word in the cache line
   1.241 -wire [`LM32_WORD_RNG] first_address;                    // First cache refill address
   1.242 -`endif
   1.243 -`ifdef CFG_DRAM_ENABLED
   1.244 -wire dram_select_x;                                     // Select data RAM to load from / store to
   1.245 -reg dram_select_m;
   1.246 -reg dram_bypass_en;                                     // RAW in data RAM; read latched (bypass) value rather than value from memory
   1.247 -reg [`LM32_WORD_RNG] dram_bypass_data;                  // Latched value of store'd data to data RAM
   1.248 -wire [`LM32_WORD_RNG] dram_data_out;                    // Data read from data RAM
   1.249 -wire [`LM32_WORD_RNG] dram_data_m;                      // Data read from data RAM: bypass value or value from memory
   1.250 -wire [`LM32_WORD_RNG] dram_store_data_m;                // Data to write to RAM
   1.251 -`endif
   1.252 -wire wb_select_x;                                       // Select Wishbone to load from / store to
   1.253 -`ifdef CFG_IROM_ENABLED
   1.254 -wire irom_select_x;                                     // Select instruction ROM to load from / store to
   1.255 -reg  irom_select_m;
   1.256 -`endif
   1.257 -reg wb_select_m;
   1.258 -reg [`LM32_WORD_RNG] wb_data_m;                         // Data read from Wishbone
   1.259 -reg wb_load_complete;                                   // Indicates when a Wishbone load is complete
   1.260 -
   1.261 -/////////////////////////////////////////////////////
   1.262 -// Functions
   1.263 -/////////////////////////////////////////////////////
   1.264 -
   1.265 -`include "lm32_functions.v"
   1.266 -
   1.267 -/////////////////////////////////////////////////////
   1.268 -// Instantiations
   1.269 -/////////////////////////////////////////////////////
   1.270 -
   1.271 -`ifdef CFG_DRAM_ENABLED
   1.272 -   // Data RAM
   1.273 -   pmi_ram_dp_true 
   1.274 -     #(
   1.275 -       // ----- Parameters -------
   1.276 -       .pmi_family             (`LATTICE_FAMILY),
   1.277 -
   1.278 -       //.pmi_addr_depth_a       (1 << (clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
   1.279 -       //.pmi_addr_width_a       ((clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
   1.280 -       //.pmi_data_width_a       (`LM32_WORD_WIDTH),
   1.281 -       //.pmi_addr_depth_b       (1 << (clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
   1.282 -       //.pmi_addr_width_b       ((clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
   1.283 -       //.pmi_data_width_b       (`LM32_WORD_WIDTH),
   1.284 -	
   1.285 -       .pmi_addr_depth_a       (`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1),
   1.286 -       .pmi_addr_width_a       (clogb2_v1(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)),
   1.287 -       .pmi_data_width_a       (`LM32_WORD_WIDTH),
   1.288 -       .pmi_addr_depth_b       (`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1),
   1.289 -       .pmi_addr_width_b       (clogb2_v1(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)),
   1.290 -       .pmi_data_width_b       (`LM32_WORD_WIDTH),
   1.291 -
   1.292 -       .pmi_regmode_a          ("noreg"),
   1.293 -       .pmi_regmode_b          ("noreg"),
   1.294 -       .pmi_gsr                ("enable"),
   1.295 -       .pmi_resetmode          ("sync"),
   1.296 -       .pmi_init_file          (`CFG_DRAM_INIT_FILE),
   1.297 -       .pmi_init_file_format   (`CFG_DRAM_INIT_FILE_FORMAT),
   1.298 -       .module_type            ("pmi_ram_dp_true")
   1.299 -       ) 
   1.300 -       ram (
   1.301 -	    // ----- Inputs -------
   1.302 -	    .ClockA                 (clk_i),
   1.303 -	    .ClockB                 (clk_i),
   1.304 -	    .ResetA                 (rst_i),
   1.305 -	    .ResetB                 (rst_i),
   1.306 -	    .DataInA                ({32{1'b0}}),
   1.307 -	    .DataInB                (dram_store_data_m),
   1.308 -	    .AddressA               (load_store_address_x[(clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)+2-1:2]),
   1.309 -	    .AddressB               (load_store_address_m[(clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)+2-1:2]),
   1.310 -	    // .ClockEnA               (!stall_x & (load_x | store_x)),
   1.311 -	    .ClockEnA               (!stall_x),
   1.312 -	    .ClockEnB               (!stall_m),
   1.313 -	    .WrA                    (`FALSE),
   1.314 -	    .WrB                    (store_q_m & dram_select_m), 
   1.315 -	    // ----- Outputs -------
   1.316 -	    .QA                     (dram_data_out),
   1.317 -	    .QB                     ()
   1.318 -	    );
   1.319 -   
   1.320 -   /*----------------------------------------------------------------------
   1.321 -    EBRs cannot perform reads from location 'written to' on the same clock
   1.322 -    edge. Therefore bypass logic is required to latch the store'd value
   1.323 -    and use it for the load (instead of value from memory).
   1.324 -    ----------------------------------------------------------------------*/
   1.325 -   always @(posedge clk_i `CFG_RESET_SENSITIVITY)
   1.326 -     if (rst_i == `TRUE)
   1.327 -       begin
   1.328 -	  dram_bypass_en <= `FALSE;
   1.329 -	  dram_bypass_data <= 0;
   1.330 -       end
   1.331 -     else
   1.332 -       begin
   1.333 -	  if (stall_x == `FALSE)
   1.334 -	    dram_bypass_data <= dram_store_data_m;
   1.335 -	  
   1.336 -	  if (   (stall_m == `FALSE) 
   1.337 -              && (stall_x == `FALSE)
   1.338 -	      && (store_q_m == `TRUE)
   1.339 -	      && (   (load_x == `TRUE)
   1.340 -	          || (store_x == `TRUE)
   1.341 -		 )
   1.342 -	      && (load_store_address_x[(`LM32_WORD_WIDTH-1):2] == load_store_address_m[(`LM32_WORD_WIDTH-1):2])
   1.343 -	     )
   1.344 -	    dram_bypass_en <= `TRUE;
   1.345 -	  else
   1.346 -	    if (   (dram_bypass_en == `TRUE)
   1.347 -		&& (stall_x == `FALSE)
   1.348 -	       )
   1.349 -	      dram_bypass_en <= `FALSE;
   1.350 -       end
   1.351 -   
   1.352 -   assign dram_data_m = dram_bypass_en ? dram_bypass_data : dram_data_out;
   1.353 -`endif
   1.354 -
   1.355 -`ifdef CFG_DCACHE_ENABLED
   1.356 -// Data cache
   1.357 -lm32_dcache #(
   1.358 -    .associativity          (associativity),
   1.359 -    .sets                   (sets),
   1.360 -    .bytes_per_line         (bytes_per_line),
   1.361 -    .base_address           (base_address),
   1.362 -    .limit                  (limit)
   1.363 -    ) dcache ( 
   1.364 -    // ----- Inputs -----
   1.365 -    .clk_i                  (clk_i),
   1.366 -    .rst_i                  (rst_i),      
   1.367 -    .stall_a                (stall_a),
   1.368 -    .stall_x                (stall_x),
   1.369 -    .stall_m                (stall_m),
   1.370 -    .address_x              (load_store_address_x),
   1.371 -    .address_m              (load_store_address_m),
   1.372 -    .load_q_m               (load_q_m & dcache_select_m),
   1.373 -    .store_q_m              (store_q_m & dcache_select_m),
   1.374 -    .store_data             (store_data_m),
   1.375 -    .store_byte_select      (byte_enable_m & {4{dcache_select_m}}),
   1.376 -    .refill_ready           (dcache_refill_ready),
   1.377 -    .refill_data            (wb_data_m),
   1.378 -    .dflush                 (dflush),
   1.379 -    // ----- Outputs -----
   1.380 -    .stall_request          (dcache_stall_request),
   1.381 -    .restart_request        (dcache_restart_request),
   1.382 -    .refill_request         (dcache_refill_request),
   1.383 -    .refill_address         (dcache_refill_address),
   1.384 -    .refilling              (dcache_refilling),
   1.385 -    .load_data              (dcache_data_m)
   1.386 -    );
   1.387 -`endif
   1.388 -
   1.389 -/////////////////////////////////////////////////////
   1.390 -// Combinational Logic
   1.391 -/////////////////////////////////////////////////////
   1.392 -
   1.393 -// Select where data should be loaded from / stored to
   1.394 -`ifdef CFG_DRAM_ENABLED
   1.395 -   assign dram_select_x =    (load_store_address_x >= `CFG_DRAM_BASE_ADDRESS) 
   1.396 -                          && (load_store_address_x <= `CFG_DRAM_LIMIT);
   1.397 -`endif
   1.398 -
   1.399 -`ifdef CFG_IROM_ENABLED
   1.400 -   assign irom_select_x =    (load_store_address_x >= `CFG_IROM_BASE_ADDRESS) 
   1.401 -                          && (load_store_address_x <= `CFG_IROM_LIMIT);
   1.402 -`endif
   1.403 -   
   1.404 -`ifdef CFG_DCACHE_ENABLED
   1.405 -   assign dcache_select_x =    (load_store_address_x >= `CFG_DCACHE_BASE_ADDRESS) 
   1.406 -                            && (load_store_address_x <= `CFG_DCACHE_LIMIT)
   1.407 -`ifdef CFG_DRAM_ENABLED
   1.408 -                            && (dram_select_x == `FALSE)
   1.409 -`endif
   1.410 -`ifdef CFG_IROM_ENABLED
   1.411 -                            && (irom_select_x == `FALSE)
   1.412 -`endif
   1.413 -                     ;
   1.414 -`endif
   1.415 -	  
   1.416 -   assign wb_select_x =    `TRUE
   1.417 -`ifdef CFG_DCACHE_ENABLED
   1.418 -                        && !dcache_select_x 
   1.419 -`endif
   1.420 -`ifdef CFG_DRAM_ENABLED
   1.421 -                        && !dram_select_x
   1.422 -`endif
   1.423 -`ifdef CFG_IROM_ENABLED
   1.424 -                        && !irom_select_x
   1.425 -`endif
   1.426 -                     ;
   1.427 -
   1.428 -// Replicate sub-word store data across the word so it is present in whichever byte lane is enabled
   1.429 -always @(*)
   1.430 -begin
   1.431 -    case (size_x)
   1.432 -    `LM32_SIZE_BYTE:  store_data_x = {4{store_operand_x[7:0]}};
   1.433 -    `LM32_SIZE_HWORD: store_data_x = {2{store_operand_x[15:0]}};
   1.434 -    `LM32_SIZE_WORD:  store_data_x = store_operand_x;    
   1.435 -    default:          store_data_x = {`LM32_WORD_WIDTH{1'bx}};
   1.436 -    endcase
   1.437 -end
   1.438 -
   1.439 -// Generate byte enable according to the size of the load or store and the address being accessed
   1.440 -always @(*)
   1.441 -begin
   1.442 -    casez ({size_x, load_store_address_x[1:0]})
   1.443 -    {`LM32_SIZE_BYTE, 2'b11}:  byte_enable_x = 4'b0001;
   1.444 -    {`LM32_SIZE_BYTE, 2'b10}:  byte_enable_x = 4'b0010;
   1.445 -    {`LM32_SIZE_BYTE, 2'b01}:  byte_enable_x = 4'b0100;
   1.446 -    {`LM32_SIZE_BYTE, 2'b00}:  byte_enable_x = 4'b1000;
   1.447 -    {`LM32_SIZE_HWORD, 2'b1?}: byte_enable_x = 4'b0011;
   1.448 -    {`LM32_SIZE_HWORD, 2'b0?}: byte_enable_x = 4'b1100;
   1.449 -    {`LM32_SIZE_WORD, 2'b??}:  byte_enable_x = 4'b1111;
   1.450 -    default:                   byte_enable_x = 4'bxxxx;
   1.451 -    endcase
   1.452 -end
   1.453 -
   1.454 -`ifdef CFG_DRAM_ENABLED
   1.455 -// Only replace selected bytes
   1.456 -assign dram_store_data_m[`LM32_BYTE_0_RNG] = byte_enable_m[0] ? store_data_m[`LM32_BYTE_0_RNG] : dram_data_m[`LM32_BYTE_0_RNG];
   1.457 -assign dram_store_data_m[`LM32_BYTE_1_RNG] = byte_enable_m[1] ? store_data_m[`LM32_BYTE_1_RNG] : dram_data_m[`LM32_BYTE_1_RNG];
   1.458 -assign dram_store_data_m[`LM32_BYTE_2_RNG] = byte_enable_m[2] ? store_data_m[`LM32_BYTE_2_RNG] : dram_data_m[`LM32_BYTE_2_RNG];
   1.459 -assign dram_store_data_m[`LM32_BYTE_3_RNG] = byte_enable_m[3] ? store_data_m[`LM32_BYTE_3_RNG] : dram_data_m[`LM32_BYTE_3_RNG];
   1.460 -`endif
   1.461 -
   1.462 -`ifdef CFG_IROM_ENABLED
   1.463 -// Only replace selected bytes
   1.464 -assign irom_store_data_m[`LM32_BYTE_0_RNG] = byte_enable_m[0] ? store_data_m[`LM32_BYTE_0_RNG] : irom_data_m[`LM32_BYTE_0_RNG];
   1.465 -assign irom_store_data_m[`LM32_BYTE_1_RNG] = byte_enable_m[1] ? store_data_m[`LM32_BYTE_1_RNG] : irom_data_m[`LM32_BYTE_1_RNG];
   1.466 -assign irom_store_data_m[`LM32_BYTE_2_RNG] = byte_enable_m[2] ? store_data_m[`LM32_BYTE_2_RNG] : irom_data_m[`LM32_BYTE_2_RNG];
   1.467 -assign irom_store_data_m[`LM32_BYTE_3_RNG] = byte_enable_m[3] ? store_data_m[`LM32_BYTE_3_RNG] : irom_data_m[`LM32_BYTE_3_RNG];
   1.468 -`endif
   1.469 -
   1.470 -`ifdef CFG_IROM_ENABLED
   1.471 -   // Instead of implementing a byte-addressable instruction ROM (for store byte instruction),
   1.472 -   // a load-and-store architecture is used wherein a 32-bit value is loaded, the requisite
   1.473 -   // byte is replaced, and the whole 32-bit value is written back
   1.474 -   
   1.475 -   assign irom_address_xm = ((irom_select_m == `TRUE) && (store_q_m == `TRUE))
   1.476 -	                    ? load_store_address_m
   1.477 -	                    : load_store_address_x;
   1.478 -   
   1.479 -   // All store instructions perform a write operation in the M stage
   1.480 -   assign irom_we_xm =    (irom_select_m == `TRUE)
   1.481 -	               && (store_q_m == `TRUE);
   1.482 -   
   1.483 -   // A single port in instruction ROM is available to load-store unit for doing loads/stores.
   1.484 -   // Since every store requires a load (in X stage) and then a store (in M stage), we cannot
   1.485 -   // allow load (or store) instructions sequentially after the store instructions to proceed 
   1.486 -   // until the store instruction has vacated M stage (i.e., completed the store operation)
   1.487 -   assign irom_stall_request_x =    (irom_select_x == `TRUE)
   1.488 -	                         && (store_q_x == `TRUE);
   1.489 -`endif
   1.490 -   
   1.491 -`ifdef CFG_DCACHE_ENABLED
   1.492 - `ifdef CFG_DRAM_ENABLED
   1.493 -  `ifdef CFG_IROM_ENABLED
   1.494 -   // WB + DC + DRAM + IROM
   1.495 -   assign data_m = wb_select_m == `TRUE 
   1.496 -                   ? wb_data_m
   1.497 -                   : dram_select_m == `TRUE 
   1.498 -                     ? dram_data_m
   1.499 -                     : irom_select_m == `TRUE
   1.500 -                       ? irom_data_m 
   1.501 -                       : dcache_data_m;
   1.502 -  `else
   1.503 -   // WB + DC + DRAM
   1.504 -   assign data_m = wb_select_m == `TRUE 
   1.505 -                   ? wb_data_m
   1.506 -                   : dram_select_m == `TRUE 
   1.507 -                     ? dram_data_m
   1.508 -                     : dcache_data_m;
   1.509 -  `endif
   1.510 - `else
   1.511 -  `ifdef CFG_IROM_ENABLED
   1.512 -   // WB + DC + IROM
   1.513 -   assign data_m = wb_select_m == `TRUE 
   1.514 -                   ? wb_data_m
   1.515 -                   : irom_select_m == `TRUE 
   1.516 -                     ? irom_data_m
   1.517 -                     : dcache_data_m;
   1.518 -  `else
   1.519 -   // WB + DC
   1.520 -   assign data_m = wb_select_m == `TRUE 
   1.521 -                   ? wb_data_m 
   1.522 -                   : dcache_data_m;
   1.523 -  `endif
   1.524 - `endif
   1.525 -`else
   1.526 - `ifdef CFG_DRAM_ENABLED
   1.527 -  `ifdef CFG_IROM_ENABLED
   1.528 -   // WB + DRAM + IROM
   1.529 -   assign data_m = wb_select_m == `TRUE 
   1.530 -                   ? wb_data_m 
   1.531 -                   : dram_select_m == `TRUE
   1.532 -                     ? dram_data_m
   1.533 -                     : irom_data_m;
   1.534 -  `else
   1.535 -   // WB + DRAM
   1.536 -   assign data_m = wb_select_m == `TRUE 
   1.537 -                   ? wb_data_m 
   1.538 -                   : dram_data_m;
   1.539 -  `endif
   1.540 - `else
   1.541 -  `ifdef CFG_IROM_ENABLED
   1.542 -   // WB + IROM
   1.543 -   assign data_m = wb_select_m == `TRUE 
   1.544 -                   ? wb_data_m 
   1.545 -                   : irom_data_m;
   1.546 -  `else
   1.547 -   // WB
   1.548 -   assign data_m = wb_data_m;
   1.549 -  `endif
   1.550 - `endif
   1.551 -`endif
   1.552 -
   1.553 -// Sub-word selection and sign/zero-extension for loads
   1.554 -always @(*)
   1.555 -begin
   1.556 -    casez ({size_w, load_store_address_w[1:0]})
   1.557 -    {`LM32_SIZE_BYTE, 2'b11}:  load_data_w = {{24{sign_extend_w & data_w[7]}}, data_w[7:0]};
   1.558 -    {`LM32_SIZE_BYTE, 2'b10}:  load_data_w = {{24{sign_extend_w & data_w[15]}}, data_w[15:8]};
   1.559 -    {`LM32_SIZE_BYTE, 2'b01}:  load_data_w = {{24{sign_extend_w & data_w[23]}}, data_w[23:16]};
   1.560 -    {`LM32_SIZE_BYTE, 2'b00}:  load_data_w = {{24{sign_extend_w & data_w[31]}}, data_w[31:24]};
   1.561 -    {`LM32_SIZE_HWORD, 2'b1?}: load_data_w = {{16{sign_extend_w & data_w[15]}}, data_w[15:0]};
   1.562 -    {`LM32_SIZE_HWORD, 2'b0?}: load_data_w = {{16{sign_extend_w & data_w[31]}}, data_w[31:16]};
   1.563 -    {`LM32_SIZE_WORD, 2'b??}:  load_data_w = data_w;
   1.564 -    default:                   load_data_w = {`LM32_WORD_WIDTH{1'bx}};
   1.565 -    endcase
   1.566 -end
   1.567 -
   1.568 -// Unused/constant Wishbone signals
   1.569 -assign d_bte_o = `LM32_BTYPE_LINEAR;
   1.570 -
   1.571 -`ifdef CFG_DCACHE_ENABLED                
   1.572 -// Generate refill cycle types, first refill address, and the signal indicating the last word in a cache line
   1.573 -generate 
   1.574 -    case (bytes_per_line)
   1.575 -    4:
   1.576 -    begin
   1.577 -assign first_cycle_type = `LM32_CTYPE_END;
   1.578 -assign next_cycle_type = `LM32_CTYPE_END;
   1.579 -assign last_word = `TRUE;
   1.580 -assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:2], 2'b00};
   1.581 -    end
   1.582 -    8:
   1.583 -    begin
   1.584 -assign first_cycle_type = `LM32_CTYPE_INCREMENTING;
   1.585 -assign next_cycle_type = `LM32_CTYPE_END;
   1.586 -assign last_word = (&d_adr_o[addr_offset_msb:addr_offset_lsb]) == 1'b1;
   1.587 -assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:addr_offset_msb+1], {addr_offset_width{1'b0}}, 2'b00};
   1.588 -    end
   1.589 -    16:
   1.590 -    begin
   1.591 -assign first_cycle_type = `LM32_CTYPE_INCREMENTING;
   1.592 -assign next_cycle_type = d_adr_o[addr_offset_msb] == 1'b1 ? `LM32_CTYPE_END : `LM32_CTYPE_INCREMENTING;
   1.593 -assign last_word = (&d_adr_o[addr_offset_msb:addr_offset_lsb]) == 1'b1;
   1.594 -assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:addr_offset_msb+1], {addr_offset_width{1'b0}}, 2'b00};
   1.595 -    end
   1.596 -    endcase
   1.597 -endgenerate
   1.598 -`endif
   1.599 -
   1.600 -/////////////////////////////////////////////////////
   1.601 -// Sequential Logic
   1.602 -/////////////////////////////////////////////////////
   1.603 -
   1.604 -// Data Wishbone interface: registers the d_* bus outputs and starts/finishes cache refills (when CFG_DCACHE_ENABLED), stores and loads
   1.605 -always @(posedge clk_i `CFG_RESET_SENSITIVITY)
   1.606 -begin
   1.607 -    if (rst_i == `TRUE) // Reset: no bus cycle in progress, all registered outputs and flags cleared
   1.608 -    begin
   1.609 -        d_cyc_o <= `FALSE;
   1.610 -        d_stb_o <= `FALSE;
   1.611 -        d_dat_o <= {`LM32_WORD_WIDTH{1'b0}};
   1.612 -        d_adr_o <= {`LM32_WORD_WIDTH{1'b0}};
   1.613 -        d_sel_o <= {`LM32_BYTE_SELECT_WIDTH{`FALSE}};
   1.614 -        d_we_o <= `FALSE;
   1.615 -        d_cti_o <= `LM32_CTYPE_END;
   1.616 -        d_lock_o <= `FALSE;
   1.617 -        wb_data_m <= {`LM32_WORD_WIDTH{1'b0}};
   1.618 -        wb_load_complete <= `FALSE;
   1.619 -        stall_wb_load <= `FALSE;
   1.620 -`ifdef CFG_DCACHE_ENABLED                
   1.621 -        dcache_refill_ready <= `FALSE;
   1.622 -`endif                
   1.623 -    end
   1.624 -    else
   1.625 -    begin
   1.626 -`ifdef CFG_DCACHE_ENABLED 
   1.627 -        // Default to deasserted so refill ready is only asserted for a single cycle; overridden below when a cycle completes during a refill
   1.628 -        dcache_refill_ready <= `FALSE;
   1.629 -`endif                
   1.630 -        // Is a Wishbone cycle already in progress?
   1.631 -        if (d_cyc_o == `TRUE)
   1.632 -        begin
   1.633 -            // Is the cycle complete? (d_err_i ends the transfer through the same path as d_ack_i)
   1.634 -            if ((d_ack_i == `TRUE) || (d_err_i == `TRUE))
   1.635 -            begin
   1.636 -`ifdef CFG_DCACHE_ENABLED                
   1.637 -                if ((dcache_refilling == `TRUE) && (!last_word))
   1.638 -                begin
   1.639 -                    // Fetch next word of cache line: only the offset field of the address is incremented; d_cyc_o/d_stb_o are left asserted
   1.640 -                    d_adr_o[addr_offset_msb:addr_offset_lsb] <= d_adr_o[addr_offset_msb:addr_offset_lsb] + 1'b1;
   1.641 -                end
   1.642 -                else
   1.643 -`endif                
   1.644 -                begin
   1.645 -                    // Refill/access complete - deassert cycle, strobe and lock
   1.646 -                    d_cyc_o <= `FALSE;
   1.647 -                    d_stb_o <= `FALSE;
   1.648 -                    d_lock_o <= `FALSE;
   1.649 -                end
   1.650 -`ifdef CFG_DCACHE_ENABLED    
   1.651 -                d_cti_o <= next_cycle_type; // cycle type for the following transfer, selected by the generate block earlier in the file
   1.652 -                // If we are performing a refill, indicate to cache next word of data is ready            
   1.653 -                dcache_refill_ready <= dcache_refilling;
   1.654 -`endif
   1.655 -                // Register data read from Wishbone interface (captured on every completed cycle, whether load, store, refill or error)
   1.656 -                wb_data_m <= d_dat_i;
   1.657 -                // Don't set when stores complete - otherwise we'll deadlock if load in m stage
   1.658 -                wb_load_complete <= !d_we_o; // overridden by the stall_m clear near the end of this block when stall_m is FALSE
   1.659 -            end
   1.660 -            // synthesis translate_off            
   1.661 -            if (d_err_i == `TRUE)
   1.662 -                $display ("Data bus error. Address: %x", d_adr_o);
   1.663 -            // synthesis translate_on
   1.664 -        end
   1.665 -        else // No cycle in progress: start at most one new cycle; the if/else-if chain gives refill priority over store, and store over load
   1.666 -        begin
   1.667 -`ifdef CFG_DCACHE_ENABLED                
   1.668 -            if (dcache_refill_request == `TRUE)
   1.669 -            begin
   1.670 -                // Start cache refill
   1.671 -                d_adr_o <= first_address;
   1.672 -                d_cyc_o <= `TRUE;
   1.673 -                d_sel_o <= {`LM32_WORD_WIDTH/8{`TRUE}}; // all byte selects asserted: refill reads are full words
   1.674 -                d_stb_o <= `TRUE;                
   1.675 -                d_we_o <= `FALSE;
   1.676 -                d_cti_o <= first_cycle_type;
   1.677 -                //d_lock_o <= `TRUE;
   1.678 -            end
   1.679 -            else 
   1.680 -`endif            
   1.681 -                 if (   (store_q_m == `TRUE)
   1.682 -                     && (stall_m == `FALSE)
   1.683 -`ifdef CFG_DRAM_ENABLED
   1.684 -                     && (dram_select_m == `FALSE)
   1.685 -`endif
   1.686 -`ifdef CFG_IROM_ENABLED
   1.687 -		     && (irom_select_m == `FALSE)
   1.688 -`endif			
   1.689 -                    )
   1.690 -            begin
   1.691 -                // Data cache is write through, so all stores go to memory (stores selecting DRAM or IROM are excluded by the condition above)
   1.692 -                d_dat_o <= store_data_m;
   1.693 -                d_adr_o <= load_store_address_m;
   1.694 -                d_cyc_o <= `TRUE;
   1.695 -                d_sel_o <= byte_enable_m;
   1.696 -                d_stb_o <= `TRUE;
   1.697 -                d_we_o <= `TRUE;
   1.698 -                d_cti_o <= `LM32_CTYPE_END;
   1.699 -            end        
   1.700 -            else if (   (load_q_m == `TRUE) 
   1.701 -                     && (wb_select_m == `TRUE) 
   1.702 -                     && (wb_load_complete == `FALSE)
   1.703 -                     // stall_m will be TRUE, because stall_wb_load will be TRUE 
   1.704 -                    )
   1.705 -            begin
   1.706 -                // Read requested address; stall_wb_load is cleared as the read is issued
   1.707 -                stall_wb_load <= `FALSE;
   1.708 -                d_adr_o <= load_store_address_m;
   1.709 -                d_cyc_o <= `TRUE;
   1.710 -                d_sel_o <= byte_enable_m;
   1.711 -                d_stb_o <= `TRUE;
   1.712 -                d_we_o <= `FALSE;
   1.713 -                d_cti_o <= `LM32_CTYPE_END;
   1.714 -            end
   1.715 -        end
   1.716 -        // Clear load/store complete flag when instruction leaves M stage (a later nonblocking assignment, so it wins over the set above)
   1.717 -        if (stall_m == `FALSE)
   1.718 -            wb_load_complete <= `FALSE;
   1.719 -        // When a Wishbone load first enters the M stage, we need to stall it
   1.720 -        if ((load_q_x == `TRUE) && (wb_select_x == `TRUE) && (stall_x == `FALSE))
   1.721 -            stall_wb_load <= `TRUE;
   1.722 -        // Clear stall request if load instruction is killed (last assignment in the block, so it overrides both the set and the clear above)
   1.723 -        if ((kill_m == `TRUE) || (exception_m == `TRUE))
   1.724 -            stall_wb_load <= `FALSE;
   1.725 -    end
   1.726 -end
   1.727 -
   1.728 -// Pipeline registers  
   1.729 -
   1.730 -// X/M stage pipeline registers: copy each _x value into its _m register on clock edges where stall_m is FALSE; otherwise hold
   1.731 -always @(posedge clk_i `CFG_RESET_SENSITIVITY)
   1.732 -begin
   1.733 -    if (rst_i == `TRUE)
   1.734 -    begin
   1.735 -        sign_extend_m <= `FALSE;
   1.736 -        size_m <= 2'b00;
   1.737 -        byte_enable_m <= `FALSE; // NOTE(review): unsized reset value, unlike the explicit-width resets used elsewhere - confirm it clears every bit of byte_enable_m
   1.738 -        store_data_m <= {`LM32_WORD_WIDTH{1'b0}};
   1.739 -`ifdef CFG_DCACHE_ENABLED
   1.740 -        dcache_select_m <= `FALSE;
   1.741 -`endif
   1.742 -`ifdef CFG_DRAM_ENABLED
   1.743 -        dram_select_m <= `FALSE;
   1.744 -`endif
   1.745 -`ifdef CFG_IROM_ENABLED
   1.746 -        irom_select_m <= `FALSE;
   1.747 -`endif
   1.748 -        wb_select_m <= `FALSE;        
   1.749 -    end
   1.750 -    else
   1.751 -    begin
   1.752 -        if (stall_m == `FALSE) // registers keep their previous values while the M stage is stalled
   1.753 -        begin
   1.754 -            sign_extend_m <= sign_extend_x;
   1.755 -            size_m <= size_x;
   1.756 -            byte_enable_m <= byte_enable_x;    
   1.757 -            store_data_m <= store_data_x;
   1.758 -`ifdef CFG_DCACHE_ENABLED
   1.759 -            dcache_select_m <= dcache_select_x;
   1.760 -`endif
   1.761 -`ifdef CFG_DRAM_ENABLED
   1.762 -            dram_select_m <= dram_select_x;
   1.763 -`endif
   1.764 -`ifdef CFG_IROM_ENABLED
   1.765 -            irom_select_m <= irom_select_x;
   1.766 -`endif
   1.767 -            wb_select_m <= wb_select_x;
   1.768 -        end
   1.769 -    end
   1.770 -end
   1.771 -
   1.772 -// M/W stage pipeline registers: size, data and sign-extend are copied from the _m signals to the _w registers on every clock
   1.773 -always @(posedge clk_i `CFG_RESET_SENSITIVITY)
   1.774 -begin
   1.775 -    if (rst_i == `TRUE)
   1.776 -    begin
   1.777 -        size_w <= 2'b00;
   1.778 -        data_w <= {`LM32_WORD_WIDTH{1'b0}};
   1.779 -        sign_extend_w <= `FALSE;
   1.780 -    end
   1.781 -    else // no stall qualifier here, unlike the X/M registers: the _w registers update every cycle
   1.782 -    begin
   1.783 -        size_w <= size_m;
   1.784 -        data_w <= data_m;
   1.785 -        sign_extend_w <= sign_extend_m;
   1.786 -    end
   1.787 -end
   1.788 -
   1.789 -/////////////////////////////////////////////////////
   1.790 -// Behavioural Logic
   1.791 -/////////////////////////////////////////////////////
   1.792 -
   1.793 -// synthesis translate_off
   1.794 -
   1.795 -// Check for non-aligned loads or stores (inside synthesis translate_off; only prints a warning, drives no signals)
   1.796 -always @(posedge clk_i)
   1.797 -begin
   1.798 -    if (((load_q_m == `TRUE) || (store_q_m == `TRUE)) && (stall_m == `FALSE)) 
   1.799 -    begin
   1.800 -        if ((size_m === `LM32_SIZE_HWORD) && (load_store_address_m[0] !== 1'b0)) // case (in)equality: an X/Z low address bit is also reported
   1.801 -            $display ("Warning: Non-aligned halfword access. Address: 0x%0x Time: %0t.", load_store_address_m, $time);
   1.802 -        if ((size_m === `LM32_SIZE_WORD) && (load_store_address_m[1:0] !== 2'b00)) // likewise, any X/Z in the two low address bits is reported
   1.803 -            $display ("Warning: Non-aligned word access. Address: 0x%0x Time: %0t.", load_store_address_m, $time);
   1.804 -    end
   1.805 -end
   1.806 -
   1.807 -// synthesis translate_on
   1.808 -
   1.809 -endmodule