lm32_load_store_unit.v

changeset 0
cd0b58aa6f83
child 8
07be9df9fee8
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/lm32_load_store_unit.v	Sun Apr 04 20:40:03 2010 +0100
     1.3 @@ -0,0 +1,808 @@
     1.4 +// =============================================================================
     1.5 +//                           COPYRIGHT NOTICE
     1.6 +// Copyright 2006 (c) Lattice Semiconductor Corporation
     1.7 +// ALL RIGHTS RESERVED
     1.8 +// This confidential and proprietary software may be used only as authorised by
     1.9 +// a licensing agreement from Lattice Semiconductor Corporation.
    1.10 +// The entire notice above must be reproduced on all authorized copies and
    1.11 +// copies may only be made to the extent permitted by a licensing agreement from
    1.12 +// Lattice Semiconductor Corporation.
    1.13 +//
    1.14 +// Lattice Semiconductor Corporation        TEL : 1-800-Lattice (USA and Canada)
    1.15 +// 5555 NE Moore Court                            408-826-6000 (other locations)
    1.16 +// Hillsboro, OR 97124                     web  : http://www.latticesemi.com/
    1.17 +// U.S.A                                   email: techsupport@latticesemi.com
    1.18 +// =============================================================================/
    1.19 +//                         FILE DETAILS
    1.20 +// Project      : LatticeMico32
    1.21 +// File         : lm32_load_store_unit.v
    1.22 +// Title        : Load and store unit
    1.23 +// Dependencies : lm32_include.v
    1.24 +// Version      : 6.1.17
    1.25 +//              : Initial Release
    1.26 +// Version      : 7.0SP2, 3.0
    1.27 +//              : No Change
    1.28 +// Version      : 3.1
    1.29 +//              : Instead of disallowing an instruction cache miss on a data cache 
    1.30 +//              : miss, both can now occur at the same time. If both occur at same 
    1.31 +//              : time, then restart address is the address of instruction that 
    1.32 +//              : caused data cache miss.
    1.33 +// Version      : 3.2
    1.34 +//              : EBRs use SYNC resets instead of ASYNC resets.
    1.35 +// Version      : 3.3
    1.36 +//              : Support for new non-cacheable Data Memory that is accessible by 
    1.37 +//              : the data port and has a one cycle access latency.
    1.38 +// Version      : 3.4
    1.39 +//              : No change
    1.40 +// Version      : 3.5
    1.41 +//              : Bug fix: Inline memory is correctly generated if it is not a
    1.42 +//              : power-of-two
    1.43 +// =============================================================================
    1.44 +
    1.45 +`include "lm32_include.v"
    1.46 +
    1.47 +/////////////////////////////////////////////////////
    1.48 +// Module interface
    1.49 +/////////////////////////////////////////////////////
    1.50 +
    1.51 +module lm32_load_store_unit (
    1.52 +    // ----- Inputs -------
    1.53 +    clk_i,
    1.54 +    rst_i,
    1.55 +    // From pipeline
    1.56 +    stall_a,
    1.57 +    stall_x,
    1.58 +    stall_m,
    1.59 +    kill_x,
    1.60 +    kill_m,
    1.61 +    exception_m,
    1.62 +    store_operand_x,
    1.63 +    load_store_address_x,
    1.64 +    load_store_address_m,
    1.65 +    load_store_address_w,
    1.66 +    load_x,
    1.67 +    store_x,
    1.68 +    load_q_x,
    1.69 +    store_q_x,
    1.70 +    load_q_m,
    1.71 +    store_q_m,
    1.72 +    sign_extend_x,
    1.73 +    size_x,
    1.74 +`ifdef CFG_DCACHE_ENABLED
    1.75 +    dflush,
    1.76 +`endif
    1.77 +`ifdef CFG_IROM_ENABLED
    1.78 +    irom_data_m,
    1.79 +`endif
    1.80 +    // From Wishbone
    1.81 +    d_dat_i,
    1.82 +    d_ack_i,
    1.83 +    d_err_i,
    1.84 +    d_rty_i,
    1.85 +    // ----- Outputs -------
    1.86 +    // To pipeline
    1.87 +`ifdef CFG_DCACHE_ENABLED
    1.88 +    dcache_refill_request,
    1.89 +    dcache_restart_request,
    1.90 +    dcache_stall_request,
    1.91 +    dcache_refilling,
    1.92 +`endif    
    1.93 +`ifdef CFG_IROM_ENABLED
    1.94 +    irom_store_data_m,
    1.95 +    irom_address_xm,
    1.96 +    irom_we_xm,
    1.97 +    irom_stall_request_x,
    1.98 +`endif			     
    1.99 +    load_data_w,
   1.100 +    stall_wb_load,
   1.101 +    // To Wishbone
   1.102 +    d_dat_o,
   1.103 +    d_adr_o,
   1.104 +    d_cyc_o,
   1.105 +    d_sel_o,
   1.106 +    d_stb_o,
   1.107 +    d_we_o,
   1.108 +    d_cti_o,
   1.109 +    d_lock_o,
   1.110 +    d_bte_o
   1.111 +    );
   1.112 +
   1.113 +/////////////////////////////////////////////////////
   1.114 +// Parameters
   1.115 +/////////////////////////////////////////////////////
   1.116 +
   1.117 +parameter associativity = 1;                            // Number of ways in the data cache
   1.118 +parameter sets = 512;                                   // Number of cache sets
   1.119 +parameter bytes_per_line = 16;                          // Cache line size in bytes
   1.120 +parameter base_address = 0;                             // Lowest address of cachable memory
   1.121 +parameter limit = 0;                                    // Highest address of cachable memory
   1.122 +
   1.123 +// Word-offset field within a cache line. Width is forced to 1 for 4-byte lines so the part-select range is never reversed.
   1.124 +localparam addr_offset_lsb = 2;
   1.125 +localparam addr_offset_width = (bytes_per_line == 4) ? 1 : (clogb2(bytes_per_line) - 1 - addr_offset_lsb);
   1.126 +localparam addr_offset_msb = addr_offset_lsb + addr_offset_width - 1;
   1.127 +
   1.128 +/////////////////////////////////////////////////////
   1.129 +// Inputs
   1.130 +/////////////////////////////////////////////////////
   1.131 +
   1.132 +input clk_i;                                            // Clock
   1.133 +input rst_i;                                            // Reset
   1.134 +
   1.135 +input stall_a;                                          // A stage stall
   1.136 +input stall_x;                                          // X stage stall
   1.137 +input stall_m;                                          // M stage stall
   1.138 +input kill_x;                                           // Kill instruction in X stage
   1.139 +input kill_m;                                           // Kill instruction in M stage
   1.140 +input exception_m;                                      // An exception occurred in the M stage
   1.141 +
   1.142 +input [`LM32_WORD_RNG] store_operand_x;                 // Data read from register to store
   1.143 +input [`LM32_WORD_RNG] load_store_address_x;            // X stage load/store address
   1.144 +input [`LM32_WORD_RNG] load_store_address_m;            // M stage load/store address
   1.145 +input [1:0] load_store_address_w;                       // W stage load/store address (only the two least significant bits are needed)
   1.146 +input load_x;                                           // Load instruction in X stage
   1.147 +input store_x;                                          // Store instruction in X stage
   1.148 +input load_q_x;                                         // Load instruction in X stage (presumably the qualified form of load_x - confirm against the pipeline)
   1.149 +input store_q_x;                                        // Store instruction in X stage (presumably the qualified form of store_x - confirm against the pipeline)
   1.150 +input load_q_m;                                         // Load instruction in M stage
   1.151 +input store_q_m;                                        // Store instruction in M stage
   1.152 +input sign_extend_x;                                    // Whether load instruction in X stage should sign extend or zero extend
   1.153 +input [`LM32_SIZE_RNG] size_x;                          // Size of load or store (byte, hword, word)
   1.154 +
   1.155 +`ifdef CFG_DCACHE_ENABLED
   1.156 +input dflush;                                           // Flush the data cache
   1.157 +`endif
   1.158 +
   1.159 +`ifdef CFG_IROM_ENABLED   
   1.160 +input [`LM32_WORD_RNG] irom_data_m;                     // Data from Instruction-ROM
   1.161 +`endif
   1.162 +
   1.163 +input [`LM32_WORD_RNG] d_dat_i;                         // Data Wishbone interface read data
   1.164 +input d_ack_i;                                          // Data Wishbone interface acknowledgement
   1.165 +input d_err_i;                                          // Data Wishbone interface error
   1.166 +input d_rty_i;                                          // Data Wishbone interface retry
   1.167 +
   1.168 +/////////////////////////////////////////////////////
   1.169 +// Outputs
   1.170 +/////////////////////////////////////////////////////
   1.171 +
   1.172 +`ifdef CFG_DCACHE_ENABLED
   1.173 +output dcache_refill_request;                           // Request to refill data cache
   1.174 +wire   dcache_refill_request;
   1.175 +output dcache_restart_request;                          // Request to restart the instruction that caused a data cache miss
   1.176 +wire   dcache_restart_request;
   1.177 +output dcache_stall_request;                            // Data cache stall request
   1.178 +wire   dcache_stall_request;
   1.179 +output dcache_refilling;                                // Driven by the data cache's refilling output; used below to sequence refill reads on the Wishbone
   1.180 +wire   dcache_refilling;
   1.181 +`endif
   1.182 +
   1.183 +`ifdef CFG_IROM_ENABLED
   1.184 +output [`LM32_WORD_RNG] irom_store_data_m;              // Store data to Instruction ROM (range must match the wire declaration below)
   1.185 +wire   [`LM32_WORD_RNG] irom_store_data_m;
   1.186 +output [`LM32_WORD_RNG] irom_address_xm;                // Load/store address to Instruction ROM
   1.187 +wire   [`LM32_WORD_RNG] irom_address_xm;
   1.188 +output irom_we_xm;                                      // Write-enable of 2nd port of Instruction ROM
   1.189 +wire   irom_we_xm;
   1.190 +output irom_stall_request_x;                            // Stall request raised while a store to the Instruction ROM is in the X stage
   1.191 +wire   irom_stall_request_x;
   1.192 +`endif
   1.193 +   
   1.194 +output [`LM32_WORD_RNG] load_data_w;                    // Result of a load instruction
   1.195 +reg    [`LM32_WORD_RNG] load_data_w;
   1.196 +output stall_wb_load;                                   // Request to stall pipeline due to a load from the Wishbone interface
   1.197 +reg    stall_wb_load;
   1.198 +
   1.199 +output [`LM32_WORD_RNG] d_dat_o;                        // Data Wishbone interface write data
   1.200 +reg    [`LM32_WORD_RNG] d_dat_o;
   1.201 +output [`LM32_WORD_RNG] d_adr_o;                        // Data Wishbone interface address
   1.202 +reg    [`LM32_WORD_RNG] d_adr_o;
   1.203 +output d_cyc_o;                                         // Data Wishbone interface cycle
   1.204 +reg    d_cyc_o;
   1.205 +output [`LM32_BYTE_SELECT_RNG] d_sel_o;                 // Data Wishbone interface byte select
   1.206 +reg    [`LM32_BYTE_SELECT_RNG] d_sel_o;
   1.207 +output d_stb_o;                                         // Data Wishbone interface strobe
   1.208 +reg    d_stb_o; 
   1.209 +output d_we_o;                                          // Data Wishbone interface write enable
   1.210 +reg    d_we_o;
   1.211 +output [`LM32_CTYPE_RNG] d_cti_o;                       // Data Wishbone interface cycle type
   1.212 +reg    [`LM32_CTYPE_RNG] d_cti_o;
   1.213 +output d_lock_o;                                        // Data Wishbone interface lock bus
   1.214 +reg    d_lock_o;
   1.215 +output [`LM32_BTYPE_RNG] d_bte_o;                       // Data Wishbone interface burst type (tied to a constant in this module)
   1.216 +wire   [`LM32_BTYPE_RNG] d_bte_o;
   1.217 +
   1.218 +/////////////////////////////////////////////////////
   1.219 +// Internal nets and registers 
   1.220 +/////////////////////////////////////////////////////
   1.221 +
   1.222 +// Microcode pipeline registers - See inputs for description
   1.223 +reg [`LM32_SIZE_RNG] size_m;
   1.224 +reg [`LM32_SIZE_RNG] size_w;
   1.225 +reg sign_extend_m;
   1.226 +reg sign_extend_w;
   1.227 +reg [`LM32_WORD_RNG] store_data_x;                      // Store operand replicated across byte/half-word lanes according to size_x
   1.228 +reg [`LM32_WORD_RNG] store_data_m;       
   1.229 +reg [`LM32_BYTE_SELECT_RNG] byte_enable_x;              // Byte lanes selected by size_x and the two address LSBs
   1.230 +reg [`LM32_BYTE_SELECT_RNG] byte_enable_m;
   1.231 +wire [`LM32_WORD_RNG] data_m;                           // M stage load data, muxed from Wishbone / data RAM / instruction ROM / data cache
   1.232 +reg [`LM32_WORD_RNG] data_w;                            // Word from which load_data_w is sub-word selected and extended
   1.233 +
   1.234 +`ifdef CFG_DCACHE_ENABLED
   1.235 +wire dcache_select_x;                                   // Select data cache to load from / store to
   1.236 +reg dcache_select_m;
   1.237 +wire [`LM32_WORD_RNG] dcache_data_m;                    // Data read from cache
   1.238 +wire [`LM32_WORD_RNG] dcache_refill_address;            // Address to refill data cache from
   1.239 +reg dcache_refill_ready;                                // Indicates the next word of refill data is ready
   1.240 +wire [`LM32_CTYPE_RNG] first_cycle_type;                // First Wishbone cycle type
   1.241 +wire [`LM32_CTYPE_RNG] next_cycle_type;                 // Next Wishbone cycle type
   1.242 +wire last_word;                                         // Indicates if this is the last word in the cache line
   1.243 +wire [`LM32_WORD_RNG] first_address;                    // First cache refill address
   1.244 +`endif
   1.245 +`ifdef CFG_DRAM_ENABLED
   1.246 +wire dram_select_x;                                     // Select data RAM to load from / store to
   1.247 +reg dram_select_m;
   1.248 +reg dram_bypass_en;                                     // RAW in data RAM; read latched (bypass) value rather than value from memory
   1.249 +reg [`LM32_WORD_RNG] dram_bypass_data;                  // Latched value of store'd data to data RAM
   1.250 +wire [`LM32_WORD_RNG] dram_data_out;                    // Data read from data RAM
   1.251 +wire [`LM32_WORD_RNG] dram_data_m;                      // Data read from data RAM: bypass value or value from memory
   1.252 +wire [`LM32_WORD_RNG] dram_store_data_m;                // Data to write to RAM
   1.253 +`endif
   1.254 +wire wb_select_x;                                       // Select Wishbone to load from / store to
   1.255 +`ifdef CFG_IROM_ENABLED
   1.256 +wire irom_select_x;                                     // Select instruction ROM to load from / store to
   1.257 +reg  irom_select_m;
   1.258 +`endif
   1.259 +reg wb_select_m;
   1.260 +reg [`LM32_WORD_RNG] wb_data_m;                         // Data read from Wishbone
   1.261 +reg wb_load_complete;                                   // Indicates when a Wishbone load is complete
   1.262 +
   1.263 +/////////////////////////////////////////////////////
   1.264 +// Functions
   1.265 +/////////////////////////////////////////////////////
   1.266 +
   1.267 +`include "lm32_functions.v"
   1.268 +
   1.269 +/////////////////////////////////////////////////////
   1.270 +// Instantiations
   1.271 +/////////////////////////////////////////////////////
   1.272 +
   1.273 +`ifdef CFG_DRAM_ENABLED
   1.274 +   // Data RAM: port A only reads (X stage address), port B only writes (M stage address)
   1.275 +   pmi_ram_dp_true 
   1.276 +     #(
   1.277 +       // ----- Parameters -------
   1.278 +       .pmi_family             (`LATTICE_FAMILY),
   1.279 +
   1.280 +       //.pmi_addr_depth_a       (1 << (clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
   1.281 +       //.pmi_addr_width_a       ((clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
   1.282 +       //.pmi_data_width_a       (`LM32_WORD_WIDTH),
   1.283 +       //.pmi_addr_depth_b       (1 << (clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
   1.284 +       //.pmi_addr_width_b       ((clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
   1.285 +       //.pmi_data_width_b       (`LM32_WORD_WIDTH),
   1.286 +	
   1.287 +       .pmi_addr_depth_a       (`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1),
   1.288 +       .pmi_addr_width_a       (clogb2_v1(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)),
   1.289 +       .pmi_data_width_a       (`LM32_WORD_WIDTH),
   1.290 +       .pmi_addr_depth_b       (`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1),
   1.291 +       .pmi_addr_width_b       (clogb2_v1(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)),
   1.292 +       .pmi_data_width_b       (`LM32_WORD_WIDTH),
   1.293 +
   1.294 +       .pmi_regmode_a          ("noreg"),
   1.295 +       .pmi_regmode_b          ("noreg"),
   1.296 +       .pmi_gsr                ("enable"),
   1.297 +       .pmi_resetmode          ("sync"),
   1.298 +       .pmi_init_file          (`CFG_DRAM_INIT_FILE),
   1.299 +       .pmi_init_file_format   (`CFG_DRAM_INIT_FILE_FORMAT),
   1.300 +       .module_type            ("pmi_ram_dp_true")
   1.301 +       ) 
   1.302 +       ram (
   1.303 +	    // ----- Inputs -------
   1.304 +	    .ClockA                 (clk_i),
   1.305 +	    .ClockB                 (clk_i),
   1.306 +	    .ResetA                 (rst_i),
   1.307 +	    .ResetB                 (rst_i),
   1.308 +	    .DataInA                ({32{1'b0}}),
   1.309 +	    .DataInB                (dram_store_data_m),
   1.310 +	    .AddressA               (load_store_address_x[(clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)+2-1:2]), // NOTE(review): slice width uses clogb2()-1 but pmi_addr_width_a uses clogb2_v1(); confirm they agree for non-power-of-two depths
   1.311 +	    .AddressB               (load_store_address_m[(clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)+2-1:2]), // NOTE(review): same width question as AddressA
   1.312 +	    // .ClockEnA               (!stall_x & (load_x | store_x)),
   1.313 +	    .ClockEnA               (!stall_x),
   1.314 +	    .ClockEnB               (!stall_m),
   1.315 +	    .WrA                    (`FALSE),
   1.316 +	    .WrB                    (store_q_m & dram_select_m), 
   1.317 +	    // ----- Outputs -------
   1.318 +	    .QA                     (dram_data_out),
   1.319 +	    .QB                     ()
   1.320 +	    );
   1.321 +   
   1.322 +   /*----------------------------------------------------------------------
   1.323 +    EBRs cannot perform reads from location 'written to' on the same clock
   1.324 +    edge. Therefore bypass logic is required to latch the store'd value
   1.325 +    and use it for the load (instead of value from memory).
   1.326 +    ----------------------------------------------------------------------*/
   1.327 +   always @(posedge clk_i `CFG_RESET_SENSITIVITY)
   1.328 +     if (rst_i == `TRUE)
   1.329 +       begin
   1.330 +	  dram_bypass_en <= `FALSE;
   1.331 +	  dram_bypass_data <= 0;
   1.332 +       end
   1.333 +     else
   1.334 +       begin
   1.335 +	  if (stall_x == `FALSE)
   1.336 +	    dram_bypass_data <= dram_store_data_m; // Capture the merged word being written by the M stage
   1.337 +	  
   1.338 +	  if (   (stall_m == `FALSE) 
   1.339 +              && (stall_x == `FALSE)
   1.340 +	      && (store_q_m == `TRUE)
   1.341 +	      && (   (load_x == `TRUE)
   1.342 +	          || (store_x == `TRUE)
   1.343 +		 )
   1.344 +	      && (load_store_address_x[(`LM32_WORD_WIDTH-1):2] == load_store_address_m[(`LM32_WORD_WIDTH-1):2])
   1.345 +	     )
   1.346 +	    dram_bypass_en <= `TRUE; // X stage access hits the word the M stage store is writing
   1.347 +	  else
   1.348 +	    if (   (dram_bypass_en == `TRUE)
   1.349 +		&& (stall_x == `FALSE)
   1.350 +	       )
   1.351 +	      dram_bypass_en <= `FALSE; // Bypass is dropped once the X stage advances
   1.352 +       end
   1.353 +   
   1.354 +   assign dram_data_m = dram_bypass_en ? dram_bypass_data : dram_data_out;
   1.355 +`endif
   1.356 +
   1.357 +`ifdef CFG_DCACHE_ENABLED
   1.358 +// Data cache: loads, stores and byte selects are gated with dcache_select_m so the cache only sees accesses routed to it
   1.359 +lm32_dcache #(
   1.360 +    .associativity          (associativity),
   1.361 +    .sets                   (sets),
   1.362 +    .bytes_per_line         (bytes_per_line),
   1.363 +    .base_address           (base_address),
   1.364 +    .limit                  (limit)
   1.365 +    ) dcache ( 
   1.366 +    // ----- Inputs -----
   1.367 +    .clk_i                  (clk_i),
   1.368 +    .rst_i                  (rst_i),      
   1.369 +    .stall_a                (stall_a),
   1.370 +    .stall_x                (stall_x),
   1.371 +    .stall_m                (stall_m),
   1.372 +    .address_x              (load_store_address_x),
   1.373 +    .address_m              (load_store_address_m),
   1.374 +    .load_q_m               (load_q_m & dcache_select_m),
   1.375 +    .store_q_m              (store_q_m & dcache_select_m),
   1.376 +    .store_data             (store_data_m),
   1.377 +    .store_byte_select      (byte_enable_m & {4{dcache_select_m}}),
   1.378 +    .refill_ready           (dcache_refill_ready),
   1.379 +    .refill_data            (wb_data_m),               // Refill words are the registered Wishbone read data
   1.380 +    .dflush                 (dflush),
   1.381 +    // ----- Outputs -----
   1.382 +    .stall_request          (dcache_stall_request),
   1.383 +    .restart_request        (dcache_restart_request),
   1.384 +    .refill_request         (dcache_refill_request),
   1.385 +    .refill_address         (dcache_refill_address),
   1.386 +    .refilling              (dcache_refilling),
   1.387 +    .load_data              (dcache_data_m)
   1.388 +    );
   1.389 +`endif
   1.390 +
   1.391 +/////////////////////////////////////////////////////
   1.392 +// Combinational Logic
   1.393 +/////////////////////////////////////////////////////
   1.394 +
   1.395 +// Decode the X stage address into exactly one target: data RAM, instruction ROM, data cache or Wishbone
   1.396 +`ifdef CFG_DRAM_ENABLED
   1.397 +   assign dram_select_x =    (`CFG_DRAM_BASE_ADDRESS <= load_store_address_x)
   1.398 +                          && (`CFG_DRAM_LIMIT >= load_store_address_x);
   1.399 +`endif
   1.400 +
   1.401 +`ifdef CFG_IROM_ENABLED
   1.402 +   assign irom_select_x =    (`CFG_IROM_BASE_ADDRESS <= load_store_address_x)
   1.403 +                          && (`CFG_IROM_LIMIT >= load_store_address_x);
   1.404 +`endif
   1.405 +
   1.406 +`ifdef CFG_DCACHE_ENABLED
   1.407 +   assign dcache_select_x =
   1.408 +`ifdef CFG_DRAM_ENABLED
   1.409 +                               (dram_select_x == `FALSE) &&
   1.410 +`endif
   1.411 +`ifdef CFG_IROM_ENABLED
   1.412 +                               (irom_select_x == `FALSE) &&
   1.413 +`endif
   1.414 +                               (`CFG_DCACHE_BASE_ADDRESS <= load_store_address_x)
   1.415 +                            && (`CFG_DCACHE_LIMIT >= load_store_address_x);
   1.416 +`endif
   1.417 +
   1.418 +   assign wb_select_x =
   1.419 +`ifdef CFG_IROM_ENABLED
   1.420 +                           !irom_select_x &&
   1.421 +`endif
   1.422 +`ifdef CFG_DRAM_ENABLED
   1.423 +                           !dram_select_x &&
   1.424 +`endif
   1.425 +`ifdef CFG_DCACHE_ENABLED
   1.426 +                           !dcache_select_x &&
   1.427 +`endif
   1.428 +                           `TRUE;
   1.429 +
   1.430 +// Replicate the store operand so that every byte (or half-word) lane carries the value to be stored
   1.431 +always @(*)
   1.432 +begin
   1.433 +    case (size_x)
   1.434 +    `LM32_SIZE_WORD:  store_data_x = store_operand_x;
   1.435 +    `LM32_SIZE_HWORD: store_data_x = {store_operand_x[15:0], store_operand_x[15:0]};
   1.436 +    `LM32_SIZE_BYTE:  store_data_x = {store_operand_x[7:0], store_operand_x[7:0], store_operand_x[7:0], store_operand_x[7:0]};
   1.437 +    default:          store_data_x = {`LM32_WORD_WIDTH{1'bx}};
   1.438 +    endcase
   1.439 +end
   1.440 +
   1.441 +// Generate byte enable according to size of load or store and address being accessed (byte offset 0 selects enable bit 3)
   1.442 +always @(*)
   1.443 +begin
   1.444 +    casez ({size_x, load_store_address_x[1:0]})
   1.445 +    {`LM32_SIZE_BYTE, 2'b11}:  byte_enable_x = 4'b0001;
   1.446 +    {`LM32_SIZE_BYTE, 2'b10}:  byte_enable_x = 4'b0010;
   1.447 +    {`LM32_SIZE_BYTE, 2'b01}:  byte_enable_x = 4'b0100;
   1.448 +    {`LM32_SIZE_BYTE, 2'b00}:  byte_enable_x = 4'b1000;
   1.449 +    {`LM32_SIZE_HWORD, 2'b1?}: byte_enable_x = 4'b0011;   // Address bit 0 is ignored for half-word accesses
   1.450 +    {`LM32_SIZE_HWORD, 2'b0?}: byte_enable_x = 4'b1100;
   1.451 +    {`LM32_SIZE_WORD, 2'b??}:  byte_enable_x = 4'b1111;   // Both address LSBs are ignored for word accesses
   1.452 +    default:                   byte_enable_x = 4'bxxxx;
   1.453 +    endcase
   1.454 +end
   1.455 +
   1.456 +`ifdef CFG_DRAM_ENABLED
   1.457 +// Merge per byte lane: lanes without a byte enable keep the value read from the data RAM
   1.458 +assign dram_store_data_m[`LM32_BYTE_3_RNG] = !byte_enable_m[3] ? dram_data_m[`LM32_BYTE_3_RNG] : store_data_m[`LM32_BYTE_3_RNG];
   1.459 +assign dram_store_data_m[`LM32_BYTE_2_RNG] = !byte_enable_m[2] ? dram_data_m[`LM32_BYTE_2_RNG] : store_data_m[`LM32_BYTE_2_RNG];
   1.460 +assign dram_store_data_m[`LM32_BYTE_1_RNG] = !byte_enable_m[1] ? dram_data_m[`LM32_BYTE_1_RNG] : store_data_m[`LM32_BYTE_1_RNG];
   1.461 +assign dram_store_data_m[`LM32_BYTE_0_RNG] = !byte_enable_m[0] ? dram_data_m[`LM32_BYTE_0_RNG] : store_data_m[`LM32_BYTE_0_RNG];
   1.462 +`endif
   1.463 +
   1.464 +`ifdef CFG_IROM_ENABLED
   1.465 +// Merge per byte lane: lanes without a byte enable keep the value read from the instruction ROM
   1.466 +assign irom_store_data_m[`LM32_BYTE_3_RNG] = !byte_enable_m[3] ? irom_data_m[`LM32_BYTE_3_RNG] : store_data_m[`LM32_BYTE_3_RNG];
   1.467 +assign irom_store_data_m[`LM32_BYTE_2_RNG] = !byte_enable_m[2] ? irom_data_m[`LM32_BYTE_2_RNG] : store_data_m[`LM32_BYTE_2_RNG];
   1.468 +assign irom_store_data_m[`LM32_BYTE_1_RNG] = !byte_enable_m[1] ? irom_data_m[`LM32_BYTE_1_RNG] : store_data_m[`LM32_BYTE_1_RNG];
   1.469 +assign irom_store_data_m[`LM32_BYTE_0_RNG] = !byte_enable_m[0] ? irom_data_m[`LM32_BYTE_0_RNG] : store_data_m[`LM32_BYTE_0_RNG];
   1.470 +`endif
   1.471 +
   1.472 +`ifdef CFG_IROM_ENABLED
   1.473 +   // The instruction ROM is not byte-addressable. A store is done as read-modify-write:
   1.474 +   // the 32-bit word is loaded, the bytes being stored are replaced, and the whole
   1.475 +   // 32-bit word is written back.
   1.476 +
   1.477 +   // Write enable: every store to the instruction ROM performs its write in the M stage
   1.478 +   assign irom_we_xm = (store_q_m == `TRUE) && (irom_select_m == `TRUE);
   1.479 +
   1.480 +   // Address the port from the M stage while writing, otherwise from the X stage
   1.481 +   assign irom_address_xm = irom_we_xm
   1.482 +                            ? load_store_address_m
   1.483 +                            : load_store_address_x;
   1.484 +
   1.485 +   // Only one instruction ROM port is available to the load/store unit, and each store
   1.486 +   // needs it twice: a load in the X stage and then the write in the M stage. Loads or
   1.487 +   // stores that follow a store therefore cannot proceed until the store has vacated
   1.488 +   // the M stage, so a stall is requested for a store to the instruction ROM in X.
   1.489 +   assign irom_stall_request_x = (store_q_x == `TRUE)
   1.490 +                              && (irom_select_x == `TRUE);
   1.491 +`endif
   1.492 +   
   1.493 +`ifdef CFG_DCACHE_ENABLED
   1.494 + `ifdef CFG_DRAM_ENABLED
   1.495 +  `ifdef CFG_IROM_ENABLED
   1.496 +   // WB + DC + DRAM + IROM: priority is Wishbone, data RAM, instruction ROM; data cache is the fallback
   1.497 +   assign data_m = wb_select_m == `TRUE 
   1.498 +                   ? wb_data_m
   1.499 +                   : dram_select_m == `TRUE 
   1.500 +                     ? dram_data_m
   1.501 +                     : irom_select_m == `TRUE
   1.502 +                       ? irom_data_m 
   1.503 +                       : dcache_data_m;
   1.504 +  `else
   1.505 +   // WB + DC + DRAM: priority is Wishbone, data RAM; data cache is the fallback
   1.506 +   assign data_m = wb_select_m == `TRUE 
   1.507 +                   ? wb_data_m
   1.508 +                   : dram_select_m == `TRUE 
   1.509 +                     ? dram_data_m
   1.510 +                     : dcache_data_m;
   1.511 +  `endif
   1.512 + `else
   1.513 +  `ifdef CFG_IROM_ENABLED
   1.514 +   // WB + DC + IROM: priority is Wishbone, instruction ROM; data cache is the fallback
   1.515 +   assign data_m = wb_select_m == `TRUE 
   1.516 +                   ? wb_data_m
   1.517 +                   : irom_select_m == `TRUE 
   1.518 +                     ? irom_data_m
   1.519 +                     : dcache_data_m;
   1.520 +  `else
   1.521 +   // WB + DC: Wishbone when selected, otherwise data cache
   1.522 +   assign data_m = wb_select_m == `TRUE 
   1.523 +                   ? wb_data_m 
   1.524 +                   : dcache_data_m;
   1.525 +  `endif
   1.526 + `endif
   1.527 +`else
   1.528 + `ifdef CFG_DRAM_ENABLED
   1.529 +  `ifdef CFG_IROM_ENABLED
   1.530 +   // WB + DRAM + IROM: priority is Wishbone, data RAM; instruction ROM is the fallback
   1.531 +   assign data_m = wb_select_m == `TRUE 
   1.532 +                   ? wb_data_m 
   1.533 +                   : dram_select_m == `TRUE
   1.534 +                     ? dram_data_m
   1.535 +                     : irom_data_m;
   1.536 +  `else
   1.537 +   // WB + DRAM: Wishbone when selected, otherwise data RAM
   1.538 +   assign data_m = wb_select_m == `TRUE 
   1.539 +                   ? wb_data_m 
   1.540 +                   : dram_data_m;
   1.541 +  `endif
   1.542 + `else
   1.543 +  `ifdef CFG_IROM_ENABLED
   1.544 +   // WB + IROM: Wishbone when selected, otherwise instruction ROM
   1.545 +   assign data_m = wb_select_m == `TRUE 
   1.546 +                   ? wb_data_m 
   1.547 +                   : irom_data_m;
   1.548 +  `else
   1.549 +   // WB only: load data always comes from the Wishbone
   1.550 +   assign data_m = wb_data_m;
   1.551 +  `endif
   1.552 + `endif
   1.553 +`endif
   1.554 +
   1.555 +// Sub-word selection and sign/zero-extension for loads (byte offset 0 selects data_w[31:24]; upper bits are the sign bit when sign_extend_w is set, else zero)
   1.556 +always @(*)
   1.557 +begin
   1.558 +    casez ({size_w, load_store_address_w[1:0]})
   1.559 +    {`LM32_SIZE_BYTE, 2'b11}:  load_data_w = {{24{sign_extend_w & data_w[7]}}, data_w[7:0]};
   1.560 +    {`LM32_SIZE_BYTE, 2'b10}:  load_data_w = {{24{sign_extend_w & data_w[15]}}, data_w[15:8]};
   1.561 +    {`LM32_SIZE_BYTE, 2'b01}:  load_data_w = {{24{sign_extend_w & data_w[23]}}, data_w[23:16]};
   1.562 +    {`LM32_SIZE_BYTE, 2'b00}:  load_data_w = {{24{sign_extend_w & data_w[31]}}, data_w[31:24]};
   1.563 +    {`LM32_SIZE_HWORD, 2'b1?}: load_data_w = {{16{sign_extend_w & data_w[15]}}, data_w[15:0]};
   1.564 +    {`LM32_SIZE_HWORD, 2'b0?}: load_data_w = {{16{sign_extend_w & data_w[31]}}, data_w[31:16]};
   1.565 +    {`LM32_SIZE_WORD, 2'b??}:  load_data_w = data_w;       // Word loads pass through unchanged
   1.566 +    default:                   load_data_w = {`LM32_WORD_WIDTH{1'bx}};
   1.567 +    endcase
   1.568 +end
   1.569 +
   1.570 +// Constant Wishbone signal: the burst type output is always driven with the linear burst encoding
   1.571 +assign d_bte_o = `LM32_BTYPE_LINEAR;
   1.572 +
   1.573 +`ifdef CFG_DCACHE_ENABLED                
   1.574 +// Per line size: Wishbone cycle types, last-word detection and first address used for a cache line refill
   1.575 +generate 
   1.576 +    case (bytes_per_line) // NOTE(review): no default branch; values other than 4, 8 or 16 leave these four nets undriven
   1.577 +    4: // One word per line: the only transfer is also the last one
   1.578 +    begin
   1.579 +assign first_cycle_type = `LM32_CTYPE_END;
   1.580 +assign next_cycle_type = `LM32_CTYPE_END;
   1.581 +assign last_word = `TRUE;
   1.582 +assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:2], 2'b00};
   1.583 +    end
   1.584 +    8: // The transfer after the first is always the last one
   1.585 +    begin
   1.586 +assign first_cycle_type = `LM32_CTYPE_INCREMENTING;
   1.587 +assign next_cycle_type = `LM32_CTYPE_END;
   1.588 +assign last_word = (&d_adr_o[addr_offset_msb:addr_offset_lsb]) == 1'b1; // All word-offset bits set
   1.589 +assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:addr_offset_msb+1], {addr_offset_width{1'b0}}, 2'b00}; // Word-offset and byte bits cleared
   1.590 +    end
   1.591 +    16: // Next cycle type becomes END once the top word-offset bit of the current address is set
   1.592 +    begin
   1.593 +assign first_cycle_type = `LM32_CTYPE_INCREMENTING;
   1.594 +assign next_cycle_type = d_adr_o[addr_offset_msb] == 1'b1 ? `LM32_CTYPE_END : `LM32_CTYPE_INCREMENTING;
   1.595 +assign last_word = (&d_adr_o[addr_offset_msb:addr_offset_lsb]) == 1'b1; // All word-offset bits set
   1.596 +assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:addr_offset_msb+1], {addr_offset_width{1'b0}}, 2'b00}; // Word-offset and byte bits cleared
   1.597 +    end
   1.598 +    endcase
   1.599 +endgenerate
   1.600 +`endif
   1.601 +
   1.602 +/////////////////////////////////////////////////////
   1.603 +// Sequential Logic
   1.604 +/////////////////////////////////////////////////////
   1.605 +
   1.606 +// Data Wishbone interface: registers the d_* bus outputs, captures read data in wb_data_m, and maintains wb_load_complete, stall_wb_load and (with a data cache) dcache_refill_ready
   1.607 +always @(posedge clk_i `CFG_RESET_SENSITIVITY)
   1.608 +begin
   1.609 +    if (rst_i == `TRUE) // Reset: no bus cycle requested, data/address registers zeroed, status flags cleared
   1.610 +    begin
   1.611 +        d_cyc_o <= `FALSE;
   1.612 +        d_stb_o <= `FALSE;
   1.613 +        d_dat_o <= {`LM32_WORD_WIDTH{1'b0}};
   1.614 +        d_adr_o <= {`LM32_WORD_WIDTH{1'b0}};
   1.615 +        d_sel_o <= {`LM32_BYTE_SELECT_WIDTH{`FALSE}};
   1.616 +        d_we_o <= `FALSE;
   1.617 +        d_cti_o <= `LM32_CTYPE_END;
   1.618 +        d_lock_o <= `FALSE;
   1.619 +        wb_data_m <= {`LM32_WORD_WIDTH{1'b0}};
   1.620 +        wb_load_complete <= `FALSE;
   1.621 +        stall_wb_load <= `FALSE;
   1.622 +`ifdef CFG_DCACHE_ENABLED                
   1.623 +        dcache_refill_ready <= `FALSE;
   1.624 +`endif                
   1.625 +    end
   1.626 +    else
   1.627 +    begin
   1.628 +`ifdef CFG_DCACHE_ENABLED 
   1.629 +        // Refill ready should only be asserted for a single cycle: default it to FALSE every clock; the assignment on transfer completion below overrides this default
   1.630 +        dcache_refill_ready <= `FALSE;
   1.631 +`endif                
   1.632 +        // Is a Wishbone cycle already in progress?
   1.633 +        if (d_cyc_o == `TRUE)
   1.634 +        begin
   1.635 +            // Is the cycle complete? (the transfer was acknowledged or an error was signalled)
   1.636 +            if ((d_ack_i == `TRUE) || (d_err_i == `TRUE))
   1.637 +            begin
   1.638 +`ifdef CFG_DCACHE_ENABLED                
   1.639 +                if ((dcache_refilling == `TRUE) && (!last_word))
   1.640 +                begin
   1.641 +                    // Fetch next word of cache line: only the word-offset bits of the address are incremented; d_cyc_o/d_stb_o are left asserted
   1.642 +                    d_adr_o[addr_offset_msb:addr_offset_lsb] <= d_adr_o[addr_offset_msb:addr_offset_lsb] + 1'b1;
   1.643 +                end
   1.644 +                else
   1.645 +`endif                
   1.646 +                begin
   1.647 +                    // Refill/access complete: end the Wishbone cycle and release the lock
   1.648 +                    d_cyc_o <= `FALSE;
   1.649 +                    d_stb_o <= `FALSE;
   1.650 +                    d_lock_o <= `FALSE;
   1.651 +                end
   1.652 +`ifdef CFG_DCACHE_ENABLED    
   1.653 +                d_cti_o <= next_cycle_type; // Updated on every completed transfer when the data cache is configured, refill or not
   1.654 +                // If we are performing a refill, indicate to cache next word of data is ready            
   1.655 +                dcache_refill_ready <= dcache_refilling;
   1.656 +`endif
   1.657 +                // Register data read from Wishbone interface
   1.658 +                wb_data_m <= d_dat_i;
   1.659 +                // Don't set when stores complete - otherwise we'll deadlock if load in m stage
   1.660 +                wb_load_complete <= !d_we_o; // TRUE only when the cycle that just finished was a read (d_we_o FALSE)
   1.661 +            end
   1.662 +            // synthesis translate_off            
   1.663 +            if (d_err_i == `TRUE)
   1.664 +                $display ("Data bus error. Address: %x", d_adr_o);
   1.665 +            // synthesis translate_on
   1.666 +        end
   1.667 +        else // No cycle in progress: start one, in priority order cache refill (if configured), then store, then Wishbone load
   1.668 +        begin
   1.669 +`ifdef CFG_DCACHE_ENABLED                
   1.670 +            if (dcache_refill_request == `TRUE)
   1.671 +            begin
   1.672 +                // Start cache refill: a read with every byte select asserted, beginning at first_address
   1.673 +                d_adr_o <= first_address;
   1.674 +                d_cyc_o <= `TRUE;
   1.675 +                d_sel_o <= {`LM32_WORD_WIDTH/8{`TRUE}};
   1.676 +                d_stb_o <= `TRUE;                
   1.677 +                d_we_o <= `FALSE;
   1.678 +                d_cti_o <= first_cycle_type;
   1.679 +                //d_lock_o <= `TRUE;
   1.680 +            end
   1.681 +            else 
   1.682 +`endif            
   1.683 +                 if (   (store_q_m == `TRUE)
   1.684 +                     && (stall_m == `FALSE)
   1.685 +`ifdef CFG_DRAM_ENABLED
   1.686 +                     && (dram_select_m == `FALSE)
   1.687 +`endif
   1.688 +`ifdef CFG_IROM_ENABLED
   1.689 +		     && (irom_select_m == `FALSE)
   1.690 +`endif			
   1.691 +                    )
   1.692 +            begin
   1.693 +                // Data cache is write through, so all stores go to memory (stores selected as DRAM or IROM are excluded by the condition above when those memories are configured)
   1.694 +                d_dat_o <= store_data_m;
   1.695 +                d_adr_o <= load_store_address_m;
   1.696 +                d_cyc_o <= `TRUE;
   1.697 +                d_sel_o <= byte_enable_m;
   1.698 +                d_stb_o <= `TRUE;
   1.699 +                d_we_o <= `TRUE;
   1.700 +                d_cti_o <= `LM32_CTYPE_END;
   1.701 +            end        
   1.702 +            else if (   (load_q_m == `TRUE) 
   1.703 +                     && (wb_select_m == `TRUE) 
   1.704 +                     && (wb_load_complete == `FALSE)
   1.705 +                     // stall_m will be TRUE, because stall_wb_load will be TRUE 
   1.706 +                    )
   1.707 +            begin
   1.708 +                // Read requested address; the load stall request is dropped as the bus cycle is issued
   1.709 +                stall_wb_load <= `FALSE;
   1.710 +                d_adr_o <= load_store_address_m;
   1.711 +                d_cyc_o <= `TRUE;
   1.712 +                d_sel_o <= byte_enable_m;
   1.713 +                d_stb_o <= `TRUE;
   1.714 +                d_we_o <= `FALSE;
   1.715 +                d_cti_o <= `LM32_CTYPE_END;
   1.716 +            end
   1.717 +        end
   1.718 +        // Clear load/store complete flag when instruction leaves M stage (assigned after the completion logic above, so this clear wins if both occur on the same clock)
   1.719 +        if (stall_m == `FALSE)
   1.720 +            wb_load_complete <= `FALSE;
   1.721 +        // When a Wishbone load first enters the M stage, we need to stall it
   1.722 +        if ((load_q_x == `TRUE) && (wb_select_x == `TRUE) && (stall_x == `FALSE))
   1.723 +            stall_wb_load <= `TRUE;
   1.724 +        // Clear stall request if load instruction is killed (last assignment to stall_wb_load in this block, so it overrides the ones above)
   1.725 +        if ((kill_m == `TRUE) || (exception_m == `TRUE))
   1.726 +            stall_wb_load <= `FALSE;
   1.727 +    end
   1.728 +end
   1.729 +
   1.730 +// Pipeline registers  
   1.731 +
   1.732 +// X/M stage pipeline registers: capture the X-stage access attributes (sign extend, size, byte enables, store data, target selects) on each clock where the M stage is not stalled
   1.733 +always @(posedge clk_i `CFG_RESET_SENSITIVITY)
   1.734 +begin
   1.735 +    if (rst_i == `TRUE) // Reset: clear every M-stage register
   1.736 +    begin
   1.737 +        sign_extend_m <= `FALSE;
   1.738 +        size_m <= 2'b00;
   1.739 +        byte_enable_m <= {`LM32_BYTE_SELECT_WIDTH{`FALSE}}; // Explicitly sized, matching the d_sel_o reset value, instead of relying on implicit extension of a 1-bit literal
   1.740 +        store_data_m <= {`LM32_WORD_WIDTH{1'b0}};
   1.741 +`ifdef CFG_DCACHE_ENABLED
   1.742 +        dcache_select_m <= `FALSE;
   1.743 +`endif
   1.744 +`ifdef CFG_DRAM_ENABLED
   1.745 +        dram_select_m <= `FALSE;
   1.746 +`endif
   1.747 +`ifdef CFG_IROM_ENABLED
   1.748 +        irom_select_m <= `FALSE;
   1.749 +`endif
   1.750 +        wb_select_m <= `FALSE;        
   1.751 +    end
   1.752 +    else
   1.753 +    begin
   1.754 +        if (stall_m == `FALSE) // While stall_m is asserted all M-stage values are held
   1.755 +        begin
   1.756 +            sign_extend_m <= sign_extend_x;
   1.757 +            size_m <= size_x;
   1.758 +            byte_enable_m <= byte_enable_x;    
   1.759 +            store_data_m <= store_data_x;
   1.760 +`ifdef CFG_DCACHE_ENABLED
   1.761 +            dcache_select_m <= dcache_select_x;
   1.762 +`endif
   1.763 +`ifdef CFG_DRAM_ENABLED
   1.764 +            dram_select_m <= dram_select_x;
   1.765 +`endif
   1.766 +`ifdef CFG_IROM_ENABLED
   1.767 +            irom_select_m <= irom_select_x;
   1.768 +`endif
   1.769 +            wb_select_m <= wb_select_x;
   1.770 +        end
   1.771 +    end
   1.772 +end
   1.773 +
   1.774 +// M/W stage pipeline registers: data, sign-extension flag and size are copied from the M stage to the W stage on every clock (there is no stall qualifier here)
   1.775 +always @(posedge clk_i `CFG_RESET_SENSITIVITY)
   1.776 +begin
   1.777 +    if (rst_i == `TRUE)
   1.778 +    begin
   1.779 +        data_w <= {`LM32_WORD_WIDTH{1'b0}};
   1.780 +        sign_extend_w <= `FALSE;
   1.781 +        size_w <= 2'b00;
   1.782 +    end
   1.783 +    else
   1.784 +    begin
   1.785 +        data_w <= data_m;
   1.786 +        sign_extend_w <= sign_extend_m;
   1.787 +        size_w <= size_m;
   1.788 +    end
   1.789 +end
   1.790 +
   1.791 +/////////////////////////////////////////////////////
   1.792 +// Behavioural Logic
   1.793 +/////////////////////////////////////////////////////
   1.794 +
   1.795 +// synthesis translate_off
   1.796 +
   1.797 +// Simulation-time check: when an unstalled load or store is in the M stage, warn if a halfword or word access uses an address that is not aligned to its size
   1.798 +always @(posedge clk_i)
   1.799 +begin
   1.800 +    if ((stall_m == `FALSE) && ((store_q_m == `TRUE) || (load_q_m == `TRUE)))
   1.801 +    begin
   1.802 +        if ((load_store_address_m[0] !== 1'b0) && (size_m === `LM32_SIZE_HWORD))
   1.803 +            $display ("Warning: Non-aligned halfword access. Address: 0x%0x Time: %0t.", load_store_address_m, $time);
   1.804 +        if ((load_store_address_m[1:0] !== 2'b00) && (size_m === `LM32_SIZE_WORD))
   1.805 +            $display ("Warning: Non-aligned word access. Address: 0x%0x Time: %0t.", load_store_address_m, $time);
   1.806 +    end
   1.807 +end
   1.808 +
   1.809 +// synthesis translate_on
   1.810 +
   1.811 +endmodule