Sun, 06 Mar 2011 21:03:32 +0000
Commit GSI patches from Wesley Terpstra
- Add JTAG capture pin
==> allows removing the sensitivity to reg_update, which caused clocking problems that made JTAG unstable
- Use register file backed by RAM blocks
==> saves quite some area and improves speed on Altera
... be sure to enable it using `define CFG_EBR_POSEDGE_REGISTER_FILE
- Fix a minor problem where compilation fails when interrupts are not supported
- Add support for flushing icache and dcache via JTAG
- Fix wrong width assignments for PC
Multiplier patch has been left out for now; don't the design synthesizers (Quartus / Xst) split the multiply automatically?
Original-Author: Wesley Terpstra <w.terpstra gsi.de>
Original-Source: Milkymist mailing list postings, 2011-02-28 (11:19 and 13:32) and 2011-03-01
Original-Message-Ids: <4D6B84B5.9040604@gsi.de> <4D6BA3E4.3020609@gsi.de> <4D6CFFF2.6030703@gsi.de>
jtag_cores.v | file | annotate | diff | revisions | |
jtag_tap_altera.v | file | annotate | diff | revisions | |
jtag_tap_spartan6.v | file | annotate | diff | revisions | |
jtag_tap_xilinx_spartan6.v | file | annotate | diff | revisions | |
lm32_cpu.v | file | annotate | diff | revisions | |
lm32_dp_ram.v | file | annotate | diff | revisions | |
lm32_jtag.v | file | annotate | diff | revisions |
1.1 --- a/jtag_cores.v Sun Mar 06 19:49:17 2011 +0000 1.2 +++ b/jtag_cores.v Sun Mar 06 21:03:32 2011 +0000 1.3 @@ -1,3 +1,5 @@ 1.4 +// Modified by GSI to use simple positive edge clocking and the JTAG capture state 1.5 + 1.6 module jtag_cores ( 1.7 input [7:0] reg_d, 1.8 input [2:0] reg_addr_d, 1.9 @@ -11,15 +13,19 @@ 1.10 wire tck; 1.11 wire tdi; 1.12 wire tdo; 1.13 +wire capture; 1.14 wire shift; 1.15 wire update; 1.16 +wire e1dr; 1.17 wire reset; 1.18 1.19 jtag_tap jtag_tap ( 1.20 .tck(tck), 1.21 .tdi(tdi), 1.22 .tdo(tdo), 1.23 + .capture(capture), 1.24 .shift(shift), 1.25 + .e1dr(e1dr), 1.26 .update(update), 1.27 .reset(reset) 1.28 ); 1.29 @@ -27,26 +33,28 @@ 1.30 reg [10:0] jtag_shift; 1.31 reg [10:0] jtag_latched; 1.32 1.33 -always @(posedge tck or posedge reset) 1.34 +always @(posedge tck) 1.35 begin 1.36 if(reset) 1.37 jtag_shift <= 11'b0; 1.38 else begin 1.39 - if(shift) 1.40 + if (shift) 1.41 jtag_shift <= {tdi, jtag_shift[10:1]}; 1.42 - else 1.43 + else if (capture) 1.44 jtag_shift <= {reg_d, reg_addr_d}; 1.45 end 1.46 end 1.47 1.48 assign tdo = jtag_shift[0]; 1.49 1.50 -always @(posedge reg_update or posedge reset) 1.51 +always @(posedge tck) 1.52 begin 1.53 if(reset) 1.54 jtag_latched <= 11'b0; 1.55 - else 1.56 - jtag_latched <= jtag_shift; 1.57 + else begin 1.58 + if (e1dr) 1.59 + jtag_latched <= jtag_shift; 1.60 + end 1.61 end 1.62 1.63 assign reg_update = update;
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/jtag_tap_altera.v Sun Mar 06 21:03:32 2011 +0000 2.3 @@ -0,0 +1,59 @@ 2.4 +module jtag_tap( 2.5 + output tck, 2.6 + output tdi, 2.7 + input tdo, 2.8 + output capture, 2.9 + output shift, 2.10 + output e1dr, 2.11 + output update, 2.12 + output reset 2.13 +); 2.14 + 2.15 +assign reset = 0; 2.16 +wire nil1, nil2, nil3, nil4; 2.17 + 2.18 +sld_virtual_jtag altera_jtag( 2.19 + .ir_in (), 2.20 + .ir_out (), 2.21 + .tck (tck), 2.22 + .tdo (tdo), 2.23 + .tdi (tdi), 2.24 + .virtual_state_cdr (capture), 2.25 + .virtual_state_sdr (shift), 2.26 + .virtual_state_e1dr (e1dr), 2.27 + .virtual_state_pdr (nil1), 2.28 + .virtual_state_e2dr (nil2), 2.29 + .virtual_state_udr (update), 2.30 + .virtual_state_cir (nil3), 2.31 + .virtual_state_uir (nil4) 2.32 + // synopsys translate_off 2.33 + , 2.34 + .jtag_state_cdr (), 2.35 + .jtag_state_cir (), 2.36 + .jtag_state_e1dr (), 2.37 + .jtag_state_e1ir (), 2.38 + .jtag_state_e2dr (), 2.39 + .jtag_state_e2ir (), 2.40 + .jtag_state_pdr (), 2.41 + .jtag_state_pir (), 2.42 + .jtag_state_rti (), 2.43 + .jtag_state_sdr (), 2.44 + .jtag_state_sdrs (), 2.45 + .jtag_state_sir (), 2.46 + .jtag_state_sirs (), 2.47 + .jtag_state_tlr (), 2.48 + .jtag_state_udr (), 2.49 + .jtag_state_uir (), 2.50 + .tms () 2.51 + // synopsys translate_on 2.52 + ); 2.53 + 2.54 +defparam 2.55 + altera_jtag.sld_auto_instance_index = "YES", 2.56 + altera_jtag.sld_instance_index = 0, 2.57 + altera_jtag.sld_ir_width = 1, 2.58 + altera_jtag.sld_sim_action = "", 2.59 + altera_jtag.sld_sim_n_scan = 0, 2.60 + altera_jtag.sld_sim_total_length = 0; 2.61 + 2.62 +endmodule
3.1 --- a/jtag_tap_spartan6.v Sun Mar 06 19:49:17 2011 +0000 3.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 3.3 @@ -1,33 +0,0 @@ 3.4 - 3.5 -module jtag_tap( 3.6 - output tck, 3.7 - output tdi, 3.8 - input tdo, 3.9 - output shift, 3.10 - output update, 3.11 - output reset 3.12 -); 3.13 - 3.14 -wire g_shift; 3.15 -wire g_update; 3.16 - 3.17 -assign shift = g_shift & sel; 3.18 -assign update = g_update & sel; 3.19 - 3.20 -BSCAN_SPARTAN6 #( 3.21 - .JTAG_CHAIN(1) 3.22 -) bscan ( 3.23 - .CAPTURE(), 3.24 - .DRCK(tck), 3.25 - .RESET(reset), 3.26 - .RUNTEST(), 3.27 - .SEL(sel), 3.28 - .SHIFT(g_shift), 3.29 - .TCK(), 3.30 - .TDI(tdi), 3.31 - .TMS(), 3.32 - .UPDATE(g_update), 3.33 - .TDO(tdo) 3.34 -); 3.35 - 3.36 -endmodule
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 4.2 +++ b/jtag_tap_xilinx_spartan6.v Sun Mar 06 21:03:32 2011 +0000 4.3 @@ -0,0 +1,43 @@ 4.4 + 4.5 +module jtag_tap( 4.6 + output tck, 4.7 + output tdi, 4.8 + input tdo, 4.9 + output capture, 4.10 + output shift, 4.11 + output e1dr, 4.12 + output update, 4.13 + output reset 4.14 +); 4.15 + 4.16 +// Unfortunately the exit1 state for DR (e1dr) is mising 4.17 +// We can simulate it by interpretting 'update' as e1dr and delaying 'update' 4.18 +wire g_capture; 4.19 +wire g_shift; 4.20 +wire g_update; 4.21 +reg update_delay; 4.22 + 4.23 +assign capture = g_capture & sel; 4.24 +assign shift = g_shift & sel; 4.25 +assign e1dr = g_update & sel; 4.26 +assign update = update_delay; 4.27 + 4.28 +BSCAN_SPARTAN6 #( 4.29 + .JTAG_CHAIN(1) 4.30 +) bscan ( 4.31 + .CAPTURE(g_capture), 4.32 + .DRCK(tck), 4.33 + .RESET(reset), 4.34 + .RUNTEST(), 4.35 + .SEL(sel), 4.36 + .SHIFT(g_shift), 4.37 + .TCK(), 4.38 + .TDI(tdi), 4.39 + .TMS(), 4.40 + .UPDATE(g_update), 4.41 + .TDO(tdo) 4.42 +); 4.43 + 4.44 +update_delay <= g_update; 4.45 + 4.46 +endmodule
5.1 --- a/lm32_cpu.v Sun Mar 06 19:49:17 2011 +0000 5.2 +++ b/lm32_cpu.v Sun Mar 06 21:03:32 2011 +0000 5.3 @@ -1269,70 +1269,44 @@ 5.4 /*---------------------------------------------------------------------- 5.5 Register file instantiation as Pseudo-Dual Port EBRs. 5.6 ----------------------------------------------------------------------*/ 5.7 - pmi_ram_dp 5.8 + // Modified by GSI: removed non-portable RAM instantiation 5.9 + lm32_dp_ram 5.10 #( 5.11 // ----- Parameters ----- 5.12 - .pmi_wr_addr_depth(1<<5), 5.13 - .pmi_wr_addr_width(5), 5.14 - .pmi_wr_data_width(32), 5.15 - .pmi_rd_addr_depth(1<<5), 5.16 - .pmi_rd_addr_width(5), 5.17 - .pmi_rd_data_width(32), 5.18 - .pmi_regmode("noreg"), 5.19 - .pmi_gsr("enable"), 5.20 - .pmi_resetmode("sync"), 5.21 - .pmi_init_file("none"), 5.22 - .pmi_init_file_format("binary"), 5.23 - .pmi_family(`LATTICE_FAMILY), 5.24 - .module_type("pmi_ram_dp") 5.25 + .addr_depth(1<<5), 5.26 + .addr_width(5), 5.27 + .data_width(32) 5.28 ) 5.29 reg_0 5.30 ( 5.31 // ----- Inputs ----- 5.32 - .Data(w_result), 5.33 - .WrAddress(write_idx_w), 5.34 - .RdAddress(instruction_f[25:21]), 5.35 - .WrClock(clk_i), 5.36 - .RdClock(clk_i), 5.37 - .WrClockEn(`TRUE), 5.38 - .RdClockEn(`TRUE), 5.39 - .WE(reg_write_enable_q_w), 5.40 - .Reset(rst_i), 5.41 + .clk_i (clk_i), 5.42 + .rst_i (rst_i), 5.43 + .we_i (reg_write_enable_q_w), 5.44 + .wdata_i (w_result), 5.45 + .waddr_i (write_idx_w), 5.46 + .raddr_i (instruction_f[25:21]), 5.47 // ----- Outputs ----- 5.48 - .Q(regfile_data_0) 5.49 + .rdata_o (regfile_data_0) 5.50 ); 5.51 5.52 - pmi_ram_dp 5.53 + lm32_dp_ram 5.54 #( 5.55 - // ----- Parameters ----- 5.56 - .pmi_wr_addr_depth(1<<5), 5.57 - .pmi_wr_addr_width(5), 5.58 - .pmi_wr_data_width(32), 5.59 - .pmi_rd_addr_depth(1<<5), 5.60 - .pmi_rd_addr_width(5), 5.61 - .pmi_rd_data_width(32), 5.62 - .pmi_regmode("noreg"), 5.63 - .pmi_gsr("enable"), 5.64 - .pmi_resetmode("sync"), 5.65 - .pmi_init_file("none"), 5.66 - .pmi_init_file_format("binary"), 5.67 - 
.pmi_family(`LATTICE_FAMILY), 5.68 - .module_type("pmi_ram_dp") 5.69 + .addr_depth(1<<5), 5.70 + .addr_width(5), 5.71 + .data_width(32) 5.72 ) 5.73 reg_1 5.74 ( 5.75 // ----- Inputs ----- 5.76 - .Data(w_result), 5.77 - .WrAddress(write_idx_w), 5.78 - .RdAddress(instruction_f[20:16]), 5.79 - .WrClock(clk_i), 5.80 - .RdClock(clk_i), 5.81 - .WrClockEn(`TRUE), 5.82 - .RdClockEn(`TRUE), 5.83 - .WE(reg_write_enable_q_w), 5.84 - .Reset(rst_i), 5.85 + .clk_i (clk_i), 5.86 + .rst_i (rst_i), 5.87 + .we_i (reg_write_enable_q_w), 5.88 + .wdata_i (w_result), 5.89 + .waddr_i (write_idx_w), 5.90 + .raddr_i (instruction_f[20:16]), 5.91 // ----- Outputs ----- 5.92 - .Q(regfile_data_1) 5.93 + .rdata_o (regfile_data_1) 5.94 ); 5.95 `endif 5.96 5.97 @@ -1882,7 +1856,9 @@ 5.98 exception has occured. This stall will ensure that D_CYC_O and 5.99 store_m will both be low for one cycle. 5.100 */ 5.101 +`ifdef CFG_INTERRUPTS_ENABLED 5.102 || ((store_x == `TRUE) && (interrupt_exception == `TRUE)) 5.103 +`endif 5.104 || (load_m == `TRUE) 5.105 || (load_x == `TRUE) 5.106 ) 5.107 @@ -2042,15 +2018,29 @@ 5.108 5.109 // Cache flush 5.110 `ifdef CFG_ICACHE_ENABLED 5.111 -assign iflush = (csr_write_enable_d == `TRUE) 5.112 - && (csr_d == `LM32_CSR_ICC) 5.113 - && (stall_d == `FALSE) 5.114 - && (kill_d == `FALSE) 5.115 - && (valid_d == `TRUE); 5.116 +assign iflush = ( (csr_write_enable_d == `TRUE) 5.117 + && (csr_d == `LM32_CSR_ICC) 5.118 + && (stall_d == `FALSE) 5.119 + && (kill_d == `FALSE) 5.120 + && (valid_d == `TRUE)) 5.121 +// Added by GSI: needed to flush cache after loading firmware per JTAG 5.122 +`ifdef CFG_HW_DEBUG_ENABLED 5.123 + || 5.124 + ( (jtag_csr_write_enable == `TRUE) 5.125 + && (jtag_csr == `LM32_CSR_ICC)) 5.126 +`endif 5.127 + ; 5.128 `endif 5.129 `ifdef CFG_DCACHE_ENABLED 5.130 -assign dflush_x = (csr_write_enable_q_x == `TRUE) 5.131 - && (csr_x == `LM32_CSR_DCC); 5.132 +assign dflush_x = ( (csr_write_enable_q_x == `TRUE) 5.133 + && (csr_x == `LM32_CSR_DCC)) 5.134 +// Added by 
GSI: needed to flush cache after loading firmware per JTAG 5.135 +`ifdef CFG_HW_DEBUG_ENABLED 5.136 + || 5.137 + ( (jtag_csr_write_enable == `TRUE) 5.138 + && (jtag_csr == `LM32_CSR_DCC)) 5.139 +`endif 5.140 + ; 5.141 `endif 5.142 5.143 // Extract CSR index 5.144 @@ -2252,7 +2242,7 @@ 5.145 operand_0_x <= {`LM32_WORD_WIDTH{1'b0}}; 5.146 operand_1_x <= {`LM32_WORD_WIDTH{1'b0}}; 5.147 store_operand_x <= {`LM32_WORD_WIDTH{1'b0}}; 5.148 - branch_target_x <= {`LM32_WORD_WIDTH{1'b0}}; 5.149 + branch_target_x <= {`LM32_PC_WIDTH{1'b0}}; 5.150 x_result_sel_csr_x <= `FALSE; 5.151 `ifdef LM32_MC_ARITHMETIC_ENABLED 5.152 x_result_sel_mc_arith_x <= `FALSE; 5.153 @@ -2313,7 +2303,7 @@ 5.154 `endif 5.155 csr_write_enable_x <= `FALSE; 5.156 operand_m <= {`LM32_WORD_WIDTH{1'b0}}; 5.157 - branch_target_m <= {`LM32_WORD_WIDTH{1'b0}}; 5.158 + branch_target_m <= {`LM32_PC_WIDTH{1'b0}}; 5.159 m_result_sel_compare_m <= `FALSE; 5.160 `ifdef CFG_PL_BARREL_SHIFT_ENABLED 5.161 m_result_sel_shift_m <= `FALSE;
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 6.2 +++ b/lm32_dp_ram.v Sun Mar 06 21:03:32 2011 +0000 6.3 @@ -0,0 +1,35 @@ 6.4 +module lm32_dp_ram( 6.5 + clk_i, 6.6 + rst_i, 6.7 + we_i, 6.8 + waddr_i, 6.9 + wdata_i, 6.10 + raddr_i, 6.11 + rdata_o); 6.12 + 6.13 +parameter addr_width = 32; 6.14 +parameter addr_depth = 1024; 6.15 +parameter data_width = 8; 6.16 + 6.17 +input clk_i; 6.18 +input rst_i; 6.19 +input we_i; 6.20 +input [addr_width-1:0] waddr_i; 6.21 +input [data_width-1:0] wdata_i; 6.22 +input [addr_width-1:0] raddr_i; 6.23 +output [data_width-1:0] rdata_o; 6.24 + 6.25 +reg [data_width-1:0] ram[addr_depth-1:0]; 6.26 + 6.27 +reg [addr_width-1:0] raddr_r; 6.28 +assign rdata_o = ram[raddr_r]; 6.29 + 6.30 +always @ (posedge clk_i) 6.31 +begin 6.32 + if (we_i) 6.33 + ram[waddr_i] <= wdata_i; 6.34 + raddr_r <= raddr_i; 6.35 +end 6.36 + 6.37 +endmodule 6.38 +
7.1 --- a/lm32_jtag.v Sun Mar 06 19:49:17 2011 +0000 7.2 +++ b/lm32_jtag.v Sun Mar 06 21:03:32 2011 +0000 7.3 @@ -170,13 +170,15 @@ 7.4 // Internal nets and registers 7.5 ///////////////////////////////////////////////////// 7.6 7.7 -reg rx_toggle; // Clock-domain crossing registers 7.8 -reg rx_toggle_r; // Registered version of rx_toggle 7.9 -reg rx_toggle_r_r; // Registered version of rx_toggle_r 7.10 -reg rx_toggle_r_r_r; // Registered version of rx_toggle_r_r 7.11 +reg rx_update; // Clock-domain crossing registers 7.12 +reg rx_update_r; // Registered version of rx_update 7.13 +reg rx_update_r_r; // Registered version of rx_update_r 7.14 +reg rx_update_r_r_r; // Registered version of rx_update_r_r 7.15 7.16 -reg [`LM32_BYTE_RNG] rx_byte; 7.17 -reg [2:0] rx_addr; 7.18 +// These wires come from the JTAG clock domain. 7.19 +// They have been held unchanged for an entire JTAG clock cycle before the jtag_update toggle flips 7.20 +wire [`LM32_BYTE_RNG] rx_byte; 7.21 +wire [2:0] rx_addr; 7.22 7.23 `ifdef CFG_JTAG_UART_ENABLED 7.24 reg [`LM32_BYTE_RNG] uart_tx_byte; // UART TX data 7.25 @@ -229,36 +231,26 @@ 7.26 // Sequential Logic 7.27 ///////////////////////////////////////////////////// 7.28 7.29 -// Toggle a flag when a JTAG write occurs 7.30 - 7.31 -always @(negedge jtag_update `CFG_RESET_SENSITIVITY) 7.32 -begin 7.33 -if (rst_i == `TRUE) 7.34 - rx_toggle <= 1'b0; 7.35 -else 7.36 - rx_toggle <= ~rx_toggle; 7.37 -end 7.38 +assign rx_byte = jtag_reg_q; 7.39 +assign rx_addr = jtag_reg_addr_q; 7.40 7.41 -always @(*) 7.42 -begin 7.43 - rx_byte = jtag_reg_q; 7.44 - rx_addr = jtag_reg_addr_q; 7.45 -end 7.46 - 7.47 -// Clock domain crossing from JTAG clock domain to CPU clock domain 7.48 +// The JTAG latched jtag_reg[_addr]_q at least one JTCK before jtag_update is raised 7.49 +// Thus, they are stable (and safe to sample) when jtag_update is high 7.50 always @(posedge clk_i `CFG_RESET_SENSITIVITY) 7.51 begin 7.52 if (rst_i == `TRUE) 7.53 begin 7.54 - rx_toggle_r <= 1'b0; 
7.55 - rx_toggle_r_r <= 1'b0; 7.56 - rx_toggle_r_r_r <= 1'b0; 7.57 + rx_update <= 1'b0; 7.58 + rx_update_r <= 1'b0; 7.59 + rx_update_r_r <= 1'b0; 7.60 + rx_update_r_r_r <= 1'b0; 7.61 end 7.62 else 7.63 begin 7.64 - rx_toggle_r <= rx_toggle; 7.65 - rx_toggle_r_r <= rx_toggle_r; 7.66 - rx_toggle_r_r_r <= rx_toggle_r_r; 7.67 + rx_update <= jtag_update; 7.68 + rx_update_r <= rx_update; 7.69 + rx_update_r_r <= rx_update_r; 7.70 + rx_update_r_r_r <= rx_update_r_r; 7.71 end 7.72 end 7.73 7.74 @@ -319,7 +311,7 @@ 7.75 `LM32_JTAG_STATE_READ_COMMAND: 7.76 begin 7.77 // Wait for rx register to toggle which indicates new data is available 7.78 - if (rx_toggle_r_r != rx_toggle_r_r_r) 7.79 + if ((~rx_update_r_r_r & rx_update_r_r) == `TRUE) 7.80 begin 7.81 command <= rx_byte[7:4]; 7.82 case (rx_addr) 7.83 @@ -384,7 +376,7 @@ 7.84 `ifdef CFG_HW_DEBUG_ENABLED 7.85 `LM32_JTAG_STATE_READ_BYTE_0: 7.86 begin 7.87 - if (rx_toggle_r_r != rx_toggle_r_r_r) 7.88 + if ((~rx_update_r_r_r & rx_update_r_r) == `TRUE) 7.89 begin 7.90 jtag_byte_0 <= rx_byte; 7.91 state <= `LM32_JTAG_STATE_READ_BYTE_1; 7.92 @@ -392,7 +384,7 @@ 7.93 end 7.94 `LM32_JTAG_STATE_READ_BYTE_1: 7.95 begin 7.96 - if (rx_toggle_r_r != rx_toggle_r_r_r) 7.97 + if ((~rx_update_r_r_r & rx_update_r_r) == `TRUE) 7.98 begin 7.99 jtag_byte_1 <= rx_byte; 7.100 state <= `LM32_JTAG_STATE_READ_BYTE_2; 7.101 @@ -400,7 +392,7 @@ 7.102 end 7.103 `LM32_JTAG_STATE_READ_BYTE_2: 7.104 begin 7.105 - if (rx_toggle_r_r != rx_toggle_r_r_r) 7.106 + if ((~rx_update_r_r_r & rx_update_r_r) == `TRUE) 7.107 begin 7.108 jtag_byte_2 <= rx_byte; 7.109 state <= `LM32_JTAG_STATE_READ_BYTE_3; 7.110 @@ -408,7 +400,7 @@ 7.111 end 7.112 `LM32_JTAG_STATE_READ_BYTE_3: 7.113 begin 7.114 - if (rx_toggle_r_r != rx_toggle_r_r_r) 7.115 + if ((~rx_update_r_r_r & rx_update_r_r) == `TRUE) 7.116 begin 7.117 jtag_byte_3 <= rx_byte; 7.118 if (command == `LM32_DP_READ_MEMORY) 7.119 @@ -419,7 +411,7 @@ 7.120 end 7.121 `LM32_JTAG_STATE_READ_BYTE_4: 7.122 begin 7.123 - if 
(rx_toggle_r_r != rx_toggle_r_r_r) 7.124 + if ((~rx_update_r_r_r & rx_update_r_r) == `TRUE) 7.125 begin 7.126 jtag_byte_4 <= rx_byte; 7.127 state <= `LM32_JTAG_STATE_PROCESS_COMMAND;