From 832ef96ff2afe467d587e960d610ebd78085ea76 Mon Sep 17 00:00:00 2001 From: hfaroo9 Date: Tue, 27 Sep 2022 12:03:28 -0500 Subject: [PATCH 01/64] Trying to simulate --- prim/rtl/dv_fcov_macros.svh | 113 ++++++++++++++ prim/rtl/prim_assert.sv | 180 +++++++++++++++++++++++ prim/rtl/prim_assert_sec_cm.svh | 62 ++++++++ prim/rtl/prim_assert_standard_macros.svh | 80 ++++++++++ prim/rtl/prim_flop_macros.sv | 75 ++++++++++ 5 files changed, 510 insertions(+) create mode 100644 prim/rtl/dv_fcov_macros.svh create mode 100644 prim/rtl/prim_assert.sv create mode 100644 prim/rtl/prim_assert_sec_cm.svh create mode 100644 prim/rtl/prim_assert_standard_macros.svh create mode 100644 prim/rtl/prim_flop_macros.sv diff --git a/prim/rtl/dv_fcov_macros.svh b/prim/rtl/dv_fcov_macros.svh new file mode 100644 index 0000000..9439ea5 --- /dev/null +++ b/prim/rtl/dv_fcov_macros.svh @@ -0,0 +1,113 @@ +// Copyright lowRISC contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Include FCOV RTL by default. Disable it for synthesis and where explicitly requested (by defining +// DV_FCOV_DISABLE). +`ifdef SYNTHESIS + `define DV_FCOV_DISABLE +`elsif YOSYS + `define DV_FCOV_DISABLE +`endif + +// Disable instantiations of FCOV coverpoints or covergroups. +`ifdef VERILATOR + `define DV_FCOV_DISABLE_CP +`elsif DV_FCOV_DISABLE + `define DV_FCOV_DISABLE_CP +`endif + +// Instantiates a covergroup in an interface or module. +// +// This macro assumes that a covergroup of the same name as the NAME_ arg is defined in the +// interface or module. It just adds some extra signals and logic to control the creation of the +// covergroup instance with ~bit en_~. This defaults to 0. It is ORed with the external +// COND_ signal. The testbench can modify it at t = 0 based on the test being run. +// NOTE: This is not meant to be invoked inside a class. +// +// NAME_ : Name of the covergroup. 
+// COND_ : External condition / expr that controls the creation of the covergroup. +// ARGS_ : Arguments to covergroup instance, if any. Args MUST BE wrapped in (..). +`ifndef DV_FCOV_INSTANTIATE_CG +`ifdef DV_FCOV_DISABLE_CP + `define DV_FCOV_INSTANTIATE_CG(NAME_, COND_ = 1'b1, ARGS_ = ()) +`else + `define DV_FCOV_INSTANTIATE_CG(NAME_, COND_ = 1'b1, ARGS_ = ()) \ + bit en_``NAME_ = 1'b0; \ + NAME_ NAME_``_inst; \ + initial begin \ + /* The #1 delay below allows any part of the tb to control the conditions first at t = 0. */ \ + #1; \ + if ((en_``NAME_) || (COND_)) begin \ + $display("%0t: (%0s:%0d) [%m] %0s", $time, `__FILE__, `__LINE__, \ + {"Creating covergroup ", `"NAME_`"}); \ + NAME_``_inst = new``ARGS_; \ + end \ + end +`endif +`endif + +// Creates a coverpoint for an expression where only the expression true case is of interest for +// coverage (e.g. where the expression indicates an event has occurred). +`ifndef DV_FCOV_EXPR_SEEN +`ifdef DV_FCOV_DISABLE_CP + `define DV_FCOV_EXPR_SEEN(NAME_, EXPR_) +`else + `define DV_FCOV_EXPR_SEEN(NAME_, EXPR_) cp_``NAME_: coverpoint EXPR_ { bins seen = {1}; } +`endif +`endif + +// Creates a SVA cover that can be used in a covergroup. +// +// This macro creates an unnamed SVA cover from the property (or an expression) `PROP_` and an event +// with the name `EV_NAME_`. When the SVA cover is hit, the event is triggered. A coverpoint can +// cover the `triggered` property of the event. +`ifndef DV_FCOV_SVA +`ifdef DV_FCOV_DISABLE + `define DV_FCOV_SVA(EV_NAME_, PROP_, CLK_ = clk_i, RST_ = rst_ni) +`else + `define DV_FCOV_SVA(EV_NAME_, PROP_, CLK_ = clk_i, RST_ = rst_ni) \ + event EV_NAME_; \ + cover property (@(posedge CLK_) disable iff (RST_ == 0) (PROP_)) begin \ + -> EV_NAME_; \ + end +`endif +`endif + +// Coverage support is not always available but it's useful to include extra fcov signals for +// linting purposes. They need to be marked as unused to avoid warnings. 
+`ifndef DV_FCOV_MARK_UNUSED + `define DV_FCOV_MARK_UNUSED(TYPE_, NAME_) \ + TYPE_ unused_fcov_``NAME_; \ + assign unused_fcov_``NAME_ = fcov_``NAME_; +`endif + +// Define a signal and expression in the design for capture in functional coverage +`ifndef DV_FCOV_SIGNAL +`ifdef DV_FCOV_DISABLE + `define DV_FCOV_SIGNAL(TYPE_, NAME_, EXPR_) +`else + `define DV_FCOV_SIGNAL(TYPE_, NAME_, EXPR_) \ + TYPE_ fcov_``NAME_; \ + assign fcov_``NAME_ = EXPR_; \ + `DV_FCOV_MARK_UNUSED(TYPE_, NAME_) +`endif +`endif + +// Define a signal and expression in the design for capture in functional coverage depending on +// design configuration. The input GEN_COND_ must be a constant or parameter. +`ifndef DV_FCOV_SIGNAL_GEN_IF +`ifdef DV_FCOV_DISABLE + `define DV_FCOV_SIGNAL_GEN_IF(TYPE_, NAME_, EXPR_, GEN_COND_, DEFAULT_ = '0) +`else + `define DV_FCOV_SIGNAL_GEN_IF(TYPE_, NAME_, EXPR_, GEN_COND_, DEFAULT_ = '0) \ + TYPE_ fcov_``NAME_; \ + if (GEN_COND_) begin : g_fcov_``NAME_ \ + assign fcov_``NAME_ = EXPR_; \ + end else begin : g_no_fcov_``NAME_ \ + assign fcov_``NAME_ = DEFAULT_; \ + end \ + `DV_FCOV_MARK_UNUSED(TYPE_, NAME_) +`endif +`endif + diff --git a/prim/rtl/prim_assert.sv b/prim/rtl/prim_assert.sv new file mode 100644 index 0000000..bb9251b --- /dev/null +++ b/prim/rtl/prim_assert.sv @@ -0,0 +1,180 @@ +// Copyright lowRISC contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Macros and helper code for using assertions. +// - Provides default clk and rst options to simplify code +// - Provides boiler plate template for common assertions + +`ifndef PRIM_ASSERT_SV +`define PRIM_ASSERT_SV + +/////////////////// +// Helper macros // +/////////////////// + +// Default clk and reset signals used by assertion macros below. 
+`define ASSERT_DEFAULT_CLK clk_i +`define ASSERT_DEFAULT_RST !rst_ni + +// Converts an arbitrary block of code into a Verilog string +`define PRIM_STRINGIFY(__x) `"__x`" + +// ASSERT_ERROR logs an error message with either `uvm_error or with $error. +// +// This somewhat duplicates `DV_ERROR macro defined in hw/dv/sv/dv_utils/dv_macros.svh. The reason +// for redefining it here is to avoid creating a dependency. +`define ASSERT_ERROR(__name) \ +`ifdef UVM \ + uvm_pkg::uvm_report_error("ASSERT FAILED", `PRIM_STRINGIFY(__name), uvm_pkg::UVM_NONE, \ + `__FILE__, `__LINE__, "", 1); \ +`else \ + $error("%0t: (%0s:%0d) [%m] [ASSERT FAILED] %0s", $time, `__FILE__, `__LINE__, \ + `PRIM_STRINGIFY(__name)); \ +`endif + +// This macro is suitable for conditionally triggering lint errors, e.g., if a Sec parameter takes +// on a non-default value. This may be required for pre-silicon/FPGA evaluation but we don't want +// to allow this for tapeout. +`define ASSERT_STATIC_LINT_ERROR(__name, __prop) \ + localparam int __name = (__prop) ? 1 : 2; \ + always_comb begin \ + logic unused_assert_static_lint_error; \ + unused_assert_static_lint_error = __name'(1'b1); \ + end + +// Static assertions for checks inside SV packages. If the conditions is not true, this will +// trigger an error during elaboration. +`define ASSERT_STATIC_IN_PACKAGE(__name, __prop) \ + function automatic bit assert_static_in_package_``__name(); \ + bit unused_bit [((__prop) ? 1 : -1)]; \ + unused_bit = '{default: 1'b0}; \ + return unused_bit[0]; \ + endfunction + +// The basic helper macros are actually defined in "implementation headers". The macros should do +// the same thing in each case (except for the dummy flavour), but in a way that the respective +// tools support. +// +// If the tool supports assertions in some form, we also define INC_ASSERT (which can be used to +// hide signal definitions that are only used for assertions). 
+// +// The list of basic macros supported is: +// +// ASSERT_I: Immediate assertion. Note that immediate assertions are sensitive to simulation +// glitches. +// +// ASSERT_INIT: Assertion in initial block. Can be used for things like parameter checking. +// +// ASSERT_INIT_NET: Assertion in initial block. Can be used for initial value of a net. +// +// ASSERT_FINAL: Assertion in final block. Can be used for things like queues being empty at end of +// sim, all credits returned at end of sim, state machines in idle at end of sim. +// +// ASSERT: Assert a concurrent property directly. It can be called as a module (or +// interface) body item. +// +// Note: We use (__rst !== '0) in the disable iff statements instead of (__rst == +// '1). This properly disables the assertion in cases when reset is X at the +// beginning of a simulation. For that case, (reset == '1) does not disable the +// assertion. +// +// ASSERT_NEVER: Assert a concurrent property NEVER happens +// +// ASSERT_KNOWN: Assert that signal has a known value (each bit is either '0' or '1') after reset. +// It can be called as a module (or interface) body item. +// +// COVER: Cover a concurrent property +// +// ASSUME: Assume a concurrent property +// +// ASSUME_I: Assume an immediate property + +`ifdef VERILATOR + `include "prim_assert_dummy_macros.svh" +`elsif SYNTHESIS + `include "prim_assert_dummy_macros.svh" +`elsif YOSYS + `include "prim_assert_yosys_macros.svh" + `define INC_ASSERT +`else + `include "prim_assert_standard_macros.svh" + `define INC_ASSERT +`endif + +////////////////////////////// +// Complex assertion macros // +////////////////////////////// + +// Assert that signal is an active-high pulse with pulse length of 1 clock cycle +`define ASSERT_PULSE(__name, __sig, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ + `ASSERT(__name, $rose(__sig) |=> !(__sig), __clk, __rst) + +// Assert that a property is true only when an enable signal is set. 
It can be called as a module +// (or interface) body item. +`define ASSERT_IF(__name, __prop, __enable, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ + `ASSERT(__name, (__enable) |-> (__prop), __clk, __rst) + +// Assert that signal has a known value (each bit is either '0' or '1') after reset if enable is +// set. It can be called as a module (or interface) body item. +`define ASSERT_KNOWN_IF(__name, __sig, __enable, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ + `ASSERT_KNOWN(__name``KnownEnable, __enable, __clk, __rst) \ + `ASSERT_IF(__name, !$isunknown(__sig), __enable, __clk, __rst) + +////////////////////////////////// +// For formal verification only // +////////////////////////////////// + +// Note that the existing set of ASSERT macros specified above shall be used for FPV, +// thereby ensuring that the assertions are evaluated during DV simulations as well. + +// ASSUME_FPV +// Assume a concurrent property during formal verification only. +`define ASSUME_FPV(__name, __prop, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ +`ifdef FPV_ON \ + `ASSUME(__name, __prop, __clk, __rst) \ +`endif + +// ASSUME_I_FPV +// Assume an immediate property during formal verification only. +`define ASSUME_I_FPV(__name, __prop) \ +`ifdef FPV_ON \ + `ASSUME_I(__name, __prop) \ +`endif + +// COVER_FPV +// Cover a concurrent property during formal verification +`define COVER_FPV(__name, __prop, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ +`ifdef FPV_ON \ + `COVER(__name, __prop, __clk, __rst) \ +`endif + +// FPV assertion that proves that the FSM control flow is linear (no loops) +// The sequence triggers whenever the state changes and stores the current state as "initial_state". +// Then thereafter we must never see that state again until reset. +// It is possible for the reset to release ahead of the clock. +// Create a small "gray" window beyond the usual rst time to avoid +// checking. 
+`define ASSERT_FPV_LINEAR_FSM(__name, __state, __type, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ + `ifdef INC_ASSERT \ + bit __name``_cond; \ + always_ff @(posedge __clk or posedge __rst) begin \ + if (__rst) begin \ + __name``_cond <= 0; \ + end else begin \ + __name``_cond <= 1; \ + end \ + end \ + property __name``_p; \ + __type initial_state; \ + (!$stable(__state) & __name``_cond, initial_state = $past(__state)) |-> \ + (__state != initial_state) until (__rst == 1'b1); \ + endproperty \ + `ASSERT(__name, __name``_p, __clk, __rst) \ + `endif + +`include "prim_assert_sec_cm.svh" +`include "prim_flop_macros.sv" + +`endif // PRIM_ASSERT_SV + diff --git a/prim/rtl/prim_assert_sec_cm.svh b/prim/rtl/prim_assert_sec_cm.svh new file mode 100644 index 0000000..55aa346 --- /dev/null +++ b/prim/rtl/prim_assert_sec_cm.svh @@ -0,0 +1,62 @@ +// Copyright lowRISC contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Macros and helper code for security countermeasures. 
+ +`ifndef PRIM_ASSERT_SEC_CM_SVH +`define PRIM_ASSERT_SEC_CM_SVH + +// Helper macros +`define ASSERT_ERROR_TRIGGER_ALERT(NAME_, PRIM_HIER_, ALERT_, GATE_, MAX_CYCLES_, ERR_NAME_) \ + `ASSERT(FpvSecCm``NAME_``, \ + $rose(PRIM_HIER_.ERR_NAME_) && !(GATE_) \ + |-> ##[0:MAX_CYCLES_] (ALERT_.alert_p)) \ + `ifdef INC_ASSERT \ + assign PRIM_HIER_.unused_assert_connected = 1'b1; \ + `endif \ + `ASSUME_FPV(``NAME_``TriggerAfterAlertInit_S, $stable(rst_ni) == 0 |-> \ + PRIM_HIER_.ERR_NAME_ == 0 [*10]) + +`define ASSERT_ERROR_TRIGGER_ERR(NAME_, PRIM_HIER_, ERR_, GATE_, MAX_CYCLES_, ERR_NAME_, CLK_, RST_) \ + `ASSERT(FpvSecCm``NAME_``, \ + $rose(PRIM_HIER_.ERR_NAME_) && !(GATE_) \ + |-> ##[0:MAX_CYCLES_] (ERR_), CLK_, RST_) \ + `ifdef INC_ASSERT \ + assign PRIM_HIER_.unused_assert_connected = 1'b1; \ + `endif + +// macros for security countermeasures that will trigger alert +`define ASSERT_PRIM_COUNT_ERROR_TRIGGER_ALERT(NAME_, PRIM_HIER_, ALERT_, GATE_ = 0, MAX_CYCLES_ = 7) \ + `ASSERT_ERROR_TRIGGER_ALERT(NAME_, PRIM_HIER_, ALERT_, GATE_, MAX_CYCLES_, err_o) + +`define ASSERT_PRIM_DOUBLE_LFSR_ERROR_TRIGGER_ALERT(NAME_, PRIM_HIER_, ALERT_, GATE_ = 0, MAX_CYCLES_ = 7) \ + `ASSERT_ERROR_TRIGGER_ALERT(NAME_, PRIM_HIER_, ALERT_, GATE_, MAX_CYCLES_, err_o) + +`define ASSERT_PRIM_FSM_ERROR_TRIGGER_ALERT(NAME_, PRIM_HIER_, ALERT_, GATE_ = 0, MAX_CYCLES_ = 7) \ + `ASSERT_ERROR_TRIGGER_ALERT(NAME_, PRIM_HIER_, ALERT_, GATE_, MAX_CYCLES_, unused_err_o) + +`define ASSERT_PRIM_ONEHOT_ERROR_TRIGGER_ALERT(NAME_, PRIM_HIER_, ALERT_, GATE_ = 0, MAX_CYCLES_ = 7) \ + `ASSERT_ERROR_TRIGGER_ALERT(NAME_, PRIM_HIER_, ALERT_, GATE_, MAX_CYCLES_, err_o) + +`define ASSERT_PRIM_REG_WE_ONEHOT_ERROR_TRIGGER_ALERT(NAME_, REG_TOP_HIER_, ALERT_, GATE_ = 0, MAX_CYCLES_ = 7) \ + `ASSERT_PRIM_ONEHOT_ERROR_TRIGGER_ALERT(NAME_, REG_TOP_HIER_.u_prim_reg_we_check.u_prim_onehot_check, ALERT_, GATE_, MAX_CYCLES_) + +// macros for security countermeasures that will trigger other errors +`define 
ASSERT_PRIM_FSM_ERROR_TRIGGER_ERR(NAME_, PRIM_HIER_, ERR_, GATE_ = 0, MAX_CYCLES_ = 2, CLK_ = clk_i, RST_ = !rst_ni) \ + `ASSERT_ERROR_TRIGGER_ERR(NAME_, PRIM_HIER_, ERR_, GATE_, MAX_CYCLES_, unused_err_o, CLK_, RST_) + +`define ASSERT_PRIM_COUNT_ERROR_TRIGGER_ERR(NAME_, PRIM_HIER_, ERR_, GATE_ = 0, MAX_CYCLES_ = 2, CLK_ = clk_i, RST_ = !rst_ni) \ + `ASSERT_ERROR_TRIGGER_ERR(NAME_, PRIM_HIER_, ERR_, GATE_, MAX_CYCLES_, err_o, CLK_, RST_) + +`define ASSERT_PRIM_DOUBLE_LFSR_ERROR_TRIGGER_ERR(NAME_, PRIM_HIER_, ERR_, GATE_ = 0, MAX_CYCLES_ = 2, CLK_ = clk_i, RST_ = !rst_ni) \ + `ASSERT_ERROR_TRIGGER_ERR(NAME_, PRIM_HIER_, ERR_, GATE_, MAX_CYCLES_, err_o, CLK_, RST_) + +`define ASSERT_PRIM_ONEHOT_ERROR_TRIGGER_ERR(NAME_, PRIM_HIER_, ERR_, GATE_ = 0, MAX_CYCLES_ = 2, CLK_ = clk_i, RST_ = !rst_ni) \ + `ASSERT_ERROR_TRIGGER_ERR(NAME_, PRIM_HIER_, ERR_, GATE_, MAX_CYCLES_, err_o, CLK_, RST_) + +`define ASSERT_PRIM_REG_WE_ONEHOT_ERROR_TRIGGER_ERR(NAME_, REG_TOP_HIER_, ERR_, GATE_ = 0, MAX_CYCLES_ = 7, CLK_ = clk_i, RST_ = !rst_ni) \ + `ASSERT_PRIM_ONEHOT_ERROR_TRIGGER_ERR(NAME_, REG_TOP_HIER_.u_prim_reg_we_check.u_prim_onehot_check, ERR_, GATE_, MAX_CYCLES_, CLK_, RST_) + +`endif // PRIM_ASSERT_SEC_CM_SVH + diff --git a/prim/rtl/prim_assert_standard_macros.svh b/prim/rtl/prim_assert_standard_macros.svh new file mode 100644 index 0000000..5a5222c --- /dev/null +++ b/prim/rtl/prim_assert_standard_macros.svh @@ -0,0 +1,80 @@ +// Copyright lowRISC contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Macro bodies included by prim_assert.sv for tools that support full SystemVerilog and SVA syntax. +// See prim_assert.sv for documentation for each of the macros. + +`define ASSERT_I(__name, __prop) \ + __name: assert (__prop) \ + else begin \ + `ASSERT_ERROR(__name) \ + end + +// Formal tools will ignore the initial construct, so use static assertion as a workaround. 
+// This workaround terminates design elaboration if the __prop predicate is false. +// It calls $fatal() with the first argument equal to 2, it outputs the statistics about the memory +// and CPU time. +`define ASSERT_INIT(__name, __prop) \ +`ifdef FPV_ON \ + if (!(__prop)) $fatal(2, "Fatal static assertion [%s]: (%s) is not true.", \ + (__name), (__prop)); \ +`else \ + initial begin \ + __name: assert (__prop) \ + else begin \ + `ASSERT_ERROR(__name) \ + end \ + end \ +`endif + +`define ASSERT_INIT_NET(__name, __prop) \ + initial begin \ + // When a net is assigned with a value, the assignment is evaluated after \ + // initial in Xcelium. Add 1ps delay to check value after the assignment is \ + // completed. \ + #1ps; \ + __name: assert (__prop) \ + else begin \ + `ASSERT_ERROR(__name) \ + end \ + end \ + +`define ASSERT_FINAL(__name, __prop) \ + final begin \ + __name: assert (__prop || $test$plusargs("disable_assert_final_checks")) \ + else begin \ + `ASSERT_ERROR(__name) \ + end \ + end + +`define ASSERT(__name, __prop, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ + __name: assert property (@(posedge __clk) disable iff ((__rst) !== '0) (__prop)) \ + else begin \ + `ASSERT_ERROR(__name) \ + end + +`define ASSERT_NEVER(__name, __prop, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ + __name: assert property (@(posedge __clk) disable iff ((__rst) !== '0) not (__prop)) \ + else begin \ + `ASSERT_ERROR(__name) \ + end + +`define ASSERT_KNOWN(__name, __sig, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ + `ASSERT(__name, !$isunknown(__sig), __clk, __rst) + +`define COVER(__name, __prop, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ + __name: cover property (@(posedge __clk) disable iff ((__rst) !== '0) (__prop)); + +`define ASSUME(__name, __prop, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ + __name: assume property (@(posedge __clk) disable iff ((__rst) !== '0) (__prop)) \ + else begin \ + 
`ASSERT_ERROR(__name) \ + end + +`define ASSUME_I(__name, __prop) \ + __name: assume (__prop) \ + else begin \ + `ASSERT_ERROR(__name) \ + end + diff --git a/prim/rtl/prim_flop_macros.sv b/prim/rtl/prim_flop_macros.sv new file mode 100644 index 0000000..d836103 --- /dev/null +++ b/prim/rtl/prim_flop_macros.sv @@ -0,0 +1,75 @@ +// Copyright lowRISC contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +`ifndef PRIM_FLOP_MACROS_SV +`define PRIM_FLOP_MACROS_SV + +///////////////////////////////////// +// Default Values for Macros below // +///////////////////////////////////// + +`define PRIM_FLOP_CLK clk_i +`define PRIM_FLOP_RST rst_ni +`define PRIM_FLOP_RESVAL '0 + +///////////////////// +// Register Macros // +///////////////////// + +// TODO: define other variations of register macros so that they can be used throughout all designs +// to make the code more concise. + +// Register with asynchronous reset. +`define PRIM_FLOP_A(__d, __q, __resval = `PRIM_FLOP_RESVAL, __clk = `PRIM_FLOP_CLK, __rst_n = `PRIM_FLOP_RST) \ + always_ff @(posedge __clk or negedge __rst_n) begin \ + if (!__rst_n) begin \ + __q <= __resval; \ + end else begin \ + __q <= __d; \ + end \ + end + +/////////////////////////// +// Macro for Sparse FSMs // +/////////////////////////// + +// Simulation tools typically infer FSMs and report coverage for these separately. However, tools +// like Xcelium and VCS seem to have problems inferring FSMs if the state register is not coded in +// a behavioral always_ff block in the same hierarchy. To that end, this uses a modified variant +// with a second behavioral register definition for RTL simulations so that FSMs can be inferred. +// Note that in this variant, the __q output is disconnected from prim_sparse_fsm_flop and attached +// to the behavioral flop. 
An assertion is added to ensure equivalence between the +// prim_sparse_fsm_flop output and the behavioral flop output in that case. +`define PRIM_FLOP_SPARSE_FSM(__name, __d, __q, __type, __resval = `PRIM_FLOP_RESVAL, __clk = `PRIM_FLOP_CLK, __rst_n = `PRIM_FLOP_RST, __alert_trigger_sva_en = 1) \ + `ifdef SIMULATION \ + prim_sparse_fsm_flop #( \ + .StateEnumT(__type), \ + .Width($bits(__type)), \ + .ResetValue($bits(__type)'(__resval)), \ + .EnableAlertTriggerSVA(__alert_trigger_sva_en), \ + .CustomForceName(`PRIM_STRINGIFY(__q)) \ + ) __name ( \ + .clk_i ( __clk ), \ + .rst_ni ( __rst_n ), \ + .state_i ( __d ), \ + .state_o ( ) \ + ); \ + `PRIM_FLOP_A(__d, __q, __resval, __clk, __rst_n) \ + `ASSERT(``__name``_A, __q === ``__name``.state_o) \ + `else \ + prim_sparse_fsm_flop #( \ + .StateEnumT(__type), \ + .Width($bits(__type)), \ + .ResetValue($bits(__type)'(__resval)), \ + .EnableAlertTriggerSVA(__alert_trigger_sva_en) \ + ) __name ( \ + .clk_i ( __clk ), \ + .rst_ni ( __rst_n ), \ + .state_i ( __d ), \ + .state_o ( __q ) \ + ); \ + `endif + +`endif // PRIM_FLOP_MACROS_SV + From d2d7abc188cdd731b21a47a1d0a236998e20ef3f Mon Sep 17 00:00:00 2001 From: hfaroo9 Date: Thu, 29 Sep 2022 05:50:04 -0500 Subject: [PATCH 02/64] Adding vproc_config.sv --- .gitignore | 2 +- vproc_config.sv | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 vproc_config.sv diff --git a/.gitignore b/.gitignore index 54060b2..89d3364 100644 --- a/.gitignore +++ b/.gitignore @@ -26,4 +26,4 @@ *.fst *.fst.hier *.gtkw -vproc_config.sv + diff --git a/vproc_config.sv b/vproc_config.sv new file mode 100644 index 0000000..a56bad4 --- /dev/null +++ b/vproc_config.sv @@ -0,0 +1,49 @@ +// Auto-generated on Thu Sep 29 05:37:34 CDT 2022 + +// Vector coprocessor default configuration package +// +// The package defined in this file provides *consistent default values* for +// configuration parameters of the vproc_core module for the 
configuration +// shown below across different synthesis and simulation workflows. The +// constants defined in this package are intended to be used *exclusively* as +// *default values* for the parameters of the vproc_core module and should +// *not* be used anywhere else in the code, such that a design instantiating +// the vproc_core module can override any parameter with a different value. + +// Configuration details: +// - Vector register width: 128 bits +// - Vector pipelines: +// * Pipeline 0: 32 bits wide, contains VLSU, VALU, VMUL, VSLD, VELEM +// Uses 1 128-bit vreg read ports and write port 0 +// - Vector register file needs 2 read ports and 1 write ports + +`define MAIN_CORE_IBEX + +package vproc_config; + + import vproc_pkg::*; + + parameter vreg_type VREG_TYPE = VREG_GENERIC; + parameter int unsigned VREG_W = 128; + parameter int unsigned VPORT_RD_CNT = 2; + parameter int unsigned VPORT_RD_W [VPORT_RD_CNT] = '{default: VREG_W}; + parameter int unsigned VPORT_WR_CNT = 1; + parameter int unsigned VPORT_WR_W [VPORT_WR_CNT] = '{default: VREG_W}; + + parameter int unsigned PIPE_CNT = 1; + parameter bit [UNIT_CNT-1:0] PIPE_UNITS [PIPE_CNT] = '{ + (UNIT_CNT'(1) << UNIT_LSU) | (UNIT_CNT'(1) << UNIT_ALU) | (UNIT_CNT'(1) << UNIT_MUL) | (UNIT_CNT'(1) << UNIT_SLD) | (UNIT_CNT'(1) << UNIT_ELEM) + }; + parameter int unsigned PIPE_W [PIPE_CNT] = '{32}; + parameter int unsigned PIPE_VPORT_CNT [PIPE_CNT] = '{1}; + parameter int unsigned PIPE_VPORT_IDX [PIPE_CNT] = '{1}; + parameter int unsigned PIPE_VPORT_WR [PIPE_CNT] = '{0}; + + parameter int unsigned VLSU_QUEUE_SZ = 4; + parameter bit [VLSU_FLAGS_W-1:0] VLSU_FLAGS = '0; + parameter mul_type MUL_TYPE = MUL_GENERIC; + + parameter int unsigned INSTR_QUEUE_SZ = 2; + parameter bit [BUF_FLAGS_W-1:0] BUF_FLAGS = (BUF_FLAGS_W'(1) << BUF_DEQUEUE) | (BUF_FLAGS_W'(1) << BUF_VREG_PEND); + +endpackage From 3b83823128b89bae516ae0629b90fd50de6fa83a Mon Sep 17 00:00:00 2001 From: hfaroo9 Date: Wed, 5 Oct 2022 15:56:47 -0500 
Subject: [PATCH 03/64] tb works for 498(?) --- Makefile | 187 +++++++++++++++++++++++++++++++++++++++++++ config.mk | 0 demo/rtl/demo_top.sv | 6 +- sim/vproc_tb.sv | 39 ++++++--- 4 files changed, 220 insertions(+), 12 deletions(-) create mode 100755 Makefile mode change 100644 => 100755 config.mk diff --git a/Makefile b/Makefile new file mode 100755 index 0000000..1d57545 --- /dev/null +++ b/Makefile @@ -0,0 +1,187 @@ +# Copyright TU Wien +# Licensed under the Solderpad Hardware License v2.1, see LICENSE.txt for details +# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + + +# Makefile for generating the configuration package +# +# The intent of the dynamically generated configuration package is to provide +# *consistent default values* for configuration parameters of the vproc_core +# module across different synthesis and simulation workflows. +# +# The configuration is controlled with the following (environment) variables: +# - VMEM_W: The width (in bits) of the vector coprocessor's memory interface +# - VREG_W: The width (in bits) of the vector coprocessor's vector registers +# - VPROC_PIPELINES: Defines the vector pipelines. Each pipeline is defined by +# a string of the form "WIDTH:UNIT[,UNIT]*" where WIDTH is the width in bits +# of the pipeline's datapath and each occurrence of UNIT selects one of the +# vector execution units (either VLSU, VALU, VMUL, VSLD, or VELEM). +# - VPROC_CONFIG: Sets default values for the other parameters (that can be +# individually overridden) depending on the desired number of vector +# pipelines (choose 1, 2, 3, or 5 pipelines by setting this variable to +# compact, dual, triple, or legacy, respectively). 
+ +VPROC_CONFIG_PKG ?= vproc_config.sv + +VPROC_CONFIG ?= compact +ifeq ($(VPROC_CONFIG), compact) + VPORT_POLICY ?= some + VMEM_W ?= 32 + VREG_W ?= 128 + VPROC_PIPELINES ?= $(VMEM_W):VLSU,VALU,VMUL,VSLD,VELEM +else +ifeq ($(VPROC_CONFIG), dual) + VPORT_POLICY ?= some + VMEM_W ?= 32 + VREG_W ?= 128 + VPROC_PIPELINES ?= $(VMEM_W):VLSU,VALU,VELEM $(VPIPE_W_VMUL):VMUL,VSLD +else +ifeq ($(VPROC_CONFIG), triple) + VPORT_POLICY ?= some + VMEM_W ?= 32 + VREG_W ?= 256 + VPROC_PIPELINES ?= $(VMEM_W):VLSU $(VPIPE_W_DFLT):VALU,VELEM $(VPIPE_W_VMUL):VMUL,VSLD +else +ifeq ($(VPROC_CONFIG), legacy) + VPORT_POLICY ?= some + VMEM_W ?= 32 + VREG_W ?= 128 + VPROC_PIPELINES ?= $(VMEM_W):VLSU $(VPIPE_W_DFLT):VALU $(VPIPE_W_VMUL):VMUL \ + $(VPIPE_W_DFLT):VSLD 32:VELEM +else +$(error Unknown vector coprocessor configuration $(VPROC_CONFIG)) +endif +endif +endif +endif + +# default widths of vector pipelines based on VPORT_POLICY +ifeq ($(VPORT_POLICY), few) + VPIPE_W_DFLT := $(shell echo $$(($(VREG_W) / 2))) + VPIPE_W_VMUL := $(shell echo $$(($(VREG_W) / 4))) +else +ifeq ($(VPORT_POLICY), some) + VPIPE_W_DFLT := $(shell echo $$(($(VREG_W) / 2))) + VPIPE_W_VMUL := $(shell echo $$(($(VREG_W) / 2))) +else +ifeq ($(VPORT_POLICY), many) + VPIPE_W_DFLT := $(VREG_W) + VPIPE_W_VMUL := $(VREG_W) +else +$(error Unknown vector register file port policy $(VPORT_POLICY)) +endif +endif +endif + +.PHONY: $(VPROC_CONFIG_PKG) +$(VPROC_CONFIG_PKG): + @echo "// Auto-generated on $$(date)" >$@; \ + echo "" >>$@; \ + echo "// Vector coprocessor default configuration package" >>$@; \ + echo "//" >>$@; \ + echo "// The package defined in this file provides *consistent default values* for" >>$@; \ + echo "// configuration parameters of the vproc_core module for the configuration" >>$@; \ + echo "// shown below across different synthesis and simulation workflows. 
The" >>$@; \ + echo "// constants defined in this package are intended to be used *exclusively* as" >>$@; \ + echo "// *default values* for the parameters of the vproc_core module and should" >>$@; \ + echo "// *not* be used anywhere else in the code, such that a design instantiating" >>$@; \ + echo "// the vproc_core module can override any parameter with a different value." >>$@; \ + echo "" >>$@; \ + echo "// Configuration details:" >>$@; \ + echo "// - Vector register width: $(VREG_W) bits" >>$@; \ + echo "// - Vector pipelines:" >>$@; \ + vport_rd_cnt=1; \ + vport_wr_capacities=""; \ + pipe_cnt=0; \ + pipe_units=""; \ + pipe_widths=""; \ + pipe_vport_cnt=""; \ + pipe_vport_idx=""; \ + pipe_vport_wr=""; \ + for pipe in $(VPROC_PIPELINES); do \ + width=`echo $$pipe | cut -d ":" -f 1`; \ + unit_str=`echo $$pipe | cut -d ":" -f 2 | sed 's/,/, /g'`; \ + unit_mask=`echo $$pipe | cut -d ":" -f 2 | sed 's/,/ | /g' | \ + sed "s/V\(LSU\|ALU\|MUL\|SLD\|ELEM\)/(UNIT_CNT'(1) << UNIT_\1)/g"`; \ + vport_cnt=1; \ + if echo "$$pipe" | grep -q "VMUL" && [ $$(($$width * 4)) -gt "$(VREG_W)" ]; then \ + vport_cnt=2; \ + fi; \ + if [ $$(($$width * 2)) -gt "$(VREG_W)" ]; then \ + vport_cnt=$$(($$vport_cnt + 1)); \ + fi; \ + vport_wr=0; \ + remaining_cap=$$(($(VREG_W) - $$width)); \ + for cap in $$(echo $$vport_wr_capacities); do \ + if [ "$$cap" -ge "$$width" ]; then \ + remaining_cap=$$(($$cap - $$width)); \ + break; \ + fi; \ + vport_wr=$$(($$vport_wr + 1)); \ + done; \ + if [ -z "$$pipe_units" ]; then \ + pipe_units="$${unit_mask}"; \ + pipe_widths="$${width}"; \ + pipe_vport_cnt="$${vport_cnt}"; \ + pipe_vport_idx="$${vport_rd_cnt}"; \ + pipe_vport_wr="$${vport_wr}"; \ + else \ + pipe_units="$${pipe_units}, $${unit_mask}"; \ + pipe_widths="$${pipe_widths}, $${width}"; \ + pipe_vport_cnt="$${pipe_vport_cnt}, $${vport_cnt}"; \ + pipe_vport_idx="$${pipe_vport_idx}, $${vport_rd_cnt}"; \ + pipe_vport_wr="$${pipe_vport_wr}, $${vport_wr}"; \ + fi; \ + vport_rd_cnt=$$(($$vport_rd_cnt + 
$$vport_cnt)); \ + if [ "$$vport_wr" = `echo $${vport_wr_capacities} | wc -w` ]; then \ + vport_wr_capacities="$${vport_wr_capacities} $${remaining_cap}"; \ + else \ + awk_word_idx=$$(($$vport_wr + 1)); \ + vport_wr_capacities=`echo "$${vport_wr_capacities}" | \ + awk -v n=$$awk_word_idx -v r=$$remaining_cap '{$$n=r} 1'`; \ + fi; \ + echo "// * Pipeline $${pipe_cnt}: $${width} bits wide, contains $${unit_str}" >>$@; \ + echo "// Uses $${vport_cnt} $(VREG_W)-bit vreg read ports" \ + "and write port $${vport_wr}" >>$@; \ + pipe_cnt=$$(($$pipe_cnt + 1)); \ + done; \ + pipe_widths="'{$${pipe_widths}}"; \ + pipe_vport_cnt="'{$${pipe_vport_cnt}}"; \ + pipe_vport_idx="'{$${pipe_vport_idx}}"; \ + pipe_vport_wr="'{$${pipe_vport_wr}}"; \ + vport_wr_cnt=`echo $${vport_wr_capacities} | wc -w`; \ + echo "// - Vector register file needs $${vport_rd_cnt} read ports and $${vport_wr_cnt}" \ + "write ports" >>$@; \ + buf_flags="(BUF_FLAGS_W'(1) << BUF_DEQUEUE) | (BUF_FLAGS_W'(1) << BUF_VREG_PEND)"; \ + if [ -n "$(TIMEPRED)" ] && [ "$(TIMEPRED)" != "0" ]; then \ + buf_flags="$${buf_flags} | (BUF_FLAGS_W'(1) << BUF_VREG_WR_MUX_TIMEPRED)"; \ + fi; \ + echo "" >>$@; \ + echo "package vproc_config;" >>$@; \ + echo "" >>$@; \ + echo " import vproc_pkg::*;" >>$@; \ + echo "" >>$@; \ + echo " parameter vreg_type VREG_TYPE = VREG_GENERIC;" >>$@; \ + echo " parameter int unsigned VREG_W = $(VREG_W);" >>$@; \ + echo " parameter int unsigned VPORT_RD_CNT = $$vport_rd_cnt;" >>$@; \ + echo " parameter int unsigned VPORT_RD_W [VPORT_RD_CNT] = '{default: VREG_W};" >>$@; \ + echo " parameter int unsigned VPORT_WR_CNT = $$vport_wr_cnt;" >>$@; \ + echo " parameter int unsigned VPORT_WR_W [VPORT_WR_CNT] = '{default: VREG_W};" >>$@; \ + echo "" >>$@; \ + echo " parameter int unsigned PIPE_CNT = $$pipe_cnt;" >>$@; \ + echo " parameter bit [UNIT_CNT-1:0] PIPE_UNITS [PIPE_CNT] = '{" >>$@; \ + echo " $$pipe_units" >>$@; \ + echo " };" >>$@; \ + echo " parameter int unsigned PIPE_W [PIPE_CNT] = 
$$pipe_widths;" >>$@; \ + echo " parameter int unsigned PIPE_VPORT_CNT [PIPE_CNT] = $$pipe_vport_cnt;" >>$@; \ + echo " parameter int unsigned PIPE_VPORT_IDX [PIPE_CNT] = $$pipe_vport_idx;" >>$@; \ + echo " parameter int unsigned PIPE_VPORT_WR [PIPE_CNT] = $$pipe_vport_wr;" >>$@; \ + echo "" >>$@; \ + echo " parameter int unsigned VLSU_QUEUE_SZ = 4;" >>$@; \ + echo " parameter bit [VLSU_FLAGS_W-1:0] VLSU_FLAGS = '0;" >>$@; \ + echo " parameter mul_type MUL_TYPE = MUL_GENERIC;" >>$@; \ + echo "" >>$@; \ + echo " parameter int unsigned INSTR_QUEUE_SZ = 2;" >>$@; \ + echo " parameter bit [BUF_FLAGS_W-1:0] BUF_FLAGS = $${buf_flags};" >>$@; \ + echo "" >>$@; \ + echo "endpackage" >>$@; diff --git a/config.mk b/config.mk old mode 100644 new mode 100755 diff --git a/demo/rtl/demo_top.sv b/demo/rtl/demo_top.sv index 1101c18..f6ea243 100644 --- a/demo/rtl/demo_top.sv +++ b/demo/rtl/demo_top.sv @@ -4,7 +4,7 @@ module demo_top #( - parameter RAM_FPATH = "", + parameter RAM_FPATH = "/home/hfaroo9/ece498hk-RISCV-V-Extension/src/vicuna/sim/files.txt", parameter int unsigned RAM_SIZE = 262144, parameter bit DIFF_CLK = 1'b0, parameter real SYSCLK_PER = 0.0, @@ -88,8 +88,8 @@ module demo_top #( vproc_top #( .MEM_W ( 32 ), .VMEM_W ( 32 ), - .VREG_TYPE ( vproc_pkg::VREG_XLNX_RAM32M ), - .MUL_TYPE ( vproc_pkg::MUL_XLNX_DSP48E1 ) + .VREG_TYPE ( vproc_pkg::VREG_GENERIC ), + .MUL_TYPE ( vproc_pkg::MUL_GENERIC ) ) vproc ( .clk_i ( clk ), .rst_ni ( rst_n ), diff --git a/sim/vproc_tb.sv b/sim/vproc_tb.sv index 625ea6e..798e48c 100644 --- a/sim/vproc_tb.sv +++ b/sim/vproc_tb.sv @@ -4,7 +4,7 @@ module vproc_tb #( - parameter PROG_PATHS_LIST = "", + parameter PROG_PATHS_LIST = "/home/hfaroo9/ece498hk-RISCV-V-Extension/src/vicuna/sim/files.txt", parameter int unsigned MEM_W = 32, parameter int unsigned MEM_SZ = 262144, parameter int unsigned MEM_LATENCY = 1, @@ -35,8 +35,8 @@ module vproc_tb #( vproc_top #( .MEM_W ( MEM_W ), .VMEM_W ( VMEM_W ), - .VREG_TYPE ( vproc_pkg::VREG_XLNX_RAM32M ), - 
.MUL_TYPE ( vproc_pkg::MUL_XLNX_DSP48E1 ), + .VREG_TYPE ( vproc_pkg::VREG_GENERIC ), + .MUL_TYPE ( vproc_pkg::MUL_GENERIC ), .ICACHE_SZ ( ICACHE_SZ ), .ICACHE_LINE_W ( ICACHE_LINE_W ), .DCACHE_SZ ( DCACHE_SZ ), @@ -63,7 +63,8 @@ module vproc_tb #( logic mem_rvalid_queue[MEM_LATENCY]; logic [31:0] mem_rdata_queue [MEM_LATENCY]; logic mem_err_queue [MEM_LATENCY]; - always_ff @(posedge clk) begin + always begin + #5; if (mem_req & mem_we) begin for (int i = 0; i < MEM_W/8; i++) begin if (mem_be[i]) begin @@ -91,12 +92,13 @@ module vproc_tb #( mem_rdata <= mem_rdata_queue [MEM_LATENCY-1]; mem_err <= mem_err_queue [MEM_LATENCY-1]; end - for (int i = 0; i < MEM_SZ; i++) begin +// for (int i = 0; i < MEM_SZ; i++) begin // set the don't care values in the memory to 0 during the first rising edge - if ($isunknown(mem[i]) & ($time < 10)) begin - mem[i] <= '0; - end - end +// if ($isunknown(mem[i]) & ($time < 10)) begin +// mem[i] <= '0; +// end +// end + #5; end logic prog_end, done; @@ -105,6 +107,7 @@ module vproc_tb #( integer fd1, fd2, cnt, ref_start, ref_end, dump_start, dump_end; string line, prog_path, ref_path, dump_path; initial begin + $display("STARTING TB"); done = 1'b0; fd1 = $fopen(PROG_PATHS_LIST, "r"); @@ -126,15 +129,30 @@ module vproc_tb #( continue; end + $display("ABOUT TO READ MEM (%s)", prog_path); $readmemh(prog_path, mem); + $display("FINISHED READ MEM"); + + for(int j = 0; j < MEM_SZ; j++) begin + if($isunknown(mem[j])) begin + mem[j] = 0; + end + end fd2 = $fopen(ref_path, "w"); + $display("REF PATH OPEN (%s)", ref_path); for (int j = ref_start / (MEM_W/8); j < ref_end / (MEM_W/8); j++) begin for (int k = 0; k < MEM_W/32; k++) begin + if($isunknown(mem[j][k*32 +: 32])) begin + mem[j][k*32 +: 32] = 0; + end + + // $display("%x", mem[j][k*32 +: 32]); $fwrite(fd2, "%x\n", mem[j][k*32 +: 32]); end end $fclose(fd2); + $display("REF PATH CLOSED"); // reset for 10 cycles #100 @@ -142,12 +160,14 @@ module vproc_tb #( // wait for completion (i.e. 
request of instr mem addr 0x00000000) //@(posedge prog_end); + $display("STARTING WHILE LOOP"); while (1) begin @(posedge clk); if (prog_end) begin break; end end + $display("OUT OF WHILE LOOP"); fd2 = $fopen(dump_path, "w"); for (int j = dump_start / (MEM_W/8); j < dump_end / (MEM_W/8); j++) begin @@ -159,6 +179,7 @@ module vproc_tb #( end $fclose(fd1); done = 1'b1; + $finish; end endmodule From ff6d7af29ebdcb21d4c637850275df9d011f440b Mon Sep 17 00:00:00 2001 From: Spenser Fong Date: Thu, 6 Oct 2022 21:26:20 -0500 Subject: [PATCH 04/64] Added div types and decoder logic, implemented most of vproc_div_block.sv --- rtl/vproc_decoder.sv | 27 ++++++++++++++++ rtl/vproc_div.sv | 25 +++++++++++++++ rtl/vproc_div_block.sv | 73 ++++++++++++++++++++++++++++++++++++++++++ rtl/vproc_pkg.sv | 24 ++++++++++++++ 4 files changed, 149 insertions(+) create mode 100644 rtl/vproc_div.sv create mode 100644 rtl/vproc_div_block.sv diff --git a/rtl/vproc_decoder.sv b/rtl/vproc_decoder.sv index 0bd34f1..c5523ff 100644 --- a/rtl/vproc_decoder.sv +++ b/rtl/vproc_decoder.sv @@ -1093,6 +1093,33 @@ module vproc_decoder #( endcase end + /***** ECE 498 HK MODIFICATIONS *****/ + // DIV unit: + {6'b100000, 3'b010}, // vdivu VV + {6'b100000, 3'b110}: begin // vidvu VX + unit_o = UNIT_DIV; // CREATE UNIT_DIV!!! + mode_o.div.op = DIV_VDIVU; // CREAT DIV TYPES!!! + // ... + end + {6'b100001, 3'b010}, // vdiv VV + {6'b100001, 3'b110}: begin // vdiv VX + unit_o = UNIT_DIV; + mode_o.div.op = DIV_VDIV; + // ... + end + {6'b100010, 3'b010}, // vremu VV + {6'b100010, 3'b110}: begin // vremu VX + unit_o = UNIT_DIV; + mode_o.div.op = DIV_VREMU; + // ... + end + {6'b100011, 3'b010}, // vrem VV + {6'b100011, 3'b110}: begin // vrem VX + unit_o = UNIT_DIV; + mode_o.div.op = DIV_VREM; + // ... 
+ end + /***** END ECE 498 HK MODIFICATIONS *****/ // MUL unit: {6'b100100, 3'b010}, // vmulhu VV diff --git a/rtl/vproc_div.sv b/rtl/vproc_div.sv new file mode 100644 index 0000000..52290e2 --- /dev/null +++ b/rtl/vproc_div.sv @@ -0,0 +1,25 @@ +// TODO DO THIS + +module vproc_div #( + parameter vproc_pkg::div_type DIV_TYPE = vproc_pkg::DIV_GENERIC, + parameter bit BUF_OPERANDS = 1'b1, + parameter bit BUF_DIV_IN = 1'b1, + parameter bit BUF_DIV_OUT = 1'b1, + parameter bit BUF_RESULTS = 1'b1 + // parameter bit DONT_CARE_ZERO = 1'b0 + )( + input logic clk_i, + input logic async_rst_ni, + input logic sync_rst_ni, + + input logic pipe_in_valid_i, + output logic pipe_in_ready_o, + + output logic pipe_out_valid_o, + input logic pipe_out_ready_i + ); + + import vproc_pkg::*; + +endmodule + diff --git a/rtl/vproc_div_block.sv b/rtl/vproc_div_block.sv new file mode 100644 index 0000000..ebf606e --- /dev/null +++ b/rtl/vproc_div_block.sv @@ -0,0 +1,73 @@ +// TODO DO THIS + +module vproc_div_block #( + parameter vproc_pkg::div_types DIV_TYPE = vproc_pkg::DIV_GENERIC, + parameter bit BUF_OPS = 1'b0, // buffer operands (op1_i and op2_i) + parameter bit BUF_DIV = 1'b0, // buffer division result + parameter bit BUF_RES = 1'b0 // buffer final result (res_o) + // Other parameters... + )( + input logic clk_i, + input logic async_rst_ni, + input logic sync_rst_ni, + + input logic [16:0] op1_i, + input logic [16:0] op2_i, + + output logic [32:0] res_o + + // May or may not need more ports... 
+ ); + + generate + case (DIV_TYPE) + + vproc_pkg::DIV_GENERIC: begin + + logic [16:0] op1_q, op2_q; + logic [32:0] div_q, div_d; + logic [32:0] res_q, res_d; + + if (BUF_OPS) begin + always_ff @(posedge clk_i) begin + op1_q <= op1_i; + op2_q <= op2_i; + end + end else begin + always_comb begin + op1_q = op1_i; + op2_q = op2_i; + end + end + + if (BUF_DIV) begin + always_ff @(posedge clk_i) begin + div_q <= div_d; + end + end else begin + always_comb begin + div_q = div_d; + end + end + + if (BUF_RES) begin + always_ff @(posedge clk_i) begin + res_q <= res_d; + end + end else begin + always_comb begin + res_q = res_d; + end + end + + assign div_d = $signed(op1_q) / $signed(op2_q); + assign res_o = res_q; + + end + + default: ; + + endcase + endgenerate + +endmodule diff --git a/rtl/vproc_pkg.sv b/rtl/vproc_pkg.sv index b84f18d..e23838a 100644 --- a/rtl/vproc_pkg.sv +++ b/rtl/vproc_pkg.sv @@ -40,6 +40,12 @@ typedef enum { MUL_XLNX_DSP48E1 = 1 } mul_type; +/***** ECE 498 HK MODIFICATIONS *****/ +typedef enum { + DIV_GENERIC = 0 +} div_type; +/***** END ECE 498 HK MODIFICATIONS *****/ + typedef enum logic [1:0] { INSTR_INVALID, INSTR_SPECULATIVE, @@ -98,6 +104,7 @@ typedef enum logic [1:0] { typedef enum logic [2:0] { UNIT_LSU, UNIT_ALU, + UNIT_DIV, // ECE 498 HK MODIFICATION UNIT_MUL, UNIT_SLD, UNIT_ELEM, @@ -207,6 +214,22 @@ typedef struct packed { `endif } op_mode_mul; +/***** ECE 498 HK MODIFICATIONS *****/ +typedef enum logic [1:0] { + DIV_VDIVU, // unsigned divide + DIV_VDIV, // signed divide + DIV_VREMU, // unsigned remainder + DIV_VREM // signed remainder +} opcode_div; + +typedef struct packed { + // TODO + opcode_div op; + logic op1_signed; + logic op2_signed; +} op_mode_div; +/***** END ECE 498 HK MODIFICATIONS *****/ + typedef enum logic [0:0] { SLD_UP, SLD_DOWN @@ -289,6 +312,7 @@ typedef struct packed { `endif op_mode_lsu lsu; op_mode_alu alu; + op_mode_div div; // ECE 498 HK MODIFICATION op_mode_mul mul; op_mode_sld sld; op_mode_elem elem; From 
eddbecdaace7ee0994c50cc6a64d79341676479f Mon Sep 17 00:00:00 2001 From: Hassan Farooq <1hassanfarooq@gmail.com> Date: Wed, 19 Oct 2022 13:24:10 -0500 Subject: [PATCH 05/64] Create main.yml --- .github/workflows/main.yml | 125 +++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 .github/workflows/main.yml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..b78e239 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,125 @@ +name: CI +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + + +jobs: + verilator: + runs-on: ubuntu-22.04 + steps: + - uses: actions/cache@v2 + id: cache-verilator + with: + path: /opt/verilator + key: ubuntu-22_04-verilator-4_210 + + - name: Install Verilator + if: steps.cache-verilator.outputs.cache-hit != 'true' + run: | + sudo apt-get update + sudo apt-get install git perl python3 g++ flex bison ccache libfl2 libfl-dev zlib1g zlib1g-dev + git clone https://github.com/verilator/verilator + unset VERILATOR_ROOT + cd verilator + git checkout tags/v4.210 + autoconf + ./configure --prefix /opt/verilator + make + sudo make install + df -h + cd .. 
+ rm -rf verilator + + + lint: + needs: verilator + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v2 + with: + submodules: false + + - uses: actions/cache@v2 + id: cache-verilator + with: + path: /opt/verilator + key: ubuntu-22_04-verilator-4_210 + + - name: Abort if no cache + if: steps.cache-verilator.outputs.cache-hit != 'true' + run: exit 1 + + - name: Install packages + run: | + sudo apt-get update + sudo apt-get install git perl python3 g++ ccache libfl2 libfl-dev zlib1g zlib1g-dev + + - name: Install verible and lint + run: | + curl -sSL https://api.github.com/repos/chipsalliance/verible/releases/latest | grep browser_download_url | grep Ubuntu-20.04 | cut -d '"' -f 4 | wget -qi - + mkdir verible + tar -xf verible*.tar.gz -C verible --strip-components=1 + export PATH=$PATH:$PWD/verible/bin:/opt/verilator/bin + cd test && make lint + + + unit: + needs: verilator + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + unit: [lsu, alu, mul, sld, elem, csr, misc] + main_core: [ibex, cv32e40x] + steps: + - uses: actions/checkout@v2 + with: + submodules: true + + - uses: actions/cache@v2 + id: cache-verilator + with: + path: /opt/verilator + key: ubuntu-22_04-verilator-4_210 + + - name: Abort if no cache + if: steps.cache-verilator.outputs.cache-hit != 'true' + run: exit 1 + + - name: Install packages + run: | + sudo apt-get update + sudo apt-get install git perl python3 g++ ccache libfl2 libfl-dev zlib1g zlib1g-dev + sudo apt-get install srecord llvm-14 clang-14 + sudo ln -sf /usr/bin/llvm-objdump-14 /usr/bin/llvm-objdump + sudo ln -sf /usr/bin/llvm-objcopy-14 /usr/bin/llvm-objcopy + + - name: Run tests + shell: bash {0} # disable fail-fast behavior + run: | + export PATH=$PATH:/opt/verilator/bin:/opt/riscv-gcc/bin + verilator --version + retval=0 + while IFS= read -ra line; do + if [ -z "$line" ] || [ "${line:0:1}" = "#" ]; then + continue + fi + echo "$line" > test/${{ matrix.unit }}/test_configs.conf + vcd=`echo "${{ matrix.unit }}_${{ 
matrix.main_core }}_${line}.vcd" | sed 's/ */_/g'` + make -C test ${{ matrix.unit }} CORE=${{ matrix.main_core }} FULL_LOG=1 TRACE_VCD=$vcd + if [ $? -ne 0 ]; then + retval=1 + fi + done < .github/test_configs.conf + exit $retval + + - name: Archive VCD trace files + if: failure() + uses: actions/upload-artifact@v3 + with: + name: vcd-trace-files + path: | + test/${{ matrix.unit }}/*.vcd From d3f074b09bb67ad306ecd511bc27ba7d49704dc0 Mon Sep 17 00:00:00 2001 From: Hassan Farooq <1hassanfarooq@gmail.com> Date: Wed, 19 Oct 2022 16:21:57 -0500 Subject: [PATCH 06/64] Update default.yml Removed cv32e40x --- .github/workflows/default.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/default.yml b/.github/workflows/default.yml index b78e239..1211035 100644 --- a/.github/workflows/default.yml +++ b/.github/workflows/default.yml @@ -73,7 +73,7 @@ jobs: fail-fast: false matrix: unit: [lsu, alu, mul, sld, elem, csr, misc] - main_core: [ibex, cv32e40x] + main_core: [ibex] steps: - uses: actions/checkout@v2 with: From 3c40928aaadf22d75f733991f864307a1e74d17e Mon Sep 17 00:00:00 2001 From: William Eustis Date: Mon, 24 Oct 2022 15:57:22 -0500 Subject: [PATCH 07/64] added DIV in config.mk and vprov_pkg --- config.mk | 15 +++++++++------ rtl/vproc_pkg.sv | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/config.mk b/config.mk index 1d57545..1b92cde 100755 --- a/config.mk +++ b/config.mk @@ -15,7 +15,7 @@ # - VPROC_PIPELINES: Defines the vector pipelines. Each pipeline is defined by # a string of the form "WIDTH:UNIT[,UNIT]*" where WIDTH is the width in bits # of the pipeline's datapath and each occurence of UNIT selects one of the -# vector execution units (either VLSU, VALU, VMUL, VSLD, or VELEM). +# vector execution units (either VLSU, VALU, VMUL, VSLD, VELEM, or VDIV). 
# - VPROC_CONFIG: Sets default values for the other parameters (that can be # individually overriden) depending on the desired number of vector # pipelines (choose 1, 2, 3, or 5 pipelines by setting this variable to @@ -28,26 +28,26 @@ ifeq ($(VPROC_CONFIG), compact) VPORT_POLICY ?= some VMEM_W ?= 32 VREG_W ?= 128 - VPROC_PIPELINES ?= $(VMEM_W):VLSU,VALU,VMUL,VSLD,VELEM + VPROC_PIPELINES ?= $(VMEM_W):VLSU,VALU,VMUL,VSLD,VELEM,VDIV else ifeq ($(VPROC_CONFIG), dual) VPORT_POLICY ?= some VMEM_W ?= 32 VREG_W ?= 128 - VPROC_PIPELINES ?= $(VMEM_W):VLSU,VALU,VELEM $(VPIPE_W_VMUL):VMUL,VSLD + VPROC_PIPELINES ?= $(VMEM_W):VLSU,VALU,VELEM $(VPIPE_W_VMUL):VMUL,VSLD,VDIV else ifeq ($(VPROC_CONFIG), triple) VPORT_POLICY ?= some VMEM_W ?= 32 VREG_W ?= 256 - VPROC_PIPELINES ?= $(VMEM_W):VLSU $(VPIPE_W_DFLT):VALU,VELEM $(VPIPE_W_VMUL):VMUL,VSLD + VPROC_PIPELINES ?= $(VMEM_W):VLSU $(VPIPE_W_DFLT):VALU,VELEM $(VPIPE_W_VMUL):VMUL,VSLD,VDIV else ifeq ($(VPROC_CONFIG), legacy) VPORT_POLICY ?= some VMEM_W ?= 32 VREG_W ?= 128 VPROC_PIPELINES ?= $(VMEM_W):VLSU $(VPIPE_W_DFLT):VALU $(VPIPE_W_VMUL):VMUL \ - $(VPIPE_W_DFLT):VSLD 32:VELEM + $(VPIPE_W_DFLT):VSLD 32:VELEM $(VPIPE_W_VMUL):VDIV else $(error Unknown vector coprocessor configuration $(VPROC_CONFIG)) endif @@ -102,10 +102,13 @@ $(VPROC_CONFIG_PKG): width=`echo $$pipe | cut -d ":" -f 1`; \ unit_str=`echo $$pipe | cut -d ":" -f 2 | sed 's/,/, /g'`; \ unit_mask=`echo $$pipe | cut -d ":" -f 2 | sed 's/,/ | /g' | \ - sed "s/V\(LSU\|ALU\|MUL\|SLD\|ELEM\)/(UNIT_CNT'(1) << UNIT_\1)/g"`; \ + sed "s/V\(LSU\|ALU\|MUL\|SLD\|ELEM\|DIV\)/(UNIT_CNT'(1) << UNIT_\1)/g"`; \ vport_cnt=1; \ if echo "$$pipe" | grep -q "VMUL" && [ $$(($$width * 4)) -gt "$(VREG_W)" ]; then \ vport_cnt=2; \ + fi; \ + if echo "$$pipe" | grep -q "VDIV" && [ $$(($$width * 4)) -gt "$(VREG_W)" ]; then \ + vport_cnt=2; \ fi; \ if [ $$(($$width * 2)) -gt "$(VREG_W)" ]; then \ vport_cnt=$$(($$vport_cnt + 1)); \ diff --git a/rtl/vproc_pkg.sv b/rtl/vproc_pkg.sv index 
e23838a..d96e291 100644 --- a/rtl/vproc_pkg.sv +++ b/rtl/vproc_pkg.sv @@ -113,7 +113,7 @@ typedef enum logic [2:0] { } op_unit; // The number of different types of execution units (excludes pseudo-units) -parameter int unsigned UNIT_CNT = 5; +parameter int unsigned UNIT_CNT = 6; typedef enum logic [1:0] { COUNT_INC_1, From 2377ab7c34016105165b671920a95e63a0527850 Mon Sep 17 00:00:00 2001 From: Spenser Fong Date: Wed, 26 Oct 2022 15:58:06 -0500 Subject: [PATCH 08/64] Added signed div ops signals to decode, instantiated div_block in div --- rtl/vproc_decoder.sv | 8 +++ rtl/vproc_div.sv | 147 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 155 insertions(+) diff --git a/rtl/vproc_decoder.sv b/rtl/vproc_decoder.sv index c5523ff..6045c6d 100644 --- a/rtl/vproc_decoder.sv +++ b/rtl/vproc_decoder.sv @@ -1099,24 +1099,32 @@ module vproc_decoder #( {6'b100000, 3'b110}: begin // vidvu VX unit_o = UNIT_DIV; // CREATE UNIT_DIV!!! mode_o.div.op = DIV_VDIVU; // CREAT DIV TYPES!!! + mode_o.div.op1_signed = 1'b0; + mode_o.div.op2_signed = 1'b0; // ... end {6'b100001, 3'b010}, // vdiv VV {6'b100001, 3'b110}: begin // vdiv VX unit_o = UNIT_DIV; mode_o.div.op = DIV_VDIV; + mode_o.div.op1_signed = 1'b1; + mode_o.div.op2_signed = 1'b1; // ... end {6'b100010, 3'b010}, // vremu VV {6'b100010, 3'b110}: begin // vremu VX unit_o = UNIT_DIV; mode_o.div.op = DIV_VREMU; + mode_o.div.op1_signed = 1'b0; + mode_o.div.op2_signed = 1'b0; // ... end {6'b100011, 3'b010}, // vrem VV {6'b100011, 3'b110}: begin // vrem VX unit_o = UNIT_DIV; mode_o.div.op = DIV_VREM; + mode_o.div.op1_signed = 1'b1; + mode_o.div.op2_signed = 1'b1; // ... 
end /***** END ECE 498 HK MODIFICATIONS *****/ diff --git a/rtl/vproc_div.sv b/rtl/vproc_div.sv index 52290e2..56856fd 100644 --- a/rtl/vproc_div.sv +++ b/rtl/vproc_div.sv @@ -1,6 +1,7 @@ // TODO DO THIS module vproc_div #( + parameter int unsigned DIV_OP_W = 64, // DIV unit operand width in bits (NOT PERMANENT) parameter vproc_pkg::div_type DIV_TYPE = vproc_pkg::DIV_GENERIC, parameter bit BUF_OPERANDS = 1'b1, parameter bit BUF_DIV_IN = 1'b1, @@ -21,5 +22,151 @@ module vproc_div #( import vproc_pkg::*; + /////////////////////////////////////////////////////////////////////////// + // MUL BUFFERS + + logic state_ex1_ready, state_ex2_ready, state_ex3_ready, state_res_ready; + logic state_ex1_valid_q, state_ex1_valid_d, state_ex2_valid_q, state_ex3_valid_q, state_res_valid_q; + CTRL_T state_ex1_q, state_ex1_d, state_ex2_q, state_ex3_q, state_res_q; + + generate + if (BUF_OPERANDS) begin + always_ff @(posedge clk_i or negedge async_rst_ni) begin : vproc_div_stage_ex1_valid + if (~async_rst_ni) begin + state_ex1_valid_q <= 1'b0; + end + else if (~sync_rst_ni) begin + state_ex1_valid_q <= 1'b0; + end + else if (state_ex1_ready) begin + state_ex1_valid_q <= state_ex1_valid_d; + end + end + always_ff @(posedge c lk_i) begin : vproc_div_stage_ex1 + if (state_ex1_ready & state_ex1_valid_d) begin + state_ex1_q <= state_ex1_d; + operand1_q <= operand1_d; + operand2_q <= operand2_d; + end + end + assign state_ex1_ready = ~state_ex1_valid_q | state_ex2_ready; + end else begin + always_comb begin + state_ex1_valid_q = state_ex1_valid_d; + state_ex1_q = state_ex1_d; + operand1_q = operand1_d; + operand2_q = operand2_d; + end + end + + if (BUF_DIV_IN) begin + always_ff @(posedge clk_i or negedge async_rst_ni) begin : vproc_div_stage_ex2_valid + if (~async_rst_ni) begin + state_ex2_valid_q <= 1'b0; + end + else if (~sync_rst_ni) begin + state_ex2_valid_q <= 1'b0; + end + else if (state_ex2_ready) begin + state_ex2_valid_q <= state_ex1_valid_q; + end + end + always_ff @(posedge 
clk_i) begin : vproc_div_stage_ex2 + if (state_ex2_ready & state_ex1_valid_q) begin + state_ex2_q <= state_ex1_q; + end + end + assign state_ex2_ready = ~state_ex2_valid_q | state_ex3_ready; + end else begin + always_comb begin + state_ex2_valid_q = state_ex1_valid_q; + state_ex2_q = state_ex1_q; + end + assign state_ex2_ready = state_ex3_ready; + end + + if (BUF_DIV_OUT) begin + always_ff @(posedge clk_i or negedge async_rst_ni) begin : vproc_div_stage_ex3_valid + if (~async_rst_ni) begin + state_ex3_valid_q <= 1'b0; + end + else if (~sync_rst_ni) begin + state_ex3_valid_q <= 1'b0; + end + else if (state_ex3_ready) begin + state_ex3_valid_q <= state_ex2_valid_q; + end + end + always_ff @(posedge clk_i) begin : vproc_div_stage_ex3 + if (state_ex3_ready & state_ex2_valid_q) begin + state_ex3_q <= state_ex2_q; + end + end + assign state_ex3_ready = ~state_ex3_valid_q | state_res_ready; + end else begin + always_comb begin + state_ex3_valid_q = state_ex2_valid_q; + state_ex3_q = state_ex2_q; + end + assign state_ex3_ready = state_res_ready; + end + + if (BUF_RESULTS) begin + always_ff @(posedge clk_i or negedge async_rst_ni) begin : vproc_div_stage_res_valid + if (~async_rst_ni) begin + state_res_valid_q <= 1'b0; + end + else if (~sync_rst_ni) begin + state_res_valid_q <= 1'b0; + end + else if (state_res_ready) begin + state_res_valid_q <= state_ex3_valid_q; + end + end + always_ff @(posedge clk_i) begin : vproc_div_stage_res + if (state_res_ready & state_ex3_valid_q) begin + state_res_q <= state_ex3_q; + result_q <= result_d; + end + end + assign state_res_ready = ~state_res_valid_q | pipe_out_ready_i; + end else begin + always_comb begin + state_res_valid_q = state_ex3_valid_q; + state_res_q = state_ex3_q; + result_q = result_d; + end + assign state_res_ready = pipe_out_ready_i; + end + endgenerate + + logic [(DIV_OP_W/8)*17-1:0] div_op1, div_op2; + + // perform signed division of xx-bit integers + logic [(DIV_TOP_W/8)*33-1:0] div_res; + genvar g; + generate + for 
(g = 0; g < DIV_OP_W / 8; g++) begin + vproc_div_block #( + .DIV_TYPE (DIV_TYPE ), + .BUF_OPS (BUF_DIV_IN ), + .BUF_DIV (BUF_DIV_OUT ), + .BUF_RES (1'b0 ) + ) div_block ( + .clk_i (clk_i ), + .async_rst_ni (async_rst_ni ), + .sync_rst_ni (sync_rst_ni ), + .op1_i (div_op1 [17*g +: 17] ), + .op2_i (div_op2 [17*g +: 17] ), + .res_o (div_res [33*g +: 33] ) + ); + end + endgenerate + + // compose result + alwasy_comb begin + + end + endmodule From 0f54a4f7e9b446add1fec38115cf165889251055 Mon Sep 17 00:00:00 2001 From: Spenser Fong Date: Thu, 27 Oct 2022 04:44:06 -0500 Subject: [PATCH 09/64] Added input formatting for divider for different VSEW values, including signs --- rtl/vproc_div.sv | 90 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 88 insertions(+), 2 deletions(-) diff --git a/rtl/vproc_div.sv b/rtl/vproc_div.sv index 56856fd..aa18a54 100644 --- a/rtl/vproc_div.sv +++ b/rtl/vproc_div.sv @@ -42,7 +42,7 @@ module vproc_div #( state_ex1_valid_q <= state_ex1_valid_d; end end - always_ff @(posedge c lk_i) begin : vproc_div_stage_ex1 + always_ff @(posedge clk_i) begin : vproc_div_stage_ex1 if (state_ex1_ready & state_ex1_valid_d) begin state_ex1_q <= state_ex1_d; operand1_q <= operand1_d; @@ -140,7 +140,93 @@ module vproc_div #( end endgenerate + + /////////////////////////////////////////////////////////////////////////// + // DIV ARITHMETIC + + logic [DIV_OP_W/8-1:0] op1_signs, op2_signs; + always_comb begin + op1_signs = DONT_CARE_ZERO ? '0 : 'x; + op2_signs = DONT_CARE_ZERO ? '0 : 'x; + for (int i = 0; i < DIV_OP_W/8; i++) begin + op1_signs[i] = state_ex1_q.mode.mul.op1_signed & operand1_q[8*i+7]; + op2_signs[i] = state_ex1_q.mode.mul.op2_signed & operand2_q[8*i+7]; + end + end + + logic ex1_vsew_8, ex1_vsew_32; + always_comb begin + ex1_vsew_8 = DONT_CARE_ZERO ? '0 : 'x; + ex1_vsew_32 = DONT_CARE_ZERO ? 
'0 : 'x; + unique case (state_ex1_q.eew) + VSEW_8: ex1_vsew_8 = 1'b1; + VSEW_16: ex1_vsew_8 = 1'b0; + VSEW_32: ex1_vsew_8 = 1'b0; + default: ; + endcase + unique case (state_ex1_q.eew) + VSEW_8: ex1_vsew_32 = 1'b0; + VSEW_16: ex1_vsew_32 = 1'b0; + VSEW_32: ex1_vsew_32 = 1'b1; + default: ; + endcase + end + logic [(DIV_OP_W/8)*17-1:0] div_op1, div_op2; + always_comb begin + div_op1 = DONT_CARE_ZERO ? '0 : 'x; + for (int i = 0; i < DIV_OP_W / 32; i++) begin + div_op1[68*i +: 68] = { + // VSEW_8: byte 3, VSEW_32: upper halfword + op1_signs[4*i+3] , ~ex1_vsew_32 ? {{8{op1_signs[4*i+3]}}, operand1_q[32*i+24 +: 8]} : operand1_q[32*i+16 +: 16], + // VSEW_8: byte 2, VSEW_16 and VSEW_32: upper halfword + op1_signs[4*i+3] , ex1_vsew_8 ? {8{op1_signs[4*i+2]}} : operand1_q[32*i+24 +: 8], operand1_q[32*i+16 +: 8 ], + // VSEW_8: byte 1, VSEW_32: lower halfword + 1'b0 , ~ex1_vsew_32 ? {{8{op1_signs[4*i+1]}}, operand1_q[32*i+8 +: 8]} : operand1_q[32*i +: 16], + // VSEW_8: byte 0, VSEW_16 and VSEW_32: lower halfword + ~ex1_vsew_32 & op1_signs[4*i+1], ex1_vsew_8 ? {8{op1_signs[4*i ]}} : operand1_q[32*i+8 +: 8], operand1_q[32*i +: 8 ] + }; + end + div_op2 = DONT_CARE_ZERO ? '0 : 'x; + for (int i = 0; i < DIV_OP_W / 32; i++) begin + div_op2[68*i +: 68] = { + // VSEW_8: byte 3, VSEW_32: lower halfword + 1'b0 , ~ex1_vsew_32 ? {{8{op2_signs[4*i+3]}}, operand2_q[32*i+24 +: 8]} : operand2_q[32*i +: 16], + // VSEW_8: byte 2, VSEW_16 and VSEW_32: upper halfword + op2_signs[4*i+3] , ex1_vsew_8 ? {8{op2_signs[4*i+2]}} : operand2_q[32*i+24 +: 8], operand2_q[32*i+16 +: 8 ], + // VSEW_8: byte 1, VSEW_32: upper halfword + op2_signs[4*i+3] , ~ex1_vsew_32 ? {{8{op2_signs[4*i+1]}}, operand2_q[32*i+8 +: 8]} : operand2_q[32*i+16 +: 16], + // VSEW_8: byte 0, VSEW_16 and VSEW_32: lower halfword + ~ex1_vsew_32 & op2_signs[4*i+1], ex1_vsew_8 ? {8{op2_signs[4*i ]}} : operand2_q[32*i+8 +: 8], operand2_q[32*i +: 8 ] + }; + end + end + + // Not sure if this is needed... 
+ logic ex2_vsew_8, ex2_vsew_16, ex2_vsew_32; + always_comb begin + ex2_vsew_8 = DONT_CARE_ZERO ? '0 : 'x; + ex2_vsew_16 = DONT_CARE_ZERO ? '0 : 'x; + ex2_vsew_32 = DONT_CARE_ZERO ? '0 : 'x; + unique case (state_ex2_q.eew) + VSEW_8: ex2_vsew_8 = 1'b1; + VSEW_16: ex2_vsew_8 = 1'b0; + VSEW_32: ex2_vsew_8 = 1'b0; + default: ; + endcase + unique case (state_ex2_q.eew) + VSEW_8: ex2_vsew_16 = 1'b0; + VSEW_16: ex2_vsew_16 = 1'b1; + VSEW_32: ex2_vsew_16 = 1'b0; + default: ; + endcase + unique case (state_ex2_q.eew) + VSEW_8: ex2_vsew_32 = 1'b0; + VSEW_16: ex2_vsew_32 = 1'b0; + VSEW_32: ex2_vsew_32 = 1'b1; + default: ; + endcase + end // perform signed division of xx-bit integers logic [(DIV_TOP_W/8)*33-1:0] div_res; @@ -164,7 +250,7 @@ module vproc_div #( endgenerate // compose result - alwasy_comb begin + always_comb begin end From 294c1d7854907ba46932efc67b47a372b40bd9f0 Mon Sep 17 00:00:00 2001 From: Spenser Fong Date: Mon, 31 Oct 2022 10:02:24 -0500 Subject: [PATCH 10/64] Initial divider used by vproc_div_block.sv --- rtl/div_int.sv | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 rtl/div_int.sv diff --git a/rtl/div_int.sv b/rtl/div_int.sv new file mode 100644 index 0000000..1dbd279 --- /dev/null +++ b/rtl/div_int.sv @@ -0,0 +1,53 @@ +module div_int #(parameter WIDTH=4) ( + input wire logic clk, + input wire logic start, // start signal + output logic busy, // calculation in progress + output logic valid, // quotient and remainder are valid + output logic dbz, // divide by zero flag + input wire logic [WIDTH-1:0] x, // dividend + input wire logic [WIDTH-1:0] y, // divisor + output logic [WIDTH-1:0] q, // quotient + output logic [WIDTH-1:0] r // remainder + ); + + logic [WIDTH-1:0] y1; // copy of divisor + logic [WIDTH-1:0] q1, q1_next; // intermediate quotient + logic [WIDTH:0] ac, ac_next; // accumulator (1 bit wider) + logic [$clog2(WIDTH)-1:0] i; // iteration counter + + always_comb begin + if (ac >= 
{1'b0,y1}) begin + ac_next = ac - y1; + {ac_next, q1_next} = {ac_next[WIDTH-1:0], q1, 1'b1}; + end else begin + {ac_next, q1_next} = {ac, q1} << 1; + end + end + + always_ff @(posedge clk) begin + if (start) begin + valid <= 0; + i <= 0; + if (y == 0) begin // catch divide by zero + busy <= 0; + dbz <= 1; + end else begin // initialize values + busy <= 1; + dbz <= 0; + y1 <= y; + {ac, q1} <= {{WIDTH{1'b0}}, x, 1'b0}; + end + end else if (busy) begin + if (i == WIDTH-1) begin // we're done + busy <= 0; + valid <= 1; + q <= q1_next; + r <= ac_next[WIDTH:1]; // undo final shift + end else begin // next iteration + i <= i + 1; + ac <= ac_next; + q1 <= q1_next; + end + end + end +endmodule From ce27649d7006835ca04e89355f8a2042ff48c341 Mon Sep 17 00:00:00 2001 From: Spenser Fong Date: Wed, 2 Nov 2022 02:26:21 -0500 Subject: [PATCH 11/64] Added support to choose div or mod in div_block --- rtl/vproc_div.sv | 3 ++- rtl/vproc_div_block.sv | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/rtl/vproc_div.sv b/rtl/vproc_div.sv index aa18a54..2e6dc9d 100644 --- a/rtl/vproc_div.sv +++ b/rtl/vproc_div.sv @@ -228,7 +228,7 @@ module vproc_div #( endcase end - // perform signed division of xx-bit integers + // perform unsigned division of xx-bit integers logic [(DIV_TOP_W/8)*33-1:0] div_res; genvar g; generate @@ -242,6 +242,7 @@ module vproc_div #( .clk_i (clk_i ), .async_rst_ni (async_rst_ni ), .sync_rst_ni (sync_rst_ni ), + .mod ( ), // tells div_block to mod or not .op1_i (div_op1 [17*g +: 17] ), .op2_i (div_op2 [17*g +: 17] ), .res_o (div_res [33*g +: 33] ) diff --git a/rtl/vproc_div_block.sv b/rtl/vproc_div_block.sv index ebf606e..ce354c5 100644 --- a/rtl/vproc_div_block.sv +++ b/rtl/vproc_div_block.sv @@ -11,6 +11,8 @@ module vproc_div_block #( input logic async_rst_ni, input logic sync_rst_ni, + input logic mod, // 0 = quotient, 1 = modulo + input logic [16:0] op1_i, input logic [16:0] op2_i, @@ -60,7 +62,8 @@ module vproc_div_block #( end end - assign 
div_d = $signed(op1_q) / $signed(op2_q); + assign div_d = (mod) ? (op1_q % op2_q) : (op1_q / op2_q); + assign res_d = div_d; assign res_o = res_q; end From a7abbc677227a747148396d7ecacccb0d76dd584 Mon Sep 17 00:00:00 2001 From: Spenser Fong Date: Wed, 2 Nov 2022 02:55:18 -0500 Subject: [PATCH 12/64] Changed vproc_div_block to do signed div/mod --- rtl/vproc_div_block.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtl/vproc_div_block.sv b/rtl/vproc_div_block.sv index ce354c5..68172d5 100644 --- a/rtl/vproc_div_block.sv +++ b/rtl/vproc_div_block.sv @@ -62,7 +62,7 @@ module vproc_div_block #( end end - assign div_d = (mod) ? (op1_q % op2_q) : (op1_q / op2_q); + assign div_d = (mod) ? ($signed(op1_q) % $signed(op2_q)) : ($signed(op1_q) / $signed(op2_q)); assign res_d = div_d; assign res_o = res_q; From 07fb611610de98e5ad79f04a78193ff85fa3fd5d Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 2 Nov 2022 12:57:09 -0500 Subject: [PATCH 13/64] vector division pipelining --- rtl/vproc_core.sv | 2 ++ rtl/vproc_div.sv | 32 ++++++++++++++++++++-- rtl/vproc_pipeline.sv | 4 ++- rtl/vproc_pipeline_wrapper.sv | 49 +++++++++++++++++++++++++--------- rtl/vproc_pkg.sv | 2 ++ rtl/vproc_unit_mux.sv | 2 ++ rtl/vproc_unit_wrapper.sv | 50 +++++++++++++++++++++++++++++++++++ vproc_config.sv | 1 + 8 files changed, 127 insertions(+), 15 deletions(-) diff --git a/rtl/vproc_core.sv b/rtl/vproc_core.sv index 92f385e..a4f2af5 100644 --- a/rtl/vproc_core.sv +++ b/rtl/vproc_core.sv @@ -28,6 +28,7 @@ module vproc_core import vproc_pkg::*; #( parameter int unsigned VLSU_QUEUE_SZ = vproc_config::VLSU_QUEUE_SZ, parameter bit [VLSU_FLAGS_W-1:0] VLSU_FLAGS = vproc_config::VLSU_FLAGS, parameter mul_type MUL_TYPE = vproc_config::MUL_TYPE, + parameter div_type DIV_TYPE = vproc_config::DIV_TYPE, // Miscellaneous configuration parameter int unsigned INSTR_QUEUE_SZ = vproc_config::INSTR_QUEUE_SZ, @@ -975,6 +976,7 @@ module vproc_core import vproc_pkg::*; #( .VLSU_QUEUE_SZ ( 
VLSU_QUEUE_SZ ), .VLSU_FLAGS ( VLSU_FLAGS ), .MUL_TYPE ( MUL_TYPE ), + .DIV_TYPE ( DIV_TYPE ), .DECODER_DATA_T ( decoder_data ), .DONT_CARE_ZERO ( DONT_CARE_ZERO ) ) pipe ( diff --git a/rtl/vproc_div.sv b/rtl/vproc_div.sv index 2e6dc9d..e50d56f 100644 --- a/rtl/vproc_div.sv +++ b/rtl/vproc_div.sv @@ -6,7 +6,8 @@ module vproc_div #( parameter bit BUF_OPERANDS = 1'b1, parameter bit BUF_DIV_IN = 1'b1, parameter bit BUF_DIV_OUT = 1'b1, - parameter bit BUF_RESULTS = 1'b1 + parameter bit BUF_RESULTS = 1'b1, + parameter type CTRL_T = logic // parameter bit DONT_CARE_ZERO = 1'b0 )( input logic clk_i, @@ -15,9 +16,19 @@ module vproc_div #( input logic pipe_in_valid_i, output logic pipe_in_ready_o, + + input CTRL_T pipe_in_ctrl_i, + input logic [DIV_OP_W -1:0] pipe_in_op1_i, + input logic [DIV_OP_W -1:0] pipe_in_op2_i, + input logic [DIV_OP_W -1:0] pipe_in_op3_i, + input logic [DIV_OP_W/8-1:0] pipe_in_mask_i, output logic pipe_out_valid_o, - input logic pipe_out_ready_i + input logic pipe_out_ready_i, + + output CTRL_T pipe_out_ctrl_o, + output logic [DIV_OP_W -1:0] pipe_out_res_o, + output logic [DIV_OP_W/8-1:0] pipe_out_mask_o ); import vproc_pkg::*; @@ -29,6 +40,14 @@ module vproc_div #( logic state_ex1_valid_q, state_ex1_valid_d, state_ex2_valid_q, state_ex3_valid_q, state_res_valid_q; CTRL_T state_ex1_q, state_ex1_d, state_ex2_q, state_ex3_q, state_res_q; + logic [DIV_OP_W -1:0] operand1_q, operand1_d; + logic [DIV_OP_W -1:0] operand2_q, operand2_d; + logic [DIV_OP_W/8-1:0] operand_mask_q, operand_mask_d; + logic [DIV_OP_W -1:0] result_q, result_d; + logic [DIV_OP_W/8-1:0] result_mask1_q, result_mask1_d; // mask out stage 1 buffer (MUL_IN) + logic [DIV_OP_W/8-1:0] result_mask2_q, result_mask2_d; // mask out stage 2 buffer (MUL_OUT) + logic [DIV_OP_W/8-1:0] result_mask3_q, result_mask3_d; // mask out stage 3 buffer (RESULTS) + // needed for vregunpack to mask write destinations generate if (BUF_OPERANDS) begin always_ff @(posedge clk_i or negedge async_rst_ni) begin : 
vproc_div_stage_ex1_valid @@ -47,6 +66,7 @@ module vproc_div #( state_ex1_q <= state_ex1_d; operand1_q <= operand1_d; operand2_q <= operand2_d; + operand_mask_q <= operand_mask_d; end end assign state_ex1_ready = ~state_ex1_valid_q | state_ex2_ready; @@ -56,7 +76,9 @@ module vproc_div #( state_ex1_q = state_ex1_d; operand1_q = operand1_d; operand2_q = operand2_d; + operand_mask_q <= operand_mask_d; end + assign state_ex1_ready = state_ex2_ready; end if (BUF_DIV_IN) begin @@ -74,6 +96,7 @@ module vproc_div #( always_ff @(posedge clk_i) begin : vproc_div_stage_ex2 if (state_ex2_ready & state_ex1_valid_q) begin state_ex2_q <= state_ex1_q; + result_mask1_q <= result_mask1_d; end end assign state_ex2_ready = ~state_ex2_valid_q | state_ex3_ready; @@ -81,6 +104,7 @@ module vproc_div #( always_comb begin state_ex2_valid_q = state_ex1_valid_q; state_ex2_q = state_ex1_q; + result_mask1_q = result_mask1_d; end assign state_ex2_ready = state_ex3_ready; end @@ -100,6 +124,7 @@ module vproc_div #( always_ff @(posedge clk_i) begin : vproc_div_stage_ex3 if (state_ex3_ready & state_ex2_valid_q) begin state_ex3_q <= state_ex2_q; + result_mask2_q <= result_mask2_d; end end assign state_ex3_ready = ~state_ex3_valid_q | state_res_ready; @@ -107,6 +132,7 @@ module vproc_div #( always_comb begin state_ex3_valid_q = state_ex2_valid_q; state_ex3_q = state_ex2_q; + result_mask2_q = result_mask2_d; end assign state_ex3_ready = state_res_ready; end @@ -127,6 +153,7 @@ module vproc_div #( if (state_res_ready & state_ex3_valid_q) begin state_res_q <= state_ex3_q; result_q <= result_d; + result_mask3_q <= result_mask3_d; end end assign state_res_ready = ~state_res_valid_q | pipe_out_ready_i; @@ -135,6 +162,7 @@ module vproc_div #( state_res_valid_q = state_ex3_valid_q; state_res_q = state_ex3_q; result_q = result_d; + result_mask3_q = result_mask3_d; end assign state_res_ready = pipe_out_ready_i; end diff --git a/rtl/vproc_pipeline.sv b/rtl/vproc_pipeline.sv index e8ea027..d73e7dc 100644 --- 
a/rtl/vproc_pipeline.sv +++ b/rtl/vproc_pipeline.sv @@ -43,6 +43,7 @@ module vproc_pipeline import vproc_pkg::*; #( parameter int unsigned VLSU_QUEUE_SZ = 4, parameter bit [VLSU_FLAGS_W-1:0] VLSU_FLAGS = '0, parameter mul_type MUL_TYPE = MUL_GENERIC, + parameter div_type DIV_TYPE = DIV_GENERIC, parameter type INIT_STATE_T = logic, parameter bit DONT_CARE_ZERO = 1'b0 // initialize don't care values to zero )( @@ -121,7 +122,7 @@ module vproc_pipeline import vproc_pkg::*; #( typedef struct packed { counter_t count; // main counter - counter_t alt_count; // alternative counter (used by some ops) + counter_t alt_count; // alternative counter (used by some ops (UNIT_SLD)) count_inc_e count_inc; // counter increment policy logic [AUX_COUNTER_W-1:0] aux_count; // auxiliary counter (for dyn addr ops) logic [2:0] field_count; // field counter (for segment loads/stores) @@ -880,6 +881,7 @@ module vproc_pipeline import vproc_pkg::*; #( .VLSU_QUEUE_SZ ( VLSU_QUEUE_SZ ), .VLSU_FLAGS ( VLSU_FLAGS ), .MUL_TYPE ( MUL_TYPE ), + .DIV_TYPE ( DIV_TYPE ), .CTRL_T ( ctrl_t ), .COUNTER_T ( counter_t ), .COUNTER_W ( COUNTER_W ), diff --git a/rtl/vproc_pipeline_wrapper.sv b/rtl/vproc_pipeline_wrapper.sv index 94ae9ad..95c83ed 100644 --- a/rtl/vproc_pipeline_wrapper.sv +++ b/rtl/vproc_pipeline_wrapper.sv @@ -28,6 +28,7 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( parameter int unsigned VLSU_QUEUE_SZ = 4, parameter bit [VLSU_FLAGS_W-1:0] VLSU_FLAGS = '0, parameter mul_type MUL_TYPE = MUL_GENERIC, + parameter div_type DIV_TYPE = DIV_GENERIC, parameter type DECODER_DATA_T = logic, parameter bit DONT_CARE_ZERO = 1'b0 // initialize don't care values to zero )( @@ -112,7 +113,9 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( // - ELEM unit additionally requires indices -3 and -2, hence a minimum of 5 operands // - if MUL and ELEM units are both present in same pipeline, then all 6 operands are required // - in case a pipeline contains only the SLD unit the operand count is 2 
(indices 0 and -1) - localparam int unsigned OP_CNT = UNITS[UNIT_MUL] ? ( + + // ECE498HK edits + localparam int unsigned OP_CNT = (UNITS[UNIT_MUL] | UNITS[UNIT_DIV]) ? ( UNITS[UNIT_ELEM] ? 6 : 4 ) : ( UNITS[UNIT_ELEM] ? 5 : ( @@ -127,7 +130,8 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( // should be fetched at the latest possible stage, since the pipeline waits until the alt count // completes its cycle before accepting the next instruction. localparam int unsigned OP0_SRC = 0; - localparam int unsigned OP1_SRC = (VPORT_CNT >= (UNITS[UNIT_MUL] ? 3 : 2)) ? 1 : 0; + // ece498hk edits + localparam int unsigned OP1_SRC = (VPORT_CNT >= ((UNITS[UNIT_MUL] | UNITS[UNIT_DIV]) ? 3 : 2)) ? 1 : 0; localparam int unsigned OP2_SRC = VPORT_CNT - 1; localparam int unsigned MIN_STAGE = 1; // first possible unpack stage // start by fetching op 0, then op1, except for ELEM unit which needs to fetch op1 first since @@ -154,6 +158,13 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( VPORT_W[OP0_SRC], VPORT_W[OP0_SRC] / 4, "however, the specified operand width is %d bits).", MAX_OP_W); end + if (UNITS[UNIT_DIV] & (OP0_SRC == OP1_SRC) & (OP0_SRC == OP2_SRC) & (MAX_OP_W * 2 >= VPORT_W[OP0_SRC])) begin + $fatal(1, "If operands 0, 1, and 2 share the same source read port, then the operand ", + "width must not be larger than one quarter of the read port width (the current ", + "read port width is %d bits, hence the operand width can be at most %d bits; ", + VPORT_W[OP0_SRC], VPORT_W[OP0_SRC] / 4, + "however, the specified operand width is %d bits).", MAX_OP_W); + end // Number of stages for required for operand unpacking (maximum of operand stages + 1) localparam int unsigned UNPACK_STAGES = 1 + ((OP0_STAGE > OP1_STAGE) ? 
( @@ -165,9 +176,12 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( // operand flags localparam bit OP_DYN_ADDR_OFFSET = UNITS[UNIT_ELEM]; // operand with dynamic addr used localparam bit OP_SECOND_MASK = UNITS[UNIT_ELEM]; // second mask operand used - localparam bit OP0_NARROW = UNITS[UNIT_MUL] | UNITS[UNIT_ALU] | UNITS[UNIT_ELEM]; - localparam bit OP1_NARROW = UNITS[UNIT_MUL] | UNITS[UNIT_ALU]; - localparam bit OP1_XREG = UNITS[UNIT_MUL] | UNITS[UNIT_ALU]; + + // ECE498HK edits + // The fixed-point instructions help preserve precision in narrow operands by supporting scaling and rounding + localparam bit OP0_NARROW = UNITS[UNIT_MUL] | UNITS[UNIT_ALU] | UNITS[UNIT_ELEM] | UNITS[UNIT_DIV]; + localparam bit OP1_NARROW = UNITS[UNIT_MUL] | UNITS[UNIT_ALU] | UNITS[UNIT_DIV]; + localparam bit OP1_XREG = UNITS[UNIT_MUL] | UNITS[UNIT_ALU] | UNITS[UNIT_DIV]; // OPMVX category of instr use GPT x register rs1 as scalar operand. vdiv/vrem supports .vv and .vx localparam bit OP0_ELEMWISE = UNITS[UNIT_LSU] | UNITS[UNIT_ELEM]; localparam bit OP1_ELEMWISE = UNITS[UNIT_LSU] | UNITS[UNIT_ELEM]; localparam bit OPMASK_ELEMWISE = UNITS[UNIT_LSU] | UNITS[UNIT_ELEM]; @@ -216,12 +230,14 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( } state_t; // identify the unit of the supplied instruction - logic unit_lsu, unit_alu, unit_mul, unit_sld, unit_elem; - assign unit_lsu = UNITS[UNIT_LSU ] & (pipe_in_data_i.unit == UNIT_LSU ); - assign unit_alu = UNITS[UNIT_ALU ] & (pipe_in_data_i.unit == UNIT_ALU ); - assign unit_mul = UNITS[UNIT_MUL ] & (pipe_in_data_i.unit == UNIT_MUL ); - assign unit_sld = UNITS[UNIT_SLD ] & (pipe_in_data_i.unit == UNIT_SLD ); - assign unit_elem = UNITS[UNIT_ELEM] & (pipe_in_data_i.unit == UNIT_ELEM); + // ECE498HK additions + logic unit_lsu, unit_alu, unit_mul, unit_sld, unit_elem, unit_div; + assign unit_lsu = UNITS[UNIT_LSU ] & (pipe_in_data_i.unit == UNIT_LSU ); + assign unit_alu = UNITS[UNIT_ALU ] & (pipe_in_data_i.unit == UNIT_ALU ); + assign 
unit_div = UNITS[UNIT_DIV ] & (pipe_in_data_i.unit == UNIT_DIV ); + assign unit_mul = UNITS[UNIT_MUL ] & (pipe_in_data_i.unit == UNIT_MUL ); + assign unit_sld = UNITS[UNIT_SLD ] & (pipe_in_data_i.unit == UNIT_SLD ); + assign unit_elem = UNITS[UNIT_ELEM] & (pipe_in_data_i.unit == UNIT_ELEM); // identify the type of data that vs2 supplies for ELEM instructions logic elem_flush, elem_vs2_data, elem_vs2_mask, elem_vs2_dyn_addr; @@ -450,6 +466,8 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( unique case (1'b1) unit_lsu: state_init.op_flags[OP_CNT-1].vreg = pipe_in_data_i.mode.lsu.masked; unit_alu: state_init.op_flags[OP_CNT-1].vreg = pipe_in_data_i.mode.alu.op_mask != ALU_MASK_NONE; + // ECE498HK additions + unit_div: state_init.op_flags[OP_CNT-1].vreg = pipe_in_data_i.mode.div.masked unit_mul: state_init.op_flags[OP_CNT-1].vreg = pipe_in_data_i.mode.mul.masked; unit_sld: state_init.op_flags[OP_CNT-1].vreg = pipe_in_data_i.mode.sld.masked; unit_elem: state_init.op_flags[OP_CNT-1].vreg = pipe_in_data_i.mode.elem.masked; @@ -475,11 +493,17 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( state_init.res_narrow[0 ] = pipe_in_data_i.widenarrow == OP_NARROWING; state_init.res_vreg [RES_CNT-1] = pipe_in_data_i.mode.alu.cmp; end + // ECE498HK additions + if (unit_div) begin + state_init.op_vaddr[0] = pipe_in_data_i.rs2.r.vaddr; // for vmadd and vnmsub, MUL stores vaddr in .rd.addr, this is not needed for div + state_init.op_flags[0].sigext = pipe_in_data_i.mode.div.op2_signed; + state_init.op_flags[1].sigext = pipe_in_data_i.mode.div.op1_signed; + end if (unit_mul) begin state_init.op_vaddr[0] = pipe_in_data_i.mode.mul.op2_is_vd ? pipe_in_data_i.rd.addr : pipe_in_data_i.rs2.r.vaddr; state_init.op_flags[0].sigext = pipe_in_data_i.mode.mul.op2_signed; state_init.op_flags[1].sigext = pipe_in_data_i.mode.mul.op1_signed; - state_init.op_flags[(OP_CNT >= 3) ? 2 : 0].vreg = pipe_in_data_i.mode.mul.op == MUL_VMACC; + state_init.op_flags[(OP_CNT >= 3) ? 
2 : 0].vreg = pipe_in_data_i.mode.mul.op == MUL_VMACC; // macc is desctructive, vd overwrites addend or minuend state_init.op_vaddr[(OP_CNT >= 3) ? 2 : 0] = pipe_in_data_i.mode.mul.op2_is_vd ? pipe_in_data_i.rs2.r.vaddr : pipe_in_data_i.rd.addr; end if (unit_elem) begin @@ -560,6 +584,7 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( .VLSU_QUEUE_SZ ( VLSU_QUEUE_SZ ), .VLSU_FLAGS ( VLSU_FLAGS ), .MUL_TYPE ( MUL_TYPE ), + .DIV_TYPE ( DIV_TYPE ), .INIT_STATE_T ( state_t ), .DONT_CARE_ZERO ( DONT_CARE_ZERO ) ) pipeline ( diff --git a/rtl/vproc_pkg.sv b/rtl/vproc_pkg.sv index d96e291..3863cc7 100644 --- a/rtl/vproc_pkg.sv +++ b/rtl/vproc_pkg.sv @@ -114,6 +114,7 @@ typedef enum logic [2:0] { // The number of different types of execution units (excludes pseudo-units) parameter int unsigned UNIT_CNT = 6; +// LSU ALU DIV MUL STD ELEM typedef enum logic [1:0] { COUNT_INC_1, @@ -224,6 +225,7 @@ typedef enum logic [1:0] { typedef struct packed { // TODO + logic masked; opcode_div op; logic op1_signed; logic op2_signed; diff --git a/rtl/vproc_unit_mux.sv b/rtl/vproc_unit_mux.sv index 95a32b0..2e4e293 100644 --- a/rtl/vproc_unit_mux.sv +++ b/rtl/vproc_unit_mux.sv @@ -15,6 +15,7 @@ module vproc_unit_mux import vproc_pkg::*; #( parameter int unsigned VLSU_QUEUE_SZ = 4, parameter bit [VLSU_FLAGS_W-1:0] VLSU_FLAGS = '0, parameter mul_type MUL_TYPE = MUL_GENERIC, + parameter div_type DIV_TYPE = DIV_GENERIC, parameter type CTRL_T = logic, parameter type COUNTER_T = logic, parameter int unsigned COUNTER_W = 0, @@ -132,6 +133,7 @@ module vproc_unit_mux import vproc_pkg::*; #( .VLSU_QUEUE_SZ ( VLSU_QUEUE_SZ ), .VLSU_FLAGS ( VLSU_FLAGS ), .MUL_TYPE ( MUL_TYPE ), + .DIV_TYPE ( DIV_TYPE ), .CTRL_T ( CTRL_T ), .COUNTER_T ( COUNTER_T ), .COUNTER_W ( COUNTER_W ), diff --git a/rtl/vproc_unit_wrapper.sv b/rtl/vproc_unit_wrapper.sv index b631ec3..6ba8261 100644 --- a/rtl/vproc_unit_wrapper.sv +++ b/rtl/vproc_unit_wrapper.sv @@ -15,6 +15,7 @@ module vproc_unit_wrapper import vproc_pkg::*; 
#( parameter int unsigned VLSU_QUEUE_SZ = 4, parameter bit [VLSU_FLAGS_W-1:0] VLSU_FLAGS = '0, parameter mul_type MUL_TYPE = MUL_GENERIC, + parameter div_type DIV_TYPE = DIV_GENERIC, parameter type CTRL_T = logic, parameter type COUNTER_T = logic, parameter int unsigned COUNTER_W = 0, @@ -182,6 +183,55 @@ module vproc_unit_wrapper import vproc_pkg::*; #( assign pipe_out_pend_clear_cnt_o = '0; assign pipe_out_instr_done_o = unit_out_ctrl.last_cycle; end + // ECE498HK edits + else if (UNIT == UNIT_DIV) begin + CTRL_T unit_out_ctrl; + logic [MAX_OP_W -1:0] unit_out_res; + logic [MAX_OP_W/8-1:0] unit_out_mask; + vproc_div #( + .DIV_OP_W ( MAX_OP_W ), + .DIV_TYPE ( DIV_TYPE ), + .CTRL_T ( CTRL_T ) + ) div ( + .clk_i ( clk_i ), + .async_rst_ni ( async_rst_ni ), + .sync_rst_ni ( sync_rst_ni ), + + .pipe_in_valid_i ( pipe_in_valid_i ), + .pipe_in_ready_o ( pipe_in_ready_o ), + + .pipe_in_ctrl_i ( pipe_in_ctrl_i ), + .pipe_in_op1_i ( pipe_in_op_data_i[1] ), // TODO double check this + .pipe_in_op2_i ( pipe_in_op_data_i[0] ), // TODO should be swapped + .pipe_in_op3_i ( pipe_in_op_data_i[2] ), // TODO might be different for DIV (since can only do vec-scalar div not scalar-vec) + .pipe_in_mask_i ( pipe_in_op_data_i[OP_CNT-1][MAX_OP_W/8-1:0] ), + + .pipe_out_valid_o ( pipe_out_valid_o ), + .pipe_out_ready_i ( pipe_out_ready_i ), + + .pipe_out_ctrl_o ( unit_out_ctrl ), + .pipe_out_res_o ( unit_out_res ), + .pipe_out_mask_o ( unit_out_mask ) + ); + always_comb begin + pipe_out_instr_id_o = unit_out_ctrl.id; + pipe_out_eew_o = unit_out_ctrl.eew; + pipe_out_vaddr_o = unit_out_ctrl.res_vaddr; + pipe_out_res_store_o = '0; + pipe_out_res_valid_o = '0; + pipe_out_res_flags_o = '{default: pack_flags'('0)}; + pipe_out_res_data_o = '0; + pipe_out_res_mask_o = '0; + pipe_out_res_flags_o[0].shift = 1'b1; + pipe_out_res_store_o[0] = unit_out_ctrl.res_store; + pipe_out_res_valid_o[0] = pipe_out_valid_o; + pipe_out_res_data_o [0] = unit_out_res; + pipe_out_res_mask_o [0][MAX_OP_W/8-1:0] = 
unit_out_mask; + end + assign pipe_out_pend_clear_o = unit_out_ctrl.res_store; + assign pipe_out_pend_clear_cnt_o = '0; + assign pipe_out_instr_done_o = unit_out_ctrl.last_cycle; + end else if (UNIT == UNIT_MUL) begin CTRL_T unit_out_ctrl; logic [MAX_OP_W -1:0] unit_out_res; diff --git a/vproc_config.sv b/vproc_config.sv index a56bad4..258b981 100644 --- a/vproc_config.sv +++ b/vproc_config.sv @@ -42,6 +42,7 @@ package vproc_config; parameter int unsigned VLSU_QUEUE_SZ = 4; parameter bit [VLSU_FLAGS_W-1:0] VLSU_FLAGS = '0; parameter mul_type MUL_TYPE = MUL_GENERIC; + parameter div_type DIV_TYPE = DIV_GENERIC; parameter int unsigned INSTR_QUEUE_SZ = 2; parameter bit [BUF_FLAGS_W-1:0] BUF_FLAGS = (BUF_FLAGS_W'(1) << BUF_DEQUEUE) | (BUF_FLAGS_W'(1) << BUF_VREG_PEND); From 2967416af8432356469c0dd4cdb36d868e1fde7a Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 2 Nov 2022 14:02:51 -0500 Subject: [PATCH 14/64] divider support w/ 2 op --- config.mk | 1 + rtl/vproc_decoder.sv | 4 +-- rtl/vproc_div.sv | 64 ++++++++++++++++++++++++++++++----- rtl/vproc_pipeline_wrapper.sv | 27 ++++++++------- rtl/vproc_pkg.sv | 8 ++--- rtl/vproc_unit_wrapper.sv | 7 ++-- vproc_config.sv | 2 +- 7 files changed, 81 insertions(+), 32 deletions(-) diff --git a/config.mk b/config.mk index 1b92cde..dea132b 100755 --- a/config.mk +++ b/config.mk @@ -183,6 +183,7 @@ $(VPROC_CONFIG_PKG): echo " parameter int unsigned VLSU_QUEUE_SZ = 4;" >>$@; \ echo " parameter bit [VLSU_FLAGS_W-1:0] VLSU_FLAGS = '0;" >>$@; \ echo " parameter mul_type MUL_TYPE = MUL_GENERIC;" >>$@; \ + echo " parameter div_type DIV_TYPE = DIV_GENERIC;" >>$@; \ echo "" >>$@; \ echo " parameter int unsigned INSTR_QUEUE_SZ = 2;" >>$@; \ echo " parameter bit [BUF_FLAGS_W-1:0] BUF_FLAGS = $${buf_flags};" >>$@; \ diff --git a/rtl/vproc_decoder.sv b/rtl/vproc_decoder.sv index 6045c6d..d70bf89 100644 --- a/rtl/vproc_decoder.sv +++ b/rtl/vproc_decoder.sv @@ -1098,7 +1098,7 @@ module vproc_decoder #( {6'b100000, 3'b010}, // vdivu VV 
{6'b100000, 3'b110}: begin // vidvu VX unit_o = UNIT_DIV; // CREATE UNIT_DIV!!! - mode_o.div.op = DIV_VDIVU; // CREAT DIV TYPES!!! + mode_o.div.op = DIV_VDIV; // CREAT DIV TYPES!!! mode_o.div.op1_signed = 1'b0; mode_o.div.op2_signed = 1'b0; // ... @@ -1114,7 +1114,7 @@ module vproc_decoder #( {6'b100010, 3'b010}, // vremu VV {6'b100010, 3'b110}: begin // vremu VX unit_o = UNIT_DIV; - mode_o.div.op = DIV_VREMU; + mode_o.div.op = DIV_VREM; mode_o.div.op1_signed = 1'b0; mode_o.div.op2_signed = 1'b0; // ... diff --git a/rtl/vproc_div.sv b/rtl/vproc_div.sv index e50d56f..5787c35 100644 --- a/rtl/vproc_div.sv +++ b/rtl/vproc_div.sv @@ -7,8 +7,8 @@ module vproc_div #( parameter bit BUF_DIV_IN = 1'b1, parameter bit BUF_DIV_OUT = 1'b1, parameter bit BUF_RESULTS = 1'b1, - parameter type CTRL_T = logic - // parameter bit DONT_CARE_ZERO = 1'b0 + parameter type CTRL_T = logic, + parameter bit DONT_CARE_ZERO = 1'b0 )( input logic clk_i, input logic async_rst_ni, @@ -20,7 +20,7 @@ module vproc_div #( input CTRL_T pipe_in_ctrl_i, input logic [DIV_OP_W -1:0] pipe_in_op1_i, input logic [DIV_OP_W -1:0] pipe_in_op2_i, - input logic [DIV_OP_W -1:0] pipe_in_op3_i, + input logic [DIV_OP_W/8-1:0] pipe_in_mask_i, output logic pipe_out_valid_o, @@ -34,7 +34,7 @@ module vproc_div #( import vproc_pkg::*; /////////////////////////////////////////////////////////////////////////// - // MUL BUFFERS + // DIV BUFFERS logic state_ex1_ready, state_ex2_ready, state_ex3_ready, state_res_ready; logic state_ex1_valid_q, state_ex1_valid_d, state_ex2_valid_q, state_ex3_valid_q, state_res_valid_q; @@ -170,6 +170,27 @@ module vproc_div #( /////////////////////////////////////////////////////////////////////////// + // DIV operand conversion + assign pipe_in_ready_o = state_ex1_ready; + assign state_ex1_valid_d = pipe_in_valid_i; + assign state_ex1_d = pipe_in_ctrl_i; + assign operand1_d = pipe_in_op1_i; + assign operand2_d = pipe_in_op2_i; + assign operand_mask_d = pipe_in_mask_i; + + logic [MUL_OP_W/8-1:0] 
vl_mask; + assign vl_mask = ~state_ex1_q.vl_part_0 ? ({(MUL_OP_W/8){1'b1}} >> (~state_ex1_q.vl_part)) : '0; + assign result_mask1_d = (state_ex1_q.mode.div.masked ? operand_mask_q : {(DIV_OP_W/8){1'b1}}) & vl_mask; + + assign result_mask2_d = result_mask1_q; + assign result_mask3_d = result_mask2_q; + + assign pipe_out_valid_o = state_res_valid_q; + assign pipe_out_ctrl_o = state_res_q; + assign pipe_out_res_o = result_q; + assign pipe_out_mask_o = result_mask3_q; + + // DIV ARITHMETIC logic [DIV_OP_W/8-1:0] op1_signs, op2_signs; @@ -177,8 +198,8 @@ module vproc_div #( op1_signs = DONT_CARE_ZERO ? '0 : 'x; op2_signs = DONT_CARE_ZERO ? '0 : 'x; for (int i = 0; i < DIV_OP_W/8; i++) begin - op1_signs[i] = state_ex1_q.mode.mul.op1_signed & operand1_q[8*i+7]; - op2_signs[i] = state_ex1_q.mode.mul.op2_signed & operand2_q[8*i+7]; + op1_signs[i] = state_ex1_q.mode.div.op1_signed & operand1_q[8*i+7]; + op2_signs[i] = state_ex1_q.mode.div.op2_signed & operand2_q[8*i+7]; end end @@ -230,7 +251,6 @@ module vproc_div #( end end - // Not sure if this is needed... logic ex2_vsew_8, ex2_vsew_16, ex2_vsew_32; always_comb begin ex2_vsew_8 = DONT_CARE_ZERO ? '0 : 'x; @@ -270,7 +290,7 @@ module vproc_div #( .clk_i (clk_i ), .async_rst_ni (async_rst_ni ), .sync_rst_ni (sync_rst_ni ), - .mod ( ), // tells div_block to mod or not + .mod (state_ex3_q.mode.div.op), // tells div_block to mod or not .op1_i (div_op1 [17*g +: 17] ), .op2_i (div_op2 [17*g +: 17] ), .res_o (div_res [33*g +: 33] ) @@ -280,7 +300,35 @@ module vproc_div #( // compose result always_comb begin + result_d = DONT_CARE_ZERO ? 
'0 : 'x; + unique case (state_ex3_q.mode.div.op) + // multiplication retaining low part + /* + DIV_VDIV, // divide + DIV_VREM // rem + */ + DIV_VDIV, DIV_VREM: begin + unique case (state_ex3_q.eew) + VSEW_8: begin + for (int i = 0; i < (MUL_OP_W / 8 ); i++) + result_d[8 *i +: 8 ] = div_res[16*i +: 8 ]; + end + VSEW_16: begin + for (int i = 0; i < (MUL_OP_W / 16); i++) + result_d[16*i +: 16] = div_res[32*i +: 16]; + end + VSEW_32: begin + for (int i = 0; i < (MUL_OP_W / 32); i++) + result_d[32*i +: 32] = div_res[64*i +: 32]; + end + default: ; + endcase + end + + default: ; + + endcase end endmodule diff --git a/rtl/vproc_pipeline_wrapper.sv b/rtl/vproc_pipeline_wrapper.sv index 95c83ed..c103d4b 100644 --- a/rtl/vproc_pipeline_wrapper.sv +++ b/rtl/vproc_pipeline_wrapper.sv @@ -115,7 +115,7 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( // - in case a pipeline contains only the SLD unit the operand count is 2 (indices 0 and -1) // ECE498HK edits - localparam int unsigned OP_CNT = (UNITS[UNIT_MUL] | UNITS[UNIT_DIV]) ? ( + localparam int unsigned OP_CNT = (UNITS[UNIT_MUL]) ? ( UNITS[UNIT_ELEM] ? 6 : 4 ) : ( UNITS[UNIT_ELEM] ? 5 : ( @@ -131,7 +131,7 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( // completes its cycle before accepting the next instruction. localparam int unsigned OP0_SRC = 0; // ece498hk edits - localparam int unsigned OP1_SRC = (VPORT_CNT >= ((UNITS[UNIT_MUL] | UNITS[UNIT_DIV]) ? 3 : 2)) ? 1 : 0; + localparam int unsigned OP1_SRC = (VPORT_CNT >= ((UNITS[UNIT_MUL]) ? 3 : 2)) ? 
1 : 0; localparam int unsigned OP2_SRC = VPORT_CNT - 1; localparam int unsigned MIN_STAGE = 1; // first possible unpack stage // start by fetching op 0, then op1, except for ELEM unit which needs to fetch op1 first since @@ -158,13 +158,6 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( VPORT_W[OP0_SRC], VPORT_W[OP0_SRC] / 4, "however, the specified operand width is %d bits).", MAX_OP_W); end - if (UNITS[UNIT_DIV] & (OP0_SRC == OP1_SRC) & (OP0_SRC == OP2_SRC) & (MAX_OP_W * 2 >= VPORT_W[OP0_SRC])) begin - $fatal(1, "If operands 0, 1, and 2 share the same source read port, then the operand ", - "width must not be larger than one quarter of the read port width (the current ", - "read port width is %d bits, hence the operand width can be at most %d bits; ", - VPORT_W[OP0_SRC], VPORT_W[OP0_SRC] / 4, - "however, the specified operand width is %d bits).", MAX_OP_W); - end // Number of stages for required for operand unpacking (maximum of operand stages + 1) localparam int unsigned UNPACK_STAGES = 1 + ((OP0_STAGE > OP1_STAGE) ? ( @@ -179,9 +172,9 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( // ECE498HK edits // The fixed-point instructions help preserve precision in narrow operands by supporting scaling and rounding - localparam bit OP0_NARROW = UNITS[UNIT_MUL] | UNITS[UNIT_ALU] | UNITS[UNIT_ELEM] | UNITS[UNIT_DIV]; - localparam bit OP1_NARROW = UNITS[UNIT_MUL] | UNITS[UNIT_ALU] | UNITS[UNIT_DIV]; - localparam bit OP1_XREG = UNITS[UNIT_MUL] | UNITS[UNIT_ALU] | UNITS[UNIT_DIV]; // OPMVX category of instr use GPT x register rs1 as scalar operand. vdiv/vrem supports .vv and .vx + localparam bit OP0_NARROW = UNITS[UNIT_MUL] | UNITS[UNIT_ALU] | UNITS[UNIT_ELEM]; + localparam bit OP1_NARROW = UNITS[UNIT_MUL] | UNITS[UNIT_ALU]; + localparam bit OP1_XREG = UNITS[UNIT_MUL] | UNITS[UNIT_ALU] | UNITS[UNIT_DIV]; // OPMVX category of instr use GPR x register rs1 as scalar operand. 
vdiv/vrem supports .vv and .vx localparam bit OP0_ELEMWISE = UNITS[UNIT_LSU] | UNITS[UNIT_ELEM]; localparam bit OP1_ELEMWISE = UNITS[UNIT_LSU] | UNITS[UNIT_ELEM]; localparam bit OPMASK_ELEMWISE = UNITS[UNIT_LSU] | UNITS[UNIT_ELEM]; @@ -192,7 +185,7 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( localparam int unsigned MAX_RES_W = MAX_OP_W; // result flags - localparam bit RES0_ALWAYS_VREG = ~UNITS[UNIT_LSU] & ~UNITS[UNIT_ALU] & ~UNITS[UNIT_ELEM]; + localparam bit RES0_ALWAYS_VREG = ~UNITS[UNIT_LSU] & ~UNITS[UNIT_ALU] & ~UNITS[UNIT_ELEM]; // true for DIV localparam bit RES0_NARROW = UNITS[UNIT_ALU]; localparam bit RES0_ALLOW_ELEMWISE = UNITS[UNIT_LSU] | UNITS[UNIT_ELEM]; @@ -650,6 +643,7 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( .VLSU_QUEUE_SZ ( VLSU_QUEUE_SZ ), .VLSU_FLAGS ( VLSU_FLAGS ), .MUL_TYPE ( MUL_TYPE ), + .DIV_TYPE ( DIV_TYPE ), .INIT_STATE_T ( state_t ), .DONT_CARE_ZERO ( DONT_CARE_ZERO ) ) pipeline ( @@ -715,6 +709,7 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( .VLSU_QUEUE_SZ ( VLSU_QUEUE_SZ ), .VLSU_FLAGS ( VLSU_FLAGS ), .MUL_TYPE ( MUL_TYPE ), + .DIV_TYPE ( DIV_TYPE ), .INIT_STATE_T ( state_t ), .DONT_CARE_ZERO ( DONT_CARE_ZERO ) ) pipeline ( @@ -780,6 +775,7 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( .VLSU_QUEUE_SZ ( VLSU_QUEUE_SZ ), .VLSU_FLAGS ( VLSU_FLAGS ), .MUL_TYPE ( MUL_TYPE ), + .DIV_TYPE ( DIV_TYPE ), .INIT_STATE_T ( state_t ), .DONT_CARE_ZERO ( DONT_CARE_ZERO ) ) pipeline ( @@ -845,6 +841,7 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( .VLSU_QUEUE_SZ ( VLSU_QUEUE_SZ ), .VLSU_FLAGS ( VLSU_FLAGS ), .MUL_TYPE ( MUL_TYPE ), + .DIV_TYPE ( DIV_TYPE ), .INIT_STATE_T ( state_t ), .DONT_CARE_ZERO ( DONT_CARE_ZERO ) ) pipeline ( @@ -910,6 +907,7 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( .VLSU_QUEUE_SZ ( VLSU_QUEUE_SZ ), .VLSU_FLAGS ( VLSU_FLAGS ), .MUL_TYPE ( MUL_TYPE ), + .DIV_TYPE ( DIV_TYPE ), .INIT_STATE_T ( state_t ), .DONT_CARE_ZERO ( DONT_CARE_ZERO ) ) pipeline ( @@ 
-975,6 +973,7 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( .VLSU_QUEUE_SZ ( VLSU_QUEUE_SZ ), .VLSU_FLAGS ( VLSU_FLAGS ), .MUL_TYPE ( MUL_TYPE ), + .DIV_TYPE ( DIV_TYPE ), .INIT_STATE_T ( state_t ), .DONT_CARE_ZERO ( DONT_CARE_ZERO ) ) pipeline ( @@ -1040,6 +1039,7 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( .VLSU_QUEUE_SZ ( VLSU_QUEUE_SZ ), .VLSU_FLAGS ( VLSU_FLAGS ), .MUL_TYPE ( MUL_TYPE ), + .DIV_TYPE ( DIV_TYPE ), .INIT_STATE_T ( state_t ), .DONT_CARE_ZERO ( DONT_CARE_ZERO ) ) pipeline ( @@ -1105,6 +1105,7 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( .VLSU_QUEUE_SZ ( VLSU_QUEUE_SZ ), .VLSU_FLAGS ( VLSU_FLAGS ), .MUL_TYPE ( MUL_TYPE ), + .DIV_TYPE ( DIV_TYPE ), .INIT_STATE_T ( state_t ), .DONT_CARE_ZERO ( DONT_CARE_ZERO ) ) pipeline ( diff --git a/rtl/vproc_pkg.sv b/rtl/vproc_pkg.sv index 3863cc7..1f83037 100644 --- a/rtl/vproc_pkg.sv +++ b/rtl/vproc_pkg.sv @@ -216,11 +216,9 @@ typedef struct packed { } op_mode_mul; /***** ECE 498 HK MODIFICATIONS *****/ -typedef enum logic [1:0] { - DIV_VDIVU, // unsigned divide - DIV_VDIV, // signed divide - DIV_VREMU, // unsigned remainder - DIV_VREM // signed remainder +typedef enum logic [0:0] { + DIV_VDIV, // divide + DIV_VREM // remainder } opcode_div; typedef struct packed { diff --git a/rtl/vproc_unit_wrapper.sv b/rtl/vproc_unit_wrapper.sv index 6ba8261..0768be1 100644 --- a/rtl/vproc_unit_wrapper.sv +++ b/rtl/vproc_unit_wrapper.sv @@ -191,7 +191,8 @@ module vproc_unit_wrapper import vproc_pkg::*; #( vproc_div #( .DIV_OP_W ( MAX_OP_W ), .DIV_TYPE ( DIV_TYPE ), - .CTRL_T ( CTRL_T ) + .CTRL_T ( CTRL_T ), + .DONT_CARE_ZERO ( DONT_CARE_ZERO ) ) div ( .clk_i ( clk_i ), .async_rst_ni ( async_rst_ni ), @@ -201,9 +202,9 @@ module vproc_unit_wrapper import vproc_pkg::*; #( .pipe_in_ready_o ( pipe_in_ready_o ), .pipe_in_ctrl_i ( pipe_in_ctrl_i ), - .pipe_in_op1_i ( pipe_in_op_data_i[1] ), // TODO double check this + .pipe_in_op1_i ( pipe_in_op_data_i[1] ), // TODO double check if this 
.pipe_in_op2_i ( pipe_in_op_data_i[0] ), // TODO should be swapped - .pipe_in_op3_i ( pipe_in_op_data_i[2] ), // TODO might be different for DIV (since can only do vec-scalar div not scalar-vec) + .pipe_in_mask_i ( pipe_in_op_data_i[OP_CNT-1][MAX_OP_W/8-1:0] ), .pipe_out_valid_o ( pipe_out_valid_o ), diff --git a/vproc_config.sv b/vproc_config.sv index 258b981..7cdb21b 100644 --- a/vproc_config.sv +++ b/vproc_config.sv @@ -32,7 +32,7 @@ package vproc_config; parameter int unsigned PIPE_CNT = 1; parameter bit [UNIT_CNT-1:0] PIPE_UNITS [PIPE_CNT] = '{ - (UNIT_CNT'(1) << UNIT_LSU) | (UNIT_CNT'(1) << UNIT_ALU) | (UNIT_CNT'(1) << UNIT_MUL) | (UNIT_CNT'(1) << UNIT_SLD) | (UNIT_CNT'(1) << UNIT_ELEM) + (UNIT_CNT'(1) << UNIT_LSU) | (UNIT_CNT'(1) << UNIT_ALU) | (UNIT_CNT'(1) << UNIT_DIV) | (UNIT_CNT'(1) << UNIT_MUL) | (UNIT_CNT'(1) << UNIT_SLD) | (UNIT_CNT'(1) << UNIT_ELEM) }; parameter int unsigned PIPE_W [PIPE_CNT] = '{32}; parameter int unsigned PIPE_VPORT_CNT [PIPE_CNT] = '{1}; From a788f0187d2063c2d261835b16c321f20ffe1889 Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 2 Nov 2022 15:30:37 -0500 Subject: [PATCH 15/64] divider support --- rtl/vproc_div.sv | 112 ++++++++++++++++------------------------- rtl/vproc_div_block.sv | 11 ++-- 2 files changed, 49 insertions(+), 74 deletions(-) diff --git a/rtl/vproc_div.sv b/rtl/vproc_div.sv index 5787c35..e9ad8c2 100644 --- a/rtl/vproc_div.sv +++ b/rtl/vproc_div.sv @@ -1,7 +1,7 @@ // TODO DO THIS module vproc_div #( - parameter int unsigned DIV_OP_W = 64, // DIV unit operand width in bits (NOT PERMANENT) + parameter int unsigned DIV_OP_W = 64, // DIV unit operand width in bits parameter vproc_pkg::div_type DIV_TYPE = vproc_pkg::DIV_GENERIC, parameter bit BUF_OPERANDS = 1'b1, parameter bit BUF_DIV_IN = 1'b1, @@ -44,8 +44,8 @@ module vproc_div #( logic [DIV_OP_W -1:0] operand2_q, operand2_d; logic [DIV_OP_W/8-1:0] operand_mask_q, operand_mask_d; logic [DIV_OP_W -1:0] result_q, result_d; - logic [DIV_OP_W/8-1:0] 
result_mask1_q, result_mask1_d; // mask out stage 1 buffer (MUL_IN) - logic [DIV_OP_W/8-1:0] result_mask2_q, result_mask2_d; // mask out stage 2 buffer (MUL_OUT) + logic [DIV_OP_W/8-1:0] result_mask1_q, result_mask1_d; // mask out stage 1 buffer (DIV_IN) + logic [DIV_OP_W/8-1:0] result_mask2_q, result_mask2_d; // mask out stage 2 buffer (DIV_OUT) logic [DIV_OP_W/8-1:0] result_mask3_q, result_mask3_d; // mask out stage 3 buffer (RESULTS) // needed for vregunpack to mask write destinations generate @@ -178,8 +178,8 @@ module vproc_div #( assign operand2_d = pipe_in_op2_i; assign operand_mask_d = pipe_in_mask_i; - logic [MUL_OP_W/8-1:0] vl_mask; - assign vl_mask = ~state_ex1_q.vl_part_0 ? ({(MUL_OP_W/8){1'b1}} >> (~state_ex1_q.vl_part)) : '0; + logic [DIV_OP_W/8-1:0] vl_mask; + assign vl_mask = ~state_ex1_q.vl_part_0 ? ({(DIV_OP_W/8){1'b1}} >> (~state_ex1_q.vl_part)) : '0; assign result_mask1_d = (state_ex1_q.mode.div.masked ? operand_mask_q : {(DIV_OP_W/8){1'b1}}) & vl_mask; assign result_mask2_d = result_mask1_q; @@ -221,63 +221,45 @@ module vproc_div #( endcase end - logic [(DIV_OP_W/8)*17-1:0] div_op1, div_op2; + logic [(DIV_OP_W*4-1:0] div_op1, div_op2; always_comb begin div_op1 = DONT_CARE_ZERO ? '0 : 'x; - for (int i = 0; i < DIV_OP_W / 32; i++) begin - div_op1[68*i +: 68] = { - // VSEW_8: byte 3, VSEW_32: upper halfword - op1_signs[4*i+3] , ~ex1_vsew_32 ? {{8{op1_signs[4*i+3]}}, operand1_q[32*i+24 +: 8]} : operand1_q[32*i+16 +: 16], - // VSEW_8: byte 2, VSEW_16 and VSEW_32: upper halfword - op1_signs[4*i+3] , ex1_vsew_8 ? {8{op1_signs[4*i+2]}} : operand1_q[32*i+24 +: 8], operand1_q[32*i+16 +: 8 ], - // VSEW_8: byte 1, VSEW_32: lower halfword - 1'b0 , ~ex1_vsew_32 ? {{8{op1_signs[4*i+1]}}, operand1_q[32*i+8 +: 8]} : operand1_q[32*i +: 16], - // VSEW_8: byte 0, VSEW_16 and VSEW_32: lower halfword - ~ex1_vsew_32 & op1_signs[4*i+1], ex1_vsew_8 ? {8{op1_signs[4*i ]}} : operand1_q[32*i+8 +: 8], operand1_q[32*i +: 8 ] - }; - end div_op2 = DONT_CARE_ZERO ? 
'0 : 'x; for (int i = 0; i < DIV_OP_W / 32; i++) begin - div_op2[68*i +: 68] = { - // VSEW_8: byte 3, VSEW_32: lower halfword - 1'b0 , ~ex1_vsew_32 ? {{8{op2_signs[4*i+3]}}, operand2_q[32*i+24 +: 8]} : operand2_q[32*i +: 16], - // VSEW_8: byte 2, VSEW_16 and VSEW_32: upper halfword - op2_signs[4*i+3] , ex1_vsew_8 ? {8{op2_signs[4*i+2]}} : operand2_q[32*i+24 +: 8], operand2_q[32*i+16 +: 8 ], - // VSEW_8: byte 1, VSEW_32: upper halfword - op2_signs[4*i+3] , ~ex1_vsew_32 ? {{8{op2_signs[4*i+1]}}, operand2_q[32*i+8 +: 8]} : operand2_q[32*i+16 +: 16], - // VSEW_8: byte 0, VSEW_16 and VSEW_32: lower halfword - ~ex1_vsew_32 & op2_signs[4*i+1], ex1_vsew_8 ? {8{op2_signs[4*i ]}} : operand2_q[32*i+8 +: 8], operand2_q[32*i +: 8 ] - }; - end - end + unique case (state_ex1_q.eew) + VSEW_8: begin + div_op1[32*(4*i+0) +: 32] = {{24{op1_signs[4*i+0]}, operand1_q[32*(i)+8*0 +: 8]}}; + div_op1[32*(4*i+1) +: 32] = {{24{op1_signs[4*i+1]}, operand1_q[32*(i)+8*1 +: 8]}}; + div_op1[32*(4*i+2) +: 32] = {{24{op1_signs[4*i+2]}, operand1_q[32*(i)+8*2 +: 8]}}; + div_op1[32*(4*i+3) +: 32] = {{24{op1_signs[4*i+3]}, operand1_q[32*(i)+8*3 +: 8]}}; - logic ex2_vsew_8, ex2_vsew_16, ex2_vsew_32; - always_comb begin - ex2_vsew_8 = DONT_CARE_ZERO ? '0 : 'x; - ex2_vsew_16 = DONT_CARE_ZERO ? '0 : 'x; - ex2_vsew_32 = DONT_CARE_ZERO ? 
'0 : 'x; - unique case (state_ex2_q.eew) - VSEW_8: ex2_vsew_8 = 1'b1; - VSEW_16: ex2_vsew_8 = 1'b0; - VSEW_32: ex2_vsew_8 = 1'b0; - default: ; - endcase - unique case (state_ex2_q.eew) - VSEW_8: ex2_vsew_16 = 1'b0; - VSEW_16: ex2_vsew_16 = 1'b1; - VSEW_32: ex2_vsew_16 = 1'b0; - default: ; - endcase - unique case (state_ex2_q.eew) - VSEW_8: ex2_vsew_32 = 1'b0; - VSEW_16: ex2_vsew_32 = 1'b0; - VSEW_32: ex2_vsew_32 = 1'b1; - default: ; - endcase + div_op2[32*(4*i+0) +: 32] = {{24{op2_signs[4*i+0]}, operand2_q[32*(i)+8*0 +: 8]}}; + div_op2[32*(4*i+1) +: 32] = {{24{op2_signs[4*i+1]}, operand2_q[32*(i)+8*1 +: 8]}}; + div_op2[32*(4*i+2) +: 32] = {{24{op2_signs[4*i+2]}, operand2_q[32*(i)+8*2 +: 8]}}; + div_op2[32*(4*i+3) +: 32] = {{24{op2_signs[4*i+3]}, operand2_q[32*(i)+8*3 +: 8]}}; + end + + + VSEW_16:begin + div_op1[32*(2*i+0) +: 32] = {{16{op1_signs[4*i+0]}, operand1_q[32*i+16*0 +: 16]}}; + div_op1[32*(2*i+1) +: 32] = {{16{op1_signs[4*i+2]}, operand1_q[32*i+16*1 +: 16]}}; + + div_op2[32*(2*i+0) +: 32] = {{16{op2_signs[4*i+0]}, operand2_q[32*i+16*0 +: 16]}}; + div_op2[32*(2*i+1) +: 32] = {{16{op2_signs[4*i+2]}, operand2_q[32*i+16*1 +: 16]}}; + end + + VSEW_32: begin + div_op1[32*i +: 32] = operand1_q[32*i +: 32]; + + div_op2[32*i +: 32] = operand2_q[32*i +: 32]; + end + default: ; + endcase + end end // perform unsigned division of xx-bit integers - logic [(DIV_TOP_W/8)*33-1:0] div_res; + logic [(DIV_OP_W*4-1:0] div_res; genvar g; generate for (g = 0; g < DIV_OP_W / 8; g++) begin @@ -291,9 +273,9 @@ module vproc_div #( .async_rst_ni (async_rst_ni ), .sync_rst_ni (sync_rst_ni ), .mod (state_ex3_q.mode.div.op), // tells div_block to mod or not - .op1_i (div_op1 [17*g +: 17] ), - .op2_i (div_op2 [17*g +: 17] ), - .res_o (div_res [33*g +: 33] ) + .op1_i (div_op1 [32*g +: 32] ), + .op2_i (div_op2 [32*g +: 32] ), + .res_o (div_res [32*g +: 32] ) ); end endgenerate @@ -302,25 +284,19 @@ module vproc_div #( always_comb begin result_d = DONT_CARE_ZERO ? 
'0 : 'x; unique case (state_ex3_q.mode.div.op) - - // multiplication retaining low part - /* - DIV_VDIV, // divide - DIV_VREM // rem - */ DIV_VDIV, DIV_VREM: begin unique case (state_ex3_q.eew) VSEW_8: begin - for (int i = 0; i < (MUL_OP_W / 8 ); i++) - result_d[8 *i +: 8 ] = div_res[16*i +: 8 ]; + for (int i = 0; i < (DIV_OP_W / 8 ); i++) + result_d[8 *i +: 8 ] = div_res[32*i +: 8 ]; end VSEW_16: begin - for (int i = 0; i < (MUL_OP_W / 16); i++) + for (int i = 0; i < (DIV_OP_W / 16); i++) result_d[16*i +: 16] = div_res[32*i +: 16]; end VSEW_32: begin - for (int i = 0; i < (MUL_OP_W / 32); i++) - result_d[32*i +: 32] = div_res[64*i +: 32]; + for (int i = 0; i < (DIV_OP_W / 32); i++) + result_d[32*i +: 32] = div_res[32*i +: 32]; end default: ; endcase diff --git a/rtl/vproc_div_block.sv b/rtl/vproc_div_block.sv index 68172d5..2cbeaf4 100644 --- a/rtl/vproc_div_block.sv +++ b/rtl/vproc_div_block.sv @@ -13,12 +13,11 @@ module vproc_div_block #( input logic mod, // 0 = quotient, 1 = modulo - input logic [16:0] op1_i, - input logic [16:0] op2_i, + input logic [31:0] op1_i, + input logic [31:0] op2_i, - output logic [32:0] res_o + output logic [31:0] res_o - // May or may not need more ports... 
); generate @@ -27,8 +26,8 @@ module vproc_div_block #( vproc_pkg::DIV_GENERIC: begin logic [16:0] op1_q, op2_q; - logic [32:0] div_q, div_d; - logic [32:0] res_q, res_d; + logic [16:0] div_q, div_d; + logic [16:0] res_q, res_d; if (BUF_OPS) begin always_ff @(posedge clk_i) begin From f144dba5fae4273ec8c1dfe528b479b65ea4b07b Mon Sep 17 00:00:00 2001 From: Spenser Fong Date: Wed, 2 Nov 2022 17:02:42 -0500 Subject: [PATCH 16/64] Added divide by zero handling --- rtl/vproc_div_block.sv | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/rtl/vproc_div_block.sv b/rtl/vproc_div_block.sv index 2cbeaf4..2b2559e 100644 --- a/rtl/vproc_div_block.sv +++ b/rtl/vproc_div_block.sv @@ -61,7 +61,31 @@ module vproc_div_block #( end end - assign div_d = (mod) ? ($signed(op1_q) % $signed(op2_q)) : ($signed(op1_q) / $signed(op2_q)); + always_comb begin + unique case(mod) + + // DIV/U + 1'b0 : begin + if (op2_q == 0) begin + div_d = '1; + end + else begin + div_d = $signed(op1_q) / $signed(op2_q); + end + end + + // REM/U + 1'b1 : begin + if (op2_q == 0) begin + div_d = op1_q; + end + else begin + div_d = $signed(op1_q) % $signed(op2_q); + end + end + endcase + end + assign res_d = div_d; assign res_o = res_q; From 6278d989e49f6a5fda81ab7655d3dd89c7867501 Mon Sep 17 00:00:00 2001 From: Spenser Fong Date: Wed, 2 Nov 2022 17:37:18 -0500 Subject: [PATCH 17/64] Added division overflow handling --- rtl/vproc_div_block.sv | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/rtl/vproc_div_block.sv b/rtl/vproc_div_block.sv index 2b2559e..f39f533 100644 --- a/rtl/vproc_div_block.sv +++ b/rtl/vproc_div_block.sv @@ -69,6 +69,9 @@ module vproc_div_block #( if (op2_q == 0) begin div_d = '1; end + else if (dividend == {1'b1, 32{1'b0}} & divisor == 32{1'b1}) begin + div_d = '0; // DOUBLE CHECK THIS!!! 
+ end else begin div_d = $signed(op1_q) / $signed(op2_q); end @@ -79,13 +82,17 @@ module vproc_div_block #( if (op2_q == 0) begin div_d = op1_q; end + else if (dividend == {1'b1, 32{1'b0}} & divisor == 32{1'b1}) begin + div_d = '0; + end else begin div_d = $signed(op1_q) % $signed(op2_q); end end + endcase end - + assign res_d = div_d; assign res_o = res_q; From 0629502d95b7edf6af13bee560ca486932025982 Mon Sep 17 00:00:00 2001 From: weustis2 Date: Wed, 2 Nov 2022 18:59:06 -0500 Subject: [PATCH 18/64] syntax fixes --- rtl/div_int.sv | 53 ----------------------------------- rtl/vproc_core.sv | 1 - rtl/vproc_div.sv | 30 ++++++++++---------- rtl/vproc_div_block.sv | 10 +++---- rtl/vproc_pipeline_wrapper.sv | 2 +- vproc_config.sv | 8 ++---- 6 files changed, 24 insertions(+), 80 deletions(-) delete mode 100644 rtl/div_int.sv diff --git a/rtl/div_int.sv b/rtl/div_int.sv deleted file mode 100644 index 1dbd279..0000000 --- a/rtl/div_int.sv +++ /dev/null @@ -1,53 +0,0 @@ -module div_int #(parameter WIDTH=4) ( - input wire logic clk, - input wire logic start, // start signal - output logic busy, // calculation in progress - output logic valid, // quotient and remainder are valid - output logic dbz, // divide by zero flag - input wire logic [WIDTH-1:0] x, // dividend - input wire logic [WIDTH-1:0] y, // divisor - output logic [WIDTH-1:0] q, // quotient - output logic [WIDTH-1:0] r // remainder - ); - - logic [WIDTH-1:0] y1; // copy of divisor - logic [WIDTH-1:0] q1, q1_next; // intermediate quotient - logic [WIDTH:0] ac, ac_next; // accumulator (1 bit wider) - logic [$clog2(WIDTH)-1:0] i; // iteration counter - - always_comb begin - if (ac >= {1'b0,y1}) begin - ac_next = ac - y1; - {ac_next, q1_next} = {ac_next[WIDTH-1:0], q1, 1'b1}; - end else begin - {ac_next, q1_next} = {ac, q1} << 1; - end - end - - always_ff @(posedge clk) begin - if (start) begin - valid <= 0; - i <= 0; - if (y == 0) begin // catch divide by zero - busy <= 0; - dbz <= 1; - end else begin // initialize 
values - busy <= 1; - dbz <= 0; - y1 <= y; - {ac, q1} <= {{WIDTH{1'b0}}, x, 1'b0}; - end - end else if (busy) begin - if (i == WIDTH-1) begin // we're done - busy <= 0; - valid <= 1; - q <= q1_next; - r <= ac_next[WIDTH:1]; // undo final shift - end else begin // next iteration - i <= i + 1; - ac <= ac_next; - q1 <= q1_next; - end - end - end -endmodule diff --git a/rtl/vproc_core.sv b/rtl/vproc_core.sv index a4f2af5..4fcc544 100644 --- a/rtl/vproc_core.sv +++ b/rtl/vproc_core.sv @@ -265,7 +265,6 @@ module vproc_core import vproc_pkg::*; #( logic pend_load; logic pend_store; } decoder_data; - // signals for decoder and for decoder buffer logic dec_ready, dec_valid, dec_clear; logic dec_buf_valid_q, dec_buf_valid_d; diff --git a/rtl/vproc_div.sv b/rtl/vproc_div.sv index e9ad8c2..7af9894 100644 --- a/rtl/vproc_div.sv +++ b/rtl/vproc_div.sv @@ -221,31 +221,31 @@ module vproc_div #( endcase end - logic [(DIV_OP_W*4-1:0] div_op1, div_op2; + logic [(DIV_OP_W*4)-1:0] div_op1, div_op2; always_comb begin div_op1 = DONT_CARE_ZERO ? '0 : 'x; div_op2 = DONT_CARE_ZERO ? 
'0 : 'x; for (int i = 0; i < DIV_OP_W / 32; i++) begin unique case (state_ex1_q.eew) VSEW_8: begin - div_op1[32*(4*i+0) +: 32] = {{24{op1_signs[4*i+0]}, operand1_q[32*(i)+8*0 +: 8]}}; - div_op1[32*(4*i+1) +: 32] = {{24{op1_signs[4*i+1]}, operand1_q[32*(i)+8*1 +: 8]}}; - div_op1[32*(4*i+2) +: 32] = {{24{op1_signs[4*i+2]}, operand1_q[32*(i)+8*2 +: 8]}}; - div_op1[32*(4*i+3) +: 32] = {{24{op1_signs[4*i+3]}, operand1_q[32*(i)+8*3 +: 8]}}; - - div_op2[32*(4*i+0) +: 32] = {{24{op2_signs[4*i+0]}, operand2_q[32*(i)+8*0 +: 8]}}; - div_op2[32*(4*i+1) +: 32] = {{24{op2_signs[4*i+1]}, operand2_q[32*(i)+8*1 +: 8]}}; - div_op2[32*(4*i+2) +: 32] = {{24{op2_signs[4*i+2]}, operand2_q[32*(i)+8*2 +: 8]}}; - div_op2[32*(4*i+3) +: 32] = {{24{op2_signs[4*i+3]}, operand2_q[32*(i)+8*3 +: 8]}}; + div_op1[32*(4*i+0) +: 32] = {{24{op1_signs[4*i+0]}}, operand1_q[32*(i)+8*0 +: 8]}; + div_op1[32*(4*i+1) +: 32] = {{24{op1_signs[4*i+1]}}, operand1_q[32*(i)+8*1 +: 8]}; + div_op1[32*(4*i+2) +: 32] = {{24{op1_signs[4*i+2]}}, operand1_q[32*(i)+8*2 +: 8]}; + div_op1[32*(4*i+3) +: 32] = {{24{op1_signs[4*i+3]}}, operand1_q[32*(i)+8*3 +: 8]}; + + div_op2[32*(4*i+0) +: 32] = {{24{op2_signs[4*i+0]}}, operand2_q[32*(i)+8*0 +: 8]}; + div_op2[32*(4*i+1) +: 32] = {{24{op2_signs[4*i+1]}}, operand2_q[32*(i)+8*1 +: 8]}; + div_op2[32*(4*i+2) +: 32] = {{24{op2_signs[4*i+2]}}, operand2_q[32*(i)+8*2 +: 8]}; + div_op2[32*(4*i+3) +: 32] = {{24{op2_signs[4*i+3]}}, operand2_q[32*(i)+8*3 +: 8]}; end VSEW_16:begin - div_op1[32*(2*i+0) +: 32] = {{16{op1_signs[4*i+0]}, operand1_q[32*i+16*0 +: 16]}}; - div_op1[32*(2*i+1) +: 32] = {{16{op1_signs[4*i+2]}, operand1_q[32*i+16*1 +: 16]}}; + div_op1[32*(2*i+0) +: 32] = {{16{op1_signs[4*i+0]}}, operand1_q[32*i+16*0 +: 16]}; + div_op1[32*(2*i+1) +: 32] = {{16{op1_signs[4*i+2]}}, operand1_q[32*i+16*1 +: 16]}; - div_op2[32*(2*i+0) +: 32] = {{16{op2_signs[4*i+0]}, operand2_q[32*i+16*0 +: 16]}}; - div_op2[32*(2*i+1) +: 32] = {{16{op2_signs[4*i+2]}, operand2_q[32*i+16*1 +: 16]}}; + 
div_op2[32*(2*i+0) +: 32] = {{16{op2_signs[4*i+0]}}, operand2_q[32*i+16*0 +: 16]}; + div_op2[32*(2*i+1) +: 32] = {{16{op2_signs[4*i+2]}}, operand2_q[32*i+16*1 +: 16]}; end VSEW_32: begin @@ -259,7 +259,7 @@ module vproc_div #( end // perform unsigned division of xx-bit integers - logic [(DIV_OP_W*4-1:0] div_res; + logic [DIV_OP_W*4-1:0] div_res; genvar g; generate for (g = 0; g < DIV_OP_W / 8; g++) begin diff --git a/rtl/vproc_div_block.sv b/rtl/vproc_div_block.sv index f39f533..7da0f93 100644 --- a/rtl/vproc_div_block.sv +++ b/rtl/vproc_div_block.sv @@ -1,7 +1,7 @@ // TODO DO THIS module vproc_div_block #( - parameter vproc_pkg::div_types DIV_TYPE = vproc_pkg::DIV_GENERIC, + parameter vproc_pkg::div_type DIV_TYPE = vproc_pkg::DIV_GENERIC, parameter bit BUF_OPS = 1'b0, // buffer operands (op1_i and op2_i) parameter bit BUF_DIV = 1'b0, // buffer division result parameter bit BUF_RES = 1'b0 // buffer final result (res_o) @@ -66,11 +66,11 @@ module vproc_div_block #( // DIV/U 1'b0 : begin - if (op2_q == 0) begin + if (op2_q == '0) begin div_d = '1; end - else if (dividend == {1'b1, 32{1'b0}} & divisor == 32{1'b1}) begin - div_d = '0; // DOUBLE CHECK THIS!!! 
+ else if (op1_q == {1'b1, {31{1'b0}}} & op2_q == '1) begin + div_d = op1_q; end else begin div_d = $signed(op1_q) / $signed(op2_q); @@ -82,7 +82,7 @@ module vproc_div_block #( if (op2_q == 0) begin div_d = op1_q; end - else if (dividend == {1'b1, 32{1'b0}} & divisor == 32{1'b1}) begin + else if (op1_q == {1'b1, {31{1'b0}}} & op2_q == '1) begin div_d = '0; end else begin diff --git a/rtl/vproc_pipeline_wrapper.sv b/rtl/vproc_pipeline_wrapper.sv index c103d4b..8b4d2fb 100644 --- a/rtl/vproc_pipeline_wrapper.sv +++ b/rtl/vproc_pipeline_wrapper.sv @@ -460,7 +460,7 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( unit_lsu: state_init.op_flags[OP_CNT-1].vreg = pipe_in_data_i.mode.lsu.masked; unit_alu: state_init.op_flags[OP_CNT-1].vreg = pipe_in_data_i.mode.alu.op_mask != ALU_MASK_NONE; // ECE498HK additions - unit_div: state_init.op_flags[OP_CNT-1].vreg = pipe_in_data_i.mode.div.masked + unit_div: state_init.op_flags[OP_CNT-1].vreg = pipe_in_data_i.mode.div.masked; unit_mul: state_init.op_flags[OP_CNT-1].vreg = pipe_in_data_i.mode.mul.masked; unit_sld: state_init.op_flags[OP_CNT-1].vreg = pipe_in_data_i.mode.sld.masked; unit_elem: state_init.op_flags[OP_CNT-1].vreg = pipe_in_data_i.mode.elem.masked; diff --git a/vproc_config.sv b/vproc_config.sv index 7cdb21b..106b739 100644 --- a/vproc_config.sv +++ b/vproc_config.sv @@ -1,4 +1,4 @@ -// Auto-generated on Thu Sep 29 05:37:34 CDT 2022 +// Auto-generated on Wed Nov 2 18:43:57 CDT 2022 // Vector coprocessor default configuration package // @@ -13,12 +13,10 @@ // Configuration details: // - Vector register width: 128 bits // - Vector pipelines: -// * Pipeline 0: 32 bits wide, contains VLSU, VALU, VMUL, VSLD, VELEM +// * Pipeline 0: 32 bits wide, contains VLSU, VALU, VMUL, VSLD, VELEM, VDIV // Uses 1 128-bit vreg read ports and write port 0 // - Vector register file needs 2 read ports and 1 write ports -`define MAIN_CORE_IBEX - package vproc_config; import vproc_pkg::*; @@ -32,7 +30,7 @@ package vproc_config; 
parameter int unsigned PIPE_CNT = 1; parameter bit [UNIT_CNT-1:0] PIPE_UNITS [PIPE_CNT] = '{ - (UNIT_CNT'(1) << UNIT_LSU) | (UNIT_CNT'(1) << UNIT_ALU) | (UNIT_CNT'(1) << UNIT_DIV) | (UNIT_CNT'(1) << UNIT_MUL) | (UNIT_CNT'(1) << UNIT_SLD) | (UNIT_CNT'(1) << UNIT_ELEM) + (UNIT_CNT'(1) << UNIT_LSU) | (UNIT_CNT'(1) << UNIT_ALU) | (UNIT_CNT'(1) << UNIT_MUL) | (UNIT_CNT'(1) << UNIT_SLD) | (UNIT_CNT'(1) << UNIT_ELEM) | (UNIT_CNT'(1) << UNIT_DIV) }; parameter int unsigned PIPE_W [PIPE_CNT] = '{32}; parameter int unsigned PIPE_VPORT_CNT [PIPE_CNT] = '{1}; From e30aea022a5b4169e61b02137543b78482fcb337 Mon Sep 17 00:00:00 2001 From: Hassan Farooq <1hassanfarooq@gmail.com> Date: Wed, 2 Nov 2022 19:59:50 -0500 Subject: [PATCH 19/64] Update default.yml --- .github/workflows/default.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/default.yml b/.github/workflows/default.yml index 1211035..2a5e1bc 100644 --- a/.github/workflows/default.yml +++ b/.github/workflows/default.yml @@ -1,9 +1,9 @@ name: CI on: push: - branches: [ main ] + branches: [ main, divide ] pull_request: - branches: [ main ] + branches: [ main, divide ] jobs: From dd346a0123ae347c1b5c1b51896fbd805c6ab6da Mon Sep 17 00:00:00 2001 From: weustis2 Date: Wed, 2 Nov 2022 20:00:49 -0500 Subject: [PATCH 20/64] successful compilation --- Makefile | 187 ------------------------------------------ config.mk | 3 - rtl/vproc_core.sv | 2 +- rtl/vproc_pipeline.sv | 2 +- rtl/vproc_pkg.sv | 3 + vproc_config.sv | 3 +- 6 files changed, 7 insertions(+), 193 deletions(-) delete mode 100755 Makefile diff --git a/Makefile b/Makefile deleted file mode 100755 index 1d57545..0000000 --- a/Makefile +++ /dev/null @@ -1,187 +0,0 @@ -# Copyright TU Wien -# Licensed under the Solderpad Hardware License v2.1, see LICENSE.txt for details -# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 - - -# Makefile for generating the configuration package -# -# The intent of the dynamically generated configuration 
package is to provide -# *consistent default values* for configuration parameters of the vproc_core -# module across different synthesis and simulation workflows. -# -# The configuration is controlled with following (environment) variables: -# - VMEM_W: The width (in bits) of the vector coprocessor's memory interface -# - VREG_W: The width (in bits) of the vector coprocessor's vector registers -# - VPROC_PIPELINES: Defines the vector pipelines. Each pipeline is defined by -# a string of the form "WIDTH:UNIT[,UNIT]*" where WIDTH is the width in bits -# of the pipeline's datapath and each occurence of UNIT selects one of the -# vector execution units (either VLSU, VALU, VMUL, VSLD, or VELEM). -# - VPROC_CONFIG: Sets default values for the other parameters (that can be -# individually overriden) depending on the desired number of vector -# pipelines (choose 1, 2, 3, or 5 pipelines by setting this variable to -# compact, dual, triple, or legacy, respectively). - -VPROC_CONFIG_PKG ?= vproc_config.sv - -VPROC_CONFIG ?= compact -ifeq ($(VPROC_CONFIG), compact) - VPORT_POLICY ?= some - VMEM_W ?= 32 - VREG_W ?= 128 - VPROC_PIPELINES ?= $(VMEM_W):VLSU,VALU,VMUL,VSLD,VELEM -else -ifeq ($(VPROC_CONFIG), dual) - VPORT_POLICY ?= some - VMEM_W ?= 32 - VREG_W ?= 128 - VPROC_PIPELINES ?= $(VMEM_W):VLSU,VALU,VELEM $(VPIPE_W_VMUL):VMUL,VSLD -else -ifeq ($(VPROC_CONFIG), triple) - VPORT_POLICY ?= some - VMEM_W ?= 32 - VREG_W ?= 256 - VPROC_PIPELINES ?= $(VMEM_W):VLSU $(VPIPE_W_DFLT):VALU,VELEM $(VPIPE_W_VMUL):VMUL,VSLD -else -ifeq ($(VPROC_CONFIG), legacy) - VPORT_POLICY ?= some - VMEM_W ?= 32 - VREG_W ?= 128 - VPROC_PIPELINES ?= $(VMEM_W):VLSU $(VPIPE_W_DFLT):VALU $(VPIPE_W_VMUL):VMUL \ - $(VPIPE_W_DFLT):VSLD 32:VELEM -else -$(error Unknown vector coprocessor configuration $(VPROC_CONFIG)) -endif -endif -endif -endif - -# default widths of vector pipelines based on VPORT_POLICY -ifeq ($(VPORT_POLICY), few) - VPIPE_W_DFLT := $(shell echo $$(($(VREG_W) / 2))) - VPIPE_W_VMUL := $(shell 
echo $$(($(VREG_W) / 4))) -else -ifeq ($(VPORT_POLICY), some) - VPIPE_W_DFLT := $(shell echo $$(($(VREG_W) / 2))) - VPIPE_W_VMUL := $(shell echo $$(($(VREG_W) / 2))) -else -ifeq ($(VPORT_POLICY), many) - VPIPE_W_DFLT := $(VREG_W) - VPIPE_W_VMUL := $(VREG_W) -else -$(error Unknown vector register file port policy $(VPORT_POLICY)) -endif -endif -endif - -.PHONY: $(VPROC_CONFIG_PKG) -$(VPROC_CONFIG_PKG): - @echo "// Auto-generated on $$(date)" >$@; \ - echo "" >>$@; \ - echo "// Vector coprocessor default configuration package" >>$@; \ - echo "//" >>$@; \ - echo "// The package defined in this file provides *consistent default values* for" >>$@; \ - echo "// configuration parameters of the vproc_core module for the configuration" >>$@; \ - echo "// shown below across different synthesis and simulation workflows. The" >>$@; \ - echo "// constants defined in this package are intended to be used *exclusively* as" >>$@; \ - echo "// *default values* for the parameters of the vproc_core module and should" >>$@; \ - echo "// *not* be used anywhere else in the code, such that a design instantiating" >>$@; \ - echo "// the vproc_core module can override any parameter with a different value." 
>>$@; \ - echo "" >>$@; \ - echo "// Configuration details:" >>$@; \ - echo "// - Vector register width: $(VREG_W) bits" >>$@; \ - echo "// - Vector pipelines:" >>$@; \ - vport_rd_cnt=1; \ - vport_wr_capacities=""; \ - pipe_cnt=0; \ - pipe_units=""; \ - pipe_widths=""; \ - pipe_vport_cnt=""; \ - pipe_vport_idx=""; \ - pipe_vport_wr=""; \ - for pipe in $(VPROC_PIPELINES); do \ - width=`echo $$pipe | cut -d ":" -f 1`; \ - unit_str=`echo $$pipe | cut -d ":" -f 2 | sed 's/,/, /g'`; \ - unit_mask=`echo $$pipe | cut -d ":" -f 2 | sed 's/,/ | /g' | \ - sed "s/V\(LSU\|ALU\|MUL\|SLD\|ELEM\)/(UNIT_CNT'(1) << UNIT_\1)/g"`; \ - vport_cnt=1; \ - if echo "$$pipe" | grep -q "VMUL" && [ $$(($$width * 4)) -gt "$(VREG_W)" ]; then \ - vport_cnt=2; \ - fi; \ - if [ $$(($$width * 2)) -gt "$(VREG_W)" ]; then \ - vport_cnt=$$(($$vport_cnt + 1)); \ - fi; \ - vport_wr=0; \ - remaining_cap=$$(($(VREG_W) - $$width)); \ - for cap in $$(echo $$vport_wr_capacities); do \ - if [ "$$cap" -ge "$$width" ]; then \ - remaining_cap=$$(($$cap - $$width)); \ - break; \ - fi; \ - vport_wr=$$(($$vport_wr + 1)); \ - done; \ - if [ -z "$$pipe_units" ]; then \ - pipe_units="$${unit_mask}"; \ - pipe_widths="$${width}"; \ - pipe_vport_cnt="$${vport_cnt}"; \ - pipe_vport_idx="$${vport_rd_cnt}"; \ - pipe_vport_wr="$${vport_wr}"; \ - else \ - pipe_units="$${pipe_units}, $${unit_mask}"; \ - pipe_widths="$${pipe_widths}, $${width}"; \ - pipe_vport_cnt="$${pipe_vport_cnt}, $${vport_cnt}"; \ - pipe_vport_idx="$${pipe_vport_idx}, $${vport_rd_cnt}"; \ - pipe_vport_wr="$${pipe_vport_wr}, $${vport_wr}"; \ - fi; \ - vport_rd_cnt=$$(($$vport_rd_cnt + $$vport_cnt)); \ - if [ "$$vport_wr" = `echo $${vport_wr_capacities} | wc -w` ]; then \ - vport_wr_capacities="$${vport_wr_capacities} $${remaining_cap}"; \ - else \ - awk_word_idx=$$(($$vport_wr + 1)); \ - vport_wr_capacities=`echo "$${vport_wr_capacities}" | \ - awk -v n=$$awk_word_idx -v r=$$remaining_cap '{$$n=r} 1'`; \ - fi; \ - echo "// * Pipeline $${pipe_cnt}: $${width} 
bits wide, contains $${unit_str}" >>$@; \ - echo "// Uses $${vport_cnt} $(VREG_W)-bit vreg read ports" \ - "and write port $${vport_wr}" >>$@; \ - pipe_cnt=$$(($$pipe_cnt + 1)); \ - done; \ - pipe_widths="'{$${pipe_widths}}"; \ - pipe_vport_cnt="'{$${pipe_vport_cnt}}"; \ - pipe_vport_idx="'{$${pipe_vport_idx}}"; \ - pipe_vport_wr="'{$${pipe_vport_wr}}"; \ - vport_wr_cnt=`echo $${vport_wr_capacities} | wc -w`; \ - echo "// - Vector register file needs $${vport_rd_cnt} read ports and $${vport_wr_cnt}" \ - "write ports" >>$@; \ - buf_flags="(BUF_FLAGS_W'(1) << BUF_DEQUEUE) | (BUF_FLAGS_W'(1) << BUF_VREG_PEND)"; \ - if [ -n "$(TIMEPRED)" ] && [ "$(TIMEPRED)" != "0" ]; then \ - buf_flags="$${buf_flags} | (BUF_FLAGS_W'(1) << BUF_VREG_WR_MUX_TIMEPRED)"; \ - fi; \ - echo "" >>$@; \ - echo "package vproc_config;" >>$@; \ - echo "" >>$@; \ - echo " import vproc_pkg::*;" >>$@; \ - echo "" >>$@; \ - echo " parameter vreg_type VREG_TYPE = VREG_GENERIC;" >>$@; \ - echo " parameter int unsigned VREG_W = $(VREG_W);" >>$@; \ - echo " parameter int unsigned VPORT_RD_CNT = $$vport_rd_cnt;" >>$@; \ - echo " parameter int unsigned VPORT_RD_W [VPORT_RD_CNT] = '{default: VREG_W};" >>$@; \ - echo " parameter int unsigned VPORT_WR_CNT = $$vport_wr_cnt;" >>$@; \ - echo " parameter int unsigned VPORT_WR_W [VPORT_WR_CNT] = '{default: VREG_W};" >>$@; \ - echo "" >>$@; \ - echo " parameter int unsigned PIPE_CNT = $$pipe_cnt;" >>$@; \ - echo " parameter bit [UNIT_CNT-1:0] PIPE_UNITS [PIPE_CNT] = '{" >>$@; \ - echo " $$pipe_units" >>$@; \ - echo " };" >>$@; \ - echo " parameter int unsigned PIPE_W [PIPE_CNT] = $$pipe_widths;" >>$@; \ - echo " parameter int unsigned PIPE_VPORT_CNT [PIPE_CNT] = $$pipe_vport_cnt;" >>$@; \ - echo " parameter int unsigned PIPE_VPORT_IDX [PIPE_CNT] = $$pipe_vport_idx;" >>$@; \ - echo " parameter int unsigned PIPE_VPORT_WR [PIPE_CNT] = $$pipe_vport_wr;" >>$@; \ - echo "" >>$@; \ - echo " parameter int unsigned VLSU_QUEUE_SZ = 4;" >>$@; \ - echo " parameter bit 
[VLSU_FLAGS_W-1:0] VLSU_FLAGS = '0;" >>$@; \ - echo " parameter mul_type MUL_TYPE = MUL_GENERIC;" >>$@; \ - echo "" >>$@; \ - echo " parameter int unsigned INSTR_QUEUE_SZ = 2;" >>$@; \ - echo " parameter bit [BUF_FLAGS_W-1:0] BUF_FLAGS = $${buf_flags};" >>$@; \ - echo "" >>$@; \ - echo "endpackage" >>$@; diff --git a/config.mk b/config.mk index dea132b..9d6fc5a 100755 --- a/config.mk +++ b/config.mk @@ -106,9 +106,6 @@ $(VPROC_CONFIG_PKG): vport_cnt=1; \ if echo "$$pipe" | grep -q "VMUL" && [ $$(($$width * 4)) -gt "$(VREG_W)" ]; then \ vport_cnt=2; \ - fi; \ - if echo "$$pipe" | grep -q "VDIV" && [ $$(($$width * 4)) -gt "$(VREG_W)" ]; then \ - vport_cnt=2; \ fi; \ if [ $$(($$width * 2)) -gt "$(VREG_W)" ]; then \ vport_cnt=$$(($$vport_cnt + 1)); \ diff --git a/rtl/vproc_core.sv b/rtl/vproc_core.sv index 4fcc544..2d25852 100644 --- a/rtl/vproc_core.sv +++ b/rtl/vproc_core.sv @@ -721,7 +721,7 @@ module vproc_core import vproc_pkg::*; #( generate if (INSTR_QUEUE_SZ > 0) begin vproc_queue #( - .WIDTH ( $bits(decoder_data) ), + .WIDTH ( 119 ), .DEPTH ( INSTR_QUEUE_SZ ) ) instr_queue ( .clk_i ( clk_i ), diff --git a/rtl/vproc_pipeline.sv b/rtl/vproc_pipeline.sv index d73e7dc..b5e662f 100644 --- a/rtl/vproc_pipeline.sv +++ b/rtl/vproc_pipeline.sv @@ -823,7 +823,7 @@ module vproc_pipeline import vproc_pkg::*; #( .OP_HOLD_FLAG ( OP_HOLD_FLAG ), .UNPACK_STAGES ( UNPACK_STAGES ), .FLAGS_T ( unpack_flags ), - .CTRL_DATA_W ( $bits(ctrl_t) ), + .CTRL_DATA_W ( 83 ), .DONT_CARE_ZERO ( DONT_CARE_ZERO ) ) unpack ( .clk_i ( clk_i ), diff --git a/rtl/vproc_pkg.sv b/rtl/vproc_pkg.sv index 1f83037..e08dfa8 100644 --- a/rtl/vproc_pkg.sv +++ b/rtl/vproc_pkg.sv @@ -227,6 +227,9 @@ typedef struct packed { opcode_div op; logic op1_signed; logic op2_signed; +`ifdef VPROC_OP_MODE_UNION + logic [8:0] unused; +`endif } op_mode_div; /***** END ECE 498 HK MODIFICATIONS *****/ diff --git a/vproc_config.sv b/vproc_config.sv index 106b739..5566499 100644 --- a/vproc_config.sv +++ b/vproc_config.sv @@ 
-1,4 +1,4 @@ -// Auto-generated on Wed Nov 2 18:43:57 CDT 2022 +// Auto-generated on Wed Nov 2 19:23:49 CDT 2022 // Vector coprocessor default configuration package // @@ -16,6 +16,7 @@ // * Pipeline 0: 32 bits wide, contains VLSU, VALU, VMUL, VSLD, VELEM, VDIV // Uses 1 128-bit vreg read ports and write port 0 // - Vector register file needs 2 read ports and 1 write ports +`define MAIN_CORE_IBEX package vproc_config; From 68393a03ff024db78cb98d10687ba6189d7f4fc7 Mon Sep 17 00:00:00 2001 From: weustis2 Date: Wed, 2 Nov 2022 20:04:52 -0500 Subject: [PATCH 21/64] back to --- rtl/vproc_core.sv | 2 +- rtl/vproc_pipeline.sv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rtl/vproc_core.sv b/rtl/vproc_core.sv index 2d25852..0f42283 100644 --- a/rtl/vproc_core.sv +++ b/rtl/vproc_core.sv @@ -721,7 +721,7 @@ module vproc_core import vproc_pkg::*; #( generate if (INSTR_QUEUE_SZ > 0) begin vproc_queue #( - .WIDTH ( 119 ), + .WIDTH ( $bits(decoder_data)), .DEPTH ( INSTR_QUEUE_SZ ) ) instr_queue ( .clk_i ( clk_i ), diff --git a/rtl/vproc_pipeline.sv b/rtl/vproc_pipeline.sv index b5e662f..fddc3eb 100644 --- a/rtl/vproc_pipeline.sv +++ b/rtl/vproc_pipeline.sv @@ -823,7 +823,7 @@ module vproc_pipeline import vproc_pkg::*; #( .OP_HOLD_FLAG ( OP_HOLD_FLAG ), .UNPACK_STAGES ( UNPACK_STAGES ), .FLAGS_T ( unpack_flags ), - .CTRL_DATA_W ( 83 ), + .CTRL_DATA_W ( $bits(ctrl_t) ), .DONT_CARE_ZERO ( DONT_CARE_ZERO ) ) unpack ( .clk_i ( clk_i ), From d56efdd86128e7916f0c32181f5c002fb5e65240 Mon Sep 17 00:00:00 2001 From: Hassan Farooq <1hassanfarooq@gmail.com> Date: Wed, 2 Nov 2022 20:27:29 -0500 Subject: [PATCH 22/64] Update default.yml --- .github/workflows/default.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/default.yml b/.github/workflows/default.yml index b78e239..c82f4b1 100644 --- a/.github/workflows/default.yml +++ b/.github/workflows/default.yml @@ -1,9 +1,9 @@ name: CI on: push: - branches: [ main ] + branches: [ 
main, divide ] pull_request: - branches: [ main ] + branches: [ main, divide ] jobs: From 5a24090f9d4649ec06f073244bc5388c46137ed9 Mon Sep 17 00:00:00 2001 From: Hassan Farooq <1hassanfarooq@gmail.com> Date: Wed, 2 Nov 2022 20:29:13 -0500 Subject: [PATCH 23/64] Update default.yml --- .github/workflows/default.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/default.yml b/.github/workflows/default.yml index c82f4b1..2a5e1bc 100644 --- a/.github/workflows/default.yml +++ b/.github/workflows/default.yml @@ -73,7 +73,7 @@ jobs: fail-fast: false matrix: unit: [lsu, alu, mul, sld, elem, csr, misc] - main_core: [ibex, cv32e40x] + main_core: [ibex] steps: - uses: actions/checkout@v2 with: From 3612aea7bc2cac5034e865cde8d17842369fe391 Mon Sep 17 00:00:00 2001 From: weustis2 Date: Wed, 2 Nov 2022 20:31:20 -0500 Subject: [PATCH 24/64] default case --- rtl/vproc_div_block.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtl/vproc_div_block.sv b/rtl/vproc_div_block.sv index 7da0f93..0c824b8 100644 --- a/rtl/vproc_div_block.sv +++ b/rtl/vproc_div_block.sv @@ -89,7 +89,7 @@ module vproc_div_block #( div_d = $signed(op1_q) % $signed(op2_q); end end - + default: ; endcase end From ebade29969c5c6e7d247bb5736716297f9468bba Mon Sep 17 00:00:00 2001 From: Hassan Farooq <1hassanfarooq@gmail.com> Date: Wed, 2 Nov 2022 20:39:25 -0500 Subject: [PATCH 25/64] Update vproc_tb.sv --- sim/vproc_tb.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sim/vproc_tb.sv b/sim/vproc_tb.sv index 798e48c..44ba7b0 100644 --- a/sim/vproc_tb.sv +++ b/sim/vproc_tb.sv @@ -4,7 +4,7 @@ module vproc_tb #( - parameter PROG_PATHS_LIST = "/home/hfaroo9/ece498hk-RISCV-V-Extension/src/vicuna/sim/files.txt", + parameter PROG_PATHS_LIST = "", parameter int unsigned MEM_W = 32, parameter int unsigned MEM_SZ = 262144, parameter int unsigned MEM_LATENCY = 1, From 03eabd24eb0780006aee7b03220aec38a88d22a3 Mon Sep 17 00:00:00 2001 From: Hassan 
Farooq <1hassanfarooq@gmail.com> Date: Wed, 2 Nov 2022 20:42:12 -0500 Subject: [PATCH 26/64] Update vproc_tb.sv --- sim/vproc_tb.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sim/vproc_tb.sv b/sim/vproc_tb.sv index 798e48c..44ba7b0 100644 --- a/sim/vproc_tb.sv +++ b/sim/vproc_tb.sv @@ -4,7 +4,7 @@ module vproc_tb #( - parameter PROG_PATHS_LIST = "/home/hfaroo9/ece498hk-RISCV-V-Extension/src/vicuna/sim/files.txt", + parameter PROG_PATHS_LIST = "", parameter int unsigned MEM_W = 32, parameter int unsigned MEM_SZ = 262144, parameter int unsigned MEM_LATENCY = 1, From 334ef30d309c2c7d1e5ee7066619fdef0bdbd63b Mon Sep 17 00:00:00 2001 From: Spenser Fong Date: Wed, 2 Nov 2022 20:45:18 -0500 Subject: [PATCH 27/64] Fixed linting --- rtl/vproc_div.sv | 8 ++++---- rtl/vproc_div_block.sv | 2 +- rtl/vproc_pipeline.sv | 2 +- rtl/vproc_pipeline_wrapper.sv | 8 ++++---- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/rtl/vproc_div.sv b/rtl/vproc_div.sv index 7af9894..b76f8e6 100644 --- a/rtl/vproc_div.sv +++ b/rtl/vproc_div.sv @@ -22,7 +22,7 @@ module vproc_div #( input logic [DIV_OP_W -1:0] pipe_in_op2_i, input logic [DIV_OP_W/8-1:0] pipe_in_mask_i, - + output logic pipe_out_valid_o, input logic pipe_out_ready_i, @@ -76,7 +76,7 @@ module vproc_div #( state_ex1_q = state_ex1_d; operand1_q = operand1_d; operand2_q = operand2_d; - operand_mask_q <= operand_mask_d; + operand_mask_q = operand_mask_d; end assign state_ex1_ready = state_ex2_ready; end @@ -226,7 +226,7 @@ module vproc_div #( div_op1 = DONT_CARE_ZERO ? '0 : 'x; div_op2 = DONT_CARE_ZERO ? 
'0 : 'x; for (int i = 0; i < DIV_OP_W / 32; i++) begin - unique case (state_ex1_q.eew) + unique case (state_ex1_q.eew) VSEW_8: begin div_op1[32*(4*i+0) +: 32] = {{24{op1_signs[4*i+0]}}, operand1_q[32*(i)+8*0 +: 8]}; div_op1[32*(4*i+1) +: 32] = {{24{op1_signs[4*i+1]}}, operand1_q[32*(i)+8*1 +: 8]}; @@ -254,7 +254,7 @@ module vproc_div #( div_op2[32*i +: 32] = operand2_q[32*i +: 32]; end default: ; - endcase + endcase end end diff --git a/rtl/vproc_div_block.sv b/rtl/vproc_div_block.sv index 0c824b8..e638246 100644 --- a/rtl/vproc_div_block.sv +++ b/rtl/vproc_div_block.sv @@ -89,7 +89,7 @@ module vproc_div_block #( div_d = $signed(op1_q) % $signed(op2_q); end end - default: ; + default: ; endcase end diff --git a/rtl/vproc_pipeline.sv b/rtl/vproc_pipeline.sv index fddc3eb..fb7fffe 100644 --- a/rtl/vproc_pipeline.sv +++ b/rtl/vproc_pipeline.sv @@ -823,7 +823,7 @@ module vproc_pipeline import vproc_pkg::*; #( .OP_HOLD_FLAG ( OP_HOLD_FLAG ), .UNPACK_STAGES ( UNPACK_STAGES ), .FLAGS_T ( unpack_flags ), - .CTRL_DATA_W ( $bits(ctrl_t) ), + .CTRL_DATA_W ($bits(ctrl_t) ), .DONT_CARE_ZERO ( DONT_CARE_ZERO ) ) unpack ( .clk_i ( clk_i ), diff --git a/rtl/vproc_pipeline_wrapper.sv b/rtl/vproc_pipeline_wrapper.sv index 8b4d2fb..4ae55c9 100644 --- a/rtl/vproc_pipeline_wrapper.sv +++ b/rtl/vproc_pipeline_wrapper.sv @@ -172,7 +172,7 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( // ECE498HK edits // The fixed-point instructions help preserve precision in narrow operands by supporting scaling and rounding - localparam bit OP0_NARROW = UNITS[UNIT_MUL] | UNITS[UNIT_ALU] | UNITS[UNIT_ELEM]; + localparam bit OP0_NARROW = UNITS[UNIT_MUL] | UNITS[UNIT_ALU] | UNITS[UNIT_ELEM]; localparam bit OP1_NARROW = UNITS[UNIT_MUL] | UNITS[UNIT_ALU]; localparam bit OP1_XREG = UNITS[UNIT_MUL] | UNITS[UNIT_ALU] | UNITS[UNIT_DIV]; // OPMVX category of instr use GPR x register rs1 as scalar operand. 
vdiv/vrem supports .vv and .vx localparam bit OP0_ELEMWISE = UNITS[UNIT_LSU] | UNITS[UNIT_ELEM]; @@ -185,7 +185,7 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( localparam int unsigned MAX_RES_W = MAX_OP_W; // result flags - localparam bit RES0_ALWAYS_VREG = ~UNITS[UNIT_LSU] & ~UNITS[UNIT_ALU] & ~UNITS[UNIT_ELEM]; // true for DIV + localparam bit RES0_ALWAYS_VREG = ~UNITS[UNIT_LSU] & ~UNITS[UNIT_ALU] & ~UNITS[UNIT_ELEM]; // true for DIV localparam bit RES0_NARROW = UNITS[UNIT_ALU]; localparam bit RES0_ALLOW_ELEMWISE = UNITS[UNIT_LSU] | UNITS[UNIT_ELEM]; @@ -487,11 +487,11 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #( state_init.res_vreg [RES_CNT-1] = pipe_in_data_i.mode.alu.cmp; end // ECE498HK additions - if (unit_div) begin + if (unit_div) begin state_init.op_vaddr[0] = pipe_in_data_i.rs2.r.vaddr; // for vmadd and vnmsub, MUL stores vaddr in .rd.addr, this is not needed for div state_init.op_flags[0].sigext = pipe_in_data_i.mode.div.op2_signed; state_init.op_flags[1].sigext = pipe_in_data_i.mode.div.op1_signed; - end + end if (unit_mul) begin state_init.op_vaddr[0] = pipe_in_data_i.mode.mul.op2_is_vd ? 
pipe_in_data_i.rd.addr : pipe_in_data_i.rs2.r.vaddr; state_init.op_flags[0].sigext = pipe_in_data_i.mode.mul.op2_signed; From 40793bc83e55f00aca6102fa5e56ae99862d5334 Mon Sep 17 00:00:00 2001 From: Spenser Fong Date: Wed, 2 Nov 2022 20:47:24 -0500 Subject: [PATCH 28/64] fixed linting again --- rtl/vproc_div_block.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtl/vproc_div_block.sv b/rtl/vproc_div_block.sv index e638246..cd4cfbb 100644 --- a/rtl/vproc_div_block.sv +++ b/rtl/vproc_div_block.sv @@ -89,7 +89,7 @@ module vproc_div_block #( div_d = $signed(op1_q) % $signed(op2_q); end end - default: ; + default: ; endcase end From 8aef6acbf2215382d25b1a37e4ce5bafbd04db05 Mon Sep 17 00:00:00 2001 From: Spenser Fong Date: Wed, 2 Nov 2022 20:50:58 -0500 Subject: [PATCH 29/64] Fixed bit widths in div_block --- rtl/vproc_div_block.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rtl/vproc_div_block.sv b/rtl/vproc_div_block.sv index cd4cfbb..eb0c0c6 100644 --- a/rtl/vproc_div_block.sv +++ b/rtl/vproc_div_block.sv @@ -25,9 +25,9 @@ module vproc_div_block #( vproc_pkg::DIV_GENERIC: begin - logic [16:0] op1_q, op2_q; - logic [16:0] div_q, div_d; - logic [16:0] res_q, res_d; + logic [31:0] op1_q, op2_q; + logic [31:0] div_q, div_d; + logic [31:0] res_q, res_d; if (BUF_OPS) begin always_ff @(posedge clk_i) begin From 3a02dd7eb8991581a121ce7e28aaf60e07d9e803 Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 2 Nov 2022 22:26:50 -0500 Subject: [PATCH 30/64] pushing div and rem cases --- test/div/vdiv_16.S | 96 +++++++++++++++++++++++++++++++++++++++++++++ test/div/vdiv_32.S | 96 +++++++++++++++++++++++++++++++++++++++++++++ test/div/vdiv_8.S | 96 +++++++++++++++++++++++++++++++++++++++++++++ test/div/vdivu_16.S | 96 +++++++++++++++++++++++++++++++++++++++++++++ test/div/vdivu_32.S | 96 +++++++++++++++++++++++++++++++++++++++++++++ test/div/vdivu_8.S | 96 +++++++++++++++++++++++++++++++++++++++++++++ test/div/vrem_16.S | 96 
+++++++++++++++++++++++++++++++++++++++++++++ test/div/vrem_32.S | 96 +++++++++++++++++++++++++++++++++++++++++++++ test/div/vrem_8.S | 96 +++++++++++++++++++++++++++++++++++++++++++++ test/div/vremu_16.S | 96 +++++++++++++++++++++++++++++++++++++++++++++ test/div/vremu_32.S | 96 +++++++++++++++++++++++++++++++++++++++++++++ test/div/vremu_8.S | 96 +++++++++++++++++++++++++++++++++++++++++++++ 12 files changed, 1152 insertions(+) create mode 100644 test/div/vdiv_16.S create mode 100644 test/div/vdiv_32.S create mode 100644 test/div/vdiv_8.S create mode 100644 test/div/vdivu_16.S create mode 100644 test/div/vdivu_32.S create mode 100644 test/div/vdivu_8.S create mode 100644 test/div/vrem_16.S create mode 100644 test/div/vrem_32.S create mode 100644 test/div/vrem_8.S create mode 100644 test/div/vremu_16.S create mode 100644 test/div/vremu_32.S create mode 100644 test/div/vremu_8.S diff --git a/test/div/vdiv_16.S b/test/div/vdiv_16.S new file mode 100644 index 0000000..0ef7714 --- /dev/null +++ b/test/div/vdiv_16.S @@ -0,0 +1,96 @@ + +# vdiv 16 +main: + la a0, vdata_start + + li t0, 8 + vsetvli t0, t0, e16,m1,tu,mu + + li t0, 7 + + vle32.v v0, (a0) # load v0 with a0 + vdiv.vx v0, v0, t0 + vse32.v v0, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0xc9022cbf + .word 0xfdd6064a + .word 0x39b908c3 + .word 0x88c4a16e + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + 
.align 10 + .global vref_start + .global vref_end +vref_start: + .word 0xf8240664 + .word 0xffb000e6 + .word 0x083f0140 + .word 0xeef7f27d + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file diff --git a/test/div/vdiv_32.S b/test/div/vdiv_32.S new file mode 100644 index 0000000..080d789 --- /dev/null +++ b/test/div/vdiv_32.S @@ -0,0 +1,96 @@ + +# vdiv 32 +main: + la a0, vdata_start + + li t0, 4 + vsetvli t0, t0, e32,m1,tu,mu + + li t0, 7 + + vle32.v v0, (a0) # load v0 with a0 + vdiv.vx v0, v0, t0 + vse32.v v0, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0xdba568d5 + .word 0x4c29d615 + .word 0x4b989c7d + .word 0xe64595f7 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0xface7cb0 + .word 0x0ae167b9 + .word 0x0acca8a4 + .word 0xfc53156c + .word 0x3f44383b + .word 0x37424d54 + 
.word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file diff --git a/test/div/vdiv_8.S b/test/div/vdiv_8.S new file mode 100644 index 0000000..6161670 --- /dev/null +++ b/test/div/vdiv_8.S @@ -0,0 +1,96 @@ + +# vdiv 8 +main: + la a0, vdata_start + + li t0, 16 + vsetvli t0, t0, e8,m1,tu,mu + + li t0, 7 + + vle32.v v0, (a0) # load v0 with a0 + vdiv.vx v0, v0, t0 + vse32.v v0, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0x61831a41 + .word 0x1faef0a7 + .word 0xab59e740 + .word 0xaa5d4130 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0x0dee0309 + .word 0x04f4fdf3 + .word 0xf30cfc09 + .word 0xf30d0906 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 
0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file diff --git a/test/div/vdivu_16.S b/test/div/vdivu_16.S new file mode 100644 index 0000000..8a23aa6 --- /dev/null +++ b/test/div/vdivu_16.S @@ -0,0 +1,96 @@ + +# vdiv u 16 +main: + la a0, vdata_start + + li t0, 8 + vsetvli t0, t0, e16,m1,tu,mu + + li t0, 7 + + vle32.v v0, (a0) # load v0 with a0 + vdivu.vx v0, v0, t0 + vse32.v v0, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0x851e7629 + .word 0x13c40607 + .word 0x56ff5f09 + .word 0xb7bb7f5a + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0xee7210e1 + .word 0x02d200dc + .word 0x0c6d0d93 + .word 0xf5ad1231 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 
0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file diff --git a/test/div/vdivu_32.S b/test/div/vdivu_32.S new file mode 100644 index 0000000..248e64d --- /dev/null +++ b/test/div/vdivu_32.S @@ -0,0 +1,96 @@ + +# vdiv u 32 +main: + la a0, vdata_start + + li t0, 4 + vsetvli t0, t0, e32,m1,tu,mu + + li t0, 7 + + vle32.v v0, (a0) # load v0 with a0 + vdivu.vx v0, v0, t0 + vse32.v v0, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0xaa8e500d + .word 0xa9bf1111 + .word 0xa82cae01 + .word 0xe72f618f + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0xf3cb3001 + .word 0xf3ad94b9 + .word 0xf37418db + .word 0xfc747ba6 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at 
end of file diff --git a/test/div/vdivu_8.S b/test/div/vdivu_8.S new file mode 100644 index 0000000..5dad05b --- /dev/null +++ b/test/div/vdivu_8.S @@ -0,0 +1,96 @@ + +# vdiv u 8 +main: + la a0, vdata_start + + li t0, 16 + vsetvli t0, t0, e8,m1,tu,mu + + li t0, 7 + + vle32.v v0, (a0) # load v0 with a0 + vdivu.vx v0, v0, t0 + vse32.v v0, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0x371ba2a9 + .word 0xd423be7e + .word 0x1ad45c2e + .word 0xb8c8a741 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0x0703f2f3 + .word 0xf905f612 + .word 0x03f90d06 + .word 0xf5f8f309 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file diff --git a/test/div/vrem_16.S b/test/div/vrem_16.S new file mode 100644 index 0000000..415f7a9 --- /dev/null +++ b/test/div/vrem_16.S @@ -0,0 +1,96 @@ + +# vrem 16 
+main: + la a0, vdata_start + + li t0, 8 + vsetvli t0, t0, e16,m1,tu,mu + + li t0, 7 + + vle32.v v0, (a0) # load v0 with a0 + vrem.vx v0, v0, t0 + vse32.v v0, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0x8f8b4ffe + .word 0x141e0e8b + .word 0xaf567833 + .word 0x3c39473a + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0x00020003 + .word 0x00050006 + .word 0x00000006 + .word 0x00030006 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file diff --git a/test/div/vrem_32.S b/test/div/vrem_32.S new file mode 100644 index 0000000..3734113 --- /dev/null +++ b/test/div/vrem_32.S @@ -0,0 +1,96 @@ + +# vrem 32 +main: + la a0, vdata_start + + li t0, 4 + vsetvli t0, t0, e32,m1,tu,mu + + li t0, 7 + + vle32.v v0, (a0) # load v0 with a0 + vrem.vx v0, v0, t0 + vse32.v v0, (a0) # store into a0 
+ + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0x85cb83cc + .word 0x6e0b9321 + .word 0x0cd33312 + .word 0x59516499 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0x00000003 + .word 0x00000004 + .word 0x00000005 + .word 0x00000006 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file diff --git a/test/div/vrem_8.S b/test/div/vrem_8.S new file mode 100644 index 0000000..258090d --- /dev/null +++ b/test/div/vrem_8.S @@ -0,0 +1,96 @@ + +# vrem 8 +main: + la a0, vdata_start + + li t0, 16 + vsetvli t0, t0, e8,m1,tu,mu + + li t0, 7 + + vle32.v v0, (a0) # load v0 with a0 + vrem.vx v0, v0, t0 + vse32.v v0, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0x0ac13f5d + .word 0x35526fd3 + .word 
0x3e94ebd7 + .word 0x4d658752 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0x03000002 + .word 0x04050604 + .word 0x06040001 + .word 0x00030505 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file diff --git a/test/div/vremu_16.S b/test/div/vremu_16.S new file mode 100644 index 0000000..b0ac5ec --- /dev/null +++ b/test/div/vremu_16.S @@ -0,0 +1,96 @@ + +# vrem u 16 +main: + la a0, vdata_start + + li t0, 8 + vsetvli t0, t0, e16,m1,tu,mu + + li t0, 7 + + vle32.v v0, (a0) # load v0 with a0 + vremu.vx v0, v0, t0 + vse32.v v0, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0x7052126e + .word 0x1d1e029c + .word 0x57dfe41d + .word 0x64cadb32 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 
0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0x00050000 + .word 0x00060003 + .word 0x00040001 + .word 0x00000000 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file diff --git a/test/div/vremu_32.S b/test/div/vremu_32.S new file mode 100644 index 0000000..33047e7 --- /dev/null +++ b/test/div/vremu_32.S @@ -0,0 +1,96 @@ + +# vrem u 32 +main: + la a0, vdata_start + + li t0, 4 + vsetvli t0, t0, e32,m1,tu,mu + + li t0, 7 + + vle32.v v0, (a0) # load v0 with a0 + vremu.vx v0, v0, t0 + vse32.v v0, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0x057b80a6 + .word 0x536bf9b0 + .word 0xcfcd9224 + .word 0x9bdc96a0 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 
0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0x00000005 + .word 0x00000006 + .word 0x00000001 + .word 0x00000000 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file diff --git a/test/div/vremu_8.S b/test/div/vremu_8.S new file mode 100644 index 0000000..29b3a4e --- /dev/null +++ b/test/div/vremu_8.S @@ -0,0 +1,96 @@ + +# vrem u 8 +main: + la a0, vdata_start + + li t0, 16 + vsetvli t0, t0, e8,m1,tu,mu + + li t0, 7 + + vle32.v v0, (a0) # load v0 with a0 + vremu.vx v0, v0, t0 + vse32.v v0, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0x249172a9 + .word 0x530ff976 + .word 0xd8f7fb8d + .word 0x6bb8deaa + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 
0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0x01010204 + .word 0x06010006 + .word 0x02050204 + .word 0x02050105 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file From 39f8df3842b87d39c9a6e8dac28787ddeee17967 Mon Sep 17 00:00:00 2001 From: Hassan Farooq <1hassanfarooq@gmail.com> Date: Wed, 2 Nov 2022 22:29:21 -0500 Subject: [PATCH 31/64] Update default.yml --- .github/workflows/default.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/default.yml b/.github/workflows/default.yml index 2a5e1bc..ea4f142 100644 --- a/.github/workflows/default.yml +++ b/.github/workflows/default.yml @@ -72,7 +72,7 @@ jobs: strategy: fail-fast: false matrix: - unit: [lsu, alu, mul, sld, elem, csr, misc] + unit: [lsu, alu, mul, sld, elem, csr, misc, div] main_core: [ibex] steps: - uses: actions/checkout@v2 From 13082811c9e947ba4b6e54610767aad133c7de06 Mon Sep 17 00:00:00 2001 From: Hassan Farooq <1hassanfarooq@gmail.com> Date: Wed, 2 Nov 2022 22:32:14 -0500 Subject: [PATCH 32/64] Update Makefile --- test/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Makefile b/test/Makefile index 8bec238..5919029 100644 --- a/test/Makefile +++ b/test/Makefile @@ -15,7 +15,7 @@ TEST_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) SIMULATOR ?= verilator # test directories -TEST_DIRS := lsu alu mul sld elem csr kernel misc 
+TEST_DIRS := lsu alu mul sld elem csr kernel misc div # test targets TESTS_ALL := $(TEST_DIRS) $(addsuffix /, $(TEST_DIRS)) From 6aacfc1373f02e9c5a0bef2ae5ed7e50d811c4cb Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 2 Nov 2022 22:37:06 -0500 Subject: [PATCH 33/64] oopsie woopsie did I do that? --- test/div/vdiv_16.S | 18 ++++++++++-------- test/div/vdiv_32.S | 18 ++++++++++-------- test/div/vdiv_8.S | 18 ++++++++++-------- test/div/vdivu_16.S | 18 ++++++++++-------- test/div/vdivu_32.S | 18 ++++++++++-------- test/div/vdivu_8.S | 18 ++++++++++-------- test/div/vrem_16.S | 18 ++++++++++-------- test/div/vrem_32.S | 12 +++++++----- test/div/vrem_8.S | 18 ++++++++++-------- test/div/vremu_16.S | 18 ++++++++++-------- test/div/vremu_32.S | 14 ++++++++------ test/div/vremu_8.S | 18 ++++++++++-------- 12 files changed, 115 insertions(+), 91 deletions(-) diff --git a/test/div/vdiv_16.S b/test/div/vdiv_16.S index 0ef7714..4cc88d1 100644 --- a/test/div/vdiv_16.S +++ b/test/div/vdiv_16.S @@ -1,5 +1,7 @@ # vdiv 16 + .text + .global main main: la a0, vdata_start @@ -22,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xc9022cbf - .word 0xfdd6064a - .word 0x39b908c3 - .word 0x88c4a16e + .word 0x1d1f7edd + .word 0x3deea7b9 + .word 0xbb1fdff2 + .word 0x08114441 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -60,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0xf8240664 - .word 0xffb000e6 - .word 0x083f0140 - .word 0xeef7f27d + .word 0x0429121f + .word 0x08d8f363 + .word 0xf629fb6b + .word 0x012709c0 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdiv_32.S b/test/div/vdiv_32.S index 080d789..98bc0fd 100644 --- a/test/div/vdiv_32.S +++ b/test/div/vdiv_32.S @@ -1,5 +1,7 @@ # vdiv 32 + .text + .global main main: la a0, vdata_start @@ -22,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xdba568d5 - .word 0x4c29d615 - .word 0x4b989c7d - .word 
0xe64595f7 + .word 0x6275b669 + .word 0x9a745c7b + .word 0x47394cfe + .word 0x236a25ea .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -60,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0xface7cb0 - .word 0x0ae167b9 - .word 0x0acca8a4 - .word 0xfc53156c + .word 0x0e10d0ea + .word 0xf17e565a + .word 0x0a2cc1db + .word 0x050f29fc .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdiv_8.S b/test/div/vdiv_8.S index 6161670..17639cb 100644 --- a/test/div/vdiv_8.S +++ b/test/div/vdiv_8.S @@ -1,5 +1,7 @@ # vdiv 8 + .text + .global main main: la a0, vdata_start @@ -22,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x61831a41 - .word 0x1faef0a7 - .word 0xab59e740 - .word 0xaa5d4130 + .word 0x280aeba4 + .word 0x4ca6d706 + .word 0x6d487bd6 + .word 0x435632f0 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -60,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x0dee0309 - .word 0x04f4fdf3 - .word 0xf30cfc09 - .word 0xf30d0906 + .word 0x0501fdf2 + .word 0x0af3fa00 + .word 0x0f0a11fa + .word 0x090c07fd .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdivu_16.S b/test/div/vdivu_16.S index 8a23aa6..68682fc 100644 --- a/test/div/vdivu_16.S +++ b/test/div/vdivu_16.S @@ -1,5 +1,7 @@ # vdiv u 16 + .text + .global main main: la a0, vdata_start @@ -22,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x851e7629 - .word 0x13c40607 - .word 0x56ff5f09 - .word 0xb7bb7f5a + .word 0xc9e08679 + .word 0x9cbd81e1 + .word 0x1e4b7570 + .word 0x632b869b .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -60,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0xee7210e1 - .word 0x02d200dc - .word 0x0c6d0d93 - .word 0xf5ad1231 + .word 0xf844eea3 + .word 0xf1d1edfb + .word 0x045310c6 + .word 0x0e2aeea8 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdivu_32.S 
b/test/div/vdivu_32.S index 248e64d..03ad721 100644 --- a/test/div/vdivu_32.S +++ b/test/div/vdivu_32.S @@ -1,5 +1,7 @@ # vdiv u 32 + .text + .global main main: la a0, vdata_start @@ -22,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xaa8e500d - .word 0xa9bf1111 - .word 0xa82cae01 - .word 0xe72f618f + .word 0x87e60c19 + .word 0x3ced0830 + .word 0x1552cd76 + .word 0x15417fbc .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -60,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0xf3cb3001 - .word 0xf3ad94b9 - .word 0xf37418db - .word 0xfc747ba6 + .word 0xeed7b895 + .word 0x08b425bd + .word 0x030bd435 + .word 0x03095b64 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdivu_8.S b/test/div/vdivu_8.S index 5dad05b..d09a35f 100644 --- a/test/div/vdivu_8.S +++ b/test/div/vdivu_8.S @@ -1,5 +1,7 @@ # vdiv u 8 + .text + .global main main: la a0, vdata_start @@ -22,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x371ba2a9 - .word 0xd423be7e - .word 0x1ad45c2e - .word 0xb8c8a741 + .word 0x9cdc67d1 + .word 0xb335dfe3 + .word 0x7f6867b1 + .word 0x115f1faf .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -60,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x0703f2f3 - .word 0xf905f612 - .word 0x03f90d06 - .word 0xf5f8f309 + .word 0xf1fa0ef9 + .word 0xf507fbfb + .word 0x120e0ef4 + .word 0x020d04f4 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vrem_16.S b/test/div/vrem_16.S index 415f7a9..b58142f 100644 --- a/test/div/vrem_16.S +++ b/test/div/vrem_16.S @@ -1,5 +1,7 @@ # vrem 16 + .text + .global main main: la a0, vdata_start @@ -22,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x8f8b4ffe - .word 0x141e0e8b - .word 0xaf567833 - .word 0x3c39473a + .word 0xfffe5fa4 + .word 0x55b6cfc0 + .word 0xeeb5ee06 + .word 0x80275a7f .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 
@@ -60,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x00020003 - .word 0x00050006 - .word 0x00000006 - .word 0x00030006 + .word 0x00050005 + .word 0x00040003 + .word 0x00040004 + .word 0x00030004 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vrem_32.S b/test/div/vrem_32.S index 3734113..bb92dce 100644 --- a/test/div/vrem_32.S +++ b/test/div/vrem_32.S @@ -1,5 +1,7 @@ # vrem 32 + .text + .global main main: la a0, vdata_start @@ -22,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x85cb83cc - .word 0x6e0b9321 - .word 0x0cd33312 - .word 0x59516499 + .word 0x3afd6811 + .word 0xfaada33e + .word 0xdebd47cb + .word 0xfa4a096b .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -63,7 +65,7 @@ vref_start: .word 0x00000003 .word 0x00000004 .word 0x00000005 - .word 0x00000006 + .word 0x00000005 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vrem_8.S b/test/div/vrem_8.S index 258090d..caf7f1d 100644 --- a/test/div/vrem_8.S +++ b/test/div/vrem_8.S @@ -1,5 +1,7 @@ # vrem 8 + .text + .global main main: la a0, vdata_start @@ -22,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x0ac13f5d - .word 0x35526fd3 - .word 0x3e94ebd7 - .word 0x4d658752 + .word 0xb2403082 + .word 0x4cf473b5 + .word 0x8e1f7e7e + .word 0x5c0055b7 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -60,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x03000002 - .word 0x04050604 - .word 0x06040001 - .word 0x00030505 + .word 0x06010600 + .word 0x06020302 + .word 0x05030000 + .word 0x01000104 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vremu_16.S b/test/div/vremu_16.S index b0ac5ec..5fb2e0c 100644 --- a/test/div/vremu_16.S +++ b/test/div/vremu_16.S @@ -1,5 +1,7 @@ # vrem u 16 + .text + .global main main: la a0, vdata_start @@ -22,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - 
.word 0x7052126e - .word 0x1d1e029c - .word 0x57dfe41d - .word 0x64cadb32 + .word 0x9bd23b5b + .word 0x65213d2d + .word 0xf3af75a9 + .word 0xb2814de0 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -60,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x00050000 - .word 0x00060003 - .word 0x00040001 - .word 0x00000000 + .word 0x00020005 + .word 0x00030002 + .word 0x00040000 + .word 0x00060000 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vremu_32.S b/test/div/vremu_32.S index 33047e7..c36d7cc 100644 --- a/test/div/vremu_32.S +++ b/test/div/vremu_32.S @@ -1,5 +1,7 @@ # vrem u 32 + .text + .global main main: la a0, vdata_start @@ -22,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x057b80a6 - .word 0x536bf9b0 - .word 0xcfcd9224 - .word 0x9bdc96a0 + .word 0x513263a1 + .word 0x0d461edf + .word 0xaf9be0be + .word 0x3ee14779 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -60,9 +62,9 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x00000005 + .word 0x00000003 + .word 0x00000006 .word 0x00000006 - .word 0x00000001 .word 0x00000000 .word 0x3f44383b .word 0x37424d54 diff --git a/test/div/vremu_8.S b/test/div/vremu_8.S index 29b3a4e..2c553c4 100644 --- a/test/div/vremu_8.S +++ b/test/div/vremu_8.S @@ -1,5 +1,7 @@ # vrem u 8 + .text + .global main main: la a0, vdata_start @@ -22,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x249172a9 - .word 0x530ff976 - .word 0xd8f7fb8d - .word 0x6bb8deaa + .word 0x85aa32b6 + .word 0x2c843100 + .word 0x586860ec + .word 0xc56efee7 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -60,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x01010204 - .word 0x06010006 - .word 0x02050204 - .word 0x02050105 + .word 0x03050103 + .word 0x02020000 + .word 0x04060501 + .word 0x04050503 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 From 
de6a22560978d4126e93c1098f9a1c5f31d34fe9 Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 2 Nov 2022 22:43:41 -0500 Subject: [PATCH 34/64] flip operators for div and rem' --- rtl/vproc_unit_wrapper.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rtl/vproc_unit_wrapper.sv b/rtl/vproc_unit_wrapper.sv index 0768be1..5bf2734 100644 --- a/rtl/vproc_unit_wrapper.sv +++ b/rtl/vproc_unit_wrapper.sv @@ -202,8 +202,8 @@ module vproc_unit_wrapper import vproc_pkg::*; #( .pipe_in_ready_o ( pipe_in_ready_o ), .pipe_in_ctrl_i ( pipe_in_ctrl_i ), - .pipe_in_op1_i ( pipe_in_op_data_i[1] ), // TODO double check if this - .pipe_in_op2_i ( pipe_in_op_data_i[0] ), // TODO should be swapped + .pipe_in_op1_i ( pipe_in_op_data_i[0] ), // TODO double check if this + .pipe_in_op2_i ( pipe_in_op_data_i[1] ), // TODO should be swapped .pipe_in_mask_i ( pipe_in_op_data_i[OP_CNT-1][MAX_OP_W/8-1:0] ), From 947346df2af58c008c622c017c60503d83c7566f Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 2 Nov 2022 23:03:07 -0500 Subject: [PATCH 35/64] debug --- rtl/vproc_div.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/rtl/vproc_div.sv b/rtl/vproc_div.sv index b76f8e6..ea6f26d 100644 --- a/rtl/vproc_div.sv +++ b/rtl/vproc_div.sv @@ -139,6 +139,7 @@ module vproc_div #( if (BUF_RESULTS) begin always_ff @(posedge clk_i or negedge async_rst_ni) begin : vproc_div_stage_res_valid + $display("DEBUG: %h %h %h %h", pipe_in_op1_i, pipe_in_op2_i, pipe_out_res_o, pipe_out_valid_o); if (~async_rst_ni) begin state_res_valid_q <= 1'b0; end From 732f9e307308fc71924718a15565abd4a1a88783 Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 2 Nov 2022 23:12:20 -0500 Subject: [PATCH 36/64] turn off buffering for div --- rtl/vproc_div.sv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rtl/vproc_div.sv b/rtl/vproc_div.sv index ea6f26d..0e21dd3 100644 --- a/rtl/vproc_div.sv +++ b/rtl/vproc_div.sv @@ -3,10 +3,10 @@ module vproc_div #( parameter int 
unsigned DIV_OP_W = 64, // DIV unit operand width in bits parameter vproc_pkg::div_type DIV_TYPE = vproc_pkg::DIV_GENERIC, - parameter bit BUF_OPERANDS = 1'b1, - parameter bit BUF_DIV_IN = 1'b1, - parameter bit BUF_DIV_OUT = 1'b1, - parameter bit BUF_RESULTS = 1'b1, + parameter bit BUF_OPERANDS = 1'b0, + parameter bit BUF_DIV_IN = 1'b0, + parameter bit BUF_DIV_OUT = 1'b0, + parameter bit BUF_RESULTS = 1'b0, parameter type CTRL_T = logic, parameter bit DONT_CARE_ZERO = 1'b0 )( From 838026d2335a6b643ffdf69a9f6c668cfa35fb14 Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 2 Nov 2022 23:40:12 -0500 Subject: [PATCH 37/64] fixed tests --- test/div/vdiv_16.S | 16 ++++++++-------- test/div/vdiv_32.S | 16 ++++++++-------- test/div/vdiv_8.S | 16 ++++++++-------- test/div/vdivu_16.S | 16 ++++++++-------- test/div/vdivu_32.S | 16 ++++++++-------- test/div/vdivu_8.S | 16 ++++++++-------- test/div/vrem_16.S | 16 ++++++++-------- test/div/vrem_32.S | 14 +++++++------- test/div/vrem_8.S | 16 ++++++++-------- test/div/vremu_16.S | 16 ++++++++-------- test/div/vremu_32.S | 16 ++++++++-------- test/div/vremu_8.S | 16 ++++++++-------- 12 files changed, 95 insertions(+), 95 deletions(-) diff --git a/test/div/vdiv_16.S b/test/div/vdiv_16.S index 4cc88d1..0a52618 100644 --- a/test/div/vdiv_16.S +++ b/test/div/vdiv_16.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x1d1f7edd - .word 0x3deea7b9 - .word 0xbb1fdff2 - .word 0x08114441 + .word 0x918905f4 + .word 0xfd18de71 + .word 0x2baa1f03 + .word 0x53f30f13 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x0429121f - .word 0x08d8f363 - .word 0xf629fb6b - .word 0x012709c0 + .word 0xf03800d9 + .word 0xff95fb34 + .word 0x063c046e + .word 0x0bfe0227 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdiv_32.S b/test/div/vdiv_32.S index 98bc0fd..c798d0c 100644 --- a/test/div/vdiv_32.S +++ 
b/test/div/vdiv_32.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x6275b669 - .word 0x9a745c7b - .word 0x47394cfe - .word 0x236a25ea + .word 0xd9e63bb4 + .word 0x669d1dbc + .word 0x176b7540 + .word 0xc01fd55b .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x0e10d0ea - .word 0xf17e565a - .word 0x0a2cc1db - .word 0x050f29fc + .word 0xfa8e9ad0 + .word 0x0ea8bb1a + .word 0x03587e76 + .word 0xf6dff9e8 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdiv_8.S b/test/div/vdiv_8.S index 17639cb..547bada 100644 --- a/test/div/vdiv_8.S +++ b/test/div/vdiv_8.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x280aeba4 - .word 0x4ca6d706 - .word 0x6d487bd6 - .word 0x435632f0 + .word 0x9757edd6 + .word 0x5a8477eb + .word 0xaccaa175 + .word 0x1eb38b7d .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x0501fdf2 - .word 0x0af3fa00 - .word 0x0f0a11fa - .word 0x090c07fd + .word 0xf10cfdfa + .word 0x0cee11fd + .word 0xf4f8f210 + .word 0x04f5ef11 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdivu_16.S b/test/div/vdivu_16.S index 68682fc..6fd1d52 100644 --- a/test/div/vdivu_16.S +++ b/test/div/vdivu_16.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xc9e08679 - .word 0x9cbd81e1 - .word 0x1e4b7570 - .word 0x632b869b + .word 0xd2fe77fb + .word 0xb7e33494 + .word 0x49c41b7e + .word 0x4f3b79df .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0xf844eea3 - .word 0xf1d1edfb - .word 0x045310c6 - .word 0x0e2aeea8 + .word 0x1e241123 + .word 0x1a450782 + .word 0x0a8903ed + .word 0x0b511169 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git 
a/test/div/vdivu_32.S b/test/div/vdivu_32.S index 03ad721..7bfef63 100644 --- a/test/div/vdivu_32.S +++ b/test/div/vdivu_32.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x87e60c19 - .word 0x3ced0830 - .word 0x1552cd76 - .word 0x15417fbc + .word 0xef008431 + .word 0xbfbec2e1 + .word 0x997d4e34 + .word 0x856d7809 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0xeed7b895 - .word 0x08b425bd - .word 0x030bd435 - .word 0x03095b64 + .word 0x2224a52b + .word 0x1b6464fb + .word 0x15ed5450 + .word 0x130fa36f .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdivu_8.S b/test/div/vdivu_8.S index d09a35f..56c2409 100644 --- a/test/div/vdivu_8.S +++ b/test/div/vdivu_8.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x9cdc67d1 - .word 0xb335dfe3 - .word 0x7f6867b1 - .word 0x115f1faf + .word 0x7e10f1e0 + .word 0x272df295 + .word 0x6a87954e + .word 0x308e2542 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0xf1fa0ef9 - .word 0xf507fbfb - .word 0x120e0ef4 - .word 0x020d04f4 + .word 0x12022220 + .word 0x05062215 + .word 0x0f13150b + .word 0x06140509 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vrem_16.S b/test/div/vrem_16.S index b58142f..07ca8c0 100644 --- a/test/div/vrem_16.S +++ b/test/div/vrem_16.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xfffe5fa4 - .word 0x55b6cfc0 - .word 0xeeb5ee06 - .word 0x80275a7f + .word 0x573f883c + .word 0xf61fc7da + .word 0xe85b0443 + .word 0x8a6cb626 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x00050005 - .word 0x00040003 - .word 0x00040004 - .word 0x00030004 + .word 0x0005fff9 + .word 0xfffefffd + .word 
0xfffb0006 + .word 0xfff9fffa .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vrem_32.S b/test/div/vrem_32.S index bb92dce..7f6eea3 100644 --- a/test/div/vrem_32.S +++ b/test/div/vrem_32.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x3afd6811 - .word 0xfaada33e - .word 0xdebd47cb - .word 0xfa4a096b + .word 0x1dfb229b + .word 0xe6069a29 + .word 0x86e66ebf + .word 0x6d8d76ec .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -63,9 +63,9 @@ vdata_end: .global vref_end vref_start: .word 0x00000003 - .word 0x00000004 - .word 0x00000005 - .word 0x00000005 + .word 0xffffffff + .word 0xfffffffc + .word 0x00000000 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vrem_8.S b/test/div/vrem_8.S index caf7f1d..3b9d1f0 100644 --- a/test/div/vrem_8.S +++ b/test/div/vrem_8.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xb2403082 - .word 0x4cf473b5 - .word 0x8e1f7e7e - .word 0x5c0055b7 + .word 0xc2ab4aa3 + .word 0xb154f510 + .word 0xb24289a7 + .word 0x3c010231 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x06010600 - .word 0x06020302 - .word 0x05030000 - .word 0x01000104 + .word 0xfaff04fe + .word 0xfe00fc02 + .word 0xff03f9fb + .word 0x04010200 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vremu_16.S b/test/div/vremu_16.S index 5fb2e0c..2c8811d 100644 --- a/test/div/vremu_16.S +++ b/test/div/vremu_16.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x9bd23b5b - .word 0x65213d2d - .word 0xf3af75a9 - .word 0xb2814de0 + .word 0x6d966451 + .word 0xbf28278b + .word 0xc8dc474c + .word 0x16ee6263 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x00020005 - .word 0x00030002 - .word 0x00040000 - .word 0x00060000 + 
.word 0x00050005 + .word 0x00060001 + .word 0x00050003 + .word 0x00040001 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vremu_32.S b/test/div/vremu_32.S index c36d7cc..a8bd92c 100644 --- a/test/div/vremu_32.S +++ b/test/div/vremu_32.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x513263a1 - .word 0x0d461edf - .word 0xaf9be0be - .word 0x3ee14779 + .word 0x4954177f + .word 0x51df3d4e + .word 0x35ce9fa3 + .word 0x7c6600c6 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x00000003 - .word 0x00000006 - .word 0x00000006 - .word 0x00000000 + .word 0x00000005 + .word 0x00000002 + .word 0x00000004 + .word 0x00000001 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vremu_8.S b/test/div/vremu_8.S index 2c553c4..13d4799 100644 --- a/test/div/vremu_8.S +++ b/test/div/vremu_8.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x85aa32b6 - .word 0x2c843100 - .word 0x586860ec - .word 0xc56efee7 + .word 0x8ef44957 + .word 0x260006db + .word 0x10a66527 + .word 0x9680131e .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x03050103 - .word 0x02020000 - .word 0x04060501 - .word 0x04050503 + .word 0x02060303 + .word 0x03000602 + .word 0x02050304 + .word 0x03020502 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 From ef7f488c0a85f82e5712d3b72892b4cea4ea6dfd Mon Sep 17 00:00:00 2001 From: William Eustis Date: Thu, 3 Nov 2022 00:50:00 -0500 Subject: [PATCH 38/64] fix sigext and tests --- rtl/vproc_div.sv | 9 ++++----- test/div/vdiv_16.S | 20 ++++++++++---------- test/div/vdiv_32.S | 20 ++++++++++---------- test/div/vdiv_8.S | 20 ++++++++++---------- test/div/vdivu_16.S | 20 ++++++++++---------- test/div/vdivu_32.S | 20 ++++++++++---------- test/div/vdivu_8.S | 20 ++++++++++---------- 
test/div/vrem_16.S | 18 +++++++++--------- test/div/vrem_32.S | 20 ++++++++++---------- test/div/vrem_8.S | 20 ++++++++++---------- test/div/vremu_16.S | 20 ++++++++++---------- test/div/vremu_32.S | 16 ++++++++-------- test/div/vremu_8.S | 20 ++++++++++---------- 13 files changed, 121 insertions(+), 122 deletions(-) diff --git a/rtl/vproc_div.sv b/rtl/vproc_div.sv index 0e21dd3..f9f612c 100644 --- a/rtl/vproc_div.sv +++ b/rtl/vproc_div.sv @@ -139,7 +139,6 @@ module vproc_div #( if (BUF_RESULTS) begin always_ff @(posedge clk_i or negedge async_rst_ni) begin : vproc_div_stage_res_valid - $display("DEBUG: %h %h %h %h", pipe_in_op1_i, pipe_in_op2_i, pipe_out_res_o, pipe_out_valid_o); if (~async_rst_ni) begin state_res_valid_q <= 1'b0; end @@ -242,11 +241,11 @@ module vproc_div #( VSEW_16:begin - div_op1[32*(2*i+0) +: 32] = {{16{op1_signs[4*i+0]}}, operand1_q[32*i+16*0 +: 16]}; - div_op1[32*(2*i+1) +: 32] = {{16{op1_signs[4*i+2]}}, operand1_q[32*i+16*1 +: 16]}; + div_op1[32*(2*i+0) +: 32] = {{16{op1_signs[4*i+1]}}, operand1_q[32*i+16*0 +: 16]}; + div_op1[32*(2*i+1) +: 32] = {{16{op1_signs[4*i+3]}}, operand1_q[32*i+16*1 +: 16]}; - div_op2[32*(2*i+0) +: 32] = {{16{op2_signs[4*i+0]}}, operand2_q[32*i+16*0 +: 16]}; - div_op2[32*(2*i+1) +: 32] = {{16{op2_signs[4*i+2]}}, operand2_q[32*i+16*1 +: 16]}; + div_op2[32*(2*i+0) +: 32] = {{16{op2_signs[4*i+1]}}, operand2_q[32*i+16*0 +: 16]}; + div_op2[32*(2*i+1) +: 32] = {{16{op2_signs[4*i+3]}}, operand2_q[32*i+16*1 +: 16]}; end VSEW_32: begin diff --git a/test/div/vdiv_16.S b/test/div/vdiv_16.S index 0a52618..c5713d9 100644 --- a/test/div/vdiv_16.S +++ b/test/div/vdiv_16.S @@ -11,8 +11,8 @@ main: li t0, 7 vle32.v v0, (a0) # load v0 with a0 - vdiv.vx v0, v0, t0 - vse32.v v0, (a0) # store into a0 + vdiv.vx v1, v0, t0 + vse32.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x918905f4 - .word 0xfd18de71 - .word 0x2baa1f03 - .word 
0x53f30f13 + .word 0xb5575f7d + .word 0xebc519a5 + .word 0x79506572 + .word 0xc14cb2b1 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0xf03800d9 - .word 0xff95fb34 - .word 0x063c046e - .word 0x0bfe0227 + .word 0xf5550da4 + .word 0xfd1c03a9 + .word 0x11540e7e + .word 0xf70af4f4 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdiv_32.S b/test/div/vdiv_32.S index c798d0c..b831e62 100644 --- a/test/div/vdiv_32.S +++ b/test/div/vdiv_32.S @@ -11,8 +11,8 @@ main: li t0, 7 vle32.v v0, (a0) # load v0 with a0 - vdiv.vx v0, v0, t0 - vse32.v v0, (a0) # store into a0 + vdiv.vx v1, v0, t0 + vse32.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xd9e63bb4 - .word 0x669d1dbc - .word 0x176b7540 - .word 0xc01fd55b + .word 0xdfdd46b0 + .word 0xf30fdbc9 + .word 0x3704c611 + .word 0x61dc0c0e .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0xfa8e9ad0 - .word 0x0ea8bb1a - .word 0x03587e76 - .word 0xf6dff9e8 + .word 0xfb68c0f4 + .word 0xfe26d641 + .word 0x07dc1c4b + .word 0x0dfadd26 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdiv_8.S b/test/div/vdiv_8.S index 547bada..8b92e01 100644 --- a/test/div/vdiv_8.S +++ b/test/div/vdiv_8.S @@ -11,8 +11,8 @@ main: li t0, 7 vle32.v v0, (a0) # load v0 with a0 - vdiv.vx v0, v0, t0 - vse32.v v0, (a0) # store into a0 + vdiv.vx v1, v0, t0 + vse32.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x9757edd6 - .word 0x5a8477eb - .word 0xaccaa175 - .word 0x1eb38b7d + .word 0x8ff21cd5 + .word 0x9670a82c + .word 0xc8ef7777 + .word 0xea4ea44b .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global 
vref_start .global vref_end vref_start: - .word 0xf10cfdfa - .word 0x0cee11fd - .word 0xf4f8f210 - .word 0x04f5ef11 + .word 0xeffe04f9 + .word 0xf010f306 + .word 0xf8fd1111 + .word 0xfc0bf20a .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdivu_16.S b/test/div/vdivu_16.S index 6fd1d52..19bf224 100644 --- a/test/div/vdivu_16.S +++ b/test/div/vdivu_16.S @@ -11,8 +11,8 @@ main: li t0, 7 vle32.v v0, (a0) # load v0 with a0 - vdivu.vx v0, v0, t0 - vse32.v v0, (a0) # store into a0 + vdivu.vx v1, v0, t0 + vse32.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xd2fe77fb - .word 0xb7e33494 - .word 0x49c41b7e - .word 0x4f3b79df + .word 0x2b2bbea9 + .word 0x657b65ab + .word 0x0de6cf7c + .word 0x688da485 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x1e241123 - .word 0x1a450782 - .word 0x0a8903ed - .word 0x0b511169 + .word 0x062a1b3c + .word 0x0e7f0e86 + .word 0x01fc1da4 + .word 0x0eef1780 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdivu_32.S b/test/div/vdivu_32.S index 7bfef63..ea66d0a 100644 --- a/test/div/vdivu_32.S +++ b/test/div/vdivu_32.S @@ -11,8 +11,8 @@ main: li t0, 7 vle32.v v0, (a0) # load v0 with a0 - vdivu.vx v0, v0, t0 - vse32.v v0, (a0) # store into a0 + vdivu.vx v1, v0, t0 + vse32.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xef008431 - .word 0xbfbec2e1 - .word 0x997d4e34 - .word 0x856d7809 + .word 0x9143737a + .word 0x56028fd7 + .word 0x13051046 + .word 0xb989bdb4 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x2224a52b - .word 0x1b6464fb - .word 0x15ed5450 - .word 0x130fa36f + .word 0x14c07e36 + .word 0x0c498243 + .word 
0x02b7949c + .word 0x1a81643e .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdivu_8.S b/test/div/vdivu_8.S index 56c2409..97c98f3 100644 --- a/test/div/vdivu_8.S +++ b/test/div/vdivu_8.S @@ -11,8 +11,8 @@ main: li t0, 7 vle32.v v0, (a0) # load v0 with a0 - vdivu.vx v0, v0, t0 - vse32.v v0, (a0) # store into a0 + vdivu.vx v1, v0, t0 + vse32.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x7e10f1e0 - .word 0x272df295 - .word 0x6a87954e - .word 0x308e2542 + .word 0xfb546ea6 + .word 0x03ae2427 + .word 0xc1f22b93 + .word 0x644aa626 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x12022220 - .word 0x05062215 - .word 0x0f13150b - .word 0x06140509 + .word 0x230c0f17 + .word 0x00180505 + .word 0x1b220615 + .word 0x0e0a1705 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vrem_16.S b/test/div/vrem_16.S index 07ca8c0..27ac589 100644 --- a/test/div/vrem_16.S +++ b/test/div/vrem_16.S @@ -11,8 +11,8 @@ main: li t0, 7 vle32.v v0, (a0) # load v0 with a0 - vrem.vx v0, v0, t0 - vse32.v v0, (a0) # store into a0 + vrem.vx v1, v0, t0 + vse32.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x573f883c - .word 0xf61fc7da - .word 0xe85b0443 - .word 0x8a6cb626 + .word 0xd17b8878 + .word 0x95fd7bed + .word 0x249fa560 + .word 0xc22465cf .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x0005fff9 .word 0xfffefffd - .word 0xfffb0006 - .word 0xfff9fffa + .word 0xfff90001 + .word 0x0002fffe + .word 0xfffe0002 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vrem_32.S b/test/div/vrem_32.S index 7f6eea3..5ac5c92 100644 --- a/test/div/vrem_32.S +++ 
b/test/div/vrem_32.S @@ -11,8 +11,8 @@ main: li t0, 7 vle32.v v0, (a0) # load v0 with a0 - vrem.vx v0, v0, t0 - vse32.v v0, (a0) # store into a0 + vrem.vx v1, v0, t0 + vse32.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x1dfb229b - .word 0xe6069a29 - .word 0x86e66ebf - .word 0x6d8d76ec + .word 0x9a59d12c + .word 0xdb04ca5b + .word 0x4b479efc + .word 0x9b4be289 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x00000003 - .word 0xffffffff - .word 0xfffffffc - .word 0x00000000 + .word 0xfffffffd + .word 0xfffffffb + .word 0x00000002 + .word 0xfffffffe .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vrem_8.S b/test/div/vrem_8.S index 3b9d1f0..251d257 100644 --- a/test/div/vrem_8.S +++ b/test/div/vrem_8.S @@ -11,8 +11,8 @@ main: li t0, 7 vle32.v v0, (a0) # load v0 with a0 - vrem.vx v0, v0, t0 - vse32.v v0, (a0) # store into a0 + vrem.vx v1, v0, t0 + vse32.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xc2ab4aa3 - .word 0xb154f510 - .word 0xb24289a7 - .word 0x3c010231 + .word 0xcb8e0adc + .word 0x9eda839e + .word 0x1600564c + .word 0x98c2539f .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0xfaff04fe - .word 0xfe00fc02 - .word 0xff03f9fb - .word 0x04010200 + .word 0xfcfe03ff + .word 0xf9fdfaf9 + .word 0x01000206 + .word 0xfafa06fa .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vremu_16.S b/test/div/vremu_16.S index 2c8811d..f1f093a 100644 --- a/test/div/vremu_16.S +++ b/test/div/vremu_16.S @@ -11,8 +11,8 @@ main: li t0, 7 vle32.v v0, (a0) # load v0 with a0 - vremu.vx v0, v0, t0 - vse32.v v0, (a0) # store into a0 + vremu.vx v1, v0, t0 + vse32.v 
v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x6d966451 - .word 0xbf28278b - .word 0xc8dc474c - .word 0x16ee6263 + .word 0xa6c9f52d + .word 0x81cfa642 + .word 0x91772b45 + .word 0x6d37c3e4 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x00050005 - .word 0x00060001 - .word 0x00050003 - .word 0x00040001 + .word 0x00040003 + .word 0x00020002 + .word 0x00060003 + .word 0x00010000 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vremu_32.S b/test/div/vremu_32.S index a8bd92c..c01da89 100644 --- a/test/div/vremu_32.S +++ b/test/div/vremu_32.S @@ -11,8 +11,8 @@ main: li t0, 7 vle32.v v0, (a0) # load v0 with a0 - vremu.vx v0, v0, t0 - vse32.v v0, (a0) # store into a0 + vremu.vx v1, v0, t0 + vse32.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x4954177f - .word 0x51df3d4e - .word 0x35ce9fa3 - .word 0x7c6600c6 + .word 0x92023757 + .word 0x43b42755 + .word 0x08f9fce2 + .word 0xcb37f44b .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x00000005 .word 0x00000002 + .word 0x00000003 .word 0x00000004 - .word 0x00000001 + .word 0x00000006 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vremu_8.S b/test/div/vremu_8.S index 13d4799..3268ffd 100644 --- a/test/div/vremu_8.S +++ b/test/div/vremu_8.S @@ -11,8 +11,8 @@ main: li t0, 7 vle32.v v0, (a0) # load v0 with a0 - vremu.vx v0, v0, t0 - vse32.v v0, (a0) # store into a0 + vremu.vx v1, v0, t0 + vse32.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x8ef44957 - .word 0x260006db - .word 0x10a66527 - .word 
0x9680131e + .word 0x0c5d3bd0 + .word 0x5cf4c250 + .word 0xce0348d7 + .word 0x50e693c6 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x02060303 - .word 0x03000602 - .word 0x02050304 - .word 0x03020502 + .word 0x05020305 + .word 0x01060503 + .word 0x03030205 + .word 0x03060002 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 From fdfe64a0c0dc04d757730150892438c05b5a5791 Mon Sep 17 00:00:00 2001 From: William Eustis Date: Thu, 3 Nov 2022 00:59:13 -0500 Subject: [PATCH 39/64] pls --- rtl/vproc_div_block.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtl/vproc_div_block.sv b/rtl/vproc_div_block.sv index eb0c0c6..3ce7b48 100644 --- a/rtl/vproc_div_block.sv +++ b/rtl/vproc_div_block.sv @@ -93,7 +93,7 @@ module vproc_div_block #( endcase end - assign res_d = div_d; + assign res_d = div_q; assign res_o = res_q; end From 95ae9a6c64ae5df265b7253bd6b635c9b3a79849 Mon Sep 17 00:00:00 2001 From: William Eustis Date: Thu, 3 Nov 2022 01:19:34 -0500 Subject: [PATCH 40/64] & is not && --- .github/workflows/default.yml | 2 +- rtl/vproc_div_block.sv | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/default.yml b/.github/workflows/default.yml index ea4f142..dbe9278 100644 --- a/.github/workflows/default.yml +++ b/.github/workflows/default.yml @@ -72,7 +72,7 @@ jobs: strategy: fail-fast: false matrix: - unit: [lsu, alu, mul, sld, elem, csr, misc, div] + unit: [sld, div] main_core: [ibex] steps: - uses: actions/checkout@v2 diff --git a/rtl/vproc_div_block.sv b/rtl/vproc_div_block.sv index 3ce7b48..cef7498 100644 --- a/rtl/vproc_div_block.sv +++ b/rtl/vproc_div_block.sv @@ -69,7 +69,7 @@ module vproc_div_block #( if (op2_q == '0) begin div_d = '1; end - else if (op1_q == {1'b1, {31{1'b0}}} & op2_q == '1) begin + else if (op1_q == {1'b1, {31{1'b0}}} && op2_q == '1) begin div_d = op1_q; end else begin @@ -82,7 +82,7 @@ module 
vproc_div_block #( if (op2_q == 0) begin div_d = op1_q; end - else if (op1_q == {1'b1, {31{1'b0}}} & op2_q == '1) begin + else if (op1_q == {1'b1, {31{1'b0}}} && op2_q == '1) begin div_d = '0; end else begin From d58f773c3ccefe235a20f9d4fdf9144e335a7431 Mon Sep 17 00:00:00 2001 From: William Eustis Date: Thu, 3 Nov 2022 01:41:02 -0500 Subject: [PATCH 41/64] :ambulance: --- rtl/vproc_div.sv | 18 ------------------ rtl/vproc_div_block.sv | 13 +++++-------- test/div/vdiv_16.S | 20 ++++++++++---------- test/div/vdiv_32.S | 16 ++++++++-------- test/div/vdiv_8.S | 20 ++++++++++---------- test/div/vdivu_16.S | 20 ++++++++++---------- test/div/vdivu_32.S | 16 ++++++++-------- test/div/vdivu_8.S | 20 ++++++++++---------- test/div/vrem_16.S | 20 ++++++++++---------- test/div/vrem_32.S | 14 +++++++------- test/div/vrem_8.S | 20 ++++++++++---------- test/div/vremu_16.S | 20 ++++++++++---------- test/div/vremu_32.S | 14 +++++++------- test/div/vremu_8.S | 20 ++++++++++---------- 14 files changed, 115 insertions(+), 136 deletions(-) diff --git a/rtl/vproc_div.sv b/rtl/vproc_div.sv index f9f612c..9c3989c 100644 --- a/rtl/vproc_div.sv +++ b/rtl/vproc_div.sv @@ -203,24 +203,6 @@ module vproc_div #( end end - logic ex1_vsew_8, ex1_vsew_32; - always_comb begin - ex1_vsew_8 = DONT_CARE_ZERO ? '0 : 'x; - ex1_vsew_32 = DONT_CARE_ZERO ? '0 : 'x; - unique case (state_ex1_q.eew) - VSEW_8: ex1_vsew_8 = 1'b1; - VSEW_16: ex1_vsew_8 = 1'b0; - VSEW_32: ex1_vsew_8 = 1'b0; - default: ; - endcase - unique case (state_ex1_q.eew) - VSEW_8: ex1_vsew_32 = 1'b0; - VSEW_16: ex1_vsew_32 = 1'b0; - VSEW_32: ex1_vsew_32 = 1'b1; - default: ; - endcase - end - logic [(DIV_OP_W*4)-1:0] div_op1, div_op2; always_comb begin div_op1 = DONT_CARE_ZERO ? 
'0 : 'x; diff --git a/rtl/vproc_div_block.sv b/rtl/vproc_div_block.sv index cef7498..6cd9f65 100644 --- a/rtl/vproc_div_block.sv +++ b/rtl/vproc_div_block.sv @@ -66,28 +66,25 @@ module vproc_div_block #( // DIV/U 1'b0 : begin + div_d = $signed(op1_q) / $signed(op2_q); if (op2_q == '0) begin div_d = '1; end - else if (op1_q == {1'b1, {31{1'b0}}} && op2_q == '1) begin + if (op1_q == {1'b1, {31{1'b0}}} && op2_q == '1) begin div_d = op1_q; end - else begin - div_d = $signed(op1_q) / $signed(op2_q); - end + end // REM/U 1'b1 : begin + div_d = $signed(op1_q) % $signed(op2_q); if (op2_q == 0) begin div_d = op1_q; end - else if (op1_q == {1'b1, {31{1'b0}}} && op2_q == '1) begin + if (op1_q == {1'b1, {31{1'b0}}} && op2_q == '1) begin div_d = '0; end - else begin - div_d = $signed(op1_q) % $signed(op2_q); - end end default: ; endcase diff --git a/test/div/vdiv_16.S b/test/div/vdiv_16.S index c5713d9..79d0613 100644 --- a/test/div/vdiv_16.S +++ b/test/div/vdiv_16.S @@ -10,9 +10,9 @@ main: li t0, 7 - vle32.v v0, (a0) # load v0 with a0 + vle16.v v0, (a0) # load v0 with a0 vdiv.vx v1, v0, t0 - vse32.v v1, (a0) # store into a0 + vse16.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xb5575f7d - .word 0xebc519a5 - .word 0x79506572 - .word 0xc14cb2b1 + .word 0x568ddeda + .word 0x9cf96b00 + .word 0xf8c25cbd + .word 0xcd01d829 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0xf5550da4 - .word 0xfd1c03a9 - .word 0x11540e7e - .word 0xf70af4f4 + .word 0x0c5dfb43 + .word 0xf1da0f49 + .word 0xfef70d3f + .word 0xf8b7fa4f .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdiv_32.S b/test/div/vdiv_32.S index b831e62..480f2cb 100644 --- a/test/div/vdiv_32.S +++ b/test/div/vdiv_32.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xdfdd46b0 - .word 
0xf30fdbc9 - .word 0x3704c611 - .word 0x61dc0c0e + .word 0xed35a74b + .word 0xea17c0b2 + .word 0xf9bc6506 + .word 0x6c6a2bc8 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0xfb68c0f4 - .word 0xfe26d641 - .word 0x07dc1c4b - .word 0x0dfadd26 + .word 0xfd50cec1 + .word 0xfcded262 + .word 0xff1ae9dc + .word 0x0f7ce1ae .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdiv_8.S b/test/div/vdiv_8.S index 8b92e01..9251b82 100644 --- a/test/div/vdiv_8.S +++ b/test/div/vdiv_8.S @@ -10,9 +10,9 @@ main: li t0, 7 - vle32.v v0, (a0) # load v0 with a0 + vle8.v v0, (a0) # load v0 with a0 vdiv.vx v1, v0, t0 - vse32.v v1, (a0) # store into a0 + vse8.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x8ff21cd5 - .word 0x9670a82c - .word 0xc8ef7777 - .word 0xea4ea44b + .word 0x7403f246 + .word 0x17cd3fe5 + .word 0xfc619c43 + .word 0x7ad5a12b .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0xeffe04f9 - .word 0xf010f306 - .word 0xf8fd1111 - .word 0xfc0bf20a + .word 0x1000fe0a + .word 0x03f809fc + .word 0xff0df109 + .word 0x11f9f206 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdivu_16.S b/test/div/vdivu_16.S index 19bf224..34f47d6 100644 --- a/test/div/vdivu_16.S +++ b/test/div/vdivu_16.S @@ -10,9 +10,9 @@ main: li t0, 7 - vle32.v v0, (a0) # load v0 with a0 + vle16.v v0, (a0) # load v0 with a0 vdivu.vx v1, v0, t0 - vse32.v v1, (a0) # store into a0 + vse16.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x2b2bbea9 - .word 0x657b65ab - .word 0x0de6cf7c - .word 0x688da485 + .word 0xde02a01f + .word 0xbee70e60 + .word 0x9ed75bbc + .word 0x5d7b59c1 .word 0x3f44383b 
.word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x062a1b3c - .word 0x0e7f0e86 - .word 0x01fc1da4 - .word 0x0eef1780 + .word 0x1fb716df + .word 0x1b45020d + .word 0x16b10d1a + .word 0x0d5a0cd2 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdivu_32.S b/test/div/vdivu_32.S index ea66d0a..e2a524f 100644 --- a/test/div/vdivu_32.S +++ b/test/div/vdivu_32.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x9143737a - .word 0x56028fd7 - .word 0x13051046 - .word 0xb989bdb4 + .word 0x2ad77dea + .word 0x1060d421 + .word 0x274934b9 + .word 0xc7ae8d3e .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x14c07e36 - .word 0x0c498243 - .word 0x02b7949c - .word 0x1a81643e + .word 0x061ec8d8 + .word 0x0256f9bb + .word 0x059cbe63 + .word 0x1c86a676 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdivu_8.S b/test/div/vdivu_8.S index 97c98f3..e7e2a40 100644 --- a/test/div/vdivu_8.S +++ b/test/div/vdivu_8.S @@ -10,9 +10,9 @@ main: li t0, 7 - vle32.v v0, (a0) # load v0 with a0 + vle8.v v0, (a0) # load v0 with a0 vdivu.vx v1, v0, t0 - vse32.v v1, (a0) # store into a0 + vse8.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xfb546ea6 - .word 0x03ae2427 - .word 0xc1f22b93 - .word 0x644aa626 + .word 0xc1b60b4d + .word 0xa712aeae + .word 0x49564f81 + .word 0x71241229 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x230c0f17 - .word 0x00180505 - .word 0x1b220615 - .word 0x0e0a1705 + .word 0x1b1a010b + .word 0x17021818 + .word 0x0a0c0b12 + .word 0x10050205 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vrem_16.S b/test/div/vrem_16.S index 
27ac589..b67dcbb 100644 --- a/test/div/vrem_16.S +++ b/test/div/vrem_16.S @@ -10,9 +10,9 @@ main: li t0, 7 - vle32.v v0, (a0) # load v0 with a0 + vle16.v v0, (a0) # load v0 with a0 vrem.vx v1, v0, t0 - vse32.v v1, (a0) # store into a0 + vse16.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xd17b8878 - .word 0x95fd7bed - .word 0x249fa560 - .word 0xc22465cf + .word 0x6466b622 + .word 0x614868a0 + .word 0x8daae682 + .word 0xd3547ec7 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0xfffefffd - .word 0xfff90001 - .word 0x0002fffe - .word 0xfffe0002 + .word 0x0005fffd + .word 0x00050002 + .word 0xfffdfffe + .word 0xfffb0003 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vrem_32.S b/test/div/vrem_32.S index 5ac5c92..1449ef0 100644 --- a/test/div/vrem_32.S +++ b/test/div/vrem_32.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x9a59d12c - .word 0xdb04ca5b - .word 0x4b479efc - .word 0x9b4be289 + .word 0x5a1dc928 + .word 0xfdb23e3a + .word 0x3723f66c + .word 0xea3993f2 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,9 +62,9 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0xfffffffd - .word 0xfffffffb - .word 0x00000002 + .word 0x00000005 + .word 0xfffffffa + .word 0x00000006 .word 0xfffffffe .word 0x3f44383b .word 0x37424d54 diff --git a/test/div/vrem_8.S b/test/div/vrem_8.S index 251d257..675c6d0 100644 --- a/test/div/vrem_8.S +++ b/test/div/vrem_8.S @@ -10,9 +10,9 @@ main: li t0, 7 - vle32.v v0, (a0) # load v0 with a0 + vle8.v v0, (a0) # load v0 with a0 vrem.vx v1, v0, t0 - vse32.v v1, (a0) # store into a0 + vse8.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xcb8e0adc - .word 0x9eda839e - 
.word 0x1600564c - .word 0x98c2539f + .word 0x4f087f4e + .word 0x95f9f5c3 + .word 0xa31bdc66 + .word 0x00694342 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0xfcfe03ff - .word 0xf9fdfaf9 - .word 0x01000206 - .word 0xfafa06fa + .word 0x02010101 + .word 0xfef9fcfb + .word 0xfe06ff04 + .word 0x00000403 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vremu_16.S b/test/div/vremu_16.S index f1f093a..6970503 100644 --- a/test/div/vremu_16.S +++ b/test/div/vremu_16.S @@ -10,9 +10,9 @@ main: li t0, 7 - vle32.v v0, (a0) # load v0 with a0 + vle16.v v0, (a0) # load v0 with a0 vremu.vx v1, v0, t0 - vse32.v v1, (a0) # store into a0 + vse16.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xa6c9f52d - .word 0x81cfa642 - .word 0x91772b45 - .word 0x6d37c3e4 + .word 0x90b6998e + .word 0x4a26b13d + .word 0x7a69d63d + .word 0x6f2eeb9e .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x00040003 - .word 0x00020002 - .word 0x00060003 - .word 0x00010000 + .word 0x00020005 + .word 0x00050006 + .word 0x00050000 + .word 0x00000006 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vremu_32.S b/test/div/vremu_32.S index c01da89..667c0f7 100644 --- a/test/div/vremu_32.S +++ b/test/div/vremu_32.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x92023757 - .word 0x43b42755 - .word 0x08f9fce2 - .word 0xcb37f44b + .word 0x7d07b3fc + .word 0x9d421702 + .word 0xc86535f2 + .word 0x818acc97 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: + .word 0x00000001 + .word 0x00000005 .word 0x00000002 - .word 0x00000003 - .word 0x00000004 - .word 0x00000006 + .word 
0x00000000 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vremu_8.S b/test/div/vremu_8.S index 3268ffd..77d06bf 100644 --- a/test/div/vremu_8.S +++ b/test/div/vremu_8.S @@ -10,9 +10,9 @@ main: li t0, 7 - vle32.v v0, (a0) # load v0 with a0 + vle8.v v0, (a0) # load v0 with a0 vremu.vx v1, v0, t0 - vse32.v v1, (a0) # store into a0 + vse8.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x0c5d3bd0 - .word 0x5cf4c250 - .word 0xce0348d7 - .word 0x50e693c6 + .word 0xccc2b5fe + .word 0xfaa1ca98 + .word 0x22c5463f + .word 0x56b72baf .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x05020305 - .word 0x01060503 - .word 0x03030205 - .word 0x03060002 + .word 0x01050602 + .word 0x05000605 + .word 0x06010000 + .word 0x02010100 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 From 2d13475044d87030f79b0194081c0e4563f0c44e Mon Sep 17 00:00:00 2001 From: William Eustis Date: Thu, 3 Nov 2022 01:54:48 -0500 Subject: [PATCH 42/64] rm exp --- rtl/vproc_div_block.sv | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/rtl/vproc_div_block.sv b/rtl/vproc_div_block.sv index 6cd9f65..c45d630 100644 --- a/rtl/vproc_div_block.sv +++ b/rtl/vproc_div_block.sv @@ -67,24 +67,24 @@ module vproc_div_block #( // DIV/U 1'b0 : begin div_d = $signed(op1_q) / $signed(op2_q); - if (op2_q == '0) begin - div_d = '1; - end - if (op1_q == {1'b1, {31{1'b0}}} && op2_q == '1) begin - div_d = op1_q; - end + // if (op2_q == '0) begin + // div_d = '1; + // end + // if (op1_q == {1'b1, {31{1'b0}}} && op2_q == '1) begin + // div_d = op1_q; + // end end // REM/U 1'b1 : begin div_d = $signed(op1_q) % $signed(op2_q); - if (op2_q == 0) begin - div_d = op1_q; - end - if (op1_q == {1'b1, {31{1'b0}}} && op2_q == '1) begin - div_d = '0; - end + // if (op2_q == 0) begin + // 
div_d = op1_q; + // end + // if (op1_q == {1'b1, {31{1'b0}}} && op2_q == '1) begin + // div_d = '0; + // end end default: ; endcase From b95cf9d7f29070de011a977581562b4a886e54e2 Mon Sep 17 00:00:00 2001 From: William Eustis Date: Thu, 3 Nov 2022 01:58:38 -0500 Subject: [PATCH 43/64] only compact config --- .github/test_configs.conf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/test_configs.conf b/.github/test_configs.conf index 692c069..8cfa2b2 100644 --- a/.github/test_configs.conf +++ b/.github/test_configs.conf @@ -1,6 +1,6 @@ VPROC_CONFIG=compact VREG_W=128 VMEM_W=32 -VPROC_CONFIG=dual VREG_W=128 VMEM_W=32 -VPROC_CONFIG=dual VREG_W=512 VMEM_W=256 ICACHE_SZ=8192 DCACHE_SZ=65536 MEM_LATENCY=5 -VPROC_CONFIG=triple VREG_W=256 VMEM_W=32 -VPROC_CONFIG=triple VREG_W=1024 VMEM_W=256 ICACHE_SZ=8192 DCACHE_SZ=65536 MEM_LATENCY=5 -VPROC_CONFIG=legacy VREG_W=128 VMEM_W=32 +# VPROC_CONFIG=dual VREG_W=128 VMEM_W=32 +# VPROC_CONFIG=dual VREG_W=512 VMEM_W=256 ICACHE_SZ=8192 DCACHE_SZ=65536 MEM_LATENCY=5 +# VPROC_CONFIG=triple VREG_W=256 VMEM_W=32 +# VPROC_CONFIG=triple VREG_W=1024 VMEM_W=256 ICACHE_SZ=8192 DCACHE_SZ=65536 MEM_LATENCY=5 +# VPROC_CONFIG=legacy VREG_W=128 VMEM_W=32 From cc3ca75e1fdfbabe4970d82c664144caf01b2b80 Mon Sep 17 00:00:00 2001 From: William Eustis Date: Thu, 3 Nov 2022 02:33:55 -0500 Subject: [PATCH 44/64] pls --- rtl/vproc_unit_wrapper.sv | 2 +- test/div/vdiv_16.S | 98 --------------------------------------- test/div/vdiv_32.S | 15 +++--- test/div/vdiv_8.S | 98 --------------------------------------- test/div/vdivu_16.S | 98 --------------------------------------- test/div/vdivu_32.S | 98 --------------------------------------- test/div/vdivu_8.S | 98 --------------------------------------- test/div/vrem_16.S | 98 --------------------------------------- test/div/vrem_32.S | 98 --------------------------------------- test/div/vrem_8.S | 98 --------------------------------------- test/div/vremu_16.S | 98 
--------------------------------------- test/div/vremu_32.S | 98 --------------------------------------- test/div/vremu_8.S | 98 --------------------------------------- 13 files changed, 10 insertions(+), 1085 deletions(-) delete mode 100644 test/div/vdiv_16.S delete mode 100644 test/div/vdiv_8.S delete mode 100644 test/div/vdivu_16.S delete mode 100644 test/div/vdivu_32.S delete mode 100644 test/div/vdivu_8.S delete mode 100644 test/div/vrem_16.S delete mode 100644 test/div/vrem_32.S delete mode 100644 test/div/vrem_8.S delete mode 100644 test/div/vremu_16.S delete mode 100644 test/div/vremu_32.S delete mode 100644 test/div/vremu_8.S diff --git a/rtl/vproc_unit_wrapper.sv b/rtl/vproc_unit_wrapper.sv index 5bf2734..62b180c 100644 --- a/rtl/vproc_unit_wrapper.sv +++ b/rtl/vproc_unit_wrapper.sv @@ -223,7 +223,7 @@ module vproc_unit_wrapper import vproc_pkg::*; #( pipe_out_res_flags_o = '{default: pack_flags'('0)}; pipe_out_res_data_o = '0; pipe_out_res_mask_o = '0; - pipe_out_res_flags_o[0].shift = 1'b1; + pipe_out_res_flags_o[0].shift = unit_out_ctrl.res_shift; pipe_out_res_store_o[0] = unit_out_ctrl.res_store; pipe_out_res_valid_o[0] = pipe_out_valid_o; pipe_out_res_data_o [0] = unit_out_res; diff --git a/test/div/vdiv_16.S b/test/div/vdiv_16.S deleted file mode 100644 index 79d0613..0000000 --- a/test/div/vdiv_16.S +++ /dev/null @@ -1,98 +0,0 @@ - -# vdiv 16 - .text - .global main -main: - la a0, vdata_start - - li t0, 8 - vsetvli t0, t0, e16,m1,tu,mu - - li t0, 7 - - vle16.v v0, (a0) # load v0 with a0 - vdiv.vx v1, v0, t0 - vse16.v v1, (a0) # store into a0 - - la a0, vdata_start - la a1, vdata_end - j spill_cache - - - .data - .align 10 - .global vdata_start - .global vdata_end -vdata_start: - .word 0x568ddeda - .word 0x9cf96b00 - .word 0xf8c25cbd - .word 0xcd01d829 - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 
0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vdata_end: - - .align 10 - .global vref_start - .global vref_end -vref_start: - .word 0x0c5dfb43 - .word 0xf1da0f49 - .word 0xfef70d3f - .word 0xf8b7fa4f - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vref_end: - \ No newline at end of file diff --git a/test/div/vdiv_32.S b/test/div/vdiv_32.S index 480f2cb..e53bcd6 100644 --- a/test/div/vdiv_32.S +++ b/test/div/vdiv_32.S @@ -1,18 +1,21 @@ +# Copyright TU Wien +# Licensed under the Solderpad Hardware License v2.1, see LICENSE.txt for details +# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + -# vdiv 32 .text .global main main: - la a0, vdata_start + la a0, vdata_start li t0, 4 - vsetvli t0, t0, e32,m1,tu,mu + vsetvli t0, t0, e32,m1,tu,mu li t0, 7 - vle32.v v0, (a0) # load v0 with a0 - vdiv.vx v1, v0, t0 - vse32.v v1, (a0) # store into a0 + vle32.v v0, (a0) + vdiv.vx v0, v0, t0 + vse32.v v0, (a0) la a0, vdata_start la a1, vdata_end diff --git a/test/div/vdiv_8.S b/test/div/vdiv_8.S deleted file mode 100644 index 9251b82..0000000 --- a/test/div/vdiv_8.S +++ /dev/null @@ -1,98 +0,0 @@ - -# vdiv 8 - .text - .global main -main: - la a0, vdata_start - - li t0, 16 - vsetvli t0, t0, e8,m1,tu,mu - - li t0, 7 - - 
vle8.v v0, (a0) # load v0 with a0 - vdiv.vx v1, v0, t0 - vse8.v v1, (a0) # store into a0 - - la a0, vdata_start - la a1, vdata_end - j spill_cache - - - .data - .align 10 - .global vdata_start - .global vdata_end -vdata_start: - .word 0x7403f246 - .word 0x17cd3fe5 - .word 0xfc619c43 - .word 0x7ad5a12b - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vdata_end: - - .align 10 - .global vref_start - .global vref_end -vref_start: - .word 0x1000fe0a - .word 0x03f809fc - .word 0xff0df109 - .word 0x11f9f206 - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vref_end: - \ No newline at end of file diff --git a/test/div/vdivu_16.S b/test/div/vdivu_16.S deleted file mode 100644 index 34f47d6..0000000 --- a/test/div/vdivu_16.S +++ /dev/null @@ -1,98 +0,0 @@ - -# vdiv u 16 - .text - .global main -main: - la a0, vdata_start - - li t0, 8 - vsetvli t0, t0, e16,m1,tu,mu - - li t0, 7 - - vle16.v v0, (a0) # load v0 with a0 - vdivu.vx v1, v0, t0 - vse16.v v1, (a0) # store into a0 - - la a0, vdata_start - la a1, vdata_end - j spill_cache 
- - - .data - .align 10 - .global vdata_start - .global vdata_end -vdata_start: - .word 0xde02a01f - .word 0xbee70e60 - .word 0x9ed75bbc - .word 0x5d7b59c1 - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vdata_end: - - .align 10 - .global vref_start - .global vref_end -vref_start: - .word 0x1fb716df - .word 0x1b45020d - .word 0x16b10d1a - .word 0x0d5a0cd2 - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vref_end: - \ No newline at end of file diff --git a/test/div/vdivu_32.S b/test/div/vdivu_32.S deleted file mode 100644 index e2a524f..0000000 --- a/test/div/vdivu_32.S +++ /dev/null @@ -1,98 +0,0 @@ - -# vdiv u 32 - .text - .global main -main: - la a0, vdata_start - - li t0, 4 - vsetvli t0, t0, e32,m1,tu,mu - - li t0, 7 - - vle32.v v0, (a0) # load v0 with a0 - vdivu.vx v1, v0, t0 - vse32.v v1, (a0) # store into a0 - - la a0, vdata_start - la a1, vdata_end - j spill_cache - - - .data - .align 10 - .global vdata_start - .global vdata_end -vdata_start: - .word 0x2ad77dea - .word 0x1060d421 - .word 0x274934b9 - .word 
0xc7ae8d3e - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vdata_end: - - .align 10 - .global vref_start - .global vref_end -vref_start: - .word 0x061ec8d8 - .word 0x0256f9bb - .word 0x059cbe63 - .word 0x1c86a676 - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vref_end: - \ No newline at end of file diff --git a/test/div/vdivu_8.S b/test/div/vdivu_8.S deleted file mode 100644 index e7e2a40..0000000 --- a/test/div/vdivu_8.S +++ /dev/null @@ -1,98 +0,0 @@ - -# vdiv u 8 - .text - .global main -main: - la a0, vdata_start - - li t0, 16 - vsetvli t0, t0, e8,m1,tu,mu - - li t0, 7 - - vle8.v v0, (a0) # load v0 with a0 - vdivu.vx v1, v0, t0 - vse8.v v1, (a0) # store into a0 - - la a0, vdata_start - la a1, vdata_end - j spill_cache - - - .data - .align 10 - .global vdata_start - .global vdata_end -vdata_start: - .word 0xc1b60b4d - .word 0xa712aeae - .word 0x49564f81 - .word 0x71241229 - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 
0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vdata_end: - - .align 10 - .global vref_start - .global vref_end -vref_start: - .word 0x1b1a010b - .word 0x17021818 - .word 0x0a0c0b12 - .word 0x10050205 - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vref_end: - \ No newline at end of file diff --git a/test/div/vrem_16.S b/test/div/vrem_16.S deleted file mode 100644 index b67dcbb..0000000 --- a/test/div/vrem_16.S +++ /dev/null @@ -1,98 +0,0 @@ - -# vrem 16 - .text - .global main -main: - la a0, vdata_start - - li t0, 8 - vsetvli t0, t0, e16,m1,tu,mu - - li t0, 7 - - vle16.v v0, (a0) # load v0 with a0 - vrem.vx v1, v0, t0 - vse16.v v1, (a0) # store into a0 - - la a0, vdata_start - la a1, vdata_end - j spill_cache - - - .data - .align 10 - .global vdata_start - .global vdata_end -vdata_start: - .word 0x6466b622 - .word 0x614868a0 - .word 0x8daae682 - .word 0xd3547ec7 - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 
0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vdata_end: - - .align 10 - .global vref_start - .global vref_end -vref_start: - .word 0x0005fffd - .word 0x00050002 - .word 0xfffdfffe - .word 0xfffb0003 - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vref_end: - \ No newline at end of file diff --git a/test/div/vrem_32.S b/test/div/vrem_32.S deleted file mode 100644 index 1449ef0..0000000 --- a/test/div/vrem_32.S +++ /dev/null @@ -1,98 +0,0 @@ - -# vrem 32 - .text - .global main -main: - la a0, vdata_start - - li t0, 4 - vsetvli t0, t0, e32,m1,tu,mu - - li t0, 7 - - vle32.v v0, (a0) # load v0 with a0 - vrem.vx v1, v0, t0 - vse32.v v1, (a0) # store into a0 - - la a0, vdata_start - la a1, vdata_end - j spill_cache - - - .data - .align 10 - .global vdata_start - .global vdata_end -vdata_start: - .word 0x5a1dc928 - .word 0xfdb23e3a - .word 0x3723f66c - .word 0xea3993f2 - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 
0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vdata_end: - - .align 10 - .global vref_start - .global vref_end -vref_start: - .word 0x00000005 - .word 0xfffffffa - .word 0x00000006 - .word 0xfffffffe - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vref_end: - \ No newline at end of file diff --git a/test/div/vrem_8.S b/test/div/vrem_8.S deleted file mode 100644 index 675c6d0..0000000 --- a/test/div/vrem_8.S +++ /dev/null @@ -1,98 +0,0 @@ - -# vrem 8 - .text - .global main -main: - la a0, vdata_start - - li t0, 16 - vsetvli t0, t0, e8,m1,tu,mu - - li t0, 7 - - vle8.v v0, (a0) # load v0 with a0 - vrem.vx v1, v0, t0 - vse8.v v1, (a0) # store into a0 - - la a0, vdata_start - la a1, vdata_end - j spill_cache - - - .data - .align 10 - .global vdata_start - .global vdata_end -vdata_start: - .word 0x4f087f4e - .word 0x95f9f5c3 - .word 0xa31bdc66 - .word 0x00694342 - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vdata_end: - - .align 10 - .global vref_start - .global vref_end 
-vref_start: - .word 0x02010101 - .word 0xfef9fcfb - .word 0xfe06ff04 - .word 0x00000403 - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vref_end: - \ No newline at end of file diff --git a/test/div/vremu_16.S b/test/div/vremu_16.S deleted file mode 100644 index 6970503..0000000 --- a/test/div/vremu_16.S +++ /dev/null @@ -1,98 +0,0 @@ - -# vrem u 16 - .text - .global main -main: - la a0, vdata_start - - li t0, 8 - vsetvli t0, t0, e16,m1,tu,mu - - li t0, 7 - - vle16.v v0, (a0) # load v0 with a0 - vremu.vx v1, v0, t0 - vse16.v v1, (a0) # store into a0 - - la a0, vdata_start - la a1, vdata_end - j spill_cache - - - .data - .align 10 - .global vdata_start - .global vdata_end -vdata_start: - .word 0x90b6998e - .word 0x4a26b13d - .word 0x7a69d63d - .word 0x6f2eeb9e - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vdata_end: - - .align 10 - .global vref_start - .global vref_end -vref_start: - .word 0x00020005 - .word 0x00050006 - .word 0x00050000 - .word 0x00000006 - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 
- .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vref_end: - \ No newline at end of file diff --git a/test/div/vremu_32.S b/test/div/vremu_32.S deleted file mode 100644 index 667c0f7..0000000 --- a/test/div/vremu_32.S +++ /dev/null @@ -1,98 +0,0 @@ - -# vrem u 32 - .text - .global main -main: - la a0, vdata_start - - li t0, 4 - vsetvli t0, t0, e32,m1,tu,mu - - li t0, 7 - - vle32.v v0, (a0) # load v0 with a0 - vremu.vx v1, v0, t0 - vse32.v v1, (a0) # store into a0 - - la a0, vdata_start - la a1, vdata_end - j spill_cache - - - .data - .align 10 - .global vdata_start - .global vdata_end -vdata_start: - .word 0x7d07b3fc - .word 0x9d421702 - .word 0xc86535f2 - .word 0x818acc97 - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vdata_end: - - .align 10 - .global vref_start - .global vref_end -vref_start: - .word 0x00000001 - .word 0x00000005 - .word 0x00000002 - .word 0x00000000 - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 
0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vref_end: - \ No newline at end of file diff --git a/test/div/vremu_8.S b/test/div/vremu_8.S deleted file mode 100644 index 77d06bf..0000000 --- a/test/div/vremu_8.S +++ /dev/null @@ -1,98 +0,0 @@ - -# vrem u 8 - .text - .global main -main: - la a0, vdata_start - - li t0, 16 - vsetvli t0, t0, e8,m1,tu,mu - - li t0, 7 - - vle8.v v0, (a0) # load v0 with a0 - vremu.vx v1, v0, t0 - vse8.v v1, (a0) # store into a0 - - la a0, vdata_start - la a1, vdata_end - j spill_cache - - - .data - .align 10 - .global vdata_start - .global vdata_end -vdata_start: - .word 0xccc2b5fe - .word 0xfaa1ca98 - .word 0x22c5463f - .word 0x56b72baf - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vdata_end: - - .align 10 - .global vref_start - .global vref_end -vref_start: - .word 0x01050602 - .word 0x05000605 - .word 0x06010000 - .word 0x02010100 - .word 0x3f44383b - .word 0x37424d54 - .word 0x5e4b5049 - .word 0x4c4c4c4a - .word 0x4a505f3e - .word 0x485e5455 - .word 0x4d4c4a41 - .word 0x373b5451 - .word 0x41454c45 - .word 0x3a3e3738 - .word 0x312f2e2f - .word 0x3d433f45 - .word 0x46424949 - .word 0x494a4d51 - .word 0x49413c38 - .word 0x3e514143 - .word 0x47525353 - .word 0x514e5052 - .word 
0x525a5b58 - .word 0x5e575254 - .word 0x56545058 - .word 0x5a534947 - .word 0x4744544f - .word 0x4e515051 - .word 0x5a4b4545 - .word 0x454c4342 - .word 0x40504a3f - .word 0x4448535a -vref_end: - \ No newline at end of file From 3c29d7d6c8014ec83f273fb00df8722378f61ffb Mon Sep 17 00:00:00 2001 From: William Eustis Date: Thu, 3 Nov 2022 02:48:31 -0500 Subject: [PATCH 45/64] add edge cases back --- rtl/vproc_div_block.sv | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/rtl/vproc_div_block.sv b/rtl/vproc_div_block.sv index c45d630..6cd9f65 100644 --- a/rtl/vproc_div_block.sv +++ b/rtl/vproc_div_block.sv @@ -67,24 +67,24 @@ module vproc_div_block #( // DIV/U 1'b0 : begin div_d = $signed(op1_q) / $signed(op2_q); - // if (op2_q == '0) begin - // div_d = '1; - // end - // if (op1_q == {1'b1, {31{1'b0}}} && op2_q == '1) begin - // div_d = op1_q; - // end + if (op2_q == '0) begin + div_d = '1; + end + if (op1_q == {1'b1, {31{1'b0}}} && op2_q == '1) begin + div_d = op1_q; + end end // REM/U 1'b1 : begin div_d = $signed(op1_q) % $signed(op2_q); - // if (op2_q == 0) begin - // div_d = op1_q; - // end - // if (op1_q == {1'b1, {31{1'b0}}} && op2_q == '1) begin - // div_d = '0; - // end + if (op2_q == 0) begin + div_d = op1_q; + end + if (op1_q == {1'b1, {31{1'b0}}} && op2_q == '1) begin + div_d = '0; + end end default: ; endcase From be85c4f7fe2c739bc5b4e2a1dc966e717050f6c7 Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 9 Nov 2022 13:21:05 -0600 Subject: [PATCH 46/64] add vmul for behavior comparison --- test/div/vdiv_32.S | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/div/vdiv_32.S b/test/div/vdiv_32.S index e53bcd6..ea4e154 100644 --- a/test/div/vdiv_32.S +++ b/test/div/vdiv_32.S @@ -17,6 +17,12 @@ main: vdiv.vx v0, v0, t0 vse32.v v0, (a0) + li t1, 2 + + vle32.v v1, (a0) + vmul.vx v1, v1, t1 + vse32.v v1, (a0) + la a0, vdata_start la a1, vdata_end j spill_cache From b02b40d6810f99beca70e4fc3da757f87bba6c5a Mon 
Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 9 Nov 2022 14:07:56 -0600 Subject: [PATCH 47/64] seperate div and mul for comparison --- test/div/vdiv_32.S | 6 --- test/div/vmul_32.S | 100 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 6 deletions(-) create mode 100644 test/div/vmul_32.S diff --git a/test/div/vdiv_32.S b/test/div/vdiv_32.S index ea4e154..e53bcd6 100644 --- a/test/div/vdiv_32.S +++ b/test/div/vdiv_32.S @@ -17,12 +17,6 @@ main: vdiv.vx v0, v0, t0 vse32.v v0, (a0) - li t1, 2 - - vle32.v v1, (a0) - vmul.vx v1, v1, t1 - vse32.v v1, (a0) - la a0, vdata_start la a1, vdata_end j spill_cache diff --git a/test/div/vmul_32.S b/test/div/vmul_32.S new file mode 100644 index 0000000..7b23449 --- /dev/null +++ b/test/div/vmul_32.S @@ -0,0 +1,100 @@ +# Copyright TU Wien +# Licensed under the Solderpad Hardware License v2.1, see LICENSE.txt for details +# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + + + .text + .global main +main: + la a0, vdata_start + + li t0, 4 + vsetvli t0, t0, e32,m1,tu,mu + + li t0, 3 + + vle32.v v0, (a0) + vmul.vx v0, v0, t0 + vse32.v v0, (a0) + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0x323b3f47 + .word 0x47434b3a + .word 0x302f2e32 + .word 0xe8404a51 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0x96b1bdd5 + .word 0xd5c9e1ae + 
.word 0x908d8a96 + .word 0xb8c0def3 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: From f5e89f8a913ff880606489f6da0730de0156ceec Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 9 Nov 2022 16:03:26 -0600 Subject: [PATCH 48/64] add div buffering --- rtl/vproc_div.sv | 8 ++++---- rtl/vproc_pipeline.sv | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/rtl/vproc_div.sv b/rtl/vproc_div.sv index 9c3989c..8f8b81b 100644 --- a/rtl/vproc_div.sv +++ b/rtl/vproc_div.sv @@ -3,10 +3,10 @@ module vproc_div #( parameter int unsigned DIV_OP_W = 64, // DIV unit operand width in bits parameter vproc_pkg::div_type DIV_TYPE = vproc_pkg::DIV_GENERIC, - parameter bit BUF_OPERANDS = 1'b0, - parameter bit BUF_DIV_IN = 1'b0, - parameter bit BUF_DIV_OUT = 1'b0, - parameter bit BUF_RESULTS = 1'b0, + parameter bit BUF_OPERANDS = 1'b1, + parameter bit BUF_DIV_IN = 1'b1, + parameter bit BUF_DIV_OUT = 1'b1, + parameter bit BUF_RESULTS = 1'b1, parameter type CTRL_T = logic, parameter bit DONT_CARE_ZERO = 1'b0 )( diff --git a/rtl/vproc_pipeline.sv b/rtl/vproc_pipeline.sv index fb7fffe..1142577 100644 --- a/rtl/vproc_pipeline.sv +++ b/rtl/vproc_pipeline.sv @@ -680,10 +680,10 @@ module vproc_pipeline import vproc_pkg::*; #( logic last_cycle; logic init_addr; // initialize address (used by LSU) logic requires_flush; - logic alt_count_valid; // alternative counter value is valid - logic [AUX_COUNTER_W-1:0] aux_count; - logic [XIF_ID_W-1:0] id; - op_unit unit; + logic 
alt_count_valid; // alternative counter value is valid // 75 + logic [AUX_COUNTER_W-1:0] aux_count; // 74, 73 + logic [XIF_ID_W-1:0] id; // 72, 71, 70 + op_unit unit; // unit 67, 68, 69 op_mode mode; cfg_vsew eew; // effective element width cfg_emul emul; // effective MUL factor From 83104c9d128f258f993ba19fb02300872c3b6007 Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 9 Nov 2022 16:10:18 -0600 Subject: [PATCH 49/64] add all div tests --- test/div/vdiv_16.S | 98 ++++++++++++++++++++++++++++++ test/div/vdiv_32.S | 15 ++--- test/div/vdiv_8.S | 98 ++++++++++++++++++++++++++++++ test/div/vdivu_16.S | 98 ++++++++++++++++++++++++++++++ test/div/{vmul_32.S => vdivu_32.S} | 34 +++++------ test/div/vdivu_8.S | 98 ++++++++++++++++++++++++++++++ test/div/vrem_16.S | 98 ++++++++++++++++++++++++++++++ test/div/vrem_32.S | 98 ++++++++++++++++++++++++++++++ test/div/vrem_8.S | 98 ++++++++++++++++++++++++++++++ test/div/vremu_16.S | 98 ++++++++++++++++++++++++++++++ test/div/vremu_32.S | 98 ++++++++++++++++++++++++++++++ test/div/vremu_8.S | 98 ++++++++++++++++++++++++++++++ 12 files changed, 1002 insertions(+), 27 deletions(-) create mode 100644 test/div/vdiv_16.S create mode 100644 test/div/vdiv_8.S create mode 100644 test/div/vdivu_16.S rename test/div/{vmul_32.S => vdivu_32.S} (77%) create mode 100644 test/div/vdivu_8.S create mode 100644 test/div/vrem_16.S create mode 100644 test/div/vrem_32.S create mode 100644 test/div/vrem_8.S create mode 100644 test/div/vremu_16.S create mode 100644 test/div/vremu_32.S create mode 100644 test/div/vremu_8.S diff --git a/test/div/vdiv_16.S b/test/div/vdiv_16.S new file mode 100644 index 0000000..79d0613 --- /dev/null +++ b/test/div/vdiv_16.S @@ -0,0 +1,98 @@ + +# vdiv 16 + .text + .global main +main: + la a0, vdata_start + + li t0, 8 + vsetvli t0, t0, e16,m1,tu,mu + + li t0, 7 + + vle16.v v0, (a0) # load v0 with a0 + vdiv.vx v1, v0, t0 + vse16.v v1, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + 
+ + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0x568ddeda + .word 0x9cf96b00 + .word 0xf8c25cbd + .word 0xcd01d829 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0x0c5dfb43 + .word 0xf1da0f49 + .word 0xfef70d3f + .word 0xf8b7fa4f + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file diff --git a/test/div/vdiv_32.S b/test/div/vdiv_32.S index e53bcd6..480f2cb 100644 --- a/test/div/vdiv_32.S +++ b/test/div/vdiv_32.S @@ -1,21 +1,18 @@ -# Copyright TU Wien -# Licensed under the Solderpad Hardware License v2.1, see LICENSE.txt for details -# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 - +# vdiv 32 .text .global main main: - la a0, vdata_start + la a0, vdata_start li t0, 4 - vsetvli t0, t0, e32,m1,tu,mu + vsetvli t0, t0, e32,m1,tu,mu li t0, 7 - vle32.v v0, (a0) - vdiv.vx v0, v0, t0 - vse32.v v0, (a0) + vle32.v v0, (a0) # load v0 with a0 + vdiv.vx v1, v0, t0 + 
vse32.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end diff --git a/test/div/vdiv_8.S b/test/div/vdiv_8.S new file mode 100644 index 0000000..9251b82 --- /dev/null +++ b/test/div/vdiv_8.S @@ -0,0 +1,98 @@ + +# vdiv 8 + .text + .global main +main: + la a0, vdata_start + + li t0, 16 + vsetvli t0, t0, e8,m1,tu,mu + + li t0, 7 + + vle8.v v0, (a0) # load v0 with a0 + vdiv.vx v1, v0, t0 + vse8.v v1, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0x7403f246 + .word 0x17cd3fe5 + .word 0xfc619c43 + .word 0x7ad5a12b + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0x1000fe0a + .word 0x03f809fc + .word 0xff0df109 + .word 0x11f9f206 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file diff --git a/test/div/vdivu_16.S b/test/div/vdivu_16.S new file mode 100644 index 
0000000..34f47d6 --- /dev/null +++ b/test/div/vdivu_16.S @@ -0,0 +1,98 @@ + +# vdiv u 16 + .text + .global main +main: + la a0, vdata_start + + li t0, 8 + vsetvli t0, t0, e16,m1,tu,mu + + li t0, 7 + + vle16.v v0, (a0) # load v0 with a0 + vdivu.vx v1, v0, t0 + vse16.v v1, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0xde02a01f + .word 0xbee70e60 + .word 0x9ed75bbc + .word 0x5d7b59c1 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0x1fb716df + .word 0x1b45020d + .word 0x16b10d1a + .word 0x0d5a0cd2 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file diff --git a/test/div/vmul_32.S b/test/div/vdivu_32.S similarity index 77% rename from test/div/vmul_32.S rename to test/div/vdivu_32.S index 7b23449..e2a524f 100644 --- a/test/div/vmul_32.S +++ b/test/div/vdivu_32.S @@ -1,21 +1,18 
@@ -# Copyright TU Wien -# Licensed under the Solderpad Hardware License v2.1, see LICENSE.txt for details -# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 - +# vdiv u 32 .text .global main main: - la a0, vdata_start + la a0, vdata_start li t0, 4 - vsetvli t0, t0, e32,m1,tu,mu + vsetvli t0, t0, e32,m1,tu,mu - li t0, 3 + li t0, 7 - vle32.v v0, (a0) - vmul.vx v0, v0, t0 - vse32.v v0, (a0) + vle32.v v0, (a0) # load v0 with a0 + vdivu.vx v1, v0, t0 + vse32.v v1, (a0) # store into a0 la a0, vdata_start la a1, vdata_end @@ -27,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x323b3f47 - .word 0x47434b3a - .word 0x302f2e32 - .word 0xe8404a51 + .word 0x2ad77dea + .word 0x1060d421 + .word 0x274934b9 + .word 0xc7ae8d3e .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -65,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x96b1bdd5 - .word 0xd5c9e1ae - .word 0x908d8a96 - .word 0xb8c0def3 + .word 0x061ec8d8 + .word 0x0256f9bb + .word 0x059cbe63 + .word 0x1c86a676 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -98,3 +95,4 @@ vref_start: .word 0x40504a3f .word 0x4448535a vref_end: + \ No newline at end of file diff --git a/test/div/vdivu_8.S b/test/div/vdivu_8.S new file mode 100644 index 0000000..e7e2a40 --- /dev/null +++ b/test/div/vdivu_8.S @@ -0,0 +1,98 @@ + +# vdiv u 8 + .text + .global main +main: + la a0, vdata_start + + li t0, 16 + vsetvli t0, t0, e8,m1,tu,mu + + li t0, 7 + + vle8.v v0, (a0) # load v0 with a0 + vdivu.vx v1, v0, t0 + vse8.v v1, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0xc1b60b4d + .word 0xa712aeae + .word 0x49564f81 + .word 0x71241229 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 
+ .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0x1b1a010b + .word 0x17021818 + .word 0x0a0c0b12 + .word 0x10050205 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file diff --git a/test/div/vrem_16.S b/test/div/vrem_16.S new file mode 100644 index 0000000..b67dcbb --- /dev/null +++ b/test/div/vrem_16.S @@ -0,0 +1,98 @@ + +# vrem 16 + .text + .global main +main: + la a0, vdata_start + + li t0, 8 + vsetvli t0, t0, e16,m1,tu,mu + + li t0, 7 + + vle16.v v0, (a0) # load v0 with a0 + vrem.vx v1, v0, t0 + vse16.v v1, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0x6466b622 + .word 0x614868a0 + .word 0x8daae682 + .word 0xd3547ec7 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + 
.word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0x0005fffd + .word 0x00050002 + .word 0xfffdfffe + .word 0xfffb0003 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file diff --git a/test/div/vrem_32.S b/test/div/vrem_32.S new file mode 100644 index 0000000..1449ef0 --- /dev/null +++ b/test/div/vrem_32.S @@ -0,0 +1,98 @@ + +# vrem 32 + .text + .global main +main: + la a0, vdata_start + + li t0, 4 + vsetvli t0, t0, e32,m1,tu,mu + + li t0, 7 + + vle32.v v0, (a0) # load v0 with a0 + vrem.vx v1, v0, t0 + vse32.v v1, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0x5a1dc928 + .word 0xfdb23e3a + .word 0x3723f66c + .word 0xea3993f2 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a 
+vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0x00000005 + .word 0xfffffffa + .word 0x00000006 + .word 0xfffffffe + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file diff --git a/test/div/vrem_8.S b/test/div/vrem_8.S new file mode 100644 index 0000000..675c6d0 --- /dev/null +++ b/test/div/vrem_8.S @@ -0,0 +1,98 @@ + +# vrem 8 + .text + .global main +main: + la a0, vdata_start + + li t0, 16 + vsetvli t0, t0, e8,m1,tu,mu + + li t0, 7 + + vle8.v v0, (a0) # load v0 with a0 + vrem.vx v1, v0, t0 + vse8.v v1, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0x4f087f4e + .word 0x95f9f5c3 + .word 0xa31bdc66 + .word 0x00694342 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0x02010101 + .word 0xfef9fcfb + .word 0xfe06ff04 + .word 0x00000403 + .word 
0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file diff --git a/test/div/vremu_16.S b/test/div/vremu_16.S new file mode 100644 index 0000000..6970503 --- /dev/null +++ b/test/div/vremu_16.S @@ -0,0 +1,98 @@ + +# vrem u 16 + .text + .global main +main: + la a0, vdata_start + + li t0, 8 + vsetvli t0, t0, e16,m1,tu,mu + + li t0, 7 + + vle16.v v0, (a0) # load v0 with a0 + vremu.vx v1, v0, t0 + vse16.v v1, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0x90b6998e + .word 0x4a26b13d + .word 0x7a69d63d + .word 0x6f2eeb9e + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0x00020005 + .word 0x00050006 + .word 0x00050000 + .word 0x00000006 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + 
.word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file diff --git a/test/div/vremu_32.S b/test/div/vremu_32.S new file mode 100644 index 0000000..667c0f7 --- /dev/null +++ b/test/div/vremu_32.S @@ -0,0 +1,98 @@ + +# vrem u 32 + .text + .global main +main: + la a0, vdata_start + + li t0, 4 + vsetvli t0, t0, e32,m1,tu,mu + + li t0, 7 + + vle32.v v0, (a0) # load v0 with a0 + vremu.vx v1, v0, t0 + vse32.v v1, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0x7d07b3fc + .word 0x9d421702 + .word 0xc86535f2 + .word 0x818acc97 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0x00000001 + .word 0x00000005 + .word 0x00000002 + .word 0x00000000 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 
+ .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file diff --git a/test/div/vremu_8.S b/test/div/vremu_8.S new file mode 100644 index 0000000..77d06bf --- /dev/null +++ b/test/div/vremu_8.S @@ -0,0 +1,98 @@ + +# vrem u 8 + .text + .global main +main: + la a0, vdata_start + + li t0, 16 + vsetvli t0, t0, e8,m1,tu,mu + + li t0, 7 + + vle8.v v0, (a0) # load v0 with a0 + vremu.vx v1, v0, t0 + vse8.v v1, (a0) # store into a0 + + la a0, vdata_start + la a1, vdata_end + j spill_cache + + + .data + .align 10 + .global vdata_start + .global vdata_end +vdata_start: + .word 0xccc2b5fe + .word 0xfaa1ca98 + .word 0x22c5463f + .word 0x56b72baf + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + .word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vdata_end: + + .align 10 + .global vref_start + .global vref_end +vref_start: + .word 0x01050602 + .word 0x05000605 + .word 0x06010000 + .word 0x02010100 + .word 0x3f44383b + .word 0x37424d54 + .word 0x5e4b5049 + .word 0x4c4c4c4a + .word 0x4a505f3e + .word 0x485e5455 + .word 0x4d4c4a41 + .word 0x373b5451 + .word 0x41454c45 + .word 0x3a3e3738 + .word 0x312f2e2f + .word 0x3d433f45 + .word 0x46424949 + .word 0x494a4d51 + .word 0x49413c38 + .word 0x3e514143 + .word 0x47525353 + .word 0x514e5052 + .word 0x525a5b58 + .word 0x5e575254 + .word 0x56545058 + .word 0x5a534947 + .word 0x4744544f + .word 0x4e515051 + 
.word 0x5a4b4545 + .word 0x454c4342 + .word 0x40504a3f + .word 0x4448535a +vref_end: + \ No newline at end of file From 61698e4fed7dffdfbffc7c1a9675530c22eb45c2 Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 9 Nov 2022 16:33:03 -0600 Subject: [PATCH 50/64] fixed python negative integer division floor-ing instead of zero-ing --- test/div/vdiv_16.S | 22 +++++++++++----------- test/div/vdiv_32.S | 22 +++++++++++----------- test/div/vdiv_8.S | 22 +++++++++++----------- test/div/vdivu_16.S | 22 +++++++++++----------- test/div/vdivu_32.S | 22 +++++++++++----------- test/div/vdivu_8.S | 22 +++++++++++----------- test/div/vrem_16.S | 22 +++++++++++----------- test/div/vrem_32.S | 20 ++++++++++---------- test/div/vrem_8.S | 22 +++++++++++----------- test/div/vremu_16.S | 22 +++++++++++----------- test/div/vremu_32.S | 18 +++++++++--------- test/div/vremu_8.S | 22 +++++++++++----------- 12 files changed, 129 insertions(+), 129 deletions(-) diff --git a/test/div/vdiv_16.S b/test/div/vdiv_16.S index 79d0613..4005bf3 100644 --- a/test/div/vdiv_16.S +++ b/test/div/vdiv_16.S @@ -10,9 +10,9 @@ main: li t0, 7 - vle16.v v0, (a0) # load v0 with a0 - vdiv.vx v1, v0, t0 - vse16.v v1, (a0) # store into a0 + vle16.v v0, (a0) + vdiv.vx v0, v0, t0 + vse16.v v0, (a0) la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x568ddeda - .word 0x9cf96b00 - .word 0xf8c25cbd - .word 0xcd01d829 + .word 0x8b260d5f + .word 0x3d45c033 + .word 0xe0ed5d8e + .word 0x9cb16416 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x0c5dfb43 - .word 0xf1da0f49 - .word 0xfef70d3f - .word 0xf8b7fa4f + .word 0xef4f01e9 + .word 0x08c0f6e3 + .word 0xfb900d5d + .word 0xf1d10e4c .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdiv_32.S b/test/div/vdiv_32.S index 480f2cb..c82b1ae 100644 --- a/test/div/vdiv_32.S +++ 
b/test/div/vdiv_32.S @@ -10,9 +10,9 @@ main: li t0, 7 - vle32.v v0, (a0) # load v0 with a0 - vdiv.vx v1, v0, t0 - vse32.v v1, (a0) # store into a0 + vle32.v v0, (a0) + vdiv.vx v0, v0, t0 + vse32.v v0, (a0) la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xed35a74b - .word 0xea17c0b2 - .word 0xf9bc6506 - .word 0x6c6a2bc8 + .word 0x43863db2 + .word 0x3a03336a + .word 0xd9626133 + .word 0x4f71d115 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0xfd50cec1 - .word 0xfcded262 - .word 0xff1ae9dc - .word 0x0f7ce1ae + .word 0x09a57687 + .word 0x084999a1 + .word 0xfa7bc4bf + .word 0x0b596703 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdiv_8.S b/test/div/vdiv_8.S index 9251b82..23f4814 100644 --- a/test/div/vdiv_8.S +++ b/test/div/vdiv_8.S @@ -10,9 +10,9 @@ main: li t0, 7 - vle8.v v0, (a0) # load v0 with a0 - vdiv.vx v1, v0, t0 - vse8.v v1, (a0) # store into a0 + vle8.v v0, (a0) + vdiv.vx v0, v0, t0 + vse8.v v0, (a0) la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x7403f246 - .word 0x17cd3fe5 - .word 0xfc619c43 - .word 0x7ad5a12b + .word 0xe6138f73 + .word 0x47bae3f4 + .word 0xee00dec5 + .word 0xa0c03478 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x1000fe0a - .word 0x03f809fc - .word 0xff0df109 - .word 0x11f9f206 + .word 0xfd02f010 + .word 0x0af6fcff + .word 0xfe00fcf8 + .word 0xf3f70711 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdivu_16.S b/test/div/vdivu_16.S index 34f47d6..afb214f 100644 --- a/test/div/vdivu_16.S +++ b/test/div/vdivu_16.S @@ -10,9 +10,9 @@ main: li t0, 7 - vle16.v v0, (a0) # load v0 with a0 - vdivu.vx v1, v0, t0 - vse16.v v1, (a0) # store into a0 + vle16.v v0, (a0) + 
vdivu.vx v0, v0, t0 + vse16.v v0, (a0) la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xde02a01f - .word 0xbee70e60 - .word 0x9ed75bbc - .word 0x5d7b59c1 + .word 0x9c220819 + .word 0xee4aa6b1 + .word 0xfde66ff8 + .word 0x4acabc92 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x1fb716df - .word 0x1b45020d - .word 0x16b10d1a - .word 0x0d5a0cd2 + .word 0x164e0128 + .word 0x220a17d0 + .word 0x24450ffe + .word 0x0aaf1af0 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdivu_32.S b/test/div/vdivu_32.S index e2a524f..40d9bb9 100644 --- a/test/div/vdivu_32.S +++ b/test/div/vdivu_32.S @@ -10,9 +10,9 @@ main: li t0, 7 - vle32.v v0, (a0) # load v0 with a0 - vdivu.vx v1, v0, t0 - vse32.v v1, (a0) # store into a0 + vle32.v v0, (a0) + vdivu.vx v0, v0, t0 + vse32.v v0, (a0) la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x2ad77dea - .word 0x1060d421 - .word 0x274934b9 - .word 0xc7ae8d3e + .word 0x7aac3d13 + .word 0xee09308d + .word 0xb67109a1 + .word 0xacba8c77 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x061ec8d8 - .word 0x0256f9bb - .word 0x059cbe63 - .word 0x1c86a676 + .word 0x118651de + .word 0x22015014 + .word 0x1a1025f2 + .word 0x18acef7e .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdivu_8.S b/test/div/vdivu_8.S index e7e2a40..1f43533 100644 --- a/test/div/vdivu_8.S +++ b/test/div/vdivu_8.S @@ -10,9 +10,9 @@ main: li t0, 7 - vle8.v v0, (a0) # load v0 with a0 - vdivu.vx v1, v0, t0 - vse8.v v1, (a0) # store into a0 + vle8.v v0, (a0) + vdivu.vx v0, v0, t0 + vse8.v v0, (a0) la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 
0xc1b60b4d - .word 0xa712aeae - .word 0x49564f81 - .word 0x71241229 + .word 0xf4abfa2b + .word 0xd04307e8 + .word 0xed88c629 + .word 0xa28394bc .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x1b1a010b - .word 0x17021818 - .word 0x0a0c0b12 - .word 0x10050205 + .word 0x22182306 + .word 0x1d090121 + .word 0x21131c05 + .word 0x1712151a .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vrem_16.S b/test/div/vrem_16.S index b67dcbb..22d8c5b 100644 --- a/test/div/vrem_16.S +++ b/test/div/vrem_16.S @@ -10,9 +10,9 @@ main: li t0, 7 - vle16.v v0, (a0) # load v0 with a0 - vrem.vx v1, v0, t0 - vse16.v v1, (a0) # store into a0 + vle16.v v0, (a0) + vrem.vx v0, v0, t0 + vse16.v v0, (a0) la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x6466b622 - .word 0x614868a0 - .word 0x8daae682 - .word 0xd3547ec7 + .word 0x3bc270f1 + .word 0xa1878f55 + .word 0xc29053ef + .word 0xf5bf84a8 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x0005fffd - .word 0x00050002 - .word 0xfffdfffe - .word 0xfffb0003 + .word 0x00030003 + .word 0xfff9fffd + .word 0xfffa0004 + .word 0xfff9fffa .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vrem_32.S b/test/div/vrem_32.S index 1449ef0..d4f6933 100644 --- a/test/div/vrem_32.S +++ b/test/div/vrem_32.S @@ -10,9 +10,9 @@ main: li t0, 7 - vle32.v v0, (a0) # load v0 with a0 - vrem.vx v1, v0, t0 - vse32.v v1, (a0) # store into a0 + vle32.v v0, (a0) + vrem.vx v0, v0, t0 + vse32.v v0, (a0) la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x5a1dc928 - .word 0xfdb23e3a - .word 0x3723f66c - .word 0xea3993f2 + .word 0x97f99423 + .word 0xd9963a95 + .word 0xd8ac72fd + .word 0xca6a7b79 .word 0x3f44383b .word 
0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x00000005 - .word 0xfffffffa - .word 0x00000006 .word 0xfffffffe + .word 0xfffffffe + .word 0xfffffffe + .word 0xfffffffa .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vrem_8.S b/test/div/vrem_8.S index 675c6d0..e0b95ff 100644 --- a/test/div/vrem_8.S +++ b/test/div/vrem_8.S @@ -10,9 +10,9 @@ main: li t0, 7 - vle8.v v0, (a0) # load v0 with a0 - vrem.vx v1, v0, t0 - vse8.v v1, (a0) # store into a0 + vle8.v v0, (a0) + vrem.vx v0, v0, t0 + vse8.v v0, (a0) la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x4f087f4e - .word 0x95f9f5c3 - .word 0xa31bdc66 - .word 0x00694342 + .word 0xbb1d99d6 + .word 0x05172ada + .word 0xd1cbb427 + .word 0xff5f23f1 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x02010101 - .word 0xfef9fcfb - .word 0xfe06ff04 - .word 0x00000403 + .word 0xfa01fbf9 + .word 0x050200fd + .word 0xfbfcfa04 + .word 0xff0400ff .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vremu_16.S b/test/div/vremu_16.S index 6970503..a436bb0 100644 --- a/test/div/vremu_16.S +++ b/test/div/vremu_16.S @@ -10,9 +10,9 @@ main: li t0, 7 - vle16.v v0, (a0) # load v0 with a0 - vremu.vx v1, v0, t0 - vse16.v v1, (a0) # store into a0 + vle16.v v0, (a0) + vremu.vx v0, v0, t0 + vse16.v v0, (a0) la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x90b6998e - .word 0x4a26b13d - .word 0x7a69d63d - .word 0x6f2eeb9e + .word 0x67bb2a86 + .word 0x4d6a2cd6 + .word 0x3cb971e8 + .word 0xa8c23e95 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x00020005 - .word 0x00050006 - .word 0x00050000 - .word 0x00000006 + .word 
0x00040001 + .word 0x00010005 + .word 0x00050005 + .word 0x00050005 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vremu_32.S b/test/div/vremu_32.S index 667c0f7..9051683 100644 --- a/test/div/vremu_32.S +++ b/test/div/vremu_32.S @@ -10,9 +10,9 @@ main: li t0, 7 - vle32.v v0, (a0) # load v0 with a0 - vremu.vx v1, v0, t0 - vse32.v v1, (a0) # store into a0 + vle32.v v0, (a0) + vremu.vx v0, v0, t0 + vse32.v v0, (a0) la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x7d07b3fc - .word 0x9d421702 - .word 0xc86535f2 - .word 0x818acc97 + .word 0xbd2aae44 + .word 0xe03f4c5f + .word 0x14cad933 + .word 0x0a5af7f0 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -63,9 +63,9 @@ vdata_end: .global vref_end vref_start: .word 0x00000001 - .word 0x00000005 - .word 0x00000002 .word 0x00000000 + .word 0x00000006 + .word 0x00000004 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vremu_8.S b/test/div/vremu_8.S index 77d06bf..ab18473 100644 --- a/test/div/vremu_8.S +++ b/test/div/vremu_8.S @@ -10,9 +10,9 @@ main: li t0, 7 - vle8.v v0, (a0) # load v0 with a0 - vremu.vx v1, v0, t0 - vse8.v v1, (a0) # store into a0 + vle8.v v0, (a0) + vremu.vx v0, v0, t0 + vse8.v v0, (a0) la a0, vdata_start la a1, vdata_end @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xccc2b5fe - .word 0xfaa1ca98 - .word 0x22c5463f - .word 0x56b72baf + .word 0xc4820122 + .word 0x05c8ff27 + .word 0x4fd24530 + .word 0xa8e49c73 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x01050602 - .word 0x05000605 - .word 0x06010000 - .word 0x02010100 + .word 0x00040106 + .word 0x05040304 + .word 0x02000606 + .word 0x00040203 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 From 8fc31a4b3858777ece5200b37eddc163efde3855 Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 9 
Nov 2022 16:50:58 -0600 Subject: [PATCH 51/64] sign extend 32b calculations to 33b to prevent unsigned 32b div/rem to think it is signed and negative --- rtl/vproc_div.sv | 36 ++++++++++++++++++------------------ rtl/vproc_div_block.sv | 12 ++++++------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/rtl/vproc_div.sv b/rtl/vproc_div.sv index 8f8b81b..0ea0735 100644 --- a/rtl/vproc_div.sv +++ b/rtl/vproc_div.sv @@ -203,37 +203,37 @@ module vproc_div #( end end - logic [(DIV_OP_W*4)-1:0] div_op1, div_op2; + logic [((DIV_OP_W+1)*4)-1:0] div_op1, div_op2; always_comb begin div_op1 = DONT_CARE_ZERO ? '0 : 'x; div_op2 = DONT_CARE_ZERO ? '0 : 'x; for (int i = 0; i < DIV_OP_W / 32; i++) begin unique case (state_ex1_q.eew) VSEW_8: begin - div_op1[32*(4*i+0) +: 32] = {{24{op1_signs[4*i+0]}}, operand1_q[32*(i)+8*0 +: 8]}; - div_op1[32*(4*i+1) +: 32] = {{24{op1_signs[4*i+1]}}, operand1_q[32*(i)+8*1 +: 8]}; - div_op1[32*(4*i+2) +: 32] = {{24{op1_signs[4*i+2]}}, operand1_q[32*(i)+8*2 +: 8]}; - div_op1[32*(4*i+3) +: 32] = {{24{op1_signs[4*i+3]}}, operand1_q[32*(i)+8*3 +: 8]}; - - div_op2[32*(4*i+0) +: 32] = {{24{op2_signs[4*i+0]}}, operand2_q[32*(i)+8*0 +: 8]}; - div_op2[32*(4*i+1) +: 32] = {{24{op2_signs[4*i+1]}}, operand2_q[32*(i)+8*1 +: 8]}; - div_op2[32*(4*i+2) +: 32] = {{24{op2_signs[4*i+2]}}, operand2_q[32*(i)+8*2 +: 8]}; - div_op2[32*(4*i+3) +: 32] = {{24{op2_signs[4*i+3]}}, operand2_q[32*(i)+8*3 +: 8]}; + div_op1[33*(4*i+0) +: 33] = {{25{op1_signs[4*i+0]}}, operand1_q[32*(i)+8*0 +: 8]}; + div_op1[33*(4*i+1) +: 33] = {{25{op1_signs[4*i+1]}}, operand1_q[32*(i)+8*1 +: 8]}; + div_op1[33*(4*i+2) +: 33] = {{25{op1_signs[4*i+2]}}, operand1_q[32*(i)+8*2 +: 8]}; + div_op1[33*(4*i+3) +: 33] = {{25{op1_signs[4*i+3]}}, operand1_q[32*(i)+8*3 +: 8]}; + + div_op2[33*(4*i+0) +: 33] = {{25{op2_signs[4*i+0]}}, operand2_q[32*(i)+8*0 +: 8]}; + div_op2[33*(4*i+1) +: 33] = {{25{op2_signs[4*i+1]}}, operand2_q[32*(i)+8*1 +: 8]}; + div_op2[33*(4*i+2) +: 33] = 
{{25{op2_signs[4*i+2]}}, operand2_q[32*(i)+8*2 +: 8]}; + div_op2[33*(4*i+3) +: 33] = {{25{op2_signs[4*i+3]}}, operand2_q[32*(i)+8*3 +: 8]}; end VSEW_16:begin - div_op1[32*(2*i+0) +: 32] = {{16{op1_signs[4*i+1]}}, operand1_q[32*i+16*0 +: 16]}; - div_op1[32*(2*i+1) +: 32] = {{16{op1_signs[4*i+3]}}, operand1_q[32*i+16*1 +: 16]}; + div_op1[33*(2*i+0) +: 33] = {{15{op1_signs[4*i+1]}}, operand1_q[32*i+16*0 +: 16]}; + div_op1[33*(2*i+1) +: 33] = {{15{op1_signs[4*i+3]}}, operand1_q[32*i+16*1 +: 16]}; - div_op2[32*(2*i+0) +: 32] = {{16{op2_signs[4*i+1]}}, operand2_q[32*i+16*0 +: 16]}; - div_op2[32*(2*i+1) +: 32] = {{16{op2_signs[4*i+3]}}, operand2_q[32*i+16*1 +: 16]}; + div_op2[33*(2*i+0) +: 33] = {{15{op2_signs[4*i+1]}}, operand2_q[32*i+16*0 +: 16]}; + div_op2[33*(2*i+1) +: 33] = {{15{op2_signs[4*i+3]}}, operand2_q[32*i+16*1 +: 16]}; end VSEW_32: begin - div_op1[32*i +: 32] = operand1_q[32*i +: 32]; + div_op1[33*i +: 33] = {{1{op1_signs[4*i+3]}}, operand1_q[32*i +: 32]}; // operand1_q[32*i +: 32]; - div_op2[32*i +: 32] = operand2_q[32*i +: 32]; + div_op2[33*i +: 33] = {{1{op2_signs[4*i+3]}}, operand2_q[32*i +: 32]}; // operand2_q[32*i +: 32]; end default: ; endcase @@ -255,8 +255,8 @@ module vproc_div #( .async_rst_ni (async_rst_ni ), .sync_rst_ni (sync_rst_ni ), .mod (state_ex3_q.mode.div.op), // tells div_block to mod or not - .op1_i (div_op1 [32*g +: 32] ), - .op2_i (div_op2 [32*g +: 32] ), + .op1_i (div_op1 [33*g +: 33] ), + .op2_i (div_op2 [33*g +: 33] ), .res_o (div_res [32*g +: 32] ) ); end diff --git a/rtl/vproc_div_block.sv b/rtl/vproc_div_block.sv index 6cd9f65..870d6a1 100644 --- a/rtl/vproc_div_block.sv +++ b/rtl/vproc_div_block.sv @@ -13,8 +13,8 @@ module vproc_div_block #( input logic mod, // 0 = quotient, 1 = modulo - input logic [31:0] op1_i, - input logic [31:0] op2_i, + input logic [32:0] op1_i, + input logic [32:0] op2_i, output logic [31:0] res_o @@ -25,9 +25,9 @@ module vproc_div_block #( vproc_pkg::DIV_GENERIC: begin - logic [31:0] op1_q, op2_q; - 
logic [31:0] div_q, div_d; - logic [31:0] res_q, res_d; + logic [32:0] op1_q, op2_q; + logic [32:0] div_q, div_d; + logic [32:0] res_q, res_d; if (BUF_OPS) begin always_ff @(posedge clk_i) begin @@ -91,7 +91,7 @@ module vproc_div_block #( end assign res_d = div_q; - assign res_o = res_q; + assign res_o = res_q[31:0]; end From 6deacdf3fca20a0f6ac1a4ff24260f7b5b8ffbed Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 9 Nov 2022 16:57:18 -0600 Subject: [PATCH 52/64] fixed typo for 16b sigext --- rtl/vproc_div.sv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rtl/vproc_div.sv b/rtl/vproc_div.sv index 0ea0735..e3c01e9 100644 --- a/rtl/vproc_div.sv +++ b/rtl/vproc_div.sv @@ -223,11 +223,11 @@ module vproc_div #( VSEW_16:begin - div_op1[33*(2*i+0) +: 33] = {{15{op1_signs[4*i+1]}}, operand1_q[32*i+16*0 +: 16]}; - div_op1[33*(2*i+1) +: 33] = {{15{op1_signs[4*i+3]}}, operand1_q[32*i+16*1 +: 16]}; + div_op1[33*(2*i+0) +: 33] = {{17{op1_signs[4*i+1]}}, operand1_q[32*i+16*0 +: 16]}; + div_op1[33*(2*i+1) +: 33] = {{17{op1_signs[4*i+3]}}, operand1_q[32*i+16*1 +: 16]}; - div_op2[33*(2*i+0) +: 33] = {{15{op2_signs[4*i+1]}}, operand2_q[32*i+16*0 +: 16]}; - div_op2[33*(2*i+1) +: 33] = {{15{op2_signs[4*i+3]}}, operand2_q[32*i+16*1 +: 16]}; + div_op2[33*(2*i+0) +: 33] = {{17{op2_signs[4*i+1]}}, operand2_q[32*i+16*0 +: 16]}; + div_op2[33*(2*i+1) +: 33] = {{17{op2_signs[4*i+3]}}, operand2_q[32*i+16*1 +: 16]}; end VSEW_32: begin From 4fd57f31186f8849c459bf3bdd573e3ea2956452 Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 9 Nov 2022 17:06:51 -0600 Subject: [PATCH 53/64] fix mod/div behavior state to 2 instead of 3 --- rtl/vproc_div.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtl/vproc_div.sv b/rtl/vproc_div.sv index e3c01e9..bf20e1d 100644 --- a/rtl/vproc_div.sv +++ b/rtl/vproc_div.sv @@ -254,7 +254,7 @@ module vproc_div #( .clk_i (clk_i ), .async_rst_ni (async_rst_ni ), .sync_rst_ni (sync_rst_ni ), - .mod 
(state_ex3_q.mode.div.op), // tells div_block to mod or not + .mod (state_ex2_q.mode.div.op), // tells div_block to mod or not .op1_i (div_op1 [33*g +: 33] ), .op2_i (div_op2 [33*g +: 33] ), .res_o (div_res [32*g +: 32] ) From 91beab6ddc3344c5d84e20b2b6b9090f5f037b24 Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 9 Nov 2022 17:11:26 -0600 Subject: [PATCH 54/64] fix negative remainder behavior in python test generator --- test/div/vdiv_16.S | 16 ++++++++-------- test/div/vdiv_32.S | 16 ++++++++-------- test/div/vdiv_8.S | 16 ++++++++-------- test/div/vdivu_16.S | 16 ++++++++-------- test/div/vdivu_32.S | 16 ++++++++-------- test/div/vdivu_8.S | 16 ++++++++-------- test/div/vrem_16.S | 16 ++++++++-------- test/div/vrem_32.S | 14 +++++++------- test/div/vrem_8.S | 16 ++++++++-------- test/div/vremu_16.S | 14 +++++++------- test/div/vremu_32.S | 10 +++++----- test/div/vremu_8.S | 16 ++++++++-------- 12 files changed, 91 insertions(+), 91 deletions(-) diff --git a/test/div/vdiv_16.S b/test/div/vdiv_16.S index 4005bf3..60735b9 100644 --- a/test/div/vdiv_16.S +++ b/test/div/vdiv_16.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x8b260d5f - .word 0x3d45c033 - .word 0xe0ed5d8e - .word 0x9cb16416 + .word 0xcffd8f22 + .word 0x2672ae3b + .word 0xbc20d744 + .word 0x619780f3 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0xef4f01e9 - .word 0x08c0f6e3 - .word 0xfb900d5d - .word 0xf1d10e4c + .word 0xf925efe1 + .word 0x057ef452 + .word 0xf64efa2f + .word 0x0df1edda .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdiv_32.S b/test/div/vdiv_32.S index c82b1ae..7526e99 100644 --- a/test/div/vdiv_32.S +++ b/test/div/vdiv_32.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x43863db2 - .word 0x3a03336a - .word 0xd9626133 - .word 0x4f71d115 + .word 0x1e3afe22 + .word 0x23781b2b + .word 
0xb4bd28fa + .word 0xdece3589 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x09a57687 - .word 0x084999a1 - .word 0xfa7bc4bf - .word 0x0b596703 + .word 0x04519204 + .word 0x05112873 + .word 0xf53f9824 + .word 0xfb4207a6 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdiv_8.S b/test/div/vdiv_8.S index 23f4814..b382806 100644 --- a/test/div/vdiv_8.S +++ b/test/div/vdiv_8.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xe6138f73 - .word 0x47bae3f4 - .word 0xee00dec5 - .word 0xa0c03478 + .word 0xce9cfd51 + .word 0xef02b593 + .word 0x3b987927 + .word 0xd3adb9dc .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0xfd02f010 - .word 0x0af6fcff - .word 0xfe00fcf8 - .word 0xf3f70711 + .word 0xf9f2000b + .word 0xfe00f6f1 + .word 0x08f21105 + .word 0xfaf5f6fb .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdivu_16.S b/test/div/vdivu_16.S index afb214f..a320f0f 100644 --- a/test/div/vdivu_16.S +++ b/test/div/vdivu_16.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x9c220819 - .word 0xee4aa6b1 - .word 0xfde66ff8 - .word 0x4acabc92 + .word 0x6f31f594 + .word 0xe0eb44da + .word 0x4c5d4d64 + .word 0x5eb58465 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x164e0128 - .word 0x220a17d0 - .word 0x24450ffe - .word 0x0aaf1af0 + .word 0x0fe22315 + .word 0x202109d6 + .word 0x0ae80b0e + .word 0x0d8712e9 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdivu_32.S b/test/div/vdivu_32.S index 40d9bb9..85d92a4 100644 --- a/test/div/vdivu_32.S +++ b/test/div/vdivu_32.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x7aac3d13 - .word 
0xee09308d - .word 0xb67109a1 - .word 0xacba8c77 + .word 0xf7e77afa + .word 0x9583c2ad + .word 0xa36abb98 + .word 0x80740893 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x118651de - .word 0x22015014 - .word 0x1a1025f2 - .word 0x18acef7e + .word 0x236a3623 + .word 0x155bf73d + .word 0x175863f1 + .word 0x1259b815 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vdivu_8.S b/test/div/vdivu_8.S index 1f43533..91e01d3 100644 --- a/test/div/vdivu_8.S +++ b/test/div/vdivu_8.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xf4abfa2b - .word 0xd04307e8 - .word 0xed88c629 - .word 0xa28394bc + .word 0xdc9a262b + .word 0x73059d6a + .word 0xb795837c + .word 0xe49bbaeb .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x22182306 - .word 0x1d090121 - .word 0x21131c05 - .word 0x1712151a + .word 0x1f160506 + .word 0x1000160f + .word 0x1a151211 + .word 0x20161a21 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vrem_16.S b/test/div/vrem_16.S index 22d8c5b..ba6a94e 100644 --- a/test/div/vrem_16.S +++ b/test/div/vrem_16.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x3bc270f1 - .word 0xa1878f55 - .word 0xc29053ef - .word 0xf5bf84a8 + .word 0x39bd3821 + .word 0x962a2c2d + .word 0xf7003d1f + .word 0x30712f75 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x00030003 - .word 0xfff9fffd - .word 0xfffa0004 - .word 0xfff9fffa + .word 0x00040005 + .word 0xfffc0004 + .word 0xffff0002 + .word 0x00040004 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vrem_32.S b/test/div/vrem_32.S index d4f6933..e7ff380 100644 --- a/test/div/vrem_32.S +++ b/test/div/vrem_32.S @@ -24,10 +24,10 
@@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x97f99423 - .word 0xd9963a95 - .word 0xd8ac72fd - .word 0xca6a7b79 + .word 0xa4b7ef07 + .word 0x37ace96b + .word 0x0ca44246 + .word 0x124fe8c1 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -63,9 +63,9 @@ vdata_end: .global vref_end vref_start: .word 0xfffffffe - .word 0xfffffffe - .word 0xfffffffe - .word 0xfffffffa + .word 0x00000003 + .word 0x00000002 + .word 0x00000002 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vrem_8.S b/test/div/vrem_8.S index e0b95ff..a289fd6 100644 --- a/test/div/vrem_8.S +++ b/test/div/vrem_8.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xbb1d99d6 - .word 0x05172ada - .word 0xd1cbb427 - .word 0xff5f23f1 + .word 0x00dedebe + .word 0xae9cb4b0 + .word 0xc273b35d + .word 0xb31ade80 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0xfa01fbf9 - .word 0x050200fd - .word 0xfbfcfa04 - .word 0xff0400ff + .word 0x00fafafd + .word 0xfbfefafd + .word 0xfa030002 + .word 0x0005fafe .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vremu_16.S b/test/div/vremu_16.S index a436bb0..3de969c 100644 --- a/test/div/vremu_16.S +++ b/test/div/vremu_16.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0x67bb2a86 - .word 0x4d6a2cd6 - .word 0x3cb971e8 - .word 0xa8c23e95 + .word 0xde449e5a + .word 0x392f8ffb + .word 0xd69a5b50 + .word 0x22645570 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -63,9 +63,9 @@ vdata_end: .global vref_end vref_start: .word 0x00040001 - .word 0x00010005 - .word 0x00050005 - .word 0x00050005 + .word 0x00020004 + .word 0x00020003 + .word 0x00050004 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vremu_32.S b/test/div/vremu_32.S index 9051683..470d0d4 100644 --- a/test/div/vremu_32.S +++ b/test/div/vremu_32.S @@ -24,10 
+24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xbd2aae44 - .word 0xe03f4c5f - .word 0x14cad933 - .word 0x0a5af7f0 + .word 0xd26a687a + .word 0xf6f45748 + .word 0x11e2317b + .word 0x9f408209 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -63,9 +63,9 @@ vdata_end: .global vref_end vref_start: .word 0x00000001 - .word 0x00000000 .word 0x00000006 .word 0x00000004 + .word 0x00000004 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 diff --git a/test/div/vremu_8.S b/test/div/vremu_8.S index ab18473..95f4d4f 100644 --- a/test/div/vremu_8.S +++ b/test/div/vremu_8.S @@ -24,10 +24,10 @@ main: .global vdata_start .global vdata_end vdata_start: - .word 0xc4820122 - .word 0x05c8ff27 - .word 0x4fd24530 - .word 0xa8e49c73 + .word 0x9164ad46 + .word 0x797b143b + .word 0x076d7359 + .word 0xadaf535f .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 @@ -62,10 +62,10 @@ vdata_end: .global vref_start .global vref_end vref_start: - .word 0x00040106 - .word 0x05040304 - .word 0x02000606 - .word 0x00040203 + .word 0x05020500 + .word 0x02040603 + .word 0x00040305 + .word 0x05000604 .word 0x3f44383b .word 0x37424d54 .word 0x5e4b5049 From abc4e3a15e9a0309363dbef8129edfe89a71e0c9 Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 9 Nov 2022 17:14:39 -0600 Subject: [PATCH 55/64] fix linting --- rtl/vproc_div_block.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rtl/vproc_div_block.sv b/rtl/vproc_div_block.sv index 870d6a1..feb5025 100644 --- a/rtl/vproc_div_block.sv +++ b/rtl/vproc_div_block.sv @@ -70,7 +70,7 @@ module vproc_div_block #( if (op2_q == '0) begin div_d = '1; end - if (op1_q == {1'b1, {31{1'b0}}} && op2_q == '1) begin + if (op1_q == {1'b1, {32{1'b0}}} && op2_q == '1) begin div_d = op1_q; end @@ -82,7 +82,7 @@ module vproc_div_block #( if (op2_q == 0) begin div_d = op1_q; end - if (op1_q == {1'b1, {31{1'b0}}} && op2_q == '1) begin + if (op1_q == {1'b1, {32{1'b0}}} && op2_q == '1) begin div_d = '0; end end From 
7242264e5d5e1bb6c7799c89cda4a832d6dac75f Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 9 Nov 2022 17:18:27 -0600 Subject: [PATCH 56/64] run all tests --- .github/test_configs.conf | 10 +++++----- .github/workflows/default.yml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/test_configs.conf b/.github/test_configs.conf index 8cfa2b2..692c069 100644 --- a/.github/test_configs.conf +++ b/.github/test_configs.conf @@ -1,6 +1,6 @@ VPROC_CONFIG=compact VREG_W=128 VMEM_W=32 -# VPROC_CONFIG=dual VREG_W=128 VMEM_W=32 -# VPROC_CONFIG=dual VREG_W=512 VMEM_W=256 ICACHE_SZ=8192 DCACHE_SZ=65536 MEM_LATENCY=5 -# VPROC_CONFIG=triple VREG_W=256 VMEM_W=32 -# VPROC_CONFIG=triple VREG_W=1024 VMEM_W=256 ICACHE_SZ=8192 DCACHE_SZ=65536 MEM_LATENCY=5 -# VPROC_CONFIG=legacy VREG_W=128 VMEM_W=32 +VPROC_CONFIG=dual VREG_W=128 VMEM_W=32 +VPROC_CONFIG=dual VREG_W=512 VMEM_W=256 ICACHE_SZ=8192 DCACHE_SZ=65536 MEM_LATENCY=5 +VPROC_CONFIG=triple VREG_W=256 VMEM_W=32 +VPROC_CONFIG=triple VREG_W=1024 VMEM_W=256 ICACHE_SZ=8192 DCACHE_SZ=65536 MEM_LATENCY=5 +VPROC_CONFIG=legacy VREG_W=128 VMEM_W=32 diff --git a/.github/workflows/default.yml b/.github/workflows/default.yml index dbe9278..ea4f142 100644 --- a/.github/workflows/default.yml +++ b/.github/workflows/default.yml @@ -72,7 +72,7 @@ jobs: strategy: fail-fast: false matrix: - unit: [sld, div] + unit: [lsu, alu, mul, sld, elem, csr, misc, div] main_core: [ibex] steps: - uses: actions/checkout@v2 From 9cc7bc0ae2ef0d271a7b1394a2de13966584fa3d Mon Sep 17 00:00:00 2001 From: Hassan Farooq <1hassanfarooq@gmail.com> Date: Wed, 9 Nov 2022 17:21:25 -0600 Subject: [PATCH 57/64] Update Makefile --- sim/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/sim/Makefile b/sim/Makefile index f2569cc..2ef93ac 100644 --- a/sim/Makefile +++ b/sim/Makefile @@ -105,6 +105,7 @@ $(PROJ_DIR)/obj_dir/Vvproc_top.mk: verilator-version-check $(VPROC_CONFIG_PKG) -Wno-UNSIGNED -Wno-IMPLICIT -Wno-LITENDIAN -Wno-CASEINCOMPLETE \ 
-Wno-SYMRSVDWORD -Wno-BLKANDNBLK -Wno-BLKSEQ -Wno-SYNCASYNCNET \ -Wno-COMBDLY \ + -I$(TEST_DIR)/../ibex/rtl/ibex_pkg.sv -I$(SIM_DIR)/../rtl/ -I$(SIM_DIR)/../sva/ $$inc $$core \ -I$(SIM_DIR)/../rtl/ -I$(SIM_DIR)/../sva/ $$inc $$core \ -GMEM_W=$(MEM_W) -GVMEM_W=$(VMEM_W) \ -GICACHE_SZ=$(ICACHE_SZ) -GICACHE_LINE_W=$(ICACHE_LINE_W) \ From 873625d5027914186563ac96a16fa4618b7338d7 Mon Sep 17 00:00:00 2001 From: Hassan Farooq <1hassanfarooq@gmail.com> Date: Wed, 9 Nov 2022 17:25:18 -0600 Subject: [PATCH 58/64] Update Makefile --- sim/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/sim/Makefile b/sim/Makefile index 2ef93ac..f2569cc 100644 --- a/sim/Makefile +++ b/sim/Makefile @@ -105,7 +105,6 @@ $(PROJ_DIR)/obj_dir/Vvproc_top.mk: verilator-version-check $(VPROC_CONFIG_PKG) -Wno-UNSIGNED -Wno-IMPLICIT -Wno-LITENDIAN -Wno-CASEINCOMPLETE \ -Wno-SYMRSVDWORD -Wno-BLKANDNBLK -Wno-BLKSEQ -Wno-SYNCASYNCNET \ -Wno-COMBDLY \ - -I$(TEST_DIR)/../ibex/rtl/ibex_pkg.sv -I$(SIM_DIR)/../rtl/ -I$(SIM_DIR)/../sva/ $$inc $$core \ -I$(SIM_DIR)/../rtl/ -I$(SIM_DIR)/../sva/ $$inc $$core \ -GMEM_W=$(MEM_W) -GVMEM_W=$(VMEM_W) \ -GICACHE_SZ=$(ICACHE_SZ) -GICACHE_LINE_W=$(ICACHE_LINE_W) \ From d2147470c971472822fa2f7d879a434461c01fd0 Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 9 Nov 2022 17:29:10 -0600 Subject: [PATCH 59/64] move div to default op width --- config.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config.mk b/config.mk index 9d6fc5a..c0cc1f5 100755 --- a/config.mk +++ b/config.mk @@ -34,20 +34,20 @@ ifeq ($(VPROC_CONFIG), dual) VPORT_POLICY ?= some VMEM_W ?= 32 VREG_W ?= 128 - VPROC_PIPELINES ?= $(VMEM_W):VLSU,VALU,VELEM $(VPIPE_W_VMUL):VMUL,VSLD,VDIV + VPROC_PIPELINES ?= $(VMEM_W):VLSU,VALU,VELEM,VDIV $(VPIPE_W_VMUL):VMUL,VSLD else ifeq ($(VPROC_CONFIG), triple) VPORT_POLICY ?= some VMEM_W ?= 32 VREG_W ?= 256 - VPROC_PIPELINES ?= $(VMEM_W):VLSU $(VPIPE_W_DFLT):VALU,VELEM $(VPIPE_W_VMUL):VMUL,VSLD,VDIV + VPROC_PIPELINES ?= 
$(VMEM_W):VLSU $(VPIPE_W_DFLT):VALU,VELEM,VDIV $(VPIPE_W_VMUL):VMUL,VSLD else ifeq ($(VPROC_CONFIG), legacy) VPORT_POLICY ?= some VMEM_W ?= 32 VREG_W ?= 128 VPROC_PIPELINES ?= $(VMEM_W):VLSU $(VPIPE_W_DFLT):VALU $(VPIPE_W_VMUL):VMUL \ - $(VPIPE_W_DFLT):VSLD 32:VELEM $(VPIPE_W_VMUL):VDIV + $(VPIPE_W_DFLT):VSLD 32:VELEM $(VPIPE_W_DFLT):VDIV else $(error Unknown vector coprocessor configuration $(VPROC_CONFIG)) endif From 90ef833da4d084ea6f9569dea80234216d3faf7c Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 9 Nov 2022 17:32:31 -0600 Subject: [PATCH 60/64] fix splattered op widths for div --- rtl/vproc_div.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtl/vproc_div.sv b/rtl/vproc_div.sv index bf20e1d..bcfc963 100644 --- a/rtl/vproc_div.sv +++ b/rtl/vproc_div.sv @@ -203,7 +203,7 @@ module vproc_div #( end end - logic [((DIV_OP_W+1)*4)-1:0] div_op1, div_op2; + logic [(DIV_OP_W*4)-1+DIV_OP_W/32:0] div_op1, div_op2; always_comb begin div_op1 = DONT_CARE_ZERO ? '0 : 'x; div_op2 = DONT_CARE_ZERO ? '0 : 'x; From e2384fb1b5578997d64fad3ad5b57818377d940a Mon Sep 17 00:00:00 2001 From: William Eustis Date: Wed, 9 Nov 2022 17:38:14 -0600 Subject: [PATCH 61/64] fix splattered op widths for div --- rtl/vproc_div.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtl/vproc_div.sv b/rtl/vproc_div.sv index bcfc963..99d9f0b 100644 --- a/rtl/vproc_div.sv +++ b/rtl/vproc_div.sv @@ -203,7 +203,7 @@ module vproc_div #( end end - logic [(DIV_OP_W*4)-1+DIV_OP_W/32:0] div_op1, div_op2; + logic [(DIV_OP_W/8*33)-1:0] div_op1, div_op2; always_comb begin div_op1 = DONT_CARE_ZERO ? '0 : 'x; div_op2 = DONT_CARE_ZERO ? 
'0 : 'x; From c02ecdb02bbec044f84f1e9180f52c53f265658e Mon Sep 17 00:00:00 2001 From: Hassan Farooq <1hassanfarooq@gmail.com> Date: Wed, 9 Nov 2022 18:02:00 -0600 Subject: [PATCH 62/64] trying new makefile --- sim/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/sim/Makefile b/sim/Makefile index f2569cc..71d970b 100644 --- a/sim/Makefile +++ b/sim/Makefile @@ -92,6 +92,7 @@ $(PROJ_DIR)/obj_dir/Vvproc_top.mk: verilator-version-check $(VPROC_CONFIG_PKG) inc="$$inc -I$(CORE_DIR)/vendor/lowrisc_ip/dv/sv/dv_utils/"; \ inc="$$inc -I$(CORE_DIR)/vendor/lowrisc_ip/ip/prim/rtl/"; \ inc="$$inc -I$(CORE_DIR)/vendor/lowrisc_ip/ip/prim_generic/rtl/"; \ + cat ibex_pkg.sv; \ src="ibex_pkg.sv prim_pkg.sv prim_assert.sv prim_ram_1p_pkg.sv"; \ src="$$src ibex_register_file_ff.sv"; \ elif echo "$(CORE_DIR)" | grep -q "cv32e40x"; then \ From c0a91b9314dd686182f4c16ff6530ab83897f661 Mon Sep 17 00:00:00 2001 From: Hassan Farooq <1hassanfarooq@gmail.com> Date: Fri, 11 Nov 2022 12:51:16 -0600 Subject: [PATCH 63/64] Update Makefile --- sim/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sim/Makefile b/sim/Makefile index 71d970b..fca6bfa 100644 --- a/sim/Makefile +++ b/sim/Makefile @@ -86,6 +86,8 @@ $(PROJ_DIR)/obj_dir/Vvproc_top.mk: verilator-version-check $(VPROC_CONFIG_PKG) core=""; \ inc=""; \ src=""; \ + $(info $$SIM_DIR is [${SIM_DIR}]) \ + $(info $$CORE_DIR is [${CORE_DIR}]) \ if echo "$(CORE_DIR)" | grep -q "ibex"; then \ core="-DMAIN_CORE_IBEX"; \ inc="-I$(CORE_DIR)/rtl/ -I$(CORE_DIR)/dv/uvm/core_ibex/common/prim/"; \ From 62bace69b255a388c49063fcc46c13391f00af37 Mon Sep 17 00:00:00 2001 From: William Eustis <40376132+WeustiS@users.noreply.github.com> Date: Thu, 1 Dec 2022 00:01:52 -0600 Subject: [PATCH 64/64] Update vproc_div_block.sv --- rtl/vproc_div_block.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rtl/vproc_div_block.sv b/rtl/vproc_div_block.sv index feb5025..b409e30 100644 --- a/rtl/vproc_div_block.sv +++ 
b/rtl/vproc_div_block.sv @@ -2,9 +2,9 @@ module vproc_div_block #( parameter vproc_pkg::div_type DIV_TYPE = vproc_pkg::DIV_GENERIC, - parameter bit BUF_OPS = 1'b0, // buffer operands (op1_i and op2_i) - parameter bit BUF_DIV = 1'b0, // buffer division result - parameter bit BUF_RES = 1'b0 // buffer final result (res_o) + parameter bit BUF_OPS = 1'b1, // buffer operands (op1_i and op2_i) + parameter bit BUF_DIV = 1'b1, // buffer division result + parameter bit BUF_RES = 1'b1 // buffer final result (res_o) // Other parameters... )( input logic clk_i,