diff --git a/kernel/riscv64/KERNEL.RISCV64_ZVL256B b/kernel/riscv64/KERNEL.RISCV64_ZVL256B
index c8be1c6370..e69171abe7 100644
--- a/kernel/riscv64/KERNEL.RISCV64_ZVL256B
+++ b/kernel/riscv64/KERNEL.RISCV64_ZVL256B
@@ -283,6 +283,8 @@ SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX)
 ifndef SBGEMM_BETA
 SBGEMM_BETA = gemm_beta_rvv.c
 endif
+SBGEMVNKERNEL = sbgemv_n_vector.c
+SBGEMVTKERNEL = sbgemv_t_vector.c
 endif
 
 SAXPBYKERNEL = axpby_vector_v2.c
diff --git a/kernel/riscv64/sbgemv_n_vector.c b/kernel/riscv64/sbgemv_n_vector.c
new file mode 100644
index 0000000000..94b9488cfc
--- /dev/null
+++ b/kernel/riscv64/sbgemv_n_vector.c
@@ -0,0 +1,130 @@
+/***************************************************************************
+Copyright (c) 2020, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+#include "common.h"
+
+#define FLOAT_V_T vfloat32m8_t
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8)
+#define VSEV_FLOAT RISCV_RVV(vse32_v_f32m8)
+#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m8)
+#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f32m8)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8)
+
+#define VSETVL(n) RISCV_RVV(vsetvl_e16m4)(n)
+
+#if defined(HFLOAT16)
+#define IFLOAT_V_T vfloat16m4_t
+#define VLEV_IFLOAT RISCV_RVV(vle16_v_f16m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfwmacc_vf_f32m8)
+#else
+#define IFLOAT_V_T vbfloat16m4_t
+#define VLEV_IFLOAT RISCV_RVV(vle16_v_bf16m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfwmaccbf16_vf_f32m8)
+#endif
+
+int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
+{
+    if (n < 0) return(0);
+
+    IFLOAT *a_ptr, temp;
+    FLOAT *y_ptr;
+    BLASLONG i, j, vl;
+    IFLOAT_V_T va;
+    FLOAT_V_T vy;
+
+    y_ptr = y;
+    if (inc_y == 1) {
+        if (beta == 0.0) {
+            for (i = m; i > 0; i -= vl) {
+                vl = VSETVL(i);
+                vy = VFMVVF_FLOAT(0.0, vl);
+                VSEV_FLOAT(y_ptr, vy, vl);
+                y_ptr += vl;
+            }
+        } else if (beta != 1.0) {
+            for (i = m; i > 0; i -= vl) {
+                vl = VSETVL(i);
+                vy = VLEV_FLOAT(y_ptr, vl);
+                vy = VFMULVF_FLOAT(vy, beta, vl);
+                VSEV_FLOAT(y_ptr, vy, vl);
+                y_ptr += vl;
+            }
+        }
+        for (j = 0; j < n; j++) {
+            temp = (IFLOAT)(alpha * (FLOAT)(x[0]));
+            y_ptr = y;
+            a_ptr = a;
+            for (i = m; i > 0; i -= vl) {
+                vl = VSETVL(i);
+                vy = VLEV_FLOAT(y_ptr, vl);
+                va = VLEV_IFLOAT(a_ptr, vl);
+                vy = VFMACCVF_FLOAT(vy, temp, va, vl);
+                VSEV_FLOAT(y_ptr, vy, vl);
+                y_ptr += vl;
+                a_ptr += vl;
+            }
+            x += inc_x;
+            a += lda;
+        }
+    } else {
+        BLASLONG stride_y = inc_y * sizeof(FLOAT);
+        if (beta == 0.0) {
+            for (i = m; i > 0; i -= vl) {
+                vl = VSETVL(i);
+                vy = VFMVVF_FLOAT(0.0, vl);
+                VSSEV_FLOAT(y_ptr, stride_y, vy, vl);
+                y_ptr += vl * inc_y;
+            }
+        } else if (beta != 1.0) {
+            for (i = m; i > 0; i -= vl) {
+                vl = VSETVL(i);
+                vy = VLSEV_FLOAT(y_ptr, stride_y, vl);
+                vy = VFMULVF_FLOAT(vy, beta, vl);
+                VSSEV_FLOAT(y_ptr, stride_y, vy, vl);
+                y_ptr += vl * inc_y;
+            }
+        }
+        for (j = 0; j < n; j++) {
+            temp = (IFLOAT)(alpha * (FLOAT)(x[0]));
+            y_ptr = y;
+            a_ptr = a;
+            for (i = m; i > 0; i -= vl) {
+                vl = VSETVL(i);
+                vy = VLSEV_FLOAT(y_ptr, stride_y, vl);
+                va = VLEV_IFLOAT(a_ptr, vl);
+                vy = VFMACCVF_FLOAT(vy, temp, va, vl);
+                VSSEV_FLOAT(y_ptr, stride_y, vy, vl);
+                y_ptr += vl * inc_y;
+                a_ptr += vl;
+            }
+            x += inc_x;
+            a += lda;
+        }
+    }
+    return(0);
+}
diff --git a/kernel/riscv64/sbgemv_t_vector.c b/kernel/riscv64/sbgemv_t_vector.c
new file mode 100644
index 0000000000..f537ca4ead
--- /dev/null
+++ b/kernel/riscv64/sbgemv_t_vector.c
@@ -0,0 +1,134 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+#include "common.h"
+
+#define FLOAT_V_T vfloat32m8_t
+#define FLOAT_V_T_M1 vfloat32m1_t
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8)
+
+#define VSETVL(n) RISCV_RVV(vsetvl_e16m4)(n)
+
+#if defined(HFLOAT16)
+#define IFLOAT_V_T vfloat16m4_t
+#define VLEV_IFLOAT RISCV_RVV(vle16_v_f16m4)
+#define VLSEV_IFLOAT RISCV_RVV(vlse16_v_f16m4)
+#define VFMACCVV_FLOAT(a,b,c,d) RISCV_RVV(vfwmul_vv_f32m8)(b,c,d)
+#else
+#define IFLOAT_V_T vbfloat16m4_t
+#define VLEV_IFLOAT RISCV_RVV(vle16_v_bf16m4)
+#define VLSEV_IFLOAT RISCV_RVV(vlse16_v_bf16m4)
+#define VFMACCVV_FLOAT RISCV_RVV(vfwmaccbf16_vv_f32m8)
+#endif
+
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) vfredusum_vs_f32m8_f32m1(v_res, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m8_f32m1)
+#endif
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+
+int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
+{
+    BLASLONG i = 0, j = 0, k = 0;
+    BLASLONG ix = 0, iy = 0;
+    IFLOAT *a_ptr = a;
+    FLOAT temp;
+
+    IFLOAT_V_T va, vx;
+#if !defined(HFLOAT16)
+    FLOAT_V_T vz;
+#endif
+    FLOAT_V_T vr;
+    BLASLONG gvl = 0;
+    FLOAT_V_T_M1 v_res;
+
+    if (inc_x == 1) {
+        for (i = 0; i < n; i++) {
+            v_res = VFMVVF_FLOAT_M1(0, 1);
+            gvl = VSETVL(m);
+            j = 0;
+#if !defined(HFLOAT16)
+            vz = VFMVVF_FLOAT(0, gvl);
+#endif
+            for (k = 0; k < m/gvl; k++) {
+                va = VLEV_IFLOAT(&a_ptr[j], gvl);
+                vx = VLEV_IFLOAT(&x[j], gvl);
+                vr = VFMACCVV_FLOAT(vz, va, vx, gvl); // could vfmacc here and reduce outside loop
+                v_res = VFREDSUM_FLOAT(vr, v_res, gvl); // but that reordering diverges far enough from scalar path to make tests fail
+                j += gvl;
+            }
+            if (j < m) {
+                gvl = VSETVL(m-j);
+                va = VLEV_IFLOAT(&a_ptr[j], gvl);
+                vx = VLEV_IFLOAT(&x[j], gvl);
+                vr = VFMACCVV_FLOAT(vz, va, vx, gvl);
+                v_res = VFREDSUM_FLOAT(vr, v_res, gvl);
+            }
+            temp = (FLOAT)EXTRACT_FLOAT(v_res);
+            y[iy] = y[iy] * beta + alpha * temp;
+
+            iy += inc_y;
+            a_ptr += lda;
+        }
+    } else {
+        BLASLONG stride_x = inc_x * sizeof(IFLOAT);
+        for (i = 0; i < n; i++) {
+            v_res = VFMVVF_FLOAT_M1(0, 1);
+            gvl = VSETVL(m);
+            j = 0;
+            ix = 0;
+#if !defined(HFLOAT16)
+            vz = VFMVVF_FLOAT(0, gvl);
+#endif
+            for (k = 0; k < m/gvl; k++) {
+                va = VLEV_IFLOAT(&a_ptr[j], gvl);
+                vx = VLSEV_IFLOAT(&x[ix], stride_x, gvl);
+                vr = VFMACCVV_FLOAT(vz, va, vx, gvl);
+                v_res = VFREDSUM_FLOAT(vr, v_res, gvl);
+                j += gvl;
+                ix += inc_x * gvl;
+            }
+            if (j < m) {
+                gvl = VSETVL(m-j);
+                va = VLEV_IFLOAT(&a_ptr[j], gvl);
+                vx = VLSEV_IFLOAT(&x[ix], stride_x, gvl);
+                vr = VFMACCVV_FLOAT(vz, va, vx, gvl);
+                v_res = VFREDSUM_FLOAT(vr, v_res, gvl);
+            }
+            temp = (FLOAT)EXTRACT_FLOAT(v_res);
+            y[iy] = y[iy] * beta + alpha * temp;
+
+            iy += inc_y;
+            a_ptr += lda;
+        }
+    }
+
+    return (0);
+}
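
Usage sketch (illustrative, not part of the patch): a minimal program showing how these kernels would be reached through OpenBLAS's bfloat16 CBLAS extension, assuming a build with BUILD_BFLOAT16 enabled on a RISCV64_ZVL256B target. It uses cblas_sbstobf16 only to produce bfloat16 inputs from float data; with column-major storage, CblasNoTrans should dispatch to the SBGEMVNKERNEL (sbgemv_n_vector.c) and CblasTrans to the SBGEMVTKERNEL (sbgemv_t_vector.c) registered above.

/* Illustrative sketch: y := alpha*A*x + beta*y with bfloat16 A and x. */
#include <stdio.h>
#include <cblas.h>

int main(void) {
    const blasint m = 4, n = 3;
    float af[12] = {1, 2, 3, 4,  5, 6, 7, 8,  9, 10, 11, 12}; /* column-major 4x3 */
    float xf[3]  = {1, 1, 1};
    float y[4]   = {0};
    bfloat16 a[12], x[3];

    /* Truncate the float inputs to bfloat16 storage. */
    cblas_sbstobf16(m * n, af, 1, a, 1);
    cblas_sbstobf16(n, xf, 1, x, 1);

    /* Column-major, no transpose: should route to sbgemv_n_vector.c. */
    cblas_sbgemv(CblasColMajor, CblasNoTrans, m, n, 1.0f, a, m, x, 1, 0.0f, y, 1);
    for (blasint i = 0; i < m; i++)
        printf("y[%d] = %.1f\n", (int)i, y[i]);

    /* Column-major, transpose: should route to sbgemv_t_vector.c. */
    float xf2[4] = {1, 1, 1, 1}, yt[3] = {0};
    bfloat16 x2[4];
    cblas_sbstobf16(m, xf2, 1, x2, 1);
    cblas_sbgemv(CblasColMajor, CblasTrans, m, n, 1.0f, a, m, x2, 1, 0.0f, yt, 1);
    for (blasint i = 0; i < n; i++)
        printf("yt[%d] = %.1f\n", (int)i, yt[i]);

    return 0;
}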