Skip to content

Commit 980ab34

Browse files
authored
Merge pull request #2 from xianyi/develop
rebase
2 parents 3853014 + 7f11e33 commit 980ab34

16 files changed

+166
-47
lines changed

CONTRIBUTORS.md

+4-1
Original file line numberDiff line numberDiff line change
@@ -190,4 +190,7 @@ In chronological order:
190190
* [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support
191191

192192
* Danfeng Zhang <https://github.com/craft-zhang>
193-
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53
193+
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53
194+
195+
* PingTouGe Semiconductor Co., Ltd.
196+
* [2020-10] Add RISC-V Vector (0.7.1) support. Optimize BLAS kernels for Xuantie C910

Makefile.sparc

+12-4
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,29 @@ RANLIB = ranlib
33

44
ifdef BINARY64
55

6+
ifeq ($(C_COMPILER), GCC)
67
CCOMMON_OPT += -mcpu=v9 -m64
8+
else
9+
CCOMMON_OPT += -m64
10+
endif
711
ifeq ($(COMPILER_F77), g77)
812
FCOMMON_OPT += -mcpu=v9 -m64
913
endif
10-
ifeq ($(COMPILER_F77), f90)
11-
FCOMMON_OPT += -xarch=v9
14+
ifeq ($(COMPILER_F77), f95)
15+
FCOMMON_OPT += -m64
1216
endif
1317
else
1418

19+
ifeq ($(C_COMPILER), GCC)
1520
CCOMMON_OPT += -mcpu=v9
21+
else
22+
CCOMMON_OPT += -xarch=v9
23+
endif
1624

1725
ifeq ($(COMPILER_F77), g77)
1826
FCOMMON_OPT += -mcpu=v9
1927
endif
20-
ifeq ($(COMPILER_F77), f90)
28+
ifeq ($(COMPILER_F77), f95)
2129
FCOMMON_OPT += -xarch=v8plusb
2230
endif
2331

@@ -37,4 +45,4 @@ LIBSUNPERF = -L/opt/SUNWspro/lib/v9 -L/opt/SUNWspro/prod/lib/v9 \
3745
else
3846
LIBSUNPERF = -L/opt/SUNWspro/lib -L/opt/SUNWspro/prod/lib \
3947
-Wl,-R,/opt/SUNWspro/lib -lsunperf -lompstubs -lfui -lfsu -lsunmath
40-
endif
48+
endif

Makefile.system

+12-1
Original file line numberDiff line numberDiff line change
@@ -1131,16 +1131,25 @@ CCOMMON_OPT += -w
11311131
ifeq ($(ARCH), x86)
11321132
CCOMMON_OPT += -m32
11331133
else
1134-
FCOMMON_OPT += -m64
1134+
ifdef BINARY64
1135+
CCOMMON_OPT += -m64
1136+
else
1137+
CCOMMON_OPT += -m32
1138+
endif
11351139
endif
11361140
endif
11371141

11381142
ifeq ($(F_COMPILER), SUN)
11391143
CCOMMON_OPT += -DF_INTERFACE_SUN
1144+
FCOMMON_OPT += -ftrap=%none -xrecursive
11401145
ifeq ($(ARCH), x86)
11411146
FCOMMON_OPT += -m32
11421147
else
1148+
ifdef BINARY64
11431149
FCOMMON_OPT += -m64
1150+
else
1151+
FCOMMON_OPT += -m32
1152+
endif
11441153
endif
11451154
ifeq ($(USE_OPENMP), 1)
11461155
FCOMMON_OPT += -xopenmp=parallel
@@ -1313,8 +1322,10 @@ KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
13131322
include $(TOPDIR)/Makefile.$(ARCH)
13141323

13151324
ifneq ($(C_COMPILER), PGI)
1325+
ifneq ($(C_COMPILER), SUN)
13161326
CCOMMON_OPT += -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME
13171327
endif
1328+
endif
13181329
CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\"
13191330

13201331
ifeq ($(CORE), PPC440)

c_check

+2-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
# Checking cross compile
77
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
88
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
9-
$hostarch = `uname -p` if ($hostos eq "AIX");
9+
$hostarch = `uname -p` if ($hostos eq "AIX" || $hostos eq "SunOS");
10+
chop($hostarch);
1011
$hostarch = "x86_64" if ($hostarch eq "amd64");
1112
$hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/);
1213
$hostarch = "arm64" if ($hostarch eq "aarch64");

common_sparc.h

+6
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,12 @@ static __inline unsigned long rpcc(void){
7878
#define __BIG_ENDIAN__
7979
#endif
8080

81+
#ifdef C_SUN
82+
#ifndef __64BIT
83+
#define RETURN_BY_STACK
84+
#endif
85+
#endif
86+
8187
#ifdef DOUBLE
8288
#define GET_IMAGE(res) __asm__ __volatile__("fmovd %%f2, %0" : "=f"(res) : : "memory")
8389
#else

kernel/arm/zdot.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
4848

4949
dot[0]=0.0;
5050
dot[1]=0.0;
51-
#if !defined(__PPC__)
51+
#if !defined(__PPC__) && !defined(__SunOS)
5252
CREAL(result) = 0.0 ;
5353
CIMAG(result) = 0.0 ;
5454
#else
@@ -73,7 +73,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
7373
i++ ;
7474

7575
}
76-
#if !defined(__PPC__)
76+
#if !defined(__PPC__) && !defined(__SunOS)
7777
CREAL(result) = dot[0];
7878
CIMAG(result) = dot[1];
7979
#else

kernel/mips/cgemm_kernel_8x4_msa.c

+8-8
Original file line numberDiff line numberDiff line change
@@ -758,10 +758,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
758758
pc0[1] += alphar * res1; \
759759
pc0[1] += alphai * res0; \
760760
\
761-
pc1[2] += alphar * res2; \
762-
pc1[2] -= alphai * res3; \
763-
pc1[3] += alphar * res3; \
764-
pc1[3] += alphai * res2; \
761+
pc1[0] += alphar * res2; \
762+
pc1[0] -= alphai * res3; \
763+
pc1[1] += alphar * res3; \
764+
pc1[1] += alphai * res2; \
765765
}
766766

767767
#define CGEMM_SCALE_1X1 \
@@ -1067,10 +1067,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
10671067
pc0[1] = alphar * res1; \
10681068
pc0[1] += alphai * res0; \
10691069
\
1070-
pc1[2] = alphar * res2; \
1071-
pc1[2] -= alphai * res3; \
1072-
pc1[3] = alphar * res3; \
1073-
pc1[3] += alphai * res2; \
1070+
pc1[0] = alphar * res2; \
1071+
pc1[0] -= alphai * res3; \
1072+
pc1[1] = alphar * res3; \
1073+
pc1[1] += alphai * res2; \
10741074
}
10751075

10761076
#define CGEMM_TRMM_SCALE_1X1 \

kernel/mips/cgemv_n_msa.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5656
#if !defined(XCONJ)
5757
#define OP0 +=
5858
#define OP1 -=
59-
#define OP2 -=
59+
#define OP2 +=
6060
#else
6161
#define OP0 -=
6262
#define OP1 -=
63-
#define OP2 +=
63+
#define OP2 -=
6464
#endif
6565
#endif
6666

kernel/mips/cgemv_t_msa.c

+19-7
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3232
#undef OP1
3333
#undef OP2
3434

35-
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
36-
#define OP0 -=
37-
#define OP1 +=
38-
#define OP2 +=
35+
#if !defined(CONJ)
36+
#if !defined(XCONJ)
37+
#define OP0 -=
38+
#define OP1 +=
39+
#define OP2 +=
40+
#else
41+
#define OP0 +=
42+
#define OP1 +=
43+
#define OP2 -=
44+
#endif
3945
#else
40-
#define OP0 +=
41-
#define OP1 +=
42-
#define OP2 -=
46+
#if !defined(XCONJ)
47+
#define OP0 +=
48+
#define OP1 -=
49+
#define OP2 +=
50+
#else
51+
#define OP0 -=
52+
#define OP1 -=
53+
#define OP2 -=
54+
#endif
4355
#endif
4456

4557
#define CGEMV_T_8x4() \

kernel/mips/dswap_msa.c

+28-2
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
184184
}
185185
}
186186
}
187-
else
187+
else if ((inc_x != 0) && (inc_y != 0))
188188
{
189189
for (i = (n >> 3); i--;)
190190
{
@@ -248,6 +248,32 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
248248
}
249249
}
250250
}
251-
251+
else
252+
{
253+
if (inc_x == inc_y)
254+
{
255+
if (n & 1)
256+
{
257+
x0 = *srcx;
258+
*srcx = *srcy;
259+
*srcy = x0;
260+
}
261+
else
262+
return (0);
263+
}
264+
else
265+
{
266+
BLASLONG ix = 0, iy = 0;
267+
while (i < n)
268+
{
269+
x0 = srcx[ix];
270+
srcx[ix] = srcy[iy];
271+
srcy[iy] = x0;
272+
ix += inc_x;
273+
iy += inc_y;
274+
i++;
275+
}
276+
}
277+
}
252278
return (0);
253279
}

kernel/mips/sswap_msa.c

+28-1
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
198198
}
199199
}
200200
}
201-
else
201+
else if ((inc_x != 0) && (inc_y != 0))
202202
{
203203
for (i = (n >> 3); i--;)
204204
{
@@ -262,6 +262,33 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
262262
}
263263
}
264264
}
265+
else
266+
{
267+
if (inc_x == inc_y)
268+
{
269+
if (n & 1)
270+
{
271+
x0 = *srcx;
272+
*srcx = *srcy;
273+
*srcy = x0;
274+
}
275+
else
276+
return (0);
277+
}
278+
else
279+
{
280+
BLASLONG ix = 0, iy = 0;
281+
while (i < n)
282+
{
283+
x0 = srcx[ix];
284+
srcx[ix] = srcy[iy];
285+
srcy[iy] = x0;
286+
ix += inc_x;
287+
iy += inc_y;
288+
i++;
289+
}
290+
}
291+
}
265292

266293
return (0);
267294
}

kernel/mips/zgemv_n_msa.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5656
#if !defined(XCONJ)
5757
#define OP0 +=
5858
#define OP1 -=
59-
#define OP2 -=
59+
#define OP2 +=
6060
#else
6161
#define OP0 -=
6262
#define OP1 -=
63-
#define OP2 +=
63+
#define OP2 -=
6464
#endif
6565
#endif
6666

kernel/mips/zgemv_t_msa.c

+19-7
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3434
#undef OP3
3535
#undef OP4
3636

37-
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
38-
#define OP0 -=
39-
#define OP1 +=
40-
#define OP2 +=
37+
#if !defined(CONJ)
38+
#if !defined(XCONJ)
39+
#define OP0 -=
40+
#define OP1 +=
41+
#define OP2 +=
42+
#else
43+
#define OP0 +=
44+
#define OP1 +=
45+
#define OP2 -=
46+
#endif
4147
#else
42-
#define OP0 +=
43-
#define OP1 +=
44-
#define OP2 -=
48+
#if !defined(XCONJ)
49+
#define OP0 +=
50+
#define OP1 -=
51+
#define OP2 +=
52+
#else
53+
#define OP0 -=
54+
#define OP1 -=
55+
#define OP2 -=
56+
#endif
4557
#endif
4658

4759
#define ZGEMV_T_8x1() \

kernel/sparc/KERNEL.sparc

+10
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,13 @@ ZTRSMKERNEL_LN = ztrsm_kernel_LN.S
5454
ZTRSMKERNEL_LT = ztrsm_kernel_LT.S
5555
ZTRSMKERNEL_RN = ztrsm_kernel_LT.S
5656
ZTRSMKERNEL_RT = ztrsm_kernel_RT.S
57+
58+
59+
SDOTKERNEL = ../generic/dot.c
60+
SDSDOTKERNEL = ../generic/dot.c
61+
DSDOTKERNEL = ../generic/dot.c
62+
DDOTKERNEL = ../generic/dot.c
63+
CDOTKERNEL = ../arm/zdot.c
64+
ZDOTKERNEL = ../arm/zdot.c
65+
CSWAPKERNEL = ../arm/zswap.c
66+
ZSWAPKERNEL = ../arm/zswap.c

0 commit comments

Comments
 (0)