Skip to content

Commit b6552b1

Browse files
authored
Merge pull request #2 from xianyi/develop
merge develop
2 parents abea977 + 5fdf9ad commit b6552b1

27 files changed

+1701
-199
lines changed

CMakeLists.txt

+3-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
66
project(OpenBLAS C ASM)
77
set(OpenBLAS_MAJOR_VERSION 0)
88
set(OpenBLAS_MINOR_VERSION 3)
9-
set(OpenBLAS_PATCH_VERSION 7.dev)
9+
set(OpenBLAS_PATCH_VERSION 8.dev)
1010
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1111

1212
# Adhere to GNU filesystem layout conventions
@@ -211,7 +211,8 @@ if (USE_THREAD)
211211
target_link_libraries(${OpenBLAS_LIBNAME} ${CMAKE_THREAD_LIBS_INIT})
212212
endif()
213213

214-
if (MSVC OR NOT NOFORTRAN)
214+
#if (MSVC OR NOT NOFORTRAN)
215+
if (NOT NO_CBLAS)
215216
# Broken without fortran on unix
216217
add_subdirectory(utest)
217218
endif()

Changelog.txt

+42
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,46 @@
11
OpenBLAS ChangeLog
2+
====================================================================
3+
Version 0.3.7
4+
11-Aug-2019
5+
6+
common:
7+
* having the gmake special variables TARGET_ARCH or TARGET_MACH
8+
defined no longer causes build failures in ctest or utest
9+
* defining NO_AFFINITY or USE_TLS to 0 in gmake builds no longer
10+
has the same effect as setting them to 1
11+
* a new test program was added to allow checking the library for
12+
thread safety
13+
* a new option USE_LOCKING was added to ensure thread safety when
14+
OpenBLAS itself is built without multithreading but will be
15+
called from multiple threads.
16+
* a build failure on Linux with glibc versions earlier than 2.5
17+
was fixed
18+
* a runtime error with CPU enumeration (and NO_AFFINITY not set)
19+
on glibc 2.6 was fixed
20+
* NO_AFFINITY was added to the CMAKE options (and defaults to being
21+
active on Linux, as in the gmake builds)
22+
23+
x86_64:
24+
* the build-time logic for detection of AVX512 availability in
25+
the processor and compiler was fixed
26+
* gmake builds on OSX now set the internal name of the library to
27+
libopenblas.0.dylib (consistent with CMAKE)
28+
* the Haswell DGEMM kernel received a significant speedup through
29+
improved prefetch and load instructions
30+
* performance of DGEMM, DTRMM, DTRSM and ZDOT on Zen/Zen2 was markedly
31+
increased by avoiding vpermpd instructions
32+
* the SKYLAKEX (AVX512) DGEMM helper functions have now been disabled
33+
to fix remaining errors in DGEMM, DSYMM and DTRMM
34+
35+
POWER:
36+
* added support for building on FreeBSD/powerpc64 and FreeBSD/ppc970
37+
* added optimized kernels for POWER9 SGEMM and STRMM
38+
39+
ARMV7:
40+
* fixed the softfp implementations of xAMAX and IxAMAX
41+
* removed the predefined -march= flags on both ARMV5 and ARMV6 as
42+
they were appropriate for only a subset of platforms
43+
244
====================================================================
345
Version 0.3.6
446
29-Apr-2019

Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ endif
109109
ifeq ($(OSNAME), Darwin)
110110
@$(MAKE) -C exports dyn
111111
@ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
112+
@ln -fs $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
112113
endif
113114
ifeq ($(OSNAME), WINNT)
114115
@$(MAKE) -C exports dll

Makefile.arm

+4-9
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,14 @@
11
ifeq ($(CORE), $(filter $(CORE),ARMV7 CORTEXA9 CORTEXA15))
22
ifeq ($(OSNAME), Android)
3-
CCOMMON_OPT += -mfpu=neon -march=armv7-a
4-
FCOMMON_OPT += -mfpu=neon -march=armv7-a
3+
CCOMMON_OPT += -mfpu=neon
4+
FCOMMON_OPT += -mfpu=neon
55
else
66
CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
77
FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
88
endif
99
endif
1010

1111
ifeq ($(CORE), ARMV6)
12-
CCOMMON_OPT += -mfpu=vfp -march=armv6
13-
FCOMMON_OPT += -mfpu=vfp -march=armv6
14-
endif
15-
16-
ifeq ($(CORE), ARMV5)
17-
CCOMMON_OPT += -march=armv5
18-
FCOMMON_OPT += -march=armv5
12+
CCOMMON_OPT += -mfpu=vfp
13+
FCOMMON_OPT += -mfpu=vfp
1914
endif

Makefile.install

+2-1
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@ ifeq ($(OSNAME), Darwin)
8383
@-cp $(LIBDYNNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
8484
@-install_name_tool -id "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)" "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)"
8585
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
86-
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
86+
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib ; \
87+
ln -fs $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
8788
endif
8889
ifeq ($(OSNAME), WINNT)
8990
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"

Makefile.rule

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44

55
# This library's version
6-
VERSION = 0.3.7.dev
6+
VERSION = 0.3.8.dev
77

88
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
99
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library

Makefile.system

+8-3
Original file line numberDiff line numberDiff line change
@@ -142,9 +142,9 @@ endif
142142
endif
143143

144144

145-
# On x86_64 build getarch with march=native. This is required to detect AVX512 support in getarch.
145+
# On x86_64 build getarch with march=native unless the compiler is PGI. This is required to detect AVX512 support in getarch.
146146
ifeq ($(ARCH), x86_64)
147-
ifneq ($(C_COMPILER), PGI)
147+
ifeq ($(findstring pgcc,$(HOSTCC)),)
148148
GETARCH_FLAGS += -march=native
149149
endif
150150
endif
@@ -267,9 +267,10 @@ OBJCOPY = $(CROSS_SUFFIX)objcopy
267267
OBJCONV = $(CROSS_SUFFIX)objconv
268268

269269

270-
# For detect fortran failed, only build BLAS.
270+
# When fortran support was either not detected or actively deselected, only build BLAS.
271271
ifeq ($(NOFORTRAN), 1)
272272
NO_LAPACK = 1
273+
override FEXTRALIB =
273274
endif
274275

275276
#
@@ -1124,8 +1125,12 @@ endif
11241125
endif
11251126

11261127
ifdef NO_AFFINITY
1128+
ifeq ($(NO_AFFINITY), 0)
1129+
override undefine NO_AFFINITY
1130+
else
11271131
CCOMMON_OPT += -DNO_AFFINITY
11281132
endif
1133+
endif
11291134

11301135
ifdef FUNCTION_PROFILE
11311136
CCOMMON_OPT += -DFUNCTION_PROFILE

appveyor.yml

+16-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,14 @@ environment:
3535
DYNAMIC_ARCH: ON
3636
WITH_FORTRAN: no
3737
- COMPILER: cl
38-
38+
- COMPILER: MinGW64-gcc-7.2.0-mingw
39+
DYNAMIC_ARCH: OFF
40+
WITH_FORTRAN: ignore
41+
- COMPILER: MinGW64-gcc-7.2.0
42+
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
43+
COMPILER: MinGW-gcc-5.3.0
44+
WITH_FORTRAN: ignore
45+
3946
install:
4047
- if [%COMPILER%]==[clang-cl] call %CONDA_INSTALL_LOCN%\Scripts\activate.bat
4148
- if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force
@@ -52,7 +59,14 @@ install:
5259
before_build:
5360
- ps: if (-Not (Test-Path .\build)) { mkdir build }
5461
- cd build
62+
- set PATH=%PATH:C:\Program Files\Git\usr\bin;=%
63+
- if [%COMPILER%]==[MinGW-gcc-5.3.0] set PATH=C:\MinGW\bin;C:\msys64\usr\bin;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH%
64+
- if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] set PATH=C:\MinGW\bin;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH%
65+
- if [%COMPILER%]==[MinGW64-gcc-7.2.0] set PATH=C:\msys64\usr\bin;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH%
5566
- if [%COMPILER%]==[cl] cmake -G "Visual Studio 15 2017 Win64" ..
67+
- if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] cmake -G "MinGW Makefiles" -DNOFORTRAN=1 ..
68+
- if [%COMPILER%]==[MinGW64-gcc-7.2.0] cmake -G "MSYS Makefiles" -DBINARY=32 -DNOFORTRAN=1 ..
69+
- if [%COMPILER%]==[MinGW-gcc-5.3.0] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
5670
- if [%WITH_FORTRAN%]==[no] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DMSVC_STATIC_CRT=ON ..
5771
- if [%WITH_FORTRAN%]==[yes] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 ..
5872
- if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON -DDYNAMIC_LIST='CORE2;NEHALEM;SANDYBRIDGE;BULLDOZER;HASWELL' ..
@@ -64,3 +78,4 @@ test_script:
6478
- echo Running Test
6579
- cd utest
6680
- openblas_utest
81+

cmake/arch.cmake

+2-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,8 @@ if (DYNAMIC_ARCH)
8181
endif ()
8282

8383
if (NOT DYNAMIC_CORE)
84-
unset(DYNAMIC_ARCH)
84+
message (STATUS "DYNAMIC_ARCH is not supported on this architecture, removing from options")
85+
unset(DYNAMIC_ARCH CACHE)
8586
endif ()
8687
endif ()
8788

cmake/prebuild.cmake

+8
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ set(FU "")
5959
if (APPLE OR (MSVC AND NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang"))
6060
set(FU "_")
6161
endif()
62+
if(MINGW AND NOT MINGW64)
63+
set(FU "_")
64+
endif()
6265

6366
set(COMPILER_ID ${CMAKE_C_COMPILER_ID})
6467
if (${COMPILER_ID} STREQUAL "GNU")
@@ -82,6 +85,11 @@ endif ()
8285
# f_check
8386
if (NOT NOFORTRAN)
8487
include("${PROJECT_SOURCE_DIR}/cmake/f_check.cmake")
88+
else ()
89+
file(APPEND ${TARGET_CONF_TEMP}
90+
"#define BUNDERSCORE _\n"
91+
"#define NEEDBUNDERSCORE 1\n")
92+
set(BU "_")
8593
endif ()
8694

8795
# Cannot run getarch on target if we are cross-compiling

cmake/system.cmake

+22-5
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,17 @@ if (DEFINED TARGET)
6666
endif ()
6767

6868
# On x86_64 build getarch with march=native. This is required to detect AVX512 support in getarch.
69-
if (X86_64)
69+
if (X86_64 AND NOT ${CMAKE_C_COMPILER_ID} STREQUAL "PGI")
7070
set(GETARCH_FLAGS "${GETARCH_FLAGS} -march=native")
7171
endif ()
7272

73+
# On 32-bit x86 builds no AVX support is available
74+
if (X86 OR X86_64)
75+
# Treat the build as 32-bit when BINARY=32 was requested or the target's pointer size is 4 bytes.
if ((DEFINED BINARY AND BINARY EQUAL 32) OR ("${CMAKE_SIZEOF_VOID_P}" EQUAL "4"))
76+
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX -DNO_AVX2 -DNO_AVX512")
77+
endif ()
78+
endif ()
79+
7380
if (INTERFACE64)
7481
message(STATUS "Using 64-bit integers.")
7582
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DUSE64BITINT")
@@ -148,7 +155,9 @@ else()
148155
endif ()
149156

150157
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
151-
158+
if (DEFINED BINARY)
159+
message(STATUS "Compiling a ${BINARY}-bit binary.")
160+
endif ()
152161
if (NOT DEFINED NEED_PIC)
153162
set(NEED_PIC 1)
154163
endif ()
@@ -165,6 +174,9 @@ include("${PROJECT_SOURCE_DIR}/cmake/cc.cmake")
165174
if (NOT NOFORTRAN)
166175
# Fortran Compiler dependent settings
167176
include("${PROJECT_SOURCE_DIR}/cmake/fc.cmake")
177+
else ()
178+
set(NO_LAPACK 1)
179+
set(NO_LAPACKE 1)
168180
endif ()
169181

170182
if (BINARY64)
@@ -190,9 +202,14 @@ if (NEED_PIC)
190202
endif ()
191203

192204
if (DYNAMIC_ARCH)
193-
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH")
194-
if (DYNAMIC_OLDER)
195-
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_OLDER")
205+
if (X86 OR X86_64 OR ARM64 OR PPC)
206+
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH")
207+
if (DYNAMIC_OLDER)
208+
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_OLDER")
209+
endif ()
210+
else ()
211+
unset (DYNAMIC_ARCH)
212+
message (STATUS "DYNAMIC_ARCH is not supported on the target architecture, removing")
196213
endif ()
197214
endif ()
198215

cmake/system_check.cmake

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ if (${HOST_OS} STREQUAL "LINUX")
1515
EXECUTE_PROCESS( COMMAND uname -o COMMAND tr -d '\n' OUTPUT_VARIABLE OPERATING_SYSTEM)
1616
if(${OPERATING_SYSTEM} MATCHES "Android")
1717
set(HOST_OS ANDROID)
18-
endif(${OPERATING_SYSTEM} MATCHES "Android")
18+
endif()
1919
endif()
2020

2121

cpuid_arm64.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ int get_feature(char *search)
9494
if( p == NULL ) return 0;
9595

9696
t = strtok(p," ");
97-
while( t = strtok(NULL," "))
97+
while( (t = strtok(NULL," ")))
9898
{
9999
if (!strcmp(t, search)) { return(1); }
100100
}
@@ -344,7 +344,7 @@ void get_features(void)
344344
if( p == NULL ) return;
345345

346346
t = strtok(p," ");
347-
while( t = strtok(NULL," "))
347+
while( (t = strtok(NULL," ")))
348348
{
349349
}
350350

cpuid_x86.c

+22-7
Original file line numberDiff line numberDiff line change
@@ -1211,7 +1211,7 @@ int get_cpuname(void){
12111211
return CPUTYPE_CORE2;
12121212
}
12131213
break;
1214-
case 1:
1214+
case 1: // family 6 exmodel 1
12151215
switch (model) {
12161216
case 6:
12171217
return CPUTYPE_CORE2;
@@ -1228,7 +1228,7 @@ int get_cpuname(void){
12281228
return CPUTYPE_DUNNINGTON;
12291229
}
12301230
break;
1231-
case 2:
1231+
case 2: // family 6 exmodel 2
12321232
switch (model) {
12331233
case 5:
12341234
//Intel Core (Clarkdale) / Core (Arrandale)
@@ -1257,7 +1257,7 @@ int get_cpuname(void){
12571257
return CPUTYPE_NEHALEM;
12581258
}
12591259
break;
1260-
case 3:
1260+
case 3: // family 6 exmodel 3
12611261
switch (model) {
12621262
case 7:
12631263
// Bay Trail
@@ -1287,7 +1287,7 @@ int get_cpuname(void){
12871287
return CPUTYPE_NEHALEM;
12881288
}
12891289
break;
1290-
case 4:
1290+
case 4: // family 6 exmodel 4
12911291
switch (model) {
12921292
case 5:
12931293
case 6:
@@ -1321,7 +1321,7 @@ int get_cpuname(void){
13211321
return CPUTYPE_NEHALEM;
13221322
}
13231323
break;
1324-
case 5:
1324+
case 5: // family 6 exmodel 5
13251325
switch (model) {
13261326
case 6:
13271327
//Broadwell
@@ -1364,7 +1364,7 @@ int get_cpuname(void){
13641364
return CPUTYPE_NEHALEM;
13651365
}
13661366
break;
1367-
case 6:
1367+
case 6: // family 6 exmodel 6
13681368
switch (model) {
13691369
case 6: // Cannon Lake
13701370
if(support_avx512())
@@ -1376,7 +1376,22 @@ int get_cpuname(void){
13761376
else
13771377
return CPUTYPE_NEHALEM;
13781378
}
1379-
break;
1379+
break;
1380+
case 7: // family 6 exmodel 7
1381+
switch (model) {
1382+
case 10: // Goldmont Plus
1383+
return CPUTYPE_NEHALEM;
1384+
case 14: // Ice Lake
1385+
if(support_avx512())
1386+
return CPUTYPE_SKYLAKEX;
1387+
if(support_avx2())
1388+
return CPUTYPE_HASWELL;
1389+
if(support_avx())
1390+
return CPUTYPE_SANDYBRIDGE;
1391+
else
1392+
return CPUTYPE_NEHALEM;
1393+
}
1394+
break;
13801395
case 9:
13811396
case 8:
13821397
switch (model) {

ctest/Makefile

+2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ TOPDIR = ..
66
include $(TOPDIR)/Makefile.system
77

88
override CFLAGS += -DADD$(BU) -DCBLAS
9+
override TARGET_ARCH=
10+
override TARGET_MACH=
911

1012
LIB = $(TOPDIR)/$(LIBNAME)
1113

0 commit comments

Comments
 (0)