Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RFC: Add allocation free matrix multiplication and solving #4595

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 37 additions & 15 deletions base/linalg/factorization.jl
Original file line number Diff line number Diff line change
Expand Up @@ -210,19 +210,19 @@ function logdet{T<:Complex}(A::LU{T})
end


function (\){T<:BlasFloat}(A::LU{T}, B::StridedVecOrMat{T})
function A_ldiv_B!{T<:BlasFloat}(A::LU{T}, B::StridedVecOrMat{T})
if A.info > 0; throw(SingularException(A.info)); end
LAPACK.getrs!('N', A.factors, A.ipiv, copy(B))
LAPACK.getrs!('N', A.factors, A.ipiv, B)
end

function At_ldiv_B{T<:BlasFloat}(A::LU{T}, B::StridedVecOrMat{T})
function At_ldiv_B!{T<:BlasFloat}(A::LU{T}, B::StridedVecOrMat{T})
if A.info > 0; throw(SingularException(A.info)); end
LAPACK.getrs!('T', A.factors, A.ipiv, copy(B))
LAPACK.getrs!('T', A.factors, A.ipiv, B)
end

function Ac_ldiv_B{T<:BlasComplex}(A::LU{T}, B::StridedVecOrMat{T})
function Ac_ldiv_B!{T<:BlasComplex}(A::LU{T}, B::StridedVecOrMat{T})
if A.info > 0; throw(SingularException(A.info)); end
LAPACK.getrs!('C', A.factors, A.ipiv, copy(B))
LAPACK.getrs!('C', A.factors, A.ipiv, B)
end

function At_ldiv_Bt{T<:BlasFloat}(A::LU{T}, B::StridedVecOrMat{T})
Expand Down Expand Up @@ -317,8 +317,11 @@ function A_mul_Bc{T<:BlasFloat}(A::StridedVecOrMat{T}, B::QRPackedQ{T})
LAPACK.gemqrt!('R', iseltype(B.vs,Complex) ? 'C' : 'T', B.vs, B.T, Ac)
end
## Least squares solution. Should be more careful about cases with m < n
(\)(A::QR, B::StridedVector) = Triangular(A[:R], :U)\(A[:Q]'B)[1:size(A, 2)]
(\)(A::QR, B::StridedMatrix) = Triangular(A[:R], :U)\(A[:Q]'B)[1:size(A, 2),:]
# In-place solve against a QR factorization: B is passed to two mutating LAPACK
# wrappers and then returned, so no separate solution array is created here.
#   1. gemqrt! with side 'L' applies the (conjugate-)transposed Q, stored as the
#      reflectors A.vs and block factors A.T, to B from the left.
#   2. trtrs! with ('U', 'N', 'N') solves the upper-triangular, non-transposed,
#      non-unit-diagonal system taken from the same packed storage A.vs.
# NOTE(review): both calls receive the full A.vs and the full B; for a non-square
# factorization the triangular solve presumably needs only the leading square
# part of A.vs and the matching rows of B -- confirm against the trtrs! wrapper.
function A_ldiv_B!(A::QR, B::StridedVecOrMat)
    # 'C' (conjugate transpose) for complex reflectors, plain 'T' otherwise.
    qtrans = iseltype(A.vs, Complex) ? 'C' : 'T'
    LAPACK.gemqrt!('L', qtrans, A.vs, A.T, B)
    LAPACK.trtrs!('U', 'N', 'N', A.vs, B)
    return B
end

type QRPivoted{T} <: Factorization{T}
hh::Matrix{T}
Expand Down Expand Up @@ -356,8 +359,11 @@ function getindex{T<:BlasFloat}(A::QRPivoted{T}, d::Symbol)
end

# Julia implementation similar to LAPACK's xGELSY
function (\){T<:BlasFloat}(A::QRPivoted{T}, B::StridedMatrix{T}, rcond::Real)
nr = minimum(size(A.hh))
function A_ldiv_B!{T<:BlasFloat}(A::QRPivoted{T}, B::StridedMatrix{T}, rcond::Real = sqrt(eps(typeof(real(B[1])))))
mA, nA = size(A.hh)
nr = min(mA, nA)
mB = size(B, 1)
mB == max(mA, nA) || throw(DimensionMismatch(""))
nrhs = size(B, 2)
if nr == 0 return zeros(0, nrhs), 0 end
ar = abs(A.hh[1])
Expand All @@ -377,12 +383,28 @@ function (\){T<:BlasFloat}(A::QRPivoted{T}, B::StridedMatrix{T}, rcond::Real)
# if cond(r[1:rnk, 1:rnk])*rcond < 1 break end
end
C, tau = LAPACK.tzrzf!(A.hh[1:rnk,:])
X = [Triangular(C[1:rnk,1:rnk],:U)\(A[:Q]'B)[1:rnk,:]; zeros(T, size(A.hh, 2) - rnk, nrhs)]
LAPACK.ormrz!('L', iseltype(B, Complex) ? 'C' : 'T', C, tau, X)
return X[invperm(A[:p]),:], rnk
LAPACK.ormqr!('L', iseltype(B, Complex) ? 'C' : 'T', A.hh, A.tau, sub(B, 1:mA, 1:nrhs))
LAPACK.trtrs!('U', 'N', 'N', C[:,1:rnk], sub(B, 1:rnk, 1:nrhs))
# X = [Triangular(C[1:rnk,1:rnk],:U)\B[1:rnk,:]; zeros(T, size(A.hh, 2) - rnk, nrhs)]
LAPACK.ormrz!('L', iseltype(B, Complex) ? 'C' : 'T', C, tau, sub(B, 1:nA, 1:nrhs))
for j = 1:nrhs
for i = 1:nA
C[getindex(A.jpvt,i)] = B[i,j]
end
copy!(B, (1:nA) + mB*(j - 1), C, 1:nA)
end
return B, rnk
end
# Non-mutating solve with a pivoted QR factorization.
# A_ldiv_B! is called on a scratch right-hand side rather than on B itself:
#   * when A has fewer rows than columns, B is extended with n - m zero rows;
#   * otherwise a plain copy of B is used.
# The first element of the tuple returned by A_ldiv_B! is taken, and only its
# first n rows (n = number of columns of A) are returned.
function (\){T}(A::QRPivoted, B::StridedMatrix{T})
    nrow, ncol = size(A)
    rhs = nrow < ncol ? [B; zeros(T, ncol - nrow, size(B, 2))] : copy(B)
    solved = A_ldiv_B!(A, rhs)[1]
    return solved[1:ncol, :]
end
(\)(A::QRPivoted, B::StridedMatrix) = (\)(A, B, sqrt(eps(typeof(real(B[1])))))[1]
(\)(A::QRPivoted, B::StridedVector) = (\)(A, reshape(B, length(B), 1))[:]
(\)(A::QRPivoted, B::StridedVector) = vec((\)(A, reshape(copy(B), length(B), 1)))

type QRPivotedQ{T} <: AbstractMatrix{T}
hh::Matrix{T} # Householder transformations and R
Expand Down
41 changes: 19 additions & 22 deletions base/linalg/lapack.jl
Original file line number Diff line number Diff line change
Expand Up @@ -395,8 +395,8 @@ for (tzrzf, ormrz, elty) in
# 27 * COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * )
function tzrzf!(A::StridedMatrix{$elty})
m, n = size(A)
if n < m throw(DimensionMismatch("Matrix cannot have fewer columns than rows")) end
lda = max(1, m)
n >= m || throw(DimensionMismatch("Matrix cannot have fewer columns than rows"))
lda = max(1, stride(A, 2))
tau = Array($elty, m)
work = Array($elty, 1)
lwork = -1
Expand Down Expand Up @@ -428,8 +428,8 @@ for (tzrzf, ormrz, elty) in
m, n = size(C)
k = length(tau)
l = size(A, 2) - size(A, 1)
lda = max(1, k)
ldc = max(1, m)
lda = max(1, stride(A,2))
ldc = max(1, stride(C,2))
work = Array($elty, 1)
lwork = -1
info = Array(BlasInt, 1)
Expand Down Expand Up @@ -622,8 +622,8 @@ for (gelsd, gelsy, elty) in
nrhs = size(B, 2)
if m != size(B, 1) throw(DimensionMismatch("left and right hand sides must have same number of rows")) end
newB = [B; zeros($elty, max(0, n - size(B, 1)), size(B, 2))]
lda = max(1, m)
ldb = max(1, m, n)
lda = max(1, stride(A, 2))
ldb = max(1, stride(B, 2))
jpvt = Array(BlasInt, n)
rcond = convert($elty, rcond)
rnk = Array(BlasInt, 1)
Expand Down Expand Up @@ -715,8 +715,8 @@ for (gelsd, gelsy, elty, relty) in
nrhs = size(B, 2)
if m != size(B, 1) throw(DimensionMismatch("left and right hand sides must have same number of rows")) end
newB = [B; zeros($elty, max(0, n - size(B, 1)), size(B, 2))]
lda = max(1, m)
ldb = max(1, m, n)
lda = max(1, stride(A, 2))
ldb = max(1, stride(B, 2))
jpvt = Array(BlasInt, n)
rcond = convert($relty, rcond)
rnk = Array(BlasInt, 1)
Expand Down Expand Up @@ -1057,8 +1057,8 @@ for (ggev, elty) in
n = size(A, 1)
if size(A, 2) != n | size(B, 1) != size(B, 2) throw(DimensionMismatch("matrices must be square")) end
if size(B, 1) != n throw(DimensionMismatch("matrices must have same size")) end
lda = max(1, n)
ldb = max(1, n)
lda = max(1, stride(A, 2))
ldb = max(1, stride(B, 2))
alphar = Array($elty, n)
alphai = Array($elty, n)
beta = Array($elty, n)
Expand Down Expand Up @@ -1111,8 +1111,8 @@ for (ggev, elty, relty) in
n = size(A, 1)
if size(A, 2) != n | size(B, 1) != size(B, 2) throw(DimensionMismatch("matrices must be square")) end
if size(B, 1) != n throw(DimensionMismatch("matrices must have same size")) end
lda = max(1, n)
ldb = max(1, n)
lda = max(1, stride(A, 2))
ldb = max(1, stride(B, 2))
alpha = Array($elty, n)
beta = Array($elty, n)
ldvl = jobvl == 'V' ? n : 1
Expand Down Expand Up @@ -1414,15 +1414,12 @@ for (orglq, orgqr, ormlq, ormqr, gemqrt, elty) in
m = size(C, 1)
n = size(C, 2)
nb, k = size(T)
ldv = max(1, stride(V, 2))
if k == 0 return C end
if side == 'L'
0 <= k <= m || error("Wrong value for k")
ldv = max(1, m)
wss = n*k
# if m != size(V, 1) throw(DimensionMismatch("")) end
elseif side == 'R'
0 <= k <= n || error("Wrong value for k")
ldv = max(1, n)
wss = m*k
# if n != size(V, 1) throw(DimensionMismatch("")) end
else
Expand Down Expand Up @@ -2425,8 +2422,8 @@ for (syev, syevr, sygvd, elty) in
n = size(A, 1)
if size(A, 2) != n | size(B, 1) != size(B, 2) throw(DimensionMismatch("Matrices must be square")) end
if size(B, 1) != n throw(DimensionMismatch("Matrices must have same size")) end
lda = max(1, n)
ldb = max(1, n)
lda = max(1, stride(A, 2))
ldb = max(1, stride(B, 2))
w = Array($elty, n)
work = Array($elty, 1)
lwork = -one(BlasInt)
Expand Down Expand Up @@ -2576,8 +2573,8 @@ for (syev, syevr, sygvd, elty, relty) in
n = size(A, 1)
if size(A, 2) != n | size(B, 1) != size(B, 2) throw(DimensionMismatch("Matrices must be square")) end
if size(B, 1) != n throw(DimensionMismatch("Matrices must have same size")) end
lda = max(1, n)
ldb = max(1, n)
lda = max(1, stride(A, 2))
ldb = max(1, stride(B, 2))
w = Array($relty, n)
work = Array($elty, 1)
lwork = -one(BlasInt)
Expand Down Expand Up @@ -2728,7 +2725,7 @@ for (gecon, elty) in
# DOUBLE PRECISION A( LDA, * ), WORK( * )
chkstride1(A)
n = size(A, 2)
lda = max(1, size(A, 1))
lda = max(1, stride(A, 2))
rcond = Array($elty, 1)
work = Array($elty, 4n)
iwork = Array(BlasInt, n)
Expand Down Expand Up @@ -2762,7 +2759,7 @@ for (gecon, elty, relty) in
# COMPLEX*16 A( LDA, * ), WORK( * )
chkstride1(A)
n = size(A, 2)
lda = max(1, size(A, 1))
lda = max(1, stride(A, 2))
rcond = Array($relty, 1)
work = Array($elty, 2n)
rwork = Array($relty, 2n)
Expand Down
Loading