Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implemented P-orthogonalization. #205

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions basis_set_exchange/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ def get_basis(name,
remove_free_primitives=False,
make_general=False,
optimize_general=False,
optimize_segmented=False,
augment_diffuse=0,
augment_steep=0,
data_dir=None,
Expand Down Expand Up @@ -158,8 +159,18 @@ def get_basis(name,
If True, make the basis set as generally-contracted as possible. There will be one
shell per angular momentum (for each element)
optimize_general : bool
Optimize by removing general contractions that contain uncontracted
functions (see :func:`basis_set_exchange.manip.optimize_general`)
Optimize generally contracted basis for codes using segmented
contractions by dropping free primitives from the contracted
functions (see
:func:`basis_set_exchange.manip.optimize_general`).
optimize_segmented : bool
Optimize generally contracted basis for codes using segmented
contractions by dropping free primitives from the contracted
functions (see
:func:`basis_set_exchange.manip.optimize_general`), and then
running P-orthogonalization (a numerically stable Davidson
purification) to remove redundancies in the remaining
functions.
augment_diffuse : int
Add n diffuse functions by even-tempered extrapolation
augment_steep : int
Expand All @@ -173,6 +184,7 @@ def get_basis(name,
str or dict
The basis set in the desired format. If `fmt` is **None**, this will be a python
dictionary. Otherwise, it will be a string.

'''

data_dir = fix_data_dir(data_dir)
Expand Down Expand Up @@ -253,6 +265,9 @@ def get_basis(name,
if needs_pruning:
basis_dict = manip.prune_basis(basis_dict, False)

if optimize_segmented:
basis_dict = manip.P_orthogonalization(basis_dict, use_copy=False)

# Augment
if augment_diffuse > 0:
basis_dict = manip.geometric_augmentation(basis_dict,
Expand Down
5 changes: 4 additions & 1 deletion basis_set_exchange/cli/bse_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,10 @@ def run_bse_cli():
subp.add_argument('--unc-spdf', action='store_true', help='Remove combined sp, spd, ... contractions')
subp.add_argument('--unc-seg', action='store_true', help='Remove general contractions')
subp.add_argument('--rm-free', action='store_true', help='Remove free primitives')
subp.add_argument('--opt-gen', action='store_true', help='Optimize general contractions')
subp.add_argument('--opt-gen', action='store_true', help='Drop free primitives from contractions')
subp.add_argument('--opt-seg',
action='store_true',
help='Run P-orthogonalization to convert general contractions to segmented ones')
subp.add_argument('--make-gen', action='store_true', help='Make the basis set as generally-contracted as possible')
subp.add_argument('--aug-steep', type=int, default=0, help='Augment with n steep functions')
subp.add_argument('--aug-diffuse', type=int, default=0, help='Augment with n diffuse functions')
Expand Down
1 change: 1 addition & 0 deletions basis_set_exchange/cli/bse_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def _bse_cli_get_basis(args):
uncontract_segmented=args.unc_seg,
remove_free_primitives=args.rm_free,
make_general=args.make_gen,
optimize_segmented=args.opt_seg,
optimize_general=args.opt_gen,
augment_diffuse=args.aug_diffuse,
augment_steep=args.aug_steep,
Expand Down
30 changes: 30 additions & 0 deletions basis_set_exchange/ints.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,15 @@ def _transform_numpy(C0, P0):
return [[np_result[i][j] for i in range(np_result.shape[0])] for j in range(np_result.shape[1])]


def _inner_product_numpy(Cleft0, P0, Cright0):
'''Transforms the primitive integrals P into the contracted basis C using NumPy'''
Cleft = numpy.asarray(Cleft0)
Cright = numpy.asarray(Cright0)
P = numpy.asarray(P0)
ip = numpy.dot(Cleft, numpy.dot(P, Cright))
return ip


def _matmul(A, B):
'''Matrix multiply'''
assert (len(A[0]) == len(B))
Expand All @@ -43,6 +52,18 @@ def _transform_python(C, P):
return _matmul(C, _matmul(P, Ct))


def _inner_product_python(Cleft, P, Cright):
'''Calculates the inner product of Cleft and Cright against metric P in pure Python'''

assert len(Cleft) == len(P)
assert len(P[0]) == len(Cright)
assert len(Cleft) == len(Cright)
nprim = len(Cleft)

CP = [sum([Cleft[i] * P[i][j] for i in range(nprim)]) for j in range(nprim)]
return sum([CP[i] * Cright[i] for i in range(nprim)])


def _to_float(M):
'''Transforms a vector or matrix from string representation to floating point representation'''

Expand Down Expand Up @@ -80,6 +101,15 @@ def _transform(C, P):
return _transform_python(C, P)


def inner_product(Cleft, P, Cright):
    '''Calculates the inner product of Cleft and Cright in metric P

    Dispatches to the NumPy implementation when NumPy is available,
    otherwise falls back to the pure-Python implementation.
    '''

    impl = _inner_product_numpy if _use_numpy else _inner_product_python
    return impl(Cleft, P, Cright)


def _zero_matrix(N):
'''Allocates a zero matrix of size N x N'''
return [[0.0 for _ in range(N)] for _ in range(N)]
Expand Down
232 changes: 209 additions & 23 deletions basis_set_exchange/manip.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
"""

import copy
from math import sqrt
from . import skel, misc
from .ints import inner_product, _gto_overlap
from .sort import sort_basis


def merge_element_data(dest, sources, use_copy=True):
Expand Down Expand Up @@ -467,45 +470,228 @@ def optimize_general(basis, use_copy=True):
continue

elshells = eldata['electron_shells']
new_shells = []
for sh in elshells:
exponents = sh['exponents']
coefficients = sh['coefficients']
nprim = len(exponents)
nam = len(sh['angular_momentum'])
ncontr = len(coefficients)

# Skip sp shells and shells with only one general contraction
if nam > 1 or len(coefficients) < 2:
if nam > 1 or len(coefficients) == 1:
new_shells.append(sh)
continue

# First, find columns (general contractions) with a single non-zero value
single_columns = [idx for idx, c in enumerate(coefficients) if _is_single_column(c)]

# Find the corresponding rows that have a value in one of these columns
# Note that at this stage, the row may have coefficients in more than one
# column. That is what we are looking for

# Also, test to see that each row is only represented once. That is, there should be
# no rows that are part of single columns (this would represent duplicate shells).
# This can happen in poorly-formatted basis sets and is an error
row_col_pairs = []
all_row_idx = []
# Find the indices of the free primitives
free_primitive_idx = []
for col_idx in single_columns:
col = coefficients[col_idx]
for row_idx in range(nprim):
if float(col[row_idx]) != 0.0:
if row_idx in all_row_idx:
raise RuntimeError("Badly-formatted basis. Row {} makes duplicate shells".format(row_idx))

# Store the index of the nonzero value in single_columns
row_col_pairs.append((row_idx, col_idx))
all_row_idx.append(row_idx)

# Now for each row/col pair, zero out the entire row
# EXCEPT for the column that has the single value
for row_idx, col_idx in row_col_pairs:
for idx, col in enumerate(coefficients):
if float(col[row_idx]) != 0.0 and col_idx != idx:
col[row_idx] = '0.0000000E+00'
free_primitive_idx.append(row_idx)
break

# Now, we can zero out the rows corresponding to free primitives
for icontr in range(ncontr):
# Skip the free primitives themselves
if icontr in single_columns:
continue

# Zero out the row for the free primitive
col = coefficients[icontr]
for iprim in free_primitive_idx:
if float(col[iprim]) != 0.0:
col[iprim] = '0.0'

# Next, we split off the free primitives onto their own
# shells; we need this for P-orthogonalization
contracted_rows = [irow for irow in range(nprim) if irow not in free_primitive_idx]
contracted_cols = [icol for icol in range(ncontr) if icol not in single_columns]
new_coefficients = [[coefficients[icol][irow] for irow in contracted_rows] for icol in contracted_cols]
new_exponents = [exponents[iprim] for iprim in contracted_rows]

nnewprim = len(new_exponents)
nnewcontr = len(new_coefficients)

for irow in free_primitive_idx:
new_shell = sh.copy()
new_shell['exponents'] = [exponents[irow]]
new_shell['coefficients'] = [['1.0']]
new_shells.append(new_shell)

if nnewprim > 0 and nnewcontr > 0:
sh['exponents'] = new_exponents
sh['coefficients'] = new_coefficients
new_shells.append(sh)

eldata['electron_shells'] = new_shells
return basis


def P_orthogonalization(basis, cutoff=1e-5, Cortho=1e-4, use_copy=True):
    """Optimizes a general contraction using P-orthogonalization

    .. seealso :: | F. Jensen
                  | 'Unifying General and Segmented Contracted Basis Sets. Segmented Polarization Consistent Basis Sets'
                  | J. Chem. Theory Comput. 10, 1074 (2014)
                  | https://doi.org/10.1021/ct401026a

    which is a numerically more stable version of Davidson's procedure for
    removing redundancies in generally contracted sets (Davidson
    purification)

    .. seealso :: | T. Hashimoto, K. Hirao, H. Tatewaki
                  | 'Comment on "Comment on Dunning's correlation-consistent basis set"'
                  | Chemical Physics Letters 260, 514 (1996)
                  | https://doi.org/10.1016/0009-2614(96)00917-7

    which was an improvement over the observation that free
    primitives can in principle be dropped from the contracted
    functions without affecting numerical results

    .. seealso :: | T. Hashimoto, K. Hirao, H. Tatewaki
                  | 'Comment on Dunning's correlation-consistent basis set'
                  | Chemical Physics Letters 243, 190 (1995)
                  | https://doi.org/10.1016/0009-2614(95)00807-G

    P-orthogonalization, like Davidson's method, relies on numerical
    transformations, thereby changing the matrix of contraction
    coefficients.

    Parameters
    ----------
    basis: dict
        Basis set dictionary to work with

    cutoff: float
        threshold for dropping primitives with small contraction
        coefficients in the representation where the largest
        coefficient is 1.0. Jensen states: "a coefficient cutoff
        parameter of ∼1e-5 will for most practical applications
        produce negligible differences in the final results".

    Cortho: float
        linear dependence threshold used in the procedure: if
        1-O^P_{ij} < Cortho, the functions i and j are linearly
        dependent up to the primitive function P (see eq 6 in paper).

    use_copy: bool
        If True, the input basis set is not modified.

    Returns
    -------
    dict
        The basis set dictionary with the contraction coefficients of
        each single-angular-momentum shell replaced by their
        P-orthogonalized values (the input dictionary itself when
        use_copy is False).
    """

    # First, we drop the free functions from the contractions
    basis = optimize_general(basis, use_copy=use_copy)
    # and sort the basis so that the exponents are in decreasing order
    basis = sort_basis(basis, use_copy=False)

    for eldata in basis['elements'].values():

        # Elements without electron shells (e.g. ECP-only entries) are
        # left untouched
        if 'electron_shells' not in eldata:
            continue

        elshells = eldata['electron_shells']
        for sh in elshells:
            exponents = sh['exponents']
            coefficients = sh['coefficients']
            nprim = len(exponents)
            ncontr = len(coefficients)
            sh_am = sh['angular_momentum']
            nam = len(sh_am)

            # Skip sp shells
            if nam > 1:
                continue

            # If the number of exponents and coefficients matches, we
            # can free all primitives.
            if nprim == ncontr:
                # Replace the square contraction matrix with a unit
                # matrix: every primitive becomes a free function
                coefficients = []
                for icontr in range(ncontr):
                    coefficients.append(['1.0' if iprim == icontr else '0.0' for iprim in range(nprim)])
                sh['coefficients'] = coefficients
                continue

            # Convert to floating-point format
            exps = [float(x) for x in exponents]
            C = [[float(c) for c in ccol] for ccol in coefficients]
            # We also need the primitive overlap matrix
            primitive_overlap = _gto_overlap(exps, sh_am[0])

            def normalized_partial_overlap(Ci, S, Cj, nfun):
                '''Computes the normalized partial overlap using only a subset of the primitives'''
                assert nfun >= 1

                # Restrict the metric and both coefficient vectors to the
                # first nfun primitives (exponents are sorted in
                # decreasing order, so these are the steepest ones)
                Ssub = [[S[iidx][jidx] for jidx in range(nfun)] for iidx in range(nfun)]
                Cisub = [Ci[idx] for idx in range(nfun)]
                Cjsub = [Cj[idx] for idx in range(nfun)]
                nprod = abs(
                    inner_product(Cisub, Ssub, Cjsub) /
                    sqrt(inner_product(Cisub, Ssub, Cisub) * inner_product(Cjsub, Ssub, Cjsub)))
                return nprod

            def inside_out():
                '''Inside-out purification'''
                for icontr in range(ncontr):
                    # Determine level of P-orthogonalization
                    for Plevel in range(nprim - 1, 0, -1):
                        # Compute the normalized partial overlaps
                        overlap_diff = [
                            1 - normalized_partial_overlap(C[icontr], primitive_overlap, C[jcontr], Plevel)
                            for jcontr in range(icontr + 1, ncontr)
                        ]
                        # Is Plevel sufficient?
                        # NOTE(review): Plevel only ranges over
                        # nprim-1 .. 1, so "Plevel != nprim" is always
                        # true and the second operand is redundant —
                        # confirm whether the loop was meant to start at
                        # nprim instead
                        if (not all([diff <= Cortho for diff in overlap_diff])) and (Plevel != nprim):
                            # No, it is not
                            continue

                        # Orthogonalize functions
                        for jcontr in range(icontr + 1, ncontr):
                            # x factor according to eq 8
                            x = normalized_partial_overlap(C[icontr], primitive_overlap, C[jcontr], Plevel)
                            # Subtract
                            for iprim in range(nprim):
                                C[jcontr][iprim] -= x * C[icontr][iprim]

                        # Break the p loop, continue with next icontr
                        break

            def outside_in():
                '''Outside-in purification; this is stable according to Jensen'''
                for icontr in range(ncontr - 1, -1, -1):
                    for jcontr in range(icontr - 1, -1, -1):
                        # Reference primitive index
                        # (the most diffuse primitives, counted from the
                        # end of the sorted exponent list)
                        refprim = nprim - 1 - (ncontr - 1 - icontr)
                        x = C[jcontr][refprim] / C[icontr][refprim]
                        # Subtract
                        for iprim in range(nprim):
                            C[jcontr][iprim] -= x * C[icontr][iprim]

            def intermediate_normalization():
                '''Normalize largest coefficient to +-1'''
                for icontr in range(ncontr):
                    maxabs = max([abs(c) for c in C[icontr]])
                    C[icontr] = [C[icontr][iprim] / maxabs for iprim in range(nprim)]

            def drop_small_coeffs():
                '''Set small coefficients to zero'''
                # Safe only after intermediate_normalization, since
                # cutoff is defined relative to a largest coefficient of 1
                for icontr in range(ncontr):
                    for iprim in range(nprim):
                        if C[icontr][iprim] != 0.0 and abs(C[icontr][iprim]) <= cutoff:
                            C[icontr][iprim] = 0.0

            inside_out()
            outside_in()
            intermediate_normalization()
            drop_small_coeffs()

            # Create new contraction matrix
            coefficients = [['{:.15e}'.format(C[i][j]) for j in range(nprim)] for i in range(ncontr)]
            sh['coefficients'] = coefficients

    return basis

Expand Down