Skip to content

Commit 3e3ad8f

Browse files
authored
Merge 03d47fc into 32721c8
2 parents 32721c8 + 03d47fc commit 3e3ad8f

File tree

5 files changed

+261
-26
lines changed

5 files changed

+261
-26
lines changed

basis_set_exchange/api.py

+17-2
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ def get_basis(name,
100100
remove_free_primitives=False,
101101
make_general=False,
102102
optimize_general=False,
103+
optimize_segmented=False,
103104
augment_diffuse=0,
104105
augment_steep=0,
105106
data_dir=None,
@@ -158,8 +159,18 @@ def get_basis(name,
158159
If True, make the basis set as generally-contracted as possible. There will be one
159160
shell per angular momentum (for each element)
160161
optimize_general : bool
161-
Optimize by removing general contractions that contain uncontracted
162-
functions (see :func:`basis_set_exchange.manip.optimize_general`)
162+
Optimize generally contracted basis for codes using segmented
163+
contractions by dropping free primitives from the contracted
164+
functions (see
165+
:func:`basis_set_exchange.manip.optimize_general`).
166+
optimize_segmented : bool
167+
Optimize generally contracted basis for codes using segmented
168+
contractions by dropping free primitives from the contracted
169+
functions (see
170+
:func:`basis_set_exchange.manip.optimize_general`), and then
171+
running P-orthogonalization (a numerically stable Davidson
172+
purification) to remove redundancies in the remaining
173+
functions.
163174
augment_diffuse : int
164175
Add n diffuse functions by even-tempered extrapolation
165176
augment_steep : int
@@ -173,6 +184,7 @@ def get_basis(name,
173184
str or dict
174185
The basis set in the desired format. If `fmt` is **None**, this will be a python
175186
dictionary. Otherwise, it will be a string.
187+
176188
'''
177189

178190
data_dir = fix_data_dir(data_dir)
@@ -253,6 +265,9 @@ def get_basis(name,
253265
if needs_pruning:
254266
basis_dict = manip.prune_basis(basis_dict, False)
255267

268+
if optimize_segmented:
269+
basis_dict = manip.P_orthogonalization(basis_dict, False)
270+
256271
# Augment
257272
if augment_diffuse > 0:
258273
basis_dict = manip.geometric_augmentation(basis_dict,

basis_set_exchange/cli/bse_cli.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,10 @@ def run_bse_cli():
100100
subp.add_argument('--unc-spdf', action='store_true', help='Remove combined sp, spd, ... contractions')
101101
subp.add_argument('--unc-seg', action='store_true', help='Remove general contractions')
102102
subp.add_argument('--rm-free', action='store_true', help='Remove free primitives')
103-
subp.add_argument('--opt-gen', action='store_true', help='Optimize general contractions')
103+
subp.add_argument('--opt-gen', action='store_true', help='Drop free primitives from contractions')
104+
subp.add_argument('--opt-seg',
105+
action='store_true',
106+
help='Run P-orthogonalization to convert general contractions to segmented ones')
104107
subp.add_argument('--make-gen', action='store_true', help='Make the basis set as generally-contracted as possible')
105108
subp.add_argument('--aug-steep', type=int, default=0, help='Augment with n steep functions')
106109
subp.add_argument('--aug-diffuse', type=int, default=0, help='Augment with n diffuse functions')

basis_set_exchange/cli/bse_handlers.py

+1
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ def _bse_cli_get_basis(args):
106106
uncontract_segmented=args.unc_seg,
107107
remove_free_primitives=args.rm_free,
108108
make_general=args.make_gen,
109+
optimize_segmented=args.opt_seg,
109110
optimize_general=args.opt_gen,
110111
augment_diffuse=args.aug_diffuse,
111112
augment_steep=args.aug_steep,

basis_set_exchange/ints.py

+30
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,15 @@ def _transform_numpy(C0, P0):
2121
return [[np_result[i][j] for i in range(np_result.shape[0])] for j in range(np_result.shape[1])]
2222

2323

24+
def _inner_product_numpy(Cleft0, P0, Cright0):
25+
'''Transforms the primitive integrals P into the contracted basis C using NumPy'''
26+
Cleft = numpy.asarray(Cleft0)
27+
Cright = numpy.asarray(Cright0)
28+
P = numpy.asarray(P0)
29+
ip = numpy.dot(Cleft, numpy.dot(P, Cright))
30+
return ip
31+
32+
2433
def _matmul(A, B):
2534
'''Matrix multiply'''
2635
assert (len(A[0]) == len(B))
@@ -43,6 +52,18 @@ def _transform_python(C, P):
4352
return _matmul(C, _matmul(P, Ct))
4453

4554

55+
def _inner_product_python(Cleft, P, Cright):
56+
'''Calculates the inner product of Cleft and Cright against metric P in pure Python'''
57+
58+
assert len(Cleft) == len(P)
59+
assert len(P[0]) == len(Cright)
60+
assert len(Cleft) == len(Cright)
61+
nprim = len(Cleft)
62+
63+
CP = [sum([Cleft[i] * P[i][j] for i in range(nprim)]) for j in range(nprim)]
64+
return sum([CP[i] * Cright[i] for i in range(nprim)])
65+
66+
4667
def _to_float(M):
4768
'''Transforms a vector or matrix from string representation to floating point representation'''
4869

@@ -80,6 +101,15 @@ def _transform(C, P):
80101
return _transform_python(C, P)
81102

82103

104+
def inner_product(Cleft, P, Cright):
105+
'''Calculates the inner product of Cleft and Cright in metric P'''
106+
107+
if _use_numpy:
108+
return _inner_product_numpy(Cleft, P, Cright)
109+
else:
110+
return _inner_product_python(Cleft, P, Cright)
111+
112+
83113
def _zero_matrix(N):
84114
'''Allocates a zero matrix of size N x N'''
85115
return [[0.0 for _ in range(N)] for _ in range(N)]

basis_set_exchange/manip.py

+209-23
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@
66
"""
77

88
import copy
9+
from math import sqrt
910
from . import skel, misc
11+
from .ints import inner_product, _gto_overlap
12+
from .sort import sort_basis
1013

1114

1215
def merge_element_data(dest, sources, use_copy=True):
@@ -467,45 +470,228 @@ def optimize_general(basis, use_copy=True):
467470
continue
468471

469472
elshells = eldata['electron_shells']
473+
new_shells = []
470474
for sh in elshells:
471475
exponents = sh['exponents']
472476
coefficients = sh['coefficients']
473477
nprim = len(exponents)
474478
nam = len(sh['angular_momentum'])
479+
ncontr = len(coefficients)
475480

476481
# Skip sp shells and shells with only one general contraction
477-
if nam > 1 or len(coefficients) < 2:
482+
if nam > 1 or len(coefficients) == 1:
483+
new_shells.append(sh)
478484
continue
479485

480486
# First, find columns (general contractions) with a single non-zero value
481487
single_columns = [idx for idx, c in enumerate(coefficients) if _is_single_column(c)]
482488

483-
# Find the corresponding rows that have a value in one of these columns
484-
# Note that at this stage, the row may have coefficients in more than one
485-
# column. That is what we are looking for
486-
487-
# Also, test to see that each row is only represented once. That is, there should be
488-
# no rows that are part of single columns (this would represent duplicate shells).
489-
# This can happen in poorly-formatted basis sets and is an error
490-
row_col_pairs = []
491-
all_row_idx = []
489+
# Find the indices of the free primitives
490+
free_primitive_idx = []
492491
for col_idx in single_columns:
493492
col = coefficients[col_idx]
494493
for row_idx in range(nprim):
495494
if float(col[row_idx]) != 0.0:
496-
if row_idx in all_row_idx:
497-
raise RuntimeError("Badly-formatted basis. Row {} makes duplicate shells".format(row_idx))
498-
499-
# Store the index of the nonzero value in single_columns
500-
row_col_pairs.append((row_idx, col_idx))
501-
all_row_idx.append(row_idx)
502-
503-
# Now for each row/col pair, zero out the entire row
504-
# EXCEPT for the column that has the single value
505-
for row_idx, col_idx in row_col_pairs:
506-
for idx, col in enumerate(coefficients):
507-
if float(col[row_idx]) != 0.0 and col_idx != idx:
508-
col[row_idx] = '0.0000000E+00'
495+
free_primitive_idx.append(row_idx)
496+
break
497+
498+
# Now, we can zero out the rows corresponding to free primitives
499+
for icontr in range(ncontr):
500+
# Skip the free primitives themselves
501+
if icontr in single_columns:
502+
continue
503+
504+
# Zero out the row for the free primitive
505+
col = coefficients[icontr]
506+
for iprim in free_primitive_idx:
507+
if float(col[iprim]) != 0.0:
508+
col[iprim] = '0.0'
509+
510+
# Next, we split off the free primitives onto their own
511+
# shells; we need this for P-orthogonalization
512+
contracted_rows = [irow for irow in range(nprim) if irow not in free_primitive_idx]
513+
contracted_cols = [icol for icol in range(ncontr) if icol not in single_columns]
514+
new_coefficients = [[coefficients[icol][irow] for irow in contracted_rows] for icol in contracted_cols]
515+
new_exponents = [exponents[iprim] for iprim in contracted_rows]
516+
517+
nnewprim = len(new_exponents)
518+
nnewcontr = len(new_coefficients)
519+
520+
for irow in free_primitive_idx:
521+
new_shell = sh.copy()
522+
new_shell['exponents'] = [exponents[irow]]
523+
new_shell['coefficients'] = [['1.0']]
524+
new_shells.append(new_shell)
525+
526+
if nnewprim > 0 and nnewcontr > 0:
527+
sh['exponents'] = new_exponents
528+
sh['coefficients'] = new_coefficients
529+
new_shells.append(sh)
530+
531+
eldata['electron_shells'] = new_shells
532+
return basis
533+
534+
535+
def P_orthogonalization(basis, use_copy=True, cutoff=1e-5, Cortho=1e-4):
536+
"""Optimizes a general contraction using P-orthogonalization
537+
538+
.. seealso :: | F. Jensen
539+
| 'Unifying General and Segmented Contracted Basis Sets. Segmented Polarization Consistent Basis Sets'
540+
| J. Chem. Theory Comput. 10, 1074 (2014)
541+
| https://doi.org/10.1021/ct401026a
542+
543+
which is a numerically stabler version of Davidson's procedure for
544+
removing redundancies in generally contracted sets (Davidson
545+
purification)
546+
547+
.. seealso :: | T. Hashimoto, K. Hirao, H. Tatewaki
548+
| 'Comment on "Comment on Dunning's correlation-consistent basis set"'
549+
| Chemical Physics Letters 260, 514 (1996)
550+
| https://doi.org/10.1016/0009-2614(96)00917-7
551+
552+
which is was an improvement over the observation that free
553+
primitives can in principle be dropped from the contracted
554+
functions without affecting numerical results
555+
556+
.. seealso :: | T. Hashimoto, K. Hirao, H. Tatewaki
557+
| 'Comment on Dunning's correlation-consistent basis set'
558+
| Chemical Physics Letters 243, 190 (1995)
559+
| https://doi.org/10.1016/0009-2614(95)00807-G
560+
561+
P-orthogonalization, like Davidson's method, relies on numerical
562+
transformations, thereby changing the matrix of contraction
563+
coefficents.
564+
565+
Parameters
566+
----------
567+
basis: dict
568+
Basis set dictionary to work with
569+
570+
cutoff: float
571+
threshold for dropping primitives with small contraction
572+
coefficients in the representation where the largest
573+
coefficient is 1.0. Jensen states: "a coefficient cutoff
574+
parameter of ∼1e-5 will for most practical applications
575+
produce negligible differences in the final results".
576+
577+
Cortho: float
578+
linear dependence threshold used in the procedure: if
579+
1-O^P_{ij} < Cortho, the functions i and j are linearly
580+
dependent up to the primitive function P (see eq 6 in paper).
581+
582+
use_copy: bool
583+
If True, the input basis set is not modified.
584+
585+
"""
586+
587+
# First, we drop the free functions from the contractions
588+
basis = optimize_general(basis, use_copy=use_copy)
589+
# and sort the basis so that the exponents are in decreasing order
590+
basis = sort_basis(basis, use_copy=False)
591+
592+
for eldata in basis['elements'].values():
593+
594+
if 'electron_shells' not in eldata:
595+
continue
596+
597+
elshells = eldata['electron_shells']
598+
for sh in elshells:
599+
exponents = sh['exponents']
600+
coefficients = sh['coefficients']
601+
nprim = len(exponents)
602+
ncontr = len(coefficients)
603+
sh_am = sh['angular_momentum']
604+
nam = len(sh_am)
605+
606+
# Skip sp shells
607+
if nam > 1:
608+
continue
609+
610+
# If the number of exponents and coefficients matches, we
611+
# can free all primitives.
612+
if nprim == ncontr:
613+
coefficients = []
614+
for icontr in range(ncontr):
615+
coefficients.append(['1.0' if iprim == icontr else '0.0' for iprim in range(nprim)])
616+
sh['coefficients'] = coefficients
617+
continue
618+
619+
# Convert to floating-point format
620+
exps = [float(x) for x in exponents]
621+
C = [[float(c) for c in ccol] for ccol in coefficients]
622+
# We also need the primitive overlap matrix
623+
primitive_overlap = _gto_overlap(exps, sh_am[0])
624+
625+
def normalized_partial_overlap(Ci, S, Cj, nfun):
626+
'''Computes the normalized partial overlap using only a subset of the primitives'''
627+
assert nfun >= 1
628+
629+
Ssub = [[S[iidx][jidx] for jidx in range(nfun)] for iidx in range(nfun)]
630+
Cisub = [Ci[idx] for idx in range(nfun)]
631+
Cjsub = [Cj[idx] for idx in range(nfun)]
632+
nprod = abs(
633+
inner_product(Cisub, Ssub, Cjsub) /
634+
sqrt(inner_product(Cisub, Ssub, Cisub) * inner_product(Cjsub, Ssub, Cjsub)))
635+
return nprod
636+
637+
def inside_out():
638+
'''Inside-out purification'''
639+
for icontr in range(ncontr):
640+
# Determine level of P-orthogonalization
641+
for Plevel in range(nprim - 1, 0, -1):
642+
# Compute the normalized partial overlaps
643+
overlap_diff = [
644+
1 - normalized_partial_overlap(C[icontr], primitive_overlap, C[jcontr], Plevel)
645+
for jcontr in range(icontr + 1, ncontr)
646+
]
647+
# Is Plevel sufficient?
648+
if (not all([diff <= Cortho for diff in overlap_diff])) and (Plevel != nprim):
649+
# No, it is not
650+
continue
651+
652+
# Orthogonalize functions
653+
for jcontr in range(icontr + 1, ncontr):
654+
# x factor according to eq 8
655+
x = normalized_partial_overlap(C[icontr], primitive_overlap, C[jcontr], Plevel)
656+
# Subtract
657+
for iprim in range(nprim):
658+
C[jcontr][iprim] -= x * C[icontr][iprim]
659+
660+
# Break the p loop, continue with next icontr
661+
break
662+
663+
def outside_in():
664+
'''Outside-in purification; this is stable according to Jensen'''
665+
for icontr in range(ncontr - 1, -1, -1):
666+
for jcontr in range(icontr - 1, -1, -1):
667+
# Reference primitive index
668+
refprim = nprim - 1 - (ncontr - 1 - icontr)
669+
x = C[jcontr][refprim] / C[icontr][refprim]
670+
# Subtract
671+
for iprim in range(nprim):
672+
C[jcontr][iprim] -= x * C[icontr][iprim]
673+
674+
def intermediate_normalization():
675+
'''Normalize largest coefficient to +-1'''
676+
for icontr in range(ncontr):
677+
maxabs = max([abs(c) for c in C[icontr]])
678+
C[icontr] = [C[icontr][iprim] / maxabs for iprim in range(nprim)]
679+
680+
def drop_small_coeffs():
681+
'''Set small coefficients to zero'''
682+
for icontr in range(ncontr):
683+
for iprim in range(nprim):
684+
if C[icontr][iprim] != 0.0 and abs(C[icontr][iprim]) <= cutoff:
685+
C[icontr][iprim] = 0.0
686+
687+
inside_out()
688+
outside_in()
689+
intermediate_normalization()
690+
drop_small_coeffs()
691+
692+
# Create new contraction matrix
693+
coefficients = [['{:.15e}'.format(C[i][j]) for j in range(nprim)] for i in range(ncontr)]
694+
sh['coefficients'] = coefficients
509695

510696
return basis
511697

0 commit comments

Comments
 (0)