1 """ @package forcebalance.psi4io PSI4 force field input/output.     3 This serves as a good template for writing future force matching I/O     4 modules for other programs because it's so simple.     9 from __future__ 
import division
    10 from __future__ 
import print_function
    12 from builtins 
import str
    13 from builtins 
import range
    14 import os, sys, glob, shutil
    15 from re 
import match, sub, split, findall
    16 from forcebalance.nifty import isint, isfloat, _exec, warn_press_key, printcool_dictionary, wopen
    18 from forcebalance.leastsq 
import LeastSquares, CheckBasis
    19 from forcebalance 
import BaseReader
    22 from collections 
import defaultdict, OrderedDict
    24 from forcebalance.target 
import Target
    26 from forcebalance.output 
import getLogger
    27 logger = getLogger(__name__)
    35     """Finite state machine for parsing basis set files.    57     def feed(self, line, linindep=False):
    60         @param[in] line     The line of data    64             if match(
'^ *!',line): 
    68             line = sub(
'^ *!',
'',line)
    70         line       = line.split(
'!')[0].strip()
    74         if len(s) == 0 
or match(
'^ *!',line): 
return None, 
None    76         if match(
'^[A-Za-z][A-Za-z]? +[0-9]$',line):
    78             self.
element = s[0].capitalize()
    81         elif len(s) == 3 
and match(
'[SPDFGH]+',s[0]) 
and isint(s[1]) 
and isfloat(s[2]):
   100     def __init__(self,options,tgt_opts,forcefield):
   101         super(THCDF_Psi4,self).
__init__(options,tgt_opts,forcefield)
   108         for line 
in open(os.path.join(self.root,self.tgtdir,
"input.dat")).readlines():
   111             if len(s) >= 3 
and s[0].lower() == 
'molecule' and s[2] == 
'{':
   114             elif len(s) >= 1 
and s[0] == 
'}':
   116             elif MolSection 
and len(s) >= 4 
and match(
"^[A-Za-z]+$",s[0]) 
and isfloat(s[1]) 
and isfloat(s[2]) 
and isfloat(s[3]):
   117                 ElemList.append(s[0].capitalize())
   122             for pid 
in self.FF.plist[p].split():
   124                 Pelem.append(pid.split(
':')[1].split(
',')[0].split(
'=')[1])
   126             if len(self.
Elements.intersection(Pelem)) == 0:
   131         gbslist = [i 
for i 
in self.FF.fnms 
if os.path.splitext(i)[1] == 
'.gbs']
   132         if len(gbslist) != 1:
   133             warn_press_key(
"In %s, you should only have exactly one .gbs file in the list of force field files!" % __file__)
   138             datlist = [i 
for i 
in self.FF.fnms 
if os.path.splitext(i)[1] == 
'.dat']
   139             if len(datlist) != 1:
   140                 warn_press_key(
"In %s, you should only have exactly one .dat file in the list of force field files!" % __file__)
   146         abstempdir = os.path.join(self.root,self.tempdir)
   147         o = 
wopen(os.path.join(abstempdir,
"input.dat"))
   148         for line 
in open(os.path.join(self.root,self.tgtdir,
"input.dat")).readlines():
   149             s = line.split(
"#")[0].split()
   150             if len(s) == 3 
and s[0].lower() == 
'basis' and s[1].lower() == 
'file':
   151                 print(
"basis file %s" % self.
GBSfnm, file=o)
   153                 print(line, end=
' ', file=o)
   157         MAD = np.mean(np.abs(self.D))
   158         logger.info(
"\rTarget: %-15s Molecules = %-30s %s" % (self.name, str(self.
Molecules), 
"Mean (Max) Error: %8.4f%% (%8.4f%%) Energies: DF %+.3e MP2 %+.3e Delta % .3e Objective = %.5e" % (100*MAD / self.MAQ, 100*np.max(np.abs(self.D)) / self.MAQ, self.
DF_Energy, self.
MP2_Energy, self.
DF_Energy - self.
MP2_Energy, self.objective)))
   162         ln0 = list(range(len(open(fnm).readlines())))
   163         for layer 
in linedestroy:
   164             f = open(fnm).readlines()
   165             o = 
wopen(
'.tmp.gbs')
   167             for ln, line 
in enumerate(f):
   169                     print(line, end=
' ', file=o)
   170                     newln.append(ln0[ln])
   172             _exec(
"mv .tmp.gbs %s" % fnm, print_command=
False)
   179             logger.info(
"Now checking for linear dependencies.\n")
   180             _exec(
"cp %s %s.bak" % (self.
GBSfnm, self.
GBSfnm), print_command=
False)
   182             o = 
wopen(
".lindep.dat")
   183             for line 
in open(self.
DATfnm).readlines():
   184                 s = line.split(
"#")[0].split()
   185                 if len(s) == 3 
and s[0].lower() == 
'basis' and s[1].lower() == 
'file':
   186                     print(
"basis file %s" % self.
GBSfnm, file=o)
   188                     print(line, end=
' ', file=o)
   190             _exec(
"mv .lindep.dat %s" % self.
DATfnm, print_command=
False)
   191             _exec(
"psi4 %s" % self.
DATfnm, print_command=
False)
   195             for line 
in open(
'linindep.gbs'):
   196                 LI.feed(line,linindep=
True)
   197                 key = 
'.'.join([str(i) 
for i 
in (LI.element,LI.amom,LI.basis_number[LI.element],LI.contraction_number)])
   200                         logger.info(
"Duplicate key found:\n")
   201                         logger.info(
"%s\n" % key)
   202                         logger.info(str(LI_lines[key]))
   204                         warn_press_key(
"In %s, the LI_lines dictionary should not contain repeated keys!" % __file__)
   205                     LI_lines[key] = (line, LI.destroy)
   209             self.FF.linedestroy_this = []
   210             self.FF.prmdestroy_this = []
   211             for ln, line 
in enumerate(open(self.
GBSfnm).readlines()):
   213                 key = 
'.'.join([str(i) 
for i 
in (FK.element,FK.amom,FK.basis_number[FK.element],FK.contraction_number)])
   214                 if FK.isdata 
and key 
in LI_lines:
   216                         logger.info(
"Destroying line %i (originally %i): " % (ln, ln0[ln]))
   218                         self.FF.linedestroy_this.append(ln)
   219                         for p_destroy 
in [i 
for i, fld 
in enumerate(self.FF.pfields) 
if any([subfld[0] == self.
GBSfnm and subfld[1] == ln0[ln] 
for subfld 
in fld])]:
   220                             logger.info(
"Destroying parameter %i located at line %i (originally %i) with fields given by: %s" % (p_destroy, ln, ln0[ln], str(self.FF.pfields[p_destroy])))
   221                             self.FF.prmdestroy_this.append(p_destroy)
   222                     FK_lines.append(LI_lines[key][0])
   224                     FK_lines.append(line)
   225             o = 
wopen(
'franken.gbs')
   226             for line 
in FK_lines:
   227                 print(line, end=
' ', file=o)
   229             _exec(
"cp %s.bak %s" % (self.
GBSfnm, self.
GBSfnm), print_command=
False)
   231             if len(list(itertools.chain(*(self.FF.linedestroy_save + [self.FF.linedestroy_this])))) > 0:
   232                 logger.info(
"All lines removed: " + self.FF.linedestroy_save + [self.FF.linedestroy_this] + 
'\n')
   233                 logger.info(
"All prms removed: " + self.FF.prmdestroy_save + [self.FF.prmdestroy_this] + 
'\n')
   236         _exec(
"psi4", print_command=
False, outfnm=
"psi4.stdout")
   238             for line 
in open(
'psi4.stdout').readlines():
   239                 if "MP2 Energy:" in line:
   241                 elif "DF Energy:" in line:
   243         Ans = np.array([[float(i) 
for i 
in line.split()] 
for line 
in open(
"objective.dat").readlines()])
   244         os.unlink(
"objective.dat")
   248     """Finite state machine for parsing DVR grid files.   253         super(Grid_Reader,self).
__init__(fnm)
   265         return ptype+
":"+
"Elem=%s,Point=%i" % (self.
element, self.
point)
   267     def feed(self, line, linindep=False):
   270         @param[in] line     The line of data   273         line       = line.split(
'!')[0].strip()
   277         if len(s) == 0 
or match(
'^ *!',line): 
return None, 
None   279         if match(
'^[A-Za-z][A-Za-z]? +[0-9]$',line):
   281             self.
element = s[0].capitalize()
   293     """ Subclass of Target for R-DVR3 grid fitting.   295     - Multiple molecules are treated as a single target.   296     - R-DVR3 can only print out the objective function, it cannot print out the residual vector.   297     - We should be smart enough to mask derivatives.   300     def __init__(self,options,tgt_opts,forcefield):
   301         super(RDVR3_Psi4,self).
__init__(options,tgt_opts,forcefield)
   313         for d 
in sorted(os.listdir(self.tgtdir)):
   314             if os.path.isdir(os.path.join(self.tgtdir,d)) 
and os.path.exists(os.path.join(self.tgtdir,d,
'objective.dat')):
   316                 self.
objfiles[d] = open(os.path.join(self.tgtdir,d,
'objective.dat')).readlines()
   322                     if len(s) >= 3 
and s[0].lower() == 
'molecule' and s[2] == 
'{':
   324                         Molecules.append(s[1])
   325                     elif len(s) >= 1 
and s[0] == 
'}':
   327                     elif MolSection 
and len(s) >= 4 
and match(
"^[A-Za-z]+$",s[0]) 
and isfloat(s[1]) 
and isfloat(s[2]) 
and isfloat(s[3]):
   328                         ElemList.append(s[0].capitalize())
   331                 for p 
in range(self.FF.np):
   333                     for pid 
in self.FF.plist[p].split():
   335                         Pelem.append(pid.split(
':')[1].split(
',')[0].split(
'=')[1])
   337                     if len(self.
elements[d].intersection(Pelem)) == 0:
   341         PrintDict = OrderedDict()
   343             PrintDict[d] = 
"%15.9f" % self.
objvals[d]
   344         printcool_dictionary(PrintDict,title=
"Target: %s\nR-DVR Objective Function, Total = %15.9f\n %-10s %15s" % 
   345                              (self.name, self.
objective, 
"Molecule", 
"Objective"),keywidth=15)
   349     def submit_jobs(self, mvals, AGrad=True, AHess=True):
   353         self.
tdir = os.getcwd()
   358         def submit_psi(this_apath, dname, these_mvals):
   359             """ Create a grid file and a psi4 input file in the absolute path and submit it to the work queue. """   361             if not os.path.exists(this_apath) : os.makedirs(this_apath)
   363             self.FF.make(these_mvals)
   364             o = 
wopen(
'objective.dat')
   367                 if len(s) > 2 
and s[0] == 
'path' and s[1] == 
'=':
   368                     print(
"path = '%s'" % os.getcwd(), file=o)
   369                 elif len(s) > 2 
and s[0] == 
'set' and s[1] == 
'objective_path':
   370                     print(
"opath = '%s'" % os.getcwd(), file=o)
   371                     print(
"set objective_path $opath", file=o)
   373                     print(line, end=
' ', file=o)
   375             os.system(
"rm -f objective.out")
   377                 logger.info(
"There is no Work Queue!!!\n")
   380                 input_files = [(os.path.join(this_apath, i), i) 
for i 
in glob.glob(
"*")]
   381                 input_files += [(os.path.join(self.root, self.tgtdir, dname, 
"build.dat"), 
"build.dat")]
   382                 input_files += [(os.path.join(os.path.split(__file__)[0],
"data",
"run_psi_rdvr3_objective.sh"), 
"run_psi_rdvr3_objective.sh")]
   384                 queue_up_src_dest(wq,
"sh run_psi_rdvr3_objective.sh -c %s &> run_psi_rdvr3_objective.log" % os.path.join(self.root, self.tgtdir, dname),
   385                                   input_files=input_files,
   386                                   output_files=[(os.path.join(this_apath, i),i) 
for i 
in [
"run_psi_rdvr3_objective.log", 
"output.dat"]], verbose=
False)
   390             logger.info(
"\rNow working on" + str(d) + 50*
' ' + 
'\r')
   391             odir = os.path.join(os.getcwd(),d)
   394             if not os.path.exists(odir): os.makedirs(odir)
   395             apath = os.path.join(odir, 
"current")
   396             submit_psi(apath, d, mvals)
   397             for p 
in range(self.FF.np):
   398                 def subjob(mvals_,h):
   399                     apath = os.path.join(odir, str(p), str(h))
   400                     submit_psi(apath, d, mvals_)
   405                         f12d3p(
fdwrap(subjob, mvals, p, h=self.h), h = self.h, f0 = 0.0)
   408                             f12d3p(
fdwrap(subjob, mvals, p, h=self.h), h = self.h, f0 = 0.0)
   410                             f1d2p(
fdwrap(subjob, mvals, p, h=self.h), h = self.h, f0 = 0.0)
   412     def driver(self, mvals, d):
   414         pvals = self.FF.make(mvals)
   416         odir = os.path.join(os.getcwd(),d)
   419         if not os.path.exists(odir): os.makedirs(odir)
   421         o = 
wopen(
'objective.dat')
   424             if len(s) > 2 
and s[0] == 
'path' and s[1] == 
'=':
   425                 print(
"path = '%s'" % self.
tdir, file=o)
   426             elif len(s) > 2 
and s[0] == 
'set' and s[1] == 
'objective_path':
   427                 print(
"opath = '%s'" % os.getcwd(), file=o)
   428                 print(
"set objective_path $opath", file=o)
   430                 print(line, end=
' ', file=o)
   432         os.system(
"rm -f objective.out")
   433         _exec(
"psi4 objective.dat", print_command=
False)
   434         answer = float(open(
'objective.out').readlines()[0].split()[1])*self.
factor   438     def get(self, mvals, AGrad=False, AHess=False):
   442         This subroutine builds the objective function from Psi4.   444         @param[in] mvals Mathematical parameter values   445         @param[in] AGrad Switch to turn on analytic gradient   446         @param[in] AHess Switch to turn on analytic Hessian   447         @return Answer Contribution to the objective function   455         pvals = self.FF.make(mvals)
   457         self.
objd = OrderedDict()
   458         self.
gradd = OrderedDict()
   459         self.
hdiagd = OrderedDict()
   462         def fdwrap2(func,mvals0,pidx,qidx,key=None,**kwargs):
   463             def func2(arg1,arg2):
   467                 logger.info(
"\rfdwrap2:" + func.__name__ + 
"[%i] = % .1e , [%i] = % .1e" % (pidx, arg1, qidx, arg2) + 
' '*50)
   469                     return func(mvals,**kwargs)[key]
   471                     return func(mvals,**kwargs)
   475             fpp, fpm, fmp, fmm = [f(i*h,j*h) 
for i,j 
in [(1,1),(1,-1),(-1,1),(-1,-1)]]
   476             fpp = (fpp-fpm-fmp+fmm)/(4*h*h)
   479         def f2d4p(f, h, f0 = None):
   481                 fpp, fp0, f0p, f0 = [f(i*h,j*h) 
for i,j 
in [(1,1),(1,0),(0,1),(0,0)]]
   483                 fpp, fp0, f0p = [f(i*h,j*h) 
for i,j 
in [(1,1),(1,0),(0,1)]]
   484             fpp = (fpp-fp0-f0p+f0)/(h*h)
   488             logger.info(
"\rNow working on" + str(d) + 50*
' ' + 
'\r')
   493             hess  = np.zeros((n,n))
   494             apath = os.path.join(self.
tdir, d, 
"current")
   495             x = float(open(os.path.join(apath,
'objective.out')).readlines()[0].split()[1])*self.
factor   496             for p 
in range(self.FF.np):
   498                     def reader(mvals_,h):
   499                         apath = os.path.join(self.
tdir, d, str(p), str(h))
   500                         answer = float(open(os.path.join(apath,
'objective.out')).readlines()[0].split()[1])*self.
factor   504                             apath = os.path.join(self.
tdir, d, 
"current")
   505                             x = float(open(os.path.join(apath,
'objective.out')).readlines()[0].split()[1])*self.
factor   506                             grad[p], hdiag[p] = 
f12d3p(
fdwrap(reader, mvals, p, h=self.h), h = self.h, f0 = x)
   513                                 apath = os.path.join(self.
tdir, d, 
"current")
   514                                 x = float(open(os.path.join(apath,
'objective.out')).readlines()[0].split()[1])*self.
factor   515                                 grad[p], _ = 
f12d3p(
fdwrap(reader, mvals, p, h=self.h), h = self.h, f0 = x)
   522                                 grad[p] = 
f1d2p(
fdwrap(reader, mvals, p, h=self.h), h = self.h, f0 = x)
   540         if float(
'Inf') 
in pvals:
   541             return {
'X' : 1e10, 
'G' : G, 
'H' : H}
   542         return {
'X' : X, 
'G' : G, 
'H' : H}
 
def build_pid(self, pfld)
 
def driver(self, mvals, d)
 
def get(self, mvals, AGrad=False, AHess=False)
LPW 04-17-2013. 
 
def feed(self, line, linindep=False)
Feed in a line. 
 
Nifty functions, intended to be imported by any module within ForceBalance. 
 
def feed(self, line, linindep=False)
Feed in a line. 
 
Finite state machine for parsing DVR grid files. 
 
GBSfnm
Psi4 basis set file. 
 
MP2_Energy
Actually run PSI4. 
 
DATfnm
Psi4 input file for calculation of linear dependencies This is actually a file in 'forcefield' until ...
 
def isint(word)
ONLY matches integers! If you have a decimal point? None shall pass! 
 
Interaction type -> Parameter Dictionary. 
 
Subclass of Target for R-DVR3 grid fitting. 
 
def in_fd()
Invoking this function from anywhere will tell us whether we're being called by a finite-difference f...
 
def fdwrap(func, mvals0, pidx, key=None, kwargs)
A function wrapper for finite difference designed for differentiating 'get'-type functions. 
 
def queue_up_src_dest(wq, command, input_files, output_files, tag=None, tgt=None, verbose=True, print_time=60)
Submit a job to the Work Queue. 
 
objfiles
Which parameters are differentiated? 
 
def submit_jobs(self, mvals, AGrad=True, AHess=True)
Create a grid file and a psi4 input file in the absolute path and submit it to the work queue...
 
def warn_press_key(warning, timeout=10)
 
def printcool_dictionary(Dict, title="Dictionary Keys : Values", bold=False, color=2, keywidth=25, topwidth=50, center=True, leftpad=0)
See documentation for printcool; this is a nice way to print out keys/values in a dictionary...
 
def write_nested_destroy(self, fnm, linedestroy)
 
def prepare_temp_directory(self, options, tgt_opts)
 
def __init__(self, fnm=None)
 
def __init__(self, options, tgt_opts, forcefield)
 
def f12d3p(f, h, f0=None)
A three-point finite difference stencil. 
 
def build_pid(self, pfld)
 
def isfloat(word)
Matches ANY number; it can be a decimal, scientific notation, integer, or what have you...
 
def wopen(dest, binary=False)
If trying to write to a symbolic link, remove it first. 
 
def __init__(self, fnm=None)
 
def f1d2p(f, h, f0=None)
A two-point finite difference stencil.