1 """ @package forcebalance.psi4io PSI4 force field input/output. 3 This serves as a good template for writing future force matching I/O 4 modules for other programs because it's so simple. 9 from __future__
import division
10 from __future__
import print_function
12 from builtins
import str
13 from builtins
import range
14 import os, sys, glob, shutil
15 from re
import match, sub, split, findall
16 from forcebalance.nifty import isint, isfloat, _exec, warn_press_key, printcool_dictionary, wopen
18 from forcebalance.leastsq
import LeastSquares, CheckBasis
19 from forcebalance
import BaseReader
22 from collections
import defaultdict, OrderedDict
24 from forcebalance.target
import Target
26 from forcebalance.output
import getLogger
27 logger = getLogger(__name__)
35 """Finite state machine for parsing basis set files. 57 def feed(self, line, linindep=False):
60 @param[in] line The line of data 64 if match(
'^ *!',line):
68 line = sub(
'^ *!',
'',line)
70 line = line.split(
'!')[0].strip()
74 if len(s) == 0
or match(
'^ *!',line):
return None,
None 76 if match(
'^[A-Za-z][A-Za-z]? +[0-9]$',line):
78 self.
element = s[0].capitalize()
81 elif len(s) == 3
and match(
'[SPDFGH]+',s[0])
and isint(s[1])
and isfloat(s[2]):
100 def __init__(self,options,tgt_opts,forcefield):
101 super(THCDF_Psi4,self).
__init__(options,tgt_opts,forcefield)
108 for line
in open(os.path.join(self.root,self.tgtdir,
"input.dat")).readlines():
111 if len(s) >= 3
and s[0].lower() ==
'molecule' and s[2] ==
'{':
114 elif len(s) >= 1
and s[0] ==
'}':
116 elif MolSection
and len(s) >= 4
and match(
"^[A-Za-z]+$",s[0])
and isfloat(s[1])
and isfloat(s[2])
and isfloat(s[3]):
117 ElemList.append(s[0].capitalize())
122 for pid
in self.FF.plist[p].split():
124 Pelem.append(pid.split(
':')[1].split(
',')[0].split(
'=')[1])
126 if len(self.
Elements.intersection(Pelem)) == 0:
131 gbslist = [i
for i
in self.FF.fnms
if os.path.splitext(i)[1] ==
'.gbs']
132 if len(gbslist) != 1:
133 warn_press_key(
"In %s, you should only have exactly one .gbs file in the list of force field files!" % __file__)
138 datlist = [i
for i
in self.FF.fnms
if os.path.splitext(i)[1] ==
'.dat']
139 if len(datlist) != 1:
140 warn_press_key(
"In %s, you should only have exactly one .dat file in the list of force field files!" % __file__)
146 abstempdir = os.path.join(self.root,self.tempdir)
147 o =
wopen(os.path.join(abstempdir,
"input.dat"))
148 for line
in open(os.path.join(self.root,self.tgtdir,
"input.dat")).readlines():
149 s = line.split(
"#")[0].split()
150 if len(s) == 3
and s[0].lower() ==
'basis' and s[1].lower() ==
'file':
151 print(
"basis file %s" % self.
GBSfnm, file=o)
153 print(line, end=
' ', file=o)
157 MAD = np.mean(np.abs(self.D))
158 logger.info(
"\rTarget: %-15s Molecules = %-30s %s" % (self.name, str(self.
Molecules),
"Mean (Max) Error: %8.4f%% (%8.4f%%) Energies: DF %+.3e MP2 %+.3e Delta % .3e Objective = %.5e" % (100*MAD / self.MAQ, 100*np.max(np.abs(self.D)) / self.MAQ, self.
DF_Energy, self.
MP2_Energy, self.
DF_Energy - self.
MP2_Energy, self.objective)))
162 ln0 = list(range(len(open(fnm).readlines())))
163 for layer
in linedestroy:
164 f = open(fnm).readlines()
165 o =
wopen(
'.tmp.gbs')
167 for ln, line
in enumerate(f):
169 print(line, end=
' ', file=o)
170 newln.append(ln0[ln])
172 _exec(
"mv .tmp.gbs %s" % fnm, print_command=
False)
179 logger.info(
"Now checking for linear dependencies.\n")
180 _exec(
"cp %s %s.bak" % (self.
GBSfnm, self.
GBSfnm), print_command=
False)
182 o =
wopen(
".lindep.dat")
183 for line
in open(self.
DATfnm).readlines():
184 s = line.split(
"#")[0].split()
185 if len(s) == 3
and s[0].lower() ==
'basis' and s[1].lower() ==
'file':
186 print(
"basis file %s" % self.
GBSfnm, file=o)
188 print(line, end=
' ', file=o)
190 _exec(
"mv .lindep.dat %s" % self.
DATfnm, print_command=
False)
191 _exec(
"psi4 %s" % self.
DATfnm, print_command=
False)
195 for line
in open(
'linindep.gbs'):
196 LI.feed(line,linindep=
True)
197 key =
'.'.join([str(i)
for i
in (LI.element,LI.amom,LI.basis_number[LI.element],LI.contraction_number)])
200 logger.info(
"Duplicate key found:\n")
201 logger.info(
"%s\n" % key)
202 logger.info(str(LI_lines[key]))
204 warn_press_key(
"In %s, the LI_lines dictionary should not contain repeated keys!" % __file__)
205 LI_lines[key] = (line, LI.destroy)
209 self.FF.linedestroy_this = []
210 self.FF.prmdestroy_this = []
211 for ln, line
in enumerate(open(self.
GBSfnm).readlines()):
213 key =
'.'.join([str(i)
for i
in (FK.element,FK.amom,FK.basis_number[FK.element],FK.contraction_number)])
214 if FK.isdata
and key
in LI_lines:
216 logger.info(
"Destroying line %i (originally %i): " % (ln, ln0[ln]))
218 self.FF.linedestroy_this.append(ln)
219 for p_destroy
in [i
for i, fld
in enumerate(self.FF.pfields)
if any([subfld[0] == self.
GBSfnm and subfld[1] == ln0[ln]
for subfld
in fld])]:
220 logger.info(
"Destroying parameter %i located at line %i (originally %i) with fields given by: %s" % (p_destroy, ln, ln0[ln], str(self.FF.pfields[p_destroy])))
221 self.FF.prmdestroy_this.append(p_destroy)
222 FK_lines.append(LI_lines[key][0])
224 FK_lines.append(line)
225 o =
wopen(
'franken.gbs')
226 for line
in FK_lines:
227 print(line, end=
' ', file=o)
229 _exec(
"cp %s.bak %s" % (self.
GBSfnm, self.
GBSfnm), print_command=
False)
231 if len(list(itertools.chain(*(self.FF.linedestroy_save + [self.FF.linedestroy_this])))) > 0:
232 logger.info(
"All lines removed: " + self.FF.linedestroy_save + [self.FF.linedestroy_this] +
'\n')
233 logger.info(
"All prms removed: " + self.FF.prmdestroy_save + [self.FF.prmdestroy_this] +
'\n')
236 _exec(
"psi4", print_command=
False, outfnm=
"psi4.stdout")
238 for line
in open(
'psi4.stdout').readlines():
239 if "MP2 Energy:" in line:
241 elif "DF Energy:" in line:
243 Ans = np.array([[float(i)
for i
in line.split()]
for line
in open(
"objective.dat").readlines()])
244 os.unlink(
"objective.dat")
248 """Finite state machine for parsing DVR grid files. 253 super(Grid_Reader,self).
__init__(fnm)
265 return ptype+
":"+
"Elem=%s,Point=%i" % (self.
element, self.
point)
267 def feed(self, line, linindep=False):
270 @param[in] line The line of data 273 line = line.split(
'!')[0].strip()
277 if len(s) == 0
or match(
'^ *!',line):
return None,
None 279 if match(
'^[A-Za-z][A-Za-z]? +[0-9]$',line):
281 self.
element = s[0].capitalize()
293 """ Subclass of Target for R-DVR3 grid fitting. 295 - Multiple molecules are treated as a single target. 296 - R-DVR3 can only print out the objective function, it cannot print out the residual vector. 297 - We should be smart enough to mask derivatives. 300 def __init__(self,options,tgt_opts,forcefield):
301 super(RDVR3_Psi4,self).
__init__(options,tgt_opts,forcefield)
313 for d
in sorted(os.listdir(self.tgtdir)):
314 if os.path.isdir(os.path.join(self.tgtdir,d))
and os.path.exists(os.path.join(self.tgtdir,d,
'objective.dat')):
316 self.
objfiles[d] = open(os.path.join(self.tgtdir,d,
'objective.dat')).readlines()
322 if len(s) >= 3
and s[0].lower() ==
'molecule' and s[2] ==
'{':
324 Molecules.append(s[1])
325 elif len(s) >= 1
and s[0] ==
'}':
327 elif MolSection
and len(s) >= 4
and match(
"^[A-Za-z]+$",s[0])
and isfloat(s[1])
and isfloat(s[2])
and isfloat(s[3]):
328 ElemList.append(s[0].capitalize())
331 for p
in range(self.FF.np):
333 for pid
in self.FF.plist[p].split():
335 Pelem.append(pid.split(
':')[1].split(
',')[0].split(
'=')[1])
337 if len(self.
elements[d].intersection(Pelem)) == 0:
341 PrintDict = OrderedDict()
343 PrintDict[d] =
"%15.9f" % self.
objvals[d]
344 printcool_dictionary(PrintDict,title=
"Target: %s\nR-DVR Objective Function, Total = %15.9f\n %-10s %15s" %
345 (self.name, self.
objective,
"Molecule",
"Objective"),keywidth=15)
349 def submit_jobs(self, mvals, AGrad=True, AHess=True):
353 self.
tdir = os.getcwd()
358 def submit_psi(this_apath, dname, these_mvals):
359 """ Create a grid file and a psi4 input file in the absolute path and submit it to the work queue. """ 361 if not os.path.exists(this_apath) : os.makedirs(this_apath)
363 self.FF.make(these_mvals)
364 o =
wopen(
'objective.dat')
367 if len(s) > 2
and s[0] ==
'path' and s[1] ==
'=':
368 print(
"path = '%s'" % os.getcwd(), file=o)
369 elif len(s) > 2
and s[0] ==
'set' and s[1] ==
'objective_path':
370 print(
"opath = '%s'" % os.getcwd(), file=o)
371 print(
"set objective_path $opath", file=o)
373 print(line, end=
' ', file=o)
375 os.system(
"rm -f objective.out")
377 logger.info(
"There is no Work Queue!!!\n")
380 input_files = [(os.path.join(this_apath, i), i)
for i
in glob.glob(
"*")]
381 input_files += [(os.path.join(self.root, self.tgtdir, dname,
"build.dat"),
"build.dat")]
382 input_files += [(os.path.join(os.path.split(__file__)[0],
"data",
"run_psi_rdvr3_objective.sh"),
"run_psi_rdvr3_objective.sh")]
384 queue_up_src_dest(wq,
"sh run_psi_rdvr3_objective.sh -c %s &> run_psi_rdvr3_objective.log" % os.path.join(self.root, self.tgtdir, dname),
385 input_files=input_files,
386 output_files=[(os.path.join(this_apath, i),i)
for i
in [
"run_psi_rdvr3_objective.log",
"output.dat"]], verbose=
False)
390 logger.info(
"\rNow working on" + str(d) + 50*
' ' +
'\r')
391 odir = os.path.join(os.getcwd(),d)
394 if not os.path.exists(odir): os.makedirs(odir)
395 apath = os.path.join(odir,
"current")
396 submit_psi(apath, d, mvals)
397 for p
in range(self.FF.np):
398 def subjob(mvals_,h):
399 apath = os.path.join(odir, str(p), str(h))
400 submit_psi(apath, d, mvals_)
405 f12d3p(
fdwrap(subjob, mvals, p, h=self.h), h = self.h, f0 = 0.0)
408 f12d3p(
fdwrap(subjob, mvals, p, h=self.h), h = self.h, f0 = 0.0)
410 f1d2p(
fdwrap(subjob, mvals, p, h=self.h), h = self.h, f0 = 0.0)
412 def driver(self, mvals, d):
414 pvals = self.FF.make(mvals)
416 odir = os.path.join(os.getcwd(),d)
419 if not os.path.exists(odir): os.makedirs(odir)
421 o =
wopen(
'objective.dat')
424 if len(s) > 2
and s[0] ==
'path' and s[1] ==
'=':
425 print(
"path = '%s'" % self.
tdir, file=o)
426 elif len(s) > 2
and s[0] ==
'set' and s[1] ==
'objective_path':
427 print(
"opath = '%s'" % os.getcwd(), file=o)
428 print(
"set objective_path $opath", file=o)
430 print(line, end=
' ', file=o)
432 os.system(
"rm -f objective.out")
433 _exec(
"psi4 objective.dat", print_command=
False)
434 answer = float(open(
'objective.out').readlines()[0].split()[1])*self.
factor 438 def get(self, mvals, AGrad=False, AHess=False):
442 This subroutine builds the objective function from Psi4. 444 @param[in] mvals Mathematical parameter values 445 @param[in] AGrad Switch to turn on analytic gradient 446 @param[in] AHess Switch to turn on analytic Hessian 447 @return Answer Contribution to the objective function 455 pvals = self.FF.make(mvals)
457 self.
objd = OrderedDict()
458 self.
gradd = OrderedDict()
459 self.
hdiagd = OrderedDict()
462 def fdwrap2(func,mvals0,pidx,qidx,key=None,**kwargs):
463 def func2(arg1,arg2):
467 logger.info(
"\rfdwrap2:" + func.__name__ +
"[%i] = % .1e , [%i] = % .1e" % (pidx, arg1, qidx, arg2) +
' '*50)
469 return func(mvals,**kwargs)[key]
471 return func(mvals,**kwargs)
475 fpp, fpm, fmp, fmm = [f(i*h,j*h)
for i,j
in [(1,1),(1,-1),(-1,1),(-1,-1)]]
476 fpp = (fpp-fpm-fmp+fmm)/(4*h*h)
479 def f2d4p(f, h, f0 = None):
481 fpp, fp0, f0p, f0 = [f(i*h,j*h)
for i,j
in [(1,1),(1,0),(0,1),(0,0)]]
483 fpp, fp0, f0p = [f(i*h,j*h)
for i,j
in [(1,1),(1,0),(0,1)]]
484 fpp = (fpp-fp0-f0p+f0)/(h*h)
488 logger.info(
"\rNow working on" + str(d) + 50*
' ' +
'\r')
493 hess = np.zeros((n,n))
494 apath = os.path.join(self.
tdir, d,
"current")
495 x = float(open(os.path.join(apath,
'objective.out')).readlines()[0].split()[1])*self.
factor 496 for p
in range(self.FF.np):
498 def reader(mvals_,h):
499 apath = os.path.join(self.
tdir, d, str(p), str(h))
500 answer = float(open(os.path.join(apath,
'objective.out')).readlines()[0].split()[1])*self.
factor 504 apath = os.path.join(self.
tdir, d,
"current")
505 x = float(open(os.path.join(apath,
'objective.out')).readlines()[0].split()[1])*self.
factor 506 grad[p], hdiag[p] =
f12d3p(
fdwrap(reader, mvals, p, h=self.h), h = self.h, f0 = x)
513 apath = os.path.join(self.
tdir, d,
"current")
514 x = float(open(os.path.join(apath,
'objective.out')).readlines()[0].split()[1])*self.
factor 515 grad[p], _ =
f12d3p(
fdwrap(reader, mvals, p, h=self.h), h = self.h, f0 = x)
522 grad[p] =
f1d2p(
fdwrap(reader, mvals, p, h=self.h), h = self.h, f0 = x)
540 if float(
'Inf')
in pvals:
541 return {
'X' : 1e10,
'G' : G,
'H' : H}
542 return {
'X' : X,
'G' : G,
'H' : H}
def build_pid(self, pfld)
def driver(self, mvals, d)
def get(self, mvals, AGrad=False, AHess=False)
LPW 04-17-2013.
def feed(self, line, linindep=False)
Feed in a line.
Nifty functions, intended to be imported by any module within ForceBalance.
def feed(self, line, linindep=False)
Feed in a line.
Finite state machine for parsing DVR grid files.
GBSfnm
Psi4 basis set file.
MP2_Energy
Actually run PSI4.
DATfnm
Psi4 input file for calculation of linear dependencies This is actually a file in 'forcefield' until ...
def isint(word)
ONLY matches integers! If you have a decimal point? None shall pass!
Interaction type -> Parameter Dictionary.
Subclass of Target for R-DVR3 grid fitting.
def in_fd()
Invoking this function from anywhere will tell us whether we're being called by a finite-difference f...
def fdwrap(func, mvals0, pidx, key=None, kwargs)
A function wrapper for finite difference designed for differentiating 'get'-type functions.
def queue_up_src_dest(wq, command, input_files, output_files, tag=None, tgt=None, verbose=True, print_time=60)
Submit a job to the Work Queue.
objfiles
Which parameters are differentiated?
def submit_jobs(self, mvals, AGrad=True, AHess=True)
Create a grid file and a psi4 input file in the absolute path and submit it to the work queue...
def warn_press_key(warning, timeout=10)
def printcool_dictionary(Dict, title="Dictionary Keys : Values", bold=False, color=2, keywidth=25, topwidth=50, center=True, leftpad=0)
See documentation for printcool; this is a nice way to print out keys/values in a dictionary...
def write_nested_destroy(self, fnm, linedestroy)
def prepare_temp_directory(self, options, tgt_opts)
def __init__(self, fnm=None)
def __init__(self, options, tgt_opts, forcefield)
def f12d3p(f, h, f0=None)
A three-point finite difference stencil.
def build_pid(self, pfld)
def isfloat(word)
Matches ANY number; it can be a decimal, scientific notation, integer, or what have you...
def wopen(dest, binary=False)
If trying to write to a symbolic link, remove it first.
def __init__(self, fnm=None)
def f1d2p(f, h, f0=None)
A two-point finite difference stencil.