1 """ @package forcebalance.hydration Hydration free energy fitting module 6 from __future__
import division
8 from builtins
import range
12 from copy
import deepcopy
13 from forcebalance.target
import Target
15 from re
import match, sub
17 from collections
import defaultdict, OrderedDict
18 from forcebalance.nifty import getWorkQueue, queue_up, LinkFile, printcool, link_dir_contents, lp_dump, lp_load, _exec, kb, col, flat, uncommadash, statisticalInefficiency, isfloat
20 from forcebalance.output
import getLogger
21 logger = getLogger(__name__)
25 """ Subclass of Target for fitting force fields to hydration free energies.""" 27 def __init__(self,options,tgt_opts,forcefield):
31 super(Hydration,self).
__init__(options,tgt_opts,forcefield)
36 self.set_option(tgt_opts,
'hfedata_txt',
'datafile')
37 self.set_option(tgt_opts,
'hfemode')
39 self.set_option(tgt_opts,
'normalize')
41 self.set_option(tgt_opts,
'energy_denom',
'denom')
43 self.set_option(tgt_opts,
'liquid_eq_steps',forceprint=
True)
45 self.set_option(tgt_opts,
'liquid_md_steps',forceprint=
True)
47 self.set_option(tgt_opts,
'liquid_timestep',forceprint=
True)
49 self.set_option(tgt_opts,
'liquid_interval',forceprint=
True)
51 self.set_option(tgt_opts,
'gas_eq_steps',forceprint=
True)
53 self.set_option(tgt_opts,
'gas_md_steps',forceprint=
True)
55 self.set_option(tgt_opts,
'gas_timestep',forceprint=
True)
57 self.set_option(tgt_opts,
'gas_interval',forceprint=
True)
59 self.set_option(tgt_opts,
'hfe_temperature',forceprint=
True)
61 self.set_option(tgt_opts,
'hfe_pressure',forceprint=
True)
63 self.set_option(tgt_opts,
'save_traj')
65 self.set_option(tgt_opts,
'subset')
80 self.scripts += [
'md_ism_hfe.py']
85 self.OptionDict[
'engname'] = self.engname
87 self.
engine_opts = OrderedDict(list(self.OptionDict.items()) + list(options.items()))
90 if self.
hfemode.lower()
in [
'sp',
'single']:
91 logger.info(
"Hydration free energies from geometry optimization and single point energy evaluation\n")
93 elif self.
hfemode.lower() ==
'ti2':
94 logger.info(
"Hydration free energies from thermodynamic integration (linear response approximation)\n")
95 elif self.
hfemode.lower() ==
'exp_gas':
96 logger.info(
"Hydration free energies from exponential reweighting from the gas to the aqueous phase\n")
97 elif self.
hfemode.lower() ==
'exp_liq':
98 logger.info(
"Hydration free energies from exponential reweighting from the aqueous to the gas phase\n")
99 elif self.
hfemode.lower() ==
'exp_both':
100 logger.info(
"Hydration free energies from exponential reweighting from the aqueous to the gas phase and vice versa, taking the average\n")
102 logger.error(
"Please choose hfemode from single, sp, ti2, exp_gas, exp_liq, or exp_both\n")
105 if self.FF.rigid_water:
106 logger.error(
'This class cannot be used with rigid water molecules.\n')
110 """ Read the reference hydrational data from a file. """ 113 self.
expval = OrderedDict()
122 for line
in open(self.
datafile).readlines():
123 s = line.expandtabs().strip().split(
'#')[0].split()
124 if len(s) == 0:
continue 138 self.
expval[ID] = float(s[nxt])
142 self.
experr[ID] = float(s[nxt+1])
146 self.
molecules = OrderedDict([(i, os.path.abspath(os.path.join(self.root, self.tgtdir,
'molecules', i+self.crdsfx)))
for i
in self.
IDs])
148 if not os.path.isfile(path):
149 logger.error(
'Coordinate file %s does not exist!\nMake sure coordinate files are in the right place\n' % path)
151 if self.subset
is not None:
153 self.
whfe = np.array([1
if i
in subset
else 0
for i
in range(len(self.
IDs))])
155 self.
whfe = np.ones(len(self.
IDs))
159 Submit a simulation to the Work Queue or run it locally. 162 label = The name of the molecule (and hopefully the folder name that you're running in) 163 liq = True/false flag indicating whether to run in liquid or gas phase 168 md_opts = OrderedDict()
169 md_opts[
'temperature'] = self.hfe_temperature
170 md_opts[
'pressure'] = self.hfe_pressure
171 md_opts[
'minimize'] =
True 174 md_opts[
'nequil'] = self.liquid_eq_steps
175 md_opts[
'nsteps'] = self.liquid_md_steps
176 md_opts[
'timestep'] = self.liquid_timestep
177 md_opts[
'sample'] = self.liquid_interval
180 md_opts[
'nequil'] = self.gas_eq_steps
181 md_opts[
'nsteps'] = self.gas_md_steps
182 md_opts[
'timestep'] = self.gas_timestep
183 md_opts[
'sample'] = self.gas_interval
188 eng_opts[
'implicit_solvent'] = liq
189 eng_opts[
'coords'] = os.path.basename(self.
molecules[label])
191 if not os.path.exists(sdnm):
194 if not os.path.exists(
'md_result.p'):
198 for f
in self.scripts:
199 LinkFile(os.path.join(os.path.split(__file__)[0],
"data",f),os.path.join(os.getcwd(),f))
205 lp_dump((self.OptionDict, eng_opts, md_opts),
'simulation.p')
207 cmdstr =
'%s python md_ism_hfe.py %s' % (self.prefix,
"-g" if AGrad
else "")
209 logger.info(
"Running condensed phase simulation locally.\n")
210 logger.info(
"You may tail -f %s/npt.out in another terminal window\n" % os.getcwd())
211 _exec(cmdstr, copy_stderr=
True, outfnm=
'md.out')
213 queue_up(wq, command = cmdstr+
' &> md.out', tag=
'%s:%s/%s' % (self.name, label,
"liq" if liq
else "gas"),
214 input_files = self.scripts + [
'simulation.p',
'forcefield.p', os.path.basename(self.
molecules[label])],
215 output_files = [
'md_result.p',
'md.out'] + self.
extra_output, tgt=self, verbose=
False, print_time=3600)
220 Set up and submit/run sampling simulations in the liquid and gas phases. 224 printcool(
"Target: %s - launching %i MD simulations\nTime steps (liq):" 225 "%i (eq) + %i (md)\nTime steps (g): %i (eq) + %i (md)" %
226 (self.name, 2*len(self.
IDs), self.liquid_eq_steps, self.liquid_md_steps,
227 self.gas_eq_steps, self.gas_md_steps), color=0)
229 if self.evaluated
and self.goodstep
and self.save_traj < 2:
231 if os.path.exists(fn):
238 for label
in self.
IDs:
239 if not os.path.exists(label):
247 def submit_jobs(self, mvals, AGrad=True, AHess=True):
249 if self.
hfemode.lower()
not in [
'ti2',
'exp_gas',
'exp_liq',
'exp_both']:
254 self.serialize_ff(mvals)
255 if self.
hfemode.lower()
in [
'ti2',
'exp_gas',
'exp_liq',
'exp_both']:
259 """ Create a list of engines which are used to calculate HFEs using single point evaluation. """ 264 pdbfnm = os.path.abspath(os.path.join(self.root,self.tgtdir,
'molecules', mnm+
'.pdb'))
265 self.
liq_engines[mnm] = self.engine_(target=self, coords=pdbfnm, implicit_solvent=
True, **self.
engine_opts)
269 """ Print qualitative indicator. """ 270 banner =
"Hydration free energies (kcal/mol)" 272 data = OrderedDict([(ID, [])
for ID
in self.
IDs])
274 headings.append(
"Nickname")
278 headings.append(
"Reference +- StdErr")
280 data[ID].append(
"% 9.3f +- %6.3f" % (self.
expval[ID], self.
experr[ID]))
282 headings.append(
"Reference")
284 data[ID].append(
"% 9.3f" % self.
expval[ID])
285 if hasattr(self,
'calc_err'):
286 headings.append(
"Calculated +- StdErr")
288 data[ID].append(
"% 10.3f +- %6.3f" % (self.
calc[ID], self.
calc_err[ID]))
290 headings.append(
"Calculated")
292 data[ID].append(
"% 10.3f" % self.
calc[ID])
293 headings += [
"Calc-Ref",
"Weight",
"Residual"]
294 for iid, ID
in enumerate(self.
IDs):
295 data[ID].append(
"% 8.3f" % (self.
calc[ID] - self.
expval[ID]))
296 data[ID].append(
"%8.3f" % (self.
whfe[iid]))
297 data[ID].append(
"%8.3f" % (self.
whfe[iid]*(self.
calc[ID] - self.
expval[ID])**2))
298 self.printcool_table(data, headings, banner)
301 """ Calculate HFEs using single point evaluation. """ 304 eliq, rmsdliq, _ = self.
liq_engines[mnm].optimize()
305 egas, rmsdgas, _ = self.
gas_engines[mnm].optimize()
306 hfe[mnm] = eliq - egas
309 def get_sp(self, mvals, AGrad=False, AHess=False):
310 """ Get the hydration free energy and first parameteric derivatives using single point energy evaluations. """ 314 return np.array(list(self.
hfe_dict.values()))
315 calc_hfe = get_hfe(mvals)
316 D = calc_hfe - np.array(list(self.
expval.values()))
317 dD = np.zeros((self.FF.np,len(self.
IDs)))
320 dD[p,:], _ =
f12d3p(
fdwrap(get_hfe, mvals, p), h = self.h, f0 = calc_hfe)
323 def get_exp(self, mvals, AGrad=False, AHess=False):
324 """ Get the hydration free energy using the Zwanzig formula. We will obtain two different estimates along with their uncertainties. """ 327 dD = np.zeros((self.FF.np,len(self.
IDs)))
328 kT = (kb * self.hfe_temperature)
329 beta = 1. / (kb * self.hfe_temperature)
330 for ilabel, label
in enumerate(self.
IDs):
333 data = defaultdict(dict)
334 for p
in [
'gas',
'liq']:
338 L = len(results[
'Potentials'])
340 Eg = results[
'Potentials']
341 Eaq = results[
'Potentials'] + results[
'Hydration']
343 expmbH = np.exp(-1.0*beta*results[
'Hydration'])
344 data[p][
'Hyd'] = -kT*np.log(np.mean(expmbH))
347 data[p][
'HydErr'] = np.std([-kT*np.log(np.mean(expmbH[np.random.randint(L,size=L)]))
for i
in range(100)]) * np.sqrt(
statisticalInefficiency(results[
'Hydration']))
349 dEg = results[
'Potential_Derivatives']
350 dEaq = results[
'Potential_Derivatives'] + results[
'Hydration_Derivatives']
351 data[p][
'dHyd'] = (
flat(np.dot(dEaq,
col(expmbH))/L)-np.mean(dEg,axis=1)*np.mean(expmbH)) / np.mean(expmbH)
353 Eg = results[
'Potentials'] - results[
'Hydration']
354 Eaq = results[
'Potentials']
356 exppbH = np.exp(+1.0*beta*results[
'Hydration'])
357 data[p][
'Hyd'] = +kT*np.log(np.mean(exppbH))
360 data[p][
'HydErr'] = np.std([+kT*np.log(np.mean(exppbH[np.random.randint(L,size=L)]))
for i
in range(100)]) * np.sqrt(
statisticalInefficiency(results[
'Hydration']))
362 dEg = results[
'Potential_Derivatives'] - results[
'Hydration_Derivatives']
363 dEaq = results[
'Potential_Derivatives']
364 data[p][
'dHyd'] = -(
flat(np.dot(dEg,
col(exppbH))/L)-np.mean(dEaq,axis=1)*np.mean(exppbH)) / np.mean(exppbH)
369 self.
hfe_dict[label] = data[
'gas'][
'Hyd'] / 4.184
370 self.
hfe_err[label] = data[
'gas'][
'HydErr'] / 4.184
371 elif self.
hfemode ==
'exp_liq':
372 self.
hfe_dict[label] = data[
'liq'][
'Hyd'] / 4.184
373 self.
hfe_err[label] = data[
'liq'][
'HydErr'] / 4.184
374 elif self.
hfemode ==
'exp_both':
375 self.
hfe_dict[label] = 0.5*(data[
'liq'][
'Hyd']+data[
'gas'][
'Hyd']) / 4.184
376 self.
hfe_err[label] = 0.5*(data[
'liq'][
'HydErr']+data[
'gas'][
'HydErr']) / 4.184
380 dD[:, ilabel] = self.
whfe[ilabel]*data[
'gas'][
'dHyd'] / 4.184
381 elif self.
hfemode ==
'exp_liq':
382 dD[:, ilabel] = self.
whfe[ilabel]*data[
'liq'][
'dHyd'] / 4.184
383 elif self.
hfemode ==
'exp_both':
384 dD[:, ilabel] = 0.5*self.
whfe[ilabel]*(data[
'liq'][
'dHyd']+data[
'gas'][
'dHyd']) / 4.184
386 calc_hfe = np.array(list(self.
hfe_dict.values()))
387 D = self.
whfe*(calc_hfe - np.array(list(self.
expval.values())))
390 def get_ti2(self, mvals, AGrad=False, AHess=False):
391 """ Get the hydration free energy using two-point thermodynamic integration. """ 393 dD = np.zeros((self.FF.np,len(self.
IDs)))
394 beta = 1. / (kb * self.hfe_temperature)
395 for ilabel, label
in enumerate(self.
IDs):
398 data = defaultdict(dict)
399 for p
in [
'gas',
'liq']:
402 results =
lp_load(
'md_result.p')
404 H = results[
'Hydration']
406 data[p][
'Hyd'] = np.mean(H)
408 dE = results[
'Potential_Derivatives']
409 dH = results[
'Hydration_Derivatives']
411 data[p][
'dHyd'] = np.mean(dH,axis=1)-beta*(
flat(np.dot(dE,
col(H))/len(H))-np.mean(dE,axis=1)*np.mean(H))
415 self.
hfe_dict[label] = 0.5*(data[
'liq'][
'Hyd']+data[
'gas'][
'Hyd']) / 4.184
418 dD[:, ilabel] = 0.5*self.
whfe[ilabel]*(data[
'liq'][
'dHyd']+data[
'gas'][
'dHyd']) / 4.184
420 calc_hfe = np.array(list(self.
hfe_dict.values()))
421 D = self.
whfe*(calc_hfe - np.array(list(self.
expval.values())))
424 def get(self, mvals, AGrad=False, AHess=False):
425 """ Evaluate objective function. """ 426 Answer = {
'X':0.0,
'G':np.zeros(self.FF.np),
'H':np.zeros((self.FF.np, self.FF.np))}
427 if self.
hfemode.lower() ==
'single' or self.
hfemode.lower() ==
'sp':
428 D, dD = self.
get_sp(mvals, AGrad, AHess)
429 elif self.
hfemode.lower() ==
'ti2':
430 D, dD = self.
get_ti2(mvals, AGrad, AHess)
431 elif self.
hfemode.lower()
in [
'exp_gas',
'exp_liq',
'exp_both']:
432 D, dD = self.
get_exp(mvals, AGrad, AHess)
433 Answer[
'X'] = np.dot(D,D) / self.denom**2 / (np.sum(self.
whfe)
if self.normalize
else 1)
435 Answer[
'G'][p] = 2*np.dot(D, dD[p,:]) / self.denom**2 / (np.sum(self.
whfe)
if self.normalize
else 1)
437 Answer[
'H'][p,q] = 2*np.dot(dD[p,:], dD[q,:]) / self.denom**2 / (np.sum(self.
whfe)
if self.normalize
else 1)
440 if hasattr(self,
'hfe_err'):
def build_engines(self)
Create a list of engines which are used to calculate HFEs using single point evaluation.
def get_exp(self, mvals, AGrad=False, AHess=False)
Get the hydration free energy using the Zwanzig formula.
datafile
The vdata.txt file that contains the hydrations.
engine_opts
Scripts to be copied from the ForceBalance installation directory.
def read_reference_data(self)
Read the reference hydrational data from a file.
def get_sp(self, mvals, AGrad=False, AHess=False)
Get the hydration free energy and first parameteric derivatives using single point energy evaluations...
def lp_load(fnm)
Read an object from a bzipped file specified by the path.
Nifty functions, intended to be imported by any module within ForceBalance.
def submit_liq_gas(self, mvals, AGrad=True)
Set up and submit/run sampling simulations in the liquid and gas phases.
def LinkFile(src, dest, nosrcok=False)
def flat(vec)
Given any list, array, or matrix, return a single-index array.
Subclass of Target for fitting force fields to hydration free energies.
def statisticalInefficiency(A_n, B_n=None, fast=False, mintime=3, warn=True)
Compute the (cross) statistical inefficiency of (two) timeseries.
def submit_jobs(self, mvals, AGrad=True, AHess=True)
def hydration_driver_sp(self)
Calculate HFEs using single point evaluation.
def in_fd()
Invoking this function from anywhere will tell us whether we're being called by a finite-difference f...
def fdwrap(func, mvals0, pidx, key=None, kwargs)
A function wrapper for finite difference designed for differentiating 'get'-type functions.
def queue_up(wq, command, input_files, output_files, tag=None, tgt=None, verbose=True, print_time=60)
Submit a job to the Work Queue.
def lp_dump(obj, fnm, protocol=0)
Write an object to a zipped pickle file specified by the path.
def col(vec)
Given any list, array, or matrix, return a 1-column 2D array.
def get_ti2(self, mvals, AGrad=False, AHess=False)
Get the hydration free energy using two-point thermodynamic integration.
def link_dir_contents(abssrcdir, absdestdir)
def printcool(text, sym="#", bold=False, color=2, ansi=None, bottom='-', minwidth=50, center=True, sym2="=")
Cool-looking printout for slick formatting of output.
def indicate(self)
Print qualitative indicator.
def get(self, mvals, AGrad=False, AHess=False)
Evaluate objective function.
def f12d3p(f, h, f0=None)
A three-point finite difference stencil.
loop_over_snapshots
LPW 2018-02-11: This is set to True if the target calculates a single-point property over several exi...
def __init__(self, options, tgt_opts, forcefield)
Initialization.
def isfloat(word)
Matches ANY number; it can be a decimal, scientific notation, integer, or what have you...
def run_simulation(self, label, liq, AGrad=True)
Submit a simulation to the Work Queue or run it locally.