ForceBalance API  1.3
Automated optimization of force fields and empirical potentials
thermo.py
Go to the documentation of this file.
1 from __future__ import division
2 from __future__ import print_function
3 from builtins import zip
4 from builtins import str
5 from builtins import object
6 import os
7 import errno
8 import numpy as np
9 
10 from forcebalance.target import Target
11 from forcebalance.finite_difference import in_fd, f12d3p, fdwrap
12 from forcebalance.nifty import flat, col, row
13 from forcebalance.nifty import lp_dump, lp_load, wopen, _exec
14 from forcebalance.nifty import LinkFile, link_dir_contents
15 from forcebalance.nifty import printcool, printcool_dictionary
16 
17 from collections import OrderedDict
18 
19 from forcebalance.output import getLogger
20 logger = getLogger(__name__)
21 
22 def energy_derivatives(engine, FF, mvals, h, pgrad, dipole=False):
23 
24  """
25  Compute the first and second derivatives of a set of snapshot
26  energies with respect to the force field parameters.
27 
28  This basically calls the finite difference subroutine on the
29  energy_driver subroutine also in this script.
30 
31  In the future we may need to be more sophisticated with
32  controlling the quantities which are differentiated, but for
33  now this is okay..
34 
35  @param[in] engine Engine object for calculating energies
36  @param[in] FF Force field object
37  @param[in] mvals Mathematical parameter values
38  @param[in] h Finite difference step size
39  @param[in] pgrad List of active parameters for differentiation
40  @param[in] dipole Switch for dipole derivatives.
41  @return G First derivative of the energies in a N_param x N_coord array
42  @return GDx First derivative of the box dipole moment x-component in a N_param x N_coord array
43  @return GDy First derivative of the box dipole moment y-component in a N_param x N_coord array
44  @return GDz First derivative of the box dipole moment z-component in a N_param x N_coord array
45 
46  """
47  def single_point(mvals_):
48  FF.make(mvals_)
49  if dipole:
50  return engine.energy_dipole()
51  else:
52  return engine.energy()
53 
54  ED0 = single_point(mvals)
55  G = OrderedDict()
56  G['potential'] = np.zeros((FF.np, ED0.shape[0]))
57  if dipole:
58  G['dipole'] = np.zeros((FF.np, ED0.shape[0], 3))
59  for i in pgrad:
60  logger.info("%i %s\r" % (i, (FF.plist[i] + " "*30)))
61  edg, _ = f12d3p(fdwrap(single_point,mvals,i),h,f0=ED0)
62  if dipole:
63  G['potential'][i] = edg[:,0]
64  G['dipole'][i] = edg[:,1:]
65  else:
66  G['potential'][i] = edg[:]
67  return G
68 
69 #
70 class Thermo(Target):
71  """
72  A target for fitting general experimental data sets. The
73  experimental data is described in a .txt file and is handled with a
74  `Quantity` subclass.
75 
76  """
77  def __init__(self, options, tgt_opts, forcefield):
78 
79  super(Thermo, self).__init__(options, tgt_opts, forcefield)
80 
81 
83  self.set_option(tgt_opts, "expdata_txt", forceprint=True)
84  # Quantities to calculate
85  self.set_option(tgt_opts, "quantities", forceprint=True)
86  # Length of simulation chain
87  self.set_option(tgt_opts, "n_sim_chain", forceprint=True)
88  # Number of time steps in the equilibration run
89  self.set_option(tgt_opts, "eq_steps", forceprint=True)
90  # Number of time steps in the production run
91  self.set_option(tgt_opts, "md_steps", forceprint=True)
92 
93 
96  self.loop_over_snapshots = False
97  # Prefix names for simulation data
98  self.simpfx = "sim"
99  # Data points for quantities
100  self.points = []
101  # Denominators for quantities
102  self.denoms = {}
103  # Weights for quantities
104  self.weights = {}
105 
106 
107  self._read_expdata(os.path.join(self.root,
108  self.tgtdir,
109  self.expdata_txt))
110 
111 
112  for f in self.scripts:
113  LinkFile(os.path.join(os.path.split(__file__)[0], "data", f),
114  os.path.join(self.root, self.tempdir, f))
115 
116  def _read_expdata(self, expdata):
117  """Read and store experimental data.
118 
119  Parameters
120  ----------
121  expdata : string
122  Read experimental data from this filename.
123 
124  Returns
125  -------
126  Nothing
127 
128  """
129  fp = open(expdata)
130 
131  line = fp.readline()
132  foundHeader = False
133  names = None
134  units = None
135  label_header = None
136  label_unit = None
137  count = 0
138  while line:
139  # Skip comments and blank lines
140  if line.lstrip().startswith("#") or not line.strip():
141  line = fp.readline()
142  continue
143 
144  if "=" in line: # Read variable
145  param, value = line.split("=")
146  param = param.strip().lower()
147  if param == "denoms":
148  for e, v in enumerate(value.split()):
149  self.denoms[self.quantities[e]] = float(v)
150  elif param == "weights":
151  for e, v in enumerate(value.split()):
152  self.weights[self.quantities[e]] = float(v)
153  elif foundHeader: # Read exp data
154  count += 1
155  vals = line.split()
156 
157  label = (vals[0], label_header, label_unit)
158  refs = np.array(vals[1:-2:2]).astype(float)
159  wts = np.array(vals[2:-2:2]).astype(float)
160  temperature = float(vals[-2])
161  pressure = None if vals[-1].lower() == "none" else \
162  float(vals[-1])
163 
164  dp = Point(count, label=label, refs=refs, weights=wts,
165  names=names, units=units,
166  temperature=temperature, pressure=pressure)
167  self.points.append(dp)
168  else: # Read headers
169  foundHeader = True
170  headers = list(zip(*[tuple(h.split("_")) for h in line.split()
171  if h != "w"]))
172 
173  label_header = list(headers[0])[0]
174  label_unit = list(headers[1])[0]
175  names = list(headers[0][1:-2])
176  units = list(headers[1][1:-2])
177 
178  line = fp.readline()
179 
180  def retrieve(self, dp):
181  """Retrieve the molecular dynamics (MD) results and store the calculated
182  quantities in the Point object dp.
183 
184  Parameters
185  ----------
186  dp : Point
187  Store the calculated quantities in this point.
188 
189  Returns
190  -------
191  Nothing
192 
193  """
194  abspath = os.path.join(os.getcwd(), '%d/md_result.p' % dp.idnr)
195 
196  if os.path.exists(abspath):
197  logger.info('Reading data from ' + abspath + '.\n')
198 
199  vals, errs, grads = lp_load(abspath)
200 
201  dp.data["values"] = vals
202  dp.data["errors"] = errs
203  dp.data["grads"] = grads
204 
205  else:
206  msg = 'The file ' + abspath + ' does not exist so we cannot read it.\n'
207  logger.warning(msg)
208 
209  dp.data["values"] = np.zeros((len(self.quantities)))
210  dp.data["errors"] = np.zeros((len(self.quantities)))
211  dp.data["grads"] = np.zeros((len(self.quantities), self.FF.np))
212 
213  def submit_jobs(self, mvals, AGrad=True, AHess=True):
214  """This routine is called by Objective.stage() and will run before "get".
215  It submits the jobs and the stage() function will wait for jobs
216  to complete.
217 
218  Parameters
219  ----------
220  mvals : list
221  Mathematical parameter values.
222  AGrad : Boolean
223  Switch to turn on analytic gradient.
224  AHess : Boolean
225  Switch to turn on analytic Hessian.
226 
227  Returns
228  -------
229  Nothing.
230 
231  """
232  # Set up and run the simulation chain on all points.
233  for pt in self.points:
234  # Create subdir
235  try:
236  os.makedirs(str(pt.idnr))
237  except OSError as exception:
238  if exception.errno != errno.EEXIST:
239  raise
240 
241  # Goto subdir
242  os.chdir(str(pt.idnr))
243 
244  # Link dir contents from target subdir to current temp directory.
245  for f in self.scripts:
246  LinkFile(os.path.join(self.root, self.tempdir, f),
247  os.path.join(os.getcwd(), f))
248 
249  link_dir_contents(os.path.join(self.root, self.tgtdir,
250  str(pt.idnr)), os.getcwd())
251 
252  # Dump the force field to a pickle file
253  lp_dump((self.FF, mvals, self.OptionDict, AGrad), 'forcebalance.p')
254 
255  # Run the simulation chain for point.
256  cmdstr = ("%s python md_chain.py " % self.mdpfx +
257  " ".join(self.quantities) + " " +
258  "--engine %s " % self.engname +
259  "--length %d " % self.n_sim_chain +
260  "--name %s " % self.simpfx +
261  "--temperature %f " % pt.temperature +
262  "--pressure %f " % pt.pressure +
263  "--nequil %d " % self.eq_steps +
264  "--nsteps %d " % self.md_steps)
265  _exec(cmdstr, copy_stderr=True, outfnm='md_chain.out')
266 
267  os.chdir('..')
268 
269  def indicate(self):
270  """Shows optimization state."""
271  AGrad = hasattr(self, 'Gp')
272  PrintDict = OrderedDict()
273 
274  def print_item(key, physunit):
275  if self.Xp[key] > 0:
276  the_title = ("%s %s (%s)\n" % (self.name, key.capitalize(), physunit) +
277  "No. Temperature Pressure Reference " +
278  "Calculated +- Stddev " +
279  " Delta Weight Term ")
280 
281  printcool_dictionary(self.Pp[key], title=the_title, bold=True,
282  color=4, keywidth=15)
283 
284  bar = printcool(("%s objective function: % .3f%s" %
285  (key.capitalize(), self.Xp[key],
286  ", Derivative:" if AGrad else "")))
287  if AGrad:
288  self.FF.print_map(vals=self.Gp[key])
289  logger.info(bar)
290 
291  PrintDict[key] = (("% 10.5f % 8.3f % 14.5e" %
292  (self.Xp[key], self.Wp[key],
293  self.Xp[key]*self.Wp[key])))
294 
295  for i, q in enumerate(self.quantities):
296  print_item(q, self.points[0].ref["units"][i])
297 
298  PrintDict['Total'] = "% 10s % 8s % 14.5e" % ("","", self.Objective)
299 
300  Title = ("%s Thermodynamic Properties:\n %-20s %40s" %
301  (self.name, "Property", "Residual x Weight = Contribution"))
302  printcool_dictionary(PrintDict, color=4, title=Title, keywidth=31)
303  return
304 
305  def objective_term(self, quantity):
306  """Calculates the contribution to the objective function (the term) for a
307  given quantity.
308 
309  Parameters
310  ----------
311  quantity : string
312  Calculate the objective term for this quantity.
313 
314  Returns
315  -------
316  term : dict
317  `term` is a dict with keys `X`, `G`, `H` and `info`. The values of
318  these keys are the objective term itself (`X`), its gradient (`G`),
319  its Hessian (`H`), and an OrderedDict with print information on
320  individiual data points (`info`).
321 
322  """
323  Objective = 0.0
324  Gradient = np.zeros(self.FF.np)
325  Hessian = np.zeros((self.FF.np, self.FF.np))
326 
327  # Grab ref data for quantity
328  qid = self.quantities.index(quantity)
329  Exp = np.array([pt.ref["refs"][qid] for pt in self.points])
330  Weights = np.array([pt.ref["weights"][qid] for pt in self.points])
331  Denom = self.denoms[quantity]
332 
333  # Renormalize weights
334  Weights /= np.sum(Weights)
335  logger.info("Renormalized weights to " + str(np.sum(Weights)) + "\n")
336  logger.info(("Physical quantity '%s' uses denominator = %g %s\n" %
337  (quantity.capitalize(), Denom,
338  self.points[0].ref["units"][self.quantities.index(quantity)])))
339 
340  # Grab calculated values
341  values = np.array([pt.data["values"][qid] for pt in self.points])
342  errors = np.array([pt.data["errors"][qid] for pt in self.points])
343  grads = np.array([pt.data["grads"][qid] for pt in self.points])
344 
345  # Calculate objective term using Least-squares function. Evaluate using
346  # Einstein summation: W is N-array, Delta is N-array and grads is
347  # NxM-array, where N is number of points and M is number of parameters.
348  #
349  # X_i = W_i * Delta2_i (no summed indices)
350  # G_ij = W_i * Delta_i * grads_ij (no summed indices)
351  # H_ijm = W_i * gradsT_jk * grads_lm (sum over k and l)
352  #
353  # Result: X is N-array, G is NxM-array and H is NxMxM-array.
354  #
355  Deltas = values - Exp
356  Objs = np.einsum("i,i->i", Weights, Deltas**2) / Denom / Denom
357  Grads = 2.0*np.einsum("i,i,ij->ij", Weights, Deltas, grads) / Denom / Denom
358  Hess = 2.0*np.einsum("i,jk,lm->ijm", Weights, grads.T, grads) / Denom / Denom
359 
360  # Average over all points
361  Objective += np.sum(Objs, axis=0)
362  Gradient += np.sum(Grads, axis=0)
363  Hessian += np.sum(Hess, axis=0)
364 
365  # Store gradients and setup print map
366  GradMapPrint = [["#Point"] + self.FF.plist]
367 
368  for pt in self.points:
369  temp = pt.temperature
370  press = pt.pressure
371  GradMapPrint.append([' %8.2f %8.1f' % (temp, press)] +
372  ["% 9.3e" % i for i in grads[pt.idnr-1]])
373 
374  o = wopen('gradient_%s.dat' % quantity)
375  for line in GradMapPrint:
376  print(' '.join(line), file=o)
377  o.close()
378 
379  printer = OrderedDict([(" %-5d %-12.2f %-8.1f"
380  % (pt.idnr, pt.temperature, pt.pressure),
381  ("% -10.3f % -10.3f +- %-8.3f % -8.3f % -9.5f % -9.5f"
382  % (Exp[pt.idnr-1], values[pt.idnr-1],
383  errors[pt.idnr-1], Deltas[pt.idnr-1],
384  Weights[pt.idnr-1], Objs[pt.idnr-1])))
385  for pt in self.points])
386 
387  return { "X": Objective, "G": Gradient, "H": Hessian, "info": printer }
388 
389  def get(self, mvals, AGrad=True, AHess=True):
390  """Return the contribution to the total objective function. This is a
391  weighted average of the calculated quantities.
392 
393  Parameters
394  ----------
395  mvals : list
396  Mathematical parameter values.
397  AGrad : Boolean
398  Switch to turn on analytic gradient.
399  AHess : Boolean
400  Switch to turn on analytic Hessian.
401 
402  Returns
403  -------
404  Answer : dict
405  Contribution to the objective function. `Answer` is a dict with keys
406  `X` for the objective function, `G` for its gradient and `H` for its
407  Hessian.
408 
409  """
410  Answer = {}
411 
412  Objective = 0.0
413  Gradient = np.zeros(self.FF.np)
414  Hessian = np.zeros((self.FF.np, self.FF.np))
415 
416  for pt in self.points:
417  # Update data point with MD results
418  self.retrieve(pt)
419 
420  obj = OrderedDict()
421  reweighted = []
422  for q in self.quantities:
423  # Returns dict with keys "X"=objective term value, "G"=the
424  # gradient, "H"=the hessian, and "info"=printed info about points
425  obj[q] = self.objective_term(q)
426 
427  # Apply weights for quantities (normalized)
428  if obj[q]["X"] == 0:
429  self.weights[q] = 0.0
430 
431  # Store weights sorted in the order of self.quantities
432  reweighted.append(self.weights[q])
433 
434  # Normalize weights
435  reweighted = np.array(reweighted)
436  wtot = np.sum(reweighted)
437  reweighted = reweighted/wtot if wtot > 0 else reweighted
438 
439  # Picks out the "X", "G" and "H" keys for the quantities sorted in the
440  # order of self.quantities. Xs is N-array, Gs is NxM-array and Hs is
441  # NxMxM-array, where N is number of quantities and M is number of
442  # parameters.
443  Xs = np.array([dic["X"] for dic in obj.values()])
444  Gs = np.array([dic["G"] for dic in obj.values()])
445  Hs = np.array([dic["H"] for dic in obj.values()])
446 
447  # Target contribution is (normalized) weighted averages of the
448  # individual quantity terms.
449  Objective = np.average(Xs, weights=(None if np.all(reweighted == 0) else \
450  reweighted), axis=0)
451  if AGrad:
452  Gradient = np.average(Gs, weights=(None if np.all(reweighted == 0) else \
453  reweighted), axis=0)
454  if AHess:
455  Hessian = np.average(Hs, weights=(None if np.all(reweighted == 0) else \
456  reweighted), axis=0)
457 
458  if not in_fd():
459  # Store results to show with indicator() function
460  self.Xp = {q : dic["X"] for (q, dic) in obj.items()}
461  self.Wp = {q : reweighted[self.quantities.index(q)]
462  for (q, dic) in obj.items()}
463  self.Pp = {q : dic["info"] for (q, dic) in obj.items()}
464 
465  if AGrad:
466  self.Gp = {q : dic["G"] for (q, dic) in obj.items()}
467 
468  self.Objective = Objective
469 
470  Answer = { "X": Objective, "G": Gradient, "H": Hessian }
471  return Answer
472 
473 # class Point --- data container
474 class Point(object):
475  def __init__(self, idnr, label=None, refs=None, weights=None, names=None,
476  units=None, temperature=None, pressure=None, data=None):
477  self.idnr = idnr
478  self.ref = { "label" : label,
479  "refs" : refs,
480  "weights": weights,
481  "names" : names,
482  "units" : units }
483  self.temperature = temperature
484  self.pressure = pressure
485  self.data = data if data is not None else {}
486 
487  def __str__(self):
488  msg = []
489  if self.temperature is None:
490  msg.append("State: Unknown.")
491  elif self.pressure is None:
492  msg.append("State: Point " + str(self.idnr) + " at " +
493  str(self.temperature) + " K.")
494  else:
495  msg.append("State: Point " + str(self.idnr) + " at " +
496  str(self.temperature) + " K and " +
497  str(self.pressure) + " bar.")
498 
499  msg.append("Point " + str(self.idnr) + " reference data " + "-"*30)
500  for key in self.ref:
501  msg.append(" " + key.strip() + " = " + str(self.ref[key]).strip())
502 
503  msg.append("Point " + str(self.idnr) + " calculated data " + "-"*30)
504  for key in self.data:
505  msg.append(" " + key.strip() + " = " + str(self.data[key]).strip())
506 
507  return "\n".join(msg)
def indicate(self)
Shows optimization state.
Definition: thermo.py:276
def lp_load(fnm)
Read an object from a bzipped file specified by the path.
Definition: nifty.py:836
Nifty functions, intended to be imported by any module within ForceBalance.
def __str__(self)
Definition: thermo.py:495
def LinkFile(src, dest, nosrcok=False)
Definition: nifty.py:1313
def _read_expdata(self, expdata)
Read and store experimental data.
Definition: thermo.py:131
A target for fitting general experimental data sets.
Definition: thermo.py:78
def __init__(self, options, tgt_opts, forcefield)
Definition: thermo.py:79
def in_fd()
Invoking this function from anywhere will tell us whether we're being called by a finite-difference f...
def fdwrap(func, mvals0, pidx, key=None, kwargs)
A function wrapper for finite difference designed for differentiating 'get'-type functions.
def lp_dump(obj, fnm, protocol=0)
Write an object to a zipped pickle file specified by the path.
Definition: nifty.py:817
loop_over_snapshots
Initialize base class.
Definition: thermo.py:98
def link_dir_contents(abssrcdir, absdestdir)
Definition: nifty.py:1345
def retrieve(self, dp)
Retrieve the molecular dynamics (MD) results and store the calculated quantities in the Point object ...
Definition: thermo.py:197
def printcool_dictionary(Dict, title="Dictionary Keys : Values", bold=False, color=2, keywidth=25, topwidth=50, center=True, leftpad=0)
See documentation for printcool; this is a nice way to print out keys/values in a dictionary...
Definition: nifty.py:366
def energy_derivatives(engine, FF, mvals, h, pgrad, dipole=False)
Compute the first and second derivatives of a set of snapshot energies with respect to the force fiel...
Definition: thermo.py:46
def printcool(text, sym="#", bold=False, color=2, ansi=None, bottom='-', minwidth=50, center=True, sym2="=")
Cool-looking printout for slick formatting of output.
Definition: nifty.py:321
def submit_jobs(self, mvals, AGrad=True, AHess=True)
This routine is called by Objective.stage() and will run before "get".
Definition: thermo.py:236
def get(self, mvals, AGrad=True, AHess=True)
Return the contribution to the total objective function.
Definition: thermo.py:417
def objective_term(self, quantity)
Calculates the contribution to the objective function (the term) for a given quantity.
Definition: thermo.py:329
def f12d3p(f, h, f0=None)
A three-point finite difference stencil.
def wopen(dest, binary=False)
If trying to write to a symbolic link, remove it first.
Definition: nifty.py:1304