ForceBalance API  1.3
Automated optimization of force fields and empirical potentials
opt_geo_target.py
Go to the documentation of this file.
1 """ @package forcebalance.opt_geo_target Optimized Geometry fitting module.
2 
3 @author Yudong Qiu, Lee-Ping Wang
4 @date 03/2019
5 """
6 from __future__ import division
7 import os
8 import shutil
9 import numpy as np
10 import re
11 import subprocess
12 from copy import deepcopy
13 from collections import OrderedDict, defaultdict
14 from forcebalance.nifty import col, eqcgmx, flat, floatornan, fqcgmx, invert_svd, kb, printcool, printcool_dictionary, bohr2ang, warn_press_key
15 from forcebalance.target import Target
16 from forcebalance.molecule import Molecule
17 from forcebalance.finite_difference import fdwrap, f1d2p, f12d3p, in_fd
18 from forcebalance.output import getLogger
19 logger = getLogger(__name__)
20 
21 RADIAN_2_DEGREE = 180 / np.pi
22 
23 def periodic_diff(a, b, v_periodic):
24  ''' convenient function for computing the minimum difference in periodic coordinates
25  Parameters
26  ----------
27  a: np.ndarray or float
28  Reference values in a numpy array
29  b: np.ndarray or float
30  Target values in a numpy arrary
31  v_periodic: float > 0
32  Value of the periodic boundary
33 
34  Returns
35  -------
36  diff: np.ndarray
37  The array of same shape containing the difference between a and b
38  All return values are in range [-v_periodic/2, v_periodic/2),
39  "( )" means exclusive, "[ ]" means inclusive
40 
41  Examples
42  -------
43  periodic_diff(0.0, 2.1, 2.0) => -0.1
44  periodic_diff(0.0, 1.9, 2.0) => 0.1
45  periodic_diff(0.0, 1.0, 2.0) => -1.0
46  periodic_diff(1.0, 0.0, 2.0) => -1.0
47  periodic_diff(1.0, 0.1, 2.0) => -0.9
48  periodic_diff(1.0, 10.1, 2.0) => 0.9
49  periodic_diff(1.0, 9.9, 2.0) => -0.9
50  '''
51  assert v_periodic > 0
52  h = 0.5 * v_periodic
53  return (a - b + h) % v_periodic - h
54 
55 def compute_rmsd(ref, tar, v_periodic=None):
56  """
57  Compute the RMSD between two arrays, supporting periodic difference
58  """
59  assert len(ref) == len(tar), 'array length must match'
60  n = len(ref)
61  if n == 0: return 0.0
62  if v_periodic is not None:
63  diff = periodic_diff(ref, tar, v_periodic)
64  else:
65  diff = ref - tar
66  rmsd = np.sqrt(np.sum(diff**2) / n)
67  return rmsd
68 
69 class OptGeoTarget(Target):
70  """ Subclass of Target for fitting MM optimized geometries to QM optimized geometries. """
71  def __init__(self,options,tgt_opts,forcefield):
72  super(OptGeoTarget,self).__init__(options,tgt_opts,forcefield)
73  self.set_option(None, None, 'optgeo_options', os.path.join(self.tgtdir,tgt_opts['optgeo_options_txt']))
74  self.sys_opts = self.parse_optgeo_options(self.optgeo_options)
75 
76  engine_args = OrderedDict(list(self.OptionDict.items()) + list(options.items()))
77  engine_args.pop('name', None)
78 
79  self.create_engines(engine_args)
80 
82 
83  self.set_option(tgt_opts,'writelevel','writelevel')
84 
85  def create_engines(self, engine_args):
86  raise NotImplementedError("create_engines() should be implemented in subclass")
87 
88  @staticmethod
89  def parse_optgeo_options(filename):
90  """ Parse an optgeo_options.txt file into specific OptGeoTarget Target Options"""
91  logger.info("Reading optgeo options from file: %s\n" % filename)
92  global_opts = OrderedDict()
93  sys_opts = OrderedDict()
94  section = None
95  section_opts = OrderedDict()
96  with open(filename) as f:
97  for ln, line in enumerate(f, 1):
98  # Anything after "#" is a comment
99  line = line.split("#", maxsplit=1)[0].strip()
100  if not line: continue
101  ls = line.split()
102  key = ls[0].lower()
103  if key[0] == "$":
104  # section sign $
105  if key == '$end':
106  if section is None:
107  warn_press_key("Line %i: Encountered $end before any section." % ln)
108  elif section == 'global':
109  # global options read finish
110  global_opts = section_opts
111  elif section == 'system':
112  # check if system section contains name
113  if 'name' not in section_opts:
114  warn_press_key("Line %i: You need to specify a name for the system section ending." % ln)
115  elif section_opts['name'] in sys_opts:
116  warn_press_key("Line %i: A system named %s already exists in Systems" % (ln, section_opts['name']))
117  else:
118  sys_opts[section_opts['name']] = section_opts
119  section = None
120  section_opts = OrderedDict()
121  else:
122  if section is not None:
123  warn_press_key("Line %i: Encountered section start %s before previous section $end." % (ln, key))
124  if key == '$global':
125  section = 'global'
126  elif key == '$system':
127  section = 'system'
128  else:
129  warn_press_key("Line %i: Encountered unsupported section name %s " % (ln, key))
130  else:
131  # put normal key-value options into section_opts
132  if key in ['name', 'geometry', 'topology']:
133  if len(ls) != 2:
134  warn_press_key("Line %i: one value expected for key %s" % (ln, key))
135  if section == 'global':
136  warn_press_key("Line %i: key %s should not appear in $global section" % (ln, key))
137  section_opts[key] = ls[1]
138  elif key in ['bond_denom', 'angle_denom', 'dihedral_denom', 'improper_denom']:
139  if len(ls) != 2:
140  warn_press_key("Line %i: one value expected for key %s" % (ln, key))
141  section_opts[key] = float(ls[1])
142  elif key == 'mol2':
143  # special parsing for mol2 option for SMIRNOFF engine
144  # the value is a list of filenames
145  section_opts[key] = ls[1:]
146  # apply a few default global options
147  global_opts.setdefault('bond_denom', 0.02)
148  global_opts.setdefault('angle_denom', 3)
149  global_opts.setdefault('dihedral_denom', 10.0)
150  global_opts.setdefault('improper_denom', 10.0)
151  # copy global options into each system
152  for sys_name, sys_opt_dict in sys_opts.items():
153  for k,v in global_opts.items():
154  # do not overwrite system options
155  sys_opt_dict.setdefault(k, v)
156  for k in ['name', 'geometry', 'topology']:
157  if k not in sys_opt_dict:
158  warn_press_key("key %s missing in system section named %s" %(k, sys_name))
159  return sys_opts
160 
161  def _build_internal_coordinates(self):
162  "Build internal coordinates system with geometric.internal.PrimitiveInternalCoordinates"
163  # geometric module is imported to build internal coordinates
164  # importing here will avoid import error for calculations not using this target
165  from geometric.internal import PrimitiveInternalCoordinates, Distance, Angle, Dihedral, OutOfPlane
166  self.internal_coordinates = OrderedDict()
167  for sysname, sysopt in self.sys_opts.items():
168  geofile = os.path.join(self.root, self.tgtdir, sysopt['geometry'])
169  topfile = os.path.join(self.root, self.tgtdir, sysopt['topology'])
170  # logger.info("Building internal coordinates from file: %s\n" % topfile)
171  m0 = Molecule(geofile)
172  m = Molecule(topfile)
173  p_IC = PrimitiveInternalCoordinates(m)
174  # here we explicitly pick the bonds, angles and dihedrals to evaluate
175  ic_bonds, ic_angles, ic_dihedrals, ic_impropers = [], [], [], []
176  for ic in p_IC.Internals:
177  if isinstance(ic, Distance):
178  ic_bonds.append(ic)
179  elif isinstance(ic, Angle):
180  ic_angles.append(ic)
181  elif isinstance(ic, Dihedral):
182  ic_dihedrals.append(ic)
183  elif isinstance(ic, OutOfPlane):
184  ic_impropers.append(ic)
185  # compute and store reference values
186  pos_ref = m0.xyzs[0]
187  vref_bonds = np.array([ic.value(pos_ref) for ic in ic_bonds])
188  vref_angles = np.array([ic.value(pos_ref)*RADIAN_2_DEGREE for ic in ic_angles])
189  vref_dihedrals = np.array([ic.value(pos_ref)*RADIAN_2_DEGREE for ic in ic_dihedrals])
190  vref_impropers = np.array([ic.value(pos_ref)*RADIAN_2_DEGREE for ic in ic_impropers])
191  self.internal_coordinates[sysname] = {
192  'ic_bonds': ic_bonds,
193  'ic_angles': ic_angles,
194  'ic_dihedrals': ic_dihedrals,
195  'ic_impropers': ic_impropers,
196  'vref_bonds': vref_bonds,
197  'vref_angles': vref_angles,
198  'vref_dihedrals': vref_dihedrals,
199  'vref_impropers': vref_impropers,
200  }
201 
202  def system_driver(self, sysname, save_mol=None):
203  """ Run calculation for one system, return internal coordinate values after optimization """
204  engine = self.engines[sysname]
205  ic_dict = self.internal_coordinates[sysname]
206  if engine.__class__.__name__ in ('OpenMM', 'SMIRNOFF'):
207  # OpenMM.optimize() by default resets geometry to initial geometry before optimization
208  engine.optimize(0)
209  pos = engine.getContextPosition()
210  if save_mol is not None:
211  MM_minimized_mol = deepcopy(engine.mol[0])
212  MM_minimized_mol.xyzs[0] = pos
213  MM_minimized_mol.write(save_mol)
214  else:
215  raise NotImplementedError("system_driver() not implemented for %s" % engine.__name__)
216  v_ic = {
217  'bonds': np.array([ic.value(pos) for ic in ic_dict['ic_bonds']]),
218  'angles': np.array([ic.value(pos)*RADIAN_2_DEGREE for ic in ic_dict['ic_angles']]),
219  'dihedrals': np.array([ic.value(pos)*RADIAN_2_DEGREE for ic in ic_dict['ic_dihedrals']]),
220  'impropers': np.array([ic.value(pos)*RADIAN_2_DEGREE for ic in ic_dict['ic_impropers']]),
221  }
222  return v_ic
223 
224  def indicate(self):
225  title_str = "%s, Objective = % .5e" % (self.name, self.objective)
226  #QYD: This title is carefully placed to align correctly
227  column_head_str1 = " %-20s %13s %13s %15s %15s %17s " % ("System", "Bonds", "Angles", "Dihedrals", "Impropers", "Term.")
228  column_head_str2 = " %-20s %9s %7s %9s %7s %9s %7s %9s %7s %17s " % ('', 'RMSD', 'denom', 'RMSD', 'denom', 'RMSD', 'denom', 'RMSD', 'denom', '')
229  printcool_dictionary(self.PrintDict,title=title_str + '\n' + column_head_str1 + '\n' + column_head_str2, center=[True,False,False])
231  def get(self, mvals, AGrad=False, AHess=False):
232  Answer = {'X':0.0, 'G':np.zeros(self.FF.np), 'H':np.zeros((self.FF.np, self.FF.np))}
233  self.PrintDict = OrderedDict()
234  # enable self.system_mval_masks (supported by OptGeoTarget_SMIRNOFF)
235  enable_system_mval_mask = hasattr(self, 'system_mval_masks')
236  def compute(mvals, p_idx=None):
237  ''' Compute total objective value for each system '''
238  self.FF.make(mvals)
239  v_obj_list = []
240  for sysname, sysopt in self.sys_opts.items():
241  # ref values of each type
242  vref_bonds = self.internal_coordinates[sysname]['vref_bonds']
243  vref_angles = self.internal_coordinates[sysname]['vref_angles']
244  vref_dihedrals = self.internal_coordinates[sysname]['vref_dihedrals']
245  vref_impropers = self.internal_coordinates[sysname]['vref_impropers']
246  # counts of each type
247  n_bonds = len(vref_bonds)
248  n_angles = len(vref_angles)
249  n_dihedrals = len(vref_dihedrals)
250  n_impropers = len(vref_impropers)
251  # use self.system_mval_masks to skip evaluations when computing gradients
252  if enable_system_mval_mask and in_fd() and (p_idx is not None) and (self.system_mval_masks[sysname][p_idx] == False):
253  v_obj_list += [0] * (n_bonds + n_angles + n_dihedrals + n_impropers)
254  continue
255  # read denominators from system options
256  bond_denom = sysopt['bond_denom']
257  angle_denom = sysopt['angle_denom']
258  dihedral_denom = sysopt['dihedral_denom']
259  improper_denom = sysopt['improper_denom']
260  # inverse demon to be scaling factors, 0 for denom 0
261  scale_bond = 1.0 / bond_denom if bond_denom != 0 else 0.0
262  scale_angle = 1.0 / angle_denom if angle_denom != 0 else 0.0
263  scale_dihedral = 1.0 / dihedral_denom if dihedral_denom != 0 else 0.0
264  scale_improper = 1.0 / improper_denom if improper_denom != 0 else 0.0
265  # calculate new internal coordinates
266  v_ic = self.system_driver(sysname)
267  # objective contribution from bonds
268  vtar_bonds = v_ic['bonds']
269  diff_bond = ((vref_bonds - vtar_bonds) * scale_bond).tolist() if n_bonds > 0 else []
270  # objective contribution from angles
271  vtar_angles = v_ic['angles']
272  diff_angle = (periodic_diff(vref_angles, vtar_angles, 360) * scale_angle).tolist() if n_angles > 0 else []
273  # objective contribution from dihedrals
274  vtar_dihedrals = v_ic['dihedrals']
275  diff_dihedral = (periodic_diff(vref_dihedrals, vtar_dihedrals, 360) * scale_dihedral).tolist() if n_dihedrals > 0 else []
276  # objective contribution from improper dihedrals
277  vtar_impropers = v_ic['impropers']
278  diff_improper = (periodic_diff(vref_impropers, vtar_impropers, 360) * scale_improper).tolist() if n_impropers > 0 else []
279  # combine objective values into a big result list
280  sys_obj_list = diff_bond + diff_angle + diff_dihedral + diff_improper
281  # extend the result v_obj_list by individual terms in this system
282  v_obj_list += sys_obj_list
283  # save print string
284  if not in_fd():
285  # For printing, we group the RMSD by type
286  rmsd_bond = compute_rmsd(vref_bonds, vtar_bonds)
287  rmsd_angle = compute_rmsd(vref_angles, vtar_angles, v_periodic=360)
288  rmsd_dihedral = compute_rmsd(vref_dihedrals, vtar_dihedrals, v_periodic=360)
289  rmsd_improper = compute_rmsd(vref_impropers, vtar_impropers, v_periodic=360)
290  obj_total = sum(v**2 for v in sys_obj_list)
291  self.PrintDict[sysname] = "% 9.3f % 7.2f % 9.3f % 7.2f % 9.3f % 7.2f % 9.3f % 7.2f %17.3f" % (rmsd_bond, \
292  bond_denom, rmsd_angle, angle_denom, rmsd_dihedral, dihedral_denom, rmsd_improper, improper_denom, obj_total)
293  return np.array(v_obj_list, dtype=float)
294 
295  V = compute(mvals)
296  Answer['X'] = np.dot(V,V)
297  # write objective decomposition if wanted
298  if self.writelevel > 0:
299  # recover mvals
300  self.FF.make(mvals)
301  with open('rmsd_decomposition.txt', 'w') as fout:
302  for sysname in self.internal_coordinates:
303  fout.write("\n[ %s ]\n" % sysname)
304  fout.write('%-25s %15s %15s %15s\n' % ("Internal Coordinate", "Ref QM Value", "Cur MM Value", "Difference"))
305  # reference data
306  sys_data = self.internal_coordinates[sysname]
307  sys_data['ic_bonds']
308  # compute all internal coordinate values again and save mm optimized geometry in xyz file
309  v_ic = self.system_driver(sysname, save_mol='%s_mmopt.xyz' % sysname)
310  for p in ['bonds', 'angles', 'dihedrals', 'impropers']:
311  fout.write('--- ' + p + ' ---\n')
312  ic_list = sys_data['ic_' + p]
313  ref_v = sys_data['vref_' + p]
314  tar_v = v_ic[p]
315  # print each value
316  for ic, v1, v2 in zip(ic_list, ref_v, tar_v):
317  diff = periodic_diff(v1, v2, v_periodic=360) if p != 'bonds' else v1-v2
318  fout.write('%-25s %15.5f %15.5f %+15.3e\n' % (ic, v1, v2, diff))
319  # compute gradients and hessian
320  dV = np.zeros((self.FF.np,len(V)))
321  if AGrad or AHess:
322  for p in self.pgrad:
323  dV[p,:], _ = f12d3p(fdwrap(compute, mvals, p, p_idx = p), h = self.h, f0 = V)
324 
325  for p in self.pgrad:
326  Answer['G'][p] = 2*np.dot(V, dV[p,:])
327  for q in self.pgrad:
328  Answer['H'][p,q] = 2*np.dot(dV[p,:], dV[q,:])
329  if not in_fd():
330  self.objective = Answer['X']
331  self.FF.make(mvals)
332  return Answer
Nifty functions, intended to be imported by any module within ForceBalance.
def system_driver(self, sysname, save_mol=None)
Run calculation for one system, return internal coordinate values after optimization.
Subclass of Target for fitting MM optimized geometries to QM optimized geometries.
def in_fd()
Invoking this function from anywhere will tell us whether we're being called by a finite-difference f...
def fdwrap(func, mvals0, pidx, key=None, kwargs)
A function wrapper for finite difference designed for differentiating 'get'-type functions.
def create_engines(self, engine_args)
def warn_press_key(warning, timeout=10)
Definition: nifty.py:1599
def printcool_dictionary(Dict, title="Dictionary Keys : Values", bold=False, color=2, keywidth=25, topwidth=50, center=True, leftpad=0)
See documentation for printcool; this is a nice way to print out keys/values in a dictionary...
Definition: nifty.py:366
def periodic_diff(a, b, v_periodic)
convenient function for computing the minimum difference in periodic coordinates Parameters a: np...
def compute_rmsd(ref, tar, v_periodic=None)
Compute the RMSD between two arrays, supporting periodic difference.
def parse_optgeo_options(filename)
Parse an optgeo_options.txt file into specific OptGeoTarget Target Options.
def f12d3p(f, h, f0=None)
A three-point finite difference stencil.
def get(self, mvals, AGrad=False, AHess=False)
Compute total objective value for each system.