ForceBalance API  1.3
Automated optimization of force fields and empirical potentials
parser.py
Go to the documentation of this file.
1 """ @package forcebalance.parser Input file parser for ForceBalance jobs. Additionally, the location for all default options.
2 
3 Although I will do my best to write good documentation,
4 for many programs the input parser becomes the most up-to-date
5 source for documentation. So this is a great place to write
6 lots of comments for those who implement new functionality.
7 
8 There are two types of sections for options - GENERAL and TARGET.
9 Since there can be many fitting targets within a single job (i.e. we
10 may wish to fit water trimers and hexamers, which constitutes two
11 fitting targets) the input is organized into sections, like so:
12 
13 $options\n
14 gen_option_1 Big\n
15 gen_option_2 Mao\n
16 $target\n
17 tgt_option_1 Sniffy\n
18 tgt_option_2 Schmao\n
19 $target\n
20 tgt_option_1 Nifty\n
21 tgt_option_2 Jiffy\n
22 $end
23 
24 In this case, two sets of target options are generated in addition to the general option.
25 
26 (Note: "Target" used to be called "Simulation". Backwards compatibility is maintained.)
27 
28 Each option is meant to be parsed as a certain variable type.
29 
30 - String option values are read in directly; note that only the first two words in the line are processed
31 - Some strings are capitalized when they are read in; this is mainly for function tables like OptTab and TgtTab
32 - List option types will pick up all of the words on the line and use them as values,
33 plus if the option occurs more than once it will aggregate all of the values.
34 - Integer and float option types are read in a pretty straightforward way
35 - Boolean option types are always set to true, unless the second word is '0', 'no', or 'false' (not case sensitive)
36 - Section option types are meant to treat more elaborate inputs, such
37 as the user pasting in output parameters from a previous job as input,
38 or a specification of internal coordinate system. I imagine that for
39 every section type I would have to write my own parser. Maybe a
40 ParsTab of parsing functions would work. :)
41 
42 To add a new option, simply add it to the dictionaries below and give it a default value if desired.
43 If you add an entirely new type, make sure to implement the interpretation of that type in the parse_inputs function.
44 
45 @author Lee-Ping Wang
46 @date 11/2012
47 """
48 from __future__ import absolute_import
49 
50 from builtins import str
51 import os
52 import re
53 import sys
54 import itertools
55 import traceback
56 from .nifty import printcool, printcool_dictionary, which, isfloat
57 from copy import deepcopy
58 from collections import OrderedDict
59 
60 from forcebalance.output import getLogger
61 logger = getLogger(__name__)
62 
63 
66 gen_opts_types = {
67  'strings' : {"gmxpath" : (which('mdrun'), 60, 'Path for GROMACS executables (if not the default)', 'All targets that use GROMACS', ['GMX']),
68  "gmxsuffix" : ('', 60, 'The suffix of GROMACS executables', 'All targets that use GROMACS', ['GMX']),
69  "tinkerpath" : (which('testgrad'), 60, 'Path for TINKER executables (if not the default)', 'All targets that use TINKER', ['TINKER']),
70  "penalty_type" : ("L2", 100, 'Type of the penalty: L2, L1 or Box', 'All optimizations'),
71  "scan_vals" : (None, -100, 'Values to scan in the parameter space, given like this: -0.1:0.1:11', 'Job types scan_mvals and scan_pvals'),
72  "readchk" : (None, -50, 'Name of the restart file we read from', 'Restart jobtype "newton" with "writechk" set'),
73  "writechk" : (None, -50, 'Name of the restart file we write to (can be same as readchk)', 'Main optimizer'),
74  "ffdir" : ('forcefield', 100, 'Directory containing force fields, relative to project directory', 'All'),
75  "amoeba_pol" : (None, 0, 'The AMOEBA polarization type, either direct, mutual, or nonpolarizable.', 'Targets in OpenMM / TINKER that use the AMOEBA force field', ['OPENMM','TINKER']),
76  "amberhome" : (None, -10, 'Path to AMBER installation directory (leave blank to use AMBERHOME environment variable.', 'Targets that use AMBER', 'AMBER'),
77  },
78  'allcaps' : {"jobtype" : ("single", 200, 'The calculation type, defaults to a single-point evaluation of objective function.',
79  'All (important); choose "single", "gradient", "hessian", "newton" (Main Optimizer), "bfgs", "powell", "simplex", "anneal", "genetic", "conjugategradient", "scan_mvals", "scan_pvals", "fdcheck[gh]"'),
80  },
81  'lists' : {"forcefield" : ([], 200, 'The names of force fields, corresponding to directory forcefields/file_name.(itp,xml,prm,frcmod,mol2)', 'All (important)'),
82  "scanindex_num" : ([], -100, 'Numerical index of the parameter to scan over', 'Job types scan_mvals and scan_pvals'),
83  "scanindex_name" : ([], -100, 'Parameter name to scan over (should convert to a numerical index)', 'Job types scan_mvals and scan_pvals')
84  },
85  'ints' : {"maxstep" : (100, 50, 'Maximum number of steps in an optimization', 'Main Optimizer'),
86  "objective_history" : (2, 20, 'Number of good optimization steps to average over when checking the objective convergence criterion', 'Main Optimizer (jobtype "newton")'),
87  "wq_port" : (0, 0, 'The port number to use for Work Queue', 'Targets that use Work Queue (advanced usage)'),
88  "criteria" : (1, 160, 'The number of convergence criteria that must be met for main optimizer to converge', 'Main Optimizer'),
89  "rpmd_beads" : (0, -160, 'Number of beads in ring polymer MD (zero to disable)', 'Condensed phase property targets (advanced usage)', 'liquid_openmm'),
90  "zerograd" : (-1, 0, 'Set to a nonnegative number to turn on zero gradient skipping at that optimization step.', 'All'),
91  },
92  'bools' : {"backup" : (1, 10, 'Write temp directories to backup before wiping them'),
93  "writechk_step" : (1, -50, 'Write the checkpoint file at every optimization step'),
94  "converge_lowq" : (0, -50, 'Allow convergence on "low quality" steps'),
95  "have_vsite" : (0, -150, 'Specify whether there are virtual sites in the simulation (being fitted or not). Enforces calculation of vsite positions.', 'Experimental feature in ESP fitting', ['ABINITIO']),
96  "constrain_charge" : (0, 10, 'Specify whether to constrain the charges on the molecules.', 'Printing the force field (all calculations)'),
97  "print_gradient" : (1, 20, 'Print the objective function gradient at every step', 'Main Optimizer'),
98  "logarithmic_map" : (0, -150, 'Optimize in the space of log-variables', 'Creating the force field (all calculations, advanced usage)'),
99  "print_hessian" : (0, 20, 'Print the objective function Hessian at every step', 'Main Optimizer'),
100  "print_parameters" : (1, 20, 'Print the mathematical and physical parameters at every step', 'Main Optimizer'),
101  "normalize_weights": (1, 100, 'Normalize the weights for the fitting targets', 'Objective function (all calculations)'),
102  "verbose_options" : (0, 150, 'Set to false to suppress printing options that are equal to their defaults', 'Printing output'),
103  "rigid_water" : (0, -150, 'Perform calculations using rigid water molecules.', 'Currently used in AMOEBA parameterization (advanced usage)', ['OPENMM','TINKER']),
104  "constrain_h" : (0, -150, 'Perform calculations with contrained hydrogen bond lengths.', 'Used in liquid-OpenMM', ['OPENMM']),
105  "vsite_bonds" : (0, -150, 'Generate bonds from virtual sites to host atom bonded atoms.', 'Currently used in AMOEBA parameterization (advanced usage)', ['OPENMM','TINKER']),
106  "use_pvals" : (0, -150, 'Bypass the transformation matrix and use the physical parameters directly', 'Creating the force field; advanced usage, be careful.'),
107  "asynchronous" : (0, 0, 'Execute Work Queue tasks and local calculations asynchronously for improved speed', 'Targets that use Work Queue (advanced usage)'),
108  "reevaluate" : (None, 0, 'Re-evaluate the objective function and gradients when the step is rejected (for noisy objective functions).', 'Main Optimizer'),
109  "continue" : (0, 140, 'Continue the current run from where we left off (supports mid-iteration recovery).', 'Main Optimizer'),
110  "duplicate_pnames" : (0, -150, 'Allow duplicate parameter names (only if you know what you are doing!', 'Force Field Parser'),
111  },
112  'floats' : {"trust0" : (1e-1, 100, 'Levenberg-Marquardt trust radius; set to negative for nonlinear search', 'Main Optimizer'),
113  "mintrust" : (0.0, 10, 'Minimum trust radius (if the trust radius is tiny, then noisy optimizations become really gnarly)', 'Main Optimizer'),
114  "convergence_objective" : (1e-4, 100, 'Convergence criterion of objective function (in MainOptimizer this is the stdev of X2 over [objective_history] steps)', 'Main Optimizer'),
115  "convergence_gradient" : (1e-3, 100, 'Convergence criterion of gradient norm', 'Main Optimizer'),
116  "convergence_step" : (1e-4, 100, 'Convergence criterion of step size (just needs to fall below this threshold)', 'Main Optimizer'),
117  "eig_lowerbound" : (1e-4, 10, 'Minimum eigenvalue for applying steepest descent correction', 'Main Optimizer'),
118  "step_lowerbound" : (1e-6, 10, 'Optimization will "fail" if step falls below this size', 'Main Optimizer'),
119  "lm_guess" : (1.0, 9, 'Guess value for bracketing line search in trust radius algorithm', 'Main Optimizer'),
120  "finite_difference_h" : (1e-3, 50, 'Step size for finite difference derivatives in many functions', 'pretty much everywhere'),
121  "finite_difference_factor" : (0.1, 40, 'Make sure that the finite difference step size does not exceed this multiple of the trust radius.', 'Main Optimizer'),
122  "penalty_additive" : (0.0, 55, 'Factor for additive penalty function in objective function', 'Objective function, all penalty types'),
123  "penalty_multiplicative" : (0.0, 55, 'Factor for multiplicative penalty function in objective function', 'Objective function, all penalty types'),
124  "penalty_alpha" : (1e-3, 53, 'Extra parameter for fusion penalty function. Dictates position of log barrier or L1-L0 switch distance',
125  'Objective function, FUSION_BARRIER or FUSION_L0 penalty type, advanced usage in basis set optimizations'),
126  "penalty_hyperbolic_b" : (1e-6, 54, 'Cusp region for hyperbolic constraint; for x=0, the Hessian is a/2b', 'Penalty type L1'),
127  "penalty_power" : (2.0, 52, 'Power of the Euclidean norm of the parameter vector (default 2.0 is normal L2 penalty)', 'Penalty type L2'),
128  "adaptive_factor" : (0.25, 10, 'The step size is increased / decreased by up to this much in the event of a good / bad step; increase for a more variable step size.', 'Main Optimizer'),
129  "adaptive_damping" : (0.5, 10, 'Damping factor that ties down the trust radius to trust0; decrease for a more variable step size.', 'Main Optimizer'),
130  "error_tolerance" : (0.0, 10, 'Error tolerance; the optimizer will only reject steps that increase the objective function by more than this number.', 'Main Optimizer'),
131  "search_tolerance" : (1e-4, -10, 'Search tolerance; used only when trust radius is negative, dictates convergence threshold of nonlinear search.', 'Main Optimizer with negative mintrust; advanced usage'),
132  "amoeba_eps" : (None, -10, 'The AMOEBA mutual polarization criterion.', 'Targets in OpenMM / TINKER that use the AMOEBA force field', ['OPENMM','TINKER']),
133  },
134  'sections': {"read_mvals" : (None, 100, 'Paste mathematical parameters into the input file for them to be read in directly', 'Restarting an optimization'),
135  "read_pvals" : (None, 100, 'Paste physical parameters into the input file for them to be read in directly', 'Restarting an optimization (recommend use_mvals instead)'),
136  "priors" : (OrderedDict(), 150, 'Paste priors into the input file for them to be read in directly', 'Scaling and regularization of parameters (important)')
137  }
138  }
139 
140 
141 tgt_opts_types = {
142  'strings' : {"force_map" : ('residue', 0, 'The resolution of mapping interactions to net forces and torques for groups of atoms. In order of resolution: molecule > residue > charge-group', 'Force Matching', 'AbInitio'),
143  "fragment1" : ('', 0, 'Interaction fragment 1: a selection of atoms specified using atoms and dashes, e.g. 1-6 to select the first through sixth atom (i.e. list numbering starts from 1)', 'Interaction energies', 'Interaction'),
144  "fragment2" : ('', 0, 'Interaction fragment 2: a selection of atoms specified using atoms and dashes, e.g. 7-11 to select atoms 7 through 11.', 'Interaction energies', 'Interaction'),
145  "openmm_precision" : (None, -10, 'Precision of OpenMM calculation if using CUDA or OpenCL platform. Choose either single, double or mixed ; defaults to the OpenMM default.', 'Targets that use OpenMM', 'OpenMM'),
146  "openmm_platform" : (None, -10, 'OpenMM platform. Choose either Reference, CUDA or OpenCL. AMOEBA is on Reference or CUDA only.', 'Targets that use OpenMM', 'OpenMM'),
147  "qdata_txt" : (None, -10, 'Text file containing quantum data. If not provided, will search for a default (qdata.txt).', 'Energy/force matching, ESP evaluations, interaction energies', 'TINKER'),
148  "inter_txt" : ('interactions.txt', 0, 'Text file containing interacting systems. If not provided, will search for a default.', 'Binding energy target', 'BindingEnergy'),
149  "optgeo_options_txt" : ('optgeo_options.txt', 0, 'Text file containing extra options for Optimized Geometry target. If not provided, will search for a default.', 'Optimized Geometry target', 'OptGeoTarget'),
150  "reassign_modes" : (None, -180, 'Reassign modes before fitting frequencies, using either linear assignment "permute" or maximum overlap "overlap".', 'Vibrational frequency targets', 'vibration'),
151  "liquid_coords" : (None, 0, 'Provide file name for condensed phase coordinates.', 'Condensed phase properties', 'Liquid'),
152  "gas_coords" : (None, 0, 'Provide file name for gas phase coordinates.', 'Condensed phase properties', 'Liquid'),
153  "nvt_coords" : (None, 0, 'Provide file name for condensed phase NVT coordinates.', 'Condensed phase properties', 'Liquid'),
154  "lipid_coords" : (None, 0, 'Provide file name for lipid coordinates.', 'Condensed phase properties', 'Lipid'),
155  "coords" : (None, -10, 'Coordinates for single point evaluation; if not provided, will search for a default.', 'Energy/force matching, ESP evaluations, interaction energies'),
156  "pdb" : (None, -10, 'PDB file mainly used for building OpenMM and AMBER systems.', 'Targets that use AMBER and OpenMM', 'AMBER, OpenMM'),
157  "gmx_mdp" : (None, -10, 'Gromacs .mdp files. If not provided, will search for default.', 'Targets that use GROMACS', 'GMX'),
158  "gmx_top" : (None, -10, 'Gromacs .top files. If not provided, will search for default.', 'Targets that use GROMACS', 'GMX'),
159  "gmx_ndx" : (None, -10, 'Gromacs .ndx files. If not provided, will search for default.', 'Targets that use GROMACS', 'GMX'),
160  "amber_leapcmd" : (None, -10, 'File containing commands for "tleap" when setting up AMBER simulations.', 'Targets that use AMBER', 'AMBER'),
161  "tinker_key" : (None, -10, 'TINKER .key files. If not provided, will search for default.', 'Targets that use TINKER', 'TINKER'),
162  "expdata_txt" : ('expset.txt', 0, 'Text file containing experimental data.', 'Thermodynamic properties target', 'thermo'),
163  "hfedata_txt" : ('hfedata.txt', 0, 'Text file containing experimental data.', 'Hydration free energy target', 'hydration'),
164  "hfemode" : ('single', 0, 'Method for calculating hydration energies (single point, FEP, TI).', 'Hydration free energy target', 'hydration'),
165  "read" : (None, 50, 'Provide a temporary directory ".tmp" to read data from a previous calculation on the initial iteration (for instance, to restart an aborted run).', 'Liquid and Remote targets', 'Liquid, Remote'),
166  "remote_prefix" : ('', 50, 'Specify an optional prefix script to run in front of rtarget.py, for loading environment variables', 'Remote targets', 'Remote'),
167  "fitatoms" : ('0', 0, 'Number of fitting atoms; defaults to all of them. Use a comma and dash style list (1,2-5), atoms numbered from one, inclusive', 'Energy + Force Matching', 'AbInitio'),
168  "subset" : (None, 0, 'Specify a subset of molecules to fit. The rest are used for cross-validation.', 'Hydration free energy target', 'Hydration'),
169  "gmx_eq_barostat" : ('berendsen', 0, 'Name of the barostat to use for equilibration.', 'Condensed phase property targets, Gromacs only', 'liquid, lipid'),
170  "evaluator_input" : ('evaluator_input.json', 0, 'JSON file containing options for the OpenFF Evaluator target. If not provided, will search for a default.', 'OpenFF Evaluator target', 'Evaluator_SMIRNOFF'),
171  },
172  'allcaps' : {"type" : (None, 200, 'The type of fitting target, for instance AbInitio_GMX ; this must correspond to the name of a Target subclass.', 'All targets (important)' ,''),
173  "engine" : (None, 180, 'The external code used to execute the simulations (GMX, TINKER, AMBER, OpenMM)', 'All targets (important)', '')
174  },
175  'lists' : {"name" : ([], 200, 'The name of the target, corresponding to the directory targets/name ; may provide a list if multiple targets have the same settings', 'All targets (important)'),
176  "fd_ptypes" : ([], -100, 'The parameter types that are differentiated using finite difference', 'In conjunction with fdgrad, fdhess, fdhessdiag; usually not needed'),
177  "quantities" : ([], 100, 'List of quantities to be fitted, each must have corresponding Quantity subclass', 'Thermodynamic properties target', 'thermo'),
178  "mol2" : ([], 50, 'List of .mol2 files needed to set up the system (in addition to any specified under forcefield)', 'All targets that use SMIRNOFF', 'smirnoff')
179  },
180  'ints' : {"shots" : (-1, 0, 'Number of snapshots; defaults to all of the snapshots', 'Energy + Force Matching', 'AbInitio'),
181  "sleepy" : (0, -50, 'Wait a number of seconds every time this target is visited (gives me a chance to ctrl+C)', 'All targets (advanced usage)'),
182  "liquid_md_steps" : (10000, 0, 'Number of time steps for the liquid production run.', 'Condensed phase property targets', 'liquid'),
183  "liquid_eq_steps" : (1000, 0, 'Number of time steps for the liquid equilibration run.', 'Condensed phase property targets', 'liquid'),
184  "lipid_md_steps" : (10000, 0, 'Number of time steps for the lipid production run.', 'Condensed phase property targets', 'lipid'),
185  "lipid_eq_steps" : (1000, 0, 'Number of time steps for the lipid equilibration run.', 'Condensed phase property targets', 'lipid'),
186  "n_mcbarostat" : (25, 0, 'Number of steps in the liquid simulation between MC barostat volume adjustments.', 'Liquid properties in OpenMM', 'Liquid_OpenMM'),
187  "gas_md_steps" : (100000, 0, 'Number of time steps for the gas production run, if different from default.', 'Condensed phase property targets', 'liquid'),
188  "gas_eq_steps" : (10000, 0, 'Number of time steps for the gas equilibration run, if different from default.', 'Condensed phase property targets', 'liquid'),
189  "nvt_md_steps" : (100000, 0, 'Number of time steps for the liquid NVT production run.', 'Condensed phase property targets', 'liquid'),
190  "nvt_eq_steps" : (10000, 0, 'Number of time steps for the liquid NVT equilibration run.', 'Condensed phase property targets', 'liquid'),
191  "writelevel" : (0, 0, 'Affects the amount of data being printed to the temp directory.', 'Energy + Force Matching', 'AbInitio'),
192  "md_threads" : (1, 0, 'Set the number of threads used by Gromacs or TINKER processes in MD simulations', 'Condensed phase properties in GROMACS and TINKER', 'Liquid_GMX, Lipid_GMX, Liquid_TINKER'),
193  "save_traj" : (0, -10, 'Whether to save trajectories. 0 = Never save; 1 = Delete if optimization step is good; 2 = Always save', 'Condensed phase properties', 'Liquid, Lipid'),
194  "eq_steps" : (20000, 0, 'Number of time steps for the equilibration run.', 'Thermodynamic property targets', 'thermo'),
195  "md_steps" : (50000, 0, 'Number of time steps for the production run.', 'Thermodynamic property targets', 'thermo'),
196  "n_sim_chain" : (1, 0, 'Number of simulations required to calculate quantities.', 'Thermodynamic property targets', 'thermo'),
197  "n_molecules" : (-1, 0, 'Provide the number of molecules in the structure (defaults to auto-detect).', 'Condensed phase properties', 'Liquid'),
198  },
199  'bools' : {"fdgrad" : (0, -100, 'Finite difference gradient of objective function w/r.t. specified parameters', 'Use together with fd_ptypes (advanced usage)'),
200  "fdhess" : (0, -100, 'Finite difference Hessian of objective function w/r.t. specified parameters', 'Use together with fd_ptypes (advanced usage)'),
201  "fdhessdiag" : (0, -100, 'Finite difference Hessian diagonals w/r.t. specified parameters (costs 2np times a objective calculation)', 'Use together with fd_ptypes (advanced usage)'),
202  "all_at_once" : (1, -50, 'Compute all energies and forces in one fell swoop where possible(as opposed to calling the simulation code once per snapshot)', 'Various QM targets and MD codes', 'AbInitio'),
203  "run_internal" : (1, -50, 'For OpenMM or other codes with Python interface: Compute energies and forces internally', 'OpenMM interface', 'OpenMM'),
204  "energy" : (1, 0, 'Enable the energy objective function', 'All ab initio targets', 'AbInitio'),
205  "force" : (1, 0, 'Enable the force objective function', 'All ab initio targets', 'AbInitio'),
206  "resp" : (0, -150, 'Enable the RESP objective function', 'Ab initio targets with RESP; experimental (remember to set espweight)'),
207  "do_cosmo" : (0, -150, 'Call Q-Chem to do MM COSMO on MM snapshots.', 'Currently unused, but possible in AbInitio target'),
208  "optimize_geometry": (1, 0, 'Perform a geometry optimization before computing properties', 'Monomer properties', 'moments'),
209  "absolute" : (0, -150, 'When matching energies in AbInitio, do not subtract the mean energy gap.', 'Energy matching (advanced usage)', 'abinitio'),
210  "cauchy" : (0, 0, 'Normalize interaction energies each using 1/(denom**2 + reference**2) which resembles a Cauchy distribution', 'Interaction energy targets', 'interaction'),
211  "attenuate" : (0, 0, 'Normalize interaction energies using 1/(denom**2 + reference**2) only for repulsive interactions greater than denom.', 'Interaction energy targets', 'interaction'),
212  "normalize" : (0, -150, 'Divide objective function by the number of snapshots / vibrations', 'Interaction energy / vibrational mode targets', 'interaction, vibration'),
213  "w_normalize" : (0, 0, 'Normalize the condensed phase property contributions to the liquid / lipid property target', 'Condensed phase property targets', 'liquid, lipid'),
214  "manual" : (0, -150, 'Give the user a chance to fill in condensed phase stuff on the zeroth step', 'Condensed phase property targets (advanced usage)', 'liquid'),
215  "hvap_subaverage" : (0, -150, 'Don\'t target the average enthalpy of vaporization and allow it to freely float (experimental)', 'Condensed phase property targets (advanced usage)', 'liquid'),
216  "force_cuda" : (0, -150, 'Force the external npt.py script to crash if CUDA Platform not available', 'Condensed phase property targets (advanced usage)', 'liquid_openmm'),
217  "anisotropic_box" : (0, -150, 'Enable anisotropic box scaling (e.g. for crystals or two-phase simulations) in external npt.py script', 'Condensed phase property targets (advanced usage)', 'liquid_openmm, liquid_tinker'),
218  "mts_integrator" : (0, -150, 'Enable multiple-timestep integrator in external npt.py script', 'Condensed phase property targets (advanced usage)', 'liquid_openmm'),
219  "minimize_energy" : (1, 0, 'Minimize the energy of the system prior to running dynamics', 'Condensed phase property targets (advanced usage)', 'liquid_openmm', 'liquid_tinker'),
220  "remote" : (0, 50, 'Evaluate target as a remote work_queue task', 'All targets (optional)'),
221  "adapt_errors" : (0, 50, 'Adapt to simulation uncertainty by combining property estimations and adjusting simulation length.', 'Condensed phase property targets', 'liquid'),
222  "force_average" : (0, -50, 'Average over all atoms when normalizing force errors.', 'Force matching', 'abinitio'),
223  "remote_backup" : (0, -50, 'When running remote target, back up files at the remote location.', 'Liquid, lipid and remote targets', 'liquid, lipid, remote'),
224  "pure_num_grad" : (0, -50, 'Pure numerical gradients -- launch two additional simulations for each perturbed forcefield parameter, and compute derivatives using 3-point formula. (This is very expensive and should only serve as a sanity check)')
225  },
226  'floats' : {"weight" : (1.0, 150, 'Weight of the target (determines its importance vs. other targets)', 'All targets (important)'),
227  "w_rho" : (1.0, 0, 'Weight of experimental density', 'Condensed phase property targets', 'liquid, lipid'),
228  "w_hvap" : (1.0, 0, 'Weight of enthalpy of vaporization', 'Condensed phase property targets', 'liquid, lipid'),
229  "w_alpha" : (1.0, 0, 'Weight of thermal expansion coefficient', 'Condensed phase property targets', 'liquid, lipid'),
230  "w_kappa" : (1.0, 0, 'Weight of isothermal compressibility', 'Condensed phase property targets', 'liquid, lipid'),
231  "w_cp" : (1.0, 0, 'Weight of isobaric heat capacity', 'Condensed phase property targets', 'liquid, lipid'),
232  "w_eps0" : (1.0, 0, 'Weight of dielectric constant', 'Condensed phase property targets', 'liquid, lipid'),
233  "w_al" : (1.0, 0, 'Weight of average area per lipid', 'Lipid property targets', 'lipid'),
234  "w_scd" : (1.0, 0, 'Weight of deuterium order parameter', 'Lipid property targets', 'lipid'),
235  "w_energy" : (1.0, 0, 'Weight of energy', 'Ab initio targets', 'liquid'),
236  "w_force" : (1.0, 0, 'Weight of atomistic forces', 'Ab initio targets', 'liquid'),
237  "w_surf_ten" : (0.0, 0, 'Weight of surface tension', 'Condensed phase property targets', 'liquid'),
238  "w_netforce" : (0.0, 0, 'Weight of net forces (condensed to molecules, residues, or charge groups)', 'Ab initio targets', 'abinitio'),
239  "w_torque" : (0.0, 0, 'Weight of torques (condensed to molecules, residues, or charge groups)', 'Ab initio targets', 'abinitio'),
240  "w_resp" : (0.0, -150, 'Weight of RESP', 'Ab initio targets with RESP (advanced usage)', 'abinitio'),
241  "resp_a" : (0.001, -150, 'RESP "a" parameter for strength of penalty; 0.001 is strong, 0.0005 is weak', 'Ab initio targets with RESP (advanced usage)', 'abinitio'),
242  "resp_b" : (0.1, -150, 'RESP "b" parameter for hyperbolic behavior; 0.1 is recommended', 'Ab initio targets with RESP (advanced usage)', 'abinitio'),
243  "energy_upper" : (30.0, 0, 'Upper energy cutoff (in kcal/mol); super-repulsive interactions are given zero weight', 'Interaction energy targets', 'interaction'),
244  "hfe_temperature" : (298.15, -100, 'Simulation temperature for hydration free energies (Kelvin)', 'Hydration free energy using molecular dynamics', 'hydration'),
245  "hfe_pressure" : (1.0, -100, 'Simulation temperature for hydration free energies (atm)', 'Hydration free energy using molecular dynamics', 'hydration'),
246  "energy_denom" : (1.0, 0, 'Energy normalization for binding energies in kcal/mol (default is to use stdev)', 'Binding energy targets', 'binding'),
247  "energy_rms_override" : (0.0, 0, 'If nonzero, override the Energy RMS used to normalize the energy part of the objective function term', 'Energy matching', 'abinitio'),
248  "force_rms_override" : (0.0, 0, 'If nonzero, override the Force RMS used to normalize the energy part of the objective function term', 'Force matching', 'abinitio'),
249  "rmsd_denom" : (0.1, 0, 'RMSD normalization for optimized geometries in Angstrom', 'Binding energy targets', 'binding'),
250  "wavenumber_tol" : (10.0, 0, 'Frequency normalization (in wavenumber) for vibrational frequencies', 'Vibrational frequency targets', 'vibration'),
251  "dipole_denom" : (1.0, 0, 'Dipole normalization (Debye) ; set to 0 if a zero weight is desired', 'Monomer property targets', 'monomer'),
252  "quadrupole_denom" : (1.0, 0, 'Quadrupole normalization (Buckingham) ; set to 0 if a zero weight is desired', 'Monomer property targets', 'monomer'),
253  "polarizability_denom" : (1.0, 0, 'Dipole polarizability tensor normalization (cubic Angstrom) ; set to 0 if a zero weight is desired', 'Monomer property targets with polarizability', 'monomer'),
254  "liquid_timestep" : (1.0, 0, 'Time step size for the liquid simulation.', 'Condensed phase property targets', 'liquid'),
255  "liquid_interval" : (0.1, 0, 'Time interval for saving coordinates for the liquid production run.', 'Condensed phase property targets', 'liquid'),
256  "gas_timestep" : (1.0, 0, 'Time step size for the gas simulation (if zero, use default in external script.).', 'Condensed phase property targets', 'liquid'),
257  "gas_interval" : (0.1, 0, 'Time interval for saving coordinates for the gas production run (if zero, use default in external script.)', 'Condensed phase property targets', 'liquid'),
258  "lipid_timestep" : (1.0, 0, 'Time step size for the lipid simulation.', 'Lipid property targets', 'lipid'),
259  "lipid_interval" : (0.1, 0, 'Time interval for saving coordinates for the lipid production run.', 'Lipid property targets', 'lipid'),
260  "nvt_timestep" : (1.0, 0, 'Time step size for the NVT simulation.', 'Condensed phase property targets', 'liquid'),
261  "nvt_interval" : (0.1, 0, 'Time interval for saving coordinates for the NVT simulation production run.', 'Condensed phase property targets', 'liquid'),
262  "self_pol_mu0" : (0.0, -150, 'Gas-phase dipole parameter for self-polarization correction (in debye).', 'Condensed phase property targets', 'liquid'),
263  "self_pol_alpha" : (0.0, -150, 'Polarizability parameter for self-polarization correction (in debye).', 'Condensed phase property targets', 'liquid'),
264  "epsgrad" : (0.0, -150, 'Gradient below this threshold will be set to zero.', 'All targets'),
265  "energy_asymmetry": (1.0, -150, 'Snapshots with (E_MM - E_QM) < 0.0 will have their weights increased by this factor.', 'Ab initio targets'),
266  "nonbonded_cutoff" : (None, -1, 'Cutoff for nonbonded interactions (passed to engines).', 'Condensed phase property targets', 'liquid'),
267  "vdw_cutoff" : (None, -2, 'Cutoff for vdW interactions if different from other nonbonded interactions', 'Condensed phase property targets', 'liquid'),
268  "liquid_fdiff_h" : (1e-2, 0, 'Step size for finite difference derivatives for liquid targets in pure_num_grad', 'Condensed phase property targets', 'liquid'),
269  "restrain_k" : (1.0, 0, 'Force constant for harmonic positional energy restraints', 'Torsion profile with MM relaxation target', 'torsionprofile'),
270  },
271  'sections': {}
272  }
273 
274 all_opts_names = list(itertools.chain(*[i.keys() for i in gen_opts_types.values()])) + list(itertools.chain(*[i.keys() for i in tgt_opts_types.values()]))
276 for i in all_opts_names:
277  iocc = []
278  for typ, dct in gen_opts_types.items():
279  if i in dct:
280  iocc.append("gen_opt_types %s" % typ)
281  for typ, dct in tgt_opts_types.items():
282  if i in dct:
283  iocc.append("gen_opt_types %s" % typ)
284  if len(iocc) != 1:
285  logger.error("CODING ERROR: ForceBalance option %s occurs in more than one place (%s)\n" % (i, str(iocc)))
286  raise RuntimeError
287 
288 
289 gen_opts_defaults = {}
290 for t in gen_opts_types:
291  subdict = {}
292  for i in gen_opts_types[t]:
293  subdict[i] = gen_opts_types[t][i][0]
294  gen_opts_defaults.update(subdict)
295 
296 
297 tgt_opts_defaults = {}
298 for t in tgt_opts_types:
299  subdict = {}
300  for i in tgt_opts_types[t]:
301  subdict[i] = tgt_opts_types[t][i][0]
302  tgt_opts_defaults.update(subdict)
303 
304 
305 bkwd = {"simtype" : "type",
306  "masterfile" : "inter_txt",
307  "openmm_cuda_precision" : "openmm_precision",
308  "mdrun_threads" : "md_threads",
309  "mts_vvvr" : "mts_integrator",
310  "amoeba_polarization" : "amoeba_pol",
311  "liquid_prod_steps" : "liquid_md_steps",
312  "gas_prod_steps" : "gas_md_steps",
313  "liquid_equ_steps" : "liquid_eq_steps",
314  "gas_equ_steps" : "gas_eq_steps",
315  "lipid_prod_steps" : "lipid_md_steps",
316  "lipid_equ_steps" : "lipid_eq_steps",
317  }
318 
319 
320 mainsections = ["SIMULATION","TARGET","OPTIONS","END","NONE"]
322 def read_mvals(fobj):
323  Answer = []
324  for line in fobj:
325  if re.match("(/read_mvals)|(^\$end)",line):
326  break
327  Answer.append(float(line.split('[', maxsplit=1)[-1].split(']', maxsplit=1)[0].split()[-1]))
328  return Answer
329 
330 def read_pvals(fobj):
331  Answer = []
332  for line in fobj:
333  if re.match("(/read_pvals)|(^\$end)",line):
334  break
335  Answer.append(float(line.split('[', maxsplit=1)[-1].split(']', maxsplit=1)[0].split()[-1]))
336  return Answer
337 
338 def read_priors(fobj):
339  Answer = OrderedDict()
340  for line in fobj:
341  line = line.split("#")[0]
342  if re.match("(/priors)|(^\$end)",line):
343  break
344  Answer[line.split()[0]] = float(line.split()[-1])
345  return Answer
346 
347 def read_internals(fobj):
348  return
349 
350 
351 ParsTab = {"read_mvals" : read_mvals,
352  "read_pvals" : read_pvals,
353  "priors" : read_priors,
354  "internal" : read_internals
355  }
356 
357 def printsection(heading,optdict,typedict):
358  """ Print out a section of the input file in a parser-compliant and readable format.
359 
360  At the time of writing of this function, it's mainly intended to be called by MakeInputFile.py.
361  The heading is printed first (it is something like $options or $target). Then it loops
362  through the variable types (strings, allcaps, etc...) and the keys in each variable type.
363  The one-line description of each key is printed out as a comment, and then the key itself is
364  printed out along with the value provided in optdict. If optdict is None, then the default
365  value is printed out instead.
366 
367  @param[in] heading Heading, either $options or $target
368  @param[in] optdict Options dictionary or None.
369  @param[in] typedict Option type dictionary, either gen_opts_types or tgt_opts_types specified in this file.
370  @return Answer List of strings for the section that we are printing out.
371 
372  """
373  from forcebalance.objective import Implemented_Targets
374  from forcebalance.optimizer import Optimizer
375 
376  def FilterTargets(search):
377  if type(search) == str:
378  search = [search]
379  list_out = []
380  for key in sorted(Implemented_Targets.keys()):
381  if any([i.lower() in key.lower() for i in search]):
382  list_out.append(Implemented_Targets[key].__name__)
383  return ', '.join(sorted(list_out))
384 
385  Answer = [heading]
386  firstentry = 1
387  Options = []
388  for i in ['strings','allcaps','lists','ints','bools','floats','sections']:
389  vartype = re.sub('s$','',i)
390  for j in typedict[i]:
391  Option = []
392  val = optdict[j] if optdict is not None else typedict[i][j][0]
393  if firstentry:
394  firstentry = 0
395  else:
396  Option.append("")
397  Priority = typedict[i][j][1]
398  Option.append("# (%s) %s" % (vartype, typedict[i][j][2]))
399  if len(typedict[i][j]) >= 4:
400  Relevance = typedict[i][j][3]
401  str2 = "# used in: %s" % Relevance
402  if len(typedict[i][j]) >= 5:
403  TargetName = FilterTargets(typedict[i][j][4])
404  str2 += " (%s)" % TargetName
405  else:
406  TargetName = "None"
407  Option.append(str2)
408  else:
409  Relevance = "None"
410  Option.append("%s %s" % (str(j),str(val)))
411  Options.append((Option, Priority, TargetName, j))
412  def key1(o):
413  return o[1]
414  def key2(o):
415  return o[2]
416  def key3(o):
417  return o[3]
418  Options.sort(key=key3)
419  Options.sort(key=key2)
420  Options.sort(key=key1, reverse=True)
421  for o in Options:
422  Answer += o[0]
423 
424  # PriSet = sorted(list(set(Priorities)))[::-1]
425  # TgtSet = sorted(list(set(TargetNames)))
426  # RelSet = sorted(list(set(Relevances)))
427  # for p0 in PriSet:
428  # ogrp = []
429  # rgrp = []
430  # tgrp = []
431  # for o, p, r, t in zip(Options, Priorities, Relevances, TargetNames):
432  # if p == p0:
433  # ogrp.append(o)
434  # rgrp.append(r)
435  # tgrp.append(t)
436  # ogrp2 = []
437  # rgrp2 = []
438  # for t0 in TgtSet:
439  # for o, r, t in zip(ogrp, rgrp, tgrp):
440  # if t == t0:
441  # ogrp2.append(
442 
443  Answer.append("$end")
444  return Answer
445 
446 def parse_inputs(input_file=None):
447  """ Parse through the input file and read all user-supplied options.
448 
449  This is usually the first thing that happens when an executable script is called.
450  Our parser first loads the default options, and then updates these options as it
451  encounters keywords.
452 
453  Each keyword corresponds to a variable type; each variable type (e.g. string,
454  integer, float, boolean) is treated differently. For more elaborate inputs,
455  there is a 'section' variable type.
456 
457  There is only one set of general options, but multiple sets of target options.
458  Each target has its own section delimited by the \em $target keyword,
459  and we build a list of target options.
460 
461  @param[in] input_file The name of the input file.
462  @return options General options.
463  @return tgt_opts List of fitting target options.
464 
465  @todo Implement internal coordinates.
466  @todo Implement sampling correction.
467  @todo Implement charge groups.
468  """
469 
470  logger.info("Reading options from file: %s\n" % input_file)
471  section = "NONE"
472  # First load in all of the default options.
473  options = deepcopy(gen_opts_defaults) # deepcopy to make sure options doesn't make changes to gen_opts_defaults
474  options['root'] = os.getcwd()
475  options['input_file'] = input_file
476  tgt_opts = []
477  this_tgt_opt = deepcopy(tgt_opts_defaults)
478  # Give back a bunch of default options if input file isn't specified.
479  if input_file is None:
480  return (options, [this_tgt_opt])
481  fobj = open(input_file)
482  for line in fobj:
483  try:
484  # Anything after "#" is a comment
485  line = line.split("#")[0].strip()
486  s = line.split()
487  # Skip over blank lines
488  if len(s) == 0:
489  continue
490  key = s[0].lower()
491  if key in bkwd: # Do option replacement for backward compatibility.
492  key = bkwd[key]
493  # If line starts with a $, this signifies that we're in a new section.
494  if re.match('^\$',line):
495  newsection = re.sub('^\$','',line).upper()
496  if section in ["SIMULATION","TARGET"] and newsection in mainsections:
497  tgt_opts.append(this_tgt_opt)
498  this_tgt_opt = deepcopy(tgt_opts_defaults)
499  if newsection == "END": newsection = "NONE"
500  section = newsection
501  elif section in ["OPTIONS","SIMULATION","TARGET"]:
502 
504  (this_opt, opts_types) = (options, gen_opts_types) if section == "OPTIONS" else (this_tgt_opt, tgt_opts_types)
505 
506  if len(s) > 1 and s[1].upper() == "NONE":
507  this_opt[key] = None
508  elif key in opts_types['strings']:
509  this_opt[key] = s[1]
510  elif key in opts_types['allcaps']:
511  this_opt[key] = s[1].upper()
512  elif key in opts_types['lists']:
513  for word in s[1:]:
514  this_opt.setdefault(key,[]).append(word)
515  elif key in opts_types['ints']:
516  if isfloat(s[1]):
517  this_opt[key] = int(float(s[1]))
518  else:
519  this_opt[key] = int(s[1])
520  elif key in opts_types['bools']:
521  if len(s) == 1:
522  this_opt[key] = True
523  elif s[1].upper() in ["0", "NO", "FALSE", "OFF"]:
524  this_opt[key] = False
525  elif isfloat(s[1]) and int(float(s[1])) == 0:
526  this_opt[key] = False
527  elif s[1].upper() in ["1", "YES", "TRUE", "ON"]:
528  this_opt[key] = True
529  elif isfloat(s[1]) and int(float(s[1])) == 1:
530  this_opt[key] = True
531  else:
532  logger.error('%s is a true/false option but you provided %s; to enable, provide ["1", "yes", "true", "on" or <no value>]. To disable, provide ["0", "no", "false", or "off"].\n' % (key, s[1]))
533  raise RuntimeError
534  elif key in opts_types['floats']:
535  this_opt[key] = float(s[1])
536  elif key in opts_types['sections']:
537  this_opt[key] = ParsTab[key](fobj)
538  else:
539  logger.error("Unrecognized keyword: --- \x1b[1;91m%s\x1b[0m --- in %s section\n" \
540  % (key, section))
541  logger.error("Perhaps this option actually belongs in %s section?\n" \
542  % (section == "OPTIONS" and "a TARGET" or "the OPTIONS"))
543  raise RuntimeError
544  elif section == "NONE" and len(s) > 0:
545  logger.error("Encountered a non-comment line outside of a section\n")
546  raise RuntimeError
547  elif section not in mainsections:
548  logger.error("Unrecognized section: %s\n" % section)
549  raise RuntimeError
550  except:
551  # traceback.print_exc()
552  logger.exception("Failed to read in this line! Check your input file.\n")
553  logger.exception('\x1b[91m' + line + '\x1b[0m\n')
554  raise RuntimeError
555  if section == "SIMULATION" or section == "TARGET":
556  tgt_opts.append(this_tgt_opt)
557  if not options['verbose_options']:
558  printcool("Options at their default values are not printed\n Use 'verbose_options True' to Enable", color=5)
559  # Expand target options (i.e. create multiple tgt_opts dictionaries if multiple target names are specified)
560  tgt_opts_x = []
561  for topt in tgt_opts:
562  for name in topt['name']:
563  toptx = deepcopy(topt)
564  toptx['name'] = name
565  tgt_opts_x.append(toptx)
566  return options, tgt_opts_x
def read_mvals(fobj)
Definition: parser.py:323
Optimization algorithms.
def read_priors(fobj)
Definition: parser.py:339
def read_internals(fobj)
Definition: parser.py:348
def which(fnm)
Definition: nifty.py:1362
ForceBalance objective function.
def printsection(heading, optdict, typedict)
Print out a section of the input file in a parser-compliant and readable format.
Definition: parser.py:374
def parse_inputs(input_file=None)
Parse through the input file and read all user-supplied options.
Definition: parser.py:471
def printcool(text, sym="#", bold=False, color=2, ansi=None, bottom='-', minwidth=50, center=True, sym2="=")
Cool-looking printout for slick formatting of output.
Definition: nifty.py:321
def isfloat(word)
Matches ANY number; it can be a decimal, scientific notation, integer, or what have you...
Definition: molecule.py:406
def read_pvals(fobj)
Definition: parser.py:331