ForceBalance API  1.3
Automated optimization of force fields and empirical potentials
molecule.py
Go to the documentation of this file.
1 from __future__ import absolute_import
2 from __future__ import division
3 from __future__ import print_function
4 
5 import copy
6 import imp
7 import itertools
8 import os
9 import re
10 import sys
11 import sysconfig
12 import json
13 from collections import OrderedDict, namedtuple, Counter
14 from ctypes import *
15 from datetime import date
16 from warnings import warn
17 
18 import numpy as np
19 from numpy import sin, cos, arccos
20 from numpy.linalg import multi_dot
21 from pkg_resources import parse_version
22 
23 # For Python 3 compatibility
24 try:
25  from itertools import zip_longest as zip_longest
26 except ImportError:
27  from itertools import izip_longest as zip_longest
28 
29 # For Python 2 backwards-compatibility
30 try:
31  input = raw_input
32 except NameError:
33  pass
34 
35 # Special error which is thrown when TINKER .arc data is detected in a .xyz file
36 class ActuallyArcError(IOError):
37  pass
38 
39 # ======================================================================#
40 # | |#
41 # | Chemical file format conversion module |#
42 # | |#
43 # | Lee-Ping Wang (leeping@ucdavis.edu) |#
44 # | Last updated March 31, 2019 |#
45 # | |#
46 # | This code is part of ForceBalance and is covered under the |#
47 # | ForceBalance copyright notice and 3-clause BSD license. |#
48 # | Please see https://github.com/leeping/forcebalance for details. |#
49 # | |#
50 # | Feedback and suggestions are encouraged. |#
51 # | |#
52 # | What this is for: |#
53 # | Converting a molecule between file formats |#
54 # | Loading and processing of trajectories |#
55 # | (list of geometries for the same set of atoms) |#
56 # | Concatenating or slicing trajectories |#
57 # | Combining molecule metadata (charge, Q-Chem rem variables) |#
58 # | |#
59 # | Supported file formats: |#
60 # | See the __init__ method in the Molecule class. |#
61 # | |#
62 # | Note to self / developers: |#
63 # | Please make this file as standalone as possible |#
64 # | (i.e. don't introduce dependencies). If we load an external |#
65 # | library to parse a file, do so with 'try / except' so that |#
66 # | the module is still usable even if certain parts are missing. |#
67 # | It's better to be like a Millennium Falcon. :P |#
68 # | |#
69 # | At present, when I perform operations like adding two objects, |#
70 # | the sum is created from deep copies of data members in the |#
71 # | originals. This is because copying by reference is confusing; |#
72 # | suppose if I do B += A and modify something in B; it should not |#
73 # | change in A. |#
74 # | |#
75 # | A consequence of this is that data members should not be too |#
76 # | complicated; they should be things like lists or dicts, and NOT |#
77 # | contain references to themselves. |#
78 # | |#
79 # | To-do list: Handling of comments is still not very good. |#
80 # | Comments from previous files should be 'passed on' better. |#
81 # | |#
82 # | Contents of this file: |#
83 # | 0) Names of data variables |#
84 # | 1) Imports |#
85 # | 2) Subroutines |#
86 # | 3) Molecule class |#
87 # | a) Class customizations (add, getitem) |#
88 # | b) Instantiation |#
89 # | c) Core functionality (read, write) |#
90 # | d) Reading functions |#
91 # | e) Writing functions |#
92 # | f) Extra stuff |#
93 # | 4) "main" function (if executed) |#
94 # | |#
95 # | Required: Python 2.7 or 3.6 |#
96 # | (2.6, 3.5 and earlier untested) |#
97 # | NumPy 1.6 |#
98 # | Optional: Mol2, PDB, DCD readers |#
99 # | (can be found in ForceBalance) |#
100 # | NetworkX package (for topologies) |#
101 # | |#
102 # | Thanks: Todd Dolinsky, Yong Huang, |#
103 # | Kyle Beauchamp (PDB) |#
104 # | John Stone (DCD Plugin) |#
105 # | Pierre Tuffery (Mol2 Plugin) |#
106 # | #python IRC chat on FreeNode |#
107 # | |#
108 # | Contributors: Leah Isseroff Bendavid |#
109 # | Yudong Qiu |#
110 # | |#
111 # | Instructions: |#
112 # | |#
113 # | To import: |#
114 # | from molecule import Molecule |#
115 # | To create a Molecule object: |#
116 # | MyMol = Molecule(fnm) |#
117 # | To convert to a new file format: |#
118 # | MyMol.write('newfnm.format') |#
119 # | To concatenate geometries: |#
120 # | MyMol += MyMolB |#
121 # | |#
122 # ======================================================================#
123 
124 # =========================================#
125 # | DECLARE VARIABLE NAMES HERE |#
126 # | |#
127 # | Any member variable in the Molecule |#
128 # | class must be declared here otherwise |#
129 # | the Molecule class won't recognize it |#
130 # =========================================#
131 # | Data attributes in FrameVariableNames |#
132 # | must be a list along the frame axis, |#
133 # | and they must have the same length. |#
134 # =========================================#
135 # xyzs = List of arrays of atomic xyz coordinates
136 # comms = List of comment strings
137 # boxes = List of 3-element or 9-element arrays for periodic boxes
138 # qm_grads = List of arrays of gradients (i.e. negative of the atomistic forces) from QM calculations
139 # qm_espxyzs = List of arrays of xyz coordinates for ESP evaluation
140 # qm_espvals = List of arrays of ESP values
141 # qm_zpe = Zero point energy, kcal/mol (from a qchem freq calculation)
142 # qm_entropy = Entropy contribution at STP, cal/mol.K (from a qchem freq calculation)
143 # qm_enthalpy= Enthalpic contribution at STP, excluding electronic energy and ZPE, kcal/mol (from a qchem freq calculation)
144 
145 FrameVariableNames = {'xyzs', 'comms', 'boxes', 'qm_hessians', 'qm_grads', 'qm_energies', 'qm_interaction',
146  'qm_espxyzs', 'qm_espvals', 'qm_extchgs', 'qm_mulliken_charges', 'qm_mulliken_spins', 'qm_zpe',
147  'qm_entropy', 'qm_enthalpy', 'qm_bondorder'}
148 #=========================================#
149 #| Data attributes in AtomVariableNames |#
150 #| must be a list along the atom axis, |#
151 #| and they must have the same length. |#
152 #=========================================#
153 # elem = List of elements
154 # partial_charge = List of atomic partial charges
155 # atomname = List of atom names (can come from MM coordinate file)
156 # atomtype = List of atom types (can come from MM force field)
157 # tinkersuf = String that comes after the XYZ coordinates in TINKER .xyz or .arc files
158 # resid = Residue IDs (can come from MM coordinate file)
159 # resname = Residue names
160 # terminal = List of true/false denoting whether this atom is followed by a terminal group.
161 AtomVariableNames = {'elem', 'partial_charge', 'atomname', 'atomtype', 'tinkersuf', 'resid', 'resname', 'qcsuf',
162  'qm_ghost', 'chain', 'altloc', 'icode', 'terminal'}
163 #=========================================#
164 #| This can be any data attribute we |#
165 #| want but it's usually some property |#
166 #| of the molecule not along the frame |#
167 #| atom axis. |#
168 #=========================================#
169 # bonds = A list of 2-tuples representing bonds. Carefully pruned when atom subselection is done.
170 # fnm = The file name that the class was built from
171 # qcrems = The Q-Chem 'rem' variables stored as a list of OrderedDicts
172 # qctemplate = The Q-Chem template file, not including the coordinates or rem variables
173 # charge = The net charge of the molecule
174 # mult = The spin multiplicity of the molecule
175 MetaVariableNames = {'fnm', 'ftype', 'qcrems', 'qctemplate', 'qcerr', 'charge', 'mult', 'bonds', 'topology',
176  'molecules'}
177 # Variable names relevant to quantum calculations explicitly
178 QuantumVariableNames = {'qcrems', 'qctemplate', 'charge', 'mult', 'qcsuf', 'qm_ghost', 'qm_bondorder'}
179 # Superset of all variable names.
180 AllVariableNames = QuantumVariableNames | AtomVariableNames | MetaVariableNames | FrameVariableNames
181 
182 
183 #================================#
184 # Set up the logger #
185 #================================#
186 if "forcebalance" in __name__:
187  # If this module is part of ForceBalance, use the package level logger
188  from .output import *
189  package = "ForceBalance"
190 elif "geometric" in __name__:
191  # This ensures logging behavior is consistent with the rest of geomeTRIC
192  from .nifty import logger
193  package = "geomeTRIC"
194 else:
195  # Previous default behavior if FB package level loggers could not be imported
196  from logging import *
197  class RawStreamHandler(StreamHandler):
198  """Exactly like output.StreamHandler except it does no extra formatting
199  before sending logging messages to the stream. This is more compatible with
200  how output has been displayed in ForceBalance. Default stream has also been
201  changed from stderr to stdout"""
202  def __init__(self, stream = sys.stdout):
203  super(RawStreamHandler, self).__init__(stream)
204 
205  def emit(self, record):
206  message = record.getMessage()
207  self.stream.write(message)
208  self.flush()
209  # logger=getLogger()
210  # logger.handlers = [RawStreamHandler(sys.stdout)]
211  # LPW: Daniel Smith suggested the below four lines to improve logger behavior
212  logger = getLogger("MoleculeLogger")
213  logger.setLevel(INFO)
214  handler = RawStreamHandler()
215  logger.addHandler(handler)
216  if __name__ == "__main__":
217  package = "LPW-molecule.py"
218  else:
219  package = __name__.split('.')[0]
220 
221 module_name = __name__.replace('.molecule','')
223 # Covalent radii from Cordero et al. 'Covalent radii revisited' Dalton Transactions 2008, 2832-2838.
224 Radii = [0.31, 0.28, # H and He
225  1.28, 0.96, 0.84, 0.76, 0.71, 0.66, 0.57, 0.58, # First row elements
226  0.00, 1.41, 1.21, 1.11, 1.07, 1.05, 1.02, 1.06, # Second row elements
227  # 1.66, 1.41, 1.21, 1.11, 1.07, 1.05, 1.02, 1.06, # Second row elements
228  2.03, 1.76, 1.70, 1.60, 1.53, 1.39, 1.61, 1.52, 1.50,
229  1.24, 1.32, 1.22, 1.22, 1.20, 1.19, 1.20, 1.20, 1.16, # Third row elements, K through Kr
230  2.20, 1.95, 1.90, 1.75, 1.64, 1.54, 1.47, 1.46, 1.42,
231  1.39, 1.45, 1.44, 1.42, 1.39, 1.39, 1.38, 1.39, 1.40, # Fourth row elements, Rb through Xe
232  2.44, 2.15, 2.07, 2.04, 2.03, 2.01, 1.99, 1.98,
233  1.98, 1.96, 1.94, 1.92, 1.92, 1.89, 1.90, 1.87, # Fifth row elements, s and f blocks
234  1.87, 1.75, 1.70, 1.62, 1.51, 1.44, 1.41, 1.36,
235  1.36, 1.32, 1.45, 1.46, 1.48, 1.40, 1.50, 1.50, # Fifth row elements, d and p blocks
236  2.60, 2.21, 2.15, 2.06, 2.00, 1.96, 1.90, 1.87, 1.80, 1.69] # Sixth row elements
237 
238 # A list that gives you the element if you give it the atomic number, hence the 'none' at the front.
239 Elements = ["None",'H','He',
240  'Li','Be','B','C','N','O','F','Ne',
241  'Na','Mg','Al','Si','P','S','Cl','Ar',
242  'K','Ca','Sc','Ti','V','Cr','Mn','Fe','Co','Ni','Cu','Zn','Ga','Ge','As','Se','Br','Kr',
243  'Rb','Sr','Y','Zr','Nb','Mo','Tc','Ru','Rh','Pd','Ag','Cd','In','Sn','Sb','Te','I','Xe',
244  'Cs','Ba','La','Ce','Pr','Nd','Pm','Sm','Eu','Gd','Tb','Dy','Ho','Er','Tm','Yb',
245  'Lu','Hf','Ta','W','Re','Os','Ir','Pt','Au','Hg','Tl','Pb','Bi','Po','At','Rn',
246  'Fr','Ra','Ac','Th','Pa','U','Np','Pu','Am','Cm','Bk','Cf','Es','Fm','Md','No','Lr','Rf','Db','Sg','Bh','Hs','Mt']
247 
248 # Dictionary of atomic masses ; also serves as the list of elements (periodic table)
249 #
250 # Atomic mass data was updated on 2020-05-07 from NIST:
251 # "Atomic Weights and Isotopic Compositions with Relative Atomic Masses"
252 # https://www.nist.gov/pml/atomic-weights-and-isotopic-compositions-relative-atomic-masses
253 # using All Elements -> preformatted ASCII table.
254 #
255 # The standard atomic weight was provided in several different formats:
256 # Two numbers in brackets as in [1.00784,1.00811] : The average value of the two limits is used.
257 # With parentheses(uncert) as in 4.002602(2) : The parentheses was split off and all significant digits are used.
258 # A single number in brackets as in [98] : The single number was used
259 # Not provided (for Am, Z=95 and up): The mass number of the lightest isotope was used
260 PeriodicTable = OrderedDict([("H", 1.007975), ("He", 4.002602), # First row
261  ("Li", 6.9675), ("Be", 9.0121831), ("B", 10.8135), ("C", 12.0106), ("N", 14.006855), ("O", 15.99940), ("F", 18.99840316), ("Ne", 20.1797), # Second row Li-Ne
262  ("Na", 22.98976928), ("Mg", 24.3055), ("Al", 26.9815385), ("Si", 28.085), ("P", 30.973762), ("S", 32.0675), ("Cl", 35.4515), ("Ar", 39.948), # Third row Na-Ar
263  ("K", 39.0983), ("Ca", 40.078), ("Sc", 44.955908), ("Ti", 47.867), ("V", 50.9415), ("Cr", 51.9961), ("Mn", 54.938044), ("Fe", 55.845), ("Co", 58.933194), # Fourth row K-Kr
264  ("Ni", 58.6934), ("Cu", 63.546), ("Zn", 65.38), ("Ga", 69.723), ("Ge", 72.63), ("As", 74.921595), ("Se", 78.971), ("Br", 79.904), ("Kr", 83.798),
265  ("Rb", 85.4678), ("Sr", 87.62), ("Y", 88.90584), ("Zr", 91.224), ("Nb", 92.90637), ("Mo", 95.95), ("Tc", 98.), ("Ru", 101.07), ("Rh", 102.9055), # Fifth row Rb-Xe
266  ("Pd", 106.42), ("Ag", 107.8682), ("Cd", 112.414), ("In", 114.818), ("Sn", 118.71), ("Sb", 121.76), ("Te", 127.6), ("I", 126.90447), ("Xe", 131.293),
267  ("Cs", 132.905452), ("Ba", 137.327), ("La", 138.90547), ("Ce", 140.116), ("Pr", 140.90766), ("Nd", 144.242), ("Pm", 145.), ("Sm", 150.36), # Sixth row Cs-Rn
268  ("Eu", 151.964), ("Gd", 157.25), ("Tb", 158.92535), ("Dy", 162.5), ("Ho", 164.93033), ("Er", 167.259), ("Tm", 168.93422), ("Yb", 173.054),
269  ("Lu", 174.9668), ("Hf", 178.49), ("Ta", 180.94788), ("W", 183.84), ("Re", 186.207), ("Os", 190.23), ("Ir", 192.217), ("Pt", 195.084),
270  ("Au", 196.966569), ("Hg", 200.592), ("Tl", 204.3835), ("Pb", 207.2), ("Bi", 208.9804), ("Po", 209.), ("At", 210.), ("Rn", 222.),
271  ("Fr", 223.), ("Ra", 226.), ("Ac", 227.), ("Th", 232.0377), ("Pa", 231.03588), ("U", 238.02891), ("Np", 237.), ("Pu", 244.), # Seventh row Fr-Og
272  ("Am", 241.), ("Cm", 243.), ("Bk", 247.), ("Cf", 249.), ("Es", 252.), ("Fm", 257.), ("Md", 258.), ("No", 259.),
273  ("Lr", 262.), ("Rf", 267.), ("Db", 268.), ("Sg", 271.), ("Bh", 272.), ("Hs", 270.), ("Mt", 276.), ("Ds", 281.),
274  ("Rg", 280.), ("Cn", 285.), ("Nh", 284.), ("Fl", 289.), ("Mc", 288.), ("Lv", 293.), ("Ts", 292.), ("Og", 294.)])
275 
276 # Old masses used pre-2020, retained in case it is useful:
277 # PeriodicTable = OrderedDict([('H' , 1.0079), ('He' , 4.0026),
278 # ('Li' , 6.941), ('Be' , 9.0122), ('B' , 10.811), ('C' , 12.0107), ('N' , 14.0067), ('O' , 15.9994), ('F' , 18.9984), ('Ne' , 20.1797),
279 # ('Na' , 22.9897), ('Mg' , 24.305), ('Al' , 26.9815), ('Si' , 28.0855), ('P' , 30.9738), ('S' , 32.065), ('Cl' , 35.453), ('Ar' , 39.948),
280 # ('K' , 39.0983), ('Ca' , 40.078), ('Sc' , 44.9559), ('Ti' , 47.867), ('V' , 50.9415), ('Cr' , 51.9961), ('Mn' , 54.938), ('Fe' , 55.845), ('Co' , 58.9332),
281 # ('Ni' , 58.6934), ('Cu' , 63.546), ('Zn' , 65.39), ('Ga' , 69.723), ('Ge' , 72.64), ('As' , 74.9216), ('Se' , 78.96), ('Br' , 79.904), ('Kr' , 83.8),
282 # ('Rb' , 85.4678), ('Sr' , 87.62), ('Y' , 88.9059), ('Zr' , 91.224), ('Nb' , 92.9064), ('Mo' , 95.94), ('Tc' , 98), ('Ru' , 101.07), ('Rh' , 102.9055),
283 # ('Pd' , 106.42), ('Ag' , 107.8682), ('Cd' , 112.411), ('In' , 114.818), ('Sn' , 118.71), ('Sb' , 121.76), ('Te' , 127.6), ('I' , 126.9045), ('Xe' , 131.293),
284 # ('Cs' , 132.9055), ('Ba' , 137.327), ('La' , 138.9055), ('Ce' , 140.116), ('Pr' , 140.9077), ('Nd' , 144.24), ('Pm' , 145), ('Sm' , 150.36),
285 # ('Eu' , 151.964), ('Gd' , 157.25), ('Tb' , 158.9253), ('Dy' , 162.5), ('Ho' , 164.9303), ('Er' , 167.259), ('Tm' , 168.9342), ('Yb' , 173.04),
286 # ('Lu' , 174.967), ('Hf' , 178.49), ('Ta' , 180.9479), ('W' , 183.84), ('Re' , 186.207), ('Os' , 190.23), ('Ir' , 192.217), ('Pt' , 195.078),
287 # ('Au' , 196.9665), ('Hg' , 200.59), ('Tl' , 204.3833), ('Pb' , 207.2), ('Bi' , 208.9804), ('Po' , 209), ('At' , 210), ('Rn' , 222),
288 # ('Fr' , 223), ('Ra' , 226), ('Ac' , 227), ('Th' , 232.0381), ('Pa' , 231.0359), ('U' , 238.0289), ('Np' , 237), ('Pu' , 244),
289 # ('Am' , 243), ('Cm' , 247), ('Bk' , 247), ('Cf' , 251), ('Es' , 252), ('Fm' , 257), ('Md' , 258), ('No' , 259),
290 # ('Lr' , 262), ('Rf' , 261), ('Db' , 262), ('Sg' , 266), ('Bh' , 264), ('Hs' , 277), ('Mt' , 268)])
291 
292 def getElement(mass):
293  return PeriodicTable.keys()[np.argmin([np.abs(m-mass) for m in PeriodicTable.values()])]
294 
295 def elem_from_atomname(atomname):
296  """ Given an atom name, attempt to get the element in most cases. """
297  return re.search('[A-Z][a-z]*',atomname).group(0)
299 if "forcebalance" in __name__:
300  #============================#
301  #| DCD read/write functions |#
302  #============================#
303  # Try to load _dcdlib.so either from a directory in the LD_LIBRARY_PATH
304  # or from the same directory as this module.
305  have_dcdlib = False
306  for fnm in ["_dcdlib.so",
307  os.path.join(imp.find_module(__name__.split('.')[0])[1],"_dcdlib.so"),
308  os.path.join(imp.find_module(__name__.split('.')[0])[1],"_dcdlib"+str(sysconfig.get_config_var('EXT_SUFFIX'))),
309  os.path.join(os.path.dirname(__file__),"_dcdlib.so"),
310  os.path.join(os.path.dirname(__file__),"_dcdlib"+str(sysconfig.get_config_var('EXT_SUFFIX')))]:
311  if os.path.exists(fnm):
312  _dcdlib = CDLL(fnm)
313  have_dcdlib = True
314  break
315  if not have_dcdlib:
316  logger.debug('Note: Cannot import optional dcdlib module to read/write DCD files.\n')
317 
318  #============================#
319  #| PDB read/write functions |#
320  #============================#
321  try:
322  from .PDB import *
323  except ImportError:
324  logger.debug('Note: Cannot import optional pdb module to read/write PDB files.\n')
325 
326  #=============================#
327  #| Mol2 read/write functions |#
328  #=============================#
329  try:
330  from . import Mol2
331  except ImportError:
332  logger.debug('Note: Cannot import optional Mol2 module to read .mol2 files.\n')
333 
334  #==============================#
335  #| OpenMM interface functions |#
336  #==============================#
337  try:
338  from simtk.unit import *
339  from simtk.openmm import *
340  from simtk.openmm.app import *
341  except ImportError:
342  logger.debug('Note: Cannot import optional OpenMM module.\n')
343 
344 elif "geometric" in __name__:
345  #============================#
346  #| PDB read/write functions |#
347  #============================#
348  try:
349  from .PDB import *
350  except ImportError:
351  logger.debug('Note: Failed to import optional pdb module to read/write PDB files.\n')
352  #==============================#
353  #| OpenMM interface functions |#
354  #==============================#
355  try:
356  from simtk.unit import *
357  from simtk.openmm import *
358  from simtk.openmm.app import *
359  except ImportError:
360  logger.debug('Note: Failed to import optional OpenMM module.\n')
361 
362 #===========================#
363 #| Convenience subroutines |#
364 #===========================#
365 
366 
367 bohr2ang = 0.529177210
368 
369 def unmangle(M1, M2):
370  """
371  Create a mapping that takes M1's atom indices to M2's atom indices based on position.
372 
373  If we start with atoms in molecule "PDB", and the new molecule "M"
374  contains re-numbered atoms, then this code works:
375 
376  M.elem = list(np.array(PDB.elem)[unmangled])
377  """
378  if M1.na != M2.na:
379  logger.error("Unmangler only deals with same number of atoms\n")
380  raise RuntimeError
381  unmangler = {}
382  for i in range(M1.na):
383  for j in range(M2.na):
384  if np.linalg.norm(M1.xyzs[0][i] - M2.xyzs[0][j]) < 0.1:
385  unmangler[j] = i
386  unmangled = [unmangler[i] for i in sorted(unmangler.keys())]
387  if len(unmangled) != M1.na:
388  logger.error("Unmangler failed (different structures?)\n")
389  raise RuntimeError
390  return unmangled
391 
392 def nodematch(node1,node2):
393  # Matching two nodes of a graph. Nodes are equivalent if the elements are the same
394  return node1['e'] == node2['e']
396 def isint(word):
397  """ONLY matches integers! If you have a decimal point? None shall pass!"""
398  return re.match('^[-+]?[0-9]+$',word)
399 
400 def isfloat(word):
401  """Matches ANY number; it can be a decimal, scientific notation, integer, or what have you"""
402  return re.match(r'^[-+]?[0-9]*\.?[0-9]*([eEdD][-+]?[0-9]+)?$',word)
403 
404 # Used to get the white spaces in a split line.
405 splitter = re.compile(r'(\s+|\S+)')
407 # Container for Bravais lattice vector. Three cell lengths, three angles, three vectors, volume, and TINKER trig functions.
408 Box = namedtuple('Box',['a','b','c','alpha','beta','gamma','A','B','C','V'])
409 radian = 180. / np.pi
411  """ This function takes in three lattice lengths and three lattice angles, and tries to return a complete box specification. """
412  b = a
413  c = a
414  alpha = 90
415  beta = 90
416  gamma = 90
417  alph = alpha*np.pi/180
418  bet = beta*np.pi/180
419  gamm = gamma*np.pi/180
420  v = np.sqrt(1 - cos(alph)**2 - cos(bet)**2 - cos(gamm)**2 + 2*cos(alph)*cos(bet)*cos(gamm))
421  Mat = np.array([[a, b*cos(gamm), c*cos(bet)],
422  [0, b*sin(gamm), c*((cos(alph)-cos(bet)*cos(gamm))/sin(gamm))],
423  [0, 0, c*v/sin(gamm)]])
424  L1 = Mat.dot(np.array([[1],[0],[0]]))
425  L2 = Mat.dot(np.array([[0],[1],[0]]))
426  L3 = Mat.dot(np.array([[0],[0],[1]]))
427  return Box(a,b,c,alpha,beta,gamma,np.array(L1).flatten(),np.array(L2).flatten(),np.array(L3).flatten(),v*a*b*c)
428 
429 def BuildLatticeFromLengthsAngles(a, b, c, alpha, beta, gamma):
430  """ This function takes in three lattice lengths and three lattice angles, and tries to return a complete box specification. """
431  alph = alpha*np.pi/180
432  bet = beta*np.pi/180
433  gamm = gamma*np.pi/180
434  v = np.sqrt(1 - cos(alph)**2 - cos(bet)**2 - cos(gamm)**2 + 2*cos(alph)*cos(bet)*cos(gamm))
435  Mat = np.array([[a, b*cos(gamm), c*cos(bet)],
436  [0, b*sin(gamm), c*((cos(alph)-cos(bet)*cos(gamm))/sin(gamm))],
437  [0, 0, c*v/sin(gamm)]])
438  L1 = Mat.dot(np.array([[1],[0],[0]]))
439  L2 = Mat.dot(np.array([[0],[1],[0]]))
440  L3 = Mat.dot(np.array([[0],[0],[1]]))
441  return Box(a,b,c,alpha,beta,gamma,np.array(L1).flatten(),np.array(L2).flatten(),np.array(L3).flatten(),v*a*b*c)
442 
443 def BuildLatticeFromVectors(v1, v2, v3):
444  """ This function takes in three lattice vectors and tries to return a complete box specification. """
445  a = np.linalg.norm(v1)
446  b = np.linalg.norm(v2)
447  c = np.linalg.norm(v3)
448  alpha = arccos(np.dot(v2, v3) / np.linalg.norm(v2) / np.linalg.norm(v3)) * radian
449  beta = arccos(np.dot(v1, v3) / np.linalg.norm(v1) / np.linalg.norm(v3)) * radian
450  gamma = arccos(np.dot(v1, v2) / np.linalg.norm(v1) / np.linalg.norm(v2)) * radian
451  alph = alpha*np.pi/180
452  bet = beta*np.pi/180
453  gamm = gamma*np.pi/180
454  v = np.sqrt(1 - cos(alph)**2 - cos(bet)**2 - cos(gamm)**2 + 2*cos(alph)*cos(bet)*cos(gamm))
455  Mat = np.array([[a, b*cos(gamm), c*cos(bet)],
456  [0, b*sin(gamm), c*((cos(alph)-cos(bet)*cos(gamm))/sin(gamm))],
457  [0, 0, c*v/sin(gamm)]])
458  L1 = Mat.dot(np.array([[1],[0],[0]]))
459  L2 = Mat.dot(np.array([[0],[1],[0]]))
460  L3 = Mat.dot(np.array([[0],[0],[1]]))
461  return Box(a,b,c,alpha,beta,gamma,np.array(L1).flatten(),np.array(L2).flatten(),np.array(L3).flatten(),v*a*b*c)
462 
463 #===========================#
464 #| Connectivity graph |#
465 #| Good for doing simple |#
466 #| topology tricks |#
467 #===========================#
468 try:
469  import networkx as nx
470  class MyG(nx.Graph):
471  def __init__(self):
472  super(MyG,self).__init__()
473  self.Alive = True
474  def __eq__(self, other):
475  # This defines whether two MyG objects are "equal" to one another.
476  if not self.Alive:
477  return False
478  if not other.Alive:
479  return False
480  return nx.is_isomorphic(self,other,node_match=nodematch)
481  def __hash__(self):
482  """ The hash function is something we can use to discard two things that are obviously not equal. Here we neglect the hash. """
483  return 1
484  def L(self):
485  """ Return a list of the sorted atom numbers in this graph. """
486  return sorted(list(self.nodes()))
487  def AStr(self):
488  """ Return a string of atoms, which serves as a rudimentary 'fingerprint' : '99,100,103,151' . """
489  return ','.join(['%i' % i for i in self.L()])
490  def e(self):
491  """ Return an array of the elements. For instance ['H' 'C' 'C' 'H']. """
492  elems = nx.get_node_attributes(self,'e')
493  return [elems[i] for i in self.L()]
494  def ef(self):
495  """ Create an Empirical Formula """
496  Formula = list(self.e())
497  return ''.join([('%s%i' % (k, Formula.count(k)) if Formula.count(k) > 1 else '%s' % k) for k in sorted(set(Formula))])
498  def x(self):
499  """ Get a list of the coordinates. """
500  coors = nx.get_node_attributes(self,'x')
501  return np.array([coors[i] for i in self.L()])
502 except ImportError:
503  logger.warning("Cannot import optional NetworkX module, topology tools won't work\n.")
504 
505 def TopEqual(mol1, mol2):
506  """ For the nanoreactor project: Determine whether two Molecule objects have the same topologies. """
507  # Checks to see if the two molecule objects have the same fragments.
508  GraphEqual = Counter(mol1.molecules) == Counter(mol2.molecules)
509  # Checks to see whether the molecule objects have the same atoms in each fragment.
510  AtomEqual = Counter([tuple(m.L()) for m in mol1.molecules]) == Counter([tuple(m.L()) for m in mol2.molecules])
511  return GraphEqual and AtomEqual
512 
513 def MolEqual(mol1, mol2):
514  """
515  Determine whether two Molecule objects have the same fragments by
516  looking at elements and connectivity graphs. This is less strict
517  than TopEqual (i.e. more often returns True).
518  """
519  if mol1.na != mol2.na : return False
520  if Counter(mol1.elem) != Counter(mol2.elem) : return False
521  return Counter(mol1.molecules) == Counter(mol2.molecules)
522 
523 def format_xyz_coord(element,xyz,tinker=False):
524  """ Print a line consisting of (element, x, y, z) in accordance with .xyz file format
525 
526  @param[in] element A chemical element of a single atom
527  @param[in] xyz A 3-element array containing x, y, z coordinates of that atom
528 
529  """
530  if tinker:
531  return "%-3s % 13.8f % 13.8f % 13.8f" % (element,xyz[0],xyz[1],xyz[2])
532  else:
533  return "%-5s % 15.10f % 15.10f % 15.10f" % (element,xyz[0],xyz[1],xyz[2])
535 def _format_83(f):
536  """Format a single float into a string of width 8, with ideally 3 decimal
537  places of precision. If the number is a little too large, we can
538  gracefully degrade the precision by lopping off some of the decimal
539  places. If it's much too large, we throw a ValueError"""
540  if -999.999 < f < 9999.999:
541  return '%8.3f' % f
542  if -9999999 < f < 99999999:
543  return ('%8.3f' % f)[:8]
544  raise ValueError('coordinate "%s" could not be represented '
545  'in a width-8 field' % f)
547 def format_gro_coord(resid, resname, aname, seqno, xyz):
548  """ Print a line in accordance with .gro file format, with six decimal points of precision
549 
550  Nine decimal points of precision are necessary to get forces below 1e-3 kJ/mol/nm.
551 
552  @param[in] resid The number of the residue that the atom belongs to
553  @param[in] resname The name of the residue that the atom belongs to
554  @param[in] aname The name of the atom
555  @param[in] seqno The sequential number of the atom
556  @param[in] xyz A 3-element array containing x, y, z coordinates of that atom
557 
558  """
559  return "%5i%-5s%5s%5i % 13.9f % 13.9f % 13.9f" % (resid,resname,aname,seqno,xyz[0],xyz[1],xyz[2])
560 
561 def format_xyzgen_coord(element,xyzgen):
562  """ Print a line consisting of (element, p, q, r, s, t, ...) where
563  (p, q, r) are arbitrary atom-wise data (this might happen, for
564  instance, with atomic charges)
565 
566  @param[in] element A chemical element of a single atom
567  @param[in] xyzgen A N-element array containing data for that atom
568 
569  """
570  return "%-5s" + ' '.join(["% 15.10f" % i] for i in xyzgen)
571 
572 def format_gro_box(box):
573  """ Print a line corresponding to the box vector in accordance with .gro file format
574 
575  @param[in] box Box NamedTuple
576 
577  """
578  if box.alpha == 90.0 and box.beta == 90.0 and box.gamma == 90.0:
579  return ' '.join(["% 13.9f" % (i/10) for i in [box.a, box.b, box.c]])
580  else:
581  return ' '.join(["% 13.9f" % (i/10) for i in [box.A[0], box.B[1], box.C[2], box.A[1], box.A[2], box.B[0], box.B[2], box.C[0], box.C[1]]])
582 
583 def is_gro_coord(line):
584  """ Determines whether a line contains GROMACS data or not
585 
586  @param[in] line The line to be tested
587 
588  """
589  sline = line.split()
590  if len(sline) == 6:
591  return all([isint(sline[2]),isfloat(sline[3]),isfloat(sline[4]),isfloat(sline[5])])
592  elif len(sline) == 5:
593  return all([isint(line[15:20]),isfloat(sline[2]),isfloat(sline[3]),isfloat(sline[4])])
594  else:
595  return 0
596 
597 def is_charmm_coord(line):
598  """ Determines whether a line contains CHARMM data or not
599 
600  @param[in] line The line to be tested
601 
602  """
603  sline = line.split()
604  if len(sline) >= 7:
605  return all([isint(sline[0]), isint(sline[1]), isfloat(sline[4]), isfloat(sline[5]), isfloat(sline[6])])
606  else:
607  return 0
608 
609 def is_gro_box(line):
610  """ Determines whether a line contains a GROMACS box vector or not
611 
612  @param[in] line The line to be tested
613 
614  """
615  sline = line.split()
616  if len(sline) == 9 and all([isfloat(i) for i in sline]):
617  return 1
618  elif len(sline) == 3 and all([isfloat(i) for i in sline]):
619  return 1
620  else:
621  return 0
622 
623 def add_strip_to_mat(mat,strip):
624  out = list(mat)
625  if out == [] and strip != []:
626  out = list(strip)
627  elif out != [] and strip != []:
628  for (i,j) in zip(out,strip):
629  i += list(j)
630  return out
631 
632 def pvec(vec):
633  return ''.join([' % .10e' % i for i in list(vec.flatten())])
634 
635 def grouper(n, iterable):
636  """ Groups a big long iterable into groups of ten or what have you. """
637  args = [iter(iterable)] * n
638  return list([e for e in t if e is not None] for t in zip_longest(*args))
639 
640 def even_list(totlen, splitsize):
641  """ Creates a list of number sequences divided as evenly as possible. """
642  joblens = np.zeros(splitsize,dtype=int)
643  subsets = []
644  for i in range(totlen):
645  joblens[i%splitsize] += 1
646  jobnow = 0
647  for i in range(splitsize):
648  subsets.append(list(range(jobnow, jobnow + joblens[i])))
649  jobnow += joblens[i]
650  return subsets
651 
652 class MolfileTimestep(Structure):
653  """ Wrapper for the timestep C structure used in molfile plugins. """
654  _fields_ = [("coords",POINTER(c_float)), ("velocities",POINTER(c_float)),
655  ("A",c_float), ("B",c_float), ("C",c_float), ("alpha",c_float),
656  ("beta",c_float), ("gamma",c_float), ("physical_time",c_double)]
657 
658 def both(A, B, key):
659  return key in A.Data and key in B.Data
660 
661 def diff(A, B, key):
662  if not (key in A.Data and key in B.Data) : return False
663  else:
664  if type(A.Data[key]) is np.ndarray:
665  return (A.Data[key] != B.Data[key]).any()
666  elif key == 'tinkersuf':
667  return [sorted([int(j) for j in i.split()]) for i in A.Data[key]] != \
668  [sorted([int(j) for j in i.split()]) for i in B.Data[key]]
669  else:
670  return A.Data[key] != B.Data[key]
671 
672 def either(A, B, key):
673  return key in A.Data or key in B.Data
674 
675 #===========================#
676 #| Alignment subroutines |#
677 #| Moments added 08/03/12 |#
678 #===========================#
679 def EulerMatrix(T1,T2,T3):
680  """ Constructs an Euler matrix from three Euler angles. """
681  DMat = np.zeros((3,3))
682  DMat[0,0] = np.cos(T1)
683  DMat[0,1] = np.sin(T1)
684  DMat[1,0] = -np.sin(T1)
685  DMat[1,1] = np.cos(T1)
686  DMat[2,2] = 1
687  CMat = np.zeros((3,3))
688  CMat[0,0] = 1
689  CMat[1,1] = np.cos(T2)
690  CMat[1,2] = np.sin(T2)
691  CMat[2,1] = -np.sin(T2)
692  CMat[2,2] = np.cos(T2)
693  BMat = np.zeros((3,3))
694  BMat[0,0] = np.cos(T3)
695  BMat[0,1] = np.sin(T3)
696  BMat[1,0] = -np.sin(T3)
697  BMat[1,1] = np.cos(T3)
698  BMat[2,2] = 1
699  EMat = multi_dot([BMat, CMat, DMat])
700  return EMat
701 
702 def ComputeOverlap(theta,elem,xyz1,xyz2):
703  """
704  Computes an 'overlap' between two molecules based on some
705  fictitious density. Good for fine-tuning alignment but gets stuck
706  in local minima.
707  """
708  xyz2R = np.dot(EulerMatrix(theta[0],theta[1],theta[2]), xyz2.T).T
709  Obj = 0.0
710  elem = np.array(elem)
711  for i in set(elem):
712  for j in np.where(elem==i)[0]:
713  for k in np.where(elem==i)[0]:
714  dx = xyz1[j] - xyz2R[k]
715  dx2 = np.dot(dx,dx)
716  Obj -= np.exp(-0.5*dx2)
717  return Obj
718 
719 def AlignToDensity(elem,xyz1,xyz2,binary=False):
720  """
721  Computes a "overlap density" from two frames.
722  This function can be called by AlignToMoments to get rid of inversion problems
723  """
724  grid = np.pi*np.array(list(itertools.product([0,1],[0,1],[0,1])))
725  ovlp = np.array([ComputeOverlap(e, elem, xyz1, xyz2) for e in grid]) # Mao
726  t1 = grid[np.argmin(ovlp)]
727  xyz2R = np.dot(EulerMatrix(t1[0],t1[1],t1[2]), xyz2.T).T.copy()
728  return xyz2R
729 
730 def AlignToMoments(elem,xyz1,xyz2=None):
731  """Pre-aligns molecules to 'moment of inertia'.
732  If xyz2 is passed in, it will assume that xyz1 is already
733  aligned to the moment of inertia, and it simply does 180-degree
734  rotations to make sure nothing is inverted."""
735  xyz = xyz1 if xyz2 is None else xyz2
736  I = np.zeros((3,3))
737  for i, xi in enumerate(xyz):
738  I += (np.dot(xi,xi)*np.eye(3) - np.outer(xi,xi))
739  # This is the original line from MSMBuilder, but we're choosing not to use masses
740  # I += PeriodicTable[elem[i]]*(np.dot(xi,xi)*np.eye(3) - np.outer(xi,xi))
741  A, B = np.linalg.eig(I)
742  # Sort eigenvectors by eigenvalue
743  BB = B[:, np.argsort(A)]
744  determ = np.linalg.det(BB)
745  Thresh = 1e-3
746  if np.abs(determ - 1.0) > Thresh:
747  if np.abs(determ + 1.0) > Thresh:
748  logger.info("in AlignToMoments, determinant is % .3f" % determ)
749  BB[:,2] *= -1
750  xyzr = np.dot(BB.T, xyz.T).T.copy()
751  if xyz2 is not None:
752  xyzrr = AlignToDensity(elem,xyz1,xyzr,binary=True)
753  return xyzrr
754  else:
755  return xyzr
756 
757 def get_rotate_translate(matrix1,matrix2):
758  # matrix2 contains the xyz coordinates of the REFERENCE
759  assert np.shape(matrix1) == np.shape(matrix2), 'Matrices not of same dimensions'
760 
761  # Store number of rows
762  nrows = np.shape(matrix1)[0]
763 
764  # Getting centroid position for each selection
765  avg_pos1 = matrix1.sum(axis=0)/nrows
766  avg_pos2 = matrix2.sum(axis=0)/nrows
767 
768  # Translation of matrices
769  avg_matrix1 = matrix1-avg_pos1
770  avg_matrix2 = matrix2-avg_pos2
771 
772  # Covariance matrix
773  covar = np.dot(avg_matrix1.T,avg_matrix2)
774 
775  # Do the SVD in order to get rotation matrix
776  v,s,wt = np.linalg.svd(covar)
777 
778  # Rotation matrix
779  # Transposition of v,wt
780  wvt = np.dot(wt.T, v.T)
781 
782  # Ensure a right-handed coordinate system
783  d = np.eye(3)
784  if np.linalg.det(wvt) < 0:
785  d[2,2] = -1.0
786 
787  rot_matrix = multi_dot([wt.T,d,v.T]).T
788  trans_matrix = avg_pos2-np.dot(avg_pos1,rot_matrix)
789  return trans_matrix, rot_matrix
790 
791 def cartesian_product2(arrays):
792  """ Form a Cartesian product of two NumPy arrays. """
793  la = len(arrays)
794  arr = np.empty([len(a) for a in arrays] + [la], dtype=np.int32)
795  for i, a in enumerate(np.ix_(*arrays)):
796  arr[...,i] = a
797  return arr.reshape(-1, la)
798 
799 def extract_int(arr, avgthre, limthre, label="value", verbose=True):
800  """
801  Get the representative integer value from an array.
802  The integer value is the rounded mean. Perform sanity
803  checks to ensure the array isn't overly "non-integer".
804 
805  Parameters
806  ----------
807  arr : numpy.ndarray
808  NumPy array containing a series of floating point values
809  where we'd like to extract the representative integer value.
810  avgthre : float
811  If the average deviates from the closest integer by
812  more than this amount, do not pass.
813  limthre : float
814  If any element in this array deviates from the closest integer by
815  more than this amount, do not pass.
816  label : str
817  Descriptive name of this variable, used only in printout.
818  verbose : bool
819  Print information in case array makes excursions larger than the threshold
820 
821  Returns
822  -------
823  int
824  Representative integer value for the array.
825  passed : bool
826  Indicates whether the array mean and/or maximum deviations stayed with the thresholds.
827  """
828  average = np.mean(arr)
829  maximum = np.max(arr)
830  minimum = np.min(arr)
831  rounded = round(average)
832  passed = True
833  if abs(average - rounded) > avgthre:
834  if verbose:
835  logger.info("Average %s (%f) deviates from integer %s (%i) by more than threshold of %f" % (label, average, label, rounded, avgthre))
836  passed = False
837  if abs(maximum - minimum) > limthre:
838  if verbose:
839  logger.info("Maximum %s fluctuation (%f) is larger than threshold of %f" % (label, abs(maximum-minimum), limthre))
840  passed = False
841  return int(rounded), passed
842 
843 def extract_pop(M, verbose=True):
844  """
845  Extract our best estimate of charge and spin-z from the comments
846  section of a Molecule object created with Nanoreactor. Note that
847  spin-z is 1.0 if there is one unpaired electron (not one/half) because
848  the unit is in terms of populations.
849 
850  This function is intended to work on atoms that are *extracted*
851  from an ab initio MD trajectory, where the Mulliken charge and spin
852  populations are not exactly integers. It attempts to return the closest
853  integer but it will sometimes fail.
854 
855  If the number of electrons and spin-z are inconsistent, then
856  return -999,-999 (indicates failure).
857 
858 
859  Parameters
860  ----------
861  M : Molecule
862  Molecule object that we're getting charge and spin from, with a known comment syntax
863  Reaction: formula CHO -> CO+H atoms ['0-2'] -> ['0-1','2'] frame 219482 charge -0.742 sz +0.000 sz^2 0.000
864 
865  Returns
866  -------
867  chg : int
868  Representative integer net charge of this Molecule, or -999 if inconsistent
869  spn : int
870  Representative integer spin-z of this Molecule (one unpaired electron: sz=1), or -999 if inconsistent
871 
872  """
873 
874  # Read in the charge and spin on the whole system.
875  srch = lambda s : np.array([float(re.search(r'(?<=%s )[-+]?[0-9]*\.?[0-9]*([eEdD][-+]?[0-9]+)?' % s, c).group(0)) for c in M.comms if all([i in c for i in ('charge', 'sz')])])
876  Chgs = srch('charge') # An array of the net charge.
877  SpnZs = srch('sz') # An array of the net Z-spin.
878  Spn2s = srch(r'sz\^2') # An array of the sum of sz^2 by atom.
879 
880  chg, chgpass = extract_int(Chgs, 0.3, 1.0, label="charge")
881  spn, spnpass = extract_int(abs(SpnZs), 0.3, 1.0, label="spin-z")
882 
883  # Try to calculate the correct spin.
884  nproton = sum([Elements.index(i) for i in M.elem])
885  nelectron = nproton + chg
886  if not spnpass:
887  if verbose: logger.info("Going with the minimum spin consistent with charge.")
888  if nelectron%2 == 0:
889  spn = 0
890  else:
891  spn = 1
892 
893  # The number of electrons should be odd iff the spin is odd.
894  if (int((nelectron-spn)/2))*2 != (nelectron-spn):
895  if verbose: logger.info("\x1b[91mThe number of electrons (%i) is inconsistent with the spin-z (%i)\x1b[0m" % (nelectron, spn))
896  return -999, -999
897 
898  if verbose: logger.info("%i electrons; charge %i, spin %i" % (nelectron, chg, spn))
899  return chg, spn
900 
901 def arc(Mol, begin=None, end=None, RMSD=True, align=True):
902  """
903  Get the arc-length for a trajectory segment.
904  Uses RMSD or maximum displacement of any atom in the trajectory.
905 
906  Parameters
907  ----------
908  Mol : Molecule
909  Molecule object for calculating the arc length.
910  begin : int
911  Starting frame, defaults to first frame
912  end : int
913  Ending frame, defaults to final frame
914  RMSD : bool
915  Set to True to use frame-to-frame RMSD; otherwise use the maximum displacement of any atom
916 
917  Returns
918  -------
919  Arc : np.ndarray
920  Arc length between frames in Angstrom, length is n_frames - 1
921  """
922  if align:
923  Mol.align()
924  if begin is None:
925  begin = 0
926  if end is None:
927  end = len(Mol)
928  if RMSD:
929  Arc = Mol.pathwise_rmsd(align)
930  else:
931  Arc = np.array([np.max([np.linalg.norm(Mol.xyzs[i+1][j]-Mol.xyzs[i][j]) for j in range(Mol.na)]) for i in range(begin, end-1)])
932  return Arc
933 
934 def EqualSpacing(Mol, frames=0, dx=0, RMSD=True, align=True):
935  """
936  Equalize the spacing of frames in a trajectory with linear interpolation.
937  This is done in a very simple way, first calculating the arc length
938  between frames, then creating an equally spaced array, and interpolating
939  all Cartesian coordinates along this equally spaced array.
940 
941  This is intended to be used on trajectories with smooth transformations and
942  ensures that concatenated frames containing both optimization coordinates
943  and dynamics trajectories don't have sudden changes in their derivatives.
944 
945  Parameters
946  ----------
947  Mol : Molecule
948  Molecule object for equalizing the spacing.
949  frames : int
950  Return a Molecule object with this number of frames.
951  RMSD : bool
952  Use RMSD in the arc length calculation.
953 
954  Returns
955  -------
956  Mol1 : Molecule
957  New molecule object, either the same one (if frames > len(Mol))
958  or with equally spaced frames.
959  """
960  ArcMol = arc(Mol, RMSD=RMSD, align=align)
961  ArcMolCumul = np.insert(np.cumsum(ArcMol), 0, 0.0)
962  if frames != 0 and dx != 0:
963  logger.error("Provide dx or frames or neither")
964  elif dx != 0:
965  frames = int(float(max(ArcMolCumul))/dx)
966  elif frames == 0:
967  frames = len(ArcMolCumul)
968 
969  ArcMolEqual = np.linspace(0, max(ArcMolCumul), frames)
970  xyzold = np.array(Mol.xyzs)
971  xyznew = np.zeros((frames, Mol.na, 3))
972  for a in range(Mol.na):
973  for i in range(3):
974  xyznew[:,a,i] = np.interp(ArcMolEqual, ArcMolCumul, xyzold[:, a, i])
975  if len(xyzold) == len(xyznew):
976  Mol1 = copy.deepcopy(Mol)
977  else:
978  # If we changed the number of coordinates, then
979  # do some integer interpolation of the comments and
980  # other frame variables.
981  Mol1 = Mol[np.array([int(round(i)) for i in np.linspace(0, len(xyzold)-1, len(xyznew))])]
982  Mol1.xyzs = list(xyznew)
983  return Mol1
984 
985 def AtomContact(xyz, pairs, box=None, displace=False):
986  """
987  Compute distances between pairs of atoms.
988 
989  Parameters
990  ----------
991  xyz : np.ndarray
992  N_frames*N_atoms*3 (3D) array of atomic positions
993  If you only have a single set of positions, pass in xyz[np.newaxis, :]
994  pairs : list
995  List of 2-tuples of atom indices
996  box : np.ndarray, optional
997  N_frames*3 (2D) array of periodic box vectors
998  If you only have a single set of positions, pass in box[np.newaxis, :]
999  displace : bool
1000  If True, also return N_frames*N_pairs*3 array of displacement vectors
1001 
1002  Returns
1003  -------
1004  np.ndarray
1005  N_pairs*N_frames (2D) array of minimum image convention distances
1006  np.ndarray (optional)
1007  if displace=True, N_frames*N_pairs*3 array of displacement vectors
1008  """
1009  # Obtain atom selections for atom pairs
1010  parray = np.array(pairs)
1011  sel1 = parray[:,0]
1012  sel2 = parray[:,1]
1013  xyzpbc = xyz.copy()
1014  # Minimum image convention: Place all atoms in the box
1015  # [0, xbox); [0, ybox); [0, zbox)
1016  if box is not None:
1017  xyzpbc /= box[:,np.newaxis,:]
1018  xyzpbc = xyzpbc % 1.0
1019  # Obtain atom selections for the pairs to be computed
1020  # These are typically longer than N but shorter than N^2.
1021  xyzsel1 = xyzpbc[:,sel1,:]
1022  xyzsel2 = xyzpbc[:,sel2,:]
1023  # Calculate xyz displacement
1024  dxyz = xyzsel2-xyzsel1
1025  # Apply minimum image convention to displacements
1026  if box is not None:
1027  dxyz = np.mod(dxyz+0.5, 1.0) - 0.5
1028  dxyz *= box[:,np.newaxis,:]
1029  dr2 = np.sum(dxyz**2,axis=2)
1030  dr = np.sqrt(dr2)
1031  if displace:
1032  return dr, dxyz
1033  else:
1034  return dr
1035 
1036 #===================================#
1037 #| Rotation subroutine 2018-10-28 |#
1038 #| Copied from geomeTRIC.rotate |#
1039 #===================================#
1040 
1041 def form_rot(q):
1042  """
1043  Given a quaternion p, form a rotation matrix from it.
1044 
1045  Parameters
1046  ----------
1047  q : numpy.ndarray
1048  1D array with 3 elements representing the rotation quaterion.
1049  Elements of quaternion are : [cos(a/2), sin(a/2)*axis[0..2]]
1050 
1051  Returns
1052  -------
1053  numpy.array
1054  3x3 rotation matrix
1055  """
1056  assert q.ndim == 1
1057  assert q.shape[0] == 4
1058  # Take the "complex conjugate"
1059  qc = np.zeros_like(q)
1060  qc[0] = q[0]
1061  qc[1] = -q[1]
1062  qc[2] = -q[2]
1063  qc[3] = -q[3]
1064  # Form al_q and al_qc matrices
1065  al_q = np.array([[ q[0], -q[1], -q[2], -q[3]],
1066  [ q[1], q[0], -q[3], q[2]],
1067  [ q[2], q[3], q[0], -q[1]],
1068  [ q[3], -q[2], q[1], q[0]]])
1069  ar_qc = np.array([[ qc[0], -qc[1], -qc[2], -qc[3]],
1070  [ qc[1], qc[0], qc[3], -qc[2]],
1071  [ qc[2], -qc[3], qc[0], qc[1]],
1072  [ qc[3], qc[2], -qc[1], qc[0]]])
1073  # Multiply matrices
1074  R4 = np.dot(al_q,ar_qc)
1075  return R4[1:, 1:]
1076 
1077 def axis_angle(axis, angle):
1078  """
1079  Given a rotation axis and angle, return the corresponding
1080  3x3 rotation matrix, which will rotate a (Nx3) array of
1081  xyz coordinates as x0_rot = np.dot(R, x0.T).T
1082 
1083  Parameters
1084  ----------
1085  axis : numpy.ndarray
1086  1D array with 3 elements representing the rotation axis
1087  angle : float
1088  The angle of the rotation
1089 
1090  Returns
1091  -------
1092  numpy.array
1093  3x3 rotation matrix
1094  """
1095  assert axis.ndim == 1
1096  assert axis.shape[0] == 3
1097  axis /= np.linalg.norm(axis)
1098  # Make quaternion
1099  ct2 = np.cos(angle/2)
1100  st2 = np.sin(angle/2)
1101  q = np.array([ct2, st2*axis[0], st2*axis[1], st2*axis[2]])
1102  # Form rotation matrix
1103  R = form_rot(q)
1104  return R
1105 
1106 class Molecule(object):
1107  """ Lee-Ping's general file format conversion class.
1108 
1109  The purpose of this class is to read and write chemical file formats in a
1110  way that is convenient for research. There are highly general file format
1111  converters out there (e.g. catdcd, openbabel) but I find that writing
1112  my own class can be very helpful for specific purposes. Here are some things
1113  this class can do:
1114 
1115  - Convert a .gro file to a .xyz file, or a .pdb file to a .dcd file.
1116  Data is stored internally, so any readable file can be converted into
1117  any writable file as long as there is sufficient information to write
1118  that file.
1119 
1120  - Accumulate information from different files. For example, we may read
1121  A.gro to get a list of coordinates, add quantum settings from a B.in file,
1122  and write A.in (this gives us a file that we can use to run QM calculations)
1123 
1124  - Concatenate two trajectories together as long as they're compatible. This
1125  is done by creating two Molecule objects and then simply adding them. Addition
1126  means two things: (1) Information fields missing from each class, but present
1127  in the other, are added to the sum, and (2) Appendable or per-frame fields
1128  (i.e. coordinates) are concatenated together.
1129 
1130  - Slice trajectories using reasonable Python language. That is to
1131  say, MyMolecule[1:10] returns a new Molecule object that contains
1132  frames 1 through 9 (inclusive and numbered starting from zero.)
1134  Special variables: These variables cannot be set manually because
1135  there is a special method associated with getting them.
1136 
1137  na = The number of atoms. You'll get this if you use MyMol.na or MyMol['na'].
1138  ns = The number of snapshots. You'll get this if you use MyMol.ns or MyMol['ns'].
1139 
1140  Unit system: Angstroms.
1141 
1142  """
1143 
1144  def __init__(self, fnm = None, ftype = None, top = None, ttype = None, **kwargs):
1145  """
1146  Create a Molecule object.
1147 
1148  Parameters
1149  ----------
1150  fnm : str, optional
1151  File name to create the Molecule object from. If provided,
1152  the file will be parsed and used to fill in the fields such as
1153  elem (elements), xyzs (coordinates) and so on. If ftype is not
1154  provided, will automatically try to determine file type from file
1155  extension. If not provided, will create an empty object.
1156  ftype : str, optional
1157  File type, corresponding to an entry in the internal table of known
1158  file types. Provide this if you have a nonstandard file extension
1159  or if you wish to force to invoke a particular parser.
1160  top : str, optional
1161  "Topology" file name. If provided, will build the Molecule
1162  using this file name, then "fnm" will load frames instead.
1163  ttype : str, optional
1164  "Typology" file type.
1165  build_topology : bool, optional
1166  Build the molecular topology consisting of: topology (overall connectivity graph),
1167  molecules (list of connected subgraphs), bonds (if not explicitly read in), default True
1168  toppbc : bool, optional
1169  Use periodic boundary conditions when building the molecular topology, default False
1170  The build_topology code will attempt to determine this intelligently.
1171  topframe : int, optional
1172  Provide a frame number for building the molecular topology, default first frame
1173  Fac : float, optional
1174  Multiplicative factor to covalent radii criterion for deciding whether two atoms are bonded
1175  Default value of 1.2 is reasonable, 1.4 will produce lots of bonds
1176  positive_resid : bool, optional
1177  If provided, enforce all positive resIDs.
1178  """
1179  # If we passed in a "topology" file, read it in first, and then load the frames
1180  if top is not None:
1181  load_fnm = fnm
1182  load_type = ftype
1183  fnm = top
1184  ftype = ttype
1185  else:
1186  load_fnm = None
1187  load_type = None
1188  #=========================================#
1189  #| File type tables |#
1190  #| Feel free to edit these as more |#
1191  #| readers / writers are added |#
1192  #=========================================#
1193 
1194  self.Read_Tab = {'gaussian' : self.read_com,
1195  'gromacs' : self.read_gro,
1196  'charmm' : self.read_charmm,
1197  'dcd' : self.read_dcd,
1198  'mdcrd' : self.read_mdcrd,
1199  'inpcrd' : self.read_inpcrd,
1200  'pdb' : self.read_pdb,
1201  'xyz' : self.read_xyz,
1202  'qcschema' : self.read_qcschema,
1203  'mol2' : self.read_mol2,
1204  'qcin' : self.read_qcin,
1205  'qcout' : self.read_qcout,
1206  'qcesp' : self.read_qcesp,
1207  'qdata' : self.read_qdata,
1208  'tinker' : self.read_arc}
1209 
1210  self.Write_Tab = {'gromacs' : self.write_gro,
1211  'xyz' : self.write_xyz,
1212  'lammps' : self.write_lammps_data,
1213  'molproq' : self.write_molproq,
1214  'dcd' : self.write_dcd,
1215  'inpcrd' : self.write_inpcrd,
1216  'mdcrd' : self.write_mdcrd,
1217  'pdb' : self.write_pdb,
1218  'qcin' : self.write_qcin,
1219  'qdata' : self.write_qdata,
1220  'tinker' : self.write_arc}
1221 
1223  self.Funnel = {'gromos' : 'gromacs',
1224  'gro' : 'gromacs',
1225  'g96' : 'gromacs',
1226  'gmx' : 'gromacs',
1227  'in' : 'qcin',
1228  'qcin' : 'qcin',
1229  'com' : 'gaussian',
1230  'rst' : 'inpcrd',
1231  'out' : 'qcout',
1232  'esp' : 'qcesp',
1233  'txt' : 'qdata',
1234  'crd' : 'charmm',
1235  'cor' : 'charmm',
1236  'arc' : 'tinker'}
1237 
1239  self.positive_resid = kwargs.get('positive_resid', 0)
1240  self.built_bonds = False
1241 
1242  self.top_settings = {'toppbc' : kwargs.get('toppbc', False),
1243  'topframe' : kwargs.get('topframe', 0),
1244  'Fac' : kwargs.get('Fac', 1.2),
1245  'read_bonds' : False,
1246  'fragment' : kwargs.get('fragment', False),
1247  'radii' : kwargs.get('radii', {})}
1248 
1249  for i in set(list(self.Read_Tab.keys()) + list(self.Write_Tab.keys())):
1250  self.Funnel[i] = i
1251  # Data container. All of the data is stored in here.
1252  self.Data = {}
1253 
1254  if fnm is not None:
1255  self.Data['fnm'] = fnm
1256  if ftype is None:
1257 
1258  ftype = os.path.splitext(fnm)[1][1:]
1259  if not os.path.exists(fnm):
1260  logger.error('Tried to create Molecule object from a file that does not exist: %s\n' % fnm)
1261  raise IOError
1262  self.Data['ftype'] = ftype
1263 
1264  Parsed = self.Read_Tab[self.Funnel[ftype.lower()]](fnm, **kwargs)
1265 
1266  for key, val in Parsed.items():
1267  self.Data[key] = val
1268 
1269  if 'comms' not in self.Data:
1270  self.comms = ['From %s: Frame %i / %i' % (fnm, i+1, self.ns) for i in range(self.ns)]
1271  if 'qm_energies' in self.Data:
1272  for i in range(self.ns):
1273  self.comms[i] += ', Energy= % 18.10f' % self.qm_energies[i]
1274  else:
1275  self.comms = [i.expandtabs() for i in self.comms]
1276 
1277  if kwargs.get('build_topology', True) and hasattr(self, 'elem') and self.na > 0:
1278  self.build_topology(force_bonds=False)
1279  if load_fnm is not None:
1280  self.load_frames(load_fnm, ftype=load_type, **kwargs)
1282  #=====================================#
1283  #| Core read/write functions |#
1284  #| Hopefully we won't have to change |#
1285  #| these very often! |#
1286  #=====================================#
1287 
1288  def __len__(self):
1289  """ Return the number of frames in the trajectory. """
1290  L = -1
1291  klast = None
1292  Defined = False
1293  for key in self.FrameKeys:
1294  Defined = True
1295  if L != -1 and len(self.Data[key]) != L:
1296  self.repair(key, klast)
1297  L = len(self.Data[key])
1298  klast = key
1299  if not Defined:
1300  return 0
1301  return L
1302 
1303  def __getattr__(self, key):
1304  """ Whenever we try to get a class attribute, it first tries to get the attribute from the Data dictionary. """
1305  if key == 'qm_forces':
1306  logger.warning('qm_forces is a deprecated keyword because it actually meant gradients; setting to qm_grads.')
1307  key = 'qm_grads'
1308  if key == 'ns':
1309  return len(self)
1310  elif key == 'na': # The 'na' attribute is the number of atoms.
1311  L = -1
1312  klast = None
1313  Defined = False
1314  for key in self.AtomKeys:
1315  Defined = True
1316  if L != -1 and len(self.Data[key]) != L:
1317  self.repair(key, klast)
1318  L = len(self.Data[key])
1319  klast = key
1320  if Defined:
1321  return L
1322  elif 'xyzs' in self.Data:
1323  return len(self.xyzs[0])
1324  else:
1325  return 0
1326  #raise RuntimeError('na is ill-defined if the molecule has no AtomKeys member variables.')
1327 
1329  elif key == 'FrameKeys':
1330  return set(self.Data) & FrameVariableNames
1331  elif key == 'AtomKeys':
1332  return set(self.Data) & AtomVariableNames
1333  elif key == 'MetaKeys':
1334  return set(self.Data) & MetaVariableNames
1335  elif key == 'QuantumKeys':
1336  return set(self.Data) & QuantumVariableNames
1337  elif key in self.Data:
1338  return self.Data[key]
1339  return getattr(super(Molecule, self), key)
1340 
1341  def __setattr__(self, key, value):
1342  """ Whenever we try to get a class attribute, it first tries to get the attribute from the Data dictionary. """
1343 
1345  if key == 'qm_forces':
1346  logger.warning('qm_forces is a deprecated keyword because it actually meant gradients; setting to qm_grads.')
1347  key = 'qm_grads'
1348  if key in AllVariableNames:
1349  self.Data[key] = value
1350  return super(Molecule,self).__setattr__(key, value)
1351 
1352  def __deepcopy__(self, memo):
1353  """ Custom deepcopy method because Python 3.6 appears to have changed its behavior """
1354  New = Molecule()
1355  # Copy over variables that are not contained in self.Data
1356  New.positive_resid = self.positive_resid
1357  New.built_bonds = self.built_bonds
1358  New.top_settings = copy.deepcopy(self.top_settings)
1359 
1360  for key in self.Data:
1361  if key in ['xyzs', 'qm_grads', 'qm_hessians', 'qm_espxyzs', 'qm_espvals', 'qm_extchgs', 'qm_mulliken_charges', 'qm_mulliken_spins', 'molecules', 'qm_bondorder']:
1362  # These variables are lists of NumPy arrays, NetworkX graph objects, or others with
1363  # explicitly defined copy() methods.
1364  New.Data[key] = []
1365  for i in range(len(self.Data[key])):
1366  New.Data[key].append(copy.deepcopy(self.Data[key][i]))
1367  elif key in ['topology']:
1368  # These are NetworkX graph objects or other variables with explicitly defined copy() methods.
1369  New.Data[key] = self.Data[key].copy()
1370  elif key in ['boxes', 'qcrems']:
1371  # We'll use the default deepcopy method for these:
1372  # boxes is a list of named tuples.
1373  # qcrems is a list of OrderedDicts.
1374  New.Data[key] = []
1375  for i in range(len(self.Data[key])):
1376  New.Data[key].append(copy.deepcopy(self.Data[key][i]))
1377  elif key in ['comms', 'qm_energies', 'qm_interaction', 'qm_zpe', 'qm_entropy', 'qm_enthalpy', 'elem', 'partial_charge',
1378  'atomname', 'atomtype', 'tinkersuf', 'resid', 'resname', 'qcsuf', 'qm_ghost', 'chain', 'altloc', 'icode',
1379  'terminal']:
1380  if not isinstance(self.Data[key], list):
1381  raise RuntimeError('Expected data attribute %s to be a list, but it is %s' % (key, str(type(self.Data[key]))))
1382  # Lists of strings or floats.
1383  New.Data[key] = self.Data[key][:]
1384  elif key in ['fnm', 'ftype', 'charge', 'mult', 'qcerr']:
1385  # These are strings, ints, or floats.
1386  New.Data[key] = self.Data[key]
1387  elif key in ['bonds']:
1388  # List of lists of 2 integers.
1389  New.Data[key] = []
1390  for i in range(len(self.Data[key])):
1391  New.Data[key].append(self.Data[key][i][:])
1392  elif key in ['qctemplate']:
1393  # List of 2-tuples where the first element is a string
1394  # (Q-Chem input section name) and the second element
1395  # is a list (Q-Chem input section data).
1396  New.Data[key] = []
1397  for SectName, SectData in self.Data[key]:
1398  New.Data[key].append((SectName, SectData[:]))
1399  else:
1400  raise RuntimeError("Failed to copy key %s" % key)
1401  return New
1402 
1403  def __getitem__(self, key):
1404  """
1405  The Molecule class has list-like behavior, so we can get slices of it.
1406  If we say MyMolecule[0:10], then we'll return a copy of MyMolecule with frames 0 through 9.
1407  """
1408  if isinstance(key, int) or isinstance(key, slice) or isinstance(key,np.ndarray) or isinstance(key,list):
1409  if isinstance(key, int):
1410  key = [key]
1411  New = Molecule()
1412  for k in self.FrameKeys:
1413  if k == 'boxes':
1414  New.Data[k] = [j for i, j in enumerate(self.Data[k]) if i in np.arange(len(self))[key]]
1415  else:
1416  New.Data[k] = list(np.array(copy.deepcopy(self.Data[k]))[key])
1417  for k in self.AtomKeys | self.MetaKeys:
1418  New.Data[k] = copy.deepcopy(self.Data[k])
1419  New.top_settings = copy.deepcopy(self.top_settings)
1420  return New
1421  else:
1422  logger.error('getitem is not implemented for keys of type %s\n' % str(key))
1423  raise RuntimeError
1424 
1425  def __delitem__(self, key):
1426  """
1427  Similarly, in order to delete a frame, we simply perform item deletion on
1428  framewise variables.
1429  """
1430  for k in self.FrameKeys:
1431  del self.Data[k][key]
1432 
1433  def __iter__(self):
1434  """ List-like behavior for looping over trajectories. Note that these values are returned by reference.
1435  Note that this is intended to be more efficient than __getitem__, so when we loop over a trajectory,
1436  it's best to go "for m in M" instead of "for i in range(len(M)): m = M[i]"
1437  """
1438  for frame in range(self.ns):
1439  New = Molecule()
1440  for k in self.FrameKeys:
1441  New.Data[k] = self.Data[k][frame]
1442  for k in self.AtomKeys | self.MetaKeys:
1443  New.Data[k] = self.Data[k]
1444  yield New
1445 
1446  def __add__(self,other):
1447  """ Add method for Molecule objects. """
1448  # Check type of other
1449  if not isinstance(other,Molecule):
1450  logger.error('A Molecule instance can only be added to another Molecule instance\n')
1451  raise TypeError
1452  # Create the sum of the two classes by copying the first class.
1453  Sum = Molecule()
1454  for key in AtomVariableNames | MetaVariableNames:
1455  # Because a molecule object can only have one 'file name' or 'file type' attribute,
1456  # we only keep the original one. This isn't perfect, but that's okay.
1457  if key in ['fnm', 'ftype', 'bonds', 'molecules', 'topology'] and key in self.Data:
1458  Sum.Data[key] = self.Data[key]
1459  elif diff(self, other, key):
1460  for i, j in zip(self.Data[key], other.Data[key]):
1461  logger.info("%s %s %s" % (i, j, str(i==j)))
1462  logger.error('The data member called %s is not the same for these two objects\n' % key)
1463  raise RuntimeError
1464  elif key in self.Data:
1465  Sum.Data[key] = copy.deepcopy(self.Data[key])
1466  elif key in other.Data:
1467  Sum.Data[key] = copy.deepcopy(other.Data[key])
1468  for key in FrameVariableNames:
1469  if both(self, other, key):
1470  if type(self.Data[key]) is not list:
1471  logger.error('Key %s in self is a FrameKey, it must be a list\n' % key)
1472  raise RuntimeError
1473  if type(other.Data[key]) is not list:
1474  logger.error('Key %s in other is a FrameKey, it must be a list\n' % key)
1475  raise RuntimeError
1476  if isinstance(self.Data[key][0], np.ndarray):
1477  Sum.Data[key] = [i.copy() for i in self.Data[key]] + [i.copy() for i in other.Data[key]]
1478  else:
1479  Sum.Data[key] = list(self.Data[key] + other.Data[key])
1480  elif either(self, other, key):
1481  # TINKER 6.3 compatibility - catch the specific case that one has a periodic box and the other doesn't.
1482  if key == 'boxes':
1483  if key in self.Data:
1484  other.Data['boxes'] = [self.Data['boxes'][0] for i in range(len(other))]
1485  elif key in other.Data:
1486  self.Data['boxes'] = [other.Data['boxes'][0] for i in range(len(self))]
1487  else:
1488  logger.error('Key %s is a FrameKey, must exist in both self and other for them to be added (for now).\n' % key)
1489  raise RuntimeError
1490  return Sum
1491 
1492  def __iadd__(self,other):
1493  """ Add method for Molecule objects. """
1494  # Check type of other
1495  if not isinstance(other,Molecule):
1496  logger.error('A Molecule instance can only be added to another Molecule instance\n')
1497  raise TypeError
1498  # Create the sum of the two classes by copying the first class.
1499  for key in AtomVariableNames | MetaVariableNames:
1500  if key in ['fnm', 'ftype', 'bonds', 'molecules', 'topology']: pass
1501  elif diff(self, other, key):
1502  for i, j in zip(self.Data[key], other.Data[key]):
1503  logger.info("%s %s %s" % (i, j, str(i==j)))
1504  logger.error('The data member called %s is not the same for these two objects\n' % key)
1505  raise RuntimeError
1506  # Information from the other class is added to this class (if said info doesn't exist.)
1507  elif key in other.Data:
1508  self.Data[key] = copy.deepcopy(other.Data[key])
1509  # FrameKeys must be a list.
1510  for key in FrameVariableNames:
1511  if both(self, other, key):
1512  if type(self.Data[key]) is not list:
1513  logger.error('Key %s in self is a FrameKey, it must be a list\n' % key)
1514  raise RuntimeError
1515  if type(other.Data[key]) is not list:
1516  logger.error('Key %s in other is a FrameKey, it must be a list\n' % key)
1517  raise RuntimeError
1518  if isinstance(self.Data[key][0], np.ndarray):
1519  self.Data[key] += [i.copy() for i in other.Data[key]]
1520  else:
1521  self.Data[key] += other.Data[key]
1522  elif either(self, other, key):
1523  # TINKER 6.3 compatibility - catch the specific case that one has a periodic box and the other doesn't.
1524  if key == 'boxes':
1525  if key in self.Data:
1526  other.Data['boxes'] = [self.Data['boxes'][0] for i in range(len(other))]
1527  elif key in other.Data:
1528  self.Data['boxes'] = [other.Data['boxes'][0] for i in range(len(self))]
1529  else:
1530  logger.error('Key %s is a FrameKey, must exist in both self and other for them to be added (for now).\n' % key)
1531  raise RuntimeError
1532  return self
1533 
1534  def repair(self, key, klast):
1535  """ Attempt to repair trivial issues that would otherwise break the object. """
1536  kthis = key if len(self.Data[key]) < len(self.Data[klast]) else klast
1537  kother = klast if len(self.Data[key]) < len(self.Data[klast]) else key
1538  diff = abs(len(self.Data[key]) - len(self.Data[klast]))
1539  if kthis == 'comms':
1540  # Append empty comments if necessary because this causes way too many crashes.
1541  if diff > 0:
1542  for i in range(diff):
1543  self.Data['comms'].append('')
1544  else:
1545  for i in range(-1*diff):
1546  self.Data['comms'].pop()
1547  elif kthis == 'boxes' and len(self.Data['boxes']) == 1:
1548  # If we only have one box then we can fill in the rest of the trajectory.
1549  for i in range(diff): self.Data['boxes'].append(self.Data['boxes'][-1])
1550  else:
1551  logger.error('The keys %s and %s have different lengths (%i %i)'
1552  '- this isn\'t supposed to happen for two AtomKeys member variables.'
1553  % (key, klast, len(self.Data[key]), len(self.Data[klast])))
1554  raise RuntimeError
1555 
1556  def reorder_according_to(self, other):
1557 
1558  """
1559 
1560  Reorder atoms according to some other Molecule object. This
1561  happens when we run a program like pdb2gmx or pdbxyz and it
1562  scrambles our atom ordering, forcing us to reorder the atoms
1563  for all frames in the current Molecule object.
1564 
1565  Directions:
1566  (1) Load up the scrambled file as a new Molecule object.
1567  (2) Call this function: Original_Molecule.reorder_according_to(scrambled)
1568  (3) Save Original_Molecule to a new file name.
1569 
1570  """
1571 
1572  M = self[0]
1573  N = other
1574  unmangled = unmangle(M, N)
1575  NewData = {}
1576  for key in self.AtomKeys:
1577  NewData[key] = list(np.array(M.Data[key])[unmangled])
1578  for key in self.FrameKeys:
1579  if key in ['xyzs', 'qm_grads', 'qm_mulliken_charges', 'qm_mulliken_spins']:
1580  NewData[key] = list([self.Data[key][i][unmangled] for i in range(len(self))])
1581  for key in NewData:
1582  setattr(self, key, copy.deepcopy(NewData[key]))
1583 
1584  def reorder_indices(self, other):
1585 
1586  """
1587 
1588  Return the indices that would reorder atoms according to some
1589  other Molecule object. This happens when we run a program
1590  like pdb2gmx or pdbxyz and it scrambles our atom ordering.
1591 
1592  Directions:
1593  (1) Load up the scrambled file as a new Molecule object.
1594  (2) Call this function: Original_Molecule.reorder_indices(scrambled)
1595 
1596  """
1597  return unmangle(self[0], other)
1598 
1599  def append(self,other):
1600  self += other
1601 
1602 
1603  def require(self, *args):
1604  for arg in args:
1605  if arg == 'na': # The 'na' attribute is the number of atoms.
1606  if hasattr(self, 'na'):
1607  continue
1608  if arg == 'qm_forces':
1609  logger.warning('qm_forces is a deprecated keyword because it actually meant gradients; setting to qm_grads.')
1610  arg = 'qm_grads'
1611  if arg not in self.Data:
1612  logger.error("%s is a required attribute for writing this type of file but it's not present\n" % arg)
1613  raise RuntimeError
1614 
1615  # def read(self, fnm, ftype = None):
1616  # """ Read in a file. """
1617  # if ftype is None:
1618  # ## Try to determine from the file name using the extension.
1619  # ftype = os.path.splitext(fnm)[1][1:]
1620  # ## This calls the table of reader functions and prints out an error message if it fails.
1621  # ## 'Answer' is a dictionary of data that is returned from the reader function.
1622  # Answer = self.Read_Tab[self.Funnel[ftype.lower()]](fnm)
1623  # return Answer
1624 
1625  def write(self,fnm=None,ftype=None,append=False,selection=None,**kwargs):
1626  if fnm is None and ftype is None:
1627  logger.error("Output file name and file type are not specified.\n")
1628  raise RuntimeError
1629  elif ftype is None:
1630  ftype = os.path.splitext(fnm)[1][1:]
1631 
1632  if 'comms' not in self.Data:
1633  self.comms = ['Generated by %s from %s: Frame %i of %i' % (package, fnm, i+1, self.ns) for i in range(self.ns)]
1634  if 'xyzs' in self.Data and len(self.comms) < len(self.xyzs):
1635  for i in range(len(self.comms), len(self.xyzs)):
1636  self.comms.append("Frame %i: generated by %s" % (i, package))
1637 
1639  self.fout = fnm
1640  if type(selection) in [int, np.int64, np.int32]:
1641  selection = [selection]
1642  if selection is None:
1643  selection = list(range(len(self)))
1644  else:
1645  selection = list(selection)
1646  Answer = self.Write_Tab[self.Funnel[ftype.lower()]](selection,**kwargs)
1647 
1648  if Answer is not None:
1649  if fnm is None or fnm == sys.stdout:
1650  outfile = sys.stdout
1651  elif append:
1652  # Writing to symbolic links risks unintentionally overwriting the source file -
1653  # thus we delete the link first.
1654  if os.path.islink(fnm):
1655  os.unlink(fnm)
1656  outfile = open(fnm,'a')
1657  else:
1658  if os.path.islink(fnm):
1659  os.unlink(fnm)
1660  outfile = open(fnm,'w')
1661  for line in Answer:
1662  print(line, file=outfile)
1663  outfile.close()
1664 
1665  #=====================================#
1666  #| Useful functions |#
1667  #| For doing useful things |#
1668  #=====================================#
1669 
1670  def center_of_mass(self):
1671  totMass = sum([PeriodicTable.get(self.elem[i], 0.0) for i in range(self.na)])
1672  return np.array([np.sum([xyz[i,:] * PeriodicTable.get(self.elem[i], 0.0) / totMass for i in range(xyz.shape[0])],axis=0) for xyz in self.xyzs])
1673 
1674  def radius_of_gyration(self):
1675  totMass = sum([PeriodicTable[self.elem[i]] for i in range(self.na)])
1676  coms = self.center_of_mass()
1677  rgs = []
1678  for i, xyz in enumerate(self.xyzs):
1679  xyz1 = xyz.copy()
1680  xyz1 -= coms[i]
1681  rgs.append(np.sqrt(np.sum([PeriodicTable[self.elem[i]]*np.dot(x,x) for i, x in enumerate(xyz1)])/totMass))
1682  return np.array(rgs)
1683 
1684  def rigid_water(self):
1685  """ If one atom is oxygen and the next two are hydrogen, make the water molecule rigid. """
1686  self.require('elem', 'xyzs')
1687  for i in range(len(self)):
1688  for a in range(self.na-2):
1689  if self.elem[a] == 'O' and self.elem[a+1] == 'H' and self.elem[a+2] == 'H':
1690  flex = self.xyzs[i]
1691  wat = flex[a:a+3]
1692  com = wat.mean(0)
1693  wat -= com
1694  o = wat[0]
1695  h1 = wat[1]
1696  h2 = wat[2]
1697  r1 = h1 - o
1698  r2 = h2 - o
1699  r1 /= np.linalg.norm(r1)
1700  r2 /= np.linalg.norm(r2)
1701  # Obtain unit vectors.
1702  ex = r1 + r2
1703  ey = r1 - r2
1704  ex /= np.linalg.norm(ex)
1705  ey /= np.linalg.norm(ey)
1706  Bond = 0.9572
1707  Ang = np.pi * 104.52 / 2 / 180
1708  cosx = np.cos(Ang)
1709  cosy = np.sin(Ang)
1710  h1 = o + Bond*ex*cosx + Bond*ey*cosy
1711  h2 = o + Bond*ex*cosx - Bond*ey*cosy
1712  rig = np.array([o, h1, h2]) + com
1713  self.xyzs[i][a:a+3] = rig
1714 
1715  def load_frames(self, fnm, ftype=None, **kwargs):
1716 
1717  if fnm is not None:
1718  if ftype is None:
1719 
1720  ftype = os.path.splitext(fnm)[1][1:]
1721  if not os.path.exists(fnm):
1722  logger.error('Tried to create Molecule object from a file that does not exist: %s\n' % fnm)
1723  raise IOError
1724 
1725  Parsed = self.Read_Tab[self.Funnel[ftype.lower()]](fnm, **kwargs)
1726  if 'xyzs' not in Parsed:
1727  logger.error('Did not get any coordinates from the new file %s\n' % fnm)
1728  raise RuntimeError
1729  if Parsed['xyzs'][0].shape[0] != self.na:
1730  logger.error('When loading frames, don\'t change the number of atoms\n')
1731  raise RuntimeError
1732 
1733  for key, val in Parsed.items():
1734  if key in FrameVariableNames:
1735  self.Data[key] = val
1736 
1737  if 'comms' not in self.Data:
1738  self.comms = ['Generated by %s from %s: Frame %i of %i' % (package, fnm, i+1, self.ns) for i in range(self.ns)]
1739  else:
1740  self.comms = [i.expandtabs() for i in self.comms]
1741 
1742  def edit_qcrems(self, in_dict, subcalc = None):
1743  """ Edit Q-Chem rem variables with a dictionary. Pass a value of None to delete a rem variable. """
1744  if subcalc is None:
1745  for qcrem in self.qcrems:
1746  for key, val in in_dict.items():
1747  if val is None:
1748  qcrem.pop(key, None)
1749  else:
1750  qcrem[key] = val
1751  else:
1752  for key, val in in_dict.items():
1753  if val is None:
1754  self.qcrems[subcalc].pop(key, None)
1755  else:
1756  self.qcrems[subcalc][key] = val
1757 
1758  def add_quantum(self, other):
1759  if type(other) is Molecule:
1760  OtherMol = other
1761  elif type(other) is str:
1762  OtherMol = Molecule(other)
1763  for key in OtherMol.QuantumKeys:
1764  if key in AtomVariableNames and len(OtherMol.Data[key]) != self.na:
1765  logger.error('The quantum-key %s is AtomData, but it doesn\'t have the same number of atoms as the Molecule object we\'re adding it to.')
1766  raise RuntimeError
1767  self.Data[key] = copy.deepcopy(OtherMol.Data[key])
1768 
1769  def add_virtual_site(self, idx, **kwargs):
1770  """ Add a virtual site to the system. This does NOT set the position of the virtual site; it sits at the origin. """
1771  for key in self.AtomKeys:
1772  if key in kwargs:
1773  self.Data[key].insert(idx,kwargs[key])
1774  else:
1775  logger.error('You need to specify %s when adding a virtual site to this molecule.\n' % key)
1776  raise RuntimeError
1777  if 'xyzs' in self.Data:
1778  for i, xyz in enumerate(self.xyzs):
1779  if 'pos' in kwargs:
1780  self.xyzs[i] = np.insert(xyz, idx, xyz[kwargs['pos']], axis=0)
1781  else:
1782  self.xyzs[i] = np.insert(xyz, idx, 0.0, axis=0)
1783  else:
1784  logger.error('You need to have xyzs in this molecule to add a virtual site.\n')
1785  raise RuntimeError
1786 
1787  def replace_peratom(self, key, orig, want):
1788  """ Replace all of the data for a certain attribute in the system from orig to want. """
1789  if key in self.Data:
1790  for i in range(self.na):
1791  if self.Data[key][i] == orig:
1792  self.Data[key][i] = want
1793  else:
1794  logger.error('The key that we want to replace (%s) doesn\'t exist.\n' % key)
1795  raise RuntimeError
1796 
1797  def replace_peratom_conditional(self, key1, cond, key2, orig, want):
1798  """ Replace all of the data for a attribute key2 from orig to want, contingent on key1 being equal to cond.
1799  For instance: replace H1 with H2 if resname is SOL."""
1800  if key2 in self.Data and key1 in self.Data:
1801  for i in range(self.na):
1802  if self.Data[key2][i] == orig and self.Data[key1][i] == cond:
1803  self.Data[key2][i] = want
1804  else:
1805  logger.error('Either the comparison or replacement key (%s, %s) doesn\'t exist.\n' % (key1, key2))
1806  raise RuntimeError
1807 
1808  def atom_select(self,atomslice,build_topology=True):
1809  """ Return a copy of the object with certain atoms selected. Takes an integer, list or array as argument. """
1810  if isinstance(atomslice, int):
1811  atomslice = [atomslice]
1812  if isinstance(atomslice, list):
1813  atomslice = np.array(atomslice)
1814  New = Molecule()
1815  for key in self.FrameKeys | self.MetaKeys:
1816  New.Data[key] = copy.deepcopy(self.Data[key])
1817  for key in self.AtomKeys:
1818  if key == 'tinkersuf': # Tinker suffix is a bit tricky
1819  Map = dict([(a+1, i+1) for i, a in enumerate(atomslice)])
1820  CopySuf = list(np.array(self.Data[key])[atomslice])
1821  NewSuf = []
1822  for line in CopySuf:
1823  whites = re.split('[^ ]+',line)
1824  s = line.split()
1825  if len(s) > 1:
1826  for i in range(1,len(s)):
1827  s[i] = str(Map[int(s[i])])
1828  sn = [int(i) for i in s[1:]]
1829  s = [s[0]] + list(np.array(s[1:])[np.argsort(sn)])
1830  NewSuf.append(''.join([whites[j]+s[j] for j in range(len(s))]))
1831  New.Data['tinkersuf'] = NewSuf[:]
1832  else:
1833  New.Data[key] = list(np.array(self.Data[key])[atomslice])
1834  for key in self.FrameKeys:
1835  if key in ['xyzs', 'qm_grads', 'qm_mulliken_charges', 'qm_mulliken_spins']:
1836  New.Data[key] = [self.Data[key][i][atomslice] for i in range(len(self))]
1837  if 'bonds' in self.Data:
1838  New.Data['bonds'] = [(list(atomslice).index(b[0]), list(atomslice).index(b[1])) for b in self.bonds if (b[0] in atomslice and b[1] in atomslice)]
1839  New.top_settings = self.top_settings
1840  if build_topology:
1841  New.build_topology(force_bonds=False)
1842  return New
1843 
1844  def atom_stack(self, other):
1845  """ Return a copy of the object with another molecule object appended. WARNING: This function may invalidate stuff like QM energies. """
1846  if len(other) != len(self):
1847  logger.error('The number of frames of the Molecule objects being stacked are not equal.\n')
1848  raise RuntimeError
1849 
1850  New = Molecule()
1851  for key in self.FrameKeys | self.MetaKeys:
1852  New.Data[key] = copy.deepcopy(self.Data[key])
1853 
1854  # This is how we're going to stack things like Cartesian coordinates and QM forces.
1855  def FrameStack(k):
1856  if k in self.Data and k in other.Data:
1857  New.Data[k] = [np.vstack((s, o)) for s, o in zip(self.Data[k], other.Data[k])]
1858  for i in ['xyzs', 'qm_grads', 'qm_hessians', 'qm_espxyzs', 'qm_espvals', 'qm_extchgs', 'qm_mulliken_charges', 'qm_mulliken_spins']:
1859  FrameStack(i)
1860 
1861  # Now build the new atom keys.
1862  for key in self.AtomKeys:
1863  if key not in other.Data:
1864  logger.error('Trying to stack two Molecule objects - the first object contains %s and the other does not\n' % key)
1865  raise RuntimeError
1866  if key == 'tinkersuf': # Tinker suffix is a bit tricky
1867  NewSuf = []
1868  for line in other.Data[key]:
1869  whites = re.split('[^ ]+',line)
1870  s = line.split()
1871  if len(s) > 1:
1872  for i in range(1,len(s)):
1873  s[i] = str(int(s[i]) + self.na)
1874  NewSuf.append(''.join([whites[j]+s[j] for j in range(len(s))]))
1875  New.Data[key] = copy.deepcopy(self.Data[key]) + NewSuf
1876  else:
1877  if type(self.Data[key]) is np.ndarray:
1878  New.Data[key] = np.concatenate((self.Data[key], other.Data[key]))
1879  elif type(self.Data[key]) is list:
1880  New.Data[key] = self.Data[key] + other.Data[key]
1881  else:
1882  logger.error('Cannot stack %s because it is of type %s\n' % (key, str(type(New.Data[key]))))
1883  raise RuntimeError
1884  if 'bonds' in self.Data and 'bonds' in other.Data:
1885  New.Data['bonds'] = self.bonds + [(b[0]+self.na, b[1]+self.na) for b in other.bonds]
1886  return New
1887 
1888  def align_by_moments(self):
1889  """ Align molecules using the moment of inertia.
1890  Departs from MSMBuilder convention of
1891  using arithmetic mean for mass. """
1892  coms = self.center_of_mass()
1893  xyz1 = self.xyzs[0]
1894  xyz1 -= coms[0]
1895  xyz1 = AlignToMoments(self.elem,xyz1)
1896  for index2, xyz2 in enumerate(self.xyzs):
1897  xyz2 -= coms[index2]
1898  xyz2 = AlignToMoments(self.elem,xyz1,xyz2)
1899  self.xyzs[index2] = xyz2
1900 
1901  def get_populations(self):
1902  """ Return a cloned molecule object but with X-coordinates set
1903  to Mulliken charges and Y-coordinates set to Mulliken
1904  spins. """
1905  QS = copy.deepcopy(self)
1906  QSxyz = np.array(QS.xyzs)
1907  QSxyz[:, :, 0] = self.qm_mulliken_charges
1908  QSxyz[:, :, 1] = self.qm_mulliken_spins
1909  QSxyz[:, :, 2] *= 0.0
1910  QS.xyzs = list(QSxyz)
1911  return QS
1912 
1913  def load_popxyz(self, fnm):
1914  """ Given a charge-spin xyz file, load the charges (x-coordinate) and spins (y-coordinate) into internal arrays. """
1915  QS = Molecule(fnm, ftype='xyz', build_topology = False)
1916  self.qm_mulliken_charges = list(np.array(QS.xyzs)[:, :, 0])
1917  self.qm_mulliken_spins = list(np.array(QS.xyzs)[:, :, 1])
1918 
1919  def align(self, smooth = False, center = True, center_mass = False, atom_select=None):
1920  """ Align molecules.
1921 
1922  Has the option to create smooth trajectories
1923  (align each frame to the previous one)
1924  or to align each frame to the first one.
1925 
1926  Also has the option to remove the center of mass.
1927 
1928  Provide a list of atom indices to align along selected atoms.
1929 
1930  """
1931  if isinstance(atom_select, list):
1932  atom_select = np.array(atom_select)
1933  if center and center_mass:
1934  logger.error('Specify center=True or center_mass=True but set the other one to False\n')
1935  raise RuntimeError
1936 
1937  coms = self.center_of_mass()
1938  xyz1 = self.xyzs[0]
1939  if center:
1940  xyz1 -= xyz1.mean(0)
1941  elif center_mass:
1942  xyz1 -= coms[0]
1943  for index2, xyz2 in enumerate(self.xyzs):
1944  if index2 == 0: continue
1945  xyz2 -= xyz2.mean(0)
1946  if smooth:
1947  ref = index2-1
1948  else:
1949  ref = 0
1950  if atom_select is not None:
1951  tr, rt = get_rotate_translate(xyz2[atom_select],self.xyzs[ref][atom_select])
1952  else:
1953  tr, rt = get_rotate_translate(xyz2,self.xyzs[ref])
1954  xyz2 = np.dot(xyz2, rt) + tr
1955  self.xyzs[index2] = xyz2
1956 
1957  def center(self, center_mass = False):
1958  """ Move geometric center to the origin. """
1959  if center_mass:
1960  coms = self.center_of_mass()
1961  for i in range(len(self)):
1962  self.xyzs[i] -= coms[i]
1963  else:
1964  for i in range(len(self)):
1965  self.xyzs[i] -= self.xyzs[i].mean(0)
1967  def build_bonds(self):
1968  """ Build the bond connectivity graph. """
1969  sn = self.top_settings['topframe']
1970  toppbc = self.top_settings['toppbc']
1971  Fac = self.top_settings['Fac']
1972  mindist = 1.0 # Any two atoms that are closer than this distance are bonded.
1973  # Create an atom-wise list of covalent radii.
1974  # Molecule object can have its own set of radii that overrides the global ones
1975  R = np.array([self.top_settings['radii'].get(i, (Radii[Elements.index(i)-1] if i in Elements else 0.0)) for i in self.elem])
1976  # Create a list of 2-tuples corresponding to combinations of atomic indices using a grid algorithm.
1977  mins = np.min(self.xyzs[sn],axis=0)
1978  maxs = np.max(self.xyzs[sn],axis=0)
1979  # Grid size in Angstrom. This number is optimized for speed in a 15,000 atom system (united atom pentadecane).
1980  gsz = 6.0
1981  if hasattr(self, 'boxes'):
1982  xmin = 0.0
1983  ymin = 0.0
1984  zmin = 0.0
1985  xmax = self.boxes[sn].a
1986  ymax = self.boxes[sn].b
1987  zmax = self.boxes[sn].c
1988  if any([i != 90.0 for i in [self.boxes[sn].alpha, self.boxes[sn].beta, self.boxes[sn].gamma]]):
1989  logger.warning("Warning: Topology building will not work with broken molecules in nonorthogonal cells.")
1990  toppbc = False
1991  else:
1992  xmin = mins[0]
1993  ymin = mins[1]
1994  zmin = mins[2]
1995  xmax = maxs[0]
1996  ymax = maxs[1]
1997  zmax = maxs[2]
1998  toppbc = False
1999 
2000  xext = xmax-xmin
2001  yext = ymax-ymin
2002  zext = zmax-zmin
2003 
2004  if toppbc:
2005  gszx = xext/int(xext/gsz)
2006  gszy = yext/int(yext/gsz)
2007  gszz = zext/int(zext/gsz)
2008  else:
2009  gszx = gsz
2010  gszy = gsz
2011  gszz = gsz
2012 
2013  # Run algorithm to determine bonds.
2014  # Decide if we want to use the grid algorithm.
2015  use_grid = toppbc or (np.min([xext, yext, zext]) > 2.0*gsz)
2016  if use_grid:
2017  # Inside the grid algorithm.
2018  # 1) Determine the left edges of the grid cells.
2019  # Note that we leave out the rightmost grid cell,
2020  # because this may cause spurious partitionings.
2021  xgrd = np.arange(xmin, xmax-gszx, gszx)
2022  ygrd = np.arange(ymin, ymax-gszy, gszy)
2023  zgrd = np.arange(zmin, zmax-gszz, gszz)
2024  # 2) Grid cells are denoted by a three-index tuple.
2025  gidx = list(itertools.product(range(len(xgrd)), range(len(ygrd)), range(len(zgrd))))
2026  # 3) Build a dictionary which maps a grid cell to itself plus its neighboring grid cells.
2027  # Two grid cells are defined to be neighbors if the differences between their x, y, z indices are at most 1.
2028  gngh = OrderedDict()
2029  amax = np.array(gidx[-1])
2030  amin = np.array(gidx[0])
2031  n27 = np.array(list(itertools.product([-1,0,1],repeat=3)))
2032  for i in gidx:
2033  gngh[i] = []
2034  ai = np.array(i)
2035  for j in n27:
2036  nj = ai+j
2037  for k in range(3):
2038  mod = amax[k]-amin[k]+1
2039  if nj[k] < amin[k]:
2040  nj[k] += mod
2041  elif nj[k] > amax[k]:
2042  nj[k] -= mod
2043  gngh[i].append(tuple(nj))
2044  # 4) Loop over the atoms and assign each to a grid cell.
2045  # Note: I think this step becomes the bottleneck if we choose very small grid sizes.
2046  gasn = OrderedDict([(i, []) for i in gidx])
2047  for i in range(self.na):
2048  xidx = -1
2049  yidx = -1
2050  zidx = -1
2051  for j in xgrd:
2052  xi = self.xyzs[sn][i][0]
2053  while xi < xmin: xi += xext
2054  while xi > xmax: xi -= xext
2055  if xi < j: break
2056  xidx += 1
2057  for j in ygrd:
2058  yi = self.xyzs[sn][i][1]
2059  while yi < ymin: yi += yext
2060  while yi > ymax: yi -= yext
2061  if yi < j: break
2062  yidx += 1
2063  for j in zgrd:
2064  zi = self.xyzs[sn][i][2]
2065  while zi < zmin: zi += zext
2066  while zi > zmax: zi -= zext
2067  if zi < j: break
2068  zidx += 1
2069  gasn[(xidx,yidx,zidx)].append(i)
2070 
2071  # 5) Create list of 2-tuples corresponding to combinations of atomic indices.
2072  # This is done by looping over pairs of neighboring grid cells and getting Cartesian products of atom indices inside.
2073  # It may be possible to get a 2x speedup by eliminating forward-reverse pairs (e.g. (5, 4) and (4, 5) and duplicates (5,5).)
2074  AtomIterator = []
2075  for i in gasn:
2076  for j in gngh[i]:
2077  apairs = cartesian_product2([gasn[i], gasn[j]])
2078  if len(apairs) > 0: AtomIterator.append(apairs[apairs[:,0]>apairs[:,1]])
2079  AtomIterator = np.ascontiguousarray(np.vstack(AtomIterator))
2080  else:
2081  # Create a list of 2-tuples corresponding to combinations of atomic indices.
2082  # This is much faster than using itertools.combinations.
2083  AtomIterator = np.ascontiguousarray(np.vstack((np.fromiter(itertools.chain(*[[i]*(self.na-i-1) for i in range(self.na)]),dtype=np.int32), np.fromiter(itertools.chain(*[range(i+1,self.na) for i in range(self.na)]),dtype=np.int32))).T)
2084 
2085  # Create a list of thresholds for determining whether a certain interatomic distance is considered to be a bond.
2086  BT0 = R[AtomIterator[:,0]]
2087  BT1 = R[AtomIterator[:,1]]
2088  BondThresh = (BT0+BT1) * Fac
2089  BondThresh = (BondThresh > mindist) * BondThresh + (BondThresh < mindist) * mindist
2090  if hasattr(self, 'boxes') and toppbc:
2091  dxij = AtomContact(self.xyzs[sn][np.newaxis, :], AtomIterator, box=np.array([[self.boxes[sn].a, self.boxes[sn].b, self.boxes[sn].c]]))[0]
2092  else:
2093  dxij = AtomContact(self.xyzs[sn][np.newaxis, :], AtomIterator)[0]
2094 
2095  # Update topology settings with what we learned
2096  self.top_settings['toppbc'] = toppbc
2097 
2098  # Create a list of atoms that each atom is bonded to.
2099  atom_bonds = [[] for i in range(self.na)]
2100  bond_bool = dxij < BondThresh
2101  for i, a in enumerate(bond_bool):
2102  if not a: continue
2103  (ii, jj) = AtomIterator[i]
2104  if ii == jj: continue
2105  atom_bonds[ii].append(jj)
2106  atom_bonds[jj].append(ii)
2107  bondlist = []
2108  for i, bi in enumerate(atom_bonds):
2109  for j in bi:
2110  if i == j: continue
2111  # Do not add a bond between resids if fragment is set to True.
2112  if self.top_settings['fragment'] and 'resid' in self.Data.keys() and self.resid[i] != self.resid[j] : continue
2113  elif i < j:
2114  bondlist.append((i, j))
2115  else:
2116  bondlist.append((j, i))
2117  bondlist = sorted(list(set(bondlist)))
2118  self.Data['bonds'] = sorted(list(set(bondlist)))
2119  self.built_bonds = True
2120 
2121  def build_topology(self, force_bonds=True, **kwargs):
2122  """
2123 
2124  Create self.topology and self.molecules; these are graph
2125  representations of the individual molecules (fragments)
2126  contained in the Molecule object.
2127 
2128  Parameters
2129  ----------
2130  force_bonds : bool
2131  Build the bonds from interatomic distances. If the user
2132  calls build_topology from outside, assume this is the
2133  default behavior. If creating a Molecule object using
2134  __init__, do not force the building of bonds by default
2135  (only build bonds if not read from file.)
2136  topframe : int, optional
2137  Provide the frame number used for reading the bonds. If
2138  not provided, this will be taken from the top_settings
2139  field. If provided, this will take priority and write
2140  the value into top_settings.
2141  """
2142  sn = kwargs.get('topframe', self.top_settings['topframe'])
2143  self.top_settings['topframe'] = sn
2144  if self.na > 100000:
2145  logger.warning("Warning: Large number of atoms (%i), topology building may take a long time" % self.na)
2146  # Build bonds from connectivity graph if not read from file.
2147  if (not self.top_settings['read_bonds']) or force_bonds:
2148  self.build_bonds()
2149  # Create a NetworkX graph object to hold the bonds.
2150  G = MyG()
2151  for i, a in enumerate(self.elem):
2152  G.add_node(i)
2153  if parse_version(nx.__version__) >= parse_version('2.0'):
2154  if 'atomname' in self.Data:
2155  nx.set_node_attributes(G,{i:self.atomname[i]}, name='n')
2156  nx.set_node_attributes(G,{i:a}, name='e')
2157  nx.set_node_attributes(G,{i:self.xyzs[sn][i]}, name='x')
2158  else:
2159  if 'atomname' in self.Data:
2160  nx.set_node_attributes(G,'n',{i:self.atomname[i]})
2161  nx.set_node_attributes(G,'e',{i:a})
2162  nx.set_node_attributes(G,'x',{i:self.xyzs[sn][i]})
2163  for (i, j) in self.bonds:
2164  G.add_edge(i, j)
2165  # The Topology is simply the NetworkX graph object.
2166  self.topology = G
2167  # LPW: Molecule.molecules is a funny misnomer... it should be fragments or substructures or something
2168  self.molecules = [G.subgraph(c).copy() for c in nx.connected_components(G)]
2169  for g in self.molecules: g.__class__ = MyG
2170  # Deprecated in networkx 2.2
2171  # self.molecules = list(nx.connected_component_subgraphs(G))
2172 
2173  def distance_matrix(self, pbc=True):
2174  """ Obtain distance matrix between all pairs of atoms. """
2175  AtomIterator = np.ascontiguousarray(np.vstack((np.fromiter(itertools.chain(*[[i]*(self.na-i-1) for i in range(self.na)]),dtype=np.int32),
2176  np.fromiter(itertools.chain(*[range(i+1,self.na) for i in range(self.na)]),dtype=np.int32))).T)
2177  if hasattr(self, 'boxes') and pbc:
2178  boxes = np.array([[self.boxes[i].a, self.boxes[i].b, self.boxes[i].c] for i in range(len(self))])
2179  drij = AtomContact(np.array(self.xyzs), AtomIterator, box=boxes)
2180  else:
2181  drij = AtomContact(np.array(self.xyzs), AtomIterator)
2182  return AtomIterator, list(drij)
2183 
2184  def distance_displacement(self):
2185  """ Obtain distance matrix and displacement vectors between all pairs of atoms. """
2186  AtomIterator = np.ascontiguousarray(np.vstack((np.fromiter(itertools.chain(*[[i]*(self.na-i-1) for i in range(self.na)]),dtype=np.int32),
2187  np.fromiter(itertools.chain(*[range(i+1,self.na) for i in range(self.na)]),dtype=np.int32))).T)
2188  if hasattr(self, 'boxes') and pbc:
2189  boxes = np.array([[self.boxes[i].a, self.boxes[i].b, self.boxes[i].c] for i in range(len(self))])
2190  drij, dxij = AtomContact(np.array(self.xyzs), AtomIterator, box=boxes, displace=True)
2191  else:
2192  drij, dxij = AtomContact(np.array(self.xyzs), AtomIterator, box=None, displace=True)
2193  return AtomIterator, list(drij), list(dxij)
2194 
2195  def rotate_bond(self, frame, aj, ak, increment=15):
2196  """
2197  Return a new Molecule object containing the selected frame
2198  plus a number of frames where the selected dihedral angle is rotated
2199  in steps of 'increment' given in degrees.
2200 
2201  This function is designed to be called by Molecule.rotate_check_clash().
2202 
2203  Parameters
2204  ----------
2205  frame : int
2206  Structure number of the current Molecule to be rotated
2207  aj, ak : int
2208  Atom numbers of the bond to be rotated
2209  increment : float
2210  Degrees of the rotation increment
2211 
2212  Returns
2213  -------
2214  Molecule
2215  New Molecule object containing the rotated structures
2216  """
2217  # Select the single frame containing the structure to be rotated.
2218  M = self[frame]
2219 
2220  # Delete the connection between atoms to be rotated in order
2221  # to determine the rotation fragments.
2222  delBonds = []
2223  for ibond, bond in enumerate(M.bonds):
2224  if bond == (aj, ak) or bond == (ak, aj):
2225  delBonds.append(bond)
2226  if len(delBonds) > 1:
2227  raise RuntimeError('Expected only one bond to be deleted')
2228  if len(M.molecules) != 1:
2229  raise RuntimeError('Expected a single molecule')
2230  M.bonds.remove(delBonds[0])
2231  M.top_settings['read_bonds']=True
2232  M.build_topology(force_bonds=False)
2233  if len(M.molecules) != 2:
2234  raise RuntimeError('Expected two molecules after removing a bond')
2236  # gAtoms contains the set of atoms to be rotated
2237  # oAtoms contains the "other" atoms
2238  if len(M.molecules[0].L()) < len(M.molecules[1].L()):
2239  gAtoms = M.molecules[0].L()
2240  oAtoms = M.molecules[1].L()
2241  else:
2242  gAtoms = M.molecules[1].L()
2243  oAtoms = M.molecules[0].L()
2244 
2245  # atom2 is the "reference atom" on the group being rotated
2246  # and will be moved to the origin during the rotation operation
2247  if aj in gAtoms:
2248  atom1 = ak
2249  atom2 = aj
2250  else:
2251  atom1 = aj
2252  atom2 = ak
2253  M.bonds.append(delBonds[0])
2255  # Rotation axis
2256  axis = M.xyzs[0][atom2] - M.xyzs[0][atom1]
2257 
2258  # Move the "reference atom" to the origin
2259  x0 = M.xyzs[0][gAtoms]
2260  x0_ref = M.xyzs[0][atom2]
2261  x0 -= x0_ref
2262 
2263  # Create grid in rotation angle
2264  # and the rotated structures
2265  for thetaDeg in np.arange(increment, 360, increment):
2266  theta = np.pi * thetaDeg / 180
2267  # Make quaternion
2268  R = axis_angle(axis, theta)
2269  # Get rotated coordinates
2270  x0_rot = np.dot(R, x0.T).T
2271  # Copy old coordinates to new
2272  xnew = M.xyzs[0].copy()
2273  # Write rotated positions into new coordinates;
2274  # shift back to original positions
2275  xnew[gAtoms] = x0_rot + x0_ref
2276  # Append new coordinates to our Molecule object
2277  M.xyzs.append(xnew)
2278  M.comms.append("Rotated by %.2f degrees" % increment)
2279  return M, (gAtoms, oAtoms)
2280 
2281  def find_clashes(self, thre=0.0, pbc=True, groups=None):
2282  """
2283  Obtain a list of atoms that 'clash' (i.e. are more than
2284  3 bonds apart and are closer than the provided threshold.)
2285 
2286  Parameters
2287  ----------
2288  thre : float
2289  Create a sorted-list of all non-bonded atom pairs
2290  with distance below this threshold
2291  pbc : bool
2292  Whether to use PBC when computing interatomic distances
2293  filt : tuple (group1, group2)
2294  If not None, only compute distances between atoms in 'group1'
2295  and atoms in 'group2'. For example this could be [gAtoms, oAtoms]
2296  from rotate_bond
2297 
2298  Returns
2299  -------
2300  minPair_frames : list of tuples
2301  The closest pair of non-bonded atoms in each frame
2302  minDist_frames : list of tuples
2303  The distance between the closest pair of non-bonded atoms in each frame
2304  clashPairs_frames : list of lists
2305  Pairs of non-bonded atoms with distance below the threshold in each frame
2306  clashDists_frames : list of lists
2307  Distances between pairs of non-bonded atoms below the threshold in each frame
2308  """
2309  if groups is not None:
2310  AtomIterator = np.ascontiguousarray([[min(g), max(g)] for g in itertools.product(groups[0], groups[1])])
2311  else:
2312  AtomIterator = np.ascontiguousarray(np.vstack((np.fromiter(itertools.chain(*[[i]*(self.na-i-1) for i in range(self.na)]),dtype=np.int32),
2313  np.fromiter(itertools.chain(*[range(i+1,self.na) for i in range(self.na)]),dtype=np.int32))).T)
2314  ang13 = [(min(a[0], a[2]), max(a[0], a[2])) for a in self.find_angles()]
2315  dih14 = [(min(d[0], d[3]), max(d[0], d[3])) for d in self.find_dihedrals()]
2316  bondedPairs = np.where([tuple(aPair) in (self.bonds+ang13+dih14) for aPair in AtomIterator])[0]
2317  AtomIterator_nb = np.delete(AtomIterator, bondedPairs, axis=0)
2318 
2319  minPair_frames = []
2320  minDist_frames = []
2321  clashPairs_frames = []
2322  clashDists_frames = []
2323  if hasattr(self, 'boxes') and pbc:
2324  boxes = np.array([[self.boxes[i].a, self.boxes[i].b, self.boxes[i].c] for i in range(len(self))])
2325  drij = AtomContact(np.array(self.xyzs), AtomIterator_nb, box=boxes)
2326  else:
2327  drij = AtomContact(np.array(self.xyzs), AtomIterator_nb)
2328  for frame in range(len(self)):
2329  clashPairIdx = np.where(drij[frame] < thre)[0]
2330  clashPairs = AtomIterator_nb[clashPairIdx]
2331  clashDists = drij[frame, clashPairIdx]
2332  sorter = np.argsort(clashDists)
2333  clashPairs = clashPairs[sorter]
2334  clashDists = clashDists[sorter]
2335  minIdx = np.argmin(drij[frame])
2336  minPair = AtomIterator_nb[minIdx]
2337  minDist = drij[frame, minIdx]
2338  minPair_frames.append(minPair)
2339  minDist_frames.append(minDist)
2340  clashPairs_frames.append(clashPairs.copy())
2341  clashDists_frames.append(clashDists.copy())
2342  return minPair_frames, minDist_frames, clashPairs_frames, clashDists_frames
2343 
2344  def rotate_check_clash(self, frame, rotate_index, thresh_hyd=1.4, thresh_hvy=1.8, printLevel=1):
2345  """
2346  Return a new Molecule object containing the selected frame
2347  plus a number of frames where the selected dihedral angle is rotated
2348  in steps of 'increment' given in degrees. Additionally, check for
2349  if pairs of non-bonded atoms "clash" i.e. approach below the specified
2350  thresholds.
2351 
2352  Parameters
2353  ----------
2354  frame : int
2355  Structure number of the current Molecule to be rotated
2356  (if only a single frame, this number should be 0)
2357  rotate_index : tuple
2358  4 atom indices (ai, aj, ak, al) representing the dihedral angle to be rotated.
2359  The first and last indices are used to measure the dihedral angle.
2360  thresh_hyd : float
2361  Clash threshold for hydrogen atoms. Reasonable values are in between 1.3 and 2.0.
2362  thresh_hvy : float
2363  Clash threshold for heavy atoms. Reasonable values are in between 1.7 and 2.5.
2364  printLevel: int
2365  Sets the amount of printout (larger = more printout)
2366 
2367  Returns
2368  -------
2369  Molecule
2370  New Molecule object containing the rotated structures
2371  Success : bool
2372  True if no clashes
2373  """
2374  if not hasattr(self, 'atomname'):
2375  raise RuntimeError("Please add atom names before calling rotate_check_clash().")
2376  ai, aj, ak, al = rotate_index
2377  Success = False
2378  # Create grid of rotated structures
2379  M_rot_H, frags = self.rotate_bond(frame, aj, ak, 15)
2380  phis = M_rot_H.measure_dihedrals(*rotate_index)
2381  for i in range(len(M_rot_H)):
2382  M_rot_H.comms[i] = ('Rigid scan: atomname %s, serial %s, dihedral %.3f'
2383  % ('-'.join([self.atomname[i] for i in rotate_index]),
2384  '-'.join(["%i" % (i+1) for i in rotate_index]), phis[i]))
2385  heavyIdx = [i for i in range(self.na) if self.elem[i] != 'H']
2386  heavy_frags = [[],[]]
2387  for iHeavy, iAll in enumerate(heavyIdx):
2388  if iAll in frags[0]:
2389  heavy_frags[0].append(iHeavy)
2390  if iAll in frags[1]:
2391  heavy_frags[1].append(iHeavy)
2392  M_rot_C = M_rot_H.atom_select(heavyIdx)
2393  minPair_H_frames, minDist_H_frames, clashPairs_H_frames, clashDists_H_frames = M_rot_H.find_clashes(thre=thresh_hyd, groups=frags)
2394  minPair_C_frames, minDist_C_frames, clashPairs_C_frames, clashDists_C_frames = M_rot_C.find_clashes(thre=thresh_hvy, groups=heavy_frags)
2395  # Get the following information: (1) Whether a clash exists, (2) the frame with the smallest distance,
2396  # (3) the pair of atoms with the smallest distance, (4) the smallest distance
2397  haveClash_H = any([len(c) > 0 for c in clashPairs_H_frames])
2398  minFrame_H = np.argmin(minDist_H_frames)
2399  minAtoms_H = minPair_H_frames[minFrame_H]
2400  minDist_H = minDist_H_frames[minFrame_H]
2401  haveClash_C = any([len(c) > 0 for c in clashPairs_C_frames])
2402  minFrame_C = np.argmin(minDist_C_frames)
2403  minAtoms_C = minPair_C_frames[minFrame_C]
2404  minDist_C = minDist_C_frames[minFrame_C]
2405  if not (haveClash_H or haveClash_C):
2406  if printLevel >= 1:
2407  print("\n \x1b[1;92mSuccess - no clashes. Thresh(H, Hvy) = (%.2f, %.2f)\x1b[0m" % (thresh_hyd, thresh_hvy))
2408  mini = M_rot_H.atomname[minAtoms_H[0]]
2409  minj = M_rot_H.atomname[minAtoms_H[1]]
2410  print(" Closest (Hyd) : rot-frame %i atoms %s-%s %.2f" % (minFrame_H, mini, minj, minDist_H))
2411  mini = M_rot_C.atomname[minAtoms_C[0]]
2412  minj = M_rot_C.atomname[minAtoms_C[1]]
2413  print(" Closest (Hvy) : rot-frame %i atoms %s-%s %.2f" % (minFrame_C, mini, minj, minDist_C))
2414  Success = True
2415  else:
2416  if haveClash_H:
2417  mini = M_rot_H.atomname[minAtoms_H[0]]
2418  minj = M_rot_H.atomname[minAtoms_H[1]]
2419  if printLevel >= 2: print(" Clash (Hyd) : rot-frame %i atoms %s-%s %.2f" % (minFrame_H, mini, minj, minDist_H))
2420  if haveClash_C:
2421  mini = M_rot_C.atomname[minAtoms_C[0]]
2422  minj = M_rot_C.atomname[minAtoms_C[1]]
2423  if printLevel >= 2: print(" Clash (Hvy) : rot-frame %i atoms %s-%s %.2f" % (minFrame_C, mini, minj, minDist_C))
2424  return M_rot_H, Success
2425 
2426  def find_angles(self):
2427 
2428  """ Return a list of 3-tuples corresponding to all of the
2429  angles in the system. Verified for lysine and tryptophan
2430  dipeptide when comparing to TINKER's analyze program. """
2431 
2432  if not hasattr(self, 'topology'):
2433  logger.error("Need to have built a topology to find angles\n")
2434  raise RuntimeError
2435 
2436  angidx = []
2437  # Iterate over separate molecules
2438  for mol in self.molecules:
2439  # Iterate over atoms in the molecule
2440  for a2 in list(mol.nodes()):
2441  # Find all bonded neighbors to this atom
2442  friends = sorted(list(nx.neighbors(mol, a2)))
2443  if len(friends) < 2: continue
2444  # Double loop over bonded neighbors
2445  for i, a1 in enumerate(friends):
2446  for a3 in friends[i+1:]:
2447  # Add bonded atoms in the correct order
2448  angidx.append((a1, a2, a3))
2449  return angidx
2450 
2451  def find_dihedrals(self):
2452 
2453  """ Return a list of 4-tuples corresponding to all of the
2454  dihedral angles in the system. Verified for alanine and
2455  tryptophan dipeptide when comparing to TINKER's analyze
2456  program. """
2457 
2458  if not hasattr(self, 'topology'):
2459  logger.error("Need to have built a topology to find dihedrals\n")
2460  raise RuntimeError
2461 
2462  dihidx = []
2463  # Iterate over separate molecules
2464  for mol in self.molecules:
2465  # Iterate over bonds in the molecule
2466  for edge in list(mol.edges()):
2467  # Determine correct ordering of atoms (middle atoms are ordered by convention)
2468  a2 = edge[0] if edge[0] < edge[1] else edge[1]
2469  a3 = edge[1] if edge[0] < edge[1] else edge[0]
2470  for a1 in sorted(list(nx.neighbors(mol, a2))):
2471  if a1 != a3:
2472  for a4 in sorted(list(nx.neighbors(mol, a3))):
2473  if a4 != a2 and len({a1, a2, a3, a4}) == 4:
2474  dihidx.append((a1, a2, a3, a4))
2475  return dihidx
2476 
2477  def measure_distances(self, i, j):
2478  distances = []
2479  for s in range(self.ns):
2480  x1 = self.xyzs[s][i]
2481  x2 = self.xyzs[s][j]
2482  distance = np.linalg.norm(x1-x2)
2483  distances.append(distance)
2484  return distances
2485 
2486  def measure_angles(self, i, j, k):
2487  angles = []
2488  for s in range(self.ns):
2489  x1 = self.xyzs[s][i]
2490  x2 = self.xyzs[s][j]
2491  x3 = self.xyzs[s][k]
2492  v1 = x1-x2
2493  v2 = x3-x2
2494  n = np.dot(v1,v2)/(np.linalg.norm(v1)*np.linalg.norm(v2))
2495  angle = np.arccos(n)
2496  angles.append(angle * 180/ np.pi)
2497  return angles
2498 
2499  def measure_dihedrals(self, i, j, k, l):
2500  """ Return a series of dihedral angles, given four atom indices numbered from zero. """
2501  phis = []
2502  if 'bonds' in self.Data:
2503  if any(p not in self.bonds for p in [(min(i,j),max(i,j)),(min(j,k),max(j,k)),(min(k,l),max(k,l))]):
2504  logger.warning([(min(i,j),max(i,j)),(min(j,k),max(j,k)),(min(k,l),max(k,l))])
2505  logger.warning("Measuring dihedral angle for four atoms that aren't bonded. Hope you know what you're doing!")
2506  else:
2507  logger.warning("This molecule object doesn't have bonds defined, sanity-checking is off.")
2508  for s in range(self.ns):
2509  x4 = self.xyzs[s][l]
2510  x3 = self.xyzs[s][k]
2511  x2 = self.xyzs[s][j]
2512  x1 = self.xyzs[s][i]
2513  v1 = x2-x1
2514  v2 = x3-x2
2515  v3 = x4-x3
2516  t1 = np.linalg.norm(v2)*np.dot(v1,np.cross(v2,v3))
2517  t2 = np.dot(np.cross(v1,v2),np.cross(v2,v3))
2518  phi = np.arctan2(t1,t2)
2519  phis.append(phi * 180 / np.pi)
2520  #phimod = phi*180/pi % 360
2521  #phis.append(phimod)
2522  #print phimod
2523  return phis
2524 
2525  def find_rings(self, max_size=6):
2526  """
2527  Return a list of rings in the molecule. Tested on a DNA base
2528  pair and C60. Warning: Using large max_size for rings
2529  (e.g. for finding the macrocycle in porphyrin) could lead to
2530  some undefined behavior.
2531 
2532  Parameters
2533  ----------
2534  max_size : int
2535  The maximum ring size. If a large ring contains smaller
2536  sub-rings, they are all mapped into one.
2537 
2538  Returns
2539  -------
2540  list
2541  A list of rings sorted in ascending order of the smallest
2542  atom number. The atoms in each ring are ordered starting
2543  from the lowest number and going along the ring, with the
2544  second atom being the lower of the two possible choices.
2545  """
2546  friends = []
2547  for i in range(self.na):
2548  friends.append(self.topology.neighbors(i))
2549  # Determine if atom is in a ring
2550  self.build_topology()
2551  # Get triplets of atoms that are in rings
2552  triplets = []
2553  for i in range(self.na):
2554  g = copy.deepcopy(self.topology)
2555  n = g.neighbors(i)
2556  g.remove_node(i)
2557  for a, b in itertools.combinations(n, 2):
2558  try:
2559  PathLength = nx.shortest_path_length(g, a, b)
2560  except nx.exception.NetworkXNoPath:
2561  PathLength = 0
2562  if PathLength > 0 and PathLength <= (max_size-2):
2563  if b > a:
2564  triplets.append((a, i, b))
2565  else:
2566  triplets.append((b, i, a))
2567  # Organize triplets into rings
2568  rings = []
2569  # Triplets are assigned to rings
2570  assigned = {}
2571  # For each triplet that isn't already counted, see if it belongs to a ring already
2572  while set(assigned.keys()) != set(triplets):
2573  for t in triplets:
2574  if t not in assigned:
2575  # print t, "has not been assigned yet"
2576  # Whether this triplet has been assigned to a ring
2577  has_been_assigned = False
2578  # Create variable for new rings
2579  new_rings = copy.deepcopy(rings)
2580  # Assign triplet to a ring
2581  for iring, ring in enumerate(rings):
2582  # Loop over triplets in the ring
2583  for r in ring:
2584  # Two triplets belong to the same ring if two of the atoms
2585  # are the same AND there exists a path connecting them with the
2586  # center atom deleted. Check the forward and reverse orientations
2587  if ((r[0] == t[1] and r[1] == t[2]) or
2588  (r[::-1][0] == t[1] and r[::-1][1] == t[2]) or
2589  (r[0] == t[::-1][1] and r[1] == t[::-1][2]) or
2590  (r[::-1][0] == t[::-1][1] and r[1] == t[::-1][2])):
2591  ends = list(set(r).symmetric_difference(t))
2592  mids = set(r).intersection(t)
2593  g = copy.deepcopy(self.topology)
2594  for m in mids: g.remove_node(m)
2595  try:
2596  PathLength = nx.shortest_path_length(g, ends[0], ends[1])
2597  except nx.exception.NetworkXNoPath:
2598  PathLength = 0
2599  if PathLength <= 0 or PathLength > (max_size-2):
2600  # print r, t, "share two atoms but are on different rings"
2601  continue
2602  if has_been_assigned:
2603  # This happens if two rings have separately been found but they're actually the same
2604  # print "trying to assign t=", t, "to r=", r, "but it's already in", rings[assigned[t]]
2605  # print "Merging", rings[iring], "into", rings[assigned[t]]
2606  for r1 in rings[iring]:
2607  new_rings[assigned[t]].append(r1)
2608  del new_rings[new_rings.index(rings[iring])]
2609  break
2610  new_rings[iring].append(t)
2611  assigned[t] = iring
2612  has_been_assigned = True
2613  # print t, "assigned to ring", iring
2614  break
2615  # If the triplet was not assigned to a ring,
2616  # then create a new one
2617  if not has_been_assigned:
2618  # print t, "creating new ring", len(new_rings)
2619  assigned[t] = len(new_rings)
2620  new_rings.append([t])
2621  # Now the ring has a new triplet assigned to it
2622  rings = copy.deepcopy(new_rings)
2623  # Keep the middle atom in each triplet
2624  rings = [sorted(list(set([t[1] for t in r]))) for r in rings]
2625  # print rings
2626  # Sorted rings start from the lowest atom and go around the ring in ascending order
2627  sorted_rings = []
2628  for ring in rings:
2629  # print "Sorting Ring", ring
2630  minr = min(ring)
2631  ring.remove(minr)
2632  sring = [minr]
2633  while len(ring) > 0:
2634  for r in sorted(ring):
2635  if sring[-1] in friends[r]:
2636  ring.remove(r)
2637  sring.append(r)
2638  break
2639  sorted_rings.append(sring[:])
2640  return sorted(sorted_rings, key = lambda val: val[0])
2641 
2642  def order_by_connectivity(self, m, i, currList, max_min_path):
2643  """
2644  Recursive function that orders atoms based on connectivity.
2645  To call the function, pass in the topology object (e.g. M.molecules[0])
2646  and the index of the atom assigned to be "first".
2647 
2648  The neighbors of "i" are placed in decreasing order of the
2649  maximum length of the shortest path to all other atoms - i.e.
2650  an atom closer to the "edge" of the molecule has a longer
2651  shortest-path to atoms on the other "edge", and they should
2652  be added first.
2653 
2654  Snippet:
2655 
2656  from forcebalance.molecule import *
2657  import numpy as np
2658  M = Molecule('movProt.xyz', Fac=1.25)
2659  # Arbitrarily select heaviest atom as the first atom in each molecule
2660  firstAtom = [m.L()[np.argmax([PeriodicTable[M.elem[i]] for i in m.L()])] for m in M.molecules]
2661  # Explicitly set the first atom in the first molecule
2662  firstAtom[0] = 40
2663  # Build a list of new atom orderings
2664  newOrder = []
2665  for i in range(len(M.molecules)):
2666  newOrder += M.order_by_connectivity(M.molecules[i], firstAtom[i], [], None)
2667  # Write the new Molecule object
2668  newM = M.atom_select(newOrder)
2669  newM.write('reordered1.xyz')
2670 
2671  Parameters
2672  ----------
2673  m : topology object (contained in Molecule)
2674  i : integer
2675  Index of the atom to be added first (if called recursively,
2676  the index of subsequent atoms to be added)
2677  currList : list
2678  Current list of new atom orderings
2679  max_min_path : dict
2680  Dictionary mapping
2681  """
2682  if m not in self.molecules:
2683  raise RuntimeError("topology is not part of Molecule object")
2684  if max_min_path is None:
2685  spl = nx.shortest_path_length(m)
2686  # Dictionary mapping atom index to
2687  # maximum length of shortest path to all other atoms
2688  # The larger this number, the closer to the "edge" of the molecule
2689  max_min_path = dict([(k, max(spl[k].values())) for k in m.L()])
2690  #currList = currList[:]
2691  #print currList
2692  #print max_min_path
2693  #raw_input()
2694  currList.append(i)
2695  matom = m.L()
2696  #print spl.keys()
2697  #print matom
2698  #raw_input()
2699  if i not in matom:
2700  raise RuntimeError('atom %i not in molecule' % i)
2701  jlist = np.array(m.neighbors(i))
2702  jmass = [PeriodicTable[self.elem[j]] for j in jlist]
2703  jnghs = [len(m.neighbors(j)) for j in jlist]
2704  jpath = [max_min_path[j] for j in jlist]
2705  #print "i", i, M.elem[i], "currList", currList, "jpath", jpath
2706  #raw_input()
2707  for j in jlist[np.argsort(jpath)[::-1]]:
2708  if j in currList: continue
2709  #print "adding", j, "to currList"
2710  #print "calling order_by_connectivity: j", j, "currList", currList
2711  self.order_by_connectivity(m, j, currList, max_min_path)
2712  return currList
2713 
2714  def aliphatic_hydrogens(self):
2715  hyds = []
2716  for i in range(self.na):
2717  if self.elem[i] != "H": continue
2718  if len(self.topology.neighbors(i)) != 1:
2719  raise RuntimeError("Hydrogen with not one bond?")
2720  if self.elem[self.topology.neighbors(i)[0]] not in ["N", "O", "F", "P", "S", "Cl"]:
2721  hyds.append(i)
2722  return hyds
2723 
2724  def all_pairwise_rmsd(self):
2725  """ Find pairwise RMSD (super slow, not like the one in MSMBuilder.) """
2726  N = len(self)
2727  Mat = np.zeros((N,N),dtype=float)
2728  for i in range(N):
2729  xyzi = self.xyzs[i].copy()
2730  xyzi -= xyzi.mean(0)
2731  for j in range(i):
2732  xyzj = self.xyzs[j].copy()
2733  xyzj -= xyzj.mean(0)
2734  tr, rt = get_rotate_translate(xyzj, xyzi)
2735  xyzj = np.dot(xyzj, rt) + tr
2736  rmsd = np.sqrt(3*np.mean((xyzj - xyzi) ** 2))
2737  Mat[i,j] = rmsd
2738  Mat[j,i] = rmsd
2739  return Mat
2740 
2741  def pathwise_rmsd(self, align=True):
2742  """ Find RMSD between frames along path. """
2743  N = len(self)
2744  Vec = np.zeros(N-1, dtype=float)
2745  for i in range(N-1):
2746  xyzi = self.xyzs[i].copy()
2747  j=i+1
2748  xyzj = self.xyzs[j].copy()
2749  if align:
2750  xyzi -= xyzi.mean(0)
2751  xyzj -= xyzj.mean(0)
2752  tr, rt = get_rotate_translate(xyzj, xyzi)
2753  xyzj = np.dot(xyzj, rt) + tr
2754  rmsd = np.sqrt(3*np.mean((xyzj - xyzi) ** 2))
2755  Vec[i] = rmsd
2756  return Vec
2757 
2758  def ref_rmsd(self, i, align=True):
2759  """ Find RMSD to a reference frame. """
2760  N = len(self)
2761  Vec = np.zeros(N)
2762  xyzi = self.xyzs[i].copy()
2763  if align: xyzi -= xyzi.mean(0)
2764  for j in range(N):
2765  xyzj = self.xyzs[j].copy()
2766  if align:
2767  xyzj -= xyzj.mean(0)
2768  tr, rt = get_rotate_translate(xyzj, xyzi)
2769  xyzj = np.dot(xyzj, rt) + tr
2770  rmsd = np.sqrt(3*np.mean((xyzj - xyzi) ** 2))
2771  Vec[j] = rmsd
2772  return Vec
2773 
2774  def align_center(self):
2775  self.align()
2776 
2777  def openmm_positions(self):
2778  """ Returns the Cartesian coordinates in the Molecule object in
2779  a list of OpenMM-compatible positions, so it is possible to type
2780  simulation.context.setPositions(Mol.openmm_positions()[0])
2781  or something like that.
2782  """
2783 
2784  Positions = []
2785  self.require('xyzs')
2786  for xyz in self.xyzs:
2787  Pos = []
2788  for xyzi in xyz:
2789  Pos.append(Vec3(xyzi[0]/10,xyzi[1]/10,xyzi[2]/10))
2790  Positions.append(Pos*nanometer)
2791  return Positions
2792 
2793  def openmm_boxes(self):
2794  """ Returns the periodic box vectors in the Molecule object in
2795  a list of OpenMM-compatible boxes, so it is possible to type
2796  simulation.context.setPeriodicBoxVectors(Mol.openmm_boxes()[0])
2797  or something like that.
2798  """
2799 
2800  self.require('boxes')
2801  return [(Vec3(*box.A)/10.0, Vec3(*box.B)/10.0, Vec3(*box.C)/10.0) * nanometer for box in self.boxes]
2802 
2803  def split(self, fnm=None, ftype=None, method="chunks", num=None):
2804 
2805  """ Split the molecule object into a number of separate files
2806  (chunks), either by specifying the number of frames per chunk
2807  or the number of chunks. Only relevant for "trajectories".
2808  The type of file may be specified; if they aren't specified
2809  then the original file type is used.
2810 
2811  The output file names are [name].[numbers].[extension] where
2812  [name] can be specified by passing 'fnm' or taken from the
2813  object's 'fnm' attribute by default. [numbers] are integers
2814  ranging from the lowest to the highest chunk number, prepended
2815  by zeros.
2816 
2817  If the number of chunks / frames is not specified, then one file
2818  is written for each frame.
2819 
2820  @return fnms A list of the file names that were written.
2821  """
2822  logger.error('Apparently this function has not been implemented!!')
2823  raise NotImplementedError
2824 
2825  def without(self, *args):
2826  """
2827  Return a copy of the Molecule object but with certain fields
2828  deleted if they exist. This is useful for when we'd like to
2829  add Molecule objects that don't share some fields that we know
2830  about.
2831  """
2832  # Create the sum of the two classes by copying the first class.
2833  New = Molecule()
2834  for key in AllVariableNames:
2835  if key in self.Data and key not in args:
2836  New.Data[key] = copy.deepcopy(self.Data[key])
2837  return New
2838 
2839  def read_comm_charge_mult(self, verbose=False):
2840  """ Set charge and multiplicity from reading the comment line, formatted in a specific way. """
2841  q, sz = extract_pop(self, verbose=verbose)
2842  self.charge = q
2843  self.mult = abs(sz) + 1
2844 
2845  #=====================================#
2846  #| Reading functions |#
2847  #=====================================#
2848  def read_qcschema(self, schema, **kwargs):
2849 
2850  # Already read in
2851  if isinstance(schema, dict):
2852  pass
2853 
2854  # Try to read file
2855  elif isinstance(schema, str):
2856  with open(schema, "r") as handle:
2857  schema = json.loads(handle)
2858  else:
2859  raise TypeError("Schema type not understood '{}'".format(type(schema)))
2860  ret = {"elem": schema["symbols"],
2861  "xyzs": [np.array(schema["geometry"])],
2862  "comments": []}
2863  return ret
2864 
2865  def read_xyz(self, fnm, **kwargs):
2866  """ .xyz files can be TINKER formatted which is why we have the try/except here. """
2867  try:
2868  return self.read_xyz0(fnm, **kwargs)
2869  except ActuallyArcError:
2870  return self.read_arc(fnm, **kwargs)
2871 
2872  def read_xyz0(self, fnm, **kwargs):
2873  """ Parse a .xyz file which contains several xyz coordinates, and return their elements.
2874 
2875  @param[in] fnm The input file name
2876  @return elem A list of chemical elements in the XYZ file
2877  @return comms A list of comments.
2878  @return xyzs A list of XYZ coordinates (number of snapshots times number of atoms)
2879 
2880  """
2881  xyz = []
2882  xyzs = []
2883  comms = []
2884  elem = []
2885  an = 0
2886  na = 0
2887  ln = 0
2888  absln = 0
2889  for line in open(fnm):
2890  line = line.strip().expandtabs()
2891  if ln == 0:
2892  # Skip blank lines.
2893  if len(line.strip()) > 0:
2894  try:
2895  na = int(line.strip())
2896  except:
2897  # If the first line contains a comment, it's a TINKER .arc file
2898  logger.warning("Non-integer detected in first line; will parse as TINKER .arc file.")
2899  raise ActuallyArcError
2900  elif ln == 1:
2901  sline = line.split()
2902  if len(sline) == 6 and all([isfloat(word) for word in sline]):
2903  # If the second line contains box data, it's a TINKER .arc file
2904  logger.warning("Tinker box data detected in second line; will parse as TINKER .arc file.")
2905  raise ActuallyArcError
2906  elif len(sline) >= 5 and isint(sline[0]) and isfloat(sline[2]) and isfloat(sline[3]) and isfloat(sline[4]):
2907  # If the second line contains coordinate data, it's a TINKER .arc file
2908  logger.warning("Tinker coordinate data detected in second line; will parse as TINKER .arc file.")
2909  raise ActuallyArcError
2910  comms.append(line.strip())
2911  else:
2912  line = re.sub(r"([0-9])(-[0-9])", r"\1 \2", line)
2913  # Error checking. Slows performance by ~20% when tested on a 200 MB .xyz file
2914  if not re.match(r"[A-Z][A-Za-z]?( +[-+]?([0-9]*\.)?[0-9]+){3}$", line):
2915  raise IOError("Expected coordinates at line %i but got this instead:\n%s" % (absln, line))
2916  sline = line.split()
2917  xyz.append([float(i) for i in sline[1:]])
2918  if len(elem) < na:
2919  elem.append(sline[0])
2920  an += 1
2921  if an == na:
2922  xyzs.append(np.array(xyz))
2923  xyz = []
2924  an = 0
2925  if ln == na+1:
2926  # Reset the line number counter when we hit the last line in a block.
2927  ln = -1
2928  ln += 1
2929  absln += 1
2930  Answer = {'elem' : elem,
2931  'xyzs' : xyzs,
2932  'comms': comms}
2933  return Answer
2934 
2935  def read_mdcrd(self, fnm, **kwargs):
2936  """ Parse an AMBER .mdcrd file. This requires at least the number of atoms.
2937  This will FAIL for monatomic trajectories (but who the heck makes those?)
2938 
2939  @param[in] fnm The input file name
2940  @return xyzs A list of XYZ coordinates (number of snapshots times number of atoms)
2941  @return boxes Boxes (if present.)
2942 
2943  """
2944  self.require('na')
2945  xyz = []
2946  xyzs = []
2947  boxes = []
2948  ln = 0
2949  for line in open(fnm):
2950  sline = line.split()
2951  if ln == 0:
2952  pass
2953  else:
2954  if xyz == [] and len(sline) == 3:
2955  a, b, c = (float(i) for i in line.split())
2956  boxes.append(BuildLatticeFromLengthsAngles(a, b, c, 90.0, 90.0, 90.0))
2957  else:
2958  xyz += [float(i) for i in line.split()]
2959  if len(xyz) == self.na * 3:
2960  xyzs.append(np.array(xyz).reshape(-1,3))
2961  xyz = []
2962  ln += 1
2963  Answer = {'xyzs' : xyzs}
2964  if len(boxes) > 0:
2965  Answer['boxes'] = boxes
2966  return Answer
2968  def read_inpcrd(self, fnm, **kwargs):
2969  """ Parse an AMBER .inpcrd or .rst file.
2970 
2971  @param[in] fnm The input file name
2972  @return xyzs A list of XYZ coordinates (number of snapshots times number of atoms)
2973  @return boxes Boxes (if present.)
2974 
2975  """
2976  xyz = []
2977  xyzs = []
2978  # We read in velocities but never use them.
2979  vel = []
2980  vels = []
2981  boxes = []
2982  ln = 0
2983  an = 0
2984  mode = 'x'
2985  for line in open(fnm):
2986  line = line.replace('\n', '')
2987  if ln == 0:
2988  comms = [line]
2989  elif ln == 1:
2990  # Although is isn't exactly up to spec,
2991  # it seems that some .rst7 files have spaces that precede the "integer"
2992  # and others have >99999 atoms
2993  # na = int(line[:5])
2994  na = int(line.split()[0])
2995  elif mode == 'x':
2996  xyz.append([float(line[:12]), float(line[12:24]), float(line[24:36])])
2997  an += 1
2998  if an == na:
2999  xyzs.append(np.array(xyz))
3000  mode = 'v'
3001  an = 0
3002  if len(line) > 36:
3003  xyz.append([float(line[36:48]), float(line[48:60]), float(line[60:72])])
3004  an += 1
3005  if an == na:
3006  xyzs.append(np.array(xyz))
3007  mode = 'v'
3008  an = 0
3009  elif mode == 'v':
3010  vel.append([float(line[:12]), float(line[12:24]), float(line[24:36])])
3011  an += 1
3012  if an == na:
3013  vels.append(np.array(vel))
3014  mode = 'b'
3015  an = 0
3016  if len(line) > 36:
3017  vel.append([float(line[36:48]), float(line[48:60]), float(line[60:72])])
3018  an += 1
3019  if an == na:
3020  vels.append(np.array(vel))
3021  mode = 'b'
3022  an = 0
3023  elif mode == 'b':
3024  a, b, c = (float(line[:12]), float(line[12:24]), float(line[24:36]))
3025  boxes.append(BuildLatticeFromLengthsAngles(a, b, c, 90.0, 90.0, 90.0))
3026  ln += 1
3027  # If there is only one velocity, then it should actually be a periodic box.
3028  if len(vel) == 1:
3029  a, b, c = vel[0]
3030  boxes.append(BuildLatticeFromLengthsAngles(a, b, c, 90.0, 90.0, 90.0))
3031  Answer = {'xyzs' : xyzs, 'comms' : comms}
3032  if len(boxes) > 0:
3033  Answer['boxes'] = boxes
3034  return Answer
3035 
3036  def read_qdata(self, fnm, **kwargs):
3037  xyzs = []
3038  energies = []
3039  grads = []
3040  espxyzs = []
3041  espvals = []
3042  interaction = []
3043  for line in open(fnm):
3044  line = line.strip().expandtabs()
3045  if 'COORDS' in line:
3046  xyzs.append(np.array([float(i) for i in line.split()[1:]]).reshape(-1,3))
3047  elif 'FORCES' in line or 'GRADIENT' in line: # 'FORCES' is from an earlier version and a misnomer
3048  grads.append(np.array([float(i) for i in line.split()[1:]]).reshape(-1,3))
3049  elif 'ESPXYZ' in line:
3050  espxyzs.append(np.array([float(i) for i in line.split()[1:]]).reshape(-1,3))
3051  elif 'ESPVAL' in line:
3052  espvals.append(np.array([float(i) for i in line.split()[1:]]))
3053  elif 'ENERGY' in line:
3054  energies.append(float(line.split()[1]))
3055  elif 'INTERACTION' in line:
3056  interaction.append(float(line.split()[1]))
3057  Answer = {}
3058  if len(xyzs) > 0:
3059  Answer['xyzs'] = xyzs
3060  if len(energies) > 0:
3061  Answer['qm_energies'] = energies
3062  if len(interaction) > 0:
3063  Answer['qm_interaction'] = interaction
3064  if len(grads) > 0:
3065  Answer['qm_grads'] = grads
3066  if len(espxyzs) > 0:
3067  Answer['qm_espxyzs'] = espxyzs
3068  if len(espvals) > 0:
3069  Answer['qm_espvals'] = espvals
3070  return Answer
3071 
3072  def read_mol2(self, fnm, **kwargs):
3073  xyz = []
3074  charge = []
3075  atomname = []
3076  atomtype = []
3077  elem = []
3078  resname = []
3079  resid = []
3080  data = Mol2.mol2_set(fnm)
3081  if len(data.compounds) > 1:
3082  sys.stderr.write("Not sure what to do if the MOL2 file contains multiple compounds\n")
3083  for i, atom in enumerate(list(data.compounds.items())[0][1].atoms):
3084  xyz.append([atom.x, atom.y, atom.z])
3085  charge.append(atom.charge)
3086  atomname.append(atom.atom_name)
3087  atomtype.append(atom.atom_type)
3088  resname.append(atom.subst_name)
3089  resid.append(atom.subst_id)
3090  thiselem = atom.atom_name
3091  if len(thiselem) > 1:
3092  thiselem = thiselem[0] + re.sub('[A-Z0-9]','',thiselem[1:])
3093  elem.append(thiselem)
3094 
3095  # resname = [list(data.compounds.items())[0][0] for i in range(len(elem))]
3096  # resid = [1 for i in range(len(elem))]
3097 
3098  # Deprecated 'abonds' format.
3099  # bonds = [[] for i in range(len(elem))]
3100  # for bond in data.compounds.items()[0][1].bonds:
3101  # a1 = bond.origin_atom_id - 1
3102  # a2 = bond.target_atom_id - 1
3103  # aL, aH = (a1, a2) if a1 < a2 else (a2, a1)
3104  # bonds[aL].append(aH)
3105 
3106  bonds = []
3107  for bond in list(data.compounds.items())[0][1].bonds:
3108  a1 = bond.origin_atom_id - 1
3109  a2 = bond.target_atom_id - 1
3110  aL, aH = (a1, a2) if a1 < a2 else (a2, a1)
3111  bonds.append((aL,aH))
3112 
3113  self.top_settings["read_bonds"] = True
3114  Answer = {'xyzs' : [np.array(xyz)],
3115  'partial_charge' : charge,
3116  'atomname' : atomname,
3117  'atomtype' : atomtype,
3118  'elem' : elem,
3119  'resname' : resname,
3120  'resid' : resid,
3121  'bonds' : bonds
3122  }
3123 
3124  return Answer
3126  def read_dcd(self, fnm, **kwargs):
3127  xyzs = []
3128  boxes = []
3129  if _dcdlib.vmdplugin_init() != 0:
3130  logger.error("Unable to init DCD plugin\n")
3131  raise IOError
3132  natoms = c_int(-1)
3133  frame = 0
3134  dcd = _dcdlib.open_dcd_read(fnm, "dcd", byref(natoms))
3135  ts = MolfileTimestep()
3136  _xyz = c_float * (natoms.value * 3)
3137  xyzvec = _xyz()
3138  ts.coords = xyzvec
3139  while True:
3140  result = _dcdlib.read_next_timestep(dcd, natoms, byref(ts))
3141  if result == 0:
3142  frame += 1
3143  elif result == -1:
3144  break
3145  #npa = np.array(xyzvec)
3146  xyz = np.asfarray(xyzvec)
3147  xyzs.append(xyz.reshape(-1, 3))
3148  boxes.append(BuildLatticeFromLengthsAngles(ts.A, ts.B, ts.C, 90.0, 90.0, 90.0))
3149  _dcdlib.close_file_read(dcd)
3150  dcd = None
3151  Answer = {'xyzs' : xyzs,
3152  'boxes' : boxes}
3153  return Answer
3154 
3155  def read_com(self, fnm, **kwargs):
3156  """ Parse a Gaussian .com file and return a SINGLE-ELEMENT list of xyz coordinates (no multiple file support)
3157 
3158  @param[in] fnm The input file name
3159  @return elem A list of chemical elements in the XYZ file
3160  @return comms A single-element list for the comment.
3161  @return xyzs A single-element list for the XYZ coordinates.
3162  @return charge The total charge of the system.
3163  @return mult The spin multiplicity of the system.
3164 
3165  """
3166  elem = []
3167  xyz = []
3168  ln = 0
3169  absln = 0
3170  comfile = open(fnm).readlines()
3171  inxyz = 0
3172  for line in comfile:
3173  line = line.strip().expandtabs()
3174  # Everything after exclamation point is a comment
3175  sline = line.split('!')[0].split()
3176  if len(sline) == 2:
3177  if isint(sline[0]) and isint(sline[1]):
3178  charge = int(sline[0])
3179  mult = int(sline[1])
3180  title_ln = ln - 2
3181  elif len(sline) == 4:
3182  inxyz = 1
3183  if sline[0].capitalize() in PeriodicTable and isfloat(sline[1]) and isfloat(sline[2]) and isfloat(sline[3]):
3184  elem.append(sline[0])
3185  xyz.append(np.array([float(sline[1]),float(sline[2]),float(sline[3])]))
3186  elif inxyz:
3187  break
3188  ln += 1
3189  absln += 1
3190 
3191  Answer = {'xyzs' : [np.array(xyz)],
3192  'elem' : elem,
3193  'comms' : [comfile[title_ln].strip()],
3194  'charge' : charge,
3195  'mult' : mult}
3196  return Answer
3197 
3198  def read_arc(self, fnm, **kwargs):
3199  """ Read a TINKER .arc file.
3200 
3201  @param[in] fnm The input file name
3202  @return xyzs A list for the XYZ coordinates.
3203  @return boxes A list of periodic boxes (newer .arc files have these)
3204  @return resid The residue ID numbers. These are not easy to get!
3205  @return elem A list of chemical elements in the XYZ file
3206  @return comms A single-element list for the comment.
3207  @return tinkersuf The suffix that comes after lines in the XYZ coordinates; this is usually topology info
3208 
3209  """
3210  tinkersuf = []
3211  boxes = []
3212  xyzs = []
3213  xyz = []
3214  resid = []
3215  elem = []
3216  comms = []
3217  thisres = set([])
3218  forwardres = set([])
3219  title = True
3220  nframes = 0
3221  thisresid = 1
3222  ln = 0
3223  thisatom = 0
3224  for line in open(fnm):
3225  line = line.strip().expandtabs()
3226  sline = line.split()
3227  if len(sline) == 0: continue
3228  # The first line always contains the number of atoms
3229  # The words after the first line are comments
3230  if title:
3231  na = int(sline[0])
3232  comms.append(' '.join(sline[1:]))
3233  title = False
3234  elif len(sline) >= 5:
3235  if len(sline) == 6 and isfloat(sline[1]) and all([isfloat(i) for i in sline]): # Newer .arc files have a .box line.
3236  a, b, c, alpha, beta, gamma = (float(i) for i in sline[:6])
3237  boxes.append(BuildLatticeFromLengthsAngles(a, b, c, alpha, beta, gamma))
3238  elif isint(sline[0]) and isfloat(sline[2]) and isfloat(sline[3]) and isfloat(sline[4]): # A line of data better look like this
3239  if nframes == 0:
3240  elem.append(elem_from_atomname(sline[1]))
3241  resid.append(thisresid)
3242  whites = re.split('[^ ]+',line)
3243  if len(sline) > 5:
3244  s = sline[5:]
3245  if len(s) > 1:
3246  sn = [int(i) for i in s[1:]]
3247  s = [s[0]] + list(np.array(s[1:])[np.argsort(sn)])
3248  tinkersuf.append(''.join([whites[j]+s[j-5] for j in range(5,len(sline))]))
3249  else:
3250  tinkersuf.append('')
3251  # LPW Make sure ..
3252  thisatom += 1
3253  #thisatom = int(sline[0])
3254  thisres.add(thisatom)
3255  forwardres.add(thisatom)
3256  if len(sline) >= 6:
3257  forwardres.update([int(j) for j in sline[6:]])
3258  if thisres == forwardres:
3259  thisres = set([])
3260  forwardres = set([])
3261  thisresid += 1
3262  xyz.append([float(sline[2]),float(sline[3]),float(sline[4])])
3263  if thisatom == na:
3264  thisatom = 0
3265  nframes += 1
3266  title = True
3267  xyzs.append(np.array(xyz))
3268  xyz = []
3269  ln += 1
3270  Answer = {'xyzs' : xyzs,
3271  'resid' : resid,
3272  'elem' : elem,
3273  'comms' : comms,
3274  'tinkersuf' : tinkersuf}
3275  if len(boxes) > 0: Answer['boxes'] = boxes
3276  return Answer
3277 
3278  def read_gro(self, fnm, **kwargs):
3279  """ Read a GROMACS .gro file.
3280 
3281  """
3282  xyzs = []
3283  elem = [] # The element, most useful for quantum chemistry calculations
3284  atomname = [] # The atom name, for instance 'HW1'
3285  comms = []
3286  resid = []
3287  resname = []
3288  boxes = []
3289  xyz = []
3290  ln = 0
3291  frame = 0
3292  absln = 0
3293  na = -10
3294  for line in open(fnm):
3295  sline = line.split()
3296  if ln == 0:
3297  comms.append(line.strip())
3298  elif ln == 1:
3299  na = int(line.strip())
3300  elif ln == na + 2:
3301  box = [float(i)*10 for i in sline]
3302  if len(box) == 3:
3303  a = box[0]
3304  b = box[1]
3305  c = box[2]
3306  alpha = 90.0
3307  beta = 90.0
3308  gamma = 90.0
3309  boxes.append(BuildLatticeFromLengthsAngles(a, b, c, alpha, beta, gamma))
3310  elif len(box) == 9:
3311  v1 = np.array([box[0], box[3], box[4]])
3312  v2 = np.array([box[5], box[1], box[6]])
3313  v3 = np.array([box[7], box[8], box[2]])
3314  boxes.append(BuildLatticeFromVectors(v1, v2, v3))
3315  xyzs.append(np.array(xyz)*10)
3316  xyz = []
3317  ln = -1
3318  frame += 1
3319  else:
3320  coord = []
3321  if frame == 0: # Create the list of residues, atom names etc. only if it's the first frame.
3322  # Name of the residue, for instance '153SOL1 -> SOL1' ; strips leading numbers
3323  thisresid = int(line[0:5].strip())
3324  resid.append(thisresid)
3325  thisresname = line[5:10].strip()
3326  resname.append(thisresname)
3327  thisatomname = line[10:15].strip()
3328  atomname.append(thisatomname)
3329 
3330  thiselem = sline[1]
3331  if len(thiselem) > 1:
3332  thiselem = thiselem[0] + re.sub('[A-Z0-9]','',thiselem[1:])
3333  elem.append(thiselem)
3334 
3335  # Different frames may have different decimal precision
3336  if ln == 2:
3337  pdeci = [i for i, x in enumerate(line) if x == '.']
3338  ndeci = pdeci[1] - pdeci[0] - 5
3339 
3340  for i in range(1,4):
3341  try:
3342  thiscoord = float(line[(pdeci[0]-4)+(5+ndeci)*(i-1):(pdeci[0]-4)+(5+ndeci)*i].strip())
3343  except: # Attempt to read incorrectly formatted GRO files.
3344  thiscoord = float(line.split()[i+2])
3345  coord.append(thiscoord)
3346  xyz.append(coord)
3347 
3348  ln += 1
3349  absln += 1
3350  Answer = {'xyzs' : xyzs,
3351  'elem' : elem,
3352  'atomname' : atomname,
3353  'resid' : resid,
3354  'resname' : resname,
3355  'boxes' : boxes,
3356  'comms' : comms}
3357  return Answer
3358 
3359  def read_charmm(self, fnm, **kwargs):
3360  """ Read a CHARMM .cor (or .crd) file.
3361 
3362  """
3363  xyzs = []
3364  elem = [] # The element, most useful for quantum chemistry calculations
3365  atomname = [] # The atom name, for instance 'HW1'
3366  comms = []
3367  resid = []
3368  resname = []
3369  xyz = []
3370  thiscomm = []
3371  ln = 0
3372  frame = 0
3373  an = 0
3374  for line in open(fnm):
3375  line = line.strip().expandtabs()
3376  sline = line.split()
3377  if re.match(r'^\*',line):
3378  if len(sline) == 1:
3379  comms.append(';'.join(list(thiscomm)))
3380  thiscomm = []
3381  else:
3382  thiscomm.append(' '.join(sline[1:]))
3383  elif re.match('^ *[0-9]+ +(EXT)?$',line):
3384  na = int(sline[0])
3385  elif is_charmm_coord(line):
3386  if frame == 0: # Create the list of residues, atom names etc. only if it's the first frame.
3387  resid.append(sline[1])
3388  resname.append(sline[2])
3389  atomname.append(sline[3])
3390  thiselem = sline[3]
3391  if len(thiselem) > 1:
3392  thiselem = thiselem[0] + re.sub('[A-Z0-9]','',thiselem[1:])
3393  elem.append(thiselem)
3394  xyz.append([float(i) for i in sline[4:7]])
3395  an += 1
3396  if an == na:
3397  xyzs.append(np.array(xyz))
3398  xyz = []
3399  an = 0
3400  frame += 1
3401  ln += 1
3402  Answer = {'xyzs' : xyzs,
3403  'elem' : elem,
3404  'atomname' : atomname,
3405  'resid' : resid,
3406  'resname' : resname,
3407  'comms' : comms}
3408  return Answer
3409 
3410  def read_qcin(self, fnm, **kwargs):
3411  """ Read a Q-Chem input file.
3412 
3413  These files can be very complicated, and I can't write a completely
3414  general parser for them. It is important to keep our goal in
3415  mind:
3416 
3417  1) The main goal is to convert a trajectory to Q-Chem input
3418  files with identical calculation settings.
3419 
3420  2) When we print the Q-Chem file, we should preserve the line
3421  ordering of the 'rem' section, but also be able to add 'rem'
3422  options at the end.
3423 
3424  3) We should accommodate the use case that the Q-Chem file may have
3425  follow-up calculations delimited by '@@@'.
3426 
3427  4) We can read in all of the xyz's as a trajectory, but only the
3428  Q-Chem settings belonging to the first xyz will be saved.
3429 
3430  """
3431 
3432  qcrem = OrderedDict()
3433  qcrems = []
3434  xyz = []
3435  xyzs = []
3436  elem = []
3437  section = None
3438  # The Z-matrix printing in new versions throws me off.
3439  # This could appear at the end of an optimization.
3440  # We detect when "Z-matrix Print" appears and skip the
3441  # section that follows.
3442  zmatrix = False
3443  template = []
3444  fff = False
3445  inside_section = False
3446  reading_template = True
3447  charge = 0
3448  mult = 0
3449  Answer = {}
3450  SectionData = []
3451  template_cut = 0
3452  readsuf = True
3453  suffix = [] # The suffix, which comes after every atom line in the $molecule section, is for determining the MM atom type and topology.
3454  ghost = [] # If the element in the $molecule section is preceded by an '@' sign, it's a ghost atom for counterpoise calculations.
3455  infsm = False
3456 
3457  for line in open(fnm).readlines():
3458  line = line.strip().expandtabs()
3459  sline = line.split()
3460  dline = line.split('!')[0].split()
3461  if "Z-matrix Print" in line:
3462  zmatrix = True
3463  if re.match(r'^\$',line):
3464  wrd = re.sub(r'\$','',line).lower()
3465  if zmatrix:
3466  if wrd == 'end':
3467  zmatrix = False
3468  else:
3469  if wrd == 'end':
3470  inside_section = False
3471  if section == 'molecule':
3472  if len(xyz) > 0:
3473  xyzs.append(np.array(xyz))
3474  xyz = []
3475  fff = True
3476  if suffix:
3477  readsuf = False
3478  elif section == 'rem':
3479  if reading_template:
3480  qcrems.append(qcrem)
3481  qcrem = OrderedDict()
3482  if reading_template:
3483  if section != 'external_charges': # Ignore the external charges section because it varies from frame to frame.
3484  template.append((section,SectionData))
3485  SectionData = []
3486  else:
3487  section = wrd
3488  inside_section = True
3489  elif inside_section:
3490  if zmatrix:
3491  raise RuntimeError('Parse error - zmatrix should not activate inside_section')
3492  if section == 'molecule':
3493  if line.startswith("*"):
3494  infsm = True
3495  if (not infsm) and (len(dline) >= 4 and all([isfloat(dline[i]) for i in range(1,4)])):
3496  if fff:
3497  reading_template = False
3498  template_cut = list(i for i, dat in enumerate(template) if '@@@' in dat[0])[-1]
3499  else:
3500  if re.match('^@', sline[0]): # This is a ghost atom
3501  ghost.append(True)
3502  else:
3503  ghost.append(False)
3504  elem.append(re.sub('@','',sline[0]))
3505  xyz.append([float(i) for i in sline[1:4]])
3506  if readsuf and len(sline) > 4:
3507  whites = re.split('[^ ]+',line)
3508  suffix.append(''.join([whites[j]+sline[j] for j in range(4,len(sline))]))
3509  elif re.match("[+-]?[0-9]+ +[0-9]+$",line.split('!')[0].strip()):
3510  if not fff:
3511  charge = int(sline[0])
3512  mult = int(sline[1])
3513  else:
3514  SectionData.append(line)
3515  elif reading_template:
3516  if section == 'basis':
3517  SectionData.append(line.split('!')[0])
3518  elif section == 'rem':
3519  S = splitter.findall(line)
3520  if S[0] == '!':
3521  qcrem[''.join(S[0:3]).lower()] = ''.join(S[4:])
3522  else:
3523  qcrem[S[0].lower()] = ''.join(S[2:])
3524  else:
3525  SectionData.append(line)
3526  elif re.match('^@+$', line) and reading_template:
3527  template.append(('@@@', []))
3528  elif re.match('Welcome to Q-Chem', line) and reading_template and fff:
3529  template.append(('@@@', []))
3530 
3531  if template_cut != 0:
3532  template = template[:template_cut]
3533 
3534  Answer = {'qctemplate' : template,
3535  'qcrems' : qcrems,
3536  'charge' : charge,
3537  'mult' : mult,
3538  }
3539  if suffix:
3540  Answer['qcsuf'] = suffix
3541 
3542  if len(xyzs) > 0:
3543  Answer['xyzs'] = xyzs
3544  else:
3545  Answer['xyzs'] = [np.array([])]
3546  if len(elem) > 0:
3547  Answer['elem'] = elem
3548  if len(ghost) > 0:
3549  Answer['qm_ghost'] = ghost
3550  return Answer
3551 
3552 
3553  def read_pdb(self, fnm, **kwargs):
3554  """ Loads a PDB and returns a dictionary containing its data. """
3555 
3556  F1=open(fnm,'r')
3557  ParsedPDB=readPDB(F1)
3558 
3559  Box = None
3560  #Separate into distinct lists for each model.
3561  PDBLines=[[]]
3562  # LPW: Keep a record of atoms which are followed by a terminal group.
3563  PDBTerms=[]
3564  ReadTerms = True
3565  for x in ParsedPDB[0]:
3566  if x.__class__ in [END, ENDMDL]:
3567  PDBLines.append([])
3568  ReadTerms = False
3569  if x.__class__ in [ATOM, HETATM]:
3570  PDBLines[-1].append(x)
3571  if ReadTerms:
3572  PDBTerms.append(0)
3573  if x.__class__ in [TER] and ReadTerms:
3574  PDBTerms[-1] = 1
3575  if x.__class__==CRYST1:
3576  Box = BuildLatticeFromLengthsAngles(x.a, x.b, x.c, x.alpha, x.beta, x.gamma)
3577 
3578  X=PDBLines[0]
3579 
3580  XYZ=np.array([[x.x,x.y,x.z] for x in X])/10.0#Convert to nanometers
3581  AltLoc=np.array([x.altLoc for x in X],'str') # Alternate location
3582  ICode=np.array([x.iCode for x in X],'str') # Insertion code
3583  ChainID=np.array([x.chainID for x in X],'str')
3584  AtomNames=np.array([x.name for x in X],'str')
3585  ResidueNames=np.array([x.resName for x in X],'str')
3586  ResidueID=np.array([x.resSeq for x in X],'int')
3587  # LPW: Try not to number Residue IDs starting from 1...
3588  if self.positive_resid:
3589  ResidueID=ResidueID-ResidueID[0]+1
3590 
3591  XYZList=[]
3592  for Model in PDBLines:
3593  # Skip over subsequent models with the wrong number of atoms.
3594  NewXYZ = []
3595  for x in Model:
3596  NewXYZ.append([x.x,x.y,x.z])
3597  if len(XYZList) == 0:
3598  XYZList.append(NewXYZ)
3599  elif len(XYZList) >= 1 and (np.array(NewXYZ).shape == np.array(XYZList[-1]).shape):
3600  XYZList.append(NewXYZ)
3601 
3602  if len(XYZList[-1])==0:#If PDB contains trailing END / ENDMDL, remove empty list
3603  XYZList.pop()
3604 
3605  # Build a list of chemical elements
3606  elem = []
3607  for i in range(len(AtomNames)):
3608  # QYD: try to use original element list
3609  if X[i].element:
3610  elem.append(X[i].element)
3611  else:
3612  thiselem = AtomNames[i]
3613  if len(thiselem) > 1:
3614  thiselem = re.sub('^[0-9]','',thiselem)
3615  thiselem = thiselem[0] + re.sub('[A-Z0-9]','',thiselem[1:])
3616  elem.append(thiselem)
3617 
3618  XYZList=list(np.array(XYZList).reshape((-1,len(ChainID),3)))
3619 
3620  bonds = []
3621  # Read in CONECT records.
3622  F2=open(fnm,'r')
3623  # QYD: Rewrite to support atom indices with 5 digits
3624  # i.e. CONECT143321433314334 -> 14332 connected to 14333 and 14334
3625  for line in F2:
3626  if line[:6] == "CONECT":
3627  conect_A = int(line[6:11]) - 1
3628  conect_B_list = []
3629  line_rest = line[11:]
3630  while line_rest.strip():
3631  # Take 5 characters a time until run out of characters
3632  conect_B_list.append(int(line_rest[:5]) - 1)
3633  line_rest = line_rest[5:]
3634  for conect_B in conect_B_list:
3635  bond = (min((conect_A, conect_B)), max((conect_A, conect_B)))
3636  bonds.append(bond)
3637 
3638  Answer={"xyzs":XYZList, "chain":list(ChainID), "altloc":list(AltLoc), "icode":list(ICode),
3639  "atomname":[str(i) for i in AtomNames], "resid":list(ResidueID), "resname":list(ResidueNames),
3640  "elem":elem, "comms":['' for i in range(len(XYZList))], "terminal" : PDBTerms}
3641 
3642  if len(bonds) > 0:
3643  self.top_settings["read_bonds"] = True
3644  Answer["bonds"] = bonds
3645 
3646  if Box is not None:
3647  Answer["boxes"] = [Box for i in range(len(XYZList))]
3648 
3649  return Answer
3650 
3651  def read_qcesp(self, fnm, **kwargs):
3652  espxyz = []
3653  espval = []
3654  for line in open(fnm):
3655  line = line.strip().expandtabs()
3656  sline = line.split()
3657  if len(sline) == 4 and all([isfloat(sline[i]) for i in range(4)]):
3658  espxyz.append([float(sline[i]) for i in range(3)])
3659  espval.append(float(sline[3]))
3660  Answer = {'qm_espxyzs' : [np.array(espxyz) * bohr2ang],
3661  'qm_espvals' : [np.array(espval)]
3662  }
3663  return Answer
3664 
3665  def read_qcout(self, fnm, errok=None, **kwargs):
3666  """ Q-Chem output file reader, adapted for our parser.
3667 
3668  Q-Chem output files are very flexible and there's no way I can account for all of them. Here's what
3669  I am able to account for:
3670 
3671  A list of:
3672  - Coordinates
3673  - Energies
3674  - Forces
3675 
3676  Calling with errok will proceed with reading file even if the specified error messages are encountered.
3677 
3678  Note that each step in a geometry optimization counts as a frame.
3679 
3680  As with all Q-Chem output files, note that successive calculations can have different numbers of atoms.
3681 
3682  """
3683 
3684  if errok is None:
3685  errok = []
3686  Answer = {}
3687  xyzs = []
3688  xyz = []
3689  elem = []
3690  elemThis = []
3691  mkchg = []
3692  mkspn = []
3693  mkchgThis= []
3694  mkspnThis= []
3695  frqs = []
3696  modes = []
3697  XMode = 0
3698  MMode = 0
3699  VMode = 0
3700  conv = []
3701  convThis = 0
3702  readChargeMult = 0
3703  energy_scf = []
3704  float_match = {'energy_scfThis' : (r"^[1-9][0-9]* +[-+]?([0-9]*\.)?[0-9]+ +[-+]?([0-9]*\.)?[0-9]+([eE][-+]?[0-9]+)[A-Za-z0 ]*$", 1),
3705  'energy_opt' : (r"^Final energy is +[-+]?([0-9]*\.)?[0-9]+$", -1),
3706  'charge' : ("Sum of atomic charges", -1),
3707  'mult' : ("Sum of spin +charges", -1),
3708  'energy_mp2' : (r"^(ri)*(-)*mp2 +total energy += +[-+]?([0-9]*\.)?[0-9]+ +au$",-2),
3709  'energy_ccsd' : (r"^CCSD Total Energy += +[-+]?([0-9]*\.)?[0-9]+$",-1),
3710  'energy_ccsdt' : (r"^CCSD\(T\) Total Energy += +[-+]?([0-9]*\.)?[0-9]+$",-1),
3711  'zpe' : (r"^(\s+)?Zero point vibrational energy:\s+[-+]?([0-9]*\.)?[0-9]+\s+kcal\/mol$", -2),
3712  'entropy' : (r"^(\s+)?Total Entropy:\s+[-+]?([0-9]*\.)?[0-9]+\s+cal\/mol\.K$", -2),
3713  'enthalpy' : (r"^(\s+)?Total Enthalpy:\s+[-+]?([0-9]*\.)?[0-9]+\s+kcal\/mol$", -2)
3714  }
3715  matrix_match = {'analytical_grad' :'Full Analytical Gradient',
3716  'gradient_scf' :'Gradient of SCF Energy',
3717  'gradient_mp2' :'Gradient of MP2 Energy',
3718  'gradient_dualbas' :'Gradient of the Dual-Basis Energy',
3719  'hessian_scf' :'Hessian of the SCF Energy',
3720  'mayer' :'Mayer SCF Bond Order'
3721  }
3722  qcrem = OrderedDict()
3723 
3724  matblank = {'match' : '', 'All' : [], 'This' : [], 'Strip' : [], 'Mode' : 0}
3725  Mats = {}
3726  Floats = {}
3727  for key, val in matrix_match.items():
3728  Mats[key] = copy.deepcopy(matblank)
3729  for key, val in float_match.items():
3730  Floats[key] = []
3731 
3732 
3733  FSM = False
3734 
3735  IRCDir = 0
3736  RPLine = False
3737 
3738  FDiff = False
3739  #---- Intrinsic reaction coordinate data.
3740  # stat: Status, X : Coordinates, E : Energies, Q : Charges, Sz: Spin-Z
3741  # Explanation of Status:
3742  # -1 : IRC calculation does not exist in this direction.
3743  # 0 : IRC calculation finished successfully.
3744  # 1 : IRC calculation did not finish but we can start a geometry optimization from the final point.
3745  # 2 : IRC calculation failed in this direction (i.e. set to 2 once we encounter first_irc_step).
3746  # Two dictionaries of coordinates, energies, Mulliken Charges and Spin Populations.
3747  IRCData = [OrderedDict([('stat', -1), ('X', []), ('E', []), ('Q', []), ('Sz', [])]) for i in range(2)]
3748 
3749  Answer['qcerr'] = ''
3750  fatal = 0
3751  pcmgradmode = False
3752  pcmgrads = []
3753  pcmgrad = []
3754  for line in open(fnm):
3755  line = line.strip().expandtabs()
3756  if 'Welcome to Q-Chem' in line:
3757  Answer['qcerr'] = ''
3758  if 'total processes killed' in line:
3759  Answer['qcerr'] = 'killed'
3760  if fatal and len(line.split()) > 0:
3761  # Print the error message that comes after the "fatal error" line.
3762  if line in errok:
3763  Answer['qcerr'] = line.strip()
3764  fatal = 0
3765  else:
3766  logger.error('Calculation encountered a fatal error! (%s)\n' % line)
3767  raise RuntimeError
3768  if 'Q-Chem fatal error' in line:
3769  fatal = 1
3770  if XMode >= 1:
3771  # Perfectionist here; matches integer, element, and three floating points
3772  if re.match(r"^[0-9]+ +[A-Z][A-Za-z]?( +[-+]?([0-9]*\.)?[0-9]+){3}$", line):
3773  XMode = 2
3774  sline = line.split()
3775  elemThis.append(sline[1])
3776  xyz.append([float(i) for i in sline[2:]])
3777  elif XMode == 2: # Break out of the loop if we encounter anything other than atomic data
3778  if not elem:
3779  elem = elemThis
3780  elif elem != elemThis:
3781  logger.error('Q-Chem output parser will not work if successive calculations have different numbers of atoms!\n')
3782  raise RuntimeError
3783  elemThis = []
3784  xyzs.append(np.array(xyz))
3785  xyz = []
3786  XMode = 0
3787  elif re.match("Standard Nuclear Orientation".lower(), line.lower()):
3788  XMode = 1
3789  if MMode >= 1:
3790  # Perfectionist here; matches integer, element, and two floating points
3791  if re.match(r"^[0-9]+ +[A-Z][a-z]?( +[-+]?([0-9]*\.)?[0-9]+){2}$", line):
3792  MMode = 2
3793  sline = line.split()
3794  mkchgThis.append(float(sline[2]))
3795  mkspnThis.append(float(sline[3]))
3796  elif re.match(r"^[0-9]+ +[A-Z][a-z]?( +[-+]?([0-9]*\.)?[0-9]+){1}$", line):
3797  MMode = 2
3798  sline = line.split()
3799  mkchgThis.append(float(sline[2]))
3800  mkspnThis.append(0.0)
3801  elif MMode == 2: # Break out of the loop if we encounter anything other than Mulliken charges
3802  mkchg.append(mkchgThis[:])
3803  mkspn.append(mkspnThis[:])
3804  mkchgThis = []
3805  mkspnThis = []
3806  MMode = 0
3807  elif re.match("Ground-State Mulliken Net Atomic Charges".lower(), line.lower()):
3808  MMode = 1
3809  for key, val in float_match.items():
3810  if re.match(val[0].lower(), line.lower()):
3811  Floats[key].append(float(line.split()[val[1]]))
3812  #----- Begin Intrinsic reaction coordinate stuff
3813  if line.startswith('IRC') and IRCData[IRCDir]['stat'] == -1:
3814  IRCData[IRCDir]['stat'] = 2
3815  if "Reaction path following." in line:
3816  RPLine = True
3817  IRCData[IRCDir]['X'].append(xyzs[-1])
3818 
3819  elif RPLine:
3820  RPLine = False
3821  IRCData[IRCDir]['E'].append(float(line.split()[3]))
3822  IRCData[IRCDir]['Q'].append(mkchg[-1])
3823  IRCData[IRCDir]['Sz'].append(mkspn[-1])
3824 
3827  if "GEOMETRY OPTIMIZATION" in line:
3828  IRCData[IRCDir]['X'].append(xyzs[-1])
3829  IRCData[IRCDir]['E'].append(energy_scf[-1])
3830  IRCData[IRCDir]['Q'].append(mkchg[-1])
3831  IRCData[IRCDir]['Sz'].append(mkspn[-1])
3832  # Determine whether we are in the forward or the backward part of the IRC.
3833  if "IRC -- convergence criterion reached." in line or "OPTIMIZATION CONVERGED" in line:
3834  IRCData[IRCDir]['stat'] = 0
3835  IRCDir = 1
3836  if "MAXIMUM OPTIMIZATION CYCLES REACHED" in line:
3837  IRCData[IRCDir]['stat'] = 1
3838  # Output file indicates whether we can start a geometry optimization from this point.
3839  if "geom opt from" in line:
3840  IRCData[IRCDir]['stat'] = 1
3841  IRCDir = 1
3842  #----- End IRC stuff
3843  # Look for SCF energy
3844  # Note that COSMO has two SCF energies per calculation so this parser won't work.
3845  # Need to think of a better way.
3846  if re.match(".*Convergence criterion met$".lower(), line.lower()):
3847  conv.append(1)
3848  energy_scf.append(Floats['energy_scfThis'][-1])
3849  Floats['energy_scfThis'] = []
3850  elif re.match(".*Including correction$".lower(), line.lower()):
3851  energy_scf[-1] = Floats['energy_scfThis'][-1]
3852  Floats['energy_scfThis'] = []
3853  elif re.match(".*Convergence failure$".lower(), line.lower()):
3854  conv.append(0)
3855  Floats['energy_scfThis'] = []
3856  energy_scf.append(0.0)
3857  #----- If doing freezing string calculation, do NOT treat as a geometry optimization.
3858  if 'Starting FSM Calculation' in line:
3859  FSM = True
3860  if 'needFdiff: TRUE' in line:
3861  FDiff = True
3862  #----- Gradient from PCM
3863  if "total gradient after adding PCM contribution" in line:
3864  pcmgradmode = True
3865  if pcmgradmode:
3866  # Perfectionist here; matches integer, and three floating points
3867  if re.match(r"^[0-9]+ +( +[-+]?([0-9]*\.)?[0-9]+){3}$", line):
3868  pcmgrad.append([float(i) for i in line.split()[1:]])
3869  if 'Gradient time' in line:
3870  pcmgradmode = False
3871  pcmgrads.append(np.array(pcmgrad).T)
3872  pcmgrad = []
3873  #----- Vibrational stuff
3874  VModeNxt = None
3875  if 'VIBRATIONAL ANALYSIS' in line:
3876  VMode = 1
3877  if VMode > 0 and line.strip().startswith('Mode:'):
3878  VMode = 2
3879  if VMode == 2:
3880  s = line.split()
3881  if 'Frequency:' in line:
3882  nfrq = len(s) - 1
3883  frqs += [float(i) for i in s[1:]]
3884  if re.match('^X +Y +Z', line):
3885  VModeNxt = 3
3886  readmodes = [[] for i in range(nfrq)]
3887  if 'Imaginary Frequencies' in line:
3888  VMode = 0
3889  if VMode == 3:
3890  s = line.split()
3891  if len(s) != nfrq*3+1:
3892  VMode = 2
3893  modes += readmodes[:]
3894  elif 'TransDip' not in s:
3895  for i in range(nfrq):
3896  readmodes[i].append([float(s[j]) for j in range(1+3*i,4+3*i)])
3897  if VModeNxt is not None: VMode = VModeNxt
3898  for key, val in matrix_match.items():
3899  if Mats[key]["Mode"] >= 1:
3900  # Match any number of integers on a line. This signifies a column header to start the matrix
3901  if re.match("^[0-9]+( +[0-9]+)*$",line):
3902  Mats[key]["This"] = add_strip_to_mat(Mats[key]["This"],Mats[key]["Strip"])
3903  Mats[key]["Strip"] = []
3904  Mats[key]["Mode"] = 2
3905  # Match a single integer followed by any number of floats. This is a strip of data to be added to the matrix
3906  elif re.match(r"^[0-9]+( +[-+]?([0-9]*\.)?[0-9]+)+$",line):
3907  Mats[key]["Strip"].append([float(i) for i in line.split()[1:]])
3908  # In any other case, the matrix is terminated.
3909  elif Mats[key]["Mode"] >= 2:
3910  Mats[key]["This"] = add_strip_to_mat(Mats[key]["This"],Mats[key]["Strip"])
3911  Mats[key]["Strip"] = []
3912  Mats[key]["All"].append(np.array(Mats[key]["This"]))
3913  Mats[key]["This"] = []
3914  Mats[key]["Mode"] = 0
3915  elif re.match(val.lower(), line.lower()):
3916  Mats[key]["Mode"] = 1
3917 
3918  if len(Floats['mult']) == 0:
3919  Floats['mult'] = [0]
3920 
3921  # Copy out the coordinate lists; Q-Chem output cannot be trusted to get the chemical elements
3922  Answer['xyzs'] = xyzs
3923  Answer['elem'] = elem
3924  # Read the output file as an input file to get a Q-Chem template.
3925  Aux = self.read_qcin(fnm)
3926  for i in ['qctemplate', 'qcrems', 'elem', 'qm_ghost', 'charge', 'mult']:
3927  if i in Aux: Answer[i] = Aux[i]
3928  # Copy out the charge and multiplicity
3929  if len(Floats['charge']) > 0:
3930  Answer['charge'] = int(Floats['charge'][0])
3931  if len(Floats['mult']) > 0:
3932  Answer['mult'] = int(Floats['mult'][0]) + 1
3933  # Copy out the energies and forces
3934  # Q-Chem can print out gradients with several different headings.
3935  # We start with the most reliable heading and work our way down.
3936  if len(pcmgrads) > 0:
3937  Answer['qm_grads'] = pcmgrads
3938  elif len(Mats['analytical_grad']['All']) > 0:
3939  Answer['qm_grads'] = Mats['analytical_grad']['All']
3940  elif len(Mats['gradient_mp2']['All']) > 0:
3941  Answer['qm_grads'] = Mats['gradient_mp2']['All']
3942  elif len(Mats['gradient_dualbas']['All']) > 0:
3943  Answer['qm_grads'] = Mats['gradient_dualbas']['All']
3944  elif len(Mats['gradient_scf']['All']) > 0:
3945  Answer['qm_grads'] = Mats['gradient_scf']['All']
3946  # Mayer bond order matrix from SCF_FINAL_PRINT=1
3947  if len(Mats['mayer']['All']) > 0:
3948  Answer['qm_bondorder'] = Mats['mayer']['All'][-1]
3949  if len(Mats['hessian_scf']['All']) > 0:
3950  Answer['qm_hessians'] = Mats['hessian_scf']['All']
3951  #else:
3952  # raise RuntimeError('There are no forces in %s' % fnm)
3953  # Also work our way down with the energies.
3954  if len(Floats['energy_ccsdt']) > 0:
3955  Answer['qm_energies'] = Floats['energy_ccsdt']
3956  elif len(Floats['energy_ccsd']) > 0:
3957  Answer['qm_energies'] = Floats['energy_ccsd']
3958  elif len(Floats['energy_mp2']) > 0:
3959  Answer['qm_energies'] = Floats['energy_mp2']
3960  elif len(energy_scf) > 0:
3961  if len(Answer['qcrems']) > 0 and 'correlation' in Answer['qcrems'][0] and Answer['qcrems'][0]['correlation'].lower() in ['mp2', 'rimp2', 'ccsd', 'ccsd(t)']:
3962  logger.error("Q-Chem was called with a post-HF theory but we only got the SCF energy\n")
3963  raise RuntimeError
3964  Answer['qm_energies'] = energy_scf
3965  elif 'SCF failed to converge' not in errok:
3966  logger.error('There are no energies in %s\n' % fnm)
3967  raise RuntimeError
3968  # Process ZPE, entropy, and enthalpy from a freq calculation
3969  if len(Floats['zpe']) > 0:
3970  Answer['qm_zpe'] = Floats['zpe']
3971  if len(Floats['entropy']) > 0:
3972  Answer['qm_entropy'] = Floats['entropy']
3973  if len(Floats['enthalpy']) > 0:
3974  Answer['qm_enthalpy'] = Floats['enthalpy']
3975 
3976 
3979  if 0 in conv and 'SCF failed to converge' not in errok:
3980  logger.error('SCF convergence failure encountered in parsing %s\n' % fnm)
3981  raise RuntimeError
3982  elif 0 not in conv:
3983  # The molecule should have only one charge and one multiplicity
3984  if len(set(Floats['charge'])) != 1 or len(set(Floats['mult'])) != 1:
3985  logger.error('Unexpected number of charges or multiplicities in parsing %s\n' % fnm)
3986  raise RuntimeError
3987 
3988  # If we have any QM energies (not the case if SCF convergence failure)
3989  if 'qm_energies' in Answer:
3990  # Catch the case of failed geometry optimizations.
3991  if len(Answer['xyzs']) == len(Answer['qm_energies']) + 1:
3992  Answer['xyzs'] = Answer['xyzs'][:-1]
3993  # Catch the case of freezing string method, it prints out two extra coordinates.
3994  if len(Answer['xyzs']) == len(Answer['qm_energies']) + 2:
3995  for i in range(2):
3996  Answer['qm_energies'].append(0.0)
3997  mkchg.append([0.0 for j in mkchg[-1]])
3998  mkspn.append([0.0 for j in mkchg[-1]])
3999  # Q-Chem 4.4 prints out three more coordinates.
4000  if FSM and (len(Answer['xyzs']) == len(Answer['qm_energies']) + 3):
4001  Answer['xyzs'] = Answer['xyz'][1:]
4002  for i in range(2):
4003  Answer['qm_energies'].append(0.0)
4004  mkchg.append([0.0 for j in mkchg[-1]])
4005  mkspn.append([0.0 for j in mkchg[-1]])
4006  if FDiff and (len(Answer['qm_energies']) == (len(Answer['xyzs'])+1)):
4007  logger.info("Aligning energies because finite difference calculation prints one extra")
4008  Answer['qm_energies'] = Answer['qm_energies'][:-1]
4009  lens = [len(i) for i in (Answer['qm_energies'], Answer['xyzs'])]
4010  if len(set(lens)) != 1:
4011  logger.error('The number of energies and coordinates in %s are not the same : %s\n' % (fnm, str(lens)))
4012  raise RuntimeError
4013 
4014  # The number of atoms should all be the same
4015  if len(set([len(i) for i in Answer['xyzs']])) > 1:
4016  logger.error('The numbers of atoms across frames in %s are not all the same\n' % fnm)
4017  raise RuntimeError
4018 
4019  if 'qm_grads' in Answer:
4020  for i, frc in enumerate(Answer['qm_grads']):
4021  Answer['qm_grads'][i] = frc.T
4022  for i in np.where(np.array(conv) == 0)[0]:
4023  Answer['qm_grads'].insert(i, Answer['qm_grads'][0]*0.0)
4024  if len(Answer['qm_grads']) != len(Answer['qm_energies']):
4025  logger.warning("Number of energies and gradients is inconsistent (composite jobs?) Deleting gradients.")
4026  del Answer['qm_grads']
4027  # A strange peculiarity; Q-Chem sometimes prints out the final Mulliken charges a second time, after the geometry optimization.
4028  if mkchg:
4029  Answer['qm_mulliken_charges'] = list(np.array(mkchg))
4030  for i in np.where(np.array(conv) == 0)[0]:
4031  Answer['qm_mulliken_charges'].insert(i, np.array([0.0 for i in mkchg[-1]]))
4032  Answer['qm_mulliken_charges'] = Answer['qm_mulliken_charges'][:len(Answer['qm_energies'])]
4033  if mkspn:
4034  Answer['qm_mulliken_spins'] = list(np.array(mkspn))
4035  for i in np.where(np.array(conv) == 0)[0]:
4036  Answer['qm_mulliken_spins'].insert(i, np.array([0.0 for i in mkspn[-1]]))
4037  Answer['qm_mulliken_spins'] = Answer['qm_mulliken_spins'][:len(Answer['qm_energies'])]
4038 
4039  Answer['Irc'] = IRCData
4040  if len(modes) > 0:
4041  unnorm = [np.array(i) for i in modes]
4042  Answer['freqs'] = np.array(frqs)
4043  Answer['modes'] = [i/np.linalg.norm(i) for i in unnorm]
4044 
4045  return Answer
4046 
4047  #=====================================#
4048  #| Writing functions |#
4049  #=====================================#
4050 
4051  def write_qcin(self, selection, **kwargs):
4052  self.require('qctemplate','qcrems','charge','mult')
4053  out = []
4054  if 'read' in kwargs:
4055  read = kwargs['read']
4056  else:
4057  read = False
4058  for SI, I in enumerate(selection):
4059  fsm = False
4060  remidx = 0
4061  molecule_printed = False
4062  # Each 'extchg' has number_of_atoms * 4 elements corresponding to x, y, z, q.
4063  if 'qm_extchgs' in self.Data:
4064  extchg = self.qm_extchgs[I]
4065  out.append('$external_charges')
4066  for i in range(len(extchg)):
4067  out.append("% 15.10f % 15.10f % 15.10f %15.10f" % (extchg[i,0],extchg[i,1],extchg[i,2],extchg[i,3]))
4068  out.append('$end')
4069  for SectName, SectData in self.qctemplate:
4070  if 'jobtype' in self.qcrems[remidx] and self.qcrems[remidx]['jobtype'].lower() == 'fsm':
4071  fsm = True
4072  if len(selection) != 2:
4073  logger.error('For freezing string method, please provide two structures only.\n')
4074  raise RuntimeError
4075  if SectName != '@@@':
4076  out.append('$%s' % SectName)
4077  for line in SectData:
4078  out.append(line)
4079  if SectName == 'molecule':
4080  if molecule_printed == False:
4081  molecule_printed = True
4082  if read:
4083  out.append("read")
4084  elif self.na > 0:
4085  out.append("%i %i" % (self.charge, self.mult))
4086  an = 0
4087  for e, x in zip(self.elem, self.xyzs[I]):
4088  pre = '@' if ('qm_ghost' in self.Data and self.Data['qm_ghost'][an]) else ''
4089  suf = self.Data['qcsuf'][an] if 'qcsuf' in self.Data else ''
4090  out.append(pre + format_xyz_coord(e, x) + suf)
4091  an += 1
4092  if fsm:
4093  out.append("****")
4094  an = 0
4095  for e, x in zip(self.elem, self.xyzs[selection[SI+1]]):
4096  pre = '@' if ('qm_ghost' in self.Data and self.Data['qm_ghost'][an]) else ''
4097  suf = self.Data['qcsuf'][an] if 'qcsuf' in self.Data else ''
4098  out.append(pre + format_xyz_coord(e, x) + suf)
4099  an += 1
4100  if SectName == 'rem':
4101  for key, val in self.qcrems[remidx].items():
4102  out.append("%-21s %-s" % (key, str(val)))
4103  if SectName == 'comments' and 'comms' in self.Data:
4104  out.append(self.comms[I])
4105  out.append('$end')
4106  else:
4107  remidx += 1
4108  out.append('@@@')
4109  out.append('')
4110  #if I < (len(self) - 1):
4111  if fsm: break
4112  if I != selection[-1]:
4113  out.append('@@@')
4114  out.append('')
4115  return out
4116 
4117  def write_xyz(self, selection, **kwargs):
4118  self.require('elem','xyzs')
4119  out = []
4120  for I in selection:
4121  xyz = self.xyzs[I]
4122  out.append("%-5i" % self.na)
4123  out.append(self.comms[I])
4124  for i in range(self.na):
4125  out.append(format_xyz_coord(self.elem[i],xyz[i]))
4126  return out
4127 
4128  def get_reaxff_atom_types(self):
4129  """
4130  Return a list of element names which maps the LAMMPS atom types
4131  to the ReaxFF elements
4132  """
4133  elist = []
4134  for i in range(self.na):
4135  if self.elem[i] not in elist:
4136  elist.append(self.elem[i])
4137  return elist
4138 
4139  def write_lammps_data(self, selection, **kwargs):
4140  """
4141  Write the first frame of the selection to a LAMMPS data file
4142  for the purpose of automatically initializing a LAMMPS simulation.
4143  This function makes several assumptions until further notice:
4144 
4145  (1) We are interested in a ReaxFF simulation
4146  (2) Atom types will be generated from elements
4147  """
4148  I = selection[0]
4149  out = []
4150  comm = self.comms[I]
4151  if not comm.startswith("#"):
4152  comm = "# " + comm
4153  atmap = OrderedDict()
4154  for i in range(self.na):
4155  if self.elem[i] not in atmap:
4156  atmap[self.elem[i]] = len(atmap.keys()) + 1
4157 
4158  # First line is a comment
4159  out.append(comm)
4160  out.append("")
4161  # Next, print the number of atoms and atom types
4162  out.append("%i atoms" % self.na)
4163  out.append("%i atom types" % len(atmap.keys()))
4164  out.append("")
4165  # Next, print the simulation box
4166  # We throw an error if the atoms are outside the simulation box
4167  # If there is no simulation box, then we print upper and lower bounds
4168  xlo = 0.0
4169  ylo = 0.0
4170  zlo = 0.0
4171  if 'boxes' in self.Data:
4172  xhi = self.boxes[I].a
4173  yhi = self.boxes[I].b
4174  zhi = self.boxes[I].c
4175  else:
4176  xlo = np.floor(np.min(self.xyzs[I][:,0]))
4177  ylo = np.floor(np.min(self.xyzs[I][:,1]))
4178  zlo = np.floor(np.min(self.xyzs[I][:,2]))
4179  xhi = np.ceil(np.max(self.xyzs[I][:,0]))+30
4180  yhi = np.ceil(np.max(self.xyzs[I][:,1]))+30
4181  zhi = np.ceil(np.max(self.xyzs[I][:,2]))+30
4182  if (np.min(self.xyzs[I][:,0]) < xlo or
4183  np.min(self.xyzs[I][:,1]) < ylo or
4184  np.min(self.xyzs[I][:,2]) < zlo or
4185  np.max(self.xyzs[I][:,0]) > xhi or
4186  np.max(self.xyzs[I][:,1]) > yhi or
4187  np.max(self.xyzs[I][:,2]) > zhi):
4188  logger.warning("Some atom positions are outside the simulation box, be careful")
4189  out.append("% .3f % .3f xlo xhi" % (xlo, xhi))
4190  out.append("% .3f % .3f ylo yhi" % (ylo, yhi))
4191  out.append("% .3f % .3f zlo zhi" % (zlo, zhi))
4192  out.append("")
4193  # Next, get the masses
4194  out.append("Masses")
4195  out.append("")
4196  for i, a in enumerate(atmap.keys()):
4197  out.append("%i %.4f" % (i+1, PeriodicTable[a]))
4198  out.append("")
4199  # Next, print the atom positions
4200  out.append("Atoms")
4201  out.append("")
4202  for i in range(self.na):
4203  # First number is the index of the atom starting from 1.
4204  # Second number is a molecule tag that is unimportant.
4205  # Third number is the atom type.
4206  # Fourth number is the charge (set to zero).
4207  # Fifth through seventh numbers are the positions
4208  out.append("%4i 1 %2i 0.0 % 15.10f % 15.10f % 15.10f" % (i+1, list(atmap.keys()).index(self.elem[i])+1, self.xyzs[I][i, 0], self.xyzs[I][i, 1], self.xyzs[I][i, 2]))
4209  return out
4210 
4211  def write_molproq(self, selection, **kwargs):
4212  self.require('xyzs','partial_charge')
4213  out = []
4214  for I in selection:
4215  xyz = self.xyzs[I]
4216  # Comment comes first, then number of atoms.
4217  out.append(self.comms[I])
4218  out.append("%-5i" % self.na)
4219  for i in range(self.na):
4220  out.append("% 15.10f % 15.10f % 15.10f % 15.10f 0" % (xyz[i,0],xyz[i,1],xyz[i,2],self.partial_charge[i]))
4221  return out
4222 
4223  def write_mdcrd(self, selection, **kwargs):
4224  self.require('xyzs')
4225  # In mdcrd files, there is only one comment line
4226  out = ['mdcrd file generated using %s' % package]
4227  for I in selection:
4228  xyz = self.xyzs[I]
4229  out += [''.join(["%8.3f" % i for i in g]) for g in grouper(10, list(xyz.flatten()))]
4230  if 'boxes' in self.Data:
4231  out.append(''.join(["%8.3f" % i for i in [self.boxes[I].a, self.boxes[I].b, self.boxes[I].c]]))
4232  return out
4233 
4234  def write_inpcrd(self, selection, sn=None, **kwargs):
4235  self.require('xyzs')
4236  if len(self.xyzs) != 1 and sn is None:
4237  logger.error("inpcrd can only be written for a single-frame trajectory\n")
4238  raise RuntimeError
4239  if sn is not None:
4240  self.xyzs = [self.xyzs[sn]]
4241  self.comms = [self.comms[sn]]
4242  # In inp files, there is only one comment line
4243  # I believe 20A4 means 80 characters.
4244  out = [self.comms[0][:80], '%5i' % self.na]
4245  xyz = self.xyzs[0]
4246  strout = ''
4247  for ix, x in enumerate(xyz):
4248  strout += "%12.7f%12.7f%12.7f" % (x[0], x[1], x[2])
4249  if ix%2 == 1 or ix == (len(xyz) - 1):
4250  out.append(strout)
4251  strout = ''
4252  # From reading the AMBER file specification I am not sure if this is correct.
4253  if 'boxes' in self.Data:
4254  out.append(''.join(["%12.7f" % i for i in [self.boxes[0].a, self.boxes[0].b, self.boxes[0].c]]))
4255  return out
4256 
4257  def write_arc(self, selection, **kwargs):
4258  self.require('elem','xyzs')
4259  out = []
4260  if 'tinkersuf' not in self.Data:
4261  sys.stderr.write("Beware, this .arc file contains no atom type or topology info\n")
4262  for I in selection:
4263  xyz = self.xyzs[I]
4264  out.append("%6i %s" % (self.na, self.comms[I]))
4265  if 'boxes' in self.Data:
4266  b = self.boxes[I]
4267  out.append(" %11.6f %11.6f %11.6f %11.6f %11.6f %11.6f" % (b.a, b.b, b.c, b.alpha, b.beta, b.gamma))
4268  for i in range(self.na):
4269  out.append("%6i %s%s" % (i+1,format_xyz_coord(self.elem[i],xyz[i],tinker=True),self.tinkersuf[i] if 'tinkersuf' in self.Data else ''))
4270  return out
4271 
4272  def write_gro(self, selection, **kwargs):
4273  out = []
4274  if sys.stdin.isatty():
4275  self.require('elem','xyzs')
4276  self.require_resname()
4277  self.require_resid()
4278  self.require_boxes()
4279  else:
4280  self.require('elem','xyzs','resname','resid','boxes')
4281 
4282  if 'atomname' not in self.Data:
4283  count = 0
4284  resid = -1
4285  atomname = []
4286  for i in range(self.na):
4287  if self.resid[i] != resid:
4288  count = 0
4289  count += 1
4290  resid = self.resid[i]
4291  atomname.append("%s%i" % (self.elem[i], count))
4292  else:
4293  atomname = self.atomname
4294 
4295  for I in selection:
4296  xyz = self.xyzs[I]
4297  xyzwrite = xyz.copy()
4298  xyzwrite /= 10.0 # GROMACS uses nanometers
4299  out.append(self.comms[I])
4300  out.append("%5i" % self.na)
4301  for an, line in enumerate(xyzwrite):
4302  out.append(format_gro_coord(self.resid[an],self.resname[an],atomname[an],an+1,xyzwrite[an]))
4303  out.append(format_gro_box(self.boxes[I]))
4304  return out
4305 
4306  def write_dcd(self, selection, **kwargs):
4307  if _dcdlib.vmdplugin_init() != 0:
4308  logger.error("Unable to init DCD plugin\n")
4309  raise IOError
4310  natoms = c_int(self.na)
4311  fname = self.fout.encode('ascii')
4312  dcd = _dcdlib.open_dcd_write(create_string_buffer(fname), "dcd", natoms)
4313  ts = MolfileTimestep()
4314  _xyz = c_float * (natoms.value * 3)
4315  for I in selection:
4316  xyz = self.xyzs[I]
4317  ts.coords = _xyz(*list(xyz.flatten()))
4318  ts.A = c_float(self.boxes[I].a if 'boxes' in self.Data else 1.0)
4319  ts.B = c_float(self.boxes[I].b if 'boxes' in self.Data else 1.0)
4320  ts.C = c_float(self.boxes[I].c if 'boxes' in self.Data else 1.0)
4321  ts.alpha = c_float(self.boxes[I].alpha if 'boxes' in self.Data else 90.0)
4322  ts.beta = c_float(self.boxes[I].beta if 'boxes' in self.Data else 90.0)
4323  ts.gamma = c_float(self.boxes[I].gamma if 'boxes' in self.Data else 90.0)
4324  result = _dcdlib.write_timestep(dcd, byref(ts))
4325  if result != 0:
4326  logger.error("Error encountered when writing DCD\n")
4327  raise IOError
4328 
4329  _dcdlib.close_file_write(dcd)
4330  dcd = None
4331 
4332  def write_pdb(self, selection, **kwargs):
4333  standardResidues = ['ALA', 'ASN', 'CYS', 'GLU', 'HIS', 'LEU', 'MET', 'PRO', 'THR', 'TYR', # Standard amino acids
4334  'ARG', 'ASP', 'GLN', 'GLY', 'ILE', 'LYS', 'PHE', 'SER', 'TRP', 'VAL', # Standard amino acids
4335  'HID', 'HIE', 'HIP', 'ASH', 'GLH', 'TYD', 'CYM', 'CYX', 'LYN', # Some alternate protonation states
4336  'PTR', 'SEP', 'TPO', 'Y1P', 'S1P', 'T1P', # Phosphorylated amino acids
4337  'HOH', 'SOL', 'WAT', # Common residue names for water
4338  'A', 'G', 'C', 'U', 'I', 'DA', 'DG', 'DC', 'DT', 'DI']
4339  # When converting from pdb to xyz in interactive prompt,
4340  # ask user for some PDB-specific info.
4341  if sys.stdin.isatty():
4342  self.require('xyzs')
4343  self.require_resname()
4344  self.require_resid()
4345  else:
4346  self.require('xyzs','resname','resid')
4347  write_conect = kwargs.pop('write_conect', 1)
4348  # Create automatic atom names if not present
4349  # in data structure: these are just placeholders.
4350  if 'atomname' not in self.Data:
4351  count = 0
4352  resid = -1
4353  atomnames = []
4354  for i in range(self.na):
4355  if self.resid[i] != resid:
4356  count = 0
4357  count += 1
4358  resid = self.resid[i]
4359  atomnames.append("%s%i" % (self.elem[i], count))
4360  self.atomname = atomnames
4361  # Standardize formatting of atom names.
4362  atomNames = []
4363  for i, atomname in enumerate(self.atomname):
4364  if len(atomname) < 4 and atomname[:1].isalpha() and len(self.elem[i]) < 2:
4365  atomName = ' '+atomname
4366  elif len(atomname) > 4:
4367  atomName = atomname[:4]
4368  else:
4369  atomName = atomname
4370  atomNames.append(atomName)
4371  # Chain names. Default to 'A' for everything
4372  if 'chain' not in self.Data:
4373  chainNames = ['A' for i in range(self.na)]
4374  else:
4375  chainNames = [i[0] if len(i)>0 else ' ' for i in self.chain]
4376  # Standardize formatting of residue names.
4377  resNames = []
4378  for resname in self.resname:
4379  if len(resname) > 3:
4380  resName = resname[:3]
4381  else:
4382  resName = resname
4383  resNames.append(resName)
4384  # Standardize formatting of residue IDs.
4385  resIds = []
4386  for resid in self.resid:
4387  resIds.append("%4d" % (resid%10000))
4388  # Standardize record names.
4389  records = []
4390  for resname in resNames:
4391  if resname in ['HOH', 'SOL', 'WAT']:
4392  records.append("HETATM")
4393  elif resname in standardResidues:
4394  records.append("ATOM ")
4395  else:
4396  records.append("HETATM")
4397 
4398  out = []
4399  # Create the PDB header.
4400  out.append("REMARK 1 CREATED WITH %s %s" % (package.upper(), str(date.today())))
4401  if 'boxes' in self.Data:
4402  a = self.boxes[0].a
4403  b = self.boxes[0].b
4404  c = self.boxes[0].c
4405  alpha = self.boxes[0].alpha
4406  beta = self.boxes[0].beta
4407  gamma = self.boxes[0].gamma
4408  out.append("CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1 1 " % (a, b, c, alpha, beta, gamma))
4409  # Write the structures as models.
4410  atomIndices = {}
4411  for sn in range(len(self)):
4412  modelIndex = sn
4413  if len(self) > 1:
4414  out.append("MODEL %4d" % modelIndex)
4415  atomIndex = 1
4416  for i in range(self.na):
4417  recordName = records[i]
4418  atomName = atomNames[i]
4419  resName = resNames[i]
4420  chainName = chainNames[i]
4421  resId = resIds[i]
4422  coords = self.xyzs[sn][i]
4423  symbol = self.elem[i]
4424  if hasattr(self, 'partial_charge'):
4425  bfactor = self.partial_charge[i]
4426  else:
4427  bfactor = 0.0
4428  atomIndices[i] = atomIndex
4429  line = "%s%5d %-4s %3s %s%4s %s%s%s %5.2f 0.00 %2s " % (
4430  recordName, atomIndex%100000, atomName, resName, chainName, resId, _format_83(coords[0]),
4431  _format_83(coords[1]), _format_83(coords[2]), bfactor, symbol)
4432  assert len(line) == 80, 'Fixed width overflow detected'
4433  out.append(line)
4434  atomIndex += 1
4435  if i < (self.na-1) and chainName != chainNames[i+1]:
4436  out.append("TER %5d %3s %s%4s" % (atomIndex, resName, chainName, resId))
4437  atomIndex += 1
4438  out.append("TER %5d %3s %s%4s" % (atomIndex, resName, chainName, resId))
4439  if len(self) > 1:
4440  out.append("ENDMDL")
4441  conectBonds = []
4442  if 'bonds' in self.Data:
4443  for i, j in self.bonds:
4444  if i > j: continue
4445  if self.resname[i] not in standardResidues or self.resname[j] not in standardResidues:
4446  conectBonds.append((i, j))
4447  elif self.atomname[i] == 'SG' and self.atomname[j] == 'SG' and self.resname[i] == 'CYS' and self.resname[j] == 'CYS':
4448  conectBonds.append((i, j))
4449  elif self.atomname[i] == 'SG' and self.atomname[j] == 'SG' and self.resname[i] == 'CYX' and self.resname[j] == 'CYX':
4450  conectBonds.append((i, j))
4451 
4452  atomBonds = {}
4453  for atom1, atom2 in conectBonds:
4454  index1 = atomIndices[atom1]
4455  index2 = atomIndices[atom2]
4456  if index1 not in atomBonds:
4457  atomBonds[index1] = []
4458  if index2 not in atomBonds:
4459  atomBonds[index2] = []
4460  atomBonds[index1].append(index2)
4461  atomBonds[index2].append(index1)
4462 
4463  for index1 in sorted(atomBonds):
4464  bonded = atomBonds[index1]
4465  while len(bonded) > 4:
4466  out.append("CONECT%5d%5d%5d%5d" % (index1, bonded[0], bonded[1], bonded[2]))
4467  del bonded[:4]
4468  line = "CONECT%5d" % index1
4469  for index2 in bonded:
4470  line = "%s%5d" % (line, index2)
4471  out.append(line)
4472  return out
4473 
4474  def write_qdata(self, selection, **kwargs):
4475  """ Text quantum data format. """
4476  #self.require('xyzs','qm_energies','qm_grads')
4477  out = []
4478  for I in selection:
4479  xyz = self.xyzs[I]
4480  out.append("JOB %i" % I)
4481  out.append("COORDS"+pvec(xyz))
4482  if 'qm_energies' in self.Data:
4483  out.append("ENERGY % .12e" % self.qm_energies[I])
4484  if 'mm_energies' in self.Data:
4485  out.append("EMD0 % .12e" % self.mm_energies[I])
4486  if 'qm_grads' in self.Data:
4487  out.append("GRADIENT"+pvec(self.qm_grads[I]))
4488  if 'qm_espxyzs' in self.Data and 'qm_espvals' in self.Data:
4489  out.append("ESPXYZ"+pvec(self.qm_espxyzs[I]))
4490  out.append("ESPVAL"+pvec(self.qm_espvals[I]))
4491  if 'qm_interaction' in self.Data:
4492  out.append("INTERACTION % .12e" % self.qm_interaction[I])
4493  out.append('')
4494  return out
4495 
4496  def require_resid(self):
4497  if 'resid' not in self.Data:
4498  na_res = int(input("Enter how many atoms are in a residue, or zero as a single residue -> "))
4499  if na_res == 0:
4500  self.resid = [1 for i in range(self.na)]
4501  else:
4502  self.resid = [1 + int(i/na_res) for i in range(self.na)]
4503 
4504  def require_resname(self):
4505  if 'resname' not in self.Data:
4506  resname = input("Enter a residue name (3-letter like 'SOL') -> ")
4507  self.resname = [resname for i in range(self.na)]
4508 
4509  def require_boxes(self):
4510  def buildbox(line):
4511  s = [float(i) for i in line.split()]
4512  if len(s) == 1:
4513  a = s[0]
4514  b = s[0]
4515  c = s[0]
4516  alpha = 90.0
4517  beta = 90.0
4518  gamma = 90.0
4519  return BuildLatticeFromLengthsAngles(a, b, c, alpha, beta, gamma)
4520  elif len(s) == 3:
4521  a = s[0]
4522  b = s[1]
4523  c = s[2]
4524  alpha = 90.0
4525  beta = 90.0
4526  gamma = 90.0
4527  return BuildLatticeFromLengthsAngles(a, b, c, alpha, beta, gamma)
4528  elif len(s) == 6:
4529  a = s[0]
4530  b = s[1]
4531  c = s[2]
4532  alpha = s[3]
4533  beta = s[4]
4534  gamma = s[5]
4535  return BuildLatticeFromLengthsAngles(a, b, c, alpha, beta, gamma)
4536  elif len(s) == 9:
4537  v1 = np.array([s[0], s[3], s[4]])
4538  v2 = np.array([s[5], s[1], s[6]])
4539  v3 = np.array([s[7], s[8], s[2]])
4540  return BuildLatticeFromVectors(v1, v2, v3)
4541  else:
4542  logger.error("Not sure what to do since you gave me %i numbers\n" % len(s))
4543  raise RuntimeError
4544 
4545  if 'boxes' not in self.Data or len(self.boxes) != self.ns:
4546  sys.stderr.write("Please specify the periodic box using:\n")
4547  sys.stderr.write("1 float (cubic lattice length in Angstrom)\n")
4548  sys.stderr.write("3 floats (orthogonal lattice lengths in Angstrom)\n")
4549  sys.stderr.write("6 floats (triclinic lattice lengths and angles in degrees)\n")
4550  sys.stderr.write("9 floats (triclinic lattice vectors v1(x) v2(y) v3(z) v1(y) v1(z) v2(x) v2(z) v3(x) v3(y) in Angstrom)\n")
4551  sys.stderr.write("Or: Name of a file containing one of these lines for each frame in the trajectory\n")
4552  boxstr = input("Box Vector Input: -> ")
4553  if os.path.exists(boxstr):
4554  boxfile = open(boxstr).readlines()
4555  if len(boxfile) != len(self):
4556  logger.error('Tried to read in the box file, but it has a different length from the number of frames.\n')
4557  raise RuntimeError
4558  else:
4559  self.boxes = [buildbox(line) for line in boxfile]
4560  else:
4561  mybox = buildbox(boxstr)
4562  self.boxes = [mybox for i in range(self.ns)]
4563 
4564 def main():
4565  logger.info("Basic usage as an executable: molecule.py input.format1 output.format2")
4566  logger.info("where format stands for xyz, pdb, gro, etc.")
4567  Mao = Molecule(sys.argv[1])
4568  Mao.write(sys.argv[2])
4569 
4570 if __name__ == "__main__":
4571  main()
def __add__(self, other)
Add method for Molecule objects.
Definition: molecule.py:1496
def main()
Definition: molecule.py:4663
def find_rings(self, max_size=6)
Return a list of rings in the molecule.
Definition: molecule.py:2621
def radius_of_gyration(self)
Definition: molecule.py:1727
def aliphatic_hydrogens(self)
Definition: molecule.py:2791
Wrapper for the timestep C structure used in molfile plugins.
Definition: molecule.py:680
def require_boxes(self)
Definition: molecule.py:4608
def __iter__(self)
List-like behavior for looping over trajectories.
Definition: molecule.py:1485
def format_xyzgen_coord(element, xyzgen)
Print a line consisting of (element, p, q, r, s, t, ...) where (p, q, r) are arbitrary atom-wise data...
Definition: molecule.py:589
def get_rotate_translate(matrix1, matrix2)
Definition: molecule.py:788
def read_mol2(self, fnm, kwargs)
Definition: molecule.py:3161
def elem_from_atomname(atomname)
Given an atom name, attempt to get the element in most cases.
Definition: molecule.py:298
def __len__(self)
Return the number of frames in the trajectory.
Definition: molecule.py:1331
def __init__(self, stream=sys.stdout)
Definition: molecule.py:203
def extract_int(arr, avgthre, limthre, label="value", verbose=True)
Get the representative integer value from an array.
Definition: molecule.py:860
Exactly like output.StreamHandler except it does no extra formatting before sending logging messages ...
Definition: molecule.py:202
def emit(self, record)
Definition: molecule.py:206
def BuildLatticeFromLengthsAngles(a, b, c, alpha, beta, gamma)
This function takes in three lattice lengths and three lattice angles, and tries to return a complete...
Definition: molecule.py:437
def split(self, fnm=None, ftype=None, method="chunks", num=None)
Split the molecule object into a number of separate files (chunks), either by specifying the number o...
Definition: molecule.py:2903
def center(self, center_mass=False)
Move geometric center to the origin.
Definition: molecule.py:2023
def edit_qcrems(self, in_dict, subcalc=None)
Edit Q-Chem rem variables with a dictionary.
Definition: molecule.py:1798
def read_charmm(self, fnm, kwargs)
Read a CHARMM .cor (or .crd) file.
Definition: molecule.py:3455
Write_Tab
The table of file writers.
Definition: molecule.py:1251
def pathwise_rmsd(self, align=True)
Find RMSD between frames along path.
Definition: molecule.py:2821
def pvec(vec)
Definition: molecule.py:656
def read_qcout(self, fnm, errok=None, kwargs)
Q-Chem output file reader, adapted for our parser.
Definition: molecule.py:3778
def measure_dihedrals(self, i, j, k, l)
Return a series of dihedral angles, given four atom indices numbered from zero.
Definition: molecule.py:2575
def __getitem__(self, key)
The Molecule class has list-like behavior, so we can get slices of it.
Definition: molecule.py:1453
def find_dihedrals(self)
Return a list of 4-tuples corresponding to all of the dihedral angles in the system.
Definition: molecule.py:2529
def ComputeOverlap(theta, elem, xyz1, xyz2)
Computes an &#39;overlap&#39; between two molecules based on some fictitious density.
Definition: molecule.py:736
def load_popxyz(self, fnm)
Given a charge-spin xyz file, load the charges (x-coordinate) and spins (y-coordinate) into internal ...
Definition: molecule.py:1977
def AlignToDensity(elem, xyz1, xyz2, binary=False)
Computes a "overlap density" from two frames.
Definition: molecule.py:753
def format_gro_box(box)
Print a line corresponding to the box vector in accordance with .gro file format. ...
Definition: molecule.py:598
def rigid_water(self)
If one atom is oxygen and the next two are hydrogen, make the water molecule rigid.
Definition: molecule.py:1739
def replace_peratom(self, key, orig, want)
Replace all of the data for a certain attribute in the system from orig to want.
Definition: molecule.py:1845
def BuildLatticeFromVectors(v1, v2, v3)
This function takes in three lattice vectors and tries to return a complete box specification.
Definition: molecule.py:452
def rotate_check_clash(self, frame, rotate_index, thresh_hyd=1.4, thresh_hvy=1.8, printLevel=1)
Return a new Molecule object containing the selected frame plus a number of frames where the selected...
Definition: molecule.py:2445
def get_reaxff_atom_types(self)
Return a list of element names which maps the LAMMPS atom types to the ReaxFF elements.
Definition: molecule.py:4229
def measure_angles(self, i, j, k)
Definition: molecule.py:2560
Lee-Ping&#39;s general file format conversion class.
Definition: molecule.py:1182
def read_xyz(self, fnm, kwargs)
.xyz files can be TINKER formatted which is why we have the try/except here.
Definition: molecule.py:2952
def find_clashes(self, thre=0.0, pbc=True, groups=None)
Obtain a list of atoms that &#39;clash&#39; (i.e.
Definition: molecule.py:2379
def read_inpcrd(self, fnm, kwargs)
Parse an AMBER .inpcrd or .rst file.
Definition: molecule.py:3064
def require_resid(self)
Definition: molecule.py:4595
def EulerMatrix(T1, T2, T3)
Constructs an Euler matrix from three Euler angles.
Definition: molecule.py:708
def cartesian_product2(arrays)
Form a Cartesian product of two NumPy arrays.
Definition: molecule.py:824
def __setattr__(self, key, value)
Whenever we try to get a class attribute, it first tries to get the attribute from the Data dictionar...
Definition: molecule.py:1386
def atom_select(self, atomslice, build_topology=True)
Return a copy of the object with certain atoms selected.
Definition: molecule.py:1868
def isint(word)
ONLY matches integers! If you have a decimal point? None shall pass!
Definition: molecule.py:401
def __deepcopy__(self, memo)
Custom deepcopy method because Python 3.6 appears to have changed its behavior.
Definition: molecule.py:1398
def AtomContact(xyz, pairs, box=None, displace=False)
Compute distances between pairs of atoms.
Definition: molecule.py:1045
def write_xyz(self, selection, kwargs)
Definition: molecule.py:4213
def __getattr__(self, key)
Whenever we try to get a class attribute, it first tries to get the attribute from the Data dictionar...
Definition: molecule.py:1347
fout
I needed to add in this line because the DCD writer requires the file name, but the other methods don...
Definition: molecule.py:1692
def read_com(self, fnm, kwargs)
Parse a Gaussian .com file and return a SINGLE-ELEMENT list of xyz coordinates (no multiple file supp...
Definition: molecule.py:3255
def MolEqual(mol1, mol2)
Determine whether two Molecule objects have the same fragments by looking at elements and connectivit...
Definition: molecule.py:534
def read_pdb(self, fnm, kwargs)
Loads a PDB and returns a dictionary containing its data.
Definition: molecule.py:3649
def unmangle(M1, M2)
Create a mapping that takes M1&#39;s atom indices to M2&#39;s atom indices based on position.
Definition: molecule.py:380
def read_comm_charge_mult(self, verbose=False)
Set charge and multiplicity from reading the comment line, formatted in a specific way...
Definition: molecule.py:2925
def ref_rmsd(self, i, align=True)
Find RMSD to a reference frame.
Definition: molecule.py:2839
def nodematch(node1, node2)
Definition: molecule.py:395
def is_gro_box(line)
Determines whether a line contains a GROMACS box vector or not.
Definition: molecule.py:638
def openmm_boxes(self)
Returns the periodic box vectors in the Molecule object in a list of OpenMM-compatible boxes...
Definition: molecule.py:2880
def all_pairwise_rmsd(self)
Find pairwise RMSD (super slow, not like the one in MSMBuilder.)
Definition: molecule.py:2803
def either(A, B, key)
Definition: molecule.py:699
def read_gro(self, fnm, kwargs)
Read a GROMACS .gro file.
Definition: molecule.py:3373
def require_resname(self)
Definition: molecule.py:4603
def both(A, B, key)
Definition: molecule.py:685
top_settings
Topology settings.
Definition: molecule.py:1283
def center_of_mass(self)
Definition: molecule.py:1723
def format_xyz_coord(element, xyz, tinker=False)
Print a line consisting of (element, x, y, z) in accordance with .xyz file format.
Definition: molecule.py:546
def find_angles(self)
Return a list of 3-tuples corresponding to all of the angles in the system.
Definition: molecule.py:2502
positive_resid
Creates entries like &#39;gromacs&#39; : &#39;gromacs&#39; and &#39;xyz&#39; : &#39;xyz&#39; in the Funnel.
Definition: molecule.py:1280
def TopEqual(mol1, mol2)
For the nanoreactor project: Determine whether two Molecule objects have the same topologies...
Definition: molecule.py:521
def align(self, smooth=False, center=True, center_mass=False, atom_select=None)
Align molecules.
Definition: molecule.py:1994
def build_topology(self, force_bonds=True, kwargs)
Create self.topology and self.molecules; these are graph representations of the individual molecules ...
Definition: molecule.py:2208
def replace_peratom_conditional(self, key1, cond, key2, orig, want)
Replace all of the data for a attribute key2 from orig to want, contingent on key1 being equal to con...
Definition: molecule.py:1857
def __iadd__(self, other)
Add method for Molecule objects.
Definition: molecule.py:1543
def distance_displacement(self)
Obtain distance matrix and displacement vectors between all pairs of atoms.
Definition: molecule.py:2254
def reorder_indices(self, other)
Return the indices that would reorder atoms according to some other Molecule object.
Definition: molecule.py:1648
def distance_matrix(self, pbc=True)
Obtain distance matrix between all pairs of atoms.
Definition: molecule.py:2242
def append(self, other)
Definition: molecule.py:1652
def CubicLattice(a)
This function takes in three lattice lengths and three lattice angles, and tries to return a complete...
Definition: molecule.py:417
Funnel
A funnel dictionary that takes redundant file types and maps them down to a few.
Definition: molecule.py:1264
def encode(l)
Definition: nifty.py:230
def even_list(totlen, splitsize)
Creates a list of number sequences divided as evenly as possible.
Definition: molecule.py:667
def order_by_connectivity(self, m, i, currList, max_min_path)
Recursive function that orders atoms based on connectivity.
Definition: molecule.py:2758
comms
Read in stuff if we passed in a file name, otherwise return an empty instance.
Definition: molecule.py:1311
def rotate_bond(self, frame, aj, ak, increment=15)
Return a new Molecule object containing the selected frame plus a number of frames where the selected...
Definition: molecule.py:2286
def build_bonds(self)
Build the bond connectivity graph.
Definition: molecule.py:2034
def extract_pop(M, verbose=True)
Extract our best estimate of charge and spin-z from the comments section of a Molecule object created...
Definition: molecule.py:906
def write_qcin(self, selection, kwargs)
Definition: molecule.py:4147
Read_Tab
The table of file readers.
Definition: molecule.py:1235
def read_dcd(self, fnm, kwargs)
Definition: molecule.py:3215
def __delitem__(self, key)
Similarly, in order to delete a frame, we simply perform item deletion on framewise variables...
Definition: molecule.py:1476
def write_inpcrd(self, selection, sn=None, kwargs)
Definition: molecule.py:4332
def require(self, args)
Definition: molecule.py:1656
def write_molproq(self, selection, kwargs)
Definition: molecule.py:4309
def write_dcd(self, selection, kwargs)
Definition: molecule.py:4404
def align_by_moments(self)
Align molecules using the moment of inertia.
Definition: molecule.py:1952
def read_qcin(self, fnm, kwargs)
Read a Q-Chem input file.
Definition: molecule.py:3524
def write(self, fnm=None, ftype=None, append=False, selection=None, kwargs)
Definition: molecule.py:1678
def read_qcesp(self, fnm, kwargs)
Definition: molecule.py:3746
def add_virtual_site(self, idx, kwargs)
Add a virtual site to the system.
Definition: molecule.py:1826
def arc(Mol, begin=None, end=None, RMSD=True, align=True)
Get the arc-length for a trajectory segment.
Definition: molecule.py:956
def EqualSpacing(Mol, frames=0, dx=0, RMSD=True, align=True)
Equalize the spacing of frames in a trajectory with linear interpolation.
Definition: molecule.py:995
def load_frames(self, fnm, ftype=None, kwargs)
Definition: molecule.py:1769
def write_mdcrd(self, selection, kwargs)
Definition: molecule.py:4321
def add_strip_to_mat(mat, strip)
Definition: molecule.py:647
def measure_distances(self, i, j)
Definition: molecule.py:2551
def get_populations(self)
Return a cloned molecule object but with X-coordinates set to Mulliken charges and Y-coordinates set ...
Definition: molecule.py:1966
def without(self, args)
Return a copy of the Molecule object but with certain fields deleted if they exist.
Definition: molecule.py:2915
def isfloat(word)
Matches ANY number; it can be a decimal, scientific notation, integer, or what have you...
Definition: molecule.py:406
def Counter()
Definition: optimizer.py:35
def read_arc(self, fnm, kwargs)
Read a TINKER .arc file.
Definition: molecule.py:3300
def AlignToMoments(elem, xyz1, xyz2=None)
Pre-aligns molecules to &#39;moment of inertia&#39;.
Definition: molecule.py:765
def read_qdata(self, fnm, kwargs)
Definition: molecule.py:3125
def write_qdata(self, selection, kwargs)
Text quantum data format.
Definition: molecule.py:4574
def write_pdb(self, selection, kwargs)
Definition: molecule.py:4430
def add_quantum(self, other)
Definition: molecule.py:1813
def write_gro(self, selection, kwargs)
Definition: molecule.py:4370
def openmm_positions(self)
Returns the Cartesian coordinates in the Molecule object in a list of OpenMM-compatible positions...
Definition: molecule.py:2863
def getElement(mass)
Definition: molecule.py:293
def is_gro_coord(line)
Determines whether a line contains GROMACS data or not.
Definition: molecule.py:610
def axis_angle(axis, angle)
Given a rotation axis and angle, return the corresponding 3x3 rotation matrix, which will rotate a (N...
Definition: molecule.py:1133
def read_qcschema(self, schema, kwargs)
Definition: molecule.py:2933
def format_gro_coord(resid, resname, aname, seqno, xyz)
Print a line in accordance with .gro file format, with six decimal points of precision.
Definition: molecule.py:577
def write_lammps_data(self, selection, kwargs)
Write the first frame of the selection to a LAMMPS data file for the purpose of automatically initial...
Definition: molecule.py:4245
def grouper(n, iterable)
Groups a big long iterable into groups of ten or what have you.
Definition: molecule.py:661
def is_charmm_coord(line)
Determines whether a line contains CHARMM data or not.
Definition: molecule.py:625
def atom_stack(self, other)
Return a copy of the object with another molecule object appended.
Definition: molecule.py:1905
def align_center(self)
Definition: molecule.py:2854
def diff(A, B, key)
Definition: molecule.py:688
def read_mdcrd(self, fnm, kwargs)
Parse an AMBER .mdcrd file.
Definition: molecule.py:3031
def form_rot(q)
Given a quaternion p, form a rotation matrix from it.
Definition: molecule.py:1093
def write_arc(self, selection, kwargs)
Definition: molecule.py:4355
def read_xyz0(self, fnm, kwargs)
Parse a .xyz file which contains several xyz coordinates, and return their elements.
Definition: molecule.py:2967
def reorder_according_to(self, other)
Reorder atoms according to some other Molecule object.
Definition: molecule.py:1621
def repair(self, key, klast)
Attempt to repair trivial issues that would otherwise break the object.
Definition: molecule.py:1586