1 from __future__
import absolute_import
2 from __future__
import division
3 from __future__
import print_function
13 from collections
import OrderedDict, namedtuple, Counter
15 from datetime
import date
16 from warnings
import warn
19 from numpy
import sin, cos, arccos
20 from numpy.linalg
import multi_dot
21 from pkg_resources
import parse_version
25 from itertools
import zip_longest
as zip_longest
27 from itertools
import izip_longest
as zip_longest
145 FrameVariableNames = {
'xyzs',
'comms',
'boxes',
'qm_hessians',
'qm_grads',
'qm_energies',
'qm_interaction',
146 'qm_espxyzs',
'qm_espvals',
'qm_extchgs',
'qm_mulliken_charges',
'qm_mulliken_spins',
'qm_zpe',
147 'qm_entropy',
'qm_enthalpy',
'qm_bondorder'}
161 AtomVariableNames = {
'elem',
'partial_charge',
'atomname',
'atomtype',
'tinkersuf',
'resid',
'resname',
'qcsuf',
162 'qm_ghost',
'chain',
'altloc',
'icode',
'terminal'}
175 MetaVariableNames = {
'fnm',
'ftype',
'qcrems',
'qctemplate',
'qcerr',
'charge',
'mult',
'bonds',
'topology',
178 QuantumVariableNames = {
'qcrems',
'qctemplate',
'charge',
'mult',
'qcsuf',
'qm_ghost',
'qm_bondorder'}
180 AllVariableNames = QuantumVariableNames | AtomVariableNames | MetaVariableNames | FrameVariableNames
186 if "forcebalance" in __name__:
188 from .output
import *
189 package =
"ForceBalance" 190 elif "geometric" in __name__:
192 from .nifty
import logger
193 package =
"geomeTRIC" 196 from logging
import *
198 """Exactly like output.StreamHandler except it does no extra formatting 199 before sending logging messages to the stream. This is more compatible with 200 how output has been displayed in ForceBalance. Default stream has also been 201 changed from stderr to stdout""" 205 def emit(self, record):
206 message = record.getMessage()
207 self.stream.write(message)
212 logger = getLogger(
"MoleculeLogger")
213 logger.setLevel(INFO)
215 logger.addHandler(handler)
216 if __name__ ==
"__main__":
217 package =
"LPW-molecule.py" 219 package = __name__.split(
'.')[0]
221 module_name = __name__.replace(
'.molecule',
'')
225 1.28, 0.96, 0.84, 0.76, 0.71, 0.66, 0.57, 0.58,
226 0.00, 1.41, 1.21, 1.11, 1.07, 1.05, 1.02, 1.06,
228 2.03, 1.76, 1.70, 1.60, 1.53, 1.39, 1.61, 1.52, 1.50,
229 1.24, 1.32, 1.22, 1.22, 1.20, 1.19, 1.20, 1.20, 1.16,
230 2.20, 1.95, 1.90, 1.75, 1.64, 1.54, 1.47, 1.46, 1.42,
231 1.39, 1.45, 1.44, 1.42, 1.39, 1.39, 1.38, 1.39, 1.40,
232 2.44, 2.15, 2.07, 2.04, 2.03, 2.01, 1.99, 1.98,
233 1.98, 1.96, 1.94, 1.92, 1.92, 1.89, 1.90, 1.87,
234 1.87, 1.75, 1.70, 1.62, 1.51, 1.44, 1.41, 1.36,
235 1.36, 1.32, 1.45, 1.46, 1.48, 1.40, 1.50, 1.50,
236 2.60, 2.21, 2.15, 2.06, 2.00, 1.96, 1.90, 1.87, 1.80, 1.69]
239 Elements = [
"None",
'H',
'He',
240 'Li',
'Be',
'B',
'C',
'N',
'O',
'F',
'Ne',
241 'Na',
'Mg',
'Al',
'Si',
'P',
'S',
'Cl',
'Ar',
242 'K',
'Ca',
'Sc',
'Ti',
'V',
'Cr',
'Mn',
'Fe',
'Co',
'Ni',
'Cu',
'Zn',
'Ga',
'Ge',
'As',
'Se',
'Br',
'Kr',
243 'Rb',
'Sr',
'Y',
'Zr',
'Nb',
'Mo',
'Tc',
'Ru',
'Rh',
'Pd',
'Ag',
'Cd',
'In',
'Sn',
'Sb',
'Te',
'I',
'Xe',
244 'Cs',
'Ba',
'La',
'Ce',
'Pr',
'Nd',
'Pm',
'Sm',
'Eu',
'Gd',
'Tb',
'Dy',
'Ho',
'Er',
'Tm',
'Yb',
245 'Lu',
'Hf',
'Ta',
'W',
'Re',
'Os',
'Ir',
'Pt',
'Au',
'Hg',
'Tl',
'Pb',
'Bi',
'Po',
'At',
'Rn',
246 'Fr',
'Ra',
'Ac',
'Th',
'Pa',
'U','Np','Pu','Am','Cm','Bk','Cf','Es','Fm','Md','No','Lr','Rf','Db','Sg','Bh','Hs','Mt']
260 PeriodicTable = OrderedDict([(
"H", 1.007975), (
"He", 4.002602),
261 (
"Li", 6.9675), (
"Be", 9.0121831), (
"B", 10.8135), (
"C", 12.0106), (
"N", 14.006855), (
"O", 15.99940), (
"F", 18.99840316), (
"Ne", 20.1797),
262 (
"Na", 22.98976928), (
"Mg", 24.3055), (
"Al", 26.9815385), (
"Si", 28.085), (
"P", 30.973762), (
"S", 32.0675), (
"Cl", 35.4515), (
"Ar", 39.948),
263 (
"K", 39.0983), (
"Ca", 40.078), (
"Sc", 44.955908), (
"Ti", 47.867), (
"V", 50.9415), (
"Cr", 51.9961), (
"Mn", 54.938044), (
"Fe", 55.845), (
"Co", 58.933194),
264 (
"Ni", 58.6934), (
"Cu", 63.546), (
"Zn", 65.38), (
"Ga", 69.723), (
"Ge", 72.63), (
"As", 74.921595), (
"Se", 78.971), (
"Br", 79.904), (
"Kr", 83.798),
265 (
"Rb", 85.4678), (
"Sr", 87.62), (
"Y", 88.90584), (
"Zr", 91.224), (
"Nb", 92.90637), (
"Mo", 95.95), (
"Tc", 98.), (
"Ru", 101.07), (
"Rh", 102.9055),
266 (
"Pd", 106.42), (
"Ag", 107.8682), (
"Cd", 112.414), (
"In", 114.818), (
"Sn", 118.71), (
"Sb", 121.76), (
"Te", 127.6), (
"I", 126.90447), (
"Xe", 131.293),
267 (
"Cs", 132.905452), (
"Ba", 137.327), (
"La", 138.90547), (
"Ce", 140.116), (
"Pr", 140.90766), (
"Nd", 144.242), (
"Pm", 145.), (
"Sm", 150.36),
268 (
"Eu", 151.964), (
"Gd", 157.25), (
"Tb", 158.92535), (
"Dy", 162.5), (
"Ho", 164.93033), (
"Er", 167.259), (
"Tm", 168.93422), (
"Yb", 173.054),
269 (
"Lu", 174.9668), (
"Hf", 178.49), (
"Ta", 180.94788), (
"W", 183.84), (
"Re", 186.207), (
"Os", 190.23), (
"Ir", 192.217), (
"Pt", 195.084),
270 (
"Au", 196.966569), (
"Hg", 200.592), (
"Tl", 204.3835), (
"Pb", 207.2), (
"Bi", 208.9804), (
"Po", 209.), (
"At", 210.), (
"Rn", 222.),
271 (
"Fr", 223.), (
"Ra", 226.), (
"Ac", 227.), (
"Th", 232.0377), (
"Pa", 231.03588), (
"U", 238.02891), ("Np", 237.), ("Pu", 244.), # Seventh row Fr-Og
272 (
"Am", 241.), (
"Cm", 243.), (
"Bk", 247.), (
"Cf", 249.), (
"Es", 252.), (
"Fm", 257.), (
"Md", 258.), (
"No", 259.),
273 (
"Lr", 262.), (
"Rf", 267.), (
"Db", 268.), (
"Sg", 271.), (
"Bh", 272.), (
"Hs", 270.), (
"Mt", 276.), (
"Ds", 281.),
274 (
"Rg", 280.), (
"Cn", 285.), (
"Nh", 284.), (
"Fl", 289.), (
"Mc", 288.), (
"Lv", 293.), (
"Ts", 292.), (
"Og", 294.)])
293 return PeriodicTable.keys()[np.argmin([np.abs(m-mass)
for m
in PeriodicTable.values()])]
296 """ Given an atom name, attempt to get the element in most cases. """ 297 return re.search(
'[A-Z][a-z]*',atomname).group(0)
299 if "forcebalance" in __name__:
306 for fnm
in [
"_dcdlib.so",
307 os.path.join(imp.find_module(__name__.split(
'.')[0])[1],
"_dcdlib.so"),
308 os.path.join(imp.find_module(__name__.split(
'.')[0])[1],
"_dcdlib"+str(sysconfig.get_config_var(
'EXT_SUFFIX'))),
309 os.path.join(os.path.dirname(__file__),
"_dcdlib.so"),
310 os.path.join(os.path.dirname(__file__),
"_dcdlib"+str(sysconfig.get_config_var(
'EXT_SUFFIX')))]:
311 if os.path.exists(fnm):
316 logger.debug(
'Note: Cannot import optional dcdlib module to read/write DCD files.\n')
324 logger.debug(
'Note: Cannot import optional pdb module to read/write PDB files.\n')
332 logger.debug(
'Note: Cannot import optional Mol2 module to read .mol2 files.\n')
342 logger.debug(
'Note: Cannot import optional OpenMM module.\n')
344 elif "geometric" in __name__:
351 logger.debug(
'Note: Failed to import optional pdb module to read/write PDB files.\n')
360 logger.debug(
'Note: Failed to import optional OpenMM module.\n')
367 bohr2ang = 0.529177210
371 Create a mapping that takes M1's atom indices to M2's atom indices based on position. 373 If we start with atoms in molecule "PDB", and the new molecule "M" 374 contains re-numbered atoms, then this code works: 376 M.elem = list(np.array(PDB.elem)[unmangled]) 379 logger.error(
"Unmangler only deals with same number of atoms\n")
382 for i
in range(M1.na):
383 for j
in range(M2.na):
384 if np.linalg.norm(M1.xyzs[0][i] - M2.xyzs[0][j]) < 0.1:
386 unmangled = [unmangler[i]
for i
in sorted(unmangler.keys())]
387 if len(unmangled) != M1.na:
388 logger.error(
"Unmangler failed (different structures?)\n")
394 return node1[
'e'] == node2[
'e']
397 """ONLY matches integers! If you have a decimal point? None shall pass!""" 398 return re.match(
'^[-+]?[0-9]+$',word)
401 """Matches ANY number; it can be a decimal, scientific notation, integer, or what have you""" 402 return re.match(
r'^[-+]?[0-9]*\.?[0-9]*([eEdD][-+]?[0-9]+)?$',word)
405 splitter = re.compile(
r'(\s+|\S+)')
408 Box = namedtuple(
'Box',[
'a',
'b',
'c',
'alpha',
'beta',
'gamma',
'A',
'B',
'C',
'V'])
409 radian = 180. / np.pi
411 """ This function takes in three lattice lengths and three lattice angles, and tries to return a complete box specification. """ 417 alph = alpha*np.pi/180
419 gamm = gamma*np.pi/180
420 v = np.sqrt(1 - cos(alph)**2 - cos(bet)**2 - cos(gamm)**2 + 2*cos(alph)*cos(bet)*cos(gamm))
421 Mat = np.array([[a, b*cos(gamm), c*cos(bet)],
422 [0, b*sin(gamm), c*((cos(alph)-cos(bet)*cos(gamm))/sin(gamm))],
423 [0, 0, c*v/sin(gamm)]])
424 L1 = Mat.dot(np.array([[1],[0],[0]]))
425 L2 = Mat.dot(np.array([[0],[1],[0]]))
426 L3 = Mat.dot(np.array([[0],[0],[1]]))
427 return Box(a,b,c,alpha,beta,gamma,np.array(L1).flatten(),np.array(L2).flatten(),np.array(L3).flatten(),v*a*b*c)
430 """ This function takes in three lattice lengths and three lattice angles, and tries to return a complete box specification. """ 431 alph = alpha*np.pi/180
433 gamm = gamma*np.pi/180
434 v = np.sqrt(1 - cos(alph)**2 - cos(bet)**2 - cos(gamm)**2 + 2*cos(alph)*cos(bet)*cos(gamm))
435 Mat = np.array([[a, b*cos(gamm), c*cos(bet)],
436 [0, b*sin(gamm), c*((cos(alph)-cos(bet)*cos(gamm))/sin(gamm))],
437 [0, 0, c*v/sin(gamm)]])
438 L1 = Mat.dot(np.array([[1],[0],[0]]))
439 L2 = Mat.dot(np.array([[0],[1],[0]]))
440 L3 = Mat.dot(np.array([[0],[0],[1]]))
441 return Box(a,b,c,alpha,beta,gamma,np.array(L1).flatten(),np.array(L2).flatten(),np.array(L3).flatten(),v*a*b*c)
444 """ This function takes in three lattice vectors and tries to return a complete box specification. """ 445 a = np.linalg.norm(v1)
446 b = np.linalg.norm(v2)
447 c = np.linalg.norm(v3)
448 alpha = arccos(np.dot(v2, v3) / np.linalg.norm(v2) / np.linalg.norm(v3)) * radian
449 beta = arccos(np.dot(v1, v3) / np.linalg.norm(v1) / np.linalg.norm(v3)) * radian
450 gamma = arccos(np.dot(v1, v2) / np.linalg.norm(v1) / np.linalg.norm(v2)) * radian
451 alph = alpha*np.pi/180
453 gamm = gamma*np.pi/180
454 v = np.sqrt(1 - cos(alph)**2 - cos(bet)**2 - cos(gamm)**2 + 2*cos(alph)*cos(bet)*cos(gamm))
455 Mat = np.array([[a, b*cos(gamm), c*cos(bet)],
456 [0, b*sin(gamm), c*((cos(alph)-cos(bet)*cos(gamm))/sin(gamm))],
457 [0, 0, c*v/sin(gamm)]])
458 L1 = Mat.dot(np.array([[1],[0],[0]]))
459 L2 = Mat.dot(np.array([[0],[1],[0]]))
460 L3 = Mat.dot(np.array([[0],[0],[1]]))
461 return Box(a,b,c,alpha,beta,gamma,np.array(L1).flatten(),np.array(L2).flatten(),np.array(L3).flatten(),v*a*b*c)
469 import networkx
as nx
472 super(MyG,self).__init__()
474 def __eq__(self, other):
480 return nx.is_isomorphic(self,other,node_match=nodematch)
482 """ The hash function is something we can use to discard two things that are obviously not equal. Here we neglect the hash. """ 485 """ Return a list of the sorted atom numbers in this graph. """ 486 return sorted(list(self.nodes()))
488 """ Return a string of atoms, which serves as a rudimentary 'fingerprint' : '99,100,103,151' . """ 489 return ','.join([
'%i' % i
for i
in self.L()])
491 """ Return an array of the elements. For instance ['H' 'C' 'C' 'H']. """ 492 elems = nx.get_node_attributes(self,
'e')
493 return [elems[i]
for i
in self.L()]
495 """ Create an Empirical Formula """ 496 Formula = list(self.e())
497 return ''.join([(
'%s%i' % (k, Formula.count(k))
if Formula.count(k) > 1
else '%s' % k)
for k
in sorted(set(Formula))])
499 """ Get a list of the coordinates. """ 500 coors = nx.get_node_attributes(self,
'x')
501 return np.array([coors[i]
for i
in self.L()])
503 logger.warning(
"Cannot import optional NetworkX module, topology tools won't work\n.")
506 """ For the nanoreactor project: Determine whether two Molecule objects have the same topologies. """ 510 AtomEqual =
Counter([tuple(m.L())
for m
in mol1.molecules]) ==
Counter([tuple(m.L())
for m
in mol2.molecules])
511 return GraphEqual
and AtomEqual
515 Determine whether two Molecule objects have the same fragments by 516 looking at elements and connectivity graphs. This is less strict 517 than TopEqual (i.e. more often returns True). 519 if mol1.na != mol2.na :
return False 524 """ Print a line consisting of (element, x, y, z) in accordance with .xyz file format 526 @param[in] element A chemical element of a single atom 527 @param[in] xyz A 3-element array containing x, y, z coordinates of that atom 531 return "%-3s % 13.8f % 13.8f % 13.8f" % (element,xyz[0],xyz[1],xyz[2])
533 return "%-5s % 15.10f % 15.10f % 15.10f" % (element,xyz[0],xyz[1],xyz[2])
536 """Format a single float into a string of width 8, with ideally 3 decimal 537 places of precision. If the number is a little too large, we can 538 gracefully degrade the precision by lopping off some of the decimal 539 places. If it's much too large, we throw a ValueError""" 540 if -999.999 < f < 9999.999:
542 if -9999999 < f < 99999999:
543 return (
'%8.3f' % f)[:8]
544 raise ValueError(
'coordinate "%s" could not be represented ' 545 'in a width-8 field' % f)
548 """ Print a line in accordance with .gro file format, with six decimal points of precision 550 Nine decimal points of precision are necessary to get forces below 1e-3 kJ/mol/nm. 552 @param[in] resid The number of the residue that the atom belongs to 553 @param[in] resname The name of the residue that the atom belongs to 554 @param[in] aname The name of the atom 555 @param[in] seqno The sequential number of the atom 556 @param[in] xyz A 3-element array containing x, y, z coordinates of that atom 559 return "%5i%-5s%5s%5i % 13.9f % 13.9f % 13.9f" % (resid,resname,aname,seqno,xyz[0],xyz[1],xyz[2])
562 """ Print a line consisting of (element, p, q, r, s, t, ...) where 563 (p, q, r) are arbitrary atom-wise data (this might happen, for 564 instance, with atomic charges) 566 @param[in] element A chemical element of a single atom 567 @param[in] xyzgen A N-element array containing data for that atom 570 return "%-5s" +
' '.join([
"% 15.10f" % i]
for i
in xyzgen)
573 """ Print a line corresponding to the box vector in accordance with .gro file format 575 @param[in] box Box NamedTuple 578 if box.alpha == 90.0
and box.beta == 90.0
and box.gamma == 90.0:
579 return ' '.join([
"% 13.9f" % (i/10)
for i
in [box.a, box.b, box.c]])
581 return ' '.join([
"% 13.9f" % (i/10)
for i
in [box.A[0], box.B[1], box.C[2], box.A[1], box.A[2], box.B[0], box.B[2], box.C[0], box.C[1]]])
584 """ Determines whether a line contains GROMACS data or not 586 @param[in] line The line to be tested 592 elif len(sline) == 5:
598 """ Determines whether a line contains CHARMM data or not 600 @param[in] line The line to be tested 610 """ Determines whether a line contains a GROMACS box vector or not 612 @param[in] line The line to be tested 616 if len(sline) == 9
and all([
isfloat(i)
for i
in sline]):
618 elif len(sline) == 3
and all([
isfloat(i)
for i
in sline]):
625 if out == []
and strip != []:
627 elif out != []
and strip != []:
628 for (i,j)
in zip(out,strip):
633 return ''.join([
' % .10e' % i
for i
in list(vec.flatten())])
636 """ Groups a big long iterable into groups of ten or what have you. """ 637 args = [iter(iterable)] * n
638 return list([e
for e
in t
if e
is not None]
for t
in zip_longest(*args))
641 """ Creates a list of number sequences divided as evenly as possible. """ 642 joblens = np.zeros(splitsize,dtype=int)
644 for i
in range(totlen):
645 joblens[i%splitsize] += 1
647 for i
in range(splitsize):
648 subsets.append(list(range(jobnow, jobnow + joblens[i])))
653 """ Wrapper for the timestep C structure used in molfile plugins. """ 654 _fields_ = [(
"coords",POINTER(c_float)), (
"velocities",POINTER(c_float)),
655 (
"A",c_float), (
"B",c_float), (
"C",c_float), (
"alpha",c_float),
656 (
"beta",c_float), (
"gamma",c_float), (
"physical_time",c_double)]
659 return key
in A.Data
and key
in B.Data
662 if not (key
in A.Data
and key
in B.Data) :
return False 664 if type(A.Data[key])
is np.ndarray:
665 return (A.Data[key] != B.Data[key]).any()
666 elif key ==
'tinkersuf':
667 return [sorted([int(j)
for j
in i.split()])
for i
in A.Data[key]] != \
668 [sorted([int(j)
for j
in i.split()])
for i
in B.Data[key]]
670 return A.Data[key] != B.Data[key]
673 return key
in A.Data
or key
in B.Data
680 """ Constructs an Euler matrix from three Euler angles. """ 681 DMat = np.zeros((3,3))
682 DMat[0,0] = np.cos(T1)
683 DMat[0,1] = np.sin(T1)
684 DMat[1,0] = -np.sin(T1)
685 DMat[1,1] = np.cos(T1)
687 CMat = np.zeros((3,3))
689 CMat[1,1] = np.cos(T2)
690 CMat[1,2] = np.sin(T2)
691 CMat[2,1] = -np.sin(T2)
692 CMat[2,2] = np.cos(T2)
693 BMat = np.zeros((3,3))
694 BMat[0,0] = np.cos(T3)
695 BMat[0,1] = np.sin(T3)
696 BMat[1,0] = -np.sin(T3)
697 BMat[1,1] = np.cos(T3)
699 EMat = multi_dot([BMat, CMat, DMat])
704 Computes an 'overlap' between two molecules based on some 705 fictitious density. Good for fine-tuning alignment but gets stuck 710 elem = np.array(elem)
712 for j
in np.where(elem==i)[0]:
713 for k
in np.where(elem==i)[0]:
714 dx = xyz1[j] - xyz2R[k]
716 Obj -= np.exp(-0.5*dx2)
721 Computes a "overlap density" from two frames. 722 This function can be called by AlignToMoments to get rid of inversion problems 724 grid = np.pi*np.array(list(itertools.product([0,1],[0,1],[0,1])))
725 ovlp = np.array([
ComputeOverlap(e, elem, xyz1, xyz2)
for e
in grid])
726 t1 = grid[np.argmin(ovlp)]
727 xyz2R = np.dot(
EulerMatrix(t1[0],t1[1],t1[2]), xyz2.T).T.copy()
731 """Pre-aligns molecules to 'moment of inertia'. 732 If xyz2 is passed in, it will assume that xyz1 is already 733 aligned to the moment of inertia, and it simply does 180-degree 734 rotations to make sure nothing is inverted.""" 735 xyz = xyz1
if xyz2
is None else xyz2
737 for i, xi
in enumerate(xyz):
738 I += (np.dot(xi,xi)*np.eye(3) - np.outer(xi,xi))
741 A, B = np.linalg.eig(I)
743 BB = B[:, np.argsort(A)]
744 determ = np.linalg.det(BB)
746 if np.abs(determ - 1.0) > Thresh:
747 if np.abs(determ + 1.0) > Thresh:
748 logger.info(
"in AlignToMoments, determinant is % .3f" % determ)
750 xyzr = np.dot(BB.T, xyz.T).T.copy()
759 assert np.shape(matrix1) == np.shape(matrix2),
'Matrices not of same dimensions' 762 nrows = np.shape(matrix1)[0]
765 avg_pos1 = matrix1.sum(axis=0)/nrows
766 avg_pos2 = matrix2.sum(axis=0)/nrows
769 avg_matrix1 = matrix1-avg_pos1
770 avg_matrix2 = matrix2-avg_pos2
773 covar = np.dot(avg_matrix1.T,avg_matrix2)
776 v,s,wt = np.linalg.svd(covar)
780 wvt = np.dot(wt.T, v.T)
784 if np.linalg.det(wvt) < 0:
787 rot_matrix = multi_dot([wt.T,d,v.T]).T
788 trans_matrix = avg_pos2-np.dot(avg_pos1,rot_matrix)
789 return trans_matrix, rot_matrix
792 """ Form a Cartesian product of two NumPy arrays. """ 794 arr = np.empty([len(a)
for a
in arrays] + [la], dtype=np.int32)
795 for i, a
in enumerate(np.ix_(*arrays)):
797 return arr.reshape(-1, la)
799 def extract_int(arr, avgthre, limthre, label="value", verbose=True):
801 Get the representative integer value from an array. 802 The integer value is the rounded mean. Perform sanity 803 checks to ensure the array isn't overly "non-integer". 808 NumPy array containing a series of floating point values 809 where we'd like to extract the representative integer value. 811 If the average deviates from the closest integer by 812 more than this amount, do not pass. 814 If any element in this array deviates from the closest integer by 815 more than this amount, do not pass. 817 Descriptive name of this variable, used only in printout. 819 Print information in case array makes excursions larger than the threshold 824 Representative integer value for the array. 826 Indicates whether the array mean and/or maximum deviations stayed with the thresholds. 828 average = np.mean(arr)
829 maximum = np.max(arr)
830 minimum = np.min(arr)
831 rounded = round(average)
833 if abs(average - rounded) > avgthre:
835 logger.info(
"Average %s (%f) deviates from integer %s (%i) by more than threshold of %f" % (label, average, label, rounded, avgthre))
837 if abs(maximum - minimum) > limthre:
839 logger.info(
"Maximum %s fluctuation (%f) is larger than threshold of %f" % (label, abs(maximum-minimum), limthre))
841 return int(rounded), passed
845 Extract our best estimate of charge and spin-z from the comments 846 section of a Molecule object created with Nanoreactor. Note that 847 spin-z is 1.0 if there is one unpaired electron (not one/half) because 848 the unit is in terms of populations. 850 This function is intended to work on atoms that are *extracted* 851 from an ab initio MD trajectory, where the Mulliken charge and spin 852 populations are not exactly integers. It attempts to return the closest 853 integer but it will sometimes fail. 855 If the number of electrons and spin-z are inconsistent, then 856 return -999,-999 (indicates failure). 862 Molecule object that we're getting charge and spin from, with a known comment syntax 863 Reaction: formula CHO -> CO+H atoms ['0-2'] -> ['0-1','2'] frame 219482 charge -0.742 sz +0.000 sz^2 0.000 868 Representative integer net charge of this Molecule, or -999 if inconsistent 870 Representative integer spin-z of this Molecule (one unpaired electron: sz=1), or -999 if inconsistent 875 srch =
lambda s : np.array([float(re.search(
r'(?<=%s )[-+]?[0-9]*\.?[0-9]*([eEdD][-+]?[0-9]+)?' % s, c).group(0))
for c
in M.comms
if all([i
in c
for i
in (
'charge',
'sz')])])
876 Chgs = srch(
'charge')
878 Spn2s = srch(
r'sz\^2')
880 chg, chgpass =
extract_int(Chgs, 0.3, 1.0, label=
"charge")
881 spn, spnpass =
extract_int(abs(SpnZs), 0.3, 1.0, label=
"spin-z")
884 nproton = sum([Elements.index(i)
for i
in M.elem])
885 nelectron = nproton + chg
887 if verbose: logger.info(
"Going with the minimum spin consistent with charge.")
894 if (int((nelectron-spn)/2))*2 != (nelectron-spn):
895 if verbose: logger.info(
"\x1b[91mThe number of electrons (%i) is inconsistent with the spin-z (%i)\x1b[0m" % (nelectron, spn))
898 if verbose: logger.info(
"%i electrons; charge %i, spin %i" % (nelectron, chg, spn))
901 def arc(Mol, begin=None, end=None, RMSD=True, align=True):
903 Get the arc-length for a trajectory segment. 904 Uses RMSD or maximum displacement of any atom in the trajectory. 909 Molecule object for calculating the arc length. 911 Starting frame, defaults to first frame 913 Ending frame, defaults to final frame 915 Set to True to use frame-to-frame RMSD; otherwise use the maximum displacement of any atom 920 Arc length between frames in Angstrom, length is n_frames - 1 929 Arc = Mol.pathwise_rmsd(align)
931 Arc = np.array([np.max([np.linalg.norm(Mol.xyzs[i+1][j]-Mol.xyzs[i][j])
for j
in range(Mol.na)])
for i
in range(begin, end-1)])
934 def EqualSpacing(Mol, frames=0, dx=0, RMSD=True, align=True):
936 Equalize the spacing of frames in a trajectory with linear interpolation. 937 This is done in a very simple way, first calculating the arc length 938 between frames, then creating an equally spaced array, and interpolating 939 all Cartesian coordinates along this equally spaced array. 941 This is intended to be used on trajectories with smooth transformations and 942 ensures that concatenated frames containing both optimization coordinates 943 and dynamics trajectories don't have sudden changes in their derivatives. 948 Molecule object for equalizing the spacing. 950 Return a Molecule object with this number of frames. 952 Use RMSD in the arc length calculation. 957 New molecule object, either the same one (if frames > len(Mol)) 958 or with equally spaced frames. 960 ArcMol =
arc(Mol, RMSD=RMSD, align=align)
961 ArcMolCumul = np.insert(np.cumsum(ArcMol), 0, 0.0)
962 if frames != 0
and dx != 0:
963 logger.error(
"Provide dx or frames or neither")
965 frames = int(float(max(ArcMolCumul))/dx)
967 frames = len(ArcMolCumul)
969 ArcMolEqual = np.linspace(0, max(ArcMolCumul), frames)
970 xyzold = np.array(Mol.xyzs)
971 xyznew = np.zeros((frames, Mol.na, 3))
972 for a
in range(Mol.na):
974 xyznew[:,a,i] = np.interp(ArcMolEqual, ArcMolCumul, xyzold[:, a, i])
975 if len(xyzold) == len(xyznew):
976 Mol1 = copy.deepcopy(Mol)
981 Mol1 = Mol[np.array([int(round(i))
for i
in np.linspace(0, len(xyzold)-1, len(xyznew))])]
982 Mol1.xyzs = list(xyznew)
985 def AtomContact(xyz, pairs, box=None, displace=False):
987 Compute distances between pairs of atoms. 992 N_frames*N_atoms*3 (3D) array of atomic positions 993 If you only have a single set of positions, pass in xyz[np.newaxis, :] 995 List of 2-tuples of atom indices 996 box : np.ndarray, optional 997 N_frames*3 (2D) array of periodic box vectors 998 If you only have a single set of positions, pass in box[np.newaxis, :] 1000 If True, also return N_frames*N_pairs*3 array of displacement vectors 1005 N_pairs*N_frames (2D) array of minimum image convention distances 1006 np.ndarray (optional) 1007 if displace=True, N_frames*N_pairs*3 array of displacement vectors 1010 parray = np.array(pairs)
1017 xyzpbc /= box[:,np.newaxis,:]
1018 xyzpbc = xyzpbc % 1.0
1021 xyzsel1 = xyzpbc[:,sel1,:]
1022 xyzsel2 = xyzpbc[:,sel2,:]
1024 dxyz = xyzsel2-xyzsel1
1027 dxyz = np.mod(dxyz+0.5, 1.0) - 0.5
1028 dxyz *= box[:,np.newaxis,:]
1029 dr2 = np.sum(dxyz**2,axis=2)
1043 Given a quaternion p, form a rotation matrix from it. 1048 1D array with 3 elements representing the rotation quaterion. 1049 Elements of quaternion are : [cos(a/2), sin(a/2)*axis[0..2]] 1057 assert q.shape[0] == 4
1059 qc = np.zeros_like(q)
1065 al_q = np.array([[ q[0], -q[1], -q[2], -q[3]],
1066 [ q[1], q[0], -q[3], q[2]],
1067 [ q[2], q[3], q[0], -q[1]],
1068 [ q[3], -q[2], q[1], q[0]]])
1069 ar_qc = np.array([[ qc[0], -qc[1], -qc[2], -qc[3]],
1070 [ qc[1], qc[0], qc[3], -qc[2]],
1071 [ qc[2], -qc[3], qc[0], qc[1]],
1072 [ qc[3], qc[2], -qc[1], qc[0]]])
1074 R4 = np.dot(al_q,ar_qc)
1079 Given a rotation axis and angle, return the corresponding 1080 3x3 rotation matrix, which will rotate a (Nx3) array of 1081 xyz coordinates as x0_rot = np.dot(R, x0.T).T 1085 axis : numpy.ndarray 1086 1D array with 3 elements representing the rotation axis 1088 The angle of the rotation 1095 assert axis.ndim == 1
1096 assert axis.shape[0] == 3
1097 axis /= np.linalg.norm(axis)
1099 ct2 = np.cos(angle/2)
1100 st2 = np.sin(angle/2)
1101 q = np.array([ct2, st2*axis[0], st2*axis[1], st2*axis[2]])
1107 """ Lee-Ping's general file format conversion class. 1109 The purpose of this class is to read and write chemical file formats in a 1110 way that is convenient for research. There are highly general file format 1111 converters out there (e.g. catdcd, openbabel) but I find that writing 1112 my own class can be very helpful for specific purposes. Here are some things 1115 - Convert a .gro file to a .xyz file, or a .pdb file to a .dcd file. 1116 Data is stored internally, so any readable file can be converted into 1117 any writable file as long as there is sufficient information to write 1120 - Accumulate information from different files. For example, we may read 1121 A.gro to get a list of coordinates, add quantum settings from a B.in file, 1122 and write A.in (this gives us a file that we can use to run QM calculations) 1124 - Concatenate two trajectories together as long as they're compatible. This 1125 is done by creating two Molecule objects and then simply adding them. Addition 1126 means two things: (1) Information fields missing from each class, but present 1127 in the other, are added to the sum, and (2) Appendable or per-frame fields 1128 (i.e. coordinates) are concatenated together. 1130 - Slice trajectories using reasonable Python language. That is to 1131 say, MyMolecule[1:10] returns a new Molecule object that contains 1132 frames 1 through 9 (inclusive and numbered starting from zero.) 1134 Special variables: These variables cannot be set manually because 1135 there is a special method associated with getting them. 1137 na = The number of atoms. You'll get this if you use MyMol.na or MyMol['na']. 1138 ns = The number of snapshots. You'll get this if you use MyMol.ns or MyMol['ns']. 1140 Unit system: Angstroms. 1144 def __init__(self, fnm = None, ftype = None, top = None, ttype = None, **kwargs):
1146 Create a Molecule object. 1151 File name to create the Molecule object from. If provided, 1152 the file will be parsed and used to fill in the fields such as 1153 elem (elements), xyzs (coordinates) and so on. If ftype is not 1154 provided, will automatically try to determine file type from file 1155 extension. If not provided, will create an empty object. 1156 ftype : str, optional 1157 File type, corresponding to an entry in the internal table of known 1158 file types. Provide this if you have a nonstandard file extension 1159 or if you wish to force to invoke a particular parser. 1161 "Topology" file name. If provided, will build the Molecule 1162 using this file name, then "fnm" will load frames instead. 1163 ttype : str, optional 1164 "Typology" file type. 1165 build_topology : bool, optional 1166 Build the molecular topology consisting of: topology (overall connectivity graph), 1167 molecules (list of connected subgraphs), bonds (if not explicitly read in), default True 1168 toppbc : bool, optional 1169 Use periodic boundary conditions when building the molecular topology, default False 1170 The build_topology code will attempt to determine this intelligently. 1171 topframe : int, optional 1172 Provide a frame number for building the molecular topology, default first frame 1173 Fac : float, optional 1174 Multiplicative factor to covalent radii criterion for deciding whether two atoms are bonded 1175 Default value of 1.2 is reasonable, 1.4 will produce lots of bonds 1176 positive_resid : bool, optional 1177 If provided, enforce all positive resIDs. 1223 self.
Funnel = {
'gromos' :
'gromacs',
1242 self.
top_settings = {
'toppbc' : kwargs.get(
'toppbc',
False),
1243 'topframe' : kwargs.get(
'topframe', 0),
1244 'Fac' : kwargs.get(
'Fac', 1.2),
1245 'read_bonds' :
False,
1246 'fragment' : kwargs.get(
'fragment',
False),
1247 'radii' : kwargs.get(
'radii', {})}
1255 self.
Data[
'fnm'] = fnm
1258 ftype = os.path.splitext(fnm)[1][1:]
1259 if not os.path.exists(fnm):
1260 logger.error(
'Tried to create Molecule object from a file that does not exist: %s\n' % fnm)
1262 self.
Data[
'ftype'] = ftype
1266 for key, val
in Parsed.items():
1267 self.
Data[key] = val
1269 if 'comms' not in self.
Data:
1270 self.
comms = [
'From %s: Frame %i / %i' % (fnm, i+1, self.ns)
for i
in range(self.ns)]
1271 if 'qm_energies' in self.
Data:
1272 for i
in range(self.ns):
1273 self.
comms[i] +=
', Energy= % 18.10f' % self.qm_energies[i]
1275 self.
comms = [i.expandtabs()
for i
in self.
comms]
1277 if kwargs.get(
'build_topology',
True)
and hasattr(self,
'elem')
and self.na > 0:
1279 if load_fnm
is not None:
1289 """ Return the number of frames in the trajectory. """ 1293 for key
in self.FrameKeys:
1295 if L != -1
and len(self.
Data[key]) != L:
1297 L = len(self.
Data[key])
1304 """ Whenever we try to get a class attribute, it first tries to get the attribute from the Data dictionary. """ 1305 if key ==
'qm_forces':
1306 logger.warning(
'qm_forces is a deprecated keyword because it actually meant gradients; setting to qm_grads.')
1314 for key
in self.AtomKeys:
1316 if L != -1
and len(self.
Data[key]) != L:
1318 L = len(self.
Data[key])
1322 elif 'xyzs' in self.
Data:
1323 return len(self.
xyzs[0])
1329 elif key ==
'FrameKeys':
1330 return set(self.
Data) & FrameVariableNames
1331 elif key ==
'AtomKeys':
1332 return set(self.
Data) & AtomVariableNames
1333 elif key ==
'MetaKeys':
1334 return set(self.
Data) & MetaVariableNames
1335 elif key ==
'QuantumKeys':
1336 return set(self.
Data) & QuantumVariableNames
1337 elif key
in self.
Data:
1338 return self.
Data[key]
1339 return getattr(super(Molecule, self), key)
1342 """ Whenever we try to get a class attribute, it first tries to get the attribute from the Data dictionary. """ 1345 if key ==
'qm_forces':
1346 logger.warning(
'qm_forces is a deprecated keyword because it actually meant gradients; setting to qm_grads.')
1348 if key
in AllVariableNames:
1349 self.
Data[key] = value
1350 return super(Molecule,self).
__setattr__(key, value)
1353 """ Custom deepcopy method because Python 3.6 appears to have changed its behavior """ 1360 for key
in self.
Data:
1361 if key
in [
'xyzs',
'qm_grads',
'qm_hessians',
'qm_espxyzs',
'qm_espvals',
'qm_extchgs',
'qm_mulliken_charges',
'qm_mulliken_spins',
'molecules',
'qm_bondorder']:
1365 for i
in range(len(self.
Data[key])):
1366 New.Data[key].
append(copy.deepcopy(self.
Data[key][i]))
1367 elif key
in [
'topology']:
1369 New.Data[key] = self.
Data[key].copy()
1370 elif key
in [
'boxes',
'qcrems']:
1375 for i
in range(len(self.
Data[key])):
1376 New.Data[key].
append(copy.deepcopy(self.
Data[key][i]))
1377 elif key
in [
'comms',
'qm_energies',
'qm_interaction',
'qm_zpe',
'qm_entropy',
'qm_enthalpy',
'elem',
'partial_charge',
1378 'atomname',
'atomtype',
'tinkersuf',
'resid',
'resname',
'qcsuf',
'qm_ghost',
'chain',
'altloc',
'icode',
1380 if not isinstance(self.
Data[key], list):
1381 raise RuntimeError(
'Expected data attribute %s to be a list, but it is %s' % (key, str(type(self.
Data[key]))))
1383 New.Data[key] = self.
Data[key][:]
1384 elif key
in [
'fnm',
'ftype',
'charge',
'mult',
'qcerr']:
1387 elif key
in [
'bonds']:
1390 for i
in range(len(self.
Data[key])):
1392 elif key
in [
'qctemplate']:
1397 for SectName, SectData
in self.
Data[key]:
1398 New.Data[key].
append((SectName, SectData[:]))
1400 raise RuntimeError(
"Failed to copy key %s" % key)
1405 The Molecule class has list-like behavior, so we can get slices of it. 1406 If we say MyMolecule[0:10], then we'll return a copy of MyMolecule with frames 0 through 9. 1408 if isinstance(key, int)
or isinstance(key, slice)
or isinstance(key,np.ndarray)
or isinstance(key,list):
1409 if isinstance(key, int):
1412 for k
in self.FrameKeys:
1414 New.Data[k] = [j
for i, j
in enumerate(self.
Data[k])
if i
in np.arange(len(self))[key]]
1416 New.Data[k] = list(np.array(copy.deepcopy(self.
Data[k]))[key])
1417 for k
in self.AtomKeys | self.MetaKeys:
1418 New.Data[k] = copy.deepcopy(self.
Data[k])
1422 logger.error(
'getitem is not implemented for keys of type %s\n' % str(key))
1427 Similarly, in order to delete a frame, we simply perform item deletion on 1428 framewise variables. 1430 for k
in self.FrameKeys:
1431 del self.
Data[k][key]
1434 """ List-like behavior for looping over trajectories. Note that these values are returned by reference. 1435 Note that this is intended to be more efficient than __getitem__, so when we loop over a trajectory, 1436 it's best to go "for m in M" instead of "for i in range(len(M)): m = M[i]" 1438 for frame
in range(self.ns):
1440 for k
in self.FrameKeys:
1441 New.Data[k] = self.
Data[k][frame]
1442 for k
in self.AtomKeys | self.MetaKeys:
1443 New.Data[k] = self.
Data[k]
1447 """ Add method for Molecule objects. """ 1449 if not isinstance(other,Molecule):
1450 logger.error(
'A Molecule instance can only be added to another Molecule instance\n')
1454 for key
in AtomVariableNames | MetaVariableNames:
1457 if key
in [
'fnm',
'ftype',
'bonds',
'molecules',
'topology']
and key
in self.
Data:
1458 Sum.Data[key] = self.
Data[key]
1459 elif diff(self, other, key):
1460 for i, j
in zip(self.
Data[key], other.Data[key]):
1461 logger.info(
"%s %s %s" % (i, j, str(i==j)))
1462 logger.error(
'The data member called %s is not the same for these two objects\n' % key)
1464 elif key
in self.
Data:
1465 Sum.Data[key] = copy.deepcopy(self.
Data[key])
1466 elif key
in other.Data:
1467 Sum.Data[key] = copy.deepcopy(other.Data[key])
1468 for key
in FrameVariableNames:
1469 if both(self, other, key):
1470 if type(self.
Data[key])
is not list:
1471 logger.error(
'Key %s in self is a FrameKey, it must be a list\n' % key)
1473 if type(other.Data[key])
is not list:
1474 logger.error(
'Key %s in other is a FrameKey, it must be a list\n' % key)
1476 if isinstance(self.
Data[key][0], np.ndarray):
1477 Sum.Data[key] = [i.copy()
for i
in self.
Data[key]] + [i.copy()
for i
in other.Data[key]]
1479 Sum.Data[key] = list(self.
Data[key] + other.Data[key])
1480 elif either(self, other, key):
1483 if key
in self.
Data:
1484 other.Data[
'boxes'] = [self.
Data[
'boxes'][0]
for i
in range(len(other))]
1485 elif key
in other.Data:
1486 self.
Data[
'boxes'] = [other.Data[
'boxes'][0]
for i
in range(len(self))]
1488 logger.error(
'Key %s is a FrameKey, must exist in both self and other for them to be added (for now).\n' % key)
1493 """ Add method for Molecule objects. """ 1495 if not isinstance(other,Molecule):
1496 logger.error(
'A Molecule instance can only be added to another Molecule instance\n')
1499 for key
in AtomVariableNames | MetaVariableNames:
1500 if key
in [
'fnm',
'ftype',
'bonds',
'molecules',
'topology']:
pass 1501 elif diff(self, other, key):
1502 for i, j
in zip(self.
Data[key], other.Data[key]):
1503 logger.info(
"%s %s %s" % (i, j, str(i==j)))
1504 logger.error(
'The data member called %s is not the same for these two objects\n' % key)
1507 elif key
in other.Data:
1508 self.
Data[key] = copy.deepcopy(other.Data[key])
1510 for key
in FrameVariableNames:
1511 if both(self, other, key):
1512 if type(self.
Data[key])
is not list:
1513 logger.error(
'Key %s in self is a FrameKey, it must be a list\n' % key)
1515 if type(other.Data[key])
is not list:
1516 logger.error(
'Key %s in other is a FrameKey, it must be a list\n' % key)
1518 if isinstance(self.
Data[key][0], np.ndarray):
1519 self.
Data[key] += [i.copy()
for i
in other.Data[key]]
1521 self.
Data[key] += other.Data[key]
1522 elif either(self, other, key):
1525 if key
in self.
Data:
1526 other.Data[
'boxes'] = [self.
Data[
'boxes'][0]
for i
in range(len(other))]
1527 elif key
in other.Data:
1528 self.
Data[
'boxes'] = [other.Data[
'boxes'][0]
for i
in range(len(self))]
1530 logger.error(
'Key %s is a FrameKey, must exist in both self and other for them to be added (for now).\n' % key)
1534 def repair(self, key, klast):
1535 """ Attempt to repair trivial issues that would otherwise break the object. """ 1536 kthis = key
if len(self.
Data[key]) < len(self.
Data[klast])
else klast
1537 kother = klast
if len(self.
Data[key]) < len(self.
Data[klast])
else key
1538 diff = abs(len(self.
Data[key]) - len(self.
Data[klast]))
1539 if kthis ==
'comms':
1542 for i
in range(diff):
1545 for i
in range(-1*diff):
1546 self.
Data[
'comms'].pop()
1547 elif kthis ==
'boxes' and len(self.
Data[
'boxes']) == 1:
1549 for i
in range(diff): self.
Data[
'boxes'].
append(self.
Data[
'boxes'][-1])
1551 logger.error(
'The keys %s and %s have different lengths (%i %i)' 1552 '- this isn\'t supposed to happen for two AtomKeys member variables.' 1553 % (key, klast, len(self.
Data[key]), len(self.
Data[klast])))
1560 Reorder atoms according to some other Molecule object. This 1561 happens when we run a program like pdb2gmx or pdbxyz and it 1562 scrambles our atom ordering, forcing us to reorder the atoms 1563 for all frames in the current Molecule object. 1566 (1) Load up the scrambled file as a new Molecule object. 1567 (2) Call this function: Original_Molecule.reorder_according_to(scrambled) 1568 (3) Save Original_Molecule to a new file name. 1576 for key
in self.AtomKeys:
1577 NewData[key] = list(np.array(M.Data[key])[unmangled])
1578 for key
in self.FrameKeys:
1579 if key
in [
'xyzs',
'qm_grads',
'qm_mulliken_charges',
'qm_mulliken_spins']:
1580 NewData[key] = list([self.
Data[key][i][unmangled]
for i
in range(len(self))])
1582 setattr(self, key, copy.deepcopy(NewData[key]))
1588 Return the indices that would reorder atoms according to some 1589 other Molecule object. This happens when we run a program 1590 like pdb2gmx or pdbxyz and it scrambles our atom ordering. 1593 (1) Load up the scrambled file as a new Molecule object. 1594 (2) Call this function: Original_Molecule.reorder_indices(scrambled) 1606 if hasattr(self,
'na'):
1608 if arg ==
'qm_forces':
1609 logger.warning(
'qm_forces is a deprecated keyword because it actually meant gradients; setting to qm_grads.')
1611 if arg
not in self.
Data:
1612 logger.error(
"%s is a required attribute for writing this type of file but it's not present\n" % arg)
1625 def write(self,fnm=None,ftype=None,append=False,selection=None,**kwargs):
1626 if fnm
is None and ftype
is None:
1627 logger.error(
"Output file name and file type are not specified.\n")
1630 ftype = os.path.splitext(fnm)[1][1:]
1632 if 'comms' not in self.
Data:
1633 self.
comms = [
'Generated by %s from %s: Frame %i of %i' % (package, fnm, i+1, self.ns)
for i
in range(self.ns)]
1634 if 'xyzs' in self.
Data and len(self.
comms) < len(self.
xyzs):
1635 for i
in range(len(self.
comms), len(self.
xyzs)):
1636 self.
comms.
append(
"Frame %i: generated by %s" % (i, package))
1640 if type(selection)
in [int, np.int64, np.int32]:
1641 selection = [selection]
1642 if selection
is None:
1643 selection = list(range(len(self)))
1645 selection = list(selection)
1646 Answer = self.
Write_Tab[self.
Funnel[ftype.lower()]](selection,**kwargs)
1648 if Answer
is not None:
1649 if fnm
is None or fnm == sys.stdout:
1650 outfile = sys.stdout
1654 if os.path.islink(fnm):
1656 outfile = open(fnm,
'a')
1658 if os.path.islink(fnm):
1660 outfile = open(fnm,
'w')
1662 print(line, file=outfile)
1671 totMass = sum([PeriodicTable.get(self.elem[i], 0.0)
for i
in range(self.na)])
1672 return np.array([np.sum([xyz[i,:] * PeriodicTable.get(self.elem[i], 0.0) / totMass
for i
in range(xyz.shape[0])],axis=0)
for xyz
in self.
xyzs])
1675 totMass = sum([PeriodicTable[self.elem[i]]
for i
in range(self.na)])
1678 for i, xyz
in enumerate(self.
xyzs):
1681 rgs.append(np.sqrt(np.sum([PeriodicTable[self.elem[i]]*np.dot(x,x)
for i, x
in enumerate(xyz1)])/totMass))
1682 return np.array(rgs)
1685 """ If one atom is oxygen and the next two are hydrogen, make the water molecule rigid. """ 1687 for i
in range(len(self)):
1688 for a
in range(self.na-2):
1689 if self.elem[a] ==
'O' and self.elem[a+1] ==
'H' and self.elem[a+2] ==
'H':
1699 r1 /= np.linalg.norm(r1)
1700 r2 /= np.linalg.norm(r2)
1704 ex /= np.linalg.norm(ex)
1705 ey /= np.linalg.norm(ey)
1707 Ang = np.pi * 104.52 / 2 / 180
1710 h1 = o + Bond*ex*cosx + Bond*ey*cosy
1711 h2 = o + Bond*ex*cosx - Bond*ey*cosy
1712 rig = np.array([o, h1, h2]) + com
1713 self.
xyzs[i][a:a+3] = rig
1720 ftype = os.path.splitext(fnm)[1][1:]
1721 if not os.path.exists(fnm):
1722 logger.error(
'Tried to create Molecule object from a file that does not exist: %s\n' % fnm)
1725 Parsed = self.
Read_Tab[self.
Funnel[ftype.lower()]](fnm, **kwargs)
1726 if 'xyzs' not in Parsed:
1727 logger.error(
'Did not get any coordinates from the new file %s\n' % fnm)
1729 if Parsed[
'xyzs'][0].shape[0] != self.na:
1730 logger.error(
'When loading frames, don\'t change the number of atoms\n')
1733 for key, val
in Parsed.items():
1734 if key
in FrameVariableNames:
1735 self.
Data[key] = val
1737 if 'comms' not in self.
Data:
1738 self.
comms = [
'Generated by %s from %s: Frame %i of %i' % (package, fnm, i+1, self.ns)
for i
in range(self.ns)]
1740 self.
comms = [i.expandtabs()
for i
in self.
comms]
1743 """ Edit Q-Chem rem variables with a dictionary. Pass a value of None to delete a rem variable. """ 1745 for qcrem
in self.qcrems:
1746 for key, val
in in_dict.items():
1748 qcrem.pop(key,
None)
1752 for key, val
in in_dict.items():
1754 self.qcrems[subcalc].pop(key,
None)
1756 self.qcrems[subcalc][key] = val
1759 if type(other)
is Molecule:
1761 elif type(other)
is str:
1763 for key
in OtherMol.QuantumKeys:
1764 if key
in AtomVariableNames
and len(OtherMol.Data[key]) != self.na:
1765 logger.error(
'The quantum-key %s is AtomData, but it doesn\'t have the same number of atoms as the Molecule object we\'re adding it to.')
1767 self.
Data[key] = copy.deepcopy(OtherMol.Data[key])
1770 """ Add a virtual site to the system. This does NOT set the position of the virtual site; it sits at the origin. """ 1771 for key
in self.AtomKeys:
1773 self.
Data[key].insert(idx,kwargs[key])
1775 logger.error(
'You need to specify %s when adding a virtual site to this molecule.\n' % key)
1777 if 'xyzs' in self.
Data:
1778 for i, xyz
in enumerate(self.
xyzs):
1780 self.
xyzs[i] = np.insert(xyz, idx, xyz[kwargs[
'pos']], axis=0)
1782 self.
xyzs[i] = np.insert(xyz, idx, 0.0, axis=0)
1784 logger.error(
'You need to have xyzs in this molecule to add a virtual site.\n')
1788 """ Replace all of the data for a certain attribute in the system from orig to want. """ 1789 if key
in self.
Data:
1790 for i
in range(self.na):
1791 if self.
Data[key][i] == orig:
1792 self.
Data[key][i] = want
1794 logger.error(
'The key that we want to replace (%s) doesn\'t exist.\n' % key)
1798 """ Replace all of the data for a attribute key2 from orig to want, contingent on key1 being equal to cond. 1799 For instance: replace H1 with H2 if resname is SOL.""" 1800 if key2
in self.
Data and key1
in self.
Data:
1801 for i
in range(self.na):
1802 if self.
Data[key2][i] == orig
and self.
Data[key1][i] == cond:
1803 self.
Data[key2][i] = want
1805 logger.error(
'Either the comparison or replacement key (%s, %s) doesn\'t exist.\n' % (key1, key2))
1808 def atom_select(self,atomslice,build_topology=True):
1809 """ Return a copy of the object with certain atoms selected. Takes an integer, list or array as argument. """ 1810 if isinstance(atomslice, int):
1811 atomslice = [atomslice]
1812 if isinstance(atomslice, list):
1813 atomslice = np.array(atomslice)
1815 for key
in self.FrameKeys | self.MetaKeys:
1816 New.Data[key] = copy.deepcopy(self.
Data[key])
1817 for key
in self.AtomKeys:
1818 if key ==
'tinkersuf':
1819 Map = dict([(a+1, i+1)
for i, a
in enumerate(atomslice)])
1820 CopySuf = list(np.array(self.
Data[key])[atomslice])
1822 for line
in CopySuf:
1823 whites = re.split(
'[^ ]+',line)
1826 for i
in range(1,len(s)):
1827 s[i] = str(Map[int(s[i])])
1828 sn = [int(i)
for i
in s[1:]]
1829 s = [s[0]] + list(np.array(s[1:])[np.argsort(sn)])
1830 NewSuf.append(
''.join([whites[j]+s[j]
for j
in range(len(s))]))
1831 New.Data[
'tinkersuf'] = NewSuf[:]
1833 New.Data[key] = list(np.array(self.
Data[key])[atomslice])
1834 for key
in self.FrameKeys:
1835 if key
in [
'xyzs',
'qm_grads',
'qm_mulliken_charges',
'qm_mulliken_spins']:
1836 New.Data[key] = [self.
Data[key][i][atomslice]
for i
in range(len(self))]
1837 if 'bonds' in self.
Data:
1838 New.Data[
'bonds'] = [(list(atomslice).index(b[0]), list(atomslice).index(b[1]))
for b
in self.bonds
if (b[0]
in atomslice
and b[1]
in atomslice)]
1841 New.build_topology(force_bonds=
False)
1845 """ Return a copy of the object with another molecule object appended. WARNING: This function may invalidate stuff like QM energies. """ 1846 if len(other) != len(self):
1847 logger.error(
'The number of frames of the Molecule objects being stacked are not equal.\n')
1851 for key
in self.FrameKeys | self.MetaKeys:
1852 New.Data[key] = copy.deepcopy(self.
Data[key])
1856 if k
in self.
Data and k
in other.Data:
1857 New.Data[k] = [np.vstack((s, o))
for s, o
in zip(self.
Data[k], other.Data[k])]
1858 for i
in [
'xyzs',
'qm_grads',
'qm_hessians',
'qm_espxyzs',
'qm_espvals',
'qm_extchgs',
'qm_mulliken_charges',
'qm_mulliken_spins']:
1862 for key
in self.AtomKeys:
1863 if key
not in other.Data:
1864 logger.error(
'Trying to stack two Molecule objects - the first object contains %s and the other does not\n' % key)
1866 if key ==
'tinkersuf':
1868 for line
in other.Data[key]:
1869 whites = re.split(
'[^ ]+',line)
1872 for i
in range(1,len(s)):
1873 s[i] = str(int(s[i]) + self.na)
1874 NewSuf.append(
''.join([whites[j]+s[j]
for j
in range(len(s))]))
1875 New.Data[key] = copy.deepcopy(self.
Data[key]) + NewSuf
1877 if type(self.
Data[key])
is np.ndarray:
1878 New.Data[key] = np.concatenate((self.
Data[key], other.Data[key]))
1879 elif type(self.
Data[key])
is list:
1880 New.Data[key] = self.
Data[key] + other.Data[key]
1882 logger.error(
'Cannot stack %s because it is of type %s\n' % (key, str(type(New.Data[key]))))
1884 if 'bonds' in self.
Data and 'bonds' in other.Data:
1885 New.Data[
'bonds'] = self.bonds + [(b[0]+self.na, b[1]+self.na)
for b
in other.bonds]
1889 """ Align molecules using the moment of inertia. 1890 Departs from MSMBuilder convention of 1891 using arithmetic mean for mass. """ 1896 for index2, xyz2
in enumerate(self.
xyzs):
1897 xyz2 -= coms[index2]
1899 self.
xyzs[index2] = xyz2
1902 """ Return a cloned molecule object but with X-coordinates set 1903 to Mulliken charges and Y-coordinates set to Mulliken 1905 QS = copy.deepcopy(self)
1906 QSxyz = np.array(QS.xyzs)
1909 QSxyz[:, :, 2] *= 0.0
1910 QS.xyzs = list(QSxyz)
1914 """ Given a charge-spin xyz file, load the charges (x-coordinate) and spins (y-coordinate) into internal arrays. """ 1915 QS =
Molecule(fnm, ftype=
'xyz', build_topology =
False)
1919 def align(self, smooth = False, center = True, center_mass = False, atom_select=None):
1920 """ Align molecules. 1922 Has the option to create smooth trajectories 1923 (align each frame to the previous one) 1924 or to align each frame to the first one. 1926 Also has the option to remove the center of mass. 1928 Provide a list of atom indices to align along selected atoms. 1931 if isinstance(atom_select, list):
1932 atom_select = np.array(atom_select)
1933 if center
and center_mass:
1934 logger.error(
'Specify center=True or center_mass=True but set the other one to False\n')
1940 xyz1 -= xyz1.mean(0)
1943 for index2, xyz2
in enumerate(self.
xyzs):
1944 if index2 == 0:
continue 1945 xyz2 -= xyz2.mean(0)
1950 if atom_select
is not None:
1954 xyz2 = np.dot(xyz2, rt) + tr
1955 self.
xyzs[index2] = xyz2
1957 def center(self, center_mass = False):
1958 """ Move geometric center to the origin. """ 1961 for i
in range(len(self)):
1962 self.
xyzs[i] -= coms[i]
1964 for i
in range(len(self)):
1965 self.
xyzs[i] -= self.
xyzs[i].mean(0)
1968 """ Build the bond connectivity graph. """ 1975 R = np.array([self.
top_settings[
'radii'].get(i, (Radii[Elements.index(i)-1]
if i
in Elements
else 0.0))
for i
in self.elem])
1977 mins = np.min(self.
xyzs[sn],axis=0)
1978 maxs = np.max(self.
xyzs[sn],axis=0)
1981 if hasattr(self,
'boxes'):
1985 xmax = self.
boxes[sn].a
1986 ymax = self.
boxes[sn].b
1987 zmax = self.
boxes[sn].c
1988 if any([i != 90.0
for i
in [self.
boxes[sn].alpha, self.
boxes[sn].beta, self.
boxes[sn].gamma]]):
1989 logger.warning(
"Warning: Topology building will not work with broken molecules in nonorthogonal cells.")
2005 gszx = xext/int(xext/gsz)
2006 gszy = yext/int(yext/gsz)
2007 gszz = zext/int(zext/gsz)
2015 use_grid = toppbc
or (np.min([xext, yext, zext]) > 2.0*gsz)
2021 xgrd = np.arange(xmin, xmax-gszx, gszx)
2022 ygrd = np.arange(ymin, ymax-gszy, gszy)
2023 zgrd = np.arange(zmin, zmax-gszz, gszz)
2025 gidx = list(itertools.product(range(len(xgrd)), range(len(ygrd)), range(len(zgrd))))
2028 gngh = OrderedDict()
2029 amax = np.array(gidx[-1])
2030 amin = np.array(gidx[0])
2031 n27 = np.array(list(itertools.product([-1,0,1],repeat=3)))
2038 mod = amax[k]-amin[k]+1
2041 elif nj[k] > amax[k]:
2043 gngh[i].
append(tuple(nj))
2046 gasn = OrderedDict([(i, [])
for i
in gidx])
2047 for i
in range(self.na):
2052 xi = self.
xyzs[sn][i][0]
2053 while xi < xmin: xi += xext
2054 while xi > xmax: xi -= xext
2058 yi = self.
xyzs[sn][i][1]
2059 while yi < ymin: yi += yext
2060 while yi > ymax: yi -= yext
2064 zi = self.
xyzs[sn][i][2]
2065 while zi < zmin: zi += zext
2066 while zi > zmax: zi -= zext
2069 gasn[(xidx,yidx,zidx)].
append(i)
2078 if len(apairs) > 0: AtomIterator.append(apairs[apairs[:,0]>apairs[:,1]])
2079 AtomIterator = np.ascontiguousarray(np.vstack(AtomIterator))
2083 AtomIterator = np.ascontiguousarray(np.vstack((np.fromiter(itertools.chain(*[[i]*(self.na-i-1)
for i
in range(self.na)]),dtype=np.int32), np.fromiter(itertools.chain(*[range(i+1,self.na)
for i
in range(self.na)]),dtype=np.int32))).T)
2086 BT0 = R[AtomIterator[:,0]]
2087 BT1 = R[AtomIterator[:,1]]
2088 BondThresh = (BT0+BT1) * Fac
2089 BondThresh = (BondThresh > mindist) * BondThresh + (BondThresh < mindist) * mindist
2090 if hasattr(self,
'boxes')
and toppbc:
2099 atom_bonds = [[]
for i
in range(self.na)]
2100 bond_bool = dxij < BondThresh
2101 for i, a
in enumerate(bond_bool):
2103 (ii, jj) = AtomIterator[i]
2104 if ii == jj:
continue 2105 atom_bonds[ii].
append(jj)
2106 atom_bonds[jj].
append(ii)
2108 for i, bi
in enumerate(atom_bonds):
2114 bondlist.append((i, j))
2116 bondlist.append((j, i))
2117 bondlist = sorted(list(set(bondlist)))
2118 self.
Data[
'bonds'] = sorted(list(set(bondlist)))
2124 Create self.topology and self.molecules; these are graph 2125 representations of the individual molecules (fragments) 2126 contained in the Molecule object. 2131 Build the bonds from interatomic distances. If the user 2132 calls build_topology from outside, assume this is the 2133 default behavior. If creating a Molecule object using 2134 __init__, do not force the building of bonds by default 2135 (only build bonds if not read from file.) 2136 topframe : int, optional 2137 Provide the frame number used for reading the bonds. If 2138 not provided, this will be taken from the top_settings 2139 field. If provided, this will take priority and write 2140 the value into top_settings. 2142 sn = kwargs.get(
'topframe', self.
top_settings[
'topframe'])
2144 if self.na > 100000:
2145 logger.warning(
"Warning: Large number of atoms (%i), topology building may take a long time" % self.na)
2147 if (
not self.
top_settings[
'read_bonds'])
or force_bonds:
2151 for i, a
in enumerate(self.elem):
2153 if parse_version(nx.__version__) >= parse_version(
'2.0'):
2154 if 'atomname' in self.
Data:
2155 nx.set_node_attributes(G,{i:self.
atomname[i]}, name=
'n')
2156 nx.set_node_attributes(G,{i:a}, name=
'e')
2157 nx.set_node_attributes(G,{i:self.
xyzs[sn][i]}, name=
'x')
2159 if 'atomname' in self.
Data:
2160 nx.set_node_attributes(G,
'n',{i:self.
atomname[i]})
2161 nx.set_node_attributes(G,
'e',{i:a})
2162 nx.set_node_attributes(G,
'x',{i:self.
xyzs[sn][i]})
2163 for (i, j)
in self.bonds:
2168 self.
molecules = [G.subgraph(c).copy()
for c
in nx.connected_components(G)]
2169 for g
in self.
molecules: g.__class__ = MyG
2174 """ Obtain distance matrix between all pairs of atoms. """ 2175 AtomIterator = np.ascontiguousarray(np.vstack((np.fromiter(itertools.chain(*[[i]*(self.na-i-1)
for i
in range(self.na)]),dtype=np.int32),
2176 np.fromiter(itertools.chain(*[range(i+1,self.na)
for i
in range(self.na)]),dtype=np.int32))).T)
2177 if hasattr(self,
'boxes')
and pbc:
2178 boxes = np.array([[self.
boxes[i].a, self.
boxes[i].b, self.
boxes[i].c]
for i
in range(len(self))])
2182 return AtomIterator, list(drij)
2185 """ Obtain distance matrix and displacement vectors between all pairs of atoms. """ 2186 AtomIterator = np.ascontiguousarray(np.vstack((np.fromiter(itertools.chain(*[[i]*(self.na-i-1)
for i
in range(self.na)]),dtype=np.int32),
2187 np.fromiter(itertools.chain(*[range(i+1,self.na)
for i
in range(self.na)]),dtype=np.int32))).T)
2188 if hasattr(self,
'boxes')
and pbc:
2189 boxes = np.array([[self.
boxes[i].a, self.
boxes[i].b, self.
boxes[i].c]
for i
in range(len(self))])
2190 drij, dxij =
AtomContact(np.array(self.
xyzs), AtomIterator, box=boxes, displace=
True)
2192 drij, dxij =
AtomContact(np.array(self.
xyzs), AtomIterator, box=
None, displace=
True)
2193 return AtomIterator, list(drij), list(dxij)
2195 def rotate_bond(self, frame, aj, ak, increment=15):
2197 Return a new Molecule object containing the selected frame 2198 plus a number of frames where the selected dihedral angle is rotated 2199 in steps of 'increment' given in degrees. 2201 This function is designed to be called by Molecule.rotate_check_clash(). 2206 Structure number of the current Molecule to be rotated 2208 Atom numbers of the bond to be rotated 2210 Degrees of the rotation increment 2215 New Molecule object containing the rotated structures 2223 for ibond, bond
in enumerate(M.bonds):
2224 if bond == (aj, ak)
or bond == (ak, aj):
2225 delBonds.append(bond)
2226 if len(delBonds) > 1:
2227 raise RuntimeError(
'Expected only one bond to be deleted')
2228 if len(M.molecules) != 1:
2229 raise RuntimeError(
'Expected a single molecule')
2230 M.bonds.remove(delBonds[0])
2231 M.top_settings[
'read_bonds']=
True 2232 M.build_topology(force_bonds=
False)
2233 if len(M.molecules) != 2:
2234 raise RuntimeError(
'Expected two molecules after removing a bond')
2238 if len(M.molecules[0].
L()) < len(M.molecules[1].
L()):
2239 gAtoms = M.molecules[0].
L()
2240 oAtoms = M.molecules[1].
L()
2242 gAtoms = M.molecules[1].
L()
2243 oAtoms = M.molecules[0].
L()
2253 M.bonds.append(delBonds[0])
2256 axis = M.xyzs[0][atom2] - M.xyzs[0][atom1]
2259 x0 = M.xyzs[0][gAtoms]
2260 x0_ref = M.xyzs[0][atom2]
2265 for thetaDeg
in np.arange(increment, 360, increment):
2266 theta = np.pi * thetaDeg / 180
2270 x0_rot = np.dot(R, x0.T).T
2272 xnew = M.xyzs[0].copy()
2275 xnew[gAtoms] = x0_rot + x0_ref
2278 M.comms.append(
"Rotated by %.2f degrees" % increment)
2279 return M, (gAtoms, oAtoms)
2281 def find_clashes(self, thre=0.0, pbc=True, groups=None):
2283 Obtain a list of atoms that 'clash' (i.e. are more than 2284 3 bonds apart and are closer than the provided threshold.) 2289 Create a sorted-list of all non-bonded atom pairs 2290 with distance below this threshold 2292 Whether to use PBC when computing interatomic distances 2293 filt : tuple (group1, group2) 2294 If not None, only compute distances between atoms in 'group1' 2295 and atoms in 'group2'. For example this could be [gAtoms, oAtoms] 2300 minPair_frames : list of tuples 2301 The closest pair of non-bonded atoms in each frame 2302 minDist_frames : list of tuples 2303 The distance between the closest pair of non-bonded atoms in each frame 2304 clashPairs_frames : list of lists 2305 Pairs of non-bonded atoms with distance below the threshold in each frame 2306 clashDists_frames : list of lists 2307 Distances between pairs of non-bonded atoms below the threshold in each frame 2309 if groups
is not None:
2310 AtomIterator = np.ascontiguousarray([[min(g), max(g)]
for g
in itertools.product(groups[0], groups[1])])
2312 AtomIterator = np.ascontiguousarray(np.vstack((np.fromiter(itertools.chain(*[[i]*(self.na-i-1)
for i
in range(self.na)]),dtype=np.int32),
2313 np.fromiter(itertools.chain(*[range(i+1,self.na)
for i
in range(self.na)]),dtype=np.int32))).T)
2314 ang13 = [(min(a[0], a[2]), max(a[0], a[2]))
for a
in self.
find_angles()]
2315 dih14 = [(min(d[0], d[3]), max(d[0], d[3]))
for d
in self.
find_dihedrals()]
2316 bondedPairs = np.where([tuple(aPair)
in (self.bonds+ang13+dih14)
for aPair
in AtomIterator])[0]
2317 AtomIterator_nb = np.delete(AtomIterator, bondedPairs, axis=0)
2321 clashPairs_frames = []
2322 clashDists_frames = []
2323 if hasattr(self,
'boxes')
and pbc:
2324 boxes = np.array([[self.
boxes[i].a, self.
boxes[i].b, self.
boxes[i].c]
for i
in range(len(self))])
2328 for frame
in range(len(self)):
2329 clashPairIdx = np.where(drij[frame] < thre)[0]
2330 clashPairs = AtomIterator_nb[clashPairIdx]
2331 clashDists = drij[frame, clashPairIdx]
2332 sorter = np.argsort(clashDists)
2333 clashPairs = clashPairs[sorter]
2334 clashDists = clashDists[sorter]
2335 minIdx = np.argmin(drij[frame])
2336 minPair = AtomIterator_nb[minIdx]
2337 minDist = drij[frame, minIdx]
2338 minPair_frames.append(minPair)
2339 minDist_frames.append(minDist)
2340 clashPairs_frames.append(clashPairs.copy())
2341 clashDists_frames.append(clashDists.copy())
2342 return minPair_frames, minDist_frames, clashPairs_frames, clashDists_frames
2344 def rotate_check_clash(self, frame, rotate_index, thresh_hyd=1.4, thresh_hvy=1.8, printLevel=1):
2346 Return a new Molecule object containing the selected frame 2347 plus a number of frames where the selected dihedral angle is rotated 2348 in steps of 'increment' given in degrees. Additionally, check for 2349 if pairs of non-bonded atoms "clash" i.e. approach below the specified 2355 Structure number of the current Molecule to be rotated 2356 (if only a single frame, this number should be 0) 2357 rotate_index : tuple 2358 4 atom indices (ai, aj, ak, al) representing the dihedral angle to be rotated. 2359 The first and last indices are used to measure the dihedral angle. 2361 Clash threshold for hydrogen atoms. Reasonable values are in between 1.3 and 2.0. 2363 Clash threshold for heavy atoms. Reasonable values are in between 1.7 and 2.5. 2365 Sets the amount of printout (larger = more printout) 2370 New Molecule object containing the rotated structures 2374 if not hasattr(self,
'atomname'):
2375 raise RuntimeError(
"Please add atom names before calling rotate_check_clash().")
2376 ai, aj, ak, al = rotate_index
2380 phis = M_rot_H.measure_dihedrals(*rotate_index)
2381 for i
in range(len(M_rot_H)):
2382 M_rot_H.comms[i] = (
'Rigid scan: atomname %s, serial %s, dihedral %.3f' 2383 % (
'-'.join([self.
atomname[i]
for i
in rotate_index]),
2384 '-'.join([
"%i" % (i+1)
for i
in rotate_index]), phis[i]))
2385 heavyIdx = [i
for i
in range(self.na)
if self.elem[i] !=
'H']
2386 heavy_frags = [[],[]]
2387 for iHeavy, iAll
in enumerate(heavyIdx):
2388 if iAll
in frags[0]:
2389 heavy_frags[0].
append(iHeavy)
2390 if iAll
in frags[1]:
2391 heavy_frags[1].
append(iHeavy)
2392 M_rot_C = M_rot_H.atom_select(heavyIdx)
2393 minPair_H_frames, minDist_H_frames, clashPairs_H_frames, clashDists_H_frames = M_rot_H.find_clashes(thre=thresh_hyd, groups=frags)
2394 minPair_C_frames, minDist_C_frames, clashPairs_C_frames, clashDists_C_frames = M_rot_C.find_clashes(thre=thresh_hvy, groups=heavy_frags)
2397 haveClash_H = any([len(c) > 0
for c
in clashPairs_H_frames])
2398 minFrame_H = np.argmin(minDist_H_frames)
2399 minAtoms_H = minPair_H_frames[minFrame_H]
2400 minDist_H = minDist_H_frames[minFrame_H]
2401 haveClash_C = any([len(c) > 0
for c
in clashPairs_C_frames])
2402 minFrame_C = np.argmin(minDist_C_frames)
2403 minAtoms_C = minPair_C_frames[minFrame_C]
2404 minDist_C = minDist_C_frames[minFrame_C]
2405 if not (haveClash_H
or haveClash_C):
2407 print(
"\n \x1b[1;92mSuccess - no clashes. Thresh(H, Hvy) = (%.2f, %.2f)\x1b[0m" % (thresh_hyd, thresh_hvy))
2408 mini = M_rot_H.atomname[minAtoms_H[0]]
2409 minj = M_rot_H.atomname[minAtoms_H[1]]
2410 print(
" Closest (Hyd) : rot-frame %i atoms %s-%s %.2f" % (minFrame_H, mini, minj, minDist_H))
2411 mini = M_rot_C.atomname[minAtoms_C[0]]
2412 minj = M_rot_C.atomname[minAtoms_C[1]]
2413 print(
" Closest (Hvy) : rot-frame %i atoms %s-%s %.2f" % (minFrame_C, mini, minj, minDist_C))
2417 mini = M_rot_H.atomname[minAtoms_H[0]]
2418 minj = M_rot_H.atomname[minAtoms_H[1]]
2419 if printLevel >= 2: print(
" Clash (Hyd) : rot-frame %i atoms %s-%s %.2f" % (minFrame_H, mini, minj, minDist_H))
2421 mini = M_rot_C.atomname[minAtoms_C[0]]
2422 minj = M_rot_C.atomname[minAtoms_C[1]]
2423 if printLevel >= 2: print(
" Clash (Hvy) : rot-frame %i atoms %s-%s %.2f" % (minFrame_C, mini, minj, minDist_C))
2424 return M_rot_H, Success
2428 """ Return a list of 3-tuples corresponding to all of the 2429 angles in the system. Verified for lysine and tryptophan 2430 dipeptide when comparing to TINKER's analyze program. """ 2432 if not hasattr(self,
'topology'):
2433 logger.error(
"Need to have built a topology to find angles\n")
2440 for a2
in list(mol.nodes()):
2442 friends = sorted(list(nx.neighbors(mol, a2)))
2443 if len(friends) < 2:
continue 2445 for i, a1
in enumerate(friends):
2446 for a3
in friends[i+1:]:
2448 angidx.append((a1, a2, a3))
2453 """ Return a list of 4-tuples corresponding to all of the 2454 dihedral angles in the system. Verified for alanine and 2455 tryptophan dipeptide when comparing to TINKER's analyze 2458 if not hasattr(self,
'topology'):
2459 logger.error(
"Need to have built a topology to find dihedrals\n")
2466 for edge
in list(mol.edges()):
2468 a2 = edge[0]
if edge[0] < edge[1]
else edge[1]
2469 a3 = edge[1]
if edge[0] < edge[1]
else edge[0]
2470 for a1
in sorted(list(nx.neighbors(mol, a2))):
2472 for a4
in sorted(list(nx.neighbors(mol, a3))):
2473 if a4 != a2
and len({a1, a2, a3, a4}) == 4:
2474 dihidx.append((a1, a2, a3, a4))
2479 for s
in range(self.ns):
2480 x1 = self.
xyzs[s][i]
2481 x2 = self.
xyzs[s][j]
2482 distance = np.linalg.norm(x1-x2)
2483 distances.append(distance)
2488 for s
in range(self.ns):
2489 x1 = self.
xyzs[s][i]
2490 x2 = self.
xyzs[s][j]
2491 x3 = self.
xyzs[s][k]
2494 n = np.dot(v1,v2)/(np.linalg.norm(v1)*np.linalg.norm(v2))
2495 angle = np.arccos(n)
2496 angles.append(angle * 180/ np.pi)
2500 """ Return a series of dihedral angles, given four atom indices numbered from zero. """ 2503 if any(p
not in self.bonds
for p
in [(min(i,j),max(i,j)),(min(j,k),max(j,k)),(min(k,l),max(k,l))]):
2504 logger.warning([(min(i,j),max(i,j)),(min(j,k),max(j,k)),(min(k,l),max(k,l))])
2505 logger.warning(
"Measuring dihedral angle for four atoms that aren't bonded. Hope you know what you're doing!")
2507 logger.warning(
"This molecule object doesn't have bonds defined, sanity-checking is off.")
2508 for s
in range(self.ns):
2509 x4 = self.
xyzs[s][l]
2510 x3 = self.
xyzs[s][k]
2511 x2 = self.
xyzs[s][j]
2512 x1 = self.
xyzs[s][i]
2516 t1 = np.linalg.norm(v2)*np.dot(v1,np.cross(v2,v3))
2517 t2 = np.dot(np.cross(v1,v2),np.cross(v2,v3))
2518 phi = np.arctan2(t1,t2)
2519 phis.append(phi * 180 / np.pi)
2527 Return a list of rings in the molecule. Tested on a DNA base 2528 pair and C60. Warning: Using large max_size for rings 2529 (e.g. for finding the macrocycle in porphyrin) could lead to 2530 some undefined behavior. 2535 The maximum ring size. If a large ring contains smaller 2536 sub-rings, they are all mapped into one. 2541 A list of rings sorted in ascending order of the smallest 2542 atom number. The atoms in each ring are ordered starting 2543 from the lowest number and going along the ring, with the 2544 second atom being the lower of the two possible choices. 2547 for i
in range(self.na):
2548 friends.append(self.
topology.neighbors(i))
2553 for i
in range(self.na):
2557 for a, b
in itertools.combinations(n, 2):
2559 PathLength = nx.shortest_path_length(g, a, b)
2560 except nx.exception.NetworkXNoPath:
2562 if PathLength > 0
and PathLength <= (max_size-2):
2564 triplets.append((a, i, b))
2566 triplets.append((b, i, a))
2572 while set(assigned.keys()) != set(triplets):
2574 if t
not in assigned:
2577 has_been_assigned =
False 2579 new_rings = copy.deepcopy(rings)
2581 for iring, ring
in enumerate(rings):
2587 if ((r[0] == t[1]
and r[1] == t[2])
or 2588 (r[::-1][0] == t[1]
and r[::-1][1] == t[2])
or 2589 (r[0] == t[::-1][1]
and r[1] == t[::-1][2])
or 2590 (r[::-1][0] == t[::-1][1]
and r[1] == t[::-1][2])):
2591 ends = list(set(r).symmetric_difference(t))
2592 mids = set(r).intersection(t)
2594 for m
in mids: g.remove_node(m)
2596 PathLength = nx.shortest_path_length(g, ends[0], ends[1])
2597 except nx.exception.NetworkXNoPath:
2599 if PathLength <= 0
or PathLength > (max_size-2):
2602 if has_been_assigned:
2606 for r1
in rings[iring]:
2607 new_rings[assigned[t]].
append(r1)
2608 del new_rings[new_rings.index(rings[iring])]
2610 new_rings[iring].
append(t)
2612 has_been_assigned =
True 2617 if not has_been_assigned:
2619 assigned[t] = len(new_rings)
2620 new_rings.append([t])
2622 rings = copy.deepcopy(new_rings)
2624 rings = [sorted(list(set([t[1]
for t
in r])))
for r
in rings]
2633 while len(ring) > 0:
2634 for r
in sorted(ring):
2635 if sring[-1]
in friends[r]:
2639 sorted_rings.append(sring[:])
2640 return sorted(sorted_rings, key =
lambda val: val[0])
2644 Recursive function that orders atoms based on connectivity. 2645 To call the function, pass in the topology object (e.g. M.molecules[0]) 2646 and the index of the atom assigned to be "first". 2648 The neighbors of "i" are placed in decreasing order of the 2649 maximum length of the shortest path to all other atoms - i.e. 2650 an atom closer to the "edge" of the molecule has a longer 2651 shortest-path to atoms on the other "edge", and they should 2656 from forcebalance.molecule import * 2658 M = Molecule('movProt.xyz', Fac=1.25) 2659 # Arbitrarily select heaviest atom as the first atom in each molecule 2660 firstAtom = [m.L()[np.argmax([PeriodicTable[M.elem[i]] for i in m.L()])] for m in M.molecules] 2661 # Explicitly set the first atom in the first molecule 2663 # Build a list of new atom orderings 2665 for i in range(len(M.molecules)): 2666 newOrder += M.order_by_connectivity(M.molecules[i], firstAtom[i], [], None) 2667 # Write the new Molecule object 2668 newM = M.atom_select(newOrder) 2669 newM.write('reordered1.xyz') 2673 m : topology object (contained in Molecule) 2675 Index of the atom to be added first (if called recursively, 2676 the index of subsequent atoms to be added) 2678 Current list of new atom orderings 2683 raise RuntimeError(
"topology is not part of Molecule object")
2684 if max_min_path
is None:
2685 spl = nx.shortest_path_length(m)
2689 max_min_path = dict([(k, max(spl[k].values()))
for k
in m.L()])
2700 raise RuntimeError(
'atom %i not in molecule' % i)
2701 jlist = np.array(m.neighbors(i))
2702 jmass = [PeriodicTable[self.elem[j]]
for j
in jlist]
2703 jnghs = [len(m.neighbors(j))
for j
in jlist]
2704 jpath = [max_min_path[j]
for j
in jlist]
2707 for j
in jlist[np.argsort(jpath)[::-1]]:
2708 if j
in currList:
continue 2716 for i
in range(self.na):
2717 if self.elem[i] !=
"H":
continue 2718 if len(self.
topology.neighbors(i)) != 1:
2719 raise RuntimeError(
"Hydrogen with not one bond?")
2720 if self.elem[self.
topology.neighbors(i)[0]]
not in [
"N",
"O",
"F",
"P",
"S",
"Cl"]:
2725 """ Find pairwise RMSD (super slow, not like the one in MSMBuilder.) """ 2727 Mat = np.zeros((N,N),dtype=float)
2729 xyzi = self.
xyzs[i].copy()
2730 xyzi -= xyzi.mean(0)
2732 xyzj = self.
xyzs[j].copy()
2733 xyzj -= xyzj.mean(0)
2735 xyzj = np.dot(xyzj, rt) + tr
2736 rmsd = np.sqrt(3*np.mean((xyzj - xyzi) ** 2))
2742 """ Find RMSD between frames along path. """ 2744 Vec = np.zeros(N-1, dtype=float)
2745 for i
in range(N-1):
2746 xyzi = self.
xyzs[i].copy()
2748 xyzj = self.
xyzs[j].copy()
2750 xyzi -= xyzi.mean(0)
2751 xyzj -= xyzj.mean(0)
2753 xyzj = np.dot(xyzj, rt) + tr
2754 rmsd = np.sqrt(3*np.mean((xyzj - xyzi) ** 2))
2759 """ Find RMSD to a reference frame. """ 2762 xyzi = self.
xyzs[i].copy()
2763 if align: xyzi -= xyzi.mean(0)
2765 xyzj = self.
xyzs[j].copy()
2767 xyzj -= xyzj.mean(0)
2769 xyzj = np.dot(xyzj, rt) + tr
2770 rmsd = np.sqrt(3*np.mean((xyzj - xyzi) ** 2))
2778 """ Returns the Cartesian coordinates in the Molecule object in 2779 a list of OpenMM-compatible positions, so it is possible to type 2780 simulation.context.setPositions(Mol.openmm_positions()[0]) 2781 or something like that. 2786 for xyz
in self.
xyzs:
2789 Pos.append(Vec3(xyzi[0]/10,xyzi[1]/10,xyzi[2]/10))
2790 Positions.append(Pos*nanometer)
2794 """ Returns the periodic box vectors in the Molecule object in 2795 a list of OpenMM-compatible boxes, so it is possible to type 2796 simulation.context.setPeriodicBoxVectors(Mol.openmm_boxes()[0]) 2797 or something like that. 2801 return [(Vec3(*box.A)/10.0, Vec3(*box.B)/10.0, Vec3(*box.C)/10.0) * nanometer
for box
in self.
boxes]
2803 def split(self, fnm=None, ftype=None, method="chunks", num=None):
2805 """ Split the molecule object into a number of separate files 2806 (chunks), either by specifying the number of frames per chunk 2807 or the number of chunks. Only relevant for "trajectories". 2808 The type of file may be specified; if they aren't specified 2809 then the original file type is used. 2811 The output file names are [name].[numbers].[extension] where 2812 [name] can be specified by passing 'fnm' or taken from the 2813 object's 'fnm' attribute by default. [numbers] are integers 2814 ranging from the lowest to the highest chunk number, prepended 2817 If the number of chunks / frames is not specified, then one file 2818 is written for each frame. 2820 @return fnms A list of the file names that were written. 2822 logger.error(
'Apparently this function has not been implemented!!')
2823 raise NotImplementedError
2827 Return a copy of the Molecule object but with certain fields 2828 deleted if they exist. This is useful for when we'd like to 2829 add Molecule objects that don't share some fields that we know 2834 for key
in AllVariableNames:
2835 if key
in self.
Data and key
not in args:
2836 New.Data[key] = copy.deepcopy(self.
Data[key])
2840 """ Set charge and multiplicity from reading the comment line, formatted in a specific way. """ 2843 self.
mult = abs(sz) + 1
2851 if isinstance(schema, dict):
2855 elif isinstance(schema, str):
2856 with open(schema,
"r") as handle: 2857 schema = json.loads(handle) 2859 raise TypeError(
"Schema type not understood '{}'".format(type(schema)))
2860 ret = {
"elem": schema[
"symbols"],
2861 "xyzs": [np.array(schema[
"geometry"])],
2866 """ .xyz files can be TINKER formatted which is why we have the try/except here. """ 2869 except ActuallyArcError:
2870 return self.
read_arc(fnm, **kwargs)
2873 """ Parse a .xyz file which contains several xyz coordinates, and return their elements. 2875 @param[in] fnm The input file name 2876 @return elem A list of chemical elements in the XYZ file 2877 @return comms A list of comments. 2878 @return xyzs A list of XYZ coordinates (number of snapshots times number of atoms) 2889 for line
in open(fnm):
2890 line = line.strip().expandtabs()
2893 if len(line.strip()) > 0:
2895 na = int(line.strip())
2898 logger.warning(
"Non-integer detected in first line; will parse as TINKER .arc file.")
2899 raise ActuallyArcError
2901 sline = line.split()
2902 if len(sline) == 6
and all([
isfloat(word)
for word
in sline]):
2904 logger.warning(
"Tinker box data detected in second line; will parse as TINKER .arc file.")
2905 raise ActuallyArcError
2908 logger.warning(
"Tinker coordinate data detected in second line; will parse as TINKER .arc file.")
2909 raise ActuallyArcError
2910 comms.append(line.strip())
2912 line = re.sub(
r"([0-9])(-[0-9])",
r"\1 \2", line)
2914 if not re.match(
r"[A-Z][A-Za-z]?( +[-+]?([0-9]*\.)?[0-9]+){3}$", line):
2915 raise IOError(
"Expected coordinates at line %i but got this instead:\n%s" % (absln, line))
2916 sline = line.split()
2917 xyz.append([float(i)
for i
in sline[1:]])
2919 elem.append(sline[0])
2922 xyzs.append(np.array(xyz))
2930 Answer = {
'elem' : elem,
2936 """ Parse an AMBER .mdcrd file. This requires at least the number of atoms. 2937 This will FAIL for monatomic trajectories (but who the heck makes those?) 2939 @param[in] fnm The input file name 2940 @return xyzs A list of XYZ coordinates (number of snapshots times number of atoms) 2941 @return boxes Boxes (if present.) 2949 for line
in open(fnm):
2950 sline = line.split()
2954 if xyz == []
and len(sline) == 3:
2955 a, b, c = (float(i)
for i
in line.split())
2958 xyz += [float(i)
for i
in line.split()]
2959 if len(xyz) == self.na * 3:
2960 xyzs.append(np.array(xyz).reshape(-1,3))
2963 Answer = {
'xyzs' : xyzs}
2965 Answer[
'boxes'] = boxes
2969 """ Parse an AMBER .inpcrd or .rst file. 2971 @param[in] fnm The input file name 2972 @return xyzs A list of XYZ coordinates (number of snapshots times number of atoms) 2973 @return boxes Boxes (if present.) 2985 for line
in open(fnm):
2986 line = line.replace(
'\n',
'')
2994 na = int(line.split()[0])
2996 xyz.append([float(line[:12]), float(line[12:24]), float(line[24:36])])
2999 xyzs.append(np.array(xyz))
3003 xyz.append([float(line[36:48]), float(line[48:60]), float(line[60:72])])
3006 xyzs.append(np.array(xyz))
3010 vel.append([float(line[:12]), float(line[12:24]), float(line[24:36])])
3013 vels.append(np.array(vel))
3017 vel.append([float(line[36:48]), float(line[48:60]), float(line[60:72])])
3020 vels.append(np.array(vel))
3024 a, b, c = (float(line[:12]), float(line[12:24]), float(line[24:36]))
3031 Answer = {
'xyzs' : xyzs,
'comms' : comms}
3033 Answer[
'boxes'] = boxes
3043 for line
in open(fnm):
3044 line = line.strip().expandtabs()
3045 if 'COORDS' in line:
3046 xyzs.append(np.array([float(i)
for i
in line.split()[1:]]).reshape(-1,3))
3047 elif 'FORCES' in line
or 'GRADIENT' in line:
3048 grads.append(np.array([float(i)
for i
in line.split()[1:]]).reshape(-1,3))
3049 elif 'ESPXYZ' in line:
3050 espxyzs.append(np.array([float(i)
for i
in line.split()[1:]]).reshape(-1,3))
3051 elif 'ESPVAL' in line:
3052 espvals.append(np.array([float(i)
for i
in line.split()[1:]]))
3053 elif 'ENERGY' in line:
3054 energies.append(float(line.split()[1]))
3055 elif 'INTERACTION' in line:
3056 interaction.append(float(line.split()[1]))
3059 Answer[
'xyzs'] = xyzs
3060 if len(energies) > 0:
3061 Answer[
'qm_energies'] = energies
3062 if len(interaction) > 0:
3063 Answer[
'qm_interaction'] = interaction
3065 Answer[
'qm_grads'] = grads
3066 if len(espxyzs) > 0:
3067 Answer[
'qm_espxyzs'] = espxyzs
3068 if len(espvals) > 0:
3069 Answer[
'qm_espvals'] = espvals
3081 if len(data.compounds) > 1:
3082 sys.stderr.write(
"Not sure what to do if the MOL2 file contains multiple compounds\n")
3083 for i, atom
in enumerate(list(data.compounds.items())[0][1].atoms):
3084 xyz.append([atom.x, atom.y, atom.z])
3085 charge.append(atom.charge)
3086 atomname.append(atom.atom_name)
3087 atomtype.append(atom.atom_type)
3088 resname.append(atom.subst_name)
3089 resid.append(atom.subst_id)
3090 thiselem = atom.atom_name
3091 if len(thiselem) > 1:
3092 thiselem = thiselem[0] + re.sub(
'[A-Z0-9]',
'',thiselem[1:])
3093 elem.append(thiselem)
3107 for bond
in list(data.compounds.items())[0][1].bonds:
3108 a1 = bond.origin_atom_id - 1
3109 a2 = bond.target_atom_id - 1
3110 aL, aH = (a1, a2)
if a1 < a2
else (a2, a1)
3111 bonds.append((aL,aH))
3114 Answer = {
'xyzs' : [np.array(xyz)],
3115 'partial_charge' : charge,
3116 'atomname' : atomname,
3117 'atomtype' : atomtype,
3119 'resname' : resname,
3129 if _dcdlib.vmdplugin_init() != 0:
3130 logger.error(
"Unable to init DCD plugin\n")
3134 dcd = _dcdlib.open_dcd_read(fnm,
"dcd", byref(natoms))
3136 _xyz = c_float * (natoms.value * 3)
3140 result = _dcdlib.read_next_timestep(dcd, natoms, byref(ts))
3146 xyz = np.asfarray(xyzvec)
3147 xyzs.append(xyz.reshape(-1, 3))
3149 _dcdlib.close_file_read(dcd)
3151 Answer = {
'xyzs' : xyzs,
3156 """ Parse a Gaussian .com file and return a SINGLE-ELEMENT list of xyz coordinates (no multiple file support) 3158 @param[in] fnm The input file name 3159 @return elem A list of chemical elements in the XYZ file 3160 @return comms A single-element list for the comment. 3161 @return xyzs A single-element list for the XYZ coordinates. 3162 @return charge The total charge of the system. 3163 @return mult The spin multiplicity of the system. 3170 comfile = open(fnm).readlines()
3172 for line
in comfile:
3173 line = line.strip().expandtabs()
3175 sline = line.split(
'!')[0].
split()
3178 charge = int(sline[0])
3179 mult = int(sline[1])
3181 elif len(sline) == 4:
3183 if sline[0].capitalize()
in PeriodicTable
and isfloat(sline[1])
and isfloat(sline[2])
and isfloat(sline[3]):
3184 elem.append(sline[0])
3185 xyz.append(np.array([float(sline[1]),float(sline[2]),float(sline[3])]))
3191 Answer = {
'xyzs' : [np.array(xyz)],
3193 'comms' : [comfile[title_ln].strip()],
3199 """ Read a TINKER .arc file. 3201 @param[in] fnm The input file name 3202 @return xyzs A list for the XYZ coordinates. 3203 @return boxes A list of periodic boxes (newer .arc files have these) 3204 @return resid The residue ID numbers. These are not easy to get! 3205 @return elem A list of chemical elements in the XYZ file 3206 @return comms A single-element list for the comment. 3207 @return tinkersuf The suffix that comes after lines in the XYZ coordinates; this is usually topology info 3218 forwardres = set([])
3224 for line
in open(fnm):
3225 line = line.strip().expandtabs()
3226 sline = line.split()
3227 if len(sline) == 0:
continue 3232 comms.append(
' '.join(sline[1:]))
3234 elif len(sline) >= 5:
3236 a, b, c, alpha, beta, gamma = (float(i)
for i
in sline[:6])
3241 resid.append(thisresid)
3242 whites = re.split(
'[^ ]+',line)
3246 sn = [int(i)
for i
in s[1:]]
3247 s = [s[0]] + list(np.array(s[1:])[np.argsort(sn)])
3248 tinkersuf.append(
''.join([whites[j]+s[j-5]
for j
in range(5,len(sline))]))
3250 tinkersuf.append(
'')
3254 thisres.add(thisatom)
3255 forwardres.add(thisatom)
3257 forwardres.update([int(j)
for j
in sline[6:]])
3258 if thisres == forwardres:
3260 forwardres = set([])
3262 xyz.append([float(sline[2]),float(sline[3]),float(sline[4])])
3267 xyzs.append(np.array(xyz))
3270 Answer = {
'xyzs' : xyzs,
3274 'tinkersuf' : tinkersuf}
3275 if len(boxes) > 0: Answer[
'boxes'] = boxes
3279 """ Read a GROMACS .gro file. 3294 for line
in open(fnm):
3295 sline = line.split()
3297 comms.append(line.strip())
3299 na = int(line.strip())
3301 box = [float(i)*10
for i
in sline]
3311 v1 = np.array([box[0], box[3], box[4]])
3312 v2 = np.array([box[5], box[1], box[6]])
3313 v3 = np.array([box[7], box[8], box[2]])
3315 xyzs.append(np.array(xyz)*10)
3323 thisresid = int(line[0:5].strip())
3324 resid.append(thisresid)
3325 thisresname = line[5:10].strip()
3326 resname.append(thisresname)
3327 thisatomname = line[10:15].strip()
3328 atomname.append(thisatomname)
3331 if len(thiselem) > 1:
3332 thiselem = thiselem[0] + re.sub(
'[A-Z0-9]',
'',thiselem[1:])
3333 elem.append(thiselem)
3337 pdeci = [i
for i, x
in enumerate(line)
if x ==
'.']
3338 ndeci = pdeci[1] - pdeci[0] - 5
3340 for i
in range(1,4):
3342 thiscoord = float(line[(pdeci[0]-4)+(5+ndeci)*(i-1):(pdeci[0]-4)+(5+ndeci)*i].strip())
3344 thiscoord = float(line.split()[i+2])
3345 coord.append(thiscoord)
3350 Answer = {
'xyzs' : xyzs,
3352 'atomname' : atomname,
3354 'resname' : resname,
3360 """ Read a CHARMM .cor (or .crd) file. 3374 for line
in open(fnm):
3375 line = line.strip().expandtabs()
3376 sline = line.split()
3377 if re.match(
r'^\*',line):
3379 comms.append(
';'.join(list(thiscomm)))
3382 thiscomm.append(
' '.join(sline[1:]))
3383 elif re.match(
'^ *[0-9]+ +(EXT)?$',line):
3387 resid.append(sline[1])
3388 resname.append(sline[2])
3389 atomname.append(sline[3])
3391 if len(thiselem) > 1:
3392 thiselem = thiselem[0] + re.sub(
'[A-Z0-9]',
'',thiselem[1:])
3393 elem.append(thiselem)
3394 xyz.append([float(i)
for i
in sline[4:7]])
3397 xyzs.append(np.array(xyz))
3402 Answer = {
'xyzs' : xyzs,
3404 'atomname' : atomname,
3406 'resname' : resname,
3411 """ Read a Q-Chem input file. 3413 These files can be very complicated, and I can't write a completely 3414 general parser for them. It is important to keep our goal in 3417 1) The main goal is to convert a trajectory to Q-Chem input 3418 files with identical calculation settings. 3420 2) When we print the Q-Chem file, we should preserve the line 3421 ordering of the 'rem' section, but also be able to add 'rem' 3424 3) We should accommodate the use case that the Q-Chem file may have 3425 follow-up calculations delimited by '@@@'. 3427 4) We can read in all of the xyz's as a trajectory, but only the 3428 Q-Chem settings belonging to the first xyz will be saved. 3432 qcrem = OrderedDict()
3445 inside_section =
False 3446 reading_template =
True 3457 for line
in open(fnm).readlines():
3458 line = line.strip().expandtabs()
3459 sline = line.split()
3460 dline = line.split(
'!')[0].
split()
3461 if "Z-matrix Print" in line:
3463 if re.match(
r'^\$',line):
3464 wrd = re.sub(
r'\$',
'',line).lower()
3470 inside_section =
False 3471 if section ==
'molecule':
3473 xyzs.append(np.array(xyz))
3478 elif section ==
'rem':
3479 if reading_template:
3480 qcrems.append(qcrem)
3481 qcrem = OrderedDict()
3482 if reading_template:
3483 if section !=
'external_charges':
3484 template.append((section,SectionData))
3488 inside_section =
True 3489 elif inside_section:
3491 raise RuntimeError(
'Parse error - zmatrix should not activate inside_section')
3492 if section ==
'molecule':
3493 if line.startswith(
"*"):
3495 if (
not infsm)
and (len(dline) >= 4
and all([
isfloat(dline[i])
for i
in range(1,4)])):
3497 reading_template =
False 3498 template_cut = list(i
for i, dat
in enumerate(template)
if '@@@' in dat[0])[-1]
3500 if re.match(
'^@', sline[0]):
3504 elem.append(re.sub(
'@',
'',sline[0]))
3505 xyz.append([float(i)
for i
in sline[1:4]])
3506 if readsuf
and len(sline) > 4:
3507 whites = re.split(
'[^ ]+',line)
3508 suffix.append(
''.join([whites[j]+sline[j]
for j
in range(4,len(sline))]))
3509 elif re.match(
"[+-]?[0-9]+ +[0-9]+$",line.split(
'!')[0].strip()):
3511 charge = int(sline[0])
3512 mult = int(sline[1])
3514 SectionData.append(line)
3515 elif reading_template:
3516 if section ==
'basis':
3517 SectionData.append(line.split(
'!')[0])
3518 elif section ==
'rem':
3519 S = splitter.findall(line)
3521 qcrem[
''.join(S[0:3]).lower()] =
''.join(S[4:])
3523 qcrem[S[0].lower()] =
''.join(S[2:])
3525 SectionData.append(line)
3526 elif re.match(
'^@+$', line)
and reading_template:
3527 template.append((
'@@@', []))
3528 elif re.match(
'Welcome to Q-Chem', line)
and reading_template
and fff:
3529 template.append((
'@@@', []))
3531 if template_cut != 0:
3532 template = template[:template_cut]
3534 Answer = {
'qctemplate' : template,
3540 Answer[
'qcsuf'] = suffix
3543 Answer[
'xyzs'] = xyzs
3545 Answer[
'xyzs'] = [np.array([])]
3547 Answer[
'elem'] = elem
3549 Answer[
'qm_ghost'] = ghost
3554 """ Loads a PDB and returns a dictionary containing its data. """ 3557 ParsedPDB=readPDB(F1) 3565 for x
in ParsedPDB[0]:
3566 if x.__class__
in [END, ENDMDL]:
3569 if x.__class__
in [ATOM, HETATM]:
3573 if x.__class__
in [TER]
and ReadTerms:
3575 if x.__class__==CRYST1:
3580 XYZ=np.array([[x.x,x.y,x.z]
for x
in X])/10.0
3581 AltLoc=np.array([x.altLoc
for x
in X],
'str')
3582 ICode=np.array([x.iCode
for x
in X],
'str')
3583 ChainID=np.array([x.chainID
for x
in X],
'str')
3584 AtomNames=np.array([x.name
for x
in X],
'str')
3585 ResidueNames=np.array([x.resName
for x
in X],
'str')
3586 ResidueID=np.array([x.resSeq
for x
in X],
'int')
3589 ResidueID=ResidueID-ResidueID[0]+1
3592 for Model
in PDBLines:
3596 NewXYZ.append([x.x,x.y,x.z])
3597 if len(XYZList) == 0:
3598 XYZList.append(NewXYZ)
3599 elif len(XYZList) >= 1
and (np.array(NewXYZ).shape == np.array(XYZList[-1]).shape):
3600 XYZList.append(NewXYZ)
3602 if len(XYZList[-1])==0:
3607 for i
in range(len(AtomNames)):
3610 elem.append(X[i].element)
3612 thiselem = AtomNames[i]
3613 if len(thiselem) > 1:
3614 thiselem = re.sub(
'^[0-9]',
'',thiselem)
3615 thiselem = thiselem[0] + re.sub(
'[A-Z0-9]',
'',thiselem[1:])
3616 elem.append(thiselem)
3618 XYZList=list(np.array(XYZList).reshape((-1,len(ChainID),3)))
3626 if line[:6] ==
"CONECT":
3627 conect_A = int(line[6:11]) - 1
3629 line_rest = line[11:]
3630 while line_rest.strip():
3632 conect_B_list.append(int(line_rest[:5]) - 1)
3633 line_rest = line_rest[5:]
3634 for conect_B
in conect_B_list:
3635 bond = (min((conect_A, conect_B)), max((conect_A, conect_B)))
3638 Answer={
"xyzs":XYZList,
"chain":list(ChainID),
"altloc":list(AltLoc),
"icode":list(ICode),
3639 "atomname":[str(i)
for i
in AtomNames],
"resid":list(ResidueID),
"resname":list(ResidueNames),
3640 "elem":elem,
"comms":[
'' for i
in range(len(XYZList))],
"terminal" : PDBTerms}
3644 Answer[
"bonds"] = bonds
3647 Answer[
"boxes"] = [Box
for i
in range(len(XYZList))]
3654 for line
in open(fnm):
3655 line = line.strip().expandtabs()
3656 sline = line.split()
3657 if len(sline) == 4
and all([
isfloat(sline[i])
for i
in range(4)]):
3658 espxyz.append([float(sline[i])
for i
in range(3)])
3659 espval.append(float(sline[3]))
3660 Answer = {
'qm_espxyzs' : [np.array(espxyz) * bohr2ang],
3661 'qm_espvals' : [np.array(espval)]
3665 def read_qcout(self, fnm, errok=None, **kwargs):
3666 """ Q-Chem output file reader, adapted for our parser. 3668 Q-Chem output files are very flexible and there's no way I can account for all of them. Here's what 3669 I am able to account for: 3676 Calling with errok will proceed with reading file even if the specified error messages are encountered. 3678 Note that each step in a geometry optimization counts as a frame. 3680 As with all Q-Chem output files, note that successive calculations can have different numbers of atoms. 3704 float_match = {
'energy_scfThis' : (
r"^[1-9][0-9]* +[-+]?([0-9]*\.)?[0-9]+ +[-+]?([0-9]*\.)?[0-9]+([eE][-+]?[0-9]+)[A-Za-z0 ]*$", 1),
3705 'energy_opt' : (
r"^Final energy is +[-+]?([0-9]*\.)?[0-9]+$", -1),
3706 'charge' : (
"Sum of atomic charges", -1),
3707 'mult' : (
"Sum of spin +charges", -1),
3708 'energy_mp2' : (
r"^(ri)*(-)*mp2 +total energy += +[-+]?([0-9]*\.)?[0-9]+ +au$",-2),
3709 'energy_ccsd' : (
r"^CCSD Total Energy += +[-+]?([0-9]*\.)?[0-9]+$",-1),
3710 'energy_ccsdt' : (
r"^CCSD\(T\) Total Energy += +[-+]?([0-9]*\.)?[0-9]+$",-1),
3711 'zpe' : (
r"^(\s+)?Zero point vibrational energy:\s+[-+]?([0-9]*\.)?[0-9]+\s+kcal\/mol$", -2),
3712 'entropy' : (
r"^(\s+)?Total Entropy:\s+[-+]?([0-9]*\.)?[0-9]+\s+cal\/mol\.K$", -2),
3713 'enthalpy' : (
r"^(\s+)?Total Enthalpy:\s+[-+]?([0-9]*\.)?[0-9]+\s+kcal\/mol$", -2)
3715 matrix_match = {
'analytical_grad' :
'Full Analytical Gradient',
3716 'gradient_scf' :
'Gradient of SCF Energy',
3717 'gradient_mp2' :
'Gradient of MP2 Energy',
3718 'gradient_dualbas' :
'Gradient of the Dual-Basis Energy',
3719 'hessian_scf' :
'Hessian of the SCF Energy',
3720 'mayer' :
'Mayer SCF Bond Order' 3722 qcrem = OrderedDict()
3724 matblank = {
'match' :
'',
'All' : [],
'This' : [],
'Strip' : [],
'Mode' : 0}
3727 for key, val
in matrix_match.items():
3728 Mats[key] = copy.deepcopy(matblank)
3729 for key, val
in float_match.items():
3747 IRCData = [OrderedDict([(
'stat', -1), (
'X', []), (
'E', []), (
'Q', []), (
'Sz', [])])
for i
in range(2)]
3749 Answer[
'qcerr'] =
'' 3754 for line
in open(fnm):
3755 line = line.strip().expandtabs()
3756 if 'Welcome to Q-Chem' in line:
3757 Answer[
'qcerr'] =
'' 3758 if 'total processes killed' in line:
3759 Answer[
'qcerr'] =
'killed' 3760 if fatal
and len(line.split()) > 0:
3763 Answer[
'qcerr'] = line.strip()
3766 logger.error(
'Calculation encountered a fatal error! (%s)\n' % line)
3768 if 'Q-Chem fatal error' in line:
3772 if re.match(
r"^[0-9]+ +[A-Z][A-Za-z]?( +[-+]?([0-9]*\.)?[0-9]+){3}$", line):
3774 sline = line.split()
3775 elemThis.append(sline[1])
3776 xyz.append([float(i)
for i
in sline[2:]])
3780 elif elem != elemThis:
3781 logger.error(
'Q-Chem output parser will not work if successive calculations have different numbers of atoms!\n')
3784 xyzs.append(np.array(xyz))
3787 elif re.match(
"Standard Nuclear Orientation".lower(), line.lower()):
3791 if re.match(
r"^[0-9]+ +[A-Z][a-z]?( +[-+]?([0-9]*\.)?[0-9]+){2}$", line):
3793 sline = line.split()
3794 mkchgThis.append(float(sline[2]))
3795 mkspnThis.append(float(sline[3]))
3796 elif re.match(
r"^[0-9]+ +[A-Z][a-z]?( +[-+]?([0-9]*\.)?[0-9]+){1}$", line):
3798 sline = line.split()
3799 mkchgThis.append(float(sline[2]))
3800 mkspnThis.append(0.0)
3802 mkchg.append(mkchgThis[:])
3803 mkspn.append(mkspnThis[:])
3807 elif re.match(
"Ground-State Mulliken Net Atomic Charges".lower(), line.lower()):
3809 for key, val
in float_match.items():
3810 if re.match(val[0].lower(), line.lower()):
3811 Floats[key].
append(float(line.split()[val[1]]))
3813 if line.startswith(
'IRC')
and IRCData[IRCDir][
'stat'] == -1:
3814 IRCData[IRCDir][
'stat'] = 2
3815 if "Reaction path following." in line:
3817 IRCData[IRCDir][
'X'].
append(xyzs[-1])
3821 IRCData[IRCDir][
'E'].
append(float(line.split()[3]))
3822 IRCData[IRCDir][
'Q'].
append(mkchg[-1])
3823 IRCData[IRCDir][
'Sz'].
append(mkspn[-1])
3827 if "GEOMETRY OPTIMIZATION" in line:
3828 IRCData[IRCDir][
'X'].
append(xyzs[-1])
3829 IRCData[IRCDir][
'E'].
append(energy_scf[-1])
3830 IRCData[IRCDir][
'Q'].
append(mkchg[-1])
3831 IRCData[IRCDir][
'Sz'].
append(mkspn[-1])
3833 if "IRC -- convergence criterion reached." in line
or "OPTIMIZATION CONVERGED" in line:
3834 IRCData[IRCDir][
'stat'] = 0
3836 if "MAXIMUM OPTIMIZATION CYCLES REACHED" in line:
3837 IRCData[IRCDir][
'stat'] = 1
3839 if "geom opt from" in line:
3840 IRCData[IRCDir][
'stat'] = 1
3846 if re.match(
".*Convergence criterion met$".lower(), line.lower()):
3848 energy_scf.append(Floats[
'energy_scfThis'][-1])
3849 Floats[
'energy_scfThis'] = []
3850 elif re.match(
".*Including correction$".lower(), line.lower()):
3851 energy_scf[-1] = Floats[
'energy_scfThis'][-1]
3852 Floats[
'energy_scfThis'] = []
3853 elif re.match(
".*Convergence failure$".lower(), line.lower()):
3855 Floats[
'energy_scfThis'] = []
3856 energy_scf.append(0.0)
3858 if 'Starting FSM Calculation' in line:
3860 if 'needFdiff: TRUE' in line:
3863 if "total gradient after adding PCM contribution" in line:
3867 if re.match(
r"^[0-9]+ +( +[-+]?([0-9]*\.)?[0-9]+){3}$", line):
3868 pcmgrad.append([float(i)
for i
in line.split()[1:]])
3869 if 'Gradient time' in line:
3871 pcmgrads.append(np.array(pcmgrad).T)
3875 if 'VIBRATIONAL ANALYSIS' in line:
3877 if VMode > 0
and line.strip().startswith(
'Mode:'):
3881 if 'Frequency:' in line:
3883 frqs += [float(i)
for i
in s[1:]]
3884 if re.match(
'^X +Y +Z', line):
3886 readmodes = [[]
for i
in range(nfrq)]
3887 if 'Imaginary Frequencies' in line:
3891 if len(s) != nfrq*3+1:
3893 modes += readmodes[:]
3894 elif 'TransDip' not in s:
3895 for i
in range(nfrq):
3896 readmodes[i].
append([float(s[j])
for j
in range(1+3*i,4+3*i)])
3897 if VModeNxt
is not None: VMode = VModeNxt
3898 for key, val
in matrix_match.items():
3899 if Mats[key][
"Mode"] >= 1:
3901 if re.match(
"^[0-9]+( +[0-9]+)*$",line):
3903 Mats[key][
"Strip"] = []
3904 Mats[key][
"Mode"] = 2
3906 elif re.match(
r"^[0-9]+( +[-+]?([0-9]*\.)?[0-9]+)+$",line):
3907 Mats[key][
"Strip"].
append([float(i)
for i
in line.split()[1:]])
3909 elif Mats[key][
"Mode"] >= 2:
3911 Mats[key][
"Strip"] = []
3912 Mats[key][
"All"].
append(np.array(Mats[key][
"This"]))
3913 Mats[key][
"This"] = []
3914 Mats[key][
"Mode"] = 0
3915 elif re.match(val.lower(), line.lower()):
3916 Mats[key][
"Mode"] = 1
3918 if len(Floats[
'mult']) == 0:
3919 Floats[
'mult'] = [0]
3922 Answer[
'xyzs'] = xyzs
3923 Answer[
'elem'] = elem
3926 for i
in [
'qctemplate',
'qcrems',
'elem',
'qm_ghost',
'charge',
'mult']:
3927 if i
in Aux: Answer[i] = Aux[i]
3929 if len(Floats[
'charge']) > 0:
3930 Answer[
'charge'] = int(Floats[
'charge'][0])
3931 if len(Floats[
'mult']) > 0:
3932 Answer[
'mult'] = int(Floats[
'mult'][0]) + 1
3936 if len(pcmgrads) > 0:
3937 Answer[
'qm_grads'] = pcmgrads
3938 elif len(Mats[
'analytical_grad'][
'All']) > 0:
3939 Answer[
'qm_grads'] = Mats[
'analytical_grad'][
'All']
3940 elif len(Mats[
'gradient_mp2'][
'All']) > 0:
3941 Answer[
'qm_grads'] = Mats[
'gradient_mp2'][
'All']
3942 elif len(Mats[
'gradient_dualbas'][
'All']) > 0:
3943 Answer[
'qm_grads'] = Mats[
'gradient_dualbas'][
'All']
3944 elif len(Mats[
'gradient_scf'][
'All']) > 0:
3945 Answer[
'qm_grads'] = Mats[
'gradient_scf'][
'All']
3947 if len(Mats[
'mayer'][
'All']) > 0:
3948 Answer[
'qm_bondorder'] = Mats[
'mayer'][
'All'][-1]
3949 if len(Mats[
'hessian_scf'][
'All']) > 0:
3950 Answer[
'qm_hessians'] = Mats[
'hessian_scf'][
'All']
3954 if len(Floats[
'energy_ccsdt']) > 0:
3955 Answer[
'qm_energies'] = Floats[
'energy_ccsdt']
3956 elif len(Floats[
'energy_ccsd']) > 0:
3957 Answer[
'qm_energies'] = Floats[
'energy_ccsd']
3958 elif len(Floats[
'energy_mp2']) > 0:
3959 Answer[
'qm_energies'] = Floats[
'energy_mp2']
3960 elif len(energy_scf) > 0:
3961 if len(Answer[
'qcrems']) > 0
and 'correlation' in Answer[
'qcrems'][0]
and Answer[
'qcrems'][0][
'correlation'].lower()
in [
'mp2',
'rimp2',
'ccsd',
'ccsd(t)']:
3962 logger.error(
"Q-Chem was called with a post-HF theory but we only got the SCF energy\n")
3964 Answer[
'qm_energies'] = energy_scf
3965 elif 'SCF failed to converge' not in errok:
3966 logger.error(
'There are no energies in %s\n' % fnm)
3969 if len(Floats[
'zpe']) > 0:
3970 Answer[
'qm_zpe'] = Floats[
'zpe']
3971 if len(Floats[
'entropy']) > 0:
3972 Answer[
'qm_entropy'] = Floats[
'entropy']
3973 if len(Floats[
'enthalpy']) > 0:
3974 Answer[
'qm_enthalpy'] = Floats[
'enthalpy']
3979 if 0
in conv
and 'SCF failed to converge' not in errok:
3980 logger.error(
'SCF convergence failure encountered in parsing %s\n' % fnm)
3984 if len(set(Floats[
'charge'])) != 1
or len(set(Floats[
'mult'])) != 1:
3985 logger.error(
'Unexpected number of charges or multiplicities in parsing %s\n' % fnm)
3989 if 'qm_energies' in Answer:
3991 if len(Answer[
'xyzs']) == len(Answer[
'qm_energies']) + 1:
3992 Answer[
'xyzs'] = Answer[
'xyzs'][:-1]
3994 if len(Answer[
'xyzs']) == len(Answer[
'qm_energies']) + 2:
3996 Answer[
'qm_energies'].
append(0.0)
3997 mkchg.append([0.0
for j
in mkchg[-1]])
3998 mkspn.append([0.0
for j
in mkchg[-1]])
4000 if FSM
and (len(Answer[
'xyzs']) == len(Answer[
'qm_energies']) + 3):
4001 Answer[
'xyzs'] = Answer[
'xyz'][1:]
4003 Answer[
'qm_energies'].
append(0.0)
4004 mkchg.append([0.0
for j
in mkchg[-1]])
4005 mkspn.append([0.0
for j
in mkchg[-1]])
4006 if FDiff
and (len(Answer[
'qm_energies']) == (len(Answer[
'xyzs'])+1)):
4007 logger.info(
"Aligning energies because finite difference calculation prints one extra")
4008 Answer[
'qm_energies'] = Answer[
'qm_energies'][:-1]
4009 lens = [len(i)
for i
in (Answer[
'qm_energies'], Answer[
'xyzs'])]
4010 if len(set(lens)) != 1:
4011 logger.error(
'The number of energies and coordinates in %s are not the same : %s\n' % (fnm, str(lens)))
4015 if len(set([len(i)
for i
in Answer[
'xyzs']])) > 1:
4016 logger.error(
'The numbers of atoms across frames in %s are not all the same\n' % fnm)
4019 if 'qm_grads' in Answer:
4020 for i, frc
in enumerate(Answer[
'qm_grads']):
4021 Answer[
'qm_grads'][i] = frc.T
4022 for i
in np.where(np.array(conv) == 0)[0]:
4023 Answer[
'qm_grads'].insert(i, Answer[
'qm_grads'][0]*0.0)
4024 if len(Answer[
'qm_grads']) != len(Answer[
'qm_energies']):
4025 logger.warning(
"Number of energies and gradients is inconsistent (composite jobs?) Deleting gradients.")
4026 del Answer[
'qm_grads']
4029 Answer[
'qm_mulliken_charges'] = list(np.array(mkchg))
4030 for i
in np.where(np.array(conv) == 0)[0]:
4031 Answer[
'qm_mulliken_charges'].insert(i, np.array([0.0
for i
in mkchg[-1]]))
4032 Answer[
'qm_mulliken_charges'] = Answer[
'qm_mulliken_charges'][:len(Answer[
'qm_energies'])]
4034 Answer[
'qm_mulliken_spins'] = list(np.array(mkspn))
4035 for i
in np.where(np.array(conv) == 0)[0]:
4036 Answer[
'qm_mulliken_spins'].insert(i, np.array([0.0
for i
in mkspn[-1]]))
4037 Answer[
'qm_mulliken_spins'] = Answer[
'qm_mulliken_spins'][:len(Answer[
'qm_energies'])]
4039 Answer[
'Irc'] = IRCData
4041 unnorm = [np.array(i)
for i
in modes]
4042 Answer[
'freqs'] = np.array(frqs)
4043 Answer[
'modes'] = [i/np.linalg.norm(i)
for i
in unnorm]
4052 self.
require(
'qctemplate',
'qcrems',
'charge',
'mult')
4054 if 'read' in kwargs:
4055 read = kwargs[
'read']
4058 for SI, I
in enumerate(selection):
4061 molecule_printed =
False 4063 if 'qm_extchgs' in self.
Data:
4064 extchg = self.qm_extchgs[I]
4065 out.append(
'$external_charges')
4066 for i
in range(len(extchg)):
4067 out.append(
"% 15.10f % 15.10f % 15.10f %15.10f" % (extchg[i,0],extchg[i,1],extchg[i,2],extchg[i,3]))
4069 for SectName, SectData
in self.qctemplate:
4070 if 'jobtype' in self.qcrems[remidx]
and self.qcrems[remidx][
'jobtype'].lower() ==
'fsm':
4072 if len(selection) != 2:
4073 logger.error(
'For freezing string method, please provide two structures only.\n')
4075 if SectName !=
'@@@':
4076 out.append(
'$%s' % SectName)
4077 for line
in SectData:
4079 if SectName ==
'molecule':
4080 if molecule_printed ==
False:
4081 molecule_printed =
True 4085 out.append(
"%i %i" % (self.
charge, self.
mult))
4087 for e, x
in zip(self.elem, self.
xyzs[I]):
4088 pre =
'@' if (
'qm_ghost' in self.
Data and self.
Data[
'qm_ghost'][an])
else '' 4089 suf = self.
Data[
'qcsuf'][an]
if 'qcsuf' in self.
Data else '' 4095 for e, x
in zip(self.elem, self.
xyzs[selection[SI+1]]):
4096 pre =
'@' if (
'qm_ghost' in self.
Data and self.
Data[
'qm_ghost'][an])
else '' 4097 suf = self.
Data[
'qcsuf'][an]
if 'qcsuf' in self.
Data else '' 4100 if SectName ==
'rem':
4101 for key, val
in self.qcrems[remidx].items():
4102 out.append(
"%-21s %-s" % (key, str(val)))
4103 if SectName ==
'comments' and 'comms' in self.
Data:
4104 out.append(self.
comms[I])
4112 if I != selection[-1]:
4117 def write_xyz(self, selection, **kwargs):
4122 out.append(
"%-5i" % self.na)
4123 out.append(self.
comms[I])
4124 for i
in range(self.na):
4130 Return a list of element names which maps the LAMMPS atom types 4131 to the ReaxFF elements 4134 for i
in range(self.na):
4135 if self.elem[i]
not in elist:
4136 elist.append(self.elem[i])
4141 Write the first frame of the selection to a LAMMPS data file 4142 for the purpose of automatically initializing a LAMMPS simulation. 4143 This function makes several assumptions until further notice: 4145 (1) We are interested in a ReaxFF simulation 4146 (2) Atom types will be generated from elements 4150 comm = self.
comms[I]
4151 if not comm.startswith(
"#"):
4153 atmap = OrderedDict()
4154 for i
in range(self.na):
4155 if self.elem[i]
not in atmap:
4156 atmap[self.elem[i]] = len(atmap.keys()) + 1
4162 out.append(
"%i atoms" % self.na)
4163 out.append(
"%i atom types" % len(atmap.keys()))
4171 if 'boxes' in self.
Data:
4172 xhi = self.
boxes[I].a
4173 yhi = self.
boxes[I].b
4174 zhi = self.
boxes[I].c
4176 xlo = np.floor(np.min(self.
xyzs[I][:,0]))
4177 ylo = np.floor(np.min(self.
xyzs[I][:,1]))
4178 zlo = np.floor(np.min(self.
xyzs[I][:,2]))
4179 xhi = np.ceil(np.max(self.
xyzs[I][:,0]))+30
4180 yhi = np.ceil(np.max(self.
xyzs[I][:,1]))+30
4181 zhi = np.ceil(np.max(self.
xyzs[I][:,2]))+30
4182 if (np.min(self.
xyzs[I][:,0]) < xlo
or 4183 np.min(self.
xyzs[I][:,1]) < ylo
or 4184 np.min(self.
xyzs[I][:,2]) < zlo
or 4185 np.max(self.
xyzs[I][:,0]) > xhi
or 4186 np.max(self.
xyzs[I][:,1]) > yhi
or 4187 np.max(self.
xyzs[I][:,2]) > zhi):
4188 logger.warning(
"Some atom positions are outside the simulation box, be careful")
4189 out.append(
"% .3f % .3f xlo xhi" % (xlo, xhi))
4190 out.append(
"% .3f % .3f ylo yhi" % (ylo, yhi))
4191 out.append(
"% .3f % .3f zlo zhi" % (zlo, zhi))
4194 out.append(
"Masses")
4196 for i, a
in enumerate(atmap.keys()):
4197 out.append(
"%i %.4f" % (i+1, PeriodicTable[a]))
4202 for i
in range(self.na):
4208 out.append(
"%4i 1 %2i 0.0 % 15.10f % 15.10f % 15.10f" % (i+1, list(atmap.keys()).index(self.elem[i])+1, self.
xyzs[I][i, 0], self.
xyzs[I][i, 1], self.
xyzs[I][i, 2]))
4212 self.
require(
'xyzs',
'partial_charge')
4217 out.append(self.
comms[I])
4218 out.append(
"%-5i" % self.na)
4219 for i
in range(self.na):
4220 out.append(
"% 15.10f % 15.10f % 15.10f % 15.10f 0" % (xyz[i,0],xyz[i,1],xyz[i,2],self.partial_charge[i]))
4226 out = [
'mdcrd file generated using %s' % package]
4229 out += [
''.join([
"%8.3f" % i
for i
in g])
for g
in grouper(10, list(xyz.flatten()))]
4230 if 'boxes' in self.
Data:
4231 out.append(
''.join([
"%8.3f" % i
for i
in [self.
boxes[I].a, self.
boxes[I].b, self.
boxes[I].c]]))
4236 if len(self.
xyzs) != 1
and sn
is None:
4237 logger.error(
"inpcrd can only be written for a single-frame trajectory\n")
4244 out = [self.
comms[0][:80],
'%5i' % self.na]
4247 for ix, x
in enumerate(xyz):
4248 strout +=
"%12.7f%12.7f%12.7f" % (x[0], x[1], x[2])
4249 if ix%2 == 1
or ix == (len(xyz) - 1):
4253 if 'boxes' in self.
Data:
4254 out.append(
''.join([
"%12.7f" % i
for i
in [self.
boxes[0].a, self.
boxes[0].b, self.
boxes[0].c]]))
4257 def write_arc(self, selection, **kwargs):
4260 if 'tinkersuf' not in self.
Data:
4261 sys.stderr.write(
"Beware, this .arc file contains no atom type or topology info\n")
4264 out.append(
"%6i %s" % (self.na, self.
comms[I]))
4265 if 'boxes' in self.
Data:
4267 out.append(
" %11.6f %11.6f %11.6f %11.6f %11.6f %11.6f" % (b.a, b.b, b.c, b.alpha, b.beta, b.gamma))
4268 for i
in range(self.na):
4269 out.append(
"%6i %s%s" % (i+1,
format_xyz_coord(self.elem[i],xyz[i],tinker=
True),self.tinkersuf[i]
if 'tinkersuf' in self.
Data else ''))
4272 def write_gro(self, selection, **kwargs):
4274 if sys.stdin.isatty():
4280 self.
require(
'elem',
'xyzs',
'resname',
'resid',
'boxes')
4282 if 'atomname' not in self.
Data:
4286 for i
in range(self.na):
4287 if self.
resid[i] != resid:
4290 resid = self.
resid[i]
4291 atomname.append(
"%s%i" % (self.elem[i], count))
4297 xyzwrite = xyz.copy()
4299 out.append(self.
comms[I])
4300 out.append(
"%5i" % self.na)
4301 for an, line
in enumerate(xyzwrite):
4306 def write_dcd(self, selection, **kwargs):
4307 if _dcdlib.vmdplugin_init() != 0:
4308 logger.error(
"Unable to init DCD plugin\n")
4310 natoms = c_int(self.na)
4312 dcd = _dcdlib.open_dcd_write(create_string_buffer(fname),
"dcd", natoms)
4314 _xyz = c_float * (natoms.value * 3)
4317 ts.coords = _xyz(*list(xyz.flatten()))
4318 ts.A = c_float(self.
boxes[I].a
if 'boxes' in self.
Data else 1.0)
4319 ts.B = c_float(self.
boxes[I].b
if 'boxes' in self.
Data else 1.0)
4320 ts.C = c_float(self.
boxes[I].c
if 'boxes' in self.
Data else 1.0)
4321 ts.alpha = c_float(self.
boxes[I].alpha
if 'boxes' in self.
Data else 90.0)
4322 ts.beta = c_float(self.
boxes[I].beta
if 'boxes' in self.
Data else 90.0)
4323 ts.gamma = c_float(self.
boxes[I].gamma
if 'boxes' in self.
Data else 90.0)
4324 result = _dcdlib.write_timestep(dcd, byref(ts))
4326 logger.error(
"Error encountered when writing DCD\n")
4329 _dcdlib.close_file_write(dcd)
4333 standardResidues = [
'ALA',
'ASN',
'CYS',
'GLU',
'HIS',
'LEU',
'MET',
'PRO',
'THR',
'TYR',
4334 'ARG',
'ASP',
'GLN',
'GLY',
'ILE',
'LYS',
'PHE',
'SER',
'TRP',
'VAL',
4335 'HID',
'HIE',
'HIP',
'ASH',
'GLH',
'TYD',
'CYM',
'CYX',
'LYN',
4336 'PTR',
'SEP',
'TPO',
'Y1P',
'S1P',
'T1P',
4337 'HOH',
'SOL',
'WAT',
4338 'A',
'G',
'C',
'U', 'I', 'DA', 'DG', 'DC', 'DT', 'DI']
4341 if sys.stdin.isatty():
4346 self.
require(
'xyzs',
'resname',
'resid')
4347 write_conect = kwargs.pop(
'write_conect', 1)
4350 if 'atomname' not in self.
Data:
4354 for i
in range(self.na):
4358 resid = self.
resid[i]
4359 atomnames.append(
"%s%i" % (self.elem[i], count))
4363 for i, atomname
in enumerate(self.
atomname):
4364 if len(atomname) < 4
and atomname[:1].isalpha()
and len(self.elem[i]) < 2:
4365 atomName =
' '+atomname
4366 elif len(atomname) > 4:
4367 atomName = atomname[:4]
4370 atomNames.append(atomName)
4372 if 'chain' not in self.
Data:
4373 chainNames = [
'A' for i
in range(self.na)]
4375 chainNames = [i[0]
if len(i)>0
else ' ' for i
in self.chain]
4379 if len(resname) > 3:
4380 resName = resname[:3]
4383 resNames.append(resName)
4386 for resid
in self.
resid:
4387 resIds.append(
"%4d" % (resid%10000))
4390 for resname
in resNames:
4391 if resname
in [
'HOH',
'SOL',
'WAT']:
4392 records.append(
"HETATM")
4393 elif resname
in standardResidues:
4394 records.append(
"ATOM ")
4396 records.append(
"HETATM")
4400 out.append(
"REMARK 1 CREATED WITH %s %s" % (package.upper(), str(date.today())))
4401 if 'boxes' in self.
Data:
4405 alpha = self.
boxes[0].alpha
4406 beta = self.
boxes[0].beta
4407 gamma = self.
boxes[0].gamma
4408 out.append(
"CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1 1 " % (a, b, c, alpha, beta, gamma))
4411 for sn
in range(len(self)):
4414 out.append(
"MODEL %4d" % modelIndex)
4416 for i
in range(self.na):
4417 recordName = records[i]
4418 atomName = atomNames[i]
4419 resName = resNames[i]
4420 chainName = chainNames[i]
4422 coords = self.
xyzs[sn][i]
4423 symbol = self.elem[i]
4424 if hasattr(self,
'partial_charge'):
4425 bfactor = self.partial_charge[i]
4428 atomIndices[i] = atomIndex
4429 line =
"%s%5d %-4s %3s %s%4s %s%s%s %5.2f 0.00 %2s " % (
4430 recordName, atomIndex%100000, atomName, resName, chainName, resId, _format_83(coords[0]),
4431 _format_83(coords[1]), _format_83(coords[2]), bfactor, symbol)
4432 assert len(line) == 80,
'Fixed width overflow detected' 4435 if i < (self.na-1)
and chainName != chainNames[i+1]:
4436 out.append(
"TER %5d %3s %s%4s" % (atomIndex, resName, chainName, resId))
4438 out.append(
"TER %5d %3s %s%4s" % (atomIndex, resName, chainName, resId))
4440 out.append(
"ENDMDL")
4442 if 'bonds' in self.
Data:
4443 for i, j
in self.bonds:
4445 if self.
resname[i]
not in standardResidues
or self.
resname[j]
not in standardResidues:
4446 conectBonds.append((i, j))
4448 conectBonds.append((i, j))
4450 conectBonds.append((i, j))
4453 for atom1, atom2
in conectBonds:
4454 index1 = atomIndices[atom1]
4455 index2 = atomIndices[atom2]
4456 if index1
not in atomBonds:
4457 atomBonds[index1] = []
4458 if index2
not in atomBonds:
4459 atomBonds[index2] = []
4460 atomBonds[index1].
append(index2)
4461 atomBonds[index2].
append(index1)
4463 for index1
in sorted(atomBonds):
4464 bonded = atomBonds[index1]
4465 while len(bonded) > 4:
4466 out.append(
"CONECT%5d%5d%5d%5d" % (index1, bonded[0], bonded[1], bonded[2]))
4468 line =
"CONECT%5d" % index1
4469 for index2
in bonded:
4470 line =
"%s%5d" % (line, index2)
4475 """ Text quantum data format. """ 4480 out.append(
"JOB %i" % I)
4481 out.append(
"COORDS"+
pvec(xyz))
4482 if 'qm_energies' in self.
Data:
4483 out.append(
"ENERGY % .12e" % self.qm_energies[I])
4484 if 'mm_energies' in self.
Data:
4485 out.append(
"EMD0 % .12e" % self.mm_energies[I])
4486 if 'qm_grads' in self.
Data:
4487 out.append(
"GRADIENT"+
pvec(self.qm_grads[I]))
4488 if 'qm_espxyzs' in self.
Data and 'qm_espvals' in self.
Data:
4489 out.append(
"ESPXYZ"+
pvec(self.qm_espxyzs[I]))
4490 out.append(
"ESPVAL"+
pvec(self.qm_espvals[I]))
4491 if 'qm_interaction' in self.
Data:
4492 out.append(
"INTERACTION % .12e" % self.qm_interaction[I])
4497 if 'resid' not in self.
Data:
4498 na_res = int(
input(
"Enter how many atoms are in a residue, or zero as a single residue -> "))
4500 self.
resid = [1
for i
in range(self.na)]
4502 self.
resid = [1 + int(i/na_res)
for i
in range(self.na)]
4505 if 'resname' not in self.
Data:
4506 resname =
input(
"Enter a residue name (3-letter like 'SOL') -> ")
4507 self.
resname = [resname
for i
in range(self.na)]
4511 s = [float(i)
for i
in line.split()]
4537 v1 = np.array([s[0], s[3], s[4]])
4538 v2 = np.array([s[5], s[1], s[6]])
4539 v3 = np.array([s[7], s[8], s[2]])
4542 logger.error(
"Not sure what to do since you gave me %i numbers\n" % len(s))
4545 if 'boxes' not in self.
Data or len(self.
boxes) != self.ns:
4546 sys.stderr.write(
"Please specify the periodic box using:\n")
4547 sys.stderr.write(
"1 float (cubic lattice length in Angstrom)\n")
4548 sys.stderr.write(
"3 floats (orthogonal lattice lengths in Angstrom)\n")
4549 sys.stderr.write(
"6 floats (triclinic lattice lengths and angles in degrees)\n")
4550 sys.stderr.write(
"9 floats (triclinic lattice vectors v1(x) v2(y) v3(z) v1(y) v1(z) v2(x) v2(z) v3(x) v3(y) in Angstrom)\n")
4551 sys.stderr.write(
"Or: Name of a file containing one of these lines for each frame in the trajectory\n")
4552 boxstr =
input(
"Box Vector Input: -> ")
4553 if os.path.exists(boxstr):
4554 boxfile = open(boxstr).readlines()
4555 if len(boxfile) != len(self):
4556 logger.error(
'Tried to read in the box file, but it has a different length from the number of frames.\n')
4559 self.
boxes = [buildbox(line)
for line
in boxfile]
4561 mybox = buildbox(boxstr)
4562 self.
boxes = [mybox
for i
in range(self.ns)]
4565 logger.info(
"Basic usage as an executable: molecule.py input.format1 output.format2")
4566 logger.info(
"where format stands for xyz, pdb, gro, etc.")
4568 Mao.write(sys.argv[2])
4570 if __name__ ==
"__main__":
def __add__(self, other)
Add method for Molecule objects.
def find_rings(self, max_size=6)
Return a list of rings in the molecule.
def radius_of_gyration(self)
def aliphatic_hydrogens(self)
Wrapper for the timestep C structure used in molfile plugins.
def __iter__(self)
List-like behavior for looping over trajectories.
def format_xyzgen_coord(element, xyzgen)
Print a line consisting of (element, p, q, r, s, t, ...) where (p, q, r) are arbitrary atom-wise data...
def get_rotate_translate(matrix1, matrix2)
def read_mol2(self, fnm, kwargs)
def elem_from_atomname(atomname)
Given an atom name, attempt to get the element in most cases.
def __len__(self)
Return the number of frames in the trajectory.
def __init__(self, stream=sys.stdout)
def extract_int(arr, avgthre, limthre, label="value", verbose=True)
Get the representative integer value from an array.
Exactly like output.StreamHandler except it does no extra formatting before sending logging messages ...
def BuildLatticeFromLengthsAngles(a, b, c, alpha, beta, gamma)
This function takes in three lattice lengths and three lattice angles, and tries to return a complete...
def split(self, fnm=None, ftype=None, method="chunks", num=None)
Split the molecule object into a number of separate files (chunks), either by specifying the number o...
def center(self, center_mass=False)
Move geometric center to the origin.
def edit_qcrems(self, in_dict, subcalc=None)
Edit Q-Chem rem variables with a dictionary.
def read_charmm(self, fnm, kwargs)
Read a CHARMM .cor (or .crd) file.
Write_Tab
The table of file writers.
def pathwise_rmsd(self, align=True)
Find RMSD between frames along path.
def read_qcout(self, fnm, errok=None, kwargs)
Q-Chem output file reader, adapted for our parser.
def measure_dihedrals(self, i, j, k, l)
Return a series of dihedral angles, given four atom indices numbered from zero.
def __getitem__(self, key)
The Molecule class has list-like behavior, so we can get slices of it.
def find_dihedrals(self)
Return a list of 4-tuples corresponding to all of the dihedral angles in the system.
def ComputeOverlap(theta, elem, xyz1, xyz2)
Computes an 'overlap' between two molecules based on some fictitious density.
def load_popxyz(self, fnm)
Given a charge-spin xyz file, load the charges (x-coordinate) and spins (y-coordinate) into internal ...
def AlignToDensity(elem, xyz1, xyz2, binary=False)
Computes a "overlap density" from two frames.
def format_gro_box(box)
Print a line corresponding to the box vector in accordance with .gro file format. ...
def rigid_water(self)
If one atom is oxygen and the next two are hydrogen, make the water molecule rigid.
def replace_peratom(self, key, orig, want)
Replace all of the data for a certain attribute in the system from orig to want.
def BuildLatticeFromVectors(v1, v2, v3)
This function takes in three lattice vectors and tries to return a complete box specification.
def rotate_check_clash(self, frame, rotate_index, thresh_hyd=1.4, thresh_hvy=1.8, printLevel=1)
Return a new Molecule object containing the selected frame plus a number of frames where the selected...
def get_reaxff_atom_types(self)
Return a list of element names which maps the LAMMPS atom types to the ReaxFF elements.
def measure_angles(self, i, j, k)
Lee-Ping's general file format conversion class.
def read_xyz(self, fnm, kwargs)
.xyz files can be TINKER formatted which is why we have the try/except here.
def find_clashes(self, thre=0.0, pbc=True, groups=None)
Obtain a list of atoms that 'clash' (i.e.
def read_inpcrd(self, fnm, kwargs)
Parse an AMBER .inpcrd or .rst file.
def EulerMatrix(T1, T2, T3)
Constructs an Euler matrix from three Euler angles.
def cartesian_product2(arrays)
Form a Cartesian product of two NumPy arrays.
def __setattr__(self, key, value)
Whenever we try to get a class attribute, it first tries to get the attribute from the Data dictionar...
def atom_select(self, atomslice, build_topology=True)
Return a copy of the object with certain atoms selected.
def isint(word)
ONLY matches integers! If you have a decimal point? None shall pass!
def __deepcopy__(self, memo)
Custom deepcopy method because Python 3.6 appears to have changed its behavior.
def AtomContact(xyz, pairs, box=None, displace=False)
Compute distances between pairs of atoms.
def write_xyz(self, selection, kwargs)
def __getattr__(self, key)
Whenever we try to get a class attribute, it first tries to get the attribute from the Data dictionar...
fout
I needed to add in this line because the DCD writer requires the file name, but the other methods don...
def read_com(self, fnm, kwargs)
Parse a Gaussian .com file and return a SINGLE-ELEMENT list of xyz coordinates (no multiple file supp...
def MolEqual(mol1, mol2)
Determine whether two Molecule objects have the same fragments by looking at elements and connectivit...
def read_pdb(self, fnm, kwargs)
Loads a PDB and returns a dictionary containing its data.
def unmangle(M1, M2)
Create a mapping that takes M1's atom indices to M2's atom indices based on position.
def read_comm_charge_mult(self, verbose=False)
Set charge and multiplicity from reading the comment line, formatted in a specific way...
def ref_rmsd(self, i, align=True)
Find RMSD to a reference frame.
def nodematch(node1, node2)
def is_gro_box(line)
Determines whether a line contains a GROMACS box vector or not.
def openmm_boxes(self)
Returns the periodic box vectors in the Molecule object in a list of OpenMM-compatible boxes...
def all_pairwise_rmsd(self)
Find pairwise RMSD (super slow, not like the one in MSMBuilder.)
def read_gro(self, fnm, kwargs)
Read a GROMACS .gro file.
def require_resname(self)
top_settings
Topology settings.
def format_xyz_coord(element, xyz, tinker=False)
Print a line consisting of (element, x, y, z) in accordance with .xyz file format.
def find_angles(self)
Return a list of 3-tuples corresponding to all of the angles in the system.
positive_resid
Creates entries like 'gromacs' : 'gromacs' and 'xyz' : 'xyz' in the Funnel.
def TopEqual(mol1, mol2)
For the nanoreactor project: Determine whether two Molecule objects have the same topologies...
def align(self, smooth=False, center=True, center_mass=False, atom_select=None)
Align molecules.
def build_topology(self, force_bonds=True, kwargs)
Create self.topology and self.molecules; these are graph representations of the individual molecules ...
def replace_peratom_conditional(self, key1, cond, key2, orig, want)
Replace all of the data for a attribute key2 from orig to want, contingent on key1 being equal to con...
def __iadd__(self, other)
Add method for Molecule objects.
def distance_displacement(self)
Obtain distance matrix and displacement vectors between all pairs of atoms.
def reorder_indices(self, other)
Return the indices that would reorder atoms according to some other Molecule object.
def distance_matrix(self, pbc=True)
Obtain distance matrix between all pairs of atoms.
def CubicLattice(a)
This function takes in three lattice lengths and three lattice angles, and tries to return a complete...
Funnel
A funnel dictionary that takes redundant file types and maps them down to a few.
def even_list(totlen, splitsize)
Creates a list of number sequences divided as evenly as possible.
def order_by_connectivity(self, m, i, currList, max_min_path)
Recursive function that orders atoms based on connectivity.
comms
Read in stuff if we passed in a file name, otherwise return an empty instance.
def rotate_bond(self, frame, aj, ak, increment=15)
Return a new Molecule object containing the selected frame plus a number of frames where the selected...
def build_bonds(self)
Build the bond connectivity graph.
def extract_pop(M, verbose=True)
Extract our best estimate of charge and spin-z from the comments section of a Molecule object created...
def write_qcin(self, selection, kwargs)
Read_Tab
The table of file readers.
def read_dcd(self, fnm, kwargs)
def __delitem__(self, key)
Similarly, in order to delete a frame, we simply perform item deletion on framewise variables...
def write_inpcrd(self, selection, sn=None, kwargs)
def write_molproq(self, selection, kwargs)
def write_dcd(self, selection, kwargs)
def align_by_moments(self)
Align molecules using the moment of inertia.
def read_qcin(self, fnm, kwargs)
Read a Q-Chem input file.
def write(self, fnm=None, ftype=None, append=False, selection=None, kwargs)
def read_qcesp(self, fnm, kwargs)
def add_virtual_site(self, idx, kwargs)
Add a virtual site to the system.
def arc(Mol, begin=None, end=None, RMSD=True, align=True)
Get the arc-length for a trajectory segment.
def EqualSpacing(Mol, frames=0, dx=0, RMSD=True, align=True)
Equalize the spacing of frames in a trajectory with linear interpolation.
def load_frames(self, fnm, ftype=None, kwargs)
def write_mdcrd(self, selection, kwargs)
def add_strip_to_mat(mat, strip)
def measure_distances(self, i, j)
def get_populations(self)
Return a cloned molecule object but with X-coordinates set to Mulliken charges and Y-coordinates set ...
def without(self, args)
Return a copy of the Molecule object but with certain fields deleted if they exist.
def isfloat(word)
Matches ANY number; it can be a decimal, scientific notation, integer, or what have you...
def read_arc(self, fnm, kwargs)
Read a TINKER .arc file.
def AlignToMoments(elem, xyz1, xyz2=None)
Pre-aligns molecules to 'moment of inertia'.
def read_qdata(self, fnm, kwargs)
def write_qdata(self, selection, kwargs)
Text quantum data format.
def write_pdb(self, selection, kwargs)
def add_quantum(self, other)
def write_gro(self, selection, kwargs)
def openmm_positions(self)
Returns the Cartesian coordinates in the Molecule object in a list of OpenMM-compatible positions...
def is_gro_coord(line)
Determines whether a line contains GROMACS data or not.
def axis_angle(axis, angle)
Given a rotation axis and angle, return the corresponding 3x3 rotation matrix, which will rotate a (N...
def read_qcschema(self, schema, kwargs)
def format_gro_coord(resid, resname, aname, seqno, xyz)
Print a line in accordance with .gro file format, with six decimal points of precision.
def write_lammps_data(self, selection, kwargs)
Write the first frame of the selection to a LAMMPS data file for the purpose of automatically initial...
def grouper(n, iterable)
Groups a big long iterable into groups of ten or what have you.
def is_charmm_coord(line)
Determines whether a line contains CHARMM data or not.
def atom_stack(self, other)
Return a copy of the object with another molecule object appended.
def read_mdcrd(self, fnm, kwargs)
Parse an AMBER .mdcrd file.
def form_rot(q)
Given a quaternion p, form a rotation matrix from it.
def write_arc(self, selection, kwargs)
def read_xyz0(self, fnm, kwargs)
Parse a .xyz file which contains several xyz coordinates, and return their elements.
def reorder_according_to(self, other)
Reorder atoms according to some other Molecule object.
def repair(self, key, klast)
Attempt to repair trivial issues that would otherwise break the object.