ForceBalance API  1.3
Automated optimization of force fields and empirical potentials
Mol2.py
Go to the documentation of this file.
1 """
2 Author: P. Tuffery 2008
3 Ressource Parisienne en Bioinformatique Structurale
4 http://bioserv.rpbs.univ-paris-diderot.fr
5 
6 This is free software. You can use it, modify it, distribute it.
7 However, thanks for the feedback for any improvement you bring to it!
8 Changed by Lee-Ping from dict to OrderedDict.
9 
10 mol2_set:
11  A class to manage multiple mol2 files
12 mol2:
13  A class to manage simple mol2 data
14 """
15 from __future__ import print_function
16 
17 from builtins import range
18 from builtins import object
19 import sys
20 import types
21 from collections import OrderedDict
22 
23 #=====================================================================
24 #
25 # The mol2 atom line class
26 #
27 #=====================================================================
28 
class mol2_atom(object):
    """
    This is to manage mol2 atomic lines on the form:
      1 C1 5.4790 42.2880 49.5910 C.ar 1 <1> 0.0424

    The whitespace-separated fields are, in order: atom_id, atom_name,
    x, y, z, atom_type, subst_id, subst_name, and optionally charge and
    status_bit.

    Every set_* method doubles as a getter: called without an argument
    (or with None) it leaves the attribute untouched and returns the
    current value.
    """
    def __init__(self, data=None):
        """
        if data (one atom text line) is passed, it will be installed
        """
        self.atom_id = None
        self.atom_name = None
        self.x = None
        self.y = None
        self.z = None
        self.atom_type = None
        self.subst_id = None
        self.subst_name = None
        self.charge = None
        self.status_bit = None

        if data is not None:
            self.parse(data)

    def parse(self, data):
        """
        split the text line into a series of properties

        Raises IndexError when the line has fewer than the 8 mandatory
        fields, and ValueError when the identifier, a coordinate or the
        substructure identifier is not numeric.
        """
        it = data.split()
        self.set_atom_id(it[0])
        self.set_atom_name(it[1])
        self.set_crds(it[2], it[3], it[4])
        self.set_atom_type(it[5])
        self.set_subst_id(it[6])
        self.set_subst_name(it[7])
        try:
            self.set_charge(it[8])
        except (IndexError, ValueError):
            # The charge column is absent or not a number: fall back to a
            # neutral atom.  Only these two failures are expected here, so
            # nothing else (e.g. KeyboardInterrupt) is swallowed.
            self.set_charge(0.)
        # The status bit is the optional 10th column.
        self.status_bit = it[9] if len(it) > 9 else None

    def __repr__(self):
        """
        assemble the properties as a text line, and return it

        The numeric conversions in the format string require the atom to
        have been fully populated (by parse() or the setters).
        """
        rs = "%7d %-5s %9.4f %9.4f %9.4f %-7s %2d %4s %12.6f" % (self.atom_id, self.atom_name, self.x, self.y, self.z, self.atom_type, self.subst_id, self.subst_name, self.charge)
        if self.status_bit is not None:
            rs = rs + " %s" % self.status_bit
        return rs

    def set_atom_id(self, atom_id=None):
        """
        atom identifier (stored as an integer); returns the current value
        """
        if atom_id is not None:
            self.atom_id = int(atom_id)
        return self.atom_id

    def set_atom_name(self, atom_name=None):
        """
        The name of the atom (string); returns the current value
        """
        if atom_name is not None:
            self.atom_name = atom_name
        return self.atom_name

    def set_crds(self, x=None, y=None, z=None):
        """
        the coordinates of the atom (stored as floats)

        The coordinates are only updated when all three are given;
        returns the current (x, y, z) tuple.
        """
        if (x is not None) and (y is not None) and (z is not None):
            self.x = float(x)
            self.y = float(y)
            self.z = float(z)
        return self.x, self.y, self.z

    def set_atom_type(self, atom_type=None):
        """
        The mol2 type of the atom (string); returns the current value
        """
        if atom_type is not None:
            self.atom_type = atom_type
        return self.atom_type

    def set_subst_id(self, subst_id=None):
        """
        substructure identifier (stored as an integer); returns the current value
        """
        if subst_id is not None:
            self.subst_id = int(subst_id)
        return self.subst_id

    def set_subst_name(self, subst_name=None):
        """
        substructure name (string); returns the current value
        """
        if subst_name is not None:
            self.subst_name = subst_name
        return self.subst_name

    def set_charge(self, charge=None):
        """
        atomic charge (stored as a float); returns the current value
        """
        if charge is not None:
            self.charge = float(charge)
        return self.charge

    def set_status_bit(self, status_bit=None):
        """
        Never to use (in theory); returns the current value
        """
        if status_bit is not None:
            self.status_bit = status_bit
        return self.status_bit
149 
150 
151 #=====================================================================
152 #
153 # The mol2 bond line class
154 #
155 #=====================================================================
156 
class mol2_bond(object):
    """
    This is to manage mol2 bond lines on the form:
      1 1 2 ar

    The whitespace-separated fields are, in order: bond_id,
    origin_atom_id, target_atom_id, bond_type, and optionally status_bit.

    Every set_* method doubles as a getter: called without an argument
    (or with None) it leaves the attribute untouched and returns the
    current value.
    """
    def __init__(self, data=None):
        """
        if data (one bond text line) is passed, it will be installed
        """
        self.bond_id = None
        self.origin_atom_id = None
        self.target_atom_id = None
        self.bond_type = None
        # Must exist even when no line is parsed: __repr__() and
        # set_status_bit() both read it.
        self.status_bit = None

        if data is not None:
            self.parse(data)

    def __repr__(self):
        """
        assemble the properties as a text line, and return it

        The numeric conversions in the format string require the three
        identifiers to have been set.
        """
        rs = "%6d %5d %5d %4s" % (self.bond_id, self.origin_atom_id, self.target_atom_id, self.bond_type)
        if self.status_bit is not None:
            rs = rs + " %s" % self.status_bit
        return rs

    def parse(self, data):
        """
        split the text line into a series of properties

        Raises IndexError when the line has fewer than the 4 mandatory
        fields, and ValueError when one of the identifiers is not an
        integer.
        """
        it = data.split()
        self.set_bond_id(it[0])
        self.set_origin_atom_id(it[1])
        self.set_target_atom_id(it[2])
        self.set_bond_type(it[3])
        # The status bit is the optional 5th column.
        self.status_bit = it[4] if len(it) > 4 else None

    def set_bond_id(self, bond_id=None):
        """
        bond identifier (stored as an integer); returns the current value
        """
        if bond_id is not None:
            # Converted like in parse(), since __repr__ formats it with %d.
            self.bond_id = int(bond_id)
        return self.bond_id

    def set_origin_atom_id(self, origin_atom_id=None):
        """
        the origin atom identifier (stored as an integer); returns the current value
        """
        if origin_atom_id is not None:
            self.origin_atom_id = int(origin_atom_id)
        return self.origin_atom_id

    def set_target_atom_id(self, target_atom_id=None):
        """
        the target atom identifier (stored as an integer); returns the current value
        """
        if target_atom_id is not None:
            self.target_atom_id = int(target_atom_id)
        return self.target_atom_id

    def set_bond_type(self, bond_type=None):
        """
        bond type (string); returns the current value
        one of:
        1 = single
        2 = double
        3 = triple
        am = amide
        ar = aromatic
        du = dummy
        un = unknown
        nc = not connected
        """
        if bond_type is not None:
            self.bond_type = bond_type
        return self.bond_type

    def set_status_bit(self, status_bit=None):
        """
        Never to use (in theory); returns the current value
        """
        if status_bit is not None:
            self.status_bit = status_bit
        return self.status_bit
245 
246 #=====================================================================
247 #
248 # The one mol2 class
249 #
250 #=====================================================================
251 
class mol2(object):
    """
    This is to manage one mol2 record (one molecule).  The header fields
    are kept as attributes, the atom lines as a list of mol2_atom
    instances (self.atoms) and the bond lines as a list of mol2_bond
    instances (self.bonds).  Every set_* method doubles as a getter:
    called without an argument it returns the current value.

    The record is a series of lines on the form: @verbatim
@<TRIPOS>MOLECULE
CDK2.xray.inh1.1E9H
 34 37 0 0 0
SMALL
GASTEIGER
Energy = 0

@<TRIPOS>ATOM
 1 C1 5.4790 42.2880 49.5910 C.ar 1 <1> 0.0424
 2 C2 4.4740 42.6430 50.5070 C.ar 1 <1> 0.0447
@<TRIPOS>BOND
 1 1 2 ar
 2 1 6 ar

    @endverbatim"""
    def __init__(self, data):
        """
        data: an iterable of text lines holding one molecule record;
        it is parsed immediately.
        """
        self.mol_name = None
        self.num_atoms = 0
        self.num_bonds = 0
        self.num_subst = 0
        self.num_feat = 0
        self.num_sets = 0
        self.mol_type = None
        self.charge_type = None
        self.comments = ""

        self.atoms = []
        self.bonds = []

        self.parse(data)

    def __repr__(self):
        """
        assemble the record as mol2 text (MOLECULE, ATOM and BOND
        sections), and return it
        """
        parts = [
            "%s\n" % "@<TRIPOS>MOLECULE",
            "%s\n" % self.mol_name,
            "%d %d %d %d %d\n" % (self.num_atoms, self.num_bonds, self.num_subst, self.num_feat, self.num_sets),
            "%s\n" % self.mol_type,
            "%s\n" % self.charge_type,
            "%s" % self.comments,
            "%s\n" % "@<TRIPOS>ATOM",
        ]
        parts.extend("%s\n" % repr(atom) for atom in self.atoms)
        parts.append("%s\n" % "@<TRIPOS>BOND")
        parts.extend("%s\n" % repr(bond) for bond in self.bonds)
        parts.append("\n")
        return "".join(parts)

    def out(self, f = sys.stdout):
        """
        write the mol2 text of the record to the file-like object f
        """
        f.write(self.__repr__())

    def set_mol_name(self, mol_name=None):
        """
        name of the molecule (string)
        """
        if mol_name is not None:
            self.mol_name = mol_name
        return self.mol_name

    def set_num_atoms(self, num_atoms=None):
        """
        number of atoms (integer)
        """
        if num_atoms is not None:
            self.num_atoms = int(num_atoms)
        return self.num_atoms

    def set_num_bonds(self, num_bonds=None):
        """
        number of bonds (integer)
        """
        if num_bonds is not None:
            self.num_bonds = int(num_bonds)
        return self.num_bonds

    def set_num_subst(self, num_subst=None):
        """
        number of substructures (integer)
        """
        if num_subst is not None:
            self.num_subst = int(num_subst)
        return self.num_subst

    def set_num_feat(self, num_feat=None):
        """
        number of features (integer)
        """
        if num_feat is not None:
            self.num_feat = int(num_feat)
        return self.num_feat

    def set_num_sets(self, num_sets=None):
        """
        number of sets (integer)
        """
        if num_sets is not None:
            self.num_sets = int(num_sets)
        return self.num_sets

    def set_mol_type(self, mol_type=None):
        """
        molecule type (string, e.g. SMALL)
        """
        if mol_type is not None:
            self.mol_type = mol_type
        return self.mol_type

    def set_charge_type(self, charge_type=None):
        """
        charge type (string, e.g. GASTEIGER)
        """
        if charge_type is not None:
            self.charge_type = charge_type
        return self.charge_type

    def parse(self, data):
        """
        Parse a series of text lines,
        and setup compound information

        The lines are consumed by a small state machine:
          0 = before the @<TRIPOS>MOLECULE tag (lines are ignored)
          1 = molecule name          2 = counts line
          3 = molecule type          4 = charge type
          5 = free text kept in self.comments, up to @<TRIPOS>ATOM
          6 = atom lines             7 = bond lines
          8 = done (remaining lines are ignored)
        """
        # Start in state 0 so that lines preceding the MOLECULE tag are
        # skipped instead of hitting an unassigned variable.
        status = 0
        for l in data:
            if l.count("@<TRIPOS>MOLECULE"):
                status = 1
                continue
            if status == 1:
                fields = l.split()
                if fields:
                    self.set_mol_name(fields[0])
                status = 2
                continue
            if status == 2:
                # Only num_atoms is mandatory in the format; counts that
                # are not present keep their default of 0.
                setters = (self.set_num_atoms, self.set_num_bonds,
                           self.set_num_subst, self.set_num_feat,
                           self.set_num_sets)
                for setter, value in zip(setters, l.split()):
                    setter(value)
                status = 3
                continue
            if status == 3:
                fields = l.split()
                if fields:
                    self.set_mol_type(fields[0])
                status = 4
                continue
            if status == 4:
                fields = l.split()
                if fields:
                    self.set_charge_type(fields[0])
                status = 5
                continue
            if status == 5:
                if l.count("@<TRIPOS>ATOM"):
                    status = 6
                    # Keeps a separator line before the ATOM tag when the
                    # record is written back by __repr__().
                    if self.comments == "":
                        self.comments = "\n"
                    continue
                self.comments = self.comments + l
                continue
            if status == 6:
                if l.count("@<TRIPOS>BOND"):
                    status = 7
                elif l.count("@<TRIPOS>"):
                    # Another section follows the atoms directly: there is
                    # no bond section to read in this record.
                    status = 8
                elif l.strip():
                    self.atoms.append(mol2_atom(l))
                continue
            if status == 7:
                if l.count("@<TRIPOS>"):
                    status = 8
                    continue
                if not l.strip():
                    # blank lines carry no bond
                    continue
                self.bonds.append(mol2_bond(l))
                if len(self.bonds) == self.num_bonds:
                    status = 8
                continue

    def get_atom(self, id):
        """
        return the atom instance given its atom identifier,
        or None when no atom has this identifier
        """
        # Fast path: atoms are usually stored in identifier order, so atom
        # `id` sits at index id-1.  The bounds check avoids an IndexError
        # for identifiers beyond the atom list.
        guess = id - 1
        if 0 <= guess < len(self.atoms) and self.atoms[guess].set_atom_id() == id:
            return self.atoms[guess]
        for atom in self.atoms:
            if atom.set_atom_id() == id:
                return atom
        return None

    def get_bonded_atoms(self, id):
        """
        return the list of atom instances bonded to the atom

        An entry is None when a bond refers to an atom identifier that is
        not present in the record.
        """
        rs = []
        for bond in self.bonds:
            if bond.set_origin_atom_id() == id:
                rs.append(self.get_atom(bond.set_target_atom_id()))
            if bond.set_target_atom_id() == id:
                rs.append(self.get_atom(bond.set_origin_atom_id()))
        return rs

    def _bonded_type_initials(self, id):
        """
        return the first character of the atom type of every bonded atom
        that could be resolved (dangling bond references are skipped)
        """
        initials = []
        for neighbor in self.get_bonded_atoms(id):
            if neighbor is None:
                continue
            initials.append((neighbor.set_atom_type() or "")[:1])
        return initials

    def set_donnor_acceptor_atoms(self, verbose = 0):
        """
        modify atom types to specify donnor, acceptor, or both

        Atoms are retyped in place, depending on their current type and
        on the atoms they are bonded to:
          S.2, S.3              -> S.don if bonded to an H, else S.acc
          O.3                   -> O.da if bonded to an H, else O.acc
          O.2, O.co2            -> O.acc
          N.2, N.ar, N.3, N.4   -> N.don if bonded to an H
          N.1, N.2, N.ar        -> N.acc if not bonded to an H
          N.pl3                 -> N.phb if not bonded to an H
          C.2, C.3              -> C.phb if bonded to an H and to no O, N or S
        Any other atom is left unchanged.  verbose is accepted but unused.
        """
        for atom in self.atoms:
            atmType = atom.set_atom_type()
            initials = self._bonded_type_initials(atom.set_atom_id())
            hasH = "H" in initials

            # sulfur
            if atmType in ("S.2", "S.3"):
                atom.set_atom_type("S.don" if hasH else "S.acc")
                continue

            # oxygen
            if atmType in ("O.3", "O.2", "O.co2"):
                if atmType == "O.3" and hasH:
                    atom.set_atom_type("O.da") # donnor acceptor
                else:
                    atom.set_atom_type("O.acc")
                continue

            # nitrogen
            if atmType in ("N.1", "N.2", "N.ar", "N.3", "N.4"):
                # Donnor requires an H and acceptor requires none, so the
                # two are mutually exclusive here and no nitrogen ever
                # becomes N.da.
                if hasH:
                    if atmType in ("N.2", "N.ar", "N.3", "N.4"):
                        atom.set_atom_type("N.don")
                elif atmType in ("N.1", "N.2", "N.ar"):
                    atom.set_atom_type("N.acc")
                continue

            # hydrophobic
            if atmType == "N.pl3":
                if not hasH:
                    atom.set_atom_type("N.phb")
                continue
            if atmType in ("C.2", "C.3"):
                hasONS = any(c in ("O", "N", "S") for c in initials)
                if hasH and (not hasONS):
                    atom.set_atom_type("C.phb")
                continue
536 
class mol2_set(object):
    """
    A collection of mol2 compounds, indexed by their molecule name.
    """
    def __init__(self, data = None, subset = None):
        """
        A collection is organized as a dictionary of compounds
        self.num_compounds : the number of compounds
        self.compounds : the (ordered) dictionary of compounds, by mol_name
        self.comments : the "#" lines found before the first compound
        data : the data to setup the set; one of
               - another mol2_set (its compounds are shared, not copied),
               - a string: the name of a mol2 file to read,
               - a list of text lines.
        subset: it is possible to specify a subset of the compounds to load, based on their mol_name identifiers.
                Either a collection of names, or a string: the name of a
                file holding one name per line.

        Loading is best effort: a file that cannot be read, or whose
        content cannot be parsed, is reported on stderr and leaves the set
        with whatever was loaded so far.  A subset file that cannot be read
        is reported and ignored (all compounds are then loaded).
        """
        self.num_compounds = 0
        self.comments = ""
        self.compounds = OrderedDict()

        # subset management
        if isinstance(subset, str):
            try:
                with open(subset) as f:
                    subset = [line.replace("\n", "") for line in f]
            except (IOError, OSError, UnicodeError) as err:
                sys.stderr.write("mol2_set: cannot read subset file %s (%s), loading all compounds\n" % (subset, err))
                subset = None

        # data management
        if data is not None:
            if isinstance(data, mol2_set):
                self.num_compounds = data.num_compounds
                self.compounds = data.compounds
                self.comments = data.comments
            elif isinstance(data, str):
                try:
                    with open(data) as f:
                        lines = f.readlines()
                except (IOError, OSError, UnicodeError) as err:
                    sys.stderr.write("mol2_set: cannot read %s (%s)\n" % (data, err))
                else:
                    try:
                        self.parse(lines, subset)
                    except Exception as err:
                        # Kept best effort on purpose: a malformed file
                        # must not abort the caller, but is not silent.
                        sys.stderr.write("mol2_set: cannot parse %s (%s)\n" % (data, err))
            elif isinstance(data, list):
                self.parse(data, subset)

    def parse(self, data, subset = None):
        """
        parse a list of lines, detect compounds, load them
        only load the subset if specified.

        A compound spans from its @<TRIPOS>MOLECULE line up to the next
        @<TRIPOS>MOLECULE line (or the end of the data); its name is the
        first word of the line following the tag.  Lines starting with
        "#" that precede the first compound are appended to self.comments.
        When a name occurs several times, the last occurrence wins.
        """
        in_compounds = False      # True once the first MOLECULE tag was seen
        spans = OrderedDict()     # compound name -> [first line, end line)
        current = None            # name of the selected compound being scanned

        for idx, line in enumerate(data):
            if (not in_compounds) and line.startswith("#"):
                self.comments = self.comments + line
            if line.count("@<TRIPOS>MOLECULE"):
                in_compounds = True
                # A new record closes the one being scanned, if any.
                if current is not None:
                    spans[current][1] = idx
                    current = None
                # The name is on the next line; a tag without a name line
                # (truncated data) cannot be indexed and is skipped.
                name_fields = data[idx + 1].split() if idx + 1 < len(data) else []
                if name_fields:
                    name = name_fields[0]
                    if (subset is None) or (name in subset):
                        spans[name] = [idx, None]
                        current = name
        # The last selected record runs up to the end of the data.
        if current is not None:
            spans[current][1] = len(data)

        for name, (start, end) in spans.items():
            self.compounds[name] = mol2(data[start:end])
            self.num_compounds += 1
612 
if __name__ == "__main__":

    # Small command-line driver: load the mol2 file named by the first
    # argument, restricted to a single compound, report how many compounds
    # were loaded, then retype and print the first one.
    import sys

    loaded = mol2_set(sys.argv[1], subset=["RNAse.xray.inh8.1QHC"])

    sys.stderr.write("Loaded %d compounds\n" % loaded.num_compounds)

    # Only the first compound (if any) is processed.
    first = next(iter(loaded.compounds.values()), None)
    if first is not None:
        first.set_donnor_acceptor_atoms()
        print(first, end=' ')
def set_crds(self, x=None, y=None, z=None)
the coordinates of the atom
Definition: Mol2.py:111
def __repr__(self)
Definition: Mol2.py:189
def set_target_atom_id(self, target_atom_id=None)
the target atom identifier (integer)
Definition: Mol2.py:235
def __init__(self, data=None)
if data is passed, it will be installed
Definition: Mol2.py:180
def get_atom(self, id)
return the atom instance given its atom identifier
Definition: Mol2.py:464
def set_mol_type(self, mol_type=None)
bond identifier (integer, starting from 1)
Definition: Mol2.py:389
def set_subst_id(self, subst_id=None)
substructure identifier
Definition: Mol2.py:131
def get_bonded_atoms(self, id)
return a dictionnary of atom instances bonded to the atom, and their types
Definition: Mol2.py:478
def set_atom_type(self, atom_type=None)
The mol2 type of the atom.
Definition: Mol2.py:122
def parse(self, data, subset=None)
parse a list of lines, detect compounds, load them only load the subset if specified.
Definition: Mol2.py:624
def __repr__(self)
assemble the properties as a text line, and return it
Definition: Mol2.py:81
def set_num_bonds(self, num_bonds=None)
number of bonds (integer)
Definition: Mol2.py:353
def __repr__(self)
Definition: Mol2.py:308
def set_charge_type(self, charge_type=None)
bond identifier (integer, starting from 1)
Definition: Mol2.py:398
def set_status_bit(self, status_bit=None)
Never to use (in theory)
Definition: Mol2.py:262
def __init__(self, data=None)
if data is passed, it will be installed
Definition: Mol2.py:40
def set_num_atoms(self, num_atoms=None)
number of atoms (integer)
Definition: Mol2.py:344
def out(self, f=sys.stdout)
Definition: Mol2.py:328
This is to manage one mol2 series of lines on the form:
Definition: Mol2.py:291
This is to manage mol2 atomic lines on the form: 1 C1 5.4790 42.2880 49.5910 C.ar 1 <1> 0...
Definition: Mol2.py:35
def set_donnor_acceptor_atoms(self, verbose=0)
modify atom types to specify donnor, acceptor, or both
Definition: Mol2.py:493
def set_atom_id(self, atom_id=None)
atom identifier (integer, starting from 1)
Definition: Mol2.py:93
def set_status_bit(self, status_bit=None)
Never to use (in theory)
Definition: Mol2.py:158
def set_charge(self, charge=None)
atomic charge
Definition: Mol2.py:149
def parse(self, data)
split the text line into a series of properties
Definition: Mol2.py:59
def set_num_subst(self, num_subst=None)
number of substructures (integer)
Definition: Mol2.py:362
def set_subst_name(self, subst_name=None)
substructure name
Definition: Mol2.py:140
def __init__(self, data)
Definition: Mol2.py:292
def set_bond_id(self, bond_id=None)
bond identifier (integer, starting from 1)
Definition: Mol2.py:217
def parse(self, data)
Parse a series of text lines, and setup compound information.
Definition: Mol2.py:408
def set_num_feat(self, num_feat=None)
number of features (integer)
Definition: Mol2.py:371
def set_num_sets(self, num_sets=None)
number of sets (integer)
Definition: Mol2.py:380
def set_mol_name(self, mol_name=None)
bond identifier (integer, starting from 1)
Definition: Mol2.py:335
def set_atom_name(self, atom_name=None)
The name of the atom (string)
Definition: Mol2.py:102
def parse(self, data)
split the text line into a series of properties
Definition: Mol2.py:200
def set_origin_atom_id(self, origin_atom_id=None)
the origin atom identifier (integer)
Definition: Mol2.py:226
def set_bond_type(self, bond_type=None)
bond type (string) one of: 1 = single 2 = double 3 = triple am = amide ar = aromatic du = dummy un = ...
Definition: Mol2.py:253
This is to manage mol2 bond lines on the form: 1 1 2 ar.
Definition: Mol2.py:175