2 Author: P. Tuffery 2008 3 Ressource Parisienne en Bioinformatique Structurale 4 http://bioserv.rpbs.univ-paris-diderot.fr 6 This is free software. You can use it, modify it, distribute it. 7 However, thanks for the feedback for any improvement you bring to it! 8 Changed by Lee-Ping from dict to OrderedDict. 11 A class to manage multiple mol2 files 13 A class to manage simple mol2 data 15 from __future__
import print_function
17 from builtins
import range
18 from builtins
import object
21 from collections
import OrderedDict
31 This is to manage mol2 atomic lines on the form: 32 1 C1 5.4790 42.2880 49.5910 C.ar 1 <1> 0.0424 34 def __init__(self, data=None):
36 if data is passed, it will be installed 52 def parse(self, data):
54 split the text line into a series of properties 75 assemble the properties as a text line, and return it 86 atom identifier (integer, starting from 1) 88 if atom_id
is not None:
94 The name of the atom (string) 96 if atom_name
is not None:
100 def set_crds(self, x = None, y = None, z = None):
102 the coordinates of the atom 104 if (x
is not None)
and (y
is not None)
and (z
is not None):
108 return self.
x, self.
y, self.
z 112 The mol2 type of the atom 114 if atom_type
is not None:
120 substructure identifier 122 if subst_id
is not None:
130 if subst_name
is not None:
138 if charge
is not None:
139 self.
charge = float(charge)
144 Never to use (in theory) 146 if status_bit
is not None:
159 This is to manage mol2 bond lines on the form: 164 if data is passed, it will be installed 167 self.origin_atom_id =
None 168 self.target_atom_id =
None 169 self.bond_type =
None 183 split the text line into a series of properties 199 bond identifier (integer, starting from 1) 201 if bond_id
is not None:
207 the origin atom identifier (integer) 209 if origin_atom_id
is not None:
215 the target atom identifier (integer) 217 if target_atom_id
is not None:
234 if bond_type
is not None:
240 Never to use (in theory) 242 if status_bit
is not None:
254 This is to manage one mol2 series of lines on the form: @verbatim 263 1 C1 5.4790 42.2880 49.5910 C.ar 1 <1> 0.0424 264 2 C2 4.4740 42.6430 50.5070 C.ar 1 <1> 0.0447 278 self.charge_type =
None 289 rs = rs +
"%s\n" %
"@<TRIPOS>MOLECULE" 290 rs = rs +
"%s\n" % self.mol_name
295 rs = rs +
"%s\n" %
"@<TRIPOS>ATOM" 298 rs = rs +
"%s\n" % atom.__repr__()
299 rs = rs +
"%s\n" %
"@<TRIPOS>BOND" 301 rs = rs +
"%s\n" % bond.__repr__()
306 def out(self, f = sys.stdout):
311 the name of the molecule 313 if mol_name
is not None:
319 number of atoms (integer) 321 if num_atoms
is not None:
327 number of bonds (integer) 329 if num_bonds
is not None:
335 number of substructures (integer) 337 if num_subst
is not None:
343 number of features (integer) 345 if num_feat
is not None:
351 number of sets (integer) 353 if num_sets
is not None:
359 the molecule type 361 if mol_type
is not None:
367 the charge type 369 if charge_type
is not None:
373 def parse(self, data):
375 Parse a series of text lines, 376 and setup compound information 379 if l.count(
"@<TRIPOS>MOLECULE"):
404 if l.count(
"@<TRIPOS>ATOM"):
412 if l.count(
"@<TRIPOS>BOND"):
420 if l.count(
"@<TRIPOS>"):
431 return the atom instance given its atom identifier 433 if self.
atoms[id-1].set_atom_id() == id:
434 return self.
atoms[id-1]
436 for i
in range(0,len(self.
atoms)):
437 if self.
atoms[i].set_atom_id() == id:
444 return a dictionary of atom instances bonded to the atom, and their types 447 for i
in range(0,len(self.
bonds)):
448 if self.
bonds[i].set_origin_atom_id() == id:
451 if self.
bonds[i].set_target_atom_id() == id:
458 modify atom types to specify donor, acceptor, or both 461 for i
in range(0,len(self.
atoms)):
465 atmType = self.
atoms[i].set_atom_type()
470 if atmType
in [
"S.2",
"S.3"]:
473 if j.set_atom_type()[0] ==
"H":
477 self.
atoms[i].set_atom_type(
"S.don")
479 self.
atoms[i].set_atom_type(
"S.acc")
483 if atmType
in [
"O.3",
"O.2",
"O.co2"]:
486 if atmType
in [
"O.3"]:
488 if j.set_atom_type()[0] ==
"H":
492 self.
atoms[i].set_atom_type(
"O.da")
494 self.
atoms[i].set_atom_type(
"O.acc")
498 if atmType
in [
"N.1",
"N.2",
"N.ar",
"N.3",
"N.4"]:
501 if j.set_atom_type()[0] ==
"H":
505 if (atmType
in [
"N.2",
"N.ar",
"N.3",
"N.4"])
and hasH:
508 if (atmType
in [
"N.1",
"N.2",
"N.ar"])
and (
not hasH):
510 if isDonnor
and isAcceptor:
511 self.
atoms[i].set_atom_type(
"N.da")
513 self.
atoms[i].set_atom_type(
"N.don")
515 self.
atoms[i].set_atom_type(
"N.acc")
519 if atmType
in [
"C.2",
"C.3",
"N.pl3"]:
522 if j.set_atom_type()[0] ==
"H":
527 if j.set_atom_type()[0]
in "ONS":
530 if (atmType
in [
"N.pl3"])
and (hasH == 0):
531 self.
atoms[i].set_atom_type(
"N.phb")
533 if (atmType
in [
"C.2",
"C.3"])
and hasH
and (
not hasONS):
534 self.
atoms[i].set_atom_type(
"C.phb")
538 def __init__(self, data = None, subset = None):
540 A collection is organized as a dictionary of compounds 541 self.num_compounds : the number of compounds 542 self.compounds : the dictionary of compounds 543 data : the data to set up the set 544 subset: it is possible to specify a subset of the compounds to load, based on their mol_name identifiers. 546 self.num_compounds = 0
548 self.compounds = OrderedDict()
551 if subset
is not None:
552 if isinstance(subset,list):
554 elif isinstance(subset,str):
557 lines = f.readlines()
559 for i
in range(0, len(lines)):
560 lines[i] = lines[i].replace(
"\n",
"")
567 if isinstance(data,mol2_set):
568 self.num_compounds = data.num_compounds
569 self.compounds = data.compounds
571 elif isinstance(data,str):
574 lines = f.readlines()
577 self.
parse(lines, subset)
580 elif isinstance(data,list):
584 def parse(self, data, subset = None):
586 parse a list of lines, detect compounds, load them 587 only load the subset if specified. 590 cmpnds = OrderedDict()
591 for l
in range(0,len(data)):
592 if (
not status)
and (data[l][0] ==
"#"):
594 if data[l].count(
"@<TRIPOS>MOLECULE"):
597 if (subset
is None)
or (cmpnd
in subset):
598 cmpnds[cmpnd][
"to"] = l
600 cmpnd = data[l+1].split()[0]
601 if (subset
is None)
or (cmpnd
in subset):
602 cmpnds[cmpnd] = OrderedDict([(
"from", l)])
603 if (subset
is None)
or (cmpnd
in subset):
604 cmpnds[cmpnd][
"to"] = len(data)
606 for cmpnd
in cmpnds.keys():
609 self.
compounds[cmpnd] =
mol2(data[cmpnds[cmpnd][
"from"]:cmpnds[cmpnd][
"to"]])
613 if __name__ ==
"__main__":
618 data =
mol2_set(sys.argv[1], subset=[
"RNAse.xray.inh8.1QHC"])
620 sys.stderr.write(
"Loaded %d compounds\n" % data.num_compounds)
622 for cmpnd
in data.compounds.keys():
624 data.compounds[cmpnd].set_donnor_acceptor_atoms()
625 print(data.compounds[cmpnd], end=
' ')
def set_crds(self, x=None, y=None, z=None)
the coordinates of the atom
def set_target_atom_id(self, target_atom_id=None)
the target atom identifier (integer)
def __init__(self, data=None)
if data is passed, it will be installed
def get_atom(self, id)
return the atom instance given its atom identifier
def set_mol_type(self, mol_type=None)
the molecule type
def set_subst_id(self, subst_id=None)
substructure identifier
def get_bonded_atoms(self, id)
return a dictionary of atom instances bonded to the atom, and their types
def set_atom_type(self, atom_type=None)
The mol2 type of the atom.
def parse(self, data, subset=None)
parse a list of lines, detect compounds, and load them; only load the subset if specified.
def __repr__(self)
assemble the properties as a text line, and return it
def set_num_bonds(self, num_bonds=None)
number of bonds (integer)
def set_charge_type(self, charge_type=None)
the charge type
def set_status_bit(self, status_bit=None)
Never to use (in theory)
def __init__(self, data=None)
if data is passed, it will be installed
def set_num_atoms(self, num_atoms=None)
number of atoms (integer)
def out(self, f=sys.stdout)
This is to manage one mol2 series of lines on the form:
This is to manage mol2 atomic lines on the form: 1 C1 5.4790 42.2880 49.5910 C.ar 1 <1> 0...
def set_donnor_acceptor_atoms(self, verbose=0)
modify atom types to specify donor, acceptor, or both
def set_atom_id(self, atom_id=None)
atom identifier (integer, starting from 1)
def set_status_bit(self, status_bit=None)
Never to use (in theory)
def set_charge(self, charge=None)
atomic charge
def parse(self, data)
split the text line into a series of properties
def set_num_subst(self, num_subst=None)
number of substructures (integer)
def set_subst_name(self, subst_name=None)
substructure name
def set_bond_id(self, bond_id=None)
bond identifier (integer, starting from 1)
def parse(self, data)
Parse a series of text lines, and setup compound information.
def set_num_feat(self, num_feat=None)
number of features (integer)
def set_num_sets(self, num_sets=None)
number of sets (integer)
def set_mol_name(self, mol_name=None)
the name of the molecule
def set_atom_name(self, atom_name=None)
The name of the atom (string)
def parse(self, data)
split the text line into a series of properties
def set_origin_atom_id(self, origin_atom_id=None)
the origin atom identifier (integer)
def set_bond_type(self, bond_type=None)
bond type (string) one of: 1 = single 2 = double 3 = triple am = amide ar = aromatic du = dummy un = ...
This is to manage mol2 bond lines on the form: 1 1 2 ar.