ForceBalance API  1.3
Automated optimization of force fields and empirical potentials
nifty.py
Go to the documentation of this file.
1 """@package forcebalance.nifty Nifty functions, intended to be imported by any module within ForceBalance.
2 
3 Table of Contents:
4 - I/O formatting
5 - Math: Variable manipulation, linear algebra, least squares polynomial fitting
6 - Pickle: Expand Python's own pickle to accommodate writing XML etree objects
7 - Commands for submitting things to the Work Queue
8 - Various file and process management functions
9 - Development stuff (not commonly used)
10 
11 Named after the mighty Sniffy Handy Nifty (King Sniffy)
12 
13 @author Lee-Ping Wang
14 @date 2018-03-10
15 """
16 from __future__ import absolute_import
17 from __future__ import division
18 from __future__ import print_function
19 
20 import filecmp
21 import itertools
22 import distutils.dir_util
23 import os
24 import re
25 import shutil
26 import sys
27 from select import select
28 
29 import numpy as np
30 from numpy.linalg import multi_dot
31 
32 # For Python 3 compatibility
33 try:
34  from itertools import zip_longest as zip_longest
35 except ImportError:
36  from itertools import izip_longest as zip_longest
37 import threading
38 from pickle import Pickler, Unpickler
39 import tarfile
40 import time
41 import subprocess
42 import math
43 import six # For six.string_types
44 from subprocess import PIPE
45 from collections import OrderedDict, defaultdict
46 
47 #================================#
48 # Set up the logger #
49 #================================#
50 if "forcebalance" in __name__:
51  # If this module is part of ForceBalance, use the package level logger
52  from .output import *
53  package="ForceBalance"
54 else:
55  from logging import *
56  # Define two handlers that don't print newline characters at the end of each line
57  class RawStreamHandler(StreamHandler):
58  """
59  Exactly like StreamHandler, except no newline character is printed at the end of each message.
60  This is done in order to ensure functions in molecule.py and nifty.py work consistently
61  across multiple packages.
62  """
63  def __init__(self, stream = sys.stdout):
64  super(RawStreamHandler, self).__init__(stream)
65 
66  def emit(self, record):
67  message = record.getMessage()
68  self.stream.write(message)
69  self.flush()
70 
71  class RawFileHandler(FileHandler):
72  """
73  Exactly like FileHandler, except no newline character is printed at the end of each message.
74  This is done in order to ensure functions in molecule.py and nifty.py work consistently
75  across multiple packages.
76  """
77  def __init__(self, *args, **kwargs):
78  super(RawFileHandler, self).__init__(*args, **kwargs)
79 
80  def emit(self, record):
81  if self.stream is None:
82  self.stream = self._open()
83  message = record.getMessage()
84  self.stream.write(message)
85  self.flush()
86 
87  if "geometric" in __name__:
88  # This ensures logging behavior is consistent with the rest of geomeTRIC
89  logger = getLogger(__name__)
90  logger.setLevel(INFO)
91  package="geomeTRIC"
92  else:
93  logger = getLogger("NiftyLogger")
94  logger.setLevel(INFO)
95  handler = RawStreamHandler()
96  logger.addHandler(handler)
97  if __name__ == "__main__":
98  package = "LPW-nifty.py"
99  else:
100  package = __name__.split('.')[0]
101 
# bz2 and gzip are optional dependencies: they are only used for reading and
# writing compressed pickle files (see lp_dump / lp_load below).
try:
    import bz2
    HaveBZ2 = True
except ImportError:
    logger.warning("bz2 module import failed (used in compressing or decompressing pickle files)\n")
    HaveBZ2 = False

try:
    import gzip
    HaveGZ = True
except ImportError:
    logger.warning("gzip module import failed (used in compressing or decompressing pickle files)\n")
    HaveGZ = False
115 
# The directory that this file lives in
rootdir = os.path.dirname(os.path.abspath(__file__))

# On 2020-05-07, these values were revised to CODATA 2018 values
# hartree-joule relationship 4.359 744 722 2071(85) e-18
# Hartree energy in eV 27.211 386 245 988(53)
# Avogadro constant 6.022 140 76 e23 (exact)
# molar gas constant 8.314 462 618 (exact)
# Boltzmann constant 1.380649e-23 (exact)
# Bohr radius 5.291 772 109 03(80) e-11
# speed of light in vacuum 299 792 458 (exact)
# reduced Planck's constant 1.054571817e-34 (exact)
# calorie-joule relationship 4.184 J (exact; from NIST)


# Molar gas constant in kJ mol^-1 K^-1
kb = 0.008314462618 # Previous value: 0.0083144100163
# Boltzmann constant in J K^-1 (exact)
kb_si = 1.380649e-23

# Conversion factors
bohr2ang = 0.529177210903 # Bohr -> Angstrom. Previous value: 0.529177210
ang2bohr = 1.0 / bohr2ang
au2kcal = 627.5094740630558 # Hartree -> kcal/mol. Previous value: 627.5096080306
kcal2au = 1.0 / au2kcal
au2kj = 2625.4996394798254 # Hartree -> kJ/mol. Previous value: 2625.5002
kj2au = 1.0 / au2kj
grad_au2gmx = 49614.75258920567 # Hartree/bohr -> kJ/mol/nm (consistent with au2kj and bohr2ang above). Previous value: 49614.75960959161
grad_gmx2au = 1.0 / grad_au2gmx
au2evang = 51.422067476325886 # Hartree/bohr -> eV/Angstrom. Previous value: 51.42209166566339
evang2au = 1.0 / au2evang
c_lightspeed = 299792458. # Speed of light in m/s (exact)
hbar = 1.054571817e-34 # Reduced Planck constant in J*s (exact)
avogadro = 6.02214076e23 # Avogadro constant in mol^-1 (exact)
au_mass = 9.1093837015e-31 # Atomic unit of mass in kg
amu_mass = 1.66053906660e-27 # Atomic mass unit in kg
amu2au = amu_mass / au_mass
cm2au = 100 * c_lightspeed * (2*np.pi*hbar) * avogadro / 1000 / au2kj # Multiply to convert cm^-1 to Hartree
ambervel2au = 9.349961132249932e-04 # Multiply to go from AMBER velocity unit Ang/(1/20.455 ps) to bohr/atu.
# Aliases kept for backward compatibility with GROMACS-facing code
eqcgmx = au2kj # Previous value: 2625.5002
fqcgmx = -grad_au2gmx # Previous value: -49621.9
160 
161 #=========================#
162 # I/O formatting #
163 #=========================#
164 # These functions may be useful someday but I have not tested them
165 # def bzip2(src):
166 # dest = src+'.bz2'
167 # if not os.path.exists(src):
168 # logger.error('File to be compressed does not exist')
169 # raise RuntimeError
170 # if os.path.exists(dest):
171 # logger.error('Archive to be created already exists')
172 # raise RuntimeError
173 # with open(src, 'rb') as input:
174 # with bz2.BZ2File(dest, 'wb', compresslevel=9) as output:
175 # copyfileobj(input, output)
176 # os.remove(input)
177 
178 # def bunzip2(src):
179 # dest = re.sub('\.bz2$', '', src)
180 # if not os.path.exists(src):
181 # logger.error('File to be decompressed does not exist')
182 # raise RuntimeError
183 # if os.path.exists(dest):
184 # logger.error('Target path for decompression already exists')
185 # raise RuntimeError
186 # with bz2.BZ2File(src, 'rb', compresslevel=9) as input:
187 # with open(dest, 'wb') as output:
188 # copyfileobj(input, output)
189 # os.remove(input)
190 
def pvec1d(vec1d, precision=1, format="e", loglevel=INFO):
    """Print a 1-D vector to the logger on one line.

    @param[in] vec1d a 1-D vector
    @param[in] precision number of digits after the decimal point
    @param[in] format printf-style conversion character (e.g. 'e' or 'f')
    @param[in] loglevel logging level used for the printout
    """
    arr = np.array(vec1d)
    # Build the element format once, e.g. "% .1e " for the defaults.
    fmt = "%% .%i%s " % (precision, format)
    for element in arr:
        logger.log(loglevel, fmt % element)
    logger.log(loglevel, '\n')
200 
def astr(vec1d, precision=4):
    """ Render a 1-D array as a fixed-format string, suitable for use as a
    dictionary key. """
    fmt = "%% .%ie " % precision
    return ' '.join(fmt % x for x in vec1d)
204 
def pmat2d(mat2d, precision=1, format="e", loglevel=INFO):
    """Print a 2-D array to the logger, one row per line.

    @param[in] mat2d a 2-D array
    @param[in] precision number of digits after the decimal point
    @param[in] format printf-style conversion character (e.g. 'e' or 'f')
    @param[in] loglevel logging level used for the printout
    """
    mat = np.array(mat2d)
    fmt = "%% .%i%s " % (precision, format)
    for r in range(mat.shape[0]):
        for c in range(mat.shape[1]):
            logger.log(loglevel, fmt % mat[r][c])
        # Newline after each row.
        logger.log(loglevel, '\n')
def grouper(iterable, n):
    """Collect data into chunks of length n; the last chunk may be shorter.
    e.g. grouper('ABCDEFG', 3) --> [['A','B','C'], ['D','E','F'], ['G']]

    NOTE: the fill value is None, so any genuine None entries in the input
    are also dropped from the output chunks.
    """
    its = [iter(iterable)] * n
    chunks = []
    for raw in zip_longest(*its):
        chunks.append([item for item in raw if item is not None])
    return chunks
222 
def encode(l):
    """Run-length encode a sequence: return [[count, value], ...] for each
    run of consecutive equal values."""
    runs = []
    for value, run in itertools.groupby(l):
        runs.append([len(list(run)), value])
    return runs
225 
def segments(e):
    """Given run-length encoded input (as produced by encode), return a list
    of (start, end) index pairs covering the runs whose value equals 1."""
    # Cumulative offsets give the starting index of every run.
    offsets = np.cumsum([0] + [count for count, _ in e])
    spans = []
    for k, (count, value) in enumerate(e):
        if value == 1:
            spans.append((offsets[k], offsets[k] + count))
    return spans
231 
def commadash(l):
    """Compactly format a list of zero-based indices as one-based ranges,
    e.g. [27, 28, 29, 30, 31, 88, 89, 90, 91, 100] -> '28-32,89-92,101'.
    Returns '(empty)' for an empty input."""
    ordered = sorted(l)
    if not ordered:
        return "(empty)"
    # Sentinel so the final run is terminated, then build a boolean mask over
    # 0..max and run-length encode it into (start, end) segments.
    ordered.append(ordered[-1]+1)
    mask = [k in ordered for k in range(ordered[-1])]
    pieces = []
    for lo, hi in segments(encode(mask)):
        if hi - 1 > lo:
            pieces.append('%i-%i' % (lo+1, hi))
        else:
            pieces.append('%i' % (lo+1))
    return ','.join(pieces)
241 
def uncommadash(s):
    """Invert commadash(): expand a string like '27-31,88-91,100,136-139'
    into the corresponding list of zero-based indices, e.g.
    [26, 27, 28, 29, 30, 87, 88, 89, 90, 99, 135, 136, 137, 138].

    @param[in] s Comma-separated string of one-based numbers / dashed ranges
    @return L Sorted list of zero-based indices
    @raise RuntimeError if the string is malformed, out of order, contains
    duplicates, or contains nonpositive / inverted ranges
    """
    L = []
    try:
        for w in s.split(','):
            ws = w.split('-')
            a = int(ws[0])-1
            if len(ws) == 1:
                b = int(ws[0])
            elif len(ws) == 2:
                b = int(ws[1])
            else:
                logger.warning("Dash-separated list cannot exceed length 2\n")
                # Bare 'raise' with no active exception raises RuntimeError,
                # which the handler below converts to the standard error.
                raise
            if a < 0 or b <= 0 or b <= a:
                if a < 0 or b <= 0:
                    logger.warning("Items in list cannot be zero or negative: %d %d\n" % (a, b))
                else:
                    logger.warning("Second number cannot be smaller than first: %d %d\n" % (a, b))
                raise
            newL = range(a,b)
            if any([i in L for i in newL]):
                logger.warning("Duplicate entries found in list\n")
                raise
            L += newL
        if sorted(L) != L:
            logger.warning("List is out of order\n")
            raise
    # Was a bare 'except:', which would also swallow KeyboardInterrupt and
    # SystemExit; Exception covers all the validation failures above.
    except Exception:
        logger.error('Invalid string for converting to list of numbers: %s\n' % s)
        raise RuntimeError
    return L
275 
def natural_sort(l):
    """ Return a copy of l sorted in "natural" (human) order, so that
    e.g. 'item2' sorts before 'item10'. """
    def to_key(text):
        # Split into digit / non-digit runs; numeric runs compare as ints,
        # everything else compares case-insensitively.
        return [int(tok) if tok.isdigit() else tok.lower()
                for tok in re.split('([0-9]+)', text)]
    return sorted(l, key=to_key)
284 
def printcool(text,sym="#",bold=False,color=2,ansi=None,bottom='-',minwidth=50,center=True,sym2="="):
    """Cool-looking printout for slick formatting of output.

    @param[in] text The string that the printout is based upon. This function
    will print out the string, ANSI-colored and enclosed in the symbol
    for example:\n
    <tt> ################# </tt>\n
    <tt> ### I am cool ### </tt>\n
    <tt> ################# </tt>
    @param[in] sym The surrounding symbol\n
    @param[in] bold Whether to use bold print

    @param[in] color The ANSI color:\n
    1 red\n
    2 green\n
    3 yellow\n
    4 blue\n
    5 magenta\n
    6 cyan\n
    7 white

    @param[in] ansi Explicit ANSI attribute string; overrides 'color' if given
    @param[in] bottom The symbol for the bottom bar

    @param[in] minwidth The minimum width for the box, if the text is very short
    then we insert the appropriate number of padding spaces

    @param[in] center Whether to center each line; may be a list with one
    boolean per line of text
    @param[in] sym2 The symbol used to build the top/bottom horizontal bars

    @return bar The bottom bar is returned for the user to print later, e.g. to mark off a 'section'
    """
    def newlen(l):
        # Visible length of a line: ANSI escape sequences are zero-width.
        return len(re.sub(r"\x1b\[[0-9;]*m","",l))
    text = text.split('\n')
    width = max(minwidth,max([newlen(line) for line in text]))
    bar = ''.join([sym2 for i in range(width + 6)])
    bar = sym + bar + sym
    #bar = ''.join([sym for i in range(width + 8)])
    logger.info('\r'+bar + '\n')
    for ln, line in enumerate(text):
        # 'center' may be a per-line list of booleans.
        if type(center) is list: c1 = center[ln]
        else: c1 = center
        if c1:
            padleft = ' ' * (int((width - newlen(line))/2))
        else:
            padleft = ''
        padright = ' '* (width - newlen(line) - len(padleft))
        if ansi is not None:
            ansi = str(ansi)
            logger.info("%s| \x1b[%sm%s " % (sym, ansi, padleft)+line+" %s\x1b[0m |%s\n" % (padright, sym))
        elif color is not None:
            if color == 0 and bold:
                logger.info("%s| \x1b[1m%s " % (sym, padleft) + line + " %s\x1b[0m |%s\n" % (padright, sym))
            elif color == 0:
                logger.info("%s| %s " % (sym, padleft)+line+" %s |%s\n" % (padright, sym))
            else:
                # "\x1b[9Xm" selects a high-intensity foreground color X,
                # optionally prefixed with "1;" for bold.
                logger.info("%s| \x1b[%s9%im%s " % (sym, bold and "1;" or "", color, padleft)+line+" %s\x1b[0m |%s\n" % (padright, sym))
            # if color == 3 or color == 7:
            # print "%s\x1b[40m\x1b[%s9%im%s" % (''.join([sym for i in range(3)]), bold and "1;" or "", color, padleft),line,"%s\x1b[0m%s" % (padright, ''.join([sym for i in range(3)]))
            # else:
            # print "%s\x1b[%s9%im%s" % (''.join([sym for i in range(3)]), bold and "1;" or "", color, padleft),line,"%s\x1b[0m%s" % (padright, ''.join([sym for i in range(3)]))
        else:
            warn_press_key("Inappropriate use of printcool")
    logger.info(bar + '\n')
    botbar = ''.join([bottom for i in range(width + 8)])
    return botbar + '\n'
348 
def printcool_dictionary(Dict,title="Dictionary Keys : Values",bold=False,color=2,keywidth=25,topwidth=50,center=True,leftpad=0):
    """See documentation for printcool; this is a nice way to print out keys/values in a dictionary.

    The keys in the dictionary are sorted before printing out (unless it is
    an OrderedDict, in which case insertion order is preserved).  Entries
    whose value is None are skipped.

    @param[in] Dict The dictionary to be printed
    @param[in] title The title of the printout
    @param[in] bold Whether to use bold print
    @param[in] color The ANSI color (see printcool)
    @param[in] keywidth Width each key is left-justified to
    @param[in] topwidth Minimum width of the surrounding box
    @param[in] center Whether to center the title
    @param[in] leftpad Number of spaces prepended to each key/value line
    """
    if Dict is None: return
    bar = printcool(title,bold=bold,color=color,minwidth=topwidth,center=center)
    def magic_string(s):
        # Left-justify the key to 'keywidth' characters.  This replaces an
        # eval()-based formatting trick ('%-25s' % 'key' built as source and
        # eval'ed), which was an unnecessary use of eval and corrupted keys
        # containing backslash escape sequences.
        return s.ljust(keywidth)
    if isinstance(Dict, OrderedDict):
        logger.info('\n'.join([' '*leftpad + "%s %s " % (magic_string(str(key)),str(Dict[key])) for key in Dict if Dict[key] is not None]))
    else:
        logger.info('\n'.join([' '*leftpad + "%s %s " % (magic_string(str(key)),str(Dict[key])) for key in sorted([i for i in Dict]) if Dict[key] is not None]))
    logger.info("\n%s" % bar)
369 
370 #===============================#
371 #| Math: Variable manipulation |#
372 #===============================#
def isint(word):
    """ONLY matches integers! If you have a decimal point? None shall pass!

    @param[in] word String (for instance, '123', '153.0', '2.', '-354')
    @return answer Boolean which specifies whether the string is an integer (only +/- sign followed by digits)
    """
    try:
        word = str(word)
    # Was a bare 'except:'; Exception is sufficient for str() failures.
    except Exception:
        return False
    # Return a real bool as the docstring promises (previously this returned
    # the re.Match object or None, which is only truthy/falsy).
    return bool(re.match('^[-+]?[0-9]+$', word))
385 
def isfloat(word):
    """Matches ANY number; it can be a decimal, scientific notation, what have you
    CAUTION - this will also match an integer.

    @param[in] word String (for instance, '123', '153.0', '2.', '-354')
    @return answer Boolean which specifies whether the string is any number
    """
    try:
        word = str(word)
    # Was a bare 'except:'; Exception is sufficient for str() failures.
    except Exception:
        return False
    if len(word) == 0:
        return False
    # Return a real bool as the docstring promises (previously the re.Match
    # object or None was returned).
    # NOTE(review): this regex also accepts a bare sign or '.', for which
    # float() would fail -- kept as-is for backward compatibility.
    return bool(re.match(r'^[-+]?[0-9]*\.?[0-9]*([eEdD][-+]?[0-9]+)?$', word))
398 
def isdecimal(word):
    """Matches numbers that carry a fractional part; see isint and isfloat.

    @param[in] word String (for instance, '123', '153.0', '2.', '-354')
    @return answer Boolean which specifies whether the string is a number but
    not a plain integer
    """
    try:
        word = str(word)
    # Was a bare 'except:'; Exception is sufficient for str() failures.
    except Exception:
        return False
    # bool() guarantees a boolean result even if the helpers return
    # truthy/falsy non-bool values.
    return bool(isfloat(word) and not isint(word))
409 
def floatornan(word):
    """Convert a string to float, substituting a big number for NaN/garbage.

    @param[in] word The string to be converted
    @return answer The string converted to a float; if not a float, return 1e10
    @todo I could use suggestions for making this better.
    """
    big = 1e10
    if isfloat(word):
        return float(word)
    else:
        # Bug fix: the format string has two fields (%s and %.1e), so both
        # 'word' and 'big' must be supplied.  Previously only 'big' was
        # passed, raising TypeError whenever this branch was reached.
        logger.info("Setting %s to % .1e\n" % (word, big))
        return big
423 
def col(vec):
    """Reshape any list, array, or matrix into a single-column 2D array.

    @param[in] vec The input vector that is to be made into a column
    @return A 1-column 2D array
    """
    arr = np.array(vec)
    return arr.reshape(-1, 1)
435 
def row(vec):
    """Reshape any list, array, or matrix into a single-row 2D array.

    @param[in] vec The input vector that is to be made into a row
    @return answer A 1-row 2D array
    """
    arr = np.array(vec)
    return arr.reshape(1, -1)
444 
def flat(vec):
    """Flatten any list, array, or matrix into a 1-D array.

    @param[in] vec The data to be flattened
    @return answer The flattened data
    """
    arr = np.array(vec)
    return arr.reshape(-1)
452 
def est124(val):
    """Round a positive number to the value of the form 1eN, 2eN or 4eN
    that is closest to it in log space.
    """
    lg = np.log10(val)
    exponent = math.floor(lg)
    frac = lg - exponent
    # log10 of the candidate leading digits 1, 2, 4, 10.
    L1 = 0.0
    L2 = 0.3010299956639812
    L4 = 0.6020599913279624
    L10 = 1.0
    # Pick the digit whose log is nearest to the fractional part.
    if frac < 0.5*(L1+L2):
        lead = 1.0
    elif frac < 0.5*(L2+L4):
        lead = 2.0
    elif frac < 0.5*(L4+L10):
        lead = 4.0
    else:
        lead = 10.0
    return lead*10**exponent
473 
def est1234568(val):
    """Round a positive number to the value of the form [1234568]eN that is
    closest to it in log space. Just because I don't like seven and nine.
    Call me a numberist?
    """
    lg = np.log10(val)
    exponent = math.floor(lg)
    frac = lg - exponent
    # Candidate leading digits and their log10 values; 10.0 is the sentinel
    # for rounding up to the next decade.
    digits = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 8.0, 10.0]
    logs = [0.0, 0.3010299956639812, np.log10(3), 0.6020599913279624,
            np.log10(5), np.log10(6), np.log10(8), 1.0]
    lead = 10.0
    # First digit whose midpoint (in log space) with its successor exceeds
    # the fractional part wins.
    for k in range(len(digits)-1):
        if frac < 0.5*(logs[k]+logs[k+1]):
            lead = digits[k]
            break
    return lead*10**exponent
507 
def monotonic(arr, start, end):
    """Modify arr in place so it decreases monotonically from arr[start]
    toward arr[end], linearly interpolating over any stretch that rises above
    the running minimum.  Works in either direction (end may be below start).
    Returns None; the input array is edited in place.
    """
    running = arr[start]
    anchor = start
    if end > start:
        # Forward scan: each new low triggers interpolation from the
        # previous low down to the new one.
        for pos in range(start+1, end):
            if arr[pos] < running:
                arr[anchor:pos+1] = np.linspace(running, arr[pos], pos-anchor+1)
                running = arr[pos]
                anchor = pos
    if end < start:
        # Backward scan (end index inclusive), mirror of the above.
        for pos in range(start-1, end-1, -1):
            if arr[pos] < running:
                arr[pos:anchor+1] = np.linspace(arr[pos], running, anchor-pos+1)
                running = arr[pos]
                anchor = pos
528 
def monotonic_decreasing(arr, start=None, end=None, verbose=False):
    """
    Return the indices of an array corresponding to strictly monotonic
    decreasing behavior.

    Parameters
    ----------
    arr : numpy.ndarray
        Input array
    start : int
        Starting index (first element if None)
    end : int
        Ending index (last element if None); note the scan stops just
        before this index
    verbose : bool
        If True, log which indices are kept or dropped

    Returns
    -------
    indices : numpy.ndarray
        Selected indices
    """
    start = 0 if start is None else start
    end = len(arr) - 1 if end is None else end
    lowest = arr[start]
    keep = [start]
    if verbose: logger.info("Starting @ %i : %.6f\n" % (start, arr[start]))
    # Build the scan order; forward or backward depending on start/end.
    if end > start:
        scan = range(start+1, end)
    elif end < start:
        scan = range(start-1, end-1, -1)
    else:
        scan = range(0)
    for k in scan:
        if arr[k] < lowest:
            lowest = arr[k]
            keep.append(k)
            if verbose: logger.info("Including %i : %.6f\n" % (k, arr[k]))
        else:
            if verbose: logger.info("Excluding %i : %.6f\n" % (k, arr[k]))
    return np.array(keep)
576 
577 #====================================#
578 #| Math: Vectors and linear algebra |#
579 #====================================#
def orthogonalize(vec1, vec2):
    """Project out of vec1 the component lying along vec2.

    @param[in] vec1 The projectee (i.e. output is some modified version of vec1)
    @param[in] vec2 The projector (component subtracted out from vec1 is parallel to this)
    @return answer A copy of vec1 but with the vec2-component projected out.
    """
    unit = vec2 / np.linalg.norm(vec2)
    overlap = np.dot(vec1, unit)
    return vec1 - overlap * unit
590 
def invert_svd(X,thresh=1e-12):
    """Invert a matrix using singular value decomposition, truncating small
    singular values.

    @param[in] X The 2-D NumPy array containing the matrix to be inverted
    @param[in] thresh The SVD threshold; singular values with magnitude at or
    below this are set to zero instead of being inverted
    @return Xt The 2-D NumPy array containing the inverted matrix
    """
    u, s, vh = np.linalg.svd(X, full_matrices=0)
    # Invert each singular value above the cutoff; zero out the rest.
    sinv = np.array([1.0/sv if abs(sv) > thresh else 0.0 for sv in s])
    # X^-1 = V diag(1/s) U^T
    return multi_dot([np.transpose(vh), np.diag(sinv), np.transpose(u)])
614 
615 #==============================#
616 #| Linear least squares |#
617 #==============================#
def get_least_squares(x, y, w = None, thresh=1e-12):
    """
    Weighted linear least-squares fit: solve for Beta in y ~ X Beta.

    @code
     __                           __
    |                               |
    | 1 (x0) (x0)^2 (x0)^3          |
    | 1 (x1) (x1)^2 (x1)^3          |
    | 1 (x2) (x2)^2 (x2)^3          |
    | 1 (x3) (x3)^2 (x3)^3          |
    | 1 (x4) (x4)^2 (x4)^3          |
    |__                           __|

    @endcode

    @param[in] x (2-D array) An array of X-values (see above); a 1-D input is
    treated as a single column
    @param[in] y (array) An array of Y-values (only used in getting the least squares coefficients)
    @param[in] w (array) An array of weights, hopefully normalized to one.
    @param[out] Beta The least-squares coefficients
    @param[out] Hat The hat matrix that takes linear combinations of data y-values to give fitted y-values (weights)
    @param[out] yfit The fitted y-values
    @param[out] MPPI The Moore-Penrose pseudoinverse (multiply by Y to get least-squares coefficients, multiply by dY/dk to get derivatives of least-squares coefficients)
    """
    # X is a 'tall' matrix.
    X = np.array(x)
    if len(X.shape) == 1:
        X = X[:,np.newaxis]
    Y = col(y)
    n_x = X.shape[0]
    n_fit = X.shape[1]
    if n_fit > n_x:
        logger.warning("Argh? It seems like this problem is underdetermined!\n")
    # Build the weight matrix.
    if w is not None:
        if len(w) != n_x:
            # Bug fix: both values must be in the format tuple; previously the
            # '%' was applied to len(w) alone, raising TypeError instead of
            # showing the intended warning.
            warn_press_key("The weight array length (%i) must be the same as the number of 'X' data points (%i)!" % (len(w), n_x))
        # Normalize weights to unit mean without mutating the caller's array
        # (the old in-place 'w /= np.mean(w)' modified the input argument).
        w = np.array(w) / np.mean(w)
        WH = np.diag(w**0.5)
    else:
        WH = np.eye(n_x)
    # Make the Moore-Penrose Pseudoinverse.
    # This resembles the formula (X'WX)^-1 X' W^1/2
    # NOTE(review): the 'thresh' parameter is not forwarded to np.linalg.pinv
    # (which applies its own relative 'rcond' cutoff) -- confirm intent.
    MPPI = np.linalg.pinv(np.dot(WH, X))
    Beta = multi_dot([MPPI, WH, Y])
    Hat = multi_dot([WH, X, MPPI])
    yfit = flat(np.dot(Hat, Y))
    # Return the least-squares coefficients, the hat matrix (maps y to yfit),
    # yfit itself, and the pseudoinverse (useful for parametric derivatives).
    return np.array(Beta).flatten(), np.array(Hat), np.array(yfit).flatten(), np.array(MPPI)
669 
670 #===========================================#
671 #| John's statisticalInefficiency function |#
672 #===========================================#
def statisticalInefficiency(A_n, B_n=None, fast=False, mintime=3, warn=True):

    """
    Compute the (cross) statistical inefficiency of (two) timeseries.

    Notes
    The same timeseries can be used for both A_n and B_n to get the autocorrelation statistical inefficiency.
    The fast method described in Ref [1] is used to compute g.

    References
    [1] J. D. Chodera, W. C. Swope, J. W. Pitera, C. Seok, and K. A. Dill. Use of the weighted
    histogram analysis method for the analysis of simulated and parallel tempering simulations.
    JCTC 3(1):26-41, 2007.

    Examples

    Compute statistical inefficiency of timeseries data with known correlation time.

    >>> import timeseries
    >>> A_n = timeseries.generateCorrelatedTimeseries(N=100000, tau=5.0)
    >>> g = statisticalInefficiency(A_n, fast=True)

    @param[in] A_n (required, numpy array) - A_n[n] is nth value of
    timeseries A. Length is deduced from vector.

    @param[in] B_n (optional, numpy array) - B_n[n] is nth value of
    timeseries B. Length is deduced from vector. If supplied, the
    cross-correlation of timeseries A and B will be estimated instead of
    the autocorrelation of timeseries A.

    @param[in] fast (optional, boolean) - if True, will use faster (but
    less accurate) method to estimate correlation time, described in
    Ref. [1] (default: False)

    @param[in] mintime (optional, int) - minimum amount of correlation
    function to compute (default: 3) The algorithm terminates after
    computing the correlation time out to mintime when the correlation
    function first goes negative. Note that this time may need to be
    increased if there is a strong initial negative peak in the
    correlation function.

    @param[in] warn (optional, boolean) - if True, log a warning when the
    sample covariance vanishes and g cannot be computed.

    @return g The estimated statistical inefficiency (equal to 1 + 2
    tau, where tau is the correlation time). We enforce g >= 1.0.

    """
    # Create numpy copies of input arguments.
    A_n = np.array(A_n)
    if B_n is not None:
        B_n = np.array(B_n)
    else:
        # Autocorrelation case: correlate the series with itself.
        B_n = np.array(A_n)
    # Get the length of the timeseries.
    N = A_n.shape[0]
    # Be sure A_n and B_n have the same dimensions.
    if A_n.shape != B_n.shape:
        logger.error('A_n and B_n must have same dimensions.\n')
        # NOTE(review): ParameterError is expected to come from the
        # 'from .output import *' at the top of this module -- confirm.
        raise ParameterError
    # Initialize statistical inefficiency estimate with uncorrelated value.
    g = 1.0
    # Compute mean of each timeseries.
    mu_A = A_n.mean()
    mu_B = B_n.mean()
    # Make temporary copies of fluctuation from mean.
    dA_n = A_n.astype(np.float64) - mu_A
    dB_n = B_n.astype(np.float64) - mu_B
    # Compute estimator of covariance of (A,B) using estimator that will ensure C(0) = 1.
    sigma2_AB = (dA_n * dB_n).mean() # standard estimator to ensure C(0) = 1
    # Trap the case where this covariance is zero, and we cannot proceed.
    if sigma2_AB == 0:
        if warn:
            logger.warning('Sample covariance sigma_AB^2 = 0 -- cannot compute statistical inefficiency\n')
        return 1.0
    # Accumulate the integrated correlation time by computing the normalized correlation time at
    # increasing values of t. Stop accumulating if the correlation function goes negative, since
    # this is unlikely to occur unless the correlation function has decayed to the point where it
    # is dominated by noise and indistinguishable from zero.
    t = 1
    increment = 1
    while t < N-1:
        # compute normalized fluctuation correlation function at time t
        C = sum( dA_n[0:(N-t)]*dB_n[t:N] + dB_n[0:(N-t)]*dA_n[t:N] ) / (2.0 * float(N-t) * sigma2_AB)
        # Terminate if the correlation function has crossed zero and we've computed the correlation
        # function at least out to 'mintime'.
        if (C <= 0.0) and (t > mintime):
            break
        # Accumulate contribution to the statistical inefficiency.
        g += 2.0 * C * (1.0 - float(t)/float(N)) * float(increment)
        # Increment t and the amount by which we increment t.
        t += increment
        # Increase the interval if "fast mode" is on.
        if fast: increment += 1
    # g must be at least unity
    if g < 1.0: g = 1.0
    # Return the computed statistical inefficiency.
    return g
768 
def mean_stderr(ts):
    """Return the mean and the statistical-inefficiency-corrected standard
    error of a time series ts."""
    avg = np.mean(ts)
    # Inflate the naive standard error by the statistical inefficiency to
    # account for correlation between successive samples.
    g = statisticalInefficiency(ts, warn=False)
    err = np.std(ts) * np.sqrt(g / len(ts))
    return avg, err
773 
# Column-wise statistical inefficiency: each column of A_n (optionally paired
# with the corresponding column of B_n) is fed to statisticalInefficiency and
# the result broadcast down that column of the output array.
def multiD_statisticalInefficiency(A_n, B_n=None, fast=False, mintime=3, warn=True):
    n_row = A_n.shape[0]
    n_col = A_n.shape[-1]
    out = np.zeros((n_row, n_col))
    for j in range(n_col):
        other = None if B_n is None else B_n[:, j]
        out[:, j] = statisticalInefficiency(A_n[:, j], other, fast, mintime, warn)
    return out
785 
786 #========================================#
787 #| Loading compressed pickles |#
788 #========================================#
789 
def lp_dump(obj, fnm, protocol=0):
    """ Write an object to a compressed pickle file at path fnm (gzip if
    available, otherwise bz2, otherwise uncompressed).

    @param[in] obj The object to pickle
    @param[in] fnm Destination path
    @param[in] protocol Pickle protocol number
    """
    if os.path.islink(fnm):
        # Refuse to write through a symlink; replace it with a regular file.
        logger.warning("Trying to write to a symbolic link %s, removing it first\n" % fnm)
        os.unlink(fnm)
    if HaveGZ:
        f = gzip.GzipFile(fnm, 'wb')
    elif HaveBZ2:
        f = bz2.BZ2File(fnm, 'wb')
    else:
        f = open(fnm, 'wb')
    # Context manager guarantees the handle is closed even if pickling fails
    # (the old code leaked the file handle on error).
    with f:
        Pickler(f, protocol).dump(obj)
807 
def lp_load(fnm):
    """ Read an object from a (possibly compressed) pickle file at path fnm.

    Tries gzip first, then bz2, then an uncompressed read, so files written
    by lp_dump under any compression availability can be loaded.

    SECURITY NOTE: this unpickles the file contents; never call it on
    untrusted data.
    """
    if not os.path.exists(fnm):
        logger.error("lp_load cannot read from a path that doesn't exist (%s)" % fnm)
        raise IOError

    def load_uncompress():
        logger.warning("Compressed file loader failed, attempting to read as uncompressed file\n")
        f = open(fnm, 'rb')
        try:
            answer = Unpickler(f).load()
        except UnicodeDecodeError:
            # Fallback for pickles written under Python 2.
            answer = Unpickler(f, encoding='latin1').load()
        f.close()
        return answer

    def load_bz2():
        f = bz2.BZ2File(fnm, 'rb')
        try:
            answer = Unpickler(f).load()
        except UnicodeDecodeError:
            # Fallback for pickles written under Python 2.
            answer = Unpickler(f, encoding='latin1').load()
        f.close()
        return answer

    def load_gz():
        f = gzip.GzipFile(fnm, 'rb')
        try:
            answer = Unpickler(f).load()
        except UnicodeDecodeError:
            # Fallback for pickles written under Python 2.
            answer = Unpickler(f, encoding='latin1').load()
        f.close()
        return answer

    # Deliberate best-effort fallback chain: try the best available
    # decompressor and on any failure drop down to the next one.
    if HaveGZ:
        try:
            answer = load_gz()
        except:
            if HaveBZ2:
                try:
                    answer = load_bz2()
                except:
                    answer = load_uncompress()
            else:
                answer = load_uncompress()
    elif HaveBZ2:
        try:
            answer = load_bz2()
        except:
            answer = load_uncompress()
    else:
        answer = load_uncompress()
    return answer
861 
862 #==============================#
863 #| Work Queue stuff |#
864 #==============================#
# Work Queue (CCTools) is an optional dependency; when the import fails, the
# queue_up functions below are simply unusable.
try:
    import work_queue
except:
    pass
    #logger.warning("Work Queue library import fail (You can't queue up jobs using Work Queue)\n")

# Global variable corresponding to the Work Queue object
WORK_QUEUE = None

# Global variable containing a mapping from target names to Work Queue task IDs
WQIDS = defaultdict(list)
876 
def getWorkQueue():
    """Return the module-global Work Queue object (None until created)."""
    global WORK_QUEUE
    return WORK_QUEUE
880 
def getWQIds():
    """Return the global mapping from target names to Work Queue task IDs."""
    global WQIDS
    return WQIDS
884 
def createWorkQueue(wq_port, debug=True, name=package):
    """Create the module-global Work Queue object listening on wq_port.

    @param[in] wq_port Port number for the Work Queue master
    @param[in] debug If True, enable verbose Work Queue debug output
    @param[in] name Project name advertised to workers (defaults to the
    module-level 'package' string, bound at import time)
    """
    global WORK_QUEUE
    if debug:
        work_queue.set_debug_flag('all')
    WORK_QUEUE = work_queue.WorkQueue(port=wq_port)
    WORK_QUEUE.specify_name(name)
    # QYD: prefer the worker that is fastest in previous tasks
    # another choice is first-come-first serve: WORK_QUEUE_SCHEDULE_FCFS
    WORK_QUEUE.specify_algorithm(work_queue.WORK_QUEUE_SCHEDULE_TIME)
    # QYD: We don't want to specify the following extremely long keepalive times
    # because they will prevent checking "dead" workers, causing the program to wait forever
    #WORK_QUEUE.specify_keepalive_timeout(8640000)
    #WORK_QUEUE.specify_keepalive_interval(8640000)
898 
def destroyWorkQueue():
    """Convenience function to destroy the Work Queue objects, resetting the
    module-level state so that a fresh queue can be created later."""
    global WORK_QUEUE, WQIDS
    WORK_QUEUE = None
    WQIDS = defaultdict(list)
def queue_up(wq, command, input_files, output_files, tag=None, tgt=None, verbose=True, print_time=60):
    """
    Submit a job to the Work Queue.

    @param[in] wq (Work Queue Object)
    @param[in] command (string) The command to run on the remote worker.
    @param[in] input_files (list of files) A list of locations of the input files.
    @param[in] output_files (list of files) A list of locations of the output files.
    """
    global WQIDS
    task = work_queue.Task(command)
    here = os.getcwd()
    # Stage each input/output file relative to the current directory.
    for fnm in input_files:
        task.specify_input_file(os.path.join(here, fnm), fnm, cache=False)
    for fnm in output_files:
        task.specify_output_file(os.path.join(here, fnm), fnm, cache=False)
    if tag is None:
        tag = command
    task.specify_tag(tag)
    task.print_time = print_time
    taskid = wq.submit(task)
    if verbose:
        logger.info("Submitting command '%s' to the Work Queue, %staskid %i\n" % (command, "tag %s, " % tag if tag != command else "", taskid))
    # Record the task ID under its target name for later bookkeeping.
    key = tgt.name if tgt is not None else "None"
    WQIDS[key].append(taskid)
933 
def queue_up_src_dest(wq, command, input_files, output_files, tag=None, tgt=None, verbose=True, print_time=60):
    """
    Submit a job to the Work Queue. This function is a bit fancier in that we can explicitly
    specify where the input files come from, and where the output files go to.

    @param[in] wq (Work Queue Object)
    @param[in] command (string) The command to run on the remote worker.
    @param[in] input_files (list of 2-tuples) A list of local and
    remote locations of the input files.
    @param[in] output_files (list of 2-tuples) A list of local and
    remote locations of the output files.
    """
    global WQIDS
    task = work_queue.Task(command)
    for local_path, remote_path in input_files:
        task.specify_input_file(local_path, remote_path, cache=False)
    for local_path, remote_path in output_files:
        task.specify_output_file(local_path, remote_path, cache=False)
    if tag is None:
        tag = command
    task.specify_tag(tag)
    task.print_time = print_time
    taskid = wq.submit(task)
    if verbose:
        logger.info("Submitting command '%s' to the Work Queue, taskid %i\n" % (command, taskid))
    # Record the task ID under its target name for later bookkeeping.
    WQIDS[tgt.name if tgt is not None else "None"].append(taskid)
964 
def wq_wait1(wq, wait_time=10, wait_intvl=1, print_time=60, verbose=False):
    """ This function waits ten seconds to see if a task in the Work Queue has finished.

    @param[in] wq (Work Queue Object)
    @param[in] wait_time (int) Total number of seconds to wait for finished tasks.
    @param[in] wait_intvl (int) Seconds to block per wq.wait() call.
    @param[in] print_time (int) Only report successful tasks that ran longer than this.
    @param[in] verbose (bool) Print detailed per-task and queue diagnostics.
    """
    global WQIDS
    if verbose: logger.info('---\n')
    if wait_intvl >= wait_time:
        wait_time = wait_intvl
        numwaits = 1
    else:
        numwaits = int(wait_time/wait_intvl)
    for sec in range(numwaits):
        task = wq.wait(wait_intvl)
        if task:
            # cmd_execution_time is reported in microseconds.
            exectime = task.cmd_execution_time/1000000
            if verbose:
                logger.info('A job has finished!\n')
                # BUGFIX: several of these task fields are integers; the old
                # string concatenation (e.g. "..." + task.return_status) raised
                # TypeError. Use %-formatting, add the missing separator after
                # the tag, and terminate every line with a newline.
                logger.info('Job name = %s command = %s\n' % (task.tag, task.command))
                logger.info("status = %s\n" % task.status)
                logger.info("return_status = %s\n" % task.return_status)
                logger.info("result = %s\n" % task.result)
                logger.info("host = %s\n" % task.hostname)
                logger.info("execution time = %s\n" % exectime)
                logger.info("total_bytes_transferred = %s\n" % task.total_bytes_transferred)
            if task.result != 0:
                # Task failed: resubmit it and move its ID bookkeeping to the new task ID.
                oldid = task.id
                oldhost = task.hostname
                tgtname = "None"
                for tnm in WQIDS:
                    if task.id in WQIDS[tnm]:
                        tgtname = tnm
                        WQIDS[tnm].remove(task.id)
                taskid = wq.submit(task)
                logger.warning("Task '%s' (task %i) failed on host %s (%i seconds), resubmitted: taskid %i\n" % (task.tag, oldid, oldhost, exectime, taskid))
                WQIDS[tgtname].append(taskid)
            else:
                if hasattr(task, 'print_time'):
                    print_time = task.print_time
                if exectime > print_time: # Assume that we're only interested in printing jobs that last longer than a minute.
                    logger.info("Task '%s' (task %i) finished successfully on host %s (%i seconds)\n" % (task.tag, task.id, task.hostname, exectime))
                for tnm in WQIDS:
                    if task.id in WQIDS[tnm]:
                        WQIDS[tnm].remove(task.id)
            del task

        # LPW 2018-09-10 Updated to use stats fields from CCTools 6.2.10
        # Please upgrade CCTools version if errors are encountered during runtime.
        if verbose:
            logger.info("Workers: %i init, %i idle, %i busy, %i total joined, %i total removed\n" \
                % (wq.stats.workers_init, wq.stats.workers_idle, wq.stats.workers_busy, wq.stats.workers_joined, wq.stats.workers_removed))
            logger.info("Tasks: %i running, %i waiting, %i dispatched, %i submitted, %i total complete\n" \
                % (wq.stats.tasks_running, wq.stats.tasks_waiting, wq.stats.tasks_dispatched, wq.stats.tasks_submitted, wq.stats.tasks_done))
            logger.info("Data: %i / %i kb sent/received\n" % (int(wq.stats.bytes_sent/1024), int(wq.stats.bytes_received/1024)))
        else:
            logger.info("\r%s : %i/%i workers busy; %i/%i jobs complete \r" %\
                (time.ctime(), wq.stats.workers_busy, wq.stats.workers_connected, wq.stats.tasks_done, wq.stats.tasks_submitted))
            # Print a newline at most once every 15 minutes to keep the log readable.
            if time.time() - wq_wait1.t0 > 900:
                wq_wait1.t0 = time.time()
                logger.info('\n')
wq_wait1.t0 = time.time()
1023 
def wq_wait(wq, wait_time=10, wait_intvl=10, print_time=60, verbose=False):
    """ This function waits until the work queue is completely empty. """
    while True:
        if wq.empty():
            break
        wq_wait1(wq, wait_time=wait_time, wait_intvl=wait_intvl, print_time=print_time, verbose=verbose)
1028 
1029 #=====================================#
1030 #| File and process management stuff |#
1031 #=====================================#
def click():
    """Stopwatch function for timing: return seconds since the previous call."""
    elapsed = time.time() - click.t0
    # Reset the reference point so the next call measures from here.
    click.t0 = time.time()
    return elapsed
click.t0 = time.time()
1038 
def splitall(path):
    """Split a path into a list of all of its components."""
    components = []
    while True:
        head, tail = os.path.split(path)
        if head == path:
            # Sentinel for absolute paths: the head stops changing.
            components.insert(0, head)
            break
        if tail == path:
            # Sentinel for relative paths.
            components.insert(0, tail)
            break
        path = head
        components.insert(0, tail)
    return components
1053 
# Back up a file.
def bak(path, dest=None, cwd=None, start=1):
    """Back up a file by moving it to a numbered name (base_N.ext).

    @param[in] path File to back up; if it does not exist, nothing happens.
    @param[in] dest Destination folder (default: the file's own folder); created if missing.
    @param[in] cwd If provided, operate relative to this existing folder.
    @param[in] start First integer suffix to try.
    @return newf The new path of the backed-up file, or None if path did not exist.
    """
    oldf = path
    newf = None
    # Idiom fix: compare against None with 'is not', per PEP 8.
    if cwd is not None:
        if not os.path.exists(cwd):
            raise RuntimeError("%s is not an existing folder" % cwd)
        old_d = os.getcwd()
        os.chdir(cwd)
    if os.path.exists(path):
        dnm, fnm = os.path.split(path)
        if dnm == '' : dnm = '.'
        base, ext = os.path.splitext(fnm)
        if dest is None:
            dest = dnm
        if not os.path.isdir(dest): os.makedirs(dest)
        # Find the first unused numbered name.
        i = start
        while True:
            fnm = "%s_%i%s" % (base,i,ext)
            newf = os.path.join(dest, fnm)
            if not os.path.exists(newf): break
            i += 1
        logger.info("Backing up %s -> %s\n" % (oldf, newf))
        shutil.move(oldf,newf)
    if cwd is not None:
        os.chdir(old_d)
    return newf
1081 
# Purpose: Given a file name and/or an extension, do one of the following:
# 1) If provided a file name, check the file, crash if not exist and err==True. Return the file name.
# 2) If list is empty but extension is provided, check if one file exists that matches
# the extension. If so, return the file name.
# 3) If list is still empty and err==True, then crash with an error.
def onefile(fnm=None, ext=None, err=False):
    """Locate a single file in the current directory by name or extension.

    @param[in] fnm (str/None) Explicit file name to look for; copied into the
        current directory if it lives elsewhere.
    @param[in] ext (str/None) Extension used to autodetect a unique file when
        fnm is missing or not provided.
    @param[in] err (bool) If True, raise RuntimeError instead of returning None
        when no (unique) file can be found.
    @return The base name of the located file, or None.
    """
    if fnm is None and ext is None:
        if err:
            logger.error("Must provide either filename or extension to onefile()")
            raise RuntimeError
        else:
            return None
    if fnm is not None:
        if os.path.exists(fnm):
            if os.path.dirname(os.path.abspath(fnm)) != os.getcwd():
                fsrc = os.path.abspath(fnm)
                fdest = os.path.join(os.getcwd(), os.path.basename(fnm))
                #-----
                # If the file path doesn't correspond to the current directory, copy the file over
                # If the file exists in the current directory already and it's different, then crash.
                #-----
                if os.path.exists(fdest):
                    if not filecmp.cmp(fsrc, fdest):
                        logger.error("onefile() will not overwrite %s with %s\n" % (os.path.join(os.getcwd(), os.path.basename(fnm)),os.path.abspath(fnm)))
                        raise RuntimeError
                    else:
                        logger.info("\x1b[93monefile() says the files %s and %s are identical\x1b[0m\n" % (os.path.abspath(fnm), os.getcwd()))
                else:
                    logger.info("\x1b[93monefile() will copy %s to %s\x1b[0m\n" % (os.path.abspath(fnm), os.getcwd()))
                    shutil.copy2(fsrc, fdest)
            return os.path.basename(fnm)
        elif err==True or ext is None:
            logger.error("File specified by %s does not exist!" % fnm)
            raise RuntimeError
        elif ext is not None:
            # Fall through to extension-based autodetection below.
            warn_once("File specified by %s does not exist - will try to autodetect .%s extension" % (fnm, ext))
    answer = None
    cwd = os.getcwd()
    ls = [i for i in os.listdir(cwd) if i.endswith('.%s' % ext)]
    # Autodetection succeeds only if exactly one file matches the extension.
    if len(ls) != 1:
        if err:
            logger.error("Cannot find a unique file with extension .%s in %s (%i found; %s)" % (ext, cwd, len(ls), ' '.join(ls)))
            raise RuntimeError
        else:
            warn_once("Cannot find a unique file with extension .%s in %s (%i found; %s)" %
                      (ext, cwd, len(ls), ' '.join(ls)), warnhash = "Found %i .%s files" % (len(ls), ext))
    else:
        answer = os.path.basename(ls[0])
        warn_once("Autodetected %s in %s" % (answer, cwd), warnhash = "Autodetected %s" % answer)
    return answer
1132 
# Purpose: Given a file name / file list and/or an extension, do one of the following:
# 1) If provided a file list, check each file in the list
# and crash if any file does not exist. Return the list.
# 2) If provided a file name, check the file and crash if the file
# does not exist. Return a length-one list with the file name.
# 3) If list is empty but extension is provided, check for files that
# match the extension. If so, append them to the list.
# 4) If list is still empty and err==True, then crash with an error.
def listfiles(fnms=None, ext=None, err=False, dnm=None):
    """Build a list of file base names, copying files into the current directory if needed.

    @param[in] fnms (list/str/None) File name(s) that must exist.
    @param[in] ext (str/None) Extension used to autodetect files when fnms yields nothing.
    @param[in] err (bool) If True, raise RuntimeError when no files are found.
    @param[in] dnm (str/None) Directory to search in (default: current directory).
    @return answer List of base names of the located files.
    """
    answer = []
    cwd = os.path.abspath(os.getcwd())
    # BUGFIX: restore the original working directory even if we raise partway through.
    try:
        if dnm is not None:
            os.chdir(dnm)
        # Check the explicit fnms argument first. The None case is handled
        # before the isinstance checks so nothing is evaluated needlessly.
        if fnms is None:
            pass
        elif isinstance(fnms, list):
            for i in fnms:
                if not os.path.exists(i):
                    logger.error('Specified %s but it does not exist' % i)
                    raise RuntimeError
                answer.append(i)
        elif isinstance(fnms, six.string_types):
            if not os.path.exists(fnms):
                logger.error('Specified %s but it does not exist' % fnms)
                raise RuntimeError
            answer = [fnms]
        else:
            logger.info(str(fnms))
            logger.error('First argument to listfiles must be a list, a string, or None')
            raise RuntimeError
        if answer == [] and ext is not None:
            answer = [os.path.basename(i) for i in os.listdir(os.getcwd()) if i.endswith('.%s' % ext)]
        if answer == [] and err:
            logger.error('listfiles function failed to come up with a file! (fnms = %s ext = %s)' % (str(fnms), str(ext)))
            raise RuntimeError

        for ifnm, fnm in enumerate(answer):
            if os.path.dirname(os.path.abspath(fnm)) != os.getcwd():
                fsrc = os.path.abspath(fnm)
                fdest = os.path.join(os.getcwd(), os.path.basename(fnm))
                #-----
                # If the file path doesn't correspond to the current directory, copy the file over
                # If the file exists in the current directory already and it's different, then crash.
                #-----
                if os.path.exists(fdest):
                    if not filecmp.cmp(fsrc, fdest):
                        # BUGFIX: these messages previously said "onefile()".
                        logger.error("listfiles() will not overwrite %s with %s\n" % (os.path.join(os.getcwd(), os.path.basename(fnm)),os.path.abspath(fnm)))
                        raise RuntimeError
                    else:
                        logger.info("\x1b[93mlistfiles() says the files %s and %s are identical\x1b[0m\n" % (os.path.abspath(fnm), os.getcwd()))
                        answer[ifnm] = os.path.basename(fnm)
                else:
                    logger.info("\x1b[93mlistfiles() will copy %s to %s\x1b[0m\n" % (os.path.abspath(fnm), os.getcwd()))
                    shutil.copy2(fsrc, fdest)
                    answer[ifnm] = os.path.basename(fnm)
    finally:
        os.chdir(cwd)
    return answer
1188 
def extract_tar(tarfnm, fnms, force=False):
    """
    Extract a list of files from .tar archive with any compression.
    The file is extracted to the base folder of the archive.

    Parameters
    ----------
    tarfnm :
        Name of the archive file.
    fnms : str or list
        File names to be extracted.
    force : bool, optional
        If true, then force extraction of file even if they already exist on disk.
    """
    # BUGFIX: normalize fnms to a list up front. Previously a single string
    # was only wrapped after the existence check, so that check iterated the
    # string's characters instead of treating it as one file name.
    if not isinstance(fnms, (list, tuple)):
        fnms = [fnms]
    # Get path of tar file.
    fdir = os.path.abspath(os.path.dirname(tarfnm))
    # If all files exist, then return - no need to extract.
    if (not force) and all(os.path.exists(os.path.join(fdir, f)) for f in fnms): return
    # If the tar file doesn't exist or isn't valid, do nothing.
    if not os.path.exists(tarfnm): return
    if not tarfile.is_tarfile(tarfnm): return
    # BUGFIX: use a context manager so the archive handle is closed even on
    # error (the original leaked the open file).
    with tarfile.open(tarfnm, 'r') as arch:
        # Extract only the members we asked for (to avoid an exception).
        members = [m for m in arch.getmembers() if m.name in fnms]
        arch.extractall(fdir, members=members)
1220 
def GoInto(Dir):
    """Change into directory Dir, creating it (and parents) first if needed.

    Raises RuntimeError if Dir exists but is not a directory.
    """
    if os.path.exists(Dir):
        if not os.path.isdir(Dir):
            # BUGFIX: the error message previously referenced an undefined
            # variable 'newdir', raising NameError instead of reporting.
            logger.error("Tried to create directory %s, it exists but isn't a directory\n" % Dir)
            raise RuntimeError
    else:
        os.makedirs(Dir)
    os.chdir(Dir)
1230 
def allsplit(Dir):
    """Recursively split a directory path into the list of directories involved."""
    head, tail = os.path.split(os.path.normpath(Dir))
    # Empty or '.' components terminate the recursion.
    if tail in ('', '.'):
        return []
    return allsplit(head) + [tail]
1236 
def Leave(Dir):
    """Exit directory Dir (reversing a previous GoInto), checking that we are actually in it."""
    current = os.path.split(os.getcwd())[1]
    if current != Dir:
        logger.error("Trying to leave directory %s, but we're actually in directory %s (check your code)\n" % (Dir,os.path.split(os.getcwd())[1]))
        raise RuntimeError
    # Step up one level for each component of Dir.
    for _ in allsplit(Dir):
        os.chdir('..')
1243 
# Dictionary containing specific error messages for specific missing files or file patterns
specific_lst = [(['mdrun','grompp','trjconv','g_energy','g_traj'], "Make sure to install GROMACS and add it to your path (or set the gmxpath option)"),
                (['force.mdin', 'stage.leap'], "This file is needed for setting up AMBER force matching targets"),
                (['conf.pdb', 'mono.pdb'], "This file is needed for setting up OpenMM condensed phase property targets"),
                (['liquid.xyz', 'liquid.key', 'mono.xyz', 'mono.key'], "This file is needed for setting up OpenMM condensed phase property targets"),
                (['dynamic', 'analyze', 'minimize', 'testgrad', 'vibrate', 'optimize', 'polarize', 'superpose'], "Make sure to install TINKER and add it to your path (or set the tinkerpath option)"),
                (['runcuda.sh', 'npt.py', 'npt_tinker.py'], "This file belongs in the ForceBalance source directory, not sure why it is missing"),
                (['input.xyz'], "This file is needed for TINKER molecular property targets"),
                (['.*key$', '.*xyz$'], "I am guessing this file is probably needed by TINKER"),
                (['.*gro$', '.*top$', '.*itp$', '.*mdp$', '.*ndx$'], "I am guessing this file is probably needed by GROMACS")
                ]

# Build a dictionary mapping all of the keys in the above lists to their error messages
specific_dct = {key: msg for keys, msg in specific_lst for key in keys}

def MissingFileInspection(fnm):
    """Return a hint message for a missing file, based on known file-name patterns.

    @param[in] fnm Path of the missing file (only the base name is matched).
    @return A newline-prefixed block of hint lines, or "" if no pattern matches.
    """
    fnm = os.path.split(fnm)[1]
    answer = ""
    for key in specific_dct:
        if re.match(key, fnm):
            # BUGFIX: the leading newline used to be added unconditionally on
            # the first iteration, so the function never returned "" even when
            # nothing matched. Only prepend it before the first real match.
            if answer == "":
                answer += "\n"
            answer += "%s\n" % specific_dct[key]
    return answer
1268 
def wopen(dest, binary=False):
    """ If trying to write to a symbolic link, remove it first. """
    if os.path.islink(dest):
        logger.warning("Trying to write to a symbolic link %s, removing it first\n" % dest)
        os.unlink(dest)
    # Open for writing in binary or text mode as requested.
    mode = 'wb' if binary else 'w'
    return open(dest, mode)
def LinkFile(src, dest, nosrcok = False):
    """Create a symbolic link at dest pointing to src, with sanity checks.

    @param[in] nosrcok If True, silently do nothing when src does not exist.
    """
    # Linking a path to itself is a no-op.
    if os.path.abspath(src) == os.path.abspath(dest):
        return
    if not os.path.exists(src):
        if not nosrcok:
            logger.error("Tried to create symbolic link %s to %s, but source file doesn't exist%s\n" % (src,dest,MissingFileInspection(src)))
            raise RuntimeError
        return
    if os.path.islink(dest) and not os.path.exists(dest):
        # Replace a dangling symlink at the destination.
        os.remove(dest)
        os.symlink(src, dest)
    elif os.path.exists(dest):
        if not os.path.islink(dest):
            logger.error("Tried to create symbolic link %s to %s, destination exists but isn't a symbolic link\n" % (src, dest))
            raise RuntimeError
    else:
        os.symlink(src, dest)
1297 
1298 
def CopyFile(src, dest):
    """Copy src to dest with sanity checks.

    Raises RuntimeError if src does not exist, or if dest is a symbolic link
    (overwriting through a link is almost certainly unintended).
    """
    if not os.path.exists(src):
        logger.error("Tried to copy %s to %s, but source file doesn't exist%s\n" % (src,dest,MissingFileInspection(src)))
        raise RuntimeError
    if os.path.islink(dest):
        logger.error("Tried to copy %s to %s, destination exists but it's a symbolic link\n" % (src, dest))
        raise RuntimeError
    # BUGFIX: the original only performed the copy when dest already existed;
    # copying to a fresh (non-existent) destination silently did nothing.
    shutil.copy2(src, dest)
1310 
def link_dir_contents(abssrcdir, absdestdir):
    """Symlink every file (and any subdirectory named 'IC') from abssrcdir into absdestdir."""
    for entry in os.listdir(abssrcdir):
        src_path = os.path.join(abssrcdir, entry)
        dst_path = os.path.join(absdestdir, entry)
        # Clear out dangling symlinks before re-linking.
        if os.path.islink(dst_path) and not os.path.exists(dst_path):
            os.remove(dst_path)
        wanted = os.path.isfile(src_path) or (os.path.isdir(src_path) and entry == 'IC')
        if wanted and not os.path.exists(dst_path):
            os.symlink(src_path, dst_path)
1321 
def remove_if_exists(fnm):
    """ Remove the file if it exists (doesn't return an error). """
    if not os.path.exists(fnm):
        return
    os.remove(fnm)
1326 
def which(fnm):
    """Return the directory containing executable fnm, or '' if not found.

    Works only on UNIX-like file systems (shells out to `which`).
    """
    try:
        return os.path.split(os.popen('which %s 2> /dev/null' % fnm).readlines()[0].strip())[0]
    except IndexError:
        # BUGFIX (idiom): narrowed from a bare `except:` which also swallowed
        # SystemExit/KeyboardInterrupt. `which` printing nothing makes
        # readlines()[0] raise IndexError.
        return ''
def copy_tree_over(src, dest):
    """
    Copy a source directory tree to a destination directory tree,
    overwriting files as necessary. This does not require removing
    the destination folder, which can reduce the number of times
    shutil.rmtree needs to be called.

    NOTE(review): distutils is deprecated (removed in Python 3.12);
    shutil.copytree(..., dirs_exist_ok=True) is the modern replacement once
    older Python support is dropped -- confirm before migrating.
    """
    # From https://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree/28055993 :
    # If you copy folder, then remove it, then copy again it will fail, because it caches all the created dirs.
    # To workaround you can clear _path_created before copy:
    distutils.dir_util._path_created = {}
    distutils.dir_util.copy_tree(src, dest)
1346 
# Thanks to cesarkawakami on #python (IRC freenode) for this code.
class LineChunker(object):
    """Accumulates a byte stream and invokes a callback once per complete line,
    where a line is terminated by either a newline or a carriage return.
    Usable as a context manager; on exit, any unterminated remainder is flushed
    with a trailing newline."""

    def __init__(self, callback):
        self.callback = callback
        self.buf = ""

    def push(self, data):
        # Added by LPW during Py3 compatibility; ran into some trouble decoding strings such as
        # "a" with umlaut on top. I guess we can ignore these for now. For some reason,
        # Py2 never required decoding of data, I can simply add it to the wtring.
        # self.buf += data # Old Py2 code...
        self.buf += data.decode('utf-8')#errors='ignore')
        self.nomnom()

    def close(self):
        # Flush any incomplete final line, appending a newline terminator.
        if self.buf:
            self.callback(self.buf + "\n")

    def nomnom(self):
        # Emit one callback per complete line, where the terminator may be
        # either a carriage return or a newline.
        while True:
            found = re.search(r"[\r\n]", self.buf)
            if not found:
                break
            cut = found.start()
            line, terminator = self.buf[:cut], self.buf[cut]
            self.buf = self.buf[cut + 1:]
            self.callback(line + terminator)

    def __enter__(self):
        return self

    def __exit__(self, *args, **kwargs):
        self.close()
1377 
def _exec(command, print_to_screen = False, outfnm = None, logfnm = None, stdin = "", print_command = True, copy_stdout = True, copy_stderr = False, persist = False, expand_cr=False, print_error=True, rbytes=1, cwd=None, **kwargs):
    """Runs command line using subprocess, optionally returning stdout.
    Options:
    command (required) = Name of the command you want to execute
    outfnm (optional) = Name of the output file name (overwritten if exists)
    logfnm (optional) = Name of the log file name (appended if exists)
    stdin (optional) = A string to be passed to stdin, as if it were typed (use newline character to mimic Enter key)
    print_command = Whether to print the command.
    copy_stdout = Copy the stdout stream; can set to False in strange situations
    copy_stderr = Copy the stderr stream to the stdout stream; useful for GROMACS which prints out everything to stderr (argh.)
    expand_cr = Whether to expand carriage returns into newlines (useful for GROMACS mdrun).
    print_error = Whether to print error messages on a crash. Should be true most of the time.
    persist = Continue execution even if the command gives a nonzero return code.
    rbytes = Number of bytes to read from stdout and stderr streams at a time. GMX requires rbytes = 1 otherwise streams are interleaved. Higher values for speed.

    Returns: list of strings -- the captured stdout split into lines (a trailing empty line is dropped).
    Raises: RuntimeError if the process returns nonzero and persist is False.
    Side effect: stores the process return code in _exec.returncode.
    """

    # Dictionary of options to be passed to the Popen object.
    # A string command is run through the shell; a list is executed directly.
    cmd_options={'shell':isinstance(command, six.string_types), 'stdin':PIPE, 'stdout':PIPE, 'stderr':PIPE, 'universal_newlines':expand_cr, 'cwd':cwd}

    # If the current working directory is provided, the outputs will be written to there as well.
    if cwd is not None:
        if outfnm is not None:
            outfnm = os.path.abspath(os.path.join(cwd, outfnm))
        if logfnm is not None:
            logfnm = os.path.abspath(os.path.join(cwd, logfnm))

    # "write to file" : Function for writing some characters to the log and/or output files.
    # The log file is always appended to; the output file is truncated on the
    # first write of this call (wtf.first flag) and appended to afterwards.
    def wtf(out):
        if logfnm is not None:
            with open(logfnm,'ab+') as f:
                f.write(out.encode('utf-8'))
                f.flush()
        if outfnm is not None:
            with open(outfnm,'wb+' if wtf.first else 'ab+') as f:
                f.write(out.encode('utf-8'))
                f.flush()
            wtf.first = False
    wtf.first = True

    # Preserve backwards compatibility; sometimes None gets passed to stdin.
    if stdin is None: stdin = ""

    if print_command:
        logger.info("Executing process: \x1b[92m%-50s\x1b[0m%s%s%s%s\n" % (' '.join(command) if type(command) is list else command,
                                              " In: %s" % cwd if cwd is not None else "",
                                              " Output: %s" % outfnm if outfnm is not None else "",
                                              " Append: %s" % logfnm if logfnm is not None else "",
                                              (" Stdin: %s" % stdin.replace('\n','\\n')) if stdin else ""))
        wtf("Executing process: %s%s\n" % (command, (" Stdin: %s" % stdin.replace('\n','\\n')) if stdin else ""))

    cmd_options.update(kwargs)
    p = subprocess.Popen(command, **cmd_options)

    # Write the stdin stream to the process.
    p.stdin.write(stdin.encode('ascii'))
    p.stdin.close()

    #===============================================================#
    #| Read the output streams from the process. This is a bit |#
    #| complicated because programs like GROMACS tend to print out |#
    #| stdout as well as stderr streams, and also carriage returns |#
    #| along with newline characters. |#
    #===============================================================#
    # stdout and stderr streams of the process.
    streams = [p.stdout, p.stderr]
    # Are we using Python 2?
    p2 = sys.version_info.major == 2
    # These are functions that take chunks of lines (read) as inputs.
    def process_out(read):
        if print_to_screen:
            # LPW 2019-11-25: We should be writing a string, not a representation of bytes
            if p2:
                sys.stdout.write(str(read.encode('utf-8')))
            else:
                sys.stdout.write(read)
        if copy_stdout:
            process_out.stdout.append(read)
            wtf(read)
    process_out.stdout = []

    def process_err(read):
        if print_to_screen:
            if p2:
                sys.stderr.write(str(read.encode('utf-8')))
            else:
                sys.stderr.write(read)
        # stderr is always recorded (for error reporting on crash), but only
        # merged into the stdout record / files when copy_stderr is set.
        process_err.stderr.append(read)
        if copy_stderr:
            process_out.stdout.append(read)
            wtf(read)
    process_err.stderr = []
    # This reads the streams one byte at a time, and passes it to the LineChunker
    # which splits it by either newline or carriage return.
    # If the stream has ended, then it is removed from the list.
    with LineChunker(process_out) as out_chunker, LineChunker(process_err) as err_chunker:
        while True:
            to_read, _, _ = select(streams, [], [])
            for fh in to_read:
                # We want to call fh.read below, but this can lead to a system hang when executing Tinker on mac.
                # This hang can be avoided by running fh.read1 (with a "1" at the end), however python2.7
                # doesn't implement ByteStream.read1. So, to enable python3 builds on mac to work, we pick the "best"
                # fh.read function we can get
                if hasattr(fh, 'read1'):
                    fhread = fh.read1
                else:
                    fhread = fh.read

                if fh is p.stdout:
                    # Accumulate extra bytes one at a time until the chunk
                    # decodes as valid UTF-8 (a multi-byte character can
                    # straddle a read boundary); give up after 10 extra bytes.
                    read_nbytes = 0
                    read = ''.encode('utf-8')
                    while True:
                        if read_nbytes == 0:
                            read += fhread(rbytes)
                            read_nbytes += rbytes
                        else:
                            read += fhread(1)
                            read_nbytes += 1
                        if read_nbytes > 10+rbytes:
                            raise RuntimeError("Failed to decode stdout from external process.")
                        if not read:
                            # Empty read means the stream has ended.
                            streams.remove(p.stdout)
                            p.stdout.close()
                            break
                        else:
                            try:
                                out_chunker.push(read)
                                break
                            except UnicodeDecodeError:
                                pass
                elif fh is p.stderr:
                    # Same accumulate-and-decode loop as for stdout above.
                    read_nbytes = 0
                    read = ''.encode('utf-8')
                    while True:
                        if read_nbytes == 0:
                            read += fhread(rbytes)
                            read_nbytes += rbytes
                        else:
                            read += fhread(1)
                            read_nbytes += 1
                        if read_nbytes > 10+rbytes:
                            raise RuntimeError("Failed to decode stderr from external process.")
                        if not read:
                            streams.remove(p.stderr)
                            p.stderr.close()
                            break
                        else:
                            try:
                                err_chunker.push(read)
                                break
                            except UnicodeDecodeError:
                                pass
                else:
                    raise RuntimeError
            if len(streams) == 0: break

    p.wait()

    process_out.stdout = ''.join(process_out.stdout)
    process_err.stderr = ''.join(process_err.stderr)

    _exec.returncode = p.returncode
    if p.returncode != 0:
        if process_err.stderr and print_error:
            logger.warning("Received an error message:\n")
            logger.warning("\n[====] \x1b[91mError Message\x1b[0m [====]\n")
            logger.warning(process_err.stderr)
            logger.warning("[====] \x1b[91mEnd o'Message\x1b[0m [====]\n")
        if persist:
            if print_error:
                logger.info("%s gave a return code of %i (it may have crashed) -- carrying on\n" % (command, p.returncode))
        else:
            # This code (commented out) would not throw an exception, but instead exit with the returncode of the crashed program.
            # sys.stderr.write("\x1b[1;94m%s\x1b[0m gave a return code of %i (\x1b[91mit may have crashed\x1b[0m)\n" % (command, p.returncode))
            # sys.exit(p.returncode)
            logger.error("\x1b[1;94m%s\x1b[0m gave a return code of %i (\x1b[91mit may have crashed\x1b[0m)\n\n" % (command, p.returncode))
            raise RuntimeError

    # Return the output in the form of a list of lines, so we can loop over it using "for line in output".
    Out = process_out.stdout.split('\n')
    if Out[-1] == '':
        Out = Out[:-1]
    return Out
_exec.returncode = None
1561 
def warn_press_key(warning, timeout=10):
    """Print a warning; if running interactively, give the user `timeout` seconds to acknowledge."""
    logger.warning(warning + '\n')
    # Only prompt when attached to a terminal.
    if not sys.stdin.isatty():
        return
    logger.warning("\x1b[1;91mPress Enter or wait %i seconds (I assume no responsibility for what happens after this!)\x1b[0m\n" % timeout)
    try:
        readable, _, _ = select([sys.stdin], [], [], timeout)
        if readable:
            sys.stdin.readline()
    except: pass
1571 
def warn_once(warning, warnhash = None):
    """ Prints a warning but will only do so once in a given run. """
    # The dedup key defaults to the warning text itself.
    key = warning if warnhash is None else warnhash
    if key in warn_once.already:
        return
    warn_once.already.add(key)
    if type(warning) is str:
        logger.info(warning + '\n')
    elif type(warning) is list:
        for line in warning:
            logger.info(line + '\n')
warn_once.already = set()
1585 
1586 #=========================================#
1587 #| Development stuff (not commonly used) |#
1588 #=========================================#
def concurrent_map(func, data):
    """
    Similar to the bultin function map(). But spawn a thread for each argument
    and apply `func` concurrently.

    Note: unlike map(), we cannot take an iterable argument. `data` should be an
    indexable sequence.
    """
    results = [None] * len(data)

    # Each worker writes its result into the slot matching its input index.
    def _worker(idx):
        results[idx] = func(data[idx])

    workers = [threading.Thread(target=_worker, args=(idx,)) for idx in range(len(data))]
    for w in workers:
        w.start()
    for w in workers:
        w.join()

    return results
def mean_stderr(ts)
Return mean and standard deviation of a time series ts.
Definition: nifty.py:795
def remove_if_exists(fnm)
Remove the file if it exists (doesn&#39;t return an error).
Definition: nifty.py:1358
def MissingFileInspection(fnm)
Definition: nifty.py:1292
def copy_tree_over(src, dest)
Copy a source directory tree to a destination directory tree, overwriting files as necessary...
Definition: nifty.py:1376
def isdecimal(word)
Matches things with a decimal only; see isint and isfloat.
Definition: nifty.py:418
def destroyWorkQueue()
Definition: nifty.py:926
def CopyFile(src, dest)
Definition: nifty.py:1333
def allsplit(Dir)
Definition: nifty.py:1264
def __init__(self, stream=sys.stdout)
Definition: nifty.py:65
def pmat2d(mat2d, precision=1, format="e", loglevel=INFO)
Printout of a 2-D array.
Definition: nifty.py:215
def Leave(Dir)
Definition: nifty.py:1270
def concurrent_map(func, data)
Similar to the bultin function map().
Definition: nifty.py:1635
def lp_load(fnm)
Read an object from a bzipped file specified by the path.
Definition: nifty.py:836
def splitall(path)
Definition: nifty.py:1071
def extract_tar(tarfnm, fnms, force=False)
Extract a list of files from .tar archive with any compression.
Definition: nifty.py:1235
def LinkFile(src, dest, nosrcok=False)
Definition: nifty.py:1313
def createWorkQueue(wq_port, debug=True, name=package)
Definition: nifty.py:912
def invert_svd(X, thresh=1e-12)
Invert a matrix using singular value decomposition.
Definition: nifty.py:621
def which(fnm)
Definition: nifty.py:1362
def flat(vec)
Given any list, array, or matrix, return a single-index array.
Definition: nifty.py:467
def statisticalInefficiency(A_n, B_n=None, fast=False, mintime=3, warn=True)
Compute the (cross) statistical inefficiency of (two) timeseries.
Definition: nifty.py:740
def natural_sort(l)
Return a natural sorted list.
Definition: nifty.py:285
def GoInto(Dir)
Definition: nifty.py:1254
def get_least_squares(x, y, w=None, thresh=1e-12)
Definition: nifty.py:662
def emit(self, record)
Definition: nifty.py:83
def floatornan(word)
Returns a big number if we encounter NaN.
Definition: nifty.py:430
def queue_up(wq, command, input_files, output_files, tag=None, tgt=None, verbose=True, print_time=60)
Submit a job to the Work Queue.
Definition: nifty.py:941
def lp_dump(obj, fnm, protocol=0)
Write an object to a zipped pickle file specified by the path.
Definition: nifty.py:817
def col(vec)
Given any list, array, or matrix, return a 1-column 2D array.
Definition: nifty.py:448
def monotonic(arr, start, end)
Definition: nifty.py:527
def commadash(l)
Definition: nifty.py:239
def est124(val)
Given any positive floating point value, return a value [124]e+xx that is closest to it in the log sp...
Definition: nifty.py:474
def queue_up_src_dest(wq, command, input_files, output_files, tag=None, tgt=None, verbose=True, print_time=60)
Submit a job to the Work Queue.
Definition: nifty.py:974
def link_dir_contents(abssrcdir, absdestdir)
Definition: nifty.py:1345
def wq_wait(wq, wait_time=10, wait_intvl=10, print_time=60, verbose=False)
This function waits until the work queue is completely empty.
Definition: nifty.py:1056
Exactly like FileHandler, except no newline character is printed at the end of each message...
Definition: nifty.py:79
def est1234568(val)
Given any positive floating point value, return a value [1234568]e+xx that is closest to it in the lo...
Definition: nifty.py:497
def orthogonalize(vec1, vec2)
Given two vectors vec1 and vec2, project out the component of vec1 that is along the vec2-direction...
Definition: nifty.py:608
def getWQIds()
Definition: nifty.py:908
def encode(l)
Definition: nifty.py:230
def warn_press_key(warning, timeout=10)
Definition: nifty.py:1599
def printcool_dictionary(Dict, title="Dictionary Keys : Values", bold=False, color=2, keywidth=25, topwidth=50, center=True, leftpad=0)
See documentation for printcool; this is a nice way to print out keys/values in a dictionary...
Definition: nifty.py:366
def grouper(iterable, n)
Collect data into fixed-length chunks or blocks.
Definition: nifty.py:224
def segments(e)
Definition: nifty.py:233
def wq_wait1(wq, wait_time=10, wait_intvl=1, print_time=60, verbose=False)
This function waits ten seconds to see if a task in the Work Queue has finished.
Definition: nifty.py:996
def printcool(text, sym="#", bold=False, color=2, ansi=None, bottom='-', minwidth=50, center=True, sym2="=")
Cool-looking printout for slick formatting of output.
Definition: nifty.py:321
def multiD_statisticalInefficiency(A_n, B_n=None, fast=False, mintime=3, warn=True)
Definition: nifty.py:800
def click()
Stopwatch function for timing.
Definition: nifty.py:1065
def bak(path, dest=None, cwd=None, start=1)
Definition: nifty.py:1087
def pvec1d(vec1d, precision=1, format="e", loglevel=INFO)
Printout of a 1-D vector.
Definition: nifty.py:199
def onefile(fnm=None, ext=None, err=False)
Definition: nifty.py:1119
def isint(word)
ONLY matches integers! If you have a decimal point? None shall pass!
Definition: nifty.py:390
def listfiles(fnms=None, ext=None, err=False, dnm=None)
Definition: nifty.py:1173
def warn_once(warning, warnhash=None)
Prints a warning but will only do so once in a given run.
Definition: nifty.py:1611
def astr(vec1d, precision=4)
Write an array to a string so we can use it to key a dictionary.
Definition: nifty.py:207
def wopen(dest, binary=False)
If trying to write to a symbolic link, remove it first.
Definition: nifty.py:1304
def monotonic_decreasing(arr, start=None, end=None, verbose=False)
Return the indices of an array corresponding to strictly monotonic decreasing behavior.
Definition: nifty.py:567
def uncommadash(s)
Definition: nifty.py:249
def getWorkQueue()
Definition: nifty.py:904
def isfloat(word)
Matches ANY number; it can be a decimal, scientific notation, what have you CAUTION - this will also ...
Definition: nifty.py:405
def row(vec)
Given any list, array, or matrix, return a 1-row 2D array.
Definition: nifty.py:458
def emit(self, record)
Definition: nifty.py:68
Exactly like StreamHandler, except no newline character is printed at the end of each message...
Definition: nifty.py:64