1 """@package forcebalance.nifty Nifty functions, intended to be imported by any module within ForceBalance. 5 - Math: Variable manipulation, linear algebra, least squares polynomial fitting 6 - Pickle: Expand Python's own pickle to accommodate writing XML etree objects 7 - Commands for submitting things to the Work Queue 8 - Various file and process management functions 9 - Development stuff (not commonly used) 11 Named after the mighty Sniffy Handy Nifty (King Sniffy) 16 from __future__
import absolute_import
17 from __future__
import division
18 from __future__
import print_function
22 import distutils.dir_util
27 from select
import select
30 from numpy.linalg
import multi_dot
34 from itertools
import zip_longest
as zip_longest
36 from itertools
import izip_longest
as zip_longest
38 from pickle
import Pickler, Unpickler
44 from subprocess
import PIPE
45 from collections
import OrderedDict, defaultdict
50 if "forcebalance" in __name__:
53 package=
"ForceBalance" 59 Exactly like StreamHandler, except no newline character is printed at the end of each message. 60 This is done in order to ensure functions in molecule.py and nifty.py work consistently 61 across multiple packages. 63 def __init__(self, stream = sys.stdout):
66 def emit(self, record):
67 message = record.getMessage()
68 self.stream.write(message)
73 Exactly like FileHandler, except no newline character is printed at the end of each message. 74 This is done in order to ensure functions in molecule.py and nifty.py work consistently 75 across multiple packages. 78 super(RawFileHandler, self).
__init__(*args, **kwargs)
83 message = record.getMessage()
87 if "geometric" in __name__:
89 logger = getLogger(__name__)
93 logger = getLogger(
"NiftyLogger")
96 logger.addHandler(handler)
97 if __name__ ==
"__main__":
98 package =
"LPW-nifty.py" 100 package = __name__.split(
'.')[0]
106 logger.warning(
"bz2 module import failed (used in compressing or decompressing pickle files)\n")
113 logger.warning(
"gzip module import failed (used in compressing or decompressing pickle files)\n")
# The directory that this file lives in.
rootdir = os.path.dirname(os.path.abspath(__file__))

# Physical constants and unit conversion factors (CODATA 2018 values).
bohr2ang     = 0.529177210903      # Bohr -> Angstrom
ang2bohr     = 1.0 / bohr2ang
au2kcal      = 627.5094740630558   # Hartree -> kcal/mol
kcal2au      = 1.0 / au2kcal
au2kj        = 2625.4996394798254  # Hartree -> kJ/mol
grad_au2gmx  = 49614.75258920567   # Hartree/Bohr -> kJ/mol/nm
grad_gmx2au  = 1.0 / grad_au2gmx
au2evang     = 51.422067476325886  # Hartree/Bohr -> eV/Angstrom
evang2au     = 1.0 / au2evang
c_lightspeed = 299792458.          # speed of light in m/s
hbar         = 1.054571817e-34     # reduced Planck constant in J*s
avogadro     = 6.02214076e23
au_mass      = 9.1093837015e-31    # electron mass in kg
amu_mass     = 1.66053906660e-27   # atomic mass unit in kg
amu2au       = amu_mass / au_mass
cm2au        = 100 * c_lightspeed * (2*np.pi*hbar) * avogadro / 1000 / au2kj
ambervel2au  = 9.349961132249932e-04

# Gradient conversion used when force-matching Q-Chem data against GROMACS.
fqcgmx = -grad_au2gmx
def pvec1d(vec1d, precision=1, format="e", loglevel=INFO):
    """Printout of a 1-D vector.

    @param[in] vec1d a 1-D vector
    """
    v2a = np.array(vec1d)
    for i in range(v2a.shape[0]):
        logger.log(loglevel, "%% .%i%s " % (precision, format) % v2a[i])
    logger.log(loglevel, '\n')

def astr(vec1d, precision=4):
    """ Write an array to a string so we can use it to key a dictionary. """
    return ' '.join([("%% .%ie " % precision % i) for i in vec1d])

def pmat2d(mat2d, precision=1, format="e", loglevel=INFO):
    """Printout of a 2-D array.

    @param[in] mat2d a 2-D array
    """
    m2a = np.array(mat2d)
    for i in range(m2a.shape[0]):
        for j in range(m2a.shape[1]):
            logger.log(loglevel, "%% .%i%s " % (precision, format) % m2a[i][j])
        logger.log(loglevel, '\n')
217 """Collect data into fixed-length chunks or blocks""" 219 args = [iter(iterable)] * n
220 lzip = [[j
for j
in i
if j
is not None]
for i
in list(zip_longest(*args))]
224 return [[len(list(group)),name]
for name, group
in itertools.groupby(l)]
228 begins = np.array([sum([k[0]
for k
in e][:j])
for j,i
in enumerate(e)
if i[1] == 1])
229 lens = np.array([i[0]
for i
in e
if i[1] == 1])
230 return [(i, i+j)
for i, j
in zip(begins, lens)]
239 LL = [i
in L
for i
in range(L[-1])]
240 return ','.join(
'%i-%i' % (i[0]+1,i[1])
if (i[1]-1 > i[0])
else '%i' % (i[0]+1)
for i
in segments(
encode(LL)))
def uncommadash(s):
    # Take a string like '27-31,88-91,100' and turn it into a list of zero-based
    # integers (the inverse of commadash).
    L = []
    try:
        for w in s.split(','):
            ws = w.split('-')
            a = int(ws[0])-1
            if len(ws) == 1:
                b = int(ws[0])
            elif len(ws) == 2:
                b = int(ws[1])
            else:
                logger.warning("Dash-separated list cannot exceed length 2\n")
                raise
            if a < 0 or b <= 0 or b <= a:
                if a < 0 or b <= 0:
                    logger.warning("Items in list cannot be zero or negative: %d %d\n" % (a, b))
                else:
                    logger.warning("Second number cannot be smaller than first: %d %d\n" % (a, b))
                raise
            newL = range(a,b)
            if any([i in L for i in newL]):
                logger.warning("Duplicate entries found in list\n")
                raise
            L += newL
        if sorted(L) != L:
            logger.warning("List is out of order\n")
            raise
    except:
        logger.error('Invalid string for converting to list of numbers: %s\n' % s)
        raise RuntimeError
    return L
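
# Illustrative usage (a sketch added to this listing; the helper name is hypothetical).
# commadash and uncommadash convert between zero-based index lists and the compact
# one-based range strings shown above, and round-trip each other.
def _example_commadash():
    s = commadash([0, 1, 2, 3, 7, 8, 9, 15])
    assert s == '1-4,8-10,16'
    assert uncommadash(s) == [0, 1, 2, 3, 7, 8, 9, 15]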
277 """ Return a natural sorted list. """ 279 convert =
lambda text: int(text)
if text.isdigit()
else text.lower()
281 alphanum_key =
lambda key: [ convert(c)
for c
in re.split(
'([0-9]+)', key) ]
283 return sorted(l, key = alphanum_key)
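
# Illustrative usage (hypothetical helper, added for this listing): natural_sort
# orders embedded integers numerically rather than lexically.
def _example_natural_sort():
    assert natural_sort(['frame10', 'frame2', 'frame1']) == ['frame1', 'frame2', 'frame10']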
def printcool(text,sym="#",bold=False,color=2,ansi=None,bottom='-',minwidth=50,center=True,sym2="="):
    """Cool-looking printout for slick formatting of output.

    @param[in] text The string that the printout is based upon.  This function
    will print out the string, ANSI-colored and enclosed in the symbol, for example:\n
    <tt> ################# </tt>\n
    <tt> ### I am cool ### </tt>\n
    <tt> ################# </tt>
    @param[in] sym The surrounding symbol\n
    @param[in] bold Whether to use bold print
    @param[in] color The ANSI color
    @param[in] bottom The symbol for the bottom bar
    @param[in] minwidth The minimum width for the box; if the text is very short
    then we insert the appropriate number of padding spaces
    @return bar The bottom bar is returned for the user to print later, e.g. to mark off a 'section'
    """
    def newlen(l):
        # Length of the line with ANSI escape sequences stripped out.
        return len(re.sub(r"\x1b\[[0-9;]*m","",l))
    text = text.split('\n')
    width = max(minwidth,max([newlen(line) for line in text]))
    bar = ''.join([sym2 for i in range(width + 6)])
    bar = sym + bar + sym
    logger.info('\r'+bar+'\n')
    for ln, line in enumerate(text):
        if type(center) is list: c1 = center[ln]
        else: c1 = center
        if c1:
            padleft = ' ' * (int((width - newlen(line))/2))
        else:
            padleft = ''
        padright = ' '* (width - newlen(line) - len(padleft))
        if ansi is not None:
            ansi = str(ansi)
            logger.info("%s| \x1b[%sm%s " % (sym, ansi, padleft)+line+" %s\x1b[0m |%s\n" % (padright, sym))
        elif color is not None:
            if color == 0 and bold:
                logger.info("%s| \x1b[1m%s " % (sym, padleft)+line+" %s\x1b[0m |%s\n" % (padright, sym))
            elif color == 0:
                logger.info("%s| %s " % (sym, padleft)+line+" %s |%s\n" % (padright, sym))
            else:
                logger.info("%s| \x1b[%s9%im%s " % (sym, bold and "1;" or "", color, padleft)+line+" %s\x1b[0m |%s\n" % (padright, sym))
        else:
            warn_press_key("Inappropriate use of printcool")
    logger.info(bar+'\n')
    botbar = ''.join([bottom for i in range(width + 8)])
    return botbar + '\n'
def printcool_dictionary(Dict,title="Dictionary Keys : Values",bold=False,color=2,keywidth=25,topwidth=50,center=True,leftpad=0):
    """See documentation for printcool; this is a nice way to print out keys/values in a dictionary.

    The keys in the dictionary are sorted before printing out.

    @param[in] dict The dictionary to be printed
    @param[in] title The title of the printout
    """
    if Dict is None: return
    bar = printcool(title,bold=bold,color=color,minwidth=topwidth,center=center)
    def magic_string(str):
        # Build a fixed-width (keywidth) string from the key.
        return eval("\'%%-%is\' %% '%s'" % (keywidth,str.replace("'","\\'").replace('"','\\"')))
    if isinstance(Dict, OrderedDict):
        logger.info('\n'.join([' '*leftpad + "%s %s " % (magic_string(str(key)),str(Dict[key])) for key in Dict if Dict[key] is not None]))
    else:
        logger.info('\n'.join([' '*leftpad + "%s %s " % (magic_string(str(key)),str(Dict[key])) for key in sorted([i for i in Dict]) if Dict[key] is not None]))
    logger.info("\n%s" % bar)
374 """ONLY matches integers! If you have a decimal point? None shall pass! 376 @param[in] word String (for instance, '123', '153.0', '2.', '-354') 377 @return answer Boolean which specifies whether the string is an integer (only +/- sign followed by digits) 384 return re.match(
'^[-+]?[0-9]+$', word)
387 """Matches ANY number; it can be a decimal, scientific notation, what have you 388 CAUTION - this will also match an integer. 390 @param[in] word String (for instance, '123', '153.0', '2.', '-354') 391 @return answer Boolean which specifies whether the string is any number 394 try: word = str(word)
396 if len(word) == 0:
return False 397 return re.match(
r'^[-+]?[0-9]*\.?[0-9]*([eEdD][-+]?[0-9]+)?$',word)
400 """Matches things with a decimal only; see isint and isfloat. 402 @param[in] word String (for instance, '123', '153.0', '2.', '-354') 403 @return answer Boolean which specifies whether the string is a number with a decimal point 406 try: word = str(word)
411 """Returns a big number if we encounter NaN. 413 @param[in] word The string to be converted 414 @return answer The string converted to a float; if not a float, return 1e10 415 @todo I could use suggestions for making this better. 421 logger.info(
"Setting %s to % .1e\n" % big)
426 Given any list, array, or matrix, return a 1-column 2D array. 429 vec = The input vector that is to be made into a column 434 return np.array(vec).reshape(-1, 1)
437 """Given any list, array, or matrix, return a 1-row 2D array. 439 @param[in] vec The input vector that is to be made into a row 441 @return answer A 1-row 2D array 443 return np.array(vec).reshape(1, -1)
446 """Given any list, array, or matrix, return a single-index array. 448 @param[in] vec The data to be flattened 449 @return answer The flattened data 451 return np.array(vec).reshape(-1)
454 """Given any positive floating point value, return a value [124]e+xx 455 that is closest to it in the log space. 458 logint = math.floor(log)
459 logfrac = log - logint
461 log2 = 0.3010299956639812
462 log4 = 0.6020599913279624
464 if logfrac < 0.5*(log1+log2):
466 elif logfrac < 0.5*(log2+log4):
468 elif logfrac < 0.5*(log4+log10):
472 return fac*10**logint
475 """Given any positive floating point value, return a value [1234568]e+xx 476 that is closest to it in the log space. Just because I don't like seven 477 and nine. Call me a numberist? 480 logint = math.floor(log)
481 logfrac = log - logint
483 log2 = 0.3010299956639812
485 log4 = 0.6020599913279624
490 if logfrac < 0.5*(log1+log2):
492 elif logfrac < 0.5*(log2+log3):
494 elif logfrac < 0.5*(log3+log4):
496 elif logfrac < 0.5*(log4+log5):
498 elif logfrac < 0.5*(log5+log6):
500 elif logfrac < 0.5*(log6+log8):
502 elif logfrac < 0.5*(log8+log10):
506 return fac*10**logint
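
# Illustrative usage (hypothetical helper, added for this listing): both functions
# snap a positive value to a nearby "round" mantissa in log space.
def _example_est():
    assert est124(30.0) == 40.0       # 30 is closer to 4e+1 than 2e+1 in log space
    assert est124(15.0) == 20.0
    assert est1234568(30.0) == 30.0   # 3 is an allowed mantissa here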
def monotonic(arr, start, end):
    # Make the array monotonically decreasing between 'start' and 'end',
    # replacing any increases with linear interpolation (modifies arr in place).
    a0 = arr[start]
    i0 = start
    if end > start:
        i = start+1
        while i < end:
            if arr[i] < a0:
                arr[i0:i+1] = np.linspace(a0, arr[i], i-i0+1)
                a0 = arr[i]
                i0 = i
            i += 1
    if end < start:
        i = start-1
        while i >= end:
            if arr[i] < a0:
                arr[i:i0+1] = np.linspace(arr[i], a0, i0-i+1)
                a0 = arr[i]
                i0 = i
            i -= 1

def monotonic_decreasing(arr, start=None, end=None, verbose=False):
    """
    Return the indices of an array corresponding to strictly monotonic
    decreasing behavior.

    Parameters
    ----------
    arr : numpy.ndarray
        Input array of values
    start : int
        Starting index (first element if None)
    end : int
        Ending index (last element if None)

    Returns
    -------
    indices : numpy.ndarray
        Selected indices
    """
    if start is None:
        start = 0
    if end is None:
        end = len(arr) - 1
    a0 = arr[start]
    idx = [start]
    if verbose: logger.info("Starting @ %i : %.6f\n" % (start, arr[start]))
    if end > start:
        i = start+1
        while i < end:
            if arr[i] < a0:
                a0 = arr[i]
                idx.append(i)
                if verbose: logger.info("Including %i : %.6f\n" % (i, arr[i]))
            else:
                if verbose: logger.info("Excluding %i : %.6f\n" % (i, arr[i]))
            i += 1
    if end < start:
        i = start-1
        while i >= end:
            if arr[i] < a0:
                a0 = arr[i]
                idx.append(i)
                if verbose: logger.info("Including %i : %.6f\n" % (i, arr[i]))
            else:
                if verbose: logger.info("Excluding %i : %.6f\n" % (i, arr[i]))
            i -= 1
    return np.array(idx)
581 """Given two vectors vec1 and vec2, project out the component of vec1 582 that is along the vec2-direction. 584 @param[in] vec1 The projectee (i.e. output is some modified version of vec1) 585 @param[in] vec2 The projector (component subtracted out from vec1 is parallel to this) 586 @return answer A copy of vec1 but with the vec2-component projected out. 588 v2u = vec2/np.linalg.norm(vec2)
589 return vec1 - v2u*np.dot(vec1, v2u)
595 Invert a matrix using singular value decomposition. 596 @param[in] X The 2-D NumPy array containing the matrix to be inverted 597 @param[in] thresh The SVD threshold; eigenvalues below this are not inverted but set to zero 598 @return Xt The 2-D NumPy array containing the inverted matrix 602 u,s,vh = np.linalg.svd(X, full_matrices=0)
606 for i
in range(s.shape[0]):
607 if abs(s[i]) > thresh:
612 Xt = multi_dot([v, si, uh])
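
# Illustrative usage (hypothetical helper, added for this listing): invert_svd acts
# like a pseudoinverse; for a well-conditioned matrix it reproduces the ordinary inverse.
def _example_invert_svd():
    A = np.array([[2.0, 0.0], [1.0, 3.0]])
    Ainv = invert_svd(A)
    assert np.allclose(np.dot(A, Ainv), np.eye(2))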
def get_least_squares(x, y, w=None, thresh=1e-12):
    """
    Perform a (weighted) least-squares fit where the design matrix X looks like:

    | 1 (x0) (x0)^2 (x0)^3 |
    | 1 (x1) (x1)^2 (x1)^3 |
    | 1 (x2) (x2)^2 (x2)^3 |
    | 1 (x3) (x3)^2 (x3)^3 |
    | 1 (x4) (x4)^2 (x4)^3 |

    @param[in] X (2-D array) An array of X-values (see above)
    @param[in] Y (array) An array of Y-values (only used in getting the least squares coefficients)
    @param[in] w (array) An array of weights, hopefully normalized to one.
    @param[out] Beta The least-squares coefficients
    @param[out] Hat The hat matrix that takes linear combinations of data y-values to give fitted y-values (weights)
    @param[out] yfit The fitted y-values
    @param[out] MPPI The Moore-Penrose pseudoinverse (multiply by Y to get least-squares coefficients,
    multiply by dY/dk to get derivatives of least-squares coefficients)
    """
    # X is a 'tall' design matrix.
    X = np.array(x)
    if len(X.shape) == 1:
        X = X[:,np.newaxis]
    Y = col(y)
    n_x = X.shape[0]
    n_fit = X.shape[1]
    if n_fit > n_x:
        logger.warning("Argh? It seems like this problem is underdetermined!\n")
    # Build the weight matrix.
    if w is not None:
        if len(w) != n_x:
            warn_press_key("The weight array length (%i) must be the same as the number of 'X' data points (%i)!" % (len(w), n_x))
        w /= np.mean(w)
        WH = np.diag(w**0.5)
    else:
        WH = np.eye(n_x)
    # Make the Moore-Penrose pseudoinverse, resembling (X'WX)^-1 X' W^1/2.
    MPPI = np.linalg.pinv(np.dot(WH, X))
    Beta = multi_dot([MPPI, WH, Y])
    Hat = multi_dot([WH, X, MPPI])
    yfit = flat(np.dot(Hat, Y))
    # Return the coefficients, the hat matrix, the fitted y-values and the pseudoinverse.
    return np.array(Beta).flatten(), np.array(Hat), np.array(yfit).flatten(), np.array(MPPI)
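
# Illustrative usage (hypothetical helper, added for this listing): fit y = a + b*x
# by least squares.  The first return value holds the coefficients, the third the
# fitted y-values.
def _example_least_squares():
    x = np.array([[1.0, 0.0], [1.0, 1.0], [1.0, 2.0], [1.0, 3.0]])  # columns: 1, x
    y = np.array([1.1, 2.9, 5.2, 7.1])
    beta, hat, yfit, mppi = get_least_squares(x, y)
    assert beta.shape == (2,)
    assert np.allclose(np.dot(x, beta), yfit)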
def statisticalInefficiency(A_n, B_n=None, fast=False, mintime=3, warn=True):
    """
    Compute the (cross) statistical inefficiency of (two) timeseries.

    Notes
      The same timeseries can be used for both A_n and B_n to get the autocorrelation statistical inefficiency.
      The fast method described in Ref [1] is used to compute g.

    References
      [1] J. D. Chodera, W. C. Swope, J. W. Pitera, C. Seok, and K. A. Dill. Use of the weighted
      histogram analysis method for the analysis of simulated and parallel tempering simulations.
      JCTC 3(1):26-41, 2007.

    Examples
      Compute statistical inefficiency of timeseries data with known correlation time.

      >>> import timeseries
      >>> A_n = timeseries.generateCorrelatedTimeseries(N=100000, tau=5.0)
      >>> g = statisticalInefficiency(A_n, fast=True)

    @param[in] A_n (required, numpy array) - A_n[n] is nth value of
    timeseries A.  Length is deduced from vector.

    @param[in] B_n (optional, numpy array) - B_n[n] is nth value of
    timeseries B.  Length is deduced from vector.  If supplied, the
    cross-correlation of timeseries A and B will be estimated instead of
    the autocorrelation of timeseries A.

    @param[in] fast (optional, boolean) - if True, will use faster (but
    less accurate) method to estimate correlation time, described in
    Ref. [1] (default: False)

    @param[in] mintime (optional, int) - minimum amount of correlation
    function to compute (default: 3).  The algorithm terminates after
    computing the correlation time out to mintime when the correlation
    function first goes negative.  Note that this time may need to be
    increased if there is a strong initial negative peak in the
    correlation function.

    @return g The estimated statistical inefficiency (equal to 1 + 2
    tau, where tau is the correlation time).  We enforce g >= 1.0.
    """
    # Create numpy copies of the input arguments.
    A_n = np.array(A_n)
    B_n = np.array(B_n) if B_n is not None else np.array(A_n)
    # Get the length of the timeseries.
    N = A_n.shape[0]
    # Be sure A_n and B_n have the same dimensions.
    if A_n.shape != B_n.shape:
        logger.error('A_n and B_n must have same dimensions.\n')
        raise RuntimeError
    # Initialize the statistical inefficiency estimate with the uncorrelated value.
    g = 1.0
    # Compute the mean of each timeseries and the deviations from the mean.
    mu_A = A_n.mean()
    mu_B = B_n.mean()
    dA_n = A_n.astype(np.float64) - mu_A
    dB_n = B_n.astype(np.float64) - mu_B
    # Compute an estimator of the covariance of (A,B) that ensures C(0) = 1.
    sigma2_AB = (dA_n * dB_n).mean()
    # Trap the case where this covariance is zero and we cannot proceed.
    if sigma2_AB == 0:
        if warn:
            logger.warning('Sample covariance sigma_AB^2 = 0 -- cannot compute statistical inefficiency\n')
        return 1.0
    # Accumulate the integrated correlation time, stopping when the normalized
    # correlation function first goes negative (past mintime).
    t = 1
    increment = 1
    while t < N-1:
        # Compute the normalized fluctuation correlation function at lag t.
        C = sum( dA_n[0:(N-t)]*dB_n[t:N] + dB_n[0:(N-t)]*dA_n[t:N] ) / (2.0 * float(N-t) * sigma2_AB)
        if (C <= 0.0) and (t > mintime):
            break
        g += 2.0 * C * (1.0 - float(t)/float(N)) * float(increment)
        t += increment
        # In "fast mode", step through the lag times with increasing stride.
        if fast: increment += 1
    # g must be at least unity.
    if g < 1.0: g = 1.0
    return g

def mean_stderr(ts):
    """Return mean and standard deviation of a time series ts."""
    return np.mean(ts), \
        np.std(ts)*np.sqrt(statisticalInefficiency(ts, warn=False)/len(ts))

def multiD_statisticalInefficiency(A_n, B_n=None, fast=False, mintime=3, warn=True):
    # Column-by-column wrapper around statisticalInefficiency for 2-D timeseries data.
    n_row = A_n.shape[0]
    n_col = A_n.shape[-1]
    multiD_sI = np.zeros((n_row, n_col))
    for col in range(n_col):
        if B_n is None:
            multiD_sI[:,col] = statisticalInefficiency(A_n[:,col], B_n, fast, mintime, warn)
        else:
            multiD_sI[:,col] = statisticalInefficiency(A_n[:,col], B_n[:,col], fast, mintime, warn)
    return multiD_sI
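
# Illustrative usage (hypothetical helper, added for this listing): for uncorrelated
# samples the statistical inefficiency is close to 1; correlated data gives g > 1.
# mean_stderr uses g to inflate the standard error accordingly.
def _example_statistical_inefficiency():
    rng = np.random.RandomState(0)
    A_n = rng.normal(size=2000)
    g = statisticalInefficiency(A_n)
    assert g >= 1.0
    mean, err = mean_stderr(A_n)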
def lp_dump(obj, fnm, protocol=0):
    """ Write an object to a zipped pickle file specified by the path. """
    if os.path.islink(fnm):
        logger.warning("Trying to write to a symbolic link %s, removing it first\n" % fnm)
        os.unlink(fnm)
    if HaveGZ:
        f = gzip.GzipFile(fnm, 'wb')
    elif HaveBZ2:
        f = bz2.BZ2File(fnm, 'wb')
    else:
        f = open(fnm, 'wb')
    Pickler(f, protocol).dump(obj)
    f.close()

def lp_load(fnm):
    """ Read an object from a bzipped file specified by the path. """
    if not os.path.exists(fnm):
        logger.error("lp_load cannot read from a path that doesn't exist (%s)" % fnm)
        raise IOError

    def load_uncompress():
        logger.warning("Compressed file loader failed, attempting to read as uncompressed file\n")
        f = open(fnm, 'rb')
        try:
            answer = Unpickler(f).load()
        except UnicodeDecodeError:
            answer = Unpickler(f, encoding='latin1').load()
        f.close()
        return answer

    def load_bz2():
        f = bz2.BZ2File(fnm, 'rb')
        try:
            answer = Unpickler(f).load()
        except UnicodeDecodeError:
            answer = Unpickler(f, encoding='latin1').load()
        f.close()
        return answer

    def load_gz():
        f = gzip.GzipFile(fnm, 'rb')
        try:
            answer = Unpickler(f).load()
        except UnicodeDecodeError:
            answer = Unpickler(f, encoding='latin1').load()
        f.close()
        return answer

    # Try the available compressed readers first and fall back to an
    # uncompressed read if they fail.
    if HaveGZ:
        try:
            answer = load_gz()
        except:
            if HaveBZ2:
                try:
                    answer = load_bz2()
                except:
                    answer = load_uncompress()
            else:
                answer = load_uncompress()
    elif HaveBZ2:
        try:
            answer = load_bz2()
        except:
            answer = load_uncompress()
    else:
        answer = load_uncompress()
    return answer
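
# Illustrative usage (hypothetical helper, added for this listing): round-trip an
# object through a compressed pickle file.
def _example_lp_roundtrip():
    data = {'params': [1.0, 2.0, 3.0], 'name': 'test'}
    lp_dump(data, 'data.p')          # written with gzip or bz2 if available
    assert lp_load('data.p') == data
    os.remove('data.p')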
# Work Queue (part of the CCTools package) is an optional dependency; the
# functions below assume the work_queue module has been imported successfully.

# Global variable corresponding to the Work Queue object.
WORK_QUEUE = None

# Global variable containing a mapping from target names to Work Queue task IDs.
WQIDS = defaultdict(list)

def createWorkQueue(wq_port, debug=True, name=package):
    global WORK_QUEUE
    if debug:
        work_queue.set_debug_flag('all')
    WORK_QUEUE = work_queue.WorkQueue(port=wq_port)
    WORK_QUEUE.specify_name(name)
    # Prefer the worker that has been fastest on previous tasks.
    WORK_QUEUE.specify_algorithm(work_queue.WORK_QUEUE_SCHEDULE_TIME)

def destroyWorkQueue():
    # Convenience function to reset the Work Queue globals.
    global WORK_QUEUE, WQIDS
    WORK_QUEUE = None
    WQIDS = defaultdict(list)
def queue_up(wq, command, input_files, output_files, tag=None, tgt=None, verbose=True, print_time=60):
    """
    Submit a job to the Work Queue.

    @param[in] wq (Work Queue Object)
    @param[in] command (string) The command to run on the remote worker.
    @param[in] input_files (list of files) A list of locations of the input files.
    @param[in] output_files (list of files) A list of locations of the output files.
    """
    global WQIDS
    task = work_queue.Task(command)
    cwd = os.getcwd()
    for f in input_files:
        lf = os.path.join(cwd,f)
        task.specify_input_file(lf,f,cache=False)
    for f in output_files:
        lf = os.path.join(cwd,f)
        task.specify_output_file(lf,f,cache=False)
    if tag is None: tag = command
    task.specify_tag(tag)
    task.print_time = print_time
    taskid = wq.submit(task)
    if verbose:
        logger.info("Submitting command '%s' to the Work Queue, %staskid %i\n" % (command, "tag %s, " % tag if tag != command else "", taskid))
    if tgt is not None:
        WQIDS[tgt.name].append(taskid)
    else:
        WQIDS["None"].append(taskid)
def queue_up_src_dest(wq, command, input_files, output_files, tag=None, tgt=None, verbose=True, print_time=60):
    """
    Submit a job to the Work Queue.  This function is a bit fancier in that we can explicitly
    specify where the input files come from, and where the output files go to.

    @param[in] wq (Work Queue Object)
    @param[in] command (string) The command to run on the remote worker.
    @param[in] input_files (list of 2-tuples) A list of local and
    remote locations of the input files.
    @param[in] output_files (list of 2-tuples) A list of local and
    remote locations of the output files.
    """
    global WQIDS
    task = work_queue.Task(command)
    for f in input_files:
        # Each entry is (local file, remote file).
        task.specify_input_file(f[0],f[1],cache=False)
    for f in output_files:
        task.specify_output_file(f[0],f[1],cache=False)
    if tag is None: tag = command
    task.specify_tag(tag)
    task.print_time = print_time
    taskid = wq.submit(task)
    if verbose:
        logger.info("Submitting command '%s' to the Work Queue, taskid %i\n" % (command, taskid))
    if tgt is not None:
        WQIDS[tgt.name].append(taskid)
    else:
        WQIDS["None"].append(taskid)
def wq_wait1(wq, wait_time=10, wait_intvl=1, print_time=60, verbose=False):
    """ This function waits ten seconds to see if a task in the Work Queue has finished. """
    global WQIDS
    if verbose: logger.info('---\n')
    if wait_intvl >= wait_time:
        wait_time = wait_intvl
        numwaits = 1
    else:
        numwaits = int(wait_time/wait_intvl)
    for sec in range(numwaits):
        task = wq.wait(wait_intvl)
        if task:
            exectime = task.cmd_execution_time/1000000
            if verbose:
                logger.info('A job has finished!\n')
                logger.info('Job name = ' + task.tag + ' command = ' + task.command + '\n')
                logger.info("status = " + str(task.status) + '\n')
                logger.info("return_status = " + str(task.return_status) + '\n')
                logger.info("result = " + str(task.result) + '\n')
                logger.info("host = " + task.hostname + '\n')
                logger.info("execution time = " + str(exectime) + '\n')
                logger.info("total_bytes_transferred = " + str(task.total_bytes_transferred) + '\n')
            if task.result != 0:
                # The task failed; resubmit it to the Work Queue.
                oldid = task.id
                oldhost = task.hostname
                tgtname = "None"
                for tnm in WQIDS:
                    if task.id in WQIDS[tnm]:
                        tgtname = tnm
                        WQIDS[tnm].remove(task.id)
                taskid = wq.submit(task)
                logger.warning("Task '%s' (task %i) failed on host %s (%i seconds), resubmitted: taskid %i\n" % (task.tag, oldid, oldhost, exectime, taskid))
                WQIDS[tgtname].append(taskid)
            else:
                # The task succeeded; remove it from the record of submitted tasks.
                if hasattr(task, 'print_time'):
                    print_time = task.print_time
                if exectime > print_time:
                    logger.info("Task '%s' (task %i) finished successfully on host %s (%i seconds)\n" % (task.tag, task.id, task.hostname, exectime))
                for tnm in WQIDS:
                    if task.id in WQIDS[tnm]:
                        WQIDS[tnm].remove(task.id)
                del task
        if verbose:
            logger.info("Workers: %i init, %i idle, %i busy, %i total joined, %i total removed\n" \
                % (wq.stats.workers_init, wq.stats.workers_idle, wq.stats.workers_busy, wq.stats.workers_joined, wq.stats.workers_removed))
            logger.info("Tasks: %i running, %i waiting, %i dispatched, %i submitted, %i total complete\n" \
                % (wq.stats.tasks_running, wq.stats.tasks_waiting, wq.stats.tasks_dispatched, wq.stats.tasks_submitted, wq.stats.tasks_done))
            logger.info("Data: %i / %i kb sent/received\n" % (int(wq.stats.bytes_sent/1024), int(wq.stats.bytes_received/1024)))
        else:
            logger.info("\r%s : %i/%i workers busy; %i/%i jobs complete \r" %\
                (time.ctime(), wq.stats.workers_busy, wq.stats.workers_connected, wq.stats.tasks_done, wq.stats.tasks_submitted))
            if time.time() - wq_wait1.t0 > 900:
                wq_wait1.t0 = time.time()
                logger.info('\n')
wq_wait1.t0 = time.time()
def wq_wait(wq, wait_time=10, wait_intvl=10, print_time=60, verbose=False):
    """ This function waits until the work queue is completely empty. """
    while not wq.empty():
        wq_wait1(wq, wait_time=wait_time, wait_intvl=wait_intvl, print_time=print_time, verbose=verbose)
1033 """ Stopwatch function for timing. """ 1034 ans = time.time() - click.t0
1035 click.t0 = time.time()
1037 click.t0 = time.time()
1042 parts = os.path.split(path)
1043 if parts[0] == path:
1044 allparts.insert(0, parts[0])
1046 elif parts[1] == path:
1047 allparts.insert(0, parts[1])
1051 allparts.insert(0, parts[1])
def bak(path, dest=None, cwd=None, start=1):
    # Back up an existing file by moving it to a numbered name such as base_1.ext.
    oldf = path
    newf = None
    if cwd is not None:
        if not os.path.exists(cwd):
            raise RuntimeError("%s is not an existing folder" % cwd)
        old_d = os.getcwd()
        os.chdir(cwd)
    if os.path.exists(path):
        dnm, fnm = os.path.split(path)
        if dnm == '' : dnm = '.'
        base, ext = os.path.splitext(fnm)
        if dest is None:
            dest = dnm
        if not os.path.isdir(dest): os.makedirs(dest)
        i = start
        while True:
            fnm = "%s_%i%s" % (base,i,ext)
            newf = os.path.join(dest, fnm)
            if not os.path.exists(newf): break
            i += 1
        logger.info("Backing up %s -> %s\n" % (oldf, newf))
        shutil.move(oldf,newf)
    if cwd is not None:
        os.chdir(old_d)
    return newf
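
# Illustrative usage (hypothetical helper, added for this listing): bak moves an
# existing file out of the way instead of overwriting it.
def _example_bak():
    with open('settings.in', 'w') as f:
        f.write('old contents\n')
    bak('settings.in')               # moves settings.in -> settings_1.in
    assert not os.path.exists('settings.in')
    assert os.path.exists('settings_1.in')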
def onefile(fnm=None, ext=None, err=False):
    # Return a single file, either the one specified by fnm (copying it into the
    # current directory if needed) or the unique file in the current directory
    # with the given extension.
    if fnm is None and ext is None:
        if err:
            logger.error("Must provide either filename or extension to onefile()")
            raise RuntimeError
        else:
            return None
    if fnm is not None:
        if os.path.exists(fnm):
            if os.path.dirname(os.path.abspath(fnm)) != os.getcwd():
                fsrc = os.path.abspath(fnm)
                fdest = os.path.join(os.getcwd(), os.path.basename(fnm))
                # If the file is not in the current directory, copy it over; if a
                # different file with the same name already exists here, crash.
                if os.path.exists(fdest):
                    if not filecmp.cmp(fsrc, fdest):
                        logger.error("onefile() will not overwrite %s with %s\n" % (os.path.join(os.getcwd(), os.path.basename(fnm)),os.path.abspath(fnm)))
                        raise RuntimeError
                    else:
                        logger.info("\x1b[93monefile() says the files %s and %s are identical\x1b[0m\n" % (os.path.abspath(fnm), os.getcwd()))
                else:
                    logger.info("\x1b[93monefile() will copy %s to %s\x1b[0m\n" % (os.path.abspath(fnm), os.getcwd()))
                    shutil.copy2(fsrc, fdest)
            return os.path.basename(fnm)
        elif err==True or ext is None:
            logger.error("File specified by %s does not exist!" % fnm)
            raise RuntimeError
        elif ext is not None:
            warn_once("File specified by %s does not exist - will try to autodetect .%s extension" % (fnm, ext))
    answer = None
    cwd = os.getcwd()
    ls = [i for i in os.listdir(cwd) if i.endswith('.%s' % ext)]
    if len(ls) != 1:
        if err:
            logger.error("Cannot find a unique file with extension .%s in %s (%i found; %s)" % (ext, cwd, len(ls), ' '.join(ls)))
            raise RuntimeError
        else:
            warn_once("Cannot find a unique file with extension .%s in %s (%i found; %s)" %
                      (ext, cwd, len(ls), ' '.join(ls)), warnhash = "Found %i .%s files" % (len(ls), ext))
    else:
        answer = os.path.basename(ls[0])
        warn_once("Autodetected %s in %s" % (answer, cwd), warnhash = "Autodetected %s" % answer)
    return answer
def listfiles(fnms=None, ext=None, err=False, dnm=None):
    # Create a list of files from the file names provided by the user, copying
    # them into the current directory if needed (same policy as onefile).
    answer = []
    cwd = os.path.abspath(os.getcwd())
    if dnm is not None:
        os.chdir(dnm)
    if isinstance(fnms, list):
        for i in fnms:
            if not os.path.exists(i):
                logger.error('Specified %s but it does not exist' % i)
                raise RuntimeError
            answer.append(i)
    elif isinstance(fnms, six.string_types):
        if not os.path.exists(fnms):
            logger.error('Specified %s but it does not exist' % fnms)
            raise RuntimeError
        answer = [fnms]
    elif fnms is not None:
        logger.info(str(fnms))
        logger.error('First argument to listfiles must be a list, a string, or None')
        raise RuntimeError
    if answer == [] and ext is not None:
        answer = [os.path.basename(i) for i in os.listdir(os.getcwd()) if i.endswith('.%s' % ext)]
    if answer == [] and err:
        logger.error('listfiles function failed to come up with a file! (fnms = %s ext = %s)' % (str(fnms), str(ext)))
        raise RuntimeError
    for ifnm, fnm in enumerate(answer):
        if os.path.dirname(os.path.abspath(fnm)) != os.getcwd():
            fsrc = os.path.abspath(fnm)
            fdest = os.path.join(os.getcwd(), os.path.basename(fnm))
            # If the file is not in the current directory, copy it over; if a
            # different file with the same name already exists here, crash.
            if os.path.exists(fdest):
                if not filecmp.cmp(fsrc, fdest):
                    logger.error("onefile() will not overwrite %s with %s\n" % (os.path.join(os.getcwd(), os.path.basename(fnm)),os.path.abspath(fnm)))
                    raise RuntimeError
                else:
                    logger.info("\x1b[93monefile() says the files %s and %s are identical\x1b[0m\n" % (os.path.abspath(fnm), os.getcwd()))
                    answer[ifnm] = os.path.basename(fnm)
            else:
                logger.info("\x1b[93monefile() will copy %s to %s\x1b[0m\n" % (os.path.abspath(fnm), os.getcwd()))
                shutil.copy2(fsrc, fdest)
                answer[ifnm] = os.path.basename(fnm)
    if dnm is not None:
        os.chdir(cwd)
    return answer
def extract_tar(tarfnm, fnms, force=False):
    """
    Extract a list of files from .tar archive with any compression.
    The file is extracted to the base folder of the archive.

    Parameters
    ----------
    tarfnm :
        Name of the archive file.
    fnms : str or list
        File names to be extracted.
    force : bool, optional
        If true, then force extraction of file even if they already exist on disk.
    """
    # Get the folder that contains the tar file.
    fdir = os.path.abspath(os.path.dirname(tarfnm))
    # If all the files already exist, there is no need to extract.
    if (not force) and all([os.path.exists(os.path.join(fdir, f)) for f in fnms]): return
    # If the tar file doesn't exist or isn't valid, do nothing.
    if not os.path.exists(tarfnm): return
    if not tarfile.is_tarfile(tarfnm): return
    # Accept a single file name as well as a list.
    if isinstance(fnms, six.string_types): fnms = [fnms]
    # Load the tar file.
    arch = tarfile.open(tarfnm, 'r')
    # Extract only the members we were asked for (avoids an exception for missing names).
    all_members = arch.getmembers()
    all_names = [f.name for f in all_members]
    members = [f for f in all_members if f.name in fnms]
    # Extract the files to the destination.
    arch.extractall(fdir, members=members)
def GoInto(Dir):
    # Create the directory if it doesn't exist, then change into it.
    if os.path.exists(Dir):
        if os.path.isdir(Dir): pass
        else:
            logger.error("Tried to create directory %s, it exists but isn't a directory\n" % Dir)
            raise RuntimeError
    else:
        os.makedirs(Dir)
    os.chdir(Dir)

def allsplit(Dir):
    # Split a directory into all of the directories involved.
    s = os.path.split(os.path.normpath(Dir))
    if s[1] == '' or s[1] == '.' : return []
    return allsplit(s[0]) + [s[1]]

def Leave(Dir):
    # Step back out of the directory that GoInto() entered.
    if os.path.split(os.getcwd())[1] != Dir:
        logger.error("Trying to leave directory %s, but we're actually in directory %s (check your code)\n" % (Dir,os.path.split(os.getcwd())[1]))
        raise RuntimeError
    for i in range(len(allsplit(Dir))):
        os.chdir('..')
def MissingFileInspection(fnm):
    # Map missing file names (or regular expressions) to advice about which
    # package or setup step probably provides them.
    specific_lst = [(['mdrun', 'grompp', 'trjconv', 'g_energy', 'g_traj'], "Make sure to install GROMACS and add it to your path (or set the gmxpath option)"),
                    (['force.mdin', 'stage.leap'], "This file is needed for setting up AMBER force matching targets"),
                    (['conf.pdb', 'mono.pdb'], "This file is needed for setting up OpenMM condensed phase property targets"),
                    (['liquid.xyz', 'liquid.key', 'mono.xyz', 'mono.key'], "This file is needed for setting up OpenMM condensed phase property targets"),
                    (['dynamic', 'analyze', 'minimize', 'testgrad', 'vibrate', 'optimize', 'polarize', 'superpose'], "Make sure to install TINKER and add it to your path (or set the tinkerpath option)"),
                    (['runcuda.sh', 'npt.py', 'npt_tinker.py'], "This file belongs in the ForceBalance source directory, not sure why it is missing"),
                    (['input.xyz'], "This file is needed for TINKER molecular property targets"),
                    (['.*key$', '.*xyz$'], "I am guessing this file is probably needed by TINKER"),
                    (['.*gro$', '.*top$', '.*itp$', '.*mdp$', '.*ndx$'], "I am guessing this file is probably needed by GROMACS")]
    # Flatten the list of (names, advice) pairs into a dictionary keyed by name/pattern.
    specific_dct = dict(list(itertools.chain(*[[(j,i[1]) for j in i[0]] for i in specific_lst])))
    fnm = os.path.split(fnm)[1]
    answer = ""
    for key in specific_dct:
        if re.match(key, fnm):
            answer += "%s\n" % specific_dct[key]
    return answer
def wopen(dest, binary=False):
    """ If trying to write to a symbolic link, remove it first. """
    if os.path.islink(dest):
        logger.warning("Trying to write to a symbolic link %s, removing it first\n" % dest)
        os.unlink(dest)
    if binary:
        return open(dest, 'wb')
    else:
        return open(dest, 'w')
def LinkFile(src, dest, nosrcok = False):
    if os.path.abspath(src) == os.path.abspath(dest): return
    if os.path.exists(src):
        # Remove a broken symbolic link at the destination before relinking.
        if os.path.islink(dest) and not os.path.exists(dest):
            os.remove(dest)
            os.symlink(src, dest)
        elif os.path.exists(dest):
            if os.path.islink(dest): pass
            else:
                logger.error("Tried to create symbolic link %s to %s, destination exists but isn't a symbolic link\n" % (src, dest))
                raise RuntimeError
        else:
            os.symlink(src, dest)
    else:
        if not nosrcok:
            logger.error("Tried to create symbolic link %s to %s, but source file doesn't exist%s\n" % (src,dest,MissingFileInspection(src)))
            raise RuntimeError
def CopyFile(src, dest):
    if os.path.exists(src):
        if os.path.exists(dest):
            if os.path.islink(dest):
                logger.error("Tried to copy %s to %s, destination exists but it's a symbolic link\n" % (src, dest))
                raise RuntimeError
        else:
            shutil.copy2(src, dest)
    else:
        logger.error("Tried to copy %s to %s, but source file doesn't exist%s\n" % (src,dest,MissingFileInspection(src)))
        raise RuntimeError
def link_dir_contents(abssrcdir, absdestdir):
    for fnm in os.listdir(abssrcdir):
        srcfnm = os.path.join(abssrcdir, fnm)
        destfnm = os.path.join(absdestdir, fnm)
        # Remove broken symbolic links at the destination first.
        if os.path.islink(destfnm) and not os.path.exists(destfnm):
            os.remove(destfnm)
        if os.path.isfile(srcfnm) or (os.path.isdir(srcfnm) and fnm == 'IC'):
            if not os.path.exists(destfnm):
                os.symlink(srcfnm, destfnm)
1323 """ Remove the file if it exists (doesn't return an error). """ 1324 if os.path.exists(fnm):
1330 return os.path.split(os.popen(
'which %s 2> /dev/null' % fnm).readlines()[0].strip())[0]
1336 Copy a source directory tree to a destination directory tree, 1337 overwriting files as necessary. This does not require removing 1338 the destination folder, which can reduce the number of times 1339 shutil.rmtree needs to be called. 1344 distutils.dir_util._path_created = {}
1345 distutils.dir_util.copy_tree(src, dest)
class LineChunker(object):
    # Accumulate raw bytes from a stream and invoke the callback once per
    # complete line (split on newline or carriage return).
    def __init__(self, callback):
        self.callback = callback
        self.buf = ""

    def push(self, data):
        self.buf += data.decode('utf-8')
        self.nomnom()

    def close(self):
        # Flush any trailing partial line.
        if self.buf:
            self.callback(self.buf + "\n")

    def nomnom(self):
        # Split the buffer by newline or carriage return and pass complete
        # chunks to the callback.
        while "\n" in self.buf or "\r" in self.buf:
            chunk, sep, self.buf = re.split(r"(\r|\n)", self.buf, maxsplit=1)
            self.callback(chunk + sep)

    def __enter__(self):
        return self

    def __exit__(self, *args, **kwargs):
        self.close()
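
# Illustrative usage (hypothetical helper, added for this listing; assumes the
# push()/close() behavior sketched above): the callback receives one chunk per
# complete line, and the trailing partial line is flushed when the context exits.
def _example_linechunker():
    lines = []
    with LineChunker(lines.append) as chunker:
        chunker.push(b"first line\nsecond ")
        chunker.push(b"line\npartial")
    assert lines == ["first line\n", "second line\n", "partial\n"]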
def _exec(command, print_to_screen = False, outfnm = None, logfnm = None, stdin = "", print_command = True, copy_stdout = True, copy_stderr = False, persist = False, expand_cr=False, print_error=True, rbytes=1, cwd=None, **kwargs):
    """Runs command line using subprocess, optionally returning stdout.

    Options:
    command (required) = Name of the command you want to execute
    outfnm (optional) = Name of the output file name (overwritten if exists)
    logfnm (optional) = Name of the log file name (appended if exists)
    stdin (optional) = A string to be passed to stdin, as if it were typed (use newline character to mimic Enter key)
    print_command = Whether to print the command.
    copy_stdout = Copy the stdout stream; can set to False in strange situations
    copy_stderr = Copy the stderr stream to the stdout stream; useful for GROMACS which prints out everything to stderr (argh.)
    expand_cr = Whether to expand carriage returns into newlines (useful for GROMACS mdrun).
    print_error = Whether to print error messages on a crash.  Should be true most of the time.
    persist = Continue execution even if the command gives a nonzero return code.
    rbytes = Number of bytes to read from stdout and stderr streams at a time.
    GMX requires rbytes = 1 otherwise streams are interleaved.  Higher values for speed.
    """

    # Options to be passed to the Popen object.
    cmd_options={'shell':isinstance(command, six.string_types), 'stdin':PIPE, 'stdout':PIPE, 'stderr':PIPE, 'universal_newlines':expand_cr, 'cwd':cwd}

    # If a working directory is provided, the output and log files are written there as well.
    if cwd is not None:
        if outfnm is not None:
            outfnm = os.path.abspath(os.path.join(cwd, outfnm))
        if logfnm is not None:
            logfnm = os.path.abspath(os.path.join(cwd, logfnm))

    # "Write to file": writes a chunk of output to the log and/or output files.
    def wtf(out):
        if logfnm is not None:
            with open(logfnm, 'ab+') as f:
                f.write(out.encode('utf-8'))
                f.flush()
        if outfnm is not None:
            with open(outfnm, 'wb+' if wtf.first else 'ab+') as f:
                f.write(out.encode('utf-8'))
                f.flush()
        wtf.first = False
    wtf.first = True

    # Preserve backwards compatibility; sometimes None gets passed to stdin.
    if stdin is None: stdin = ""

    if print_command:
        logger.info("Executing process: \x1b[92m%-50s\x1b[0m%s%s%s%s\n" % (' '.join(command) if type(command) is list else command,
                                                                           " In: %s" % cwd if cwd is not None else "",
                                                                           " Output: %s" % outfnm if outfnm is not None else "",
                                                                           " Append: %s" % logfnm if logfnm is not None else "",
                                                                           (" Stdin: %s" % stdin.replace('\n','\\n')) if stdin else ""))
        wtf("Executing process: %s%s\n" % (command, (" Stdin: %s" % stdin.replace('\n','\\n')) if stdin else ""))

    cmd_options.update(kwargs)
    p = subprocess.Popen(command, **cmd_options)

    # Write the stdin stream to the process and close it.
    p.stdin.write(stdin.encode('ascii'))
    p.stdin.close()

    # Read the stdout and stderr streams in chunks.
    streams = [p.stdout, p.stderr]
    # Are we using Python 2?
    p2 = sys.version_info.major == 2

    # These two functions consume chunks of complete lines from the LineChunkers below.
    def process_out(read):
        if print_to_screen:
            if p2:
                sys.stdout.write(str(read.encode('utf-8')))
            else:
                sys.stdout.write(read)
        if copy_stdout:
            process_out.stdout.append(read)
            wtf(read)
    process_out.stdout = []

    def process_err(read):
        if print_to_screen:
            if p2:
                sys.stderr.write(str(read.encode('utf-8')))
            else:
                sys.stderr.write(read)
        process_err.stderr.append(read)
        if copy_stderr:
            process_out.stdout.append(read)
            wtf(read)
    process_err.stderr = []

    # Read the streams a few bytes at a time and pass the bytes to the LineChunker,
    # which splits them by newline or carriage return.  When a stream ends, it is
    # removed from the list of streams to monitor.
    out_chunker = LineChunker(process_out)
    err_chunker = LineChunker(process_err)

    with out_chunker, err_chunker:
        while True:
            to_read, _, _ = select(streams, [], [])
            for fh in to_read:
                # Use fh.read1 where available (Python 3) because fh.read may block.
                if hasattr(fh, 'read1'):
                    fhread = fh.read1
                else:
                    fhread = fh.read
                if fh is p.stdout:
                    read_nbytes = 0
                    read = ''.encode('utf-8')
                    while True:
                        # Read rbytes at first, then one byte at a time to complete
                        # any partially read multi-byte UTF-8 character.
                        if read_nbytes == 0:
                            read += fhread(rbytes)
                            read_nbytes += rbytes
                        else:
                            read += fhread(1)
                            read_nbytes += 1
                        if read_nbytes > 10+rbytes:
                            raise RuntimeError("Failed to decode stdout from external process.")
                        if not read:
                            streams.remove(p.stdout)
                            p.stdout.close()
                            break
                        else:
                            try:
                                out_chunker.push(read)
                                break
                            except UnicodeDecodeError:
                                pass
                elif fh is p.stderr:
                    read_nbytes = 0
                    read = ''.encode('utf-8')
                    while True:
                        if read_nbytes == 0:
                            read += fhread(rbytes)
                            read_nbytes += rbytes
                        else:
                            read += fhread(1)
                            read_nbytes += 1
                        if read_nbytes > 10+rbytes:
                            raise RuntimeError("Failed to decode stderr from external process.")
                        if not read:
                            streams.remove(p.stderr)
                            p.stderr.close()
                            break
                        else:
                            try:
                                err_chunker.push(read)
                                break
                            except UnicodeDecodeError:
                                pass
                else:
                    raise RuntimeError
            if len(streams) == 0: break

    p.wait()

    process_out.stdout = ''.join(process_out.stdout)
    process_err.stderr = ''.join(process_err.stderr)

    _exec.returncode = p.returncode
    if p.returncode != 0:
        if process_err.stderr and print_error:
            logger.warning("Received an error message:\n")
            logger.warning("\n[====] \x1b[91mError Message\x1b[0m [====]\n")
            logger.warning(process_err.stderr)
            logger.warning("[====] \x1b[91mEnd o'Message\x1b[0m [====]\n")
        if persist:
            logger.info("%s gave a return code of %i (it may have crashed) -- carrying on\n" % (command, p.returncode))
        else:
            logger.error("\x1b[1;94m%s\x1b[0m gave a return code of %i (\x1b[91mit may have crashed\x1b[0m)\n\n" % (command, p.returncode))
            raise RuntimeError

    # Return the output as a list of lines, so we can loop over it using "for line in output".
    Out = process_out.stdout.split('\n')
    if Out[-1] == '':
        Out = Out[:-1]
    return Out
_exec.returncode = None

def warn_press_key(warning, timeout=10):
    logger.warning(warning + '\n')
    if sys.stdin.isatty():
        logger.warning("\x1b[1;91mPress Enter or wait %i seconds (I assume no responsibility for what happens after this!)\x1b[0m\n" % timeout)
        try:
            rlist, wlist, xlist = select([sys.stdin], [], [], timeout)
            if rlist:
                sys.stdin.readline()
        except:
            pass
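
# Illustrative usage (hypothetical helper, added for this listing; the return
# format follows the reconstruction of _exec above): run a shell command and
# capture stdout as a list of lines; persist=True tolerates a nonzero exit code.
def _example_exec():
    lines = _exec("echo hello && echo world", print_command=False)
    assert lines == ["hello", "world"]
    _exec("false", persist=True, print_command=False)   # nonzero exit tolerated
    assert _exec.returncode != 0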
def warn_once(warning, warnhash = None):
    """ Prints a warning but will only do so once in a given run. """
    if warnhash is None:
        warnhash = warning
    if warnhash in warn_once.already:
        return
    warn_once.already.add(warnhash)
    if type(warning) is str:
        logger.info(warning + '\n')
    elif type(warning) is list:
        for line in warning:
            logger.info(line + '\n')
warn_once.already = set()

def concurrent_map(func, data):
    """
    Similar to the builtin function map().  But spawn a thread for each argument
    and apply `func` concurrently.

    Note: unlike map(), we cannot take an iterable argument.  `data` should be an
    indexable sequence.
    """
    N = len(data)
    result = [None] * N

    # Wrapper that stores each result in the right slot.
    def task_wrapper(i):
        result[i] = func(data[i])

    threads = [threading.Thread(target=task_wrapper, args=(i,)) for i in range(N)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    return result
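
# Illustrative usage (hypothetical helper, added for this listing): apply a function
# to each element of a sequence with one thread per element, preserving order.
def _example_concurrent_map():
    assert concurrent_map(lambda x: x * x, [1, 2, 3, 4]) == [1, 4, 9, 16]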
def mean_stderr(ts)
Return mean and standard deviation of a time series ts.
def remove_if_exists(fnm)
Remove the file if it exists (doesn't return an error).
def MissingFileInspection(fnm)
def copy_tree_over(src, dest)
Copy a source directory tree to a destination directory tree, overwriting files as necessary...
def isdecimal(word)
Matches things with a decimal only; see isint and isfloat.
def __init__(self, stream=sys.stdout)
def pmat2d(mat2d, precision=1, format="e", loglevel=INFO)
Printout of a 2-D array.
def concurrent_map(func, data)
Similar to the builtin function map().
def lp_load(fnm)
Read an object from a bzipped file specified by the path.
def extract_tar(tarfnm, fnms, force=False)
Extract a list of files from .tar archive with any compression.
def LinkFile(src, dest, nosrcok=False)
def createWorkQueue(wq_port, debug=True, name=package)
def invert_svd(X, thresh=1e-12)
Invert a matrix using singular value decomposition.
def flat(vec)
Given any list, array, or matrix, return a single-index array.
def statisticalInefficiency(A_n, B_n=None, fast=False, mintime=3, warn=True)
Compute the (cross) statistical inefficiency of (two) timeseries.
def natural_sort(l)
Return a natural sorted list.
def get_least_squares(x, y, w=None, thresh=1e-12)
def floatornan(word)
Returns a big number if we encounter NaN.
def queue_up(wq, command, input_files, output_files, tag=None, tgt=None, verbose=True, print_time=60)
Submit a job to the Work Queue.
def lp_dump(obj, fnm, protocol=0)
Write an object to a zipped pickle file specified by the path.
def col(vec)
Given any list, array, or matrix, return a 1-column 2D array.
def monotonic(arr, start, end)
def est124(val)
Given any positive floating point value, return a value [124]e+xx that is closest to it in the log sp...
def queue_up_src_dest(wq, command, input_files, output_files, tag=None, tgt=None, verbose=True, print_time=60)
Submit a job to the Work Queue.
def link_dir_contents(abssrcdir, absdestdir)
def wq_wait(wq, wait_time=10, wait_intvl=10, print_time=60, verbose=False)
This function waits until the work queue is completely empty.
class RawFileHandler(FileHandler)
Exactly like FileHandler, except no newline character is printed at the end of each message...
def est1234568(val)
Given any positive floating point value, return a value [1234568]e+xx that is closest to it in the lo...
def orthogonalize(vec1, vec2)
Given two vectors vec1 and vec2, project out the component of vec1 that is along the vec2-direction...
def warn_press_key(warning, timeout=10)
def printcool_dictionary(Dict, title="Dictionary Keys : Values", bold=False, color=2, keywidth=25, topwidth=50, center=True, leftpad=0)
See documentation for printcool; this is a nice way to print out keys/values in a dictionary...
def grouper(iterable, n)
Collect data into fixed-length chunks or blocks.
def wq_wait1(wq, wait_time=10, wait_intvl=1, print_time=60, verbose=False)
This function waits ten seconds to see if a task in the Work Queue has finished.
def printcool(text, sym="#", bold=False, color=2, ansi=None, bottom='-', minwidth=50, center=True, sym2="=")
Cool-looking printout for slick formatting of output.
def multiD_statisticalInefficiency(A_n, B_n=None, fast=False, mintime=3, warn=True)
def click()
Stopwatch function for timing.
def bak(path, dest=None, cwd=None, start=1)
def pvec1d(vec1d, precision=1, format="e", loglevel=INFO)
Printout of a 1-D vector.
def onefile(fnm=None, ext=None, err=False)
def isint(word)
ONLY matches integers! If you have a decimal point? None shall pass!
def listfiles(fnms=None, ext=None, err=False, dnm=None)
def warn_once(warning, warnhash=None)
Prints a warning but will only do so once in a given run.
def astr(vec1d, precision=4)
Write an array to a string so we can use it to key a dictionary.
def wopen(dest, binary=False)
If trying to write to a symbolic link, remove it first.
def monotonic_decreasing(arr, start=None, end=None, verbose=False)
Return the indices of an array corresponding to strictly monotonic decreasing behavior.
def isfloat(word)
Matches ANY number; it can be a decimal, scientific notation, what have you CAUTION - this will also ...
def row(vec)
Given any list, array, or matrix, return a 1-row 2D array.
class RawStreamHandler(StreamHandler)
Exactly like StreamHandler, except no newline character is printed at the end of each message...