# Source code for nestly.scripts.nestagg

"""
Aggregate results of ``nestly`` runs.
"""

import argparse
import collections
import csv
import functools
import itertools
import os.path
import json
import sys

from ..core import control_iter, nest_map

# Default field delimiter for delimited files
DEFAULT_SEP = ','
# Default file name of a control file
DEFAULT_NAME = 'control.json'

# Order-preserving variants of ``json.load`` / ``json.loads``: decoded JSON
# objects come back as ``OrderedDict`` so keys stay in file order.
_ordered_load, _ordered_loads = (
    functools.partial(parser, object_pairs_hook=collections.OrderedDict)
    for parser in (json.load, json.loads))

def warn(message):
    """
    Write ``message`` to standard error, followed by a newline.

    :param message: Text to report; non-string objects are converted with
        ``str.format``.
    """
    # ``print >>sys.stderr, ...`` is Python 2-only syntax (on Python 3 it is
    # parsed as a shift expression and raises TypeError). Writing to the
    # stream directly produces the same output on both versions.
    sys.stderr.write('{0}\n'.format(message))
def _warn_on_io(fn):
    """
    Wrap generator function ``fn`` so that an ``IOError`` becomes a warning.

    The returned generator function yields everything ``fn`` yields. If an
    ``IOError`` is raised while iterating, the error text is passed to
    :func:`warn` and iteration simply ends instead of propagating the error.

    :param fn: Function returning an iterable (typically a generator function)
    :returns: Generator function with the same signature as ``fn``
    """
    @functools.wraps(fn)
    def f(*args, **kwargs):
        r = fn(*args, **kwargs)
        try:
            for i in r:
                yield i
        except IOError as e:
            warn(str(e))
    return f


def _delim_accum(control_files, filename_template, keys=None,
                 exclude_keys=None, separator=DEFAULT_SEP,
                 missing_action='fail'):
    """
    Accumulator for delimited files

    Combines each file with values from JSON dictionary in same directory

    :param iterable control_files: Iterable of control files
    :param filename_template: A template for the file to read; formatted with
        the entries of the control dictionary (``str.format``) and joined to
        the directory passed to the mapping function.
    :param keys: List of keys to select from JSON dictionary. If ``None``,
        keep all keys.
    :param exclude_keys: Keys to remove from the selected keys. Ignored when
        empty or ``None``.
    :param separator: Delimiter
    :param missing_action: If ``'warn'``, an ``IOError`` while reading a file
        is reported through :func:`warn` and the rest of that file is
        skipped; any other value lets the error propagate.
    :returns: Iterator of ``OrderedDict`` rows: the columns of the delimited
        file followed by the selected control values.
    :raises ValueError: (on iteration) if a selected key is not present in a
        control dictionary.
    """
    def map_fn(d, control, keys=keys):
        f = os.path.join(d, filename_template.format(**control))
        keys = keys if keys is not None else control.keys()
        # A frozenset gives O(1) membership tests in the per-row loop below;
        # output order is driven by ``control.items()``, not by ``keys``.
        selected = frozenset(keys)
        if exclude_keys:
            selected = selected - frozenset(exclude_keys)

        missing = selected - frozenset(control)
        if missing:
            # Unknown keys; sorted so the message is deterministic
            raise ValueError(
                "The following required key(s) are not present in {1}: {0}".format(
                    ', '.join(sorted(missing)), f))

        with open(f) as fp:
            reader = csv.DictReader(fp, delimiter=separator)
            for row in reader:
                # File columns first, then the selected control values
                row_dict = collections.OrderedDict(
                    itertools.chain(((k, row[k]) for k in reader.fieldnames),
                                    ((k, v) for k, v in control.items()
                                     if k in selected)))
                yield row_dict

    if missing_action == 'warn':
        map_fn = _warn_on_io(map_fn)

    # NOTE(review): presumably nest_map calls map_fn(directory, control_dict)
    # once per control file -- confirm against nestly.core.
    return itertools.chain.from_iterable(nest_map(control_files, map_fn))
def delim(arguments):
    """
    Execute delim action.

    Reads the delimited file associated with each control file, appends the
    selected control values to every row, and writes the combined rows (with
    a header) to ``arguments.output``. Nothing is written when there are no
    rows at all.

    :param arguments: Parsed command line arguments from :func:`main`
    :raises ValueError: unless exactly one of ``control_files`` and
        ``directory`` is given.
    """
    if bool(arguments.control_files) == bool(arguments.directory):
        raise ValueError(
            'Exactly one of control_files and `-d` must be specified.')

    if arguments.directory:
        arguments.control_files.extend(control_iter(arguments.directory))

    with arguments.output as fp:
        results = _delim_accum(arguments.control_files,
                               arguments.file_template, arguments.keys,
                               arguments.exclude_keys, arguments.separator,
                               missing_action=arguments.missing_action)
        # The first row determines the header. With no rows at all (e.g. every
        # file skipped under ``--missing-action warn``) there is nothing to
        # write; a bare ``next(results)`` would leak StopIteration instead.
        r = next(results, None)
        if r is None:
            return

        writer = csv.DictWriter(fp, list(r), delimiter=arguments.separator)
        writer.writeheader()
        writer.writerow(r)
        writer.writerows(results)
def comma_separated_values(s):
    """
    Split ``s`` on commas.

    :param s: String such as ``'a,b,c'``
    :returns: List of the comma-delimited pieces, unmodified
    """
    return s.split(',')
def main(args=None):
    """
    Command-line interface for nestagg

    :param args: List of command line arguments to parse. If ``None``
        (the default), ``sys.argv[1:]`` is read at call time.
    """
    parser = argparse.ArgumentParser(description="""Aggregate results of
            nestly runs""")
    subparsers = parser.add_subparsers()
    delim_parser = subparsers.add_parser('delim', help="""Combine control
            files with delimited files.""")
    delim_parser.set_defaults(func=delim)

    # --keys and --exclude-keys cannot be combined
    key_group = delim_parser.add_mutually_exclusive_group()
    key_group.add_argument('-k', '--keys', help="""Comma separated list of
            keys from the JSON file to include [default: all keys]""",
            type=comma_separated_values)
    key_group.add_argument('-x', '--exclude-keys', help="""Comma separated
            list of keys from the JSON file not to include [default:
            %(default)s]""", type=comma_separated_values)

    delim_parser.add_argument('-m', '--missing-action',
            choices=('fail', 'warn'), help="""Action to take when a file is
            missing [default: %(default)s]""", default='fail')
    delim_parser.add_argument('file_template', help="""Template for the
            delimited file to read in each directory [e.g.
            '{run_id}.csv']""")
    delim_parser.add_argument('control_files', metavar="control.json",
            help="""Control files""", nargs="*")
    delim_parser.add_argument('-d', '--directory', help="""Run on all control
            files under %(metavar)s. May be used in place of specifying
            control files.""", metavar='DIR')
    delim_parser.add_argument('-s', '--separator', default=DEFAULT_SEP,
            help="""Separator [default: %(default)s]""")
    delim_parser.add_argument('-t', '--tab', action='store_const',
            dest='separator', const='\t', help="""Files are
            tab-separated""")
    delim_parser.add_argument('-o', '--output', default=sys.stdout,
            type=argparse.FileType('w'), help="""Output file [default:
            stdout]""")

    # Honor the caller-supplied argument list; ``parse_args(None)`` falls
    # back to ``sys.argv[1:]``.
    arguments = parser.parse_args(args)

    # On Python 3 sub-commands are optional by default, so ``func`` may be
    # unset; report a usage error rather than failing with AttributeError.
    if not hasattr(arguments, 'func'):
        parser.error('too few arguments')

    arguments.func(arguments)