run_generate_dataset.py 5.11 KB
Newer Older
Jiri Borovec committed
1
"""
The main script for generating synthetic datasets

Copyright (C) 2015-2016 Jiri Borovec <jiri.borovec@fel.cvut.cz>
"""

import os
import logging
Jiri Borovec committed
9 10
import inspect
import json
Jiri Borovec committed
11
import argparse
Jiri Borovec committed
12 13
import multiprocessing as mproc
from functools import partial
Jiri Borovec committed
14 15 16 17 18

import dataset_utils as tl_dataset

logger = logging.getLogger(__name__)

# default number of worker processes: about 70% of the available cores,
# clamped to at least one so a single-core machine does not end up with 0
NB_THREADS = max(1, int(mproc.cpu_count() * 0.7))
# default location of the generated datasets
DEFAULT_PATH_DATA = '/datagrid/temporary/Medical/'
DEFAULT_DIR_APD = 'atomicPatternDictionary_vx'
DEFAULT_PATH_APD = os.path.join(DEFAULT_PATH_DATA, DEFAULT_DIR_APD)
# file names written into the output directory
NAME_WEIGHTS = 'combination.csv'
NAME_CONFIG = 'config.json'
# key into IMAGE_SIZE selecting the default image dimensions
DATASET_TYPE = '2D'
IMAGE_SIZE = {
    '2D': (128, 128),
    '3D': (16, 128, 128),
}
# default number of generated samples and of atomic patterns
NB_SAMPLES = 1500
NB_ATM_PATTERNS = 18
# parameters handed to the binary / probability noise functions
NOISE_BINARY = 0.03
NOISE_PROB = 0.2
Jiri Borovec committed
34 35


Jiri Borovec committed
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
def aparse_params():
    """ parse the command-line arguments of the dataset generator

    SEE: https://docs.python.org/3/library/argparse.html

    An ``--image_size`` with other than 2 or 3 values is reported through
    ``parser.error`` (usage message and exit) instead of an ``assert``,
    so the check is kept even when running with ``python -O``.

    :return: argparse.Namespace with attributes `nb_samples`, `nb_patterns`,
        `path_out` (user-expanded absolute path), `image_size` and `nb_jobs`
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--nb_samples', type=int, required=False, default=NB_SAMPLES,
                        help='number of samples to be generated in each dataset')
    parser.add_argument('--nb_patterns', type=int, required=False,
                        default=NB_ATM_PATTERNS,
                        help='number of atom. patterns in created dictionary')
    parser.add_argument('--path_out', type=str, required=False,
                        default=DEFAULT_PATH_APD,
                        help='path to the datasets ending '
                             'with name of datasets parent folder')
    parser.add_argument('--image_size', type=int, required=False, nargs='+',
                        default=IMAGE_SIZE[DATASET_TYPE],
                        help='dimensions of generated images in axis Z, X, Y')
    parser.add_argument('--nb_jobs', type=int, required=False, default=NB_THREADS,
                        help='number of processes in parallel')
    args = parser.parse_args()
    # only 2D and 3D images are supported
    if len(args.image_size) not in (2, 3):
        parser.error('--image_size expects 2 or 3 values, got %d'
                     % len(args.image_size))
    # normalise the output path so later code does not depend on the CWD or "~"
    args.path_out = os.path.abspath(os.path.expanduser(args.path_out))
    return args


Jiri Borovec committed
62 63 64 65 66 67 68 69 70 71 72
def view_func_params(frame=None, path_out=''):
    """ log the local variables of a frame and optionally export them as JSON

    :param frame: frame object to be inspected; if None, the frame of
        the calling function is used
    :param path_out: str, if this path exists, the values are also saved there
        in a file named by NAME_CONFIG; otherwise nothing is written
    :return: dict, the local variables of the inspected frame
    """
    if frame is None:
        # a default of `inspect.currentframe()` in the signature is evaluated
        # only once, at definition time, and would always be the module frame
        frame = inspect.currentframe().f_back
    _, _, _, values = inspect.getargvalues(frame)
    # `items()` exists in both Python 2 and 3, `iteritems()` only in Python 2
    logger.info('PARAMETERS: \n%s',
                '\n'.join('"{}": \t {}'.format(k, v) for k, v in values.items()))
    if os.path.exists(path_out):
        path_json = os.path.join(path_out, NAME_CONFIG)
        with open(path_json, 'w') as fp:
            json.dump(values, fp)
    return values


Jiri Borovec committed
73 74
def generate_all(path_out=DEFAULT_PATH_APD, atlas_size=IMAGE_SIZE[DATASET_TYPE],
                 nb_patterns=NB_ATM_PATTERNS, nb_samples=NB_SAMPLES, nb_jobs=NB_THREADS):
    """ generate complete dataset containing dictionary of patterns and also
    input binary / probab. images with geometrical deformation and random noise

    :param path_out: str, path to the results directory; its parent directory
        has to exist, the directory itself is created when missing
    :param atlas_size: image size, passed as `im_size` to the atlas generator
    :param nb_patterns: int, passed as `nb_ptns` to the atlas generator
    :param nb_samples: int, passed to the binary pattern combination
    :param nb_jobs: int, forwarded as `nb_jobs` to every
        `dataset_apply_image_function` call
    """
    assert os.path.exists(os.path.dirname(path_out))
    if not os.path.exists(path_out):
        os.mkdir(path_out)
    # has to run before any other local is bound: the locals of this frame
    # (i.e. just the parameters) are logged and dumped into the config file
    view_func_params(inspect.currentframe(), path_out)

    def _subdir(name):
        # path of a dataset sub-folder inside the output directory
        return os.path.join(path_out, name)

    atlas = tl_dataset.dictionary_generate_atlas(path_out, im_size=atlas_size,
                                                 nb_ptns=nb_patterns)

    imgs_binary, weights = tl_dataset.dataset_binary_combine_patterns(
        atlas, _subdir('datasetBinary_raw'), nb_samples)
    weights.to_csv(os.path.join(path_out, NAME_WEIGHTS))

    apply_to_dataset = partial(tl_dataset.dataset_apply_image_function,
                               nb_jobs=nb_jobs)

    # binary datasets: deformed, noisy and deformed + noisy
    imgs_deform = apply_to_dataset(imgs_binary, _subdir('datasetBinary_deform'),
                                   tl_dataset.image_deform_elastic)
    apply_to_dataset(imgs_binary, _subdir('datasetBinary_noise'),
                     tl_dataset.add_image_binary_noise, NOISE_BINARY)
    apply_to_dataset(imgs_deform, _subdir('datasetBinary_defNoise'),
                     tl_dataset.add_image_binary_noise, NOISE_BINARY)

    # probability datasets derived from the raw and the deformed binary images
    imgs_prob = apply_to_dataset(imgs_binary, _subdir('datasetProb_raw'),
                                 tl_dataset.image_transform_binary2prob, 0.5)
    # NOTE(review): the raw variant uses `image_transform_binary2prob` while
    # this one uses `add_image_prob_noise` with the same 0.5 - confirm intended
    imgs_deform_prob = apply_to_dataset(imgs_deform, _subdir('datasetProb_deform'),
                                        tl_dataset.add_image_prob_noise, 0.5)
    apply_to_dataset(imgs_prob, _subdir('datasetProb_noise'),
                     tl_dataset.add_image_prob_noise, NOISE_PROB)
    apply_to_dataset(imgs_deform_prob, _subdir('datasetProb_defNoise'),
                     tl_dataset.add_image_prob_noise, NOISE_PROB)
Jiri Borovec committed
114 115 116 117 118 119 120


def convert_dataset_nifti(p_datasets=DEFAULT_PATH_APD):
    """ run the NIfTI conversion on the raw binary dataset

    :param p_datasets: str, directory containing the `datasetBinary_raw`
        folder; the `datasetBinary_raw_nifti` path in the same directory
        is passed as the second argument of the conversion
    """
    dir_raw = os.path.join(p_datasets, 'datasetBinary_raw')
    dir_nifti = os.path.join(p_datasets, 'datasetBinary_raw_nifti')
    tl_dataset.dataset_convert_nifti(dir_raw, dir_nifti)


Jiri Borovec committed
121
def main():
    """ script entry point: set up logging, read the command-line
    parameters and generate the whole dataset with them
    """
    logging.basicConfig(level=logging.INFO)
    logger.info('running...')

    args = aparse_params()
    generate_all(
        path_out=args.path_out,
        atlas_size=args.image_size,
        nb_patterns=args.nb_patterns,
        nb_samples=args.nb_samples,
        nb_jobs=args.nb_jobs
    )

    logger.info('DONE')


# run the generator only when executed as a script, not when imported
if __name__ == "__main__":
    main()