run_detect_fail_case.py 8.68 KB
Newer Older
Jiri Borovec committed
1
"""
Jiri Borovec committed
2 3
script that walks over all segmentations, computes some statistics
and then decides whether each segmentation is likely to be correct or not
Jiri Borovec committed
4 5


Jiri Borovec committed
6 7
Copyright (C) 2015-2016 Jiri Borovec <jiri.borovec@fel.cvut.cz>
"""
Jiri Borovec committed
8 9 10 11 12 13

import os
import glob
import logging
import itertools
import shutil
Jiri Borovec committed
14
import multiprocessing as mproc
Jiri Borovec committed
15 16 17 18 19

# to suppress all visu, has to be on the beginning
import matplotlib
matplotlib.use('Agg')
import numpy as np
Jiri Borovec committed
20
import pandas as pd
Jiri Borovec committed
21
from skimage import io, morphology
Jiri Borovec committed
22 23 24 25 26
import matplotlib.pyplot as plt

logger = logging.getLogger(__name__)

PATH_BASE = '/datagrid/Medical/microscopy/drosophila/'
Jiri Borovec committed
27
PATH_SEGM = os.path.join(PATH_BASE, 'RESULTS/orig_segm')
Jiri Borovec committed
28 29 30
PATH_VISU = os.path.join(PATH_BASE, 'TEMPORARY/orig_visu')
# PATH_SEGM = os.path.join(PATH_BASE, 'real_segmentations/stage_4_segm')
# PATH_VISU = os.path.join(PATH_BASE, 'real_segmentations/stage_4_visu')
Jiri Borovec committed
31
NB_JOBS = mproc.cpu_count()
Jiri Borovec committed
32 33 34
CSV_SEGM_GOOD = 'segm_good.csv'
CSV_SEGM_FAIL = 'segm_fail.csv'
FIG_STAT = 'stat_segm_labels.jpeg'
Jiri Borovec committed
35 36
PREFIX_VISU_SEGM = 'visu_segm_'
# size around image borders
Jiri Borovec committed
37
NB_IMG_CORNER = 50
Jiri Borovec committed
38
# minimal ratio of background required in the strips along image borders
Jiri Borovec committed
39
THRESHOLD_CORNER_BG = 0.95
Jiri Borovec committed
40
# maximal ratio of background allowed over the whole image
Jiri Borovec committed
41
THRESHOLD_BACKGROUND = 0.95
Jiri Borovec committed
42
# minimal ratio of foreground required inside the object convex hull
Jiri Borovec committed
43
THRESHOLD_CONVEX = 0.85
Jiri Borovec committed
44 45


Jiri Borovec committed
46
def labels_ration(path_seg):
    """ compute ratios among labels in a segmentation as a histogram,
    together with a border and a convex-hull statistic

    :param path_seg: str, path to the segmentation image
    :return: dict with keys
        'name' - base name of the file,
        'lb_hist' - {label: ratio of pixels with this label},
        'r_bg' - ratio of zero-label pixels in the border strips,
        'r_cx' - ratio of (closed) foreground inside its convex hull
    """
    seg = io.imread(path_seg)
    n_seg = os.path.basename(path_seg)
    # `seg.size` replaces `np.product(seg.shape)`; `np.product` was removed
    # in NumPy 2.0
    nb_pixels = float(seg.size)
    d_lb_hist = {lb: np.sum(seg == lb) / nb_pixels for lb in np.unique(seg)}
    # strips of NB_IMG_CORNER pixels along all four image borders
    seg_border = np.concatenate((seg[:NB_IMG_CORNER, :].ravel(),
                                 seg[-NB_IMG_CORNER:, :].ravel(),
                                 seg[:, :NB_IMG_CORNER].ravel(),
                                 seg[:, -NB_IMG_CORNER:].ravel()))
    r_bg = np.sum(seg_border == 0) / float(seg_border.shape[0])
    # close small holes in the foreground before comparing with the hull
    seg_fg = morphology.binary_closing(seg > 0, morphology.disk(30))
    obj_convex = morphology.convex_hull_object(seg_fg)
    nb_convex = np.sum(obj_convex)
    if nb_convex > 0:
        obj_bg = np.sum(seg_fg[obj_convex] > 0) / float(nb_convex)
    else:
        # no foreground at all -> no hull; avoid the 0 / 0 division
        obj_bg = 0.
    return {'name': n_seg,
            'lb_hist': d_lb_hist,
            'r_bg': r_bg,
            'r_cx': obj_bg}
Jiri Borovec committed
69 70


Jiri Borovec committed
71
def plot_histo_labels(dict_hist, path_dir=''):
    """ plot the per-label values over all images and optionally save it

    :param dict_hist: {label: [float]}, list of values per label
    :param path_dir: str, folder where the figure FIG_STAT is saved;
        nothing is written when the path does not exist
    """
    logger.info('plotting stat. results')
    fig = plt.figure()
    for lb in dict_hist:
        plt.plot(dict_hist[lb], '+', label=str(lb))
    # `.values()` works on Python 2 and 3 (unlike `.itervalues()`);
    # the `or [0]` keeps `max` from failing on an empty dictionary
    nb_samples = max([len(v) for v in dict_hist.values()] or [0])
    plt.xlim([0, nb_samples])
    plt.xlabel('image samples')
    plt.ylabel('label cover')
    plt.legend(loc=0)
    plt.grid()
    if os.path.exists(path_dir):
        fig.savefig(os.path.join(path_dir, FIG_STAT))
    # release the figure, otherwise one figure per call stays in memory
    plt.close(fig)
Jiri Borovec committed
88 89


Jiri Borovec committed
90
def read_make_hist(paths_seg):
    """ in parallel read all segmentations and compute individual statistics

    :param paths_seg: [str], paths to all segmentations
    :return: list with one result of `labels_ration` per given path,
        in the same order as the paths
    """
    logger.debug('run in %i threads...', NB_JOBS)
    mproc_pool = mproc.Pool(NB_JOBS)
    try:
        l_desc = mproc_pool.map(labels_ration, paths_seg)
    finally:
        # always shut the workers down, also when a worker raised
        mproc_pool.close()
        mproc_pool.join()
    return l_desc


Jiri Borovec committed
105
def merge_hist_stat(list_name_hist):
    """ merge particular histograms per segmentation into one global per label

    :param list_name_hist: [dict], one description per image; only the
            'lb_hist' item ({label: ratio}) of each description is used
    :return: {label: [float]}, values per label over all images, plus the
            key 'fg' holding the sum of labels 1 and 2 for every image
    """
    logger.debug('merge partial results...')
    l_hist = [desc['lb_hist'] for desc in list_name_hist]
    # iterating a dictionary yields its keys, i.e. the labels
    lbs = itertools.chain.from_iterable(l_hist)
    uq_lbs = np.unique(list(lbs)).tolist()
    dict_hist = {lb: [h[lb] for h in l_hist if lb in h]
                 for lb in uq_lbs}
    # join the foregrounds
    dict_hist['fg'] = [h.get(1, 0) + h.get(2, 0) for h in l_hist]
    logger.debug('compute statistic...')
    # `.items()` works on Python 2 and 3 (unlike `.iteritems()`)
    for lb, vals in dict_hist.items():
        if len(vals) == 0:
            logger.warning('label %s has no values to compute', str(lb))
            continue
        # explicit array so that the arithmetic below is element-wise
        arr = np.asarray(vals, dtype=float)
        v_median, v_std = np.median(arr), np.std(arr)
        logger.info('label %s with mean %f, median %f, std %f',
                    str(lb), np.mean(arr), v_median, v_std)
        logger.debug(' -> count outliers: %i',
                     np.sum(np.abs(arr - v_median) > 3 * v_std))
    return dict_hist
Jiri Borovec committed
132 133


Jiri Borovec committed
134
def segm_decision(l_desc, dict_hist):
    """ according to the given rules decide whether a segmentation is good

    A segmentation is marked as good when all of these hold:
    its foreground ratio (labels 1 + 2) lies within 3 standard deviations
    from the median over all images, its background ratio (label 0) is
    below THRESHOLD_BACKGROUND, its 'r_bg' is above THRESHOLD_CORNER_BG
    and its 'r_cx' is above THRESHOLD_CONVEX.

    :param l_desc: [dict], description per image with the items
            'name', 'lb_hist', 'r_bg' and 'r_cx'
    :param dict_hist: {label: [float]}, values per label over all images,
            the 'fg' item is required
    :return: [str], [str], names of good and of failed segmentations
    """
    l_good, l_fail = [], []
    if not l_desc:
        # nothing to decide; also avoids median / std of an empty list
        return l_good, l_fail
    fg_median, fg_std = np.median(dict_hist['fg']), np.std(dict_hist['fg'])
    for desc in l_desc:
        lb_hist = desc['lb_hist']
        fg = lb_hist.get(1, 0) + lb_hist.get(2, 0)
        # `.get(0, 0)` - an image without any background pixel has no
        # label 0 in its histogram, which used to raise KeyError
        is_good = (abs(fg - fg_median) <= 3 * fg_std
                   and lb_hist.get(0, 0) < THRESHOLD_BACKGROUND
                   and desc['r_bg'] > THRESHOLD_CORNER_BG
                   and desc['r_cx'] > THRESHOLD_CONVEX)
        if is_good:
            l_good.append(desc['name'])
        else:
            l_fail.append(desc['name'])
    return l_good, l_fail


Jiri Borovec committed
157
def export_results(path_dir, l_good, l_fail):
    """ write the names of good and failed segmentations into two CSV files

    Each file has a single column whose first row is the text 'images'.

    :param path_dir: str, folder where both CSV files are written
    :param l_good: [str], names of images written to CSV_SEGM_GOOD
    :param l_fail: [str], names of images written to CSV_SEGM_FAIL
    """
    logger.info('export results as CSV files')
    for name_csv, names in ((CSV_SEGM_GOOD, l_good), (CSV_SEGM_FAIL, l_fail)):
        # the column title is stored as the first data row
        df_names = pd.DataFrame(['images'] + names)
        path_csv = os.path.join(path_dir, name_csv)
        df_names.to_csv(path_csv, index=False, header=False)
Jiri Borovec committed
169 170


Jiri Borovec committed
171
def segm_detect_fails(path_dir=PATH_SEGM, im_pattern='*.png'):
    """ make the statistic over all segmentations in the given folder
    and decide whether they are correct or fails

    The lists of good / failed images and the statistic figure are written
    into the same folder.

    :param path_dir: str, folder with segmentations
    :param im_pattern: str, pattern for image names
    :raises Exception: if the folder does not exist
    """
    logger.info('FOLDER: "%s"', path_dir)
    if not os.path.exists(path_dir):
        raise Exception('folder "{}" does not exist'.format(path_dir))
    # sorted, since glob gives no ordering guarantee and the order
    # propagates into the exported CSV files and the plot
    p_segs = sorted(glob.glob(os.path.join(path_dir, im_pattern)))
    logger.debug('found %i segmentation', len(p_segs))

    l_desc = read_make_hist(p_segs)

    d_hist = merge_hist_stat(l_desc)

    # make the decision whether each segmentation is fine
    l_good, l_fail = segm_decision(l_desc, d_hist)
    logger.info('number good %i and fails %i', len(l_good), len(l_fail))

    export_results(path_dir, l_good, l_fail)
    # show all samples
    plot_histo_labels(d_hist, path_dir)
Jiri Borovec committed
195 196


Jiri Borovec committed
197 198 199 200
def mproc_copy_file(mp_set):
    """ copy a single file, wrapper taking one argument for `Pool.map`

    :param mp_set: (str, str), source path and destination path
    """
    path_src, path_dst = mp_set
    shutil.copyfile(path_src, path_dst)


Jiri Borovec committed
201
def copy_files(l_imgs, path_dir_visu, path_out):
    """ copy a list of images in parallel

    Both the source and the destination file name are the image name
    prefixed by PREFIX_VISU_SEGM.

    :param l_imgs: [str], image names
    :param path_dir_visu: str, folder with the source files
    :param path_out: str, folder to copy the files into
    """
    if not l_imgs:
        # nothing to copy, do not spawn the workers at all
        return
    pp_dir_visu = os.path.join(path_dir_visu, PREFIX_VISU_SEGM)
    pp_out = os.path.join(path_out, PREFIX_VISU_SEGM)
    mp_set = [(pp_dir_visu + n_img, pp_out + n_img) for n_img in l_imgs]

    mproc_pool = mproc.Pool(NB_JOBS)
    try:
        mproc_pool.map(mproc_copy_file, mp_set)
    finally:
        # always shut the workers down, also when a copy failed
        mproc_pool.close()
        mproc_pool.join()


Jiri Borovec committed
218
def filter_copy_visu(path_dir_seg=PATH_SEGM, path_dir_visu=PATH_VISU):
    """ load the CSV files with good and bad segmentations, create
    a sub-folder per CSV file in the visual folder and copy the relevant
    images there

    An already existing sub-folder is removed first.

    :param path_dir_seg: str, folder with CSV_SEGM_GOOD and CSV_SEGM_FAIL
    :param path_dir_visu: str, folder with the visualisations
    """
    logger.info('filter and copy cases')
    logger.debug('segmentation: %s,\n visual: %s', path_dir_seg, path_dir_visu)
    for n_csv in [CSV_SEGM_GOOD, CSV_SEGM_FAIL]:
        logger.info('reading "%s"', n_csv)
        # the sub-folder is named as the CSV file without its extension
        p_out = os.path.join(path_dir_visu, os.path.splitext(n_csv)[0])
        if os.path.exists(p_out):
            logger.debug('remove old dir %s', p_out)
            shutil.rmtree(p_out)
        os.mkdir(p_out)
        # `pd.read_csv` replaces `pd.DataFrame.from_csv`, which was removed
        # in pandas 1.0
        df = pd.read_csv(os.path.join(path_dir_seg, n_csv), index_col=False)
        logger.info('copy %i images to "%s"', len(df), n_csv)
        copy_files(df['images'].values.tolist(), path_dir_visu, p_out)
Jiri Borovec committed
238 239 240


def main():
    """ run the detection and the copying of visualisations, first with
    the default folders and then for the result folders of types 1 to 4 """
    logging.basicConfig(level=logging.DEBUG)
    logger.info('running...')
    # defaults run
    segm_detect_fails()
    filter_copy_visu()

    # pairs of segmentation and visualisation folders per type
    dir_pairs = [
        (os.path.join(PATH_BASE, 'RESULTS/type_{}_segm'.format(nb)),
         os.path.join(PATH_BASE, 'TEMPORARY/type_{}_visu'.format(nb)))
        for nb in (1, 2, 3, 4)
    ]
    for p_dir_seg, p_dir_visu in dir_pairs:
        segm_detect_fails(p_dir_seg)
        filter_copy_visu(p_dir_seg, p_dir_visu)

    logger.info('DONE')
Jiri Borovec committed
256 257


Jiri Borovec committed
258
if __name__ == '__main__':
Jiri Borovec committed
259
    main()