run_parse_results_csv_aggreg.py 4.99 KB
Newer Older
Jiri Borovec committed
1 2 3 4
"""
This script parses the CSV files with encodings and extends them with information
from the general drosophila information file.
Second, it computes the mean activation aggregated over gene ids.
Jiri Borovec committed
5 6

Copyright (C) 2015-2016 Jiri Borovec <jiri.borovec@fel.cvut.cz>
Jiri Borovec committed
7 8 9 10 11
"""

import os
import sys
import glob
Jiri Borovec committed
12 13
import json
import gc
Jiri Borovec committed
14
import time
Jiri Borovec committed
15 16
import logging
import multiprocessing as mproc
Jiri Borovec committed
17
from functools import partial
Jiri Borovec committed
18 19 20 21

import numpy as np
import pandas as pd

Jiri Borovec committed
22
sys.path.append(os.path.abspath(os.path.join('..', '..'))) # Add path to root
Jiri Borovec committed
23 24 25 26 27 28
import src.segmentation.tool_superpixels as tl_spx
import src.atm_ptn_dict.run_apd_reconstruction as r_reconst

# number of worker processes: 80% of the available cores, but never fewer
# than one (on a single-core machine ``int(1 * .8)`` would otherwise give 0)
NB_THREADS = max(1, int(mproc.cpu_count() * .8))
# root folder of the drosophila dataset
PATH_BASE = '/datagrid/Medical/microscopy/drosophila/'
# tab-separated table with the general image information (read in `main`)
PATH_CSV_MAIN = os.path.join(PATH_BASE, 'all_disc_image_info_for_prague.txt')
Jiri Borovec committed
29 30 31
# folder whose sub-folders are the individual experiments scanned by `main`
PATH_EXPERIMENTS = os.path.join(PATH_BASE, 'TEMPORARY', 'experiments_APDL_real')
# PATH_EXPERIMENTS = os.path.join(PATH_BASE, 'RESULTS', 'experiments_APDL_real')
# name of the per-experiment configuration file read by `load_config_json`
CONFIG_JSON = 'config.json'
Jiri Borovec committed
32 33 34
# file-name prefixes which are swapped for each other to derive the sibling
# files of one result: the atlas image, the encoding table and the exported
# connectivity table
PREFIX_ATLAS = 'atlas_'
PREFIX_ENCODE = 'encoding_'
PREFIX_CONNECT = 'connectivity_'
Jiri Borovec committed
35
# suffix substituted for '.csv' in the name of the aggregated result table;
# files already ending with it are skipped when `main` collects its inputs
POSIX_CSV_NEW = r_reconst.POSIX_CSV_NEW
Jiri Borovec committed
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59

# module-level logger named after this module
logger = logging.getLogger(__name__)

# in debug mode the experiments are processed one after another in the main
# process instead of being distributed over a process pool
RUN_DEBUG = False


def extend_df(df_encode, df_main):
    """Join the general image information onto the encoding table.

    A table that already carries a 'gene_id' column is considered extended
    and is handed back untouched (the very same object).

    :param df_encode: encoding table indexed by the image name
    :param df_main: general information table with an 'image' column
    :return: the encoding table merged (inner join) with `df_main`, matching
        the index of `df_encode` against the 'image' column of `df_main`
    """
    already_extended = 'gene_id' in df_encode.columns
    if already_extended:
        return df_encode
    return df_encode.merge(df_main, how='inner',
                           left_index=True, right_on='image')


def aggregate_encoding(df_encode, column='gene_id', func=np.mean):
    """Aggregate the pattern activations of all rows sharing a value.

    Only columns whose name starts with 'ptn ' are aggregated. The rows are
    collected first and the resulting table is built in one go, because
    growing a DataFrame with `append` inside a loop is quadratic and the
    method no longer exists in recent pandas versions.

    :param df_encode: table with the grouping `column` and 'ptn *' columns
    :param column: name of the column to group by
    :param func: reduction applied to each pattern column of a group
    :return: table indexed by `column` holding the aggregated 'ptn *' columns
        followed by 'count', the number of rows in each group; empty (but
        with all the columns) when `df_encode` has no rows
    """
    list_ptns = [c for c in df_encode if c.startswith('ptn ')]
    rows = []
    for value, df_group in df_encode.groupby(column):
        if list_ptns:
            data = df_group[list_ptns].values
            result = np.apply_along_axis(func, axis=0, arr=data).tolist()
        else:
            # apply_along_axis refuses arrays without any column
            result = []
        row = dict(zip(list_ptns, result))
        row.update({column: value, 'count': len(df_group)})
        rows.append(row)
    # explicit columns keep the order stable and make the empty case valid
    df_result = pd.DataFrame(rows, columns=[column] + list_ptns + ['count'])
    df_result.set_index(column, inplace=True)
    return df_result


def export_atlas_connectivity(path_atlas):
    """Load an atlas, export its figure and its label connectivity matrix.

    The connectivity is written as a CSV file next to the atlas. Its name is
    derived from the atlas file name only (atlas prefix swapped for the
    connectivity prefix, extension set to '.csv'), so folder names which
    happen to contain the prefix or '.png' cannot corrupt the output path.

    :param path_atlas: path to the atlas image
    :return: the loaded atlas image
    """
    logger.info('atlas (%s) of "%s"', os.path.exists(path_atlas), path_atlas)
    img_atlas = r_reconst.load_atlas_image(path_atlas)
    dir_atlas, file_atlas = os.path.split(path_atlas)
    name_atlas = os.path.splitext(file_atlas)[0]
    r_reconst.export_fig_atlas(img_atlas, dir_atlas, name_atlas)

    # NOTE(review): presumably the vertices are the atlas labels and the edges
    # are pairs of neighbouring labels - confirm against tool_superpixels
    vertices, edges = tl_spx.make_graph_segm_connect2d_conn4(img_atlas)
    nb_lbs = max(vertices) + 1
    # symmetric 0/1 matrix with one row and one column per label index
    matrix_connect = np.zeros((nb_lbs, nb_lbs))
    for e1, e2 in edges:
        matrix_connect[e1, e2] = 1
        matrix_connect[e2, e1] = 1
    list_ptns = ['ptn {}'.format(i) for i in range(nb_lbs)]
    df_connect = pd.DataFrame(matrix_connect, columns=list_ptns, index=list_ptns)

    name_csv = name_atlas.replace(PREFIX_ATLAS, PREFIX_CONNECT) + '.csv'
    df_connect.to_csv(os.path.join(dir_atlas, name_csv))
    return img_atlas
Jiri Borovec committed
80 81 82 83 84 85


def mproc_wrapper(mp_tuple):
    """Unpack a tuple of arguments and pass them to `process_experiment`.

    Lets a single-argument mapping function drive `process_experiment`.

    :param mp_tuple: positional arguments of `process_experiment`
    :return: whatever `process_experiment` returns
    """
    args = tuple(mp_tuple)
    return process_experiment(*args)


Jiri Borovec committed
86 87 88 89 90 91 92 93 94 95
def load_config_json(path_expt, config_name=CONFIG_JSON):
    """Read the JSON configuration stored in an experiment folder.

    :param path_expt: path to the experiment folder
    :param config_name: file name of the configuration inside the folder
    :return: the parsed configuration, or None (after logging a warning)
        when the file does not exist
    """
    path_config = os.path.join(path_expt, config_name)
    if os.path.exists(path_config):
        with open(path_config, 'r') as fp:
            return json.load(fp)
    logger.warning('missing particular json config "%s"', path_config)
    return None


Jiri Borovec committed
96 97
def process_experiment(path_csv, df_main):
    """Recompute, extend and aggregate the encoding of one result file.

    The steps are: export the connectivity of the matching atlas, recompute
    the encoding from the atlas and the experiment configuration, extend it
    by the general image information, overwrite `path_csv` with the extended
    table and finally write the aggregated table next to it.

    Sibling paths are derived from the file name only, so folder names that
    happen to contain the encoding prefix or '.csv' cannot corrupt them.

    :param path_csv: path to the encoding CSV file of the experiment
    :param df_main: general information table with an 'image' column
    """
    dir_expt, file_csv = os.path.split(path_csv)
    name_csv = os.path.splitext(file_csv)[0]
    logger.info(' -> %s', file_csv)

    name_atlas = name_csv.replace(PREFIX_ENCODE, PREFIX_ATLAS) + '.png'
    atlas = export_atlas_connectivity(os.path.join(dir_expt, name_atlas))

    # NOTE(review): the config is None when the JSON file is missing -
    # confirm that recompute_encoding copes with that
    config = load_config_json(dir_expt)
    # the encoding is recomputed from the atlas, not loaded from `path_csv`
    df_encode = r_reconst.recompute_encoding(config, atlas)

    df_encode = extend_df(df_encode, df_main)
    if 'image' in df_encode.columns:
        df_encode.set_index('image', inplace=True)
    df_encode.to_csv(path_csv)

    df_result = aggregate_encoding(df_encode)
    df_result.to_csv(os.path.join(dir_expt, name_csv + POSIX_CSV_NEW))
Jiri Borovec committed
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123


def main(path_csv_main=PATH_CSV_MAIN, path_experiemnts=PATH_EXPERIMENTS):
    """Process all encoding files of every experiment folder.

    :param path_csv_main: path to the tab-separated general information table
    :param path_experiemnts: folder whose sub-folders are the experiments
    """
    logging.basicConfig(level=logging.INFO)
    # report the paths actually used, not the module defaults
    logger.info('exist: %i, %i', os.path.exists(path_csv_main),
                os.path.exists(path_experiemnts))
    # same defaults as the removed `pd.DataFrame.from_csv`
    df_main = pd.read_csv(path_csv_main, sep='\t', index_col=0,
                          parse_dates=True)
    df_main['image'] = df_main['image_path'].apply(lambda x: x.split('.')[0])

    list_expt = [p for p in glob.glob(os.path.join(path_experiemnts, '*'))
                 if os.path.isdir(p)]
    wrapper = partial(process_experiment, df_main=df_main)

    for i, path_dir in enumerate(list_expt):
        logger.info('EXPERIMENT: (%i / %i)', (i + 1), len(list_expt))
        logger.info('%s', os.path.basename(path_dir))
        pattern_csv = os.path.join(path_dir, PREFIX_ENCODE + '*.csv')
        list_csv = [p for p in glob.glob(pattern_csv)
                    if not p.endswith(POSIX_CSV_NEW)]
        if not list_csv:
            # a pool cannot be created with zero processes
            logger.warning('no encoding found in "%s"', path_dir)
            continue

        if RUN_DEBUG:
            # explicit loop: `map` is lazy on Python 3 and would do nothing
            for path_csv in list_csv:
                wrapper(path_csv)
        else:
            nb_workers = max(1, min(len(list_csv), NB_THREADS))
            mproc_pool = mproc.Pool(nb_workers)
            try:
                mproc_pool.map(wrapper, list_csv)
            finally:
                # release the workers even if an experiment failed
                mproc_pool.close()
                mproc_pool.join()

    logger.info('DONE')


if __name__ == '__main__':
    main()