Commit 03219a0e authored by Jiri Borovec's avatar Jiri Borovec

hierarchical clustering

parent f1263495
......@@ -306,7 +306,7 @@ def dataset_binary_combine_patterns(im_ptns, out_dir, nb_samples=NB_SAMPLES,
mproc_pool.join()
df_weights.columns = ['image'] + [COLUMN_NAME.format(i + 1)
for i in range(len(df_weights.columns) - 1)]
df_weights = df_weights.set_index('image')
df_weights.set_index('image', inplace=True)
logger.debug(df_weights.head())
return im_spls, df_weights
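The change above (repeated throughout this commit) swaps df_weights = df_weights.set_index('image') for df_weights.set_index('image', inplace=True), avoiding an intermediate copy. A minimal sketch of the equivalence, on a hypothetical frame:

import pandas as pd

df = pd.DataFrame({'image': ['a', 'b'], 'ptn 1': [0.2, 0.8]})
df_copy = df.set_index('image')       # returns a new, re-indexed frame
df.set_index('image', inplace=True)   # re-indexes df itself, skipping the copy
assert df.equals(df_copy)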
......@@ -466,7 +466,7 @@ def dataset_load_images(path_dir, im_pattern='*', nb_spls=None, nb_jobs=1):
:param nb_jobs: int
:return: [np.array], [str]
"""
logger.info('loading folder (%s) <- "%s"', os.path.exists(path_dir), path_dir)
logger.debug('loading folder (%s) <- "%s"', os.path.exists(path_dir), path_dir)
assert os.path.exists(path_dir)
paths_img = find_images(path_dir, im_pattern)
paths_img = sorted(paths_img)[:nb_spls]
......
......@@ -32,9 +32,10 @@ import src.own_utils.tool_experiments as tl_expt
def _reduce_method(m):
# REQUIRED FOR MPROC POOL
# ISSUE: cPickle.PicklingError: Can't pickle <type 'instancemethod'>: attribute lookup __builtin__.instancemethod failed
# http://stackoverflow.com/questions/25156768/cant-pickle-type-instancemethod-using-pythons-multiprocessing-pool-apply-a
# REQUIRED FOR MPROC POOL
# ISSUE: cPickle.PicklingError:
# Can't pickle <type 'instancemethod'>: attribute lookup __builtin__.instancemethod failed
# http://stackoverflow.com/questions/25156768/cant-pickle-type-instancemethod-using-pythons-multiprocessing-pool-apply-a
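# NOTE: this reducer is presumably registered once at import time, e.g. (Python 2):
#   import copy_reg, types
#   copy_reg.pickle(types.MethodType, _reduce_method)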
if m.im_self is None:
return getattr, (m.im_class, m.im_func.func_name)
else:
......@@ -303,7 +304,7 @@ class ExperimentAPD(tl_expt.Experiment):
for stat in self.l_stat:
self.df_stat = self.df_stat.append(stat, ignore_index=True)
if self.iter_var_name in stat:
self.df_stat = self.df_stat.set_index(self.iter_var_name)
self.df_stat.set_index(self.iter_var_name, inplace=True)
path_csv = os.path.join(self.params.get('path_exp'), self.RESULTS_CSV)
logging.debug('save results: "%s"', path_csv)
self.df_stat.to_csv(path_csv)
......
#!/bin/bash
#cd ~/Dropbox/Workspace/py_ImageProcessing/src/atm_ptn_dict/
#source ~/vEnv/bin/activate
# STATE-OF-THE-ART methods
# python run_experiment_apd_all.py \
# -in /datagrid/Medical/microscopy/drosophila/synthetic_data/atomicPatternDictionary_v0 \
# -out /datagrid/Medical/microscopy/drosophila/TEMPORARY/experiments_APD-sta
#
# python run_experiment_apd_all.py \
# -in /datagrid/Medical/microscopy/drosophila/synthetic_data/atomicPatternDictionary_v1 \
# -out /datagrid/Medical/microscopy/drosophila/TEMPORARY/experiments_APD-sta
#
# python run_experiment_apd_all.py \
# -in /datagrid/Medical/microscopy/drosophila/synthetic_data/atomicPatternDictionary_v2 \
# -out /datagrid/Medical/microscopy/drosophila/TEMPORARY/experiments_APD-sta
#
# python run_experiment_apd_all.py \
# -in /datagrid/Medical/microscopy/drosophila/synthetic_data/atomicPatternDictionary_v3 \
# -out /datagrid/Medical/microscopy/drosophila/TEMPORARY/experiments_APD-sta
#
# python run_experiment_apd_all.py \
# -in /datagrid/Medical/microscopy/drosophila/synthetic_data/atomicPatternDictionary3D_v0 \
# -out /datagrid/Medical/microscopy/drosophila/TEMPORARY/experiments_APD-3D-sta
#
# python run_experiment_apd_all.py \
# -in /datagrid/Medical/microscopy/drosophila/synthetic_data/atomicPatternDictionary3D_v1 \
# -out /datagrid/Medical/microscopy/drosophila/TEMPORARY/experiments_APD-3D-sta
python run_experiment_apd_all.py \
-in /datagrid/Medical/microscopy/drosophila/synthetic_data/atomicPatternDictionary_v0 \
-out /datagrid/Medical/microscopy/drosophila/TEMPORARY/experiments_APD-sta
python run_experiment_apd_all.py \
-in /datagrid/Medical/microscopy/drosophila/synthetic_data/atomicPatternDictionary_v1 \
-out /datagrid/Medical/microscopy/drosophila/TEMPORARY/experiments_APD-sta
python run_experiment_apd_all.py \
-in /datagrid/Medical/microscopy/drosophila/synthetic_data/atomicPatternDictionary_v2 \
-out /datagrid/Medical/microscopy/drosophila/TEMPORARY/experiments_APD-sta
python run_experiment_apd_all.py \
-in /datagrid/Medical/microscopy/drosophila/synthetic_data/atomicPatternDictionary_v3 \
-out /datagrid/Medical/microscopy/drosophila/TEMPORARY/experiments_APD-sta
python run_experiment_apd_all.py \
-in /datagrid/Medical/microscopy/drosophila/synthetic_data/atomicPatternDictionary3D_v0 \
-out /datagrid/Medical/microscopy/drosophila/TEMPORARY/experiments_APD-3D-sta
python run_experiment_apd_all.py \
-in /datagrid/Medical/microscopy/drosophila/synthetic_data/atomicPatternDictionary3D_v1 \
-out /datagrid/Medical/microscopy/drosophila/TEMPORARY/experiments_APD-3D-sta
# OUR method
......
"""
Perform hierarchical clustering on an estimated atlas, i.e. iteratively merge
the pattern pair giving the smallest reconstruction error.
We assume going from all patterns present down to a single pattern in the atlas.
EXAMPLE:
>> python run_apd_hierarchical_cluster.py \
--path_in /datagrid/Medical/microscopy/drosophila/TEMPORARY/experiments_APDL_real
>> python run_apd_hierarchical_cluster.py \
--path_in /datagrid/Medical/microscopy/drosophila/TEMPORARY/experiments_APD_temp \
--names_expt ExperimentALPE_mp_real_type_3_segm_reg_binary_gene_ssmall_20160509-155333 \
--nb_jobs 1
Copyright (C) 2015-2016 Jiri Borovec <jiri.borovec@fel.cvut.cz>
"""
import os
import glob
import logging
import gc
import time
import multiprocessing as mproc
from functools import partial
import tqdm
import numpy as np
import pandas as pd
import dataset_utils as gen_data
import run_apd_reconstruction as r_reconst
NB_THREADS = int(mproc.cpu_count() * .9)
NAME_CONFIG = 'config.json'
PREFIX_ATLAS = 'atlas_'
PREFIX_ENCODE = 'encoding_'
PREFIX_RECONST = 'reconstruct_'
CSV_RECONT_DIFF = 'reconstruct_hierarchical_clustering.csv'
POSIX_CSV_SKIP = r_reconst.POSIX_CSV_NEW
DIR_PREFIX = 'hierarchical_clustering_'
def compute_merged_reconst_diff(ptn_comb, dict_params, path_out, atlas, img_names):
""" merge pattern pair and compute reconstruction diff
:param ptn_comb: (int, int)
:param dict_params: {str: int}
:param path_out: str
:param atlas: np.array<height, width>
:param img_names: [str]
:return: (int, int), float
"""
atlas_merge = atlas.copy()
# merge pattern pair
for lb in ptn_comb[1:]:
atlas_merge[atlas_merge == lb] = ptn_comb[0]
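# e.g. ptn_comb = (1, 3) turns atlas [[0, 1, 3], [3, 2, 0]] into [[0, 1, 1], [1, 2, 0]]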
wrapper_reconstruction = partial(r_reconst.compute_reconstruction,
dict_params=dict_params, path_out=path_out,
im_atlas=atlas_merge)
tuples_name_diff = map(wrapper_reconstruction, img_names)
# compute mean diff over all reconst
diff = np.mean(np.asarray(tuples_name_diff)[:, 1].astype(np.float))
return ptn_comb, diff
def hierarchical_clustering_merge_patterns(dict_params, path_out, img_names,
atlas, nb_jobs=NB_THREADS):
""" using hierarchical clustering merge pattern pair and return partial results
:param dict_params: {str: ...}
:param path_out: str
:param img_names: [str]
:param atlas: np.array<height, width>
:param nb_jobs: int
:return: np.array<height, width>, (int, int), float
"""
labels = sorted(np.unique(atlas).tolist())
# generate label pair combinations, skipping label 0 assumed to be the background on the first position
ptn_combines = [(labels[i], labels[j])
for i in range(1, len(labels)) for j in range(1, i)]
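# e.g. labels [0, 1, 2, 3] yield ptn_combines [(2, 1), (3, 1), (3, 2)]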
assert len(ptn_combines) > 0 and \
not any(len(set(ptn)) == 1 for ptn in ptn_combines)
# parallel compute reconstructions
wrapper_compute_merged = partial(compute_merged_reconst_diff,
dict_params=dict_params, path_out=path_out,
atlas=atlas, img_names=img_names)
if nb_jobs > 1:
mproc_pool = mproc.Pool(nb_jobs)
tuples_ptn_diff = mproc_pool.map(wrapper_compute_merged, ptn_combines)
mproc_pool.close()
mproc_pool.join()
else:
tuples_ptn_diff = map(wrapper_compute_merged, ptn_combines)
logging.debug('computed merged diffs: %s', repr(tuples_ptn_diff))
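# e.g. tuples_ptn_diff = [((2, 1), 0.41), ((3, 1), 0.37), ((3, 2), 0.52)]
# -> idx_min = 1, so patterns (3, 1) merge with the smallest mean diff 0.37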
idx_min = np.argmin(tuples_ptn_diff, axis=0)[1]
ptn_comb, diff = tuples_ptn_diff[idx_min]
logging.debug('found minimal ptn pos %i for diff %f and patterns %s',
idx_min, diff, repr(ptn_comb))
atlas_merged = atlas.copy()
for lb in ptn_comb[1:]:
atlas_merged[atlas_merged == lb] = ptn_comb[0]
return atlas_merged, ptn_comb, diff
def export_partial_atlas_encode(dict_params, path_out, df_merged, max_label,
nb, atlas, ptn_comb, diff):
""" export partial results such as atlas, encoding and reconstruct diff
:param dict_params: {str: ...}
:param path_out: str
:param df_merged: DF
:param max_label: int
:param nb: int
:param atlas: np.array<height, width>
:param ptn_comb: (int, int)
:param diff: float
:return: DF
"""
gen_data.export_image(path_out, atlas, PREFIX_ATLAS + 'merged_%i' % nb)
r_reconst.export_fig_atlas(atlas, path_out,
PREFIX_ATLAS + 'merged_%i' % nb, max_label)
df_encode = r_reconst.recompute_encoding(dict_params, atlas)
df_encode.to_csv(
os.path.join(path_out, PREFIX_ENCODE + 'merged_%i.csv' % nb))
df_merged = df_merged.append({
'nb_labels': nb,
'merged': ptn_comb,
'reconst_diff': diff}, ignore_index=True)
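# each call appends a single row, e.g. {'nb_labels': 12, 'merged': (5, 3), 'reconst_diff': 0.041}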
return df_merged
def sequence_hierarchical_clustering(dict_params, path_out, img_names, atlas,
nb_jobs=NB_THREADS):
""" sequance if hierarchical clustering which decrease number of patterns
by partial merging pattern pairs and exporting partial results
:param dict_params: {str, ...}
:param path_out: str
:param img_names: [str]
:param atlas: np.array<height, width>
:param nb_jobs: int
:return: DF
"""
if not os.path.exists(path_out):
os.mkdir(path_out)
nb_labels = len(np.unique(atlas))
max_label = atlas.max()
df_merged = pd.DataFrame()
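# baseline: merging (0, 0) is a no-op relabelling, giving the diff of the un-merged atlas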
ptn_comb, diff = compute_merged_reconst_diff((0, 0), dict_params, path_out,
atlas, img_names)
df_merged = export_partial_atlas_encode(dict_params, path_out, df_merged,
max_label, nb_labels, atlas, ptn_comb, diff)
# recursively merge patterns
for nb in reversed(range(2, nb_labels)):
atlas, ptn_comb, diff = hierarchical_clustering_merge_patterns(
dict_params, path_out, img_names, atlas, nb_jobs)
df_merged = export_partial_atlas_encode(dict_params, path_out, df_merged,
max_label, nb, atlas, ptn_comb, diff)
df_merged.set_index('nb_labels', inplace=True)
df_merged.to_csv(os.path.join(path_out, CSV_RECONT_DIFF))
return df_merged
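# a hypothetical call, assuming a loaded config and atlas:
#   df = sequence_hierarchical_clustering(params, path_out, img_names, atlas, nb_jobs=4)
#   df['reconst_diff']  # indexed by nb_labels; the diff typically grows as patterns merge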
def process_experiment(path_expt, nb_jobs=NB_THREADS):
""" process complete folder with experiment
:param path_expt: str
"""
logging.info('Experiment folder: \n "%s"', path_expt)
dict_params = r_reconst.load_config_json(path_expt)
atlas_names = [os.path.basename(p) for p
in glob.glob(os.path.join(path_expt, PREFIX_ATLAS + '*.png'))]
list_csv = [p for p in glob.glob(os.path.join(path_expt, PREFIX_ENCODE + '*.csv'))
if not p.endswith(POSIX_CSV_SKIP)]
logging.debug('found %i CSV files: %s', len(list_csv), repr(list_csv))
df_diffs_all = pd.DataFrame()
for path_csv in sorted(list_csv):
name_csv = os.path.basename(path_csv)
name_atlas = r_reconst.find_relevant_atlas(name_csv, atlas_names)
logging.info('Atlas: "%s" -> Encoding: "%s"', name_atlas, name_csv)
path_atlas = os.path.join(path_expt, name_atlas)
atlas = r_reconst.load_atlas_image(path_atlas)
img_names = pd.DataFrame.from_csv(path_csv).index.tolist()
path_out = os.path.join(path_expt, DIR_PREFIX + os.path.splitext(name_atlas)[0])
df_diff = sequence_hierarchical_clustering(dict_params, path_out,
img_names, atlas, nb_jobs)
# separate just the reconst diff and name it after the atlas
df_diff = df_diff['reconst_diff']
df_diff.name = os.path.splitext(name_atlas)[0]
logging.debug('records: %i for "%s"', len(df_diff), df_diff.name)
df_diffs_all = pd.concat([df_diffs_all, df_diff], axis=1)
df_diffs_all.to_csv(os.path.join(path_expt, CSV_RECONT_DIFF))
def main():
""" process complete list of experiments """
logging.basicConfig(level=logging.INFO)
logging.info('running...')
arg_params = r_reconst.parse_arg_params(r_reconst.create_args_parser())
logging.info('PARAMS: \n%s', '\n'.join(['"{}": \n\t {}'.format(k, v)
for k, v in arg_params.iteritems()]))
list_expt = [os.path.join(arg_params['path_in'], n) for n in arg_params['names_expt']]
assert len(list_expt) > 0, 'No experiments found!'
tqdm_bar = tqdm.tqdm(total=len(list_expt))
for path_expt in list_expt:
process_experiment(path_expt, arg_params['nb_jobs'])
gc.collect(), time.sleep(1)
tqdm_bar.update(1)
logging.info('DONE')
if __name__ == '__main__':
main()
......
......@@ -21,20 +21,18 @@ import pandas as pd
sys.path.append(os.path.abspath(os.path.join('..', '..'))) # Add path to root
import src.segmentation.tool_superpixels as tl_spx
import src.atm_ptn_dict.dataset_utils as gen_data
import src.atm_ptn_dict.pattern_weights as ptn_weight
import src.atm_ptn_dict.run_apd_reconstruction as r_reconst
NB_THREADS = int(mproc.cpu_count() * .8)
PATH_BASE = '/datagrid/Medical/microscopy/drosophila/'
PATH_CSV_MAIN = os.path.join(PATH_BASE, 'all_disc_image_info_for_prague.txt')
PATH_EXPERIMENTS = os.path.join(PATH_BASE, 'TEMPORARY', 'experiments_APD_new')
# PATH_EXPERIMENTS = os.path.join(PATH_BASE, 'RESULTS', 'experiments_APD')
PATH_EXPERIMENTS = os.path.join(PATH_BASE, 'TEMPORARY', 'experiments_APDL_real')
# PATH_EXPERIMENTS = os.path.join(PATH_BASE, 'RESULTS', 'experiments_APDL_real')
CONFIG_JSON = 'config.json'
PREFIX_ATLAS = 'atlas_'
PREFIX_ENCODE = 'encoding_'
PREFIX_CONNECT = 'connectivity_'
POSIX_GENE = '_gene.csv'
CONFIG_JSON = 'config.json'
POSIX_CSV_NEW = r_reconst.POSIX_CSV_NEW
logger = logging.getLogger(__name__)
......@@ -59,7 +57,7 @@ def aggregate_encoding(df_encode, column='gene_id', func=np.mean):
dict_res = dict(zip(list_ptns, result.tolist()))
dict_res.update({column: value, 'count': len(df_group)})
df_result = df_result.append(dict_res, ignore_index=True)
df_result = df_result.set_index(column)
df_result.set_index(column, inplace=True)
return df_result
......@@ -67,7 +65,7 @@ def export_atlas_connectivity(path_atlas):
logger.info('atlas (%s) of "%s"', os.path.exists(path_atlas), path_atlas)
img_atlas = r_reconst.load_atlas_image(path_atlas)
name_atlas = os.path.splitext(os.path.basename(path_atlas))[0]
r_reconst.export_fig_atlas(os.path.dirname(path_atlas), name_atlas, img_atlas)
r_reconst.export_fig_atlas(img_atlas, os.path.dirname(path_atlas), name_atlas)
vertices, edges = tl_spx.make_graph_segm_connect2d_conn4(img_atlas)
nb_lbs = max(vertices) + 1
matrix_connect = np.zeros((nb_lbs, nb_lbs))
......@@ -95,31 +93,19 @@ def load_config_json(path_expt, config_name=CONFIG_JSON):
return config
def recompute_encoding(atlas, path_csv):
path_expt = os.path.dirname(path_csv)
config = load_config_json(path_expt)
path_in = os.path.join(config.get('path_in'), config.get('dataset'))
imgs, im_names = gen_data.dataset_load_images(path_in)
weights = [ptn_weight.weights_image_atlas_overlap_major(img, atlas) for img in imgs]
df = pd.DataFrame(data=np.array(weights), index=im_names)
df.columns = ['ptn {}'.format(lb + 1) for lb in df.columns]
df.index.name = 'image'
gc.collect(), time.sleep(1)
return df
def process_experiment(path_csv, df_main):
logger.info(' -> %s', os.path.basename(path_csv))
path_atlas = path_csv.replace(PREFIX_ENCODE, PREFIX_ATLAS).replace('.csv', '.png')
atlas = export_atlas_connectivity(path_atlas)
df_encode = recompute_encoding(atlas, path_csv)
config = load_config_json(os.path.dirname(path_csv))
df_encode = r_reconst.recompute_encoding(config, atlas)
# df_encode = pd.DataFrame.from_csv(path_csv)
df_encode = extend_df(df_encode, df_main)
if 'image' in df_encode.columns:
df_encode = df_encode.set_index('image')
df_encode.set_index('image', inplace=True)
df_encode.to_csv(path_csv)
df_result = aggregate_encoding(df_encode)
df_result.to_csv(path_csv.replace('.csv', '_gene.csv'))
df_result.to_csv(path_csv.replace('.csv', POSIX_CSV_NEW))
def main(path_csv_main=PATH_CSV_MAIN, path_experiemnts=PATH_EXPERIMENTS):
......@@ -135,7 +121,7 @@ def main(path_csv_main=PATH_CSV_MAIN, path_experiemnts=PATH_EXPERIMENTS):
logger.info('EXPERIMENT: (%i / %i)', (i + 1), len(list_expt))
logger.info(os.path.basename(path_dir))
list_csv = [p for p in glob.glob(os.path.join(path_dir, 'encoding_*.csv'))
if not p.endswith('_gene.csv')]
if not p.endswith(POSIX_CSV_NEW)]
if RUN_DEBUG:
map(partial(process_experiment, df_main=df_main), list_csv)
......