diff --git a/.RData b/.RData
new file mode 100644
index 0000000000000000000000000000000000000000..b53e54d2ccee1de96cc54f2c1a47f2d8aba8b97b
Binary files /dev/null and b/.RData differ
diff --git a/.RDataTmp b/.RDataTmp
new file mode 100644
index 0000000000000000000000000000000000000000..80a5714ad60107ba1e84587f618aadb6888cb596
Binary files /dev/null and b/.RDataTmp differ
diff --git a/.Rhistory b/.Rhistory
new file mode 100644
index 0000000000000000000000000000000000000000..999dd34992b1d1445338f55cf187458416fc45ba
--- /dev/null
+++ b/.Rhistory
@@ -0,0 +1,512 @@
+class(GE)
+class(GE1)
+class(GE1)[1]
+class(GE1)[1] == "matrix"
+class(GE)[1]
+dim(PK)
+head(colnames(PK))
+dim(PK["MT-ND1",])
+head(PK["MT-ND1",])
+length(PK["MT-ND1",])
+dim(PK[parts[[1]],parts[[2]]])
+dim(PK)
+class(GE)[1]
+class(GE)[1] %in% c("matrix", "data.frame")
+class(GE_in)[1] %in% c("matrix", "data.frame")
+dim(GE_in)
+dim(PK_in)
+dim(PK)
+didim(a)
+dim(a)
+dim(a) == dim(PK)
+sum(dim(a) == dim(PK))
+class(unlist(parts))
+class(parts)
+length(parts[[1]])
+dim(GE)
+dim(GE_in)
+sum(matrix(1, 4, 5))
+sum(matrix(1, 4000, 5029))
+parts
+li = list('java','python')
+li2 <- append(li,'r')
+print(li2)
+li <- list()
+append(li, 'r')
+li <- append(li, 'r')
+li <- append(li, 'r2')
+li
+lapply(parts, length)
+unlist(lapply(parts, length))
+size(matrix(NA, 3, 4))
+11000 / (22000 * 22000)
+sizes <- (5, 100, 5000)
+sizes <- c(5, 100, 5000)
+idx <- 3
+idx > sizes
+(idx > sizes) * sizes
+idx <- 100
+(idx > sizes) * sizes
+sum(idx > sizes)
+idx <- 101
+sum(idx > sizes)
+idx < 99
+idx <- 99
+idx <- 101
+sum(idx > sizes)
+sizes
+idx
+idx <- 99
+sum(idx > sizes)
+idx <- 99
+sum(idx > sizes) + 1
+unlist(lapply(parts, length))
+remove(SK_SS)
+Rcpp::sourceCpp("source/skeleton.cpp")
+Rcpp::sourceCpp("source/skeleton.cpp")
+Rcpp::sourceCpp("source/skeleton.cpp")
+Rcpp::sourceCpp("source/skeleton.cpp")
+Rcpp::sourceCpp("source/skeleton.cpp")
+Rcpp::sourceCpp("source/skeleton.cpp")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+dim(GE)
+dim(PK)
+SK <- PK
+SK_SS_tripartite(GE,PK,SK,parts = parts)
+Rcpp::sourceCpp("source/skeleton.cpp")
+SK_SS_tripartite(GE,PK,SK,parts = parts)
+source("G:/4_Semestr/DP/R_version/source/BiDAG_BGe.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+debugSource("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts = parts)
+View(myScore)
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+debugSource("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts = parts)
+traceback()
+debugSource("G:/4_Semestr/DP/R_version/source/skeleton.R")
+debugSource("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts = parts)
+unique(PK)
+unlist(matrix(NA, 2, 2))
+c(matrix(NA, 2, 2))
+unique(c(PK))
+debugSource("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts = parts)
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts = parts)
+remove(idx)
+remove(sizes)
+remove(prev_wd)
+remove(li)
+remove(li2)
+remove(GE1)
+remove(GE2)
+remove(Ga)
+remove(a)
+gd()
+gc()
+source("G:/4_Semestr/DP/R_version/source/general.R")
+src.source("BiDAG_BGe.R")
+tst <- list(matrix(NA, 2, 3), matrix(NA, 3, 4))
+sum(lapply(tst, ncol))
+lapply(tst, ncol)
+sum(unlist(lapply(tst, ncol)))
+c(1, 2) * c(-1, 1)
+tst <- list(a=matrix(NA, 2, 2))
+tst$a
+matrix(1:9, 3, 3)[1,3]
+matrix(1:9, 3, 3)
+matrix(1:9, 3, 3)[1,3]
+matrix(1:9, 3, 3)[1:2,1:2]
+tst <- c(1,2)
+matrix(1:9, 3, 3)[tst,tst]
+tst <- function()
+{
+for(i in 1:9)
+{
+if(i == 1)
+{
+break
+}
+print(i)
+}
+}
+tst()
+tst <- function()
+{
+for(i in 1:9)
+{
+if(i == 1)
+{
+next
+}
+print(i)
+}
+}
+tst()
+length(PK[1,])
+sum(PK[1,] > 0)
+which(PK[1,] > 0)
+names(which(PK[1,] > 0))
+lapply(parts, length)
+(17293 + 1698) * (17293 + 1698)
+(17293 + 1698) * (17293 + 1698) + (1698 + 3009)*(1698 + 3009)
+(17293 + 1698) * (17293 + 1698) + (1698 + 3009)*(1698 + 3009) - 22000 * 22000
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/general.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK)
+SK_SS_tripartite(GE,PK,SK,parts)
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts)
+P_D_G
+class(param)
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts)
+source("G:/4_Semestr/DP/R_version/source/general.R")
+paste(1, c(1,2), "")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts)
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+debugSource("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts)
+View(SK)
+debugSource("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts)
+head(G_adjs)
+length(G_adjs)
+class(G_adjs)
+length(G_adjs)
+View(G_adjs)
+tst <- list()
+tst[[1]] <- matrix(NA, 2, 3)
+tst[[2]] <- matrix(NA, 3, 5)
+View(tst)
+debugSource("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts)
+dim(adj)
+View(G_adjs)
+debugSource("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts)
+View(G_adjs)
+length(G_adjs)
+dim(adj)
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts)
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts)
+?View
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts)
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+debugSource("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts)
+head(sc)
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts)
+knitr::opts_knit$set(root.dir = '/inst')# COMMENT IF NOT FROM PACKAGE
+#knitr::opts_knit$set(root.dir = 'C:/school/diplomka_package/gpubayesomics/inst')
+# COMMENT IF NOT FROM PACKAGE
+library(dplyr)
+library(gsubfn)
+library(data.table)
+library(biomaRt)
+library("org.Hs.eg.db")
+library(EnsDb.Hsapiens.v86)
+library(miRBaseConverter)
+library(httr)
+library(tidyr)
+source("data_preprocess.R")
+NUM_CIRC_RNA_SAMPLE <- 1
+NUM_MI_RNA_SAMPLE <- 25
+NUM_M_RNA_SAMPLE <- 50
+MAX_INPUT_EDGES <- 3
+input_intomics <- input_all[[1]]
+f_expr_m <- "data/MDS_expr_mrna.RData"
+f_expr_mi <- "data/MDS_expr_mirna.RData"
+f_expr_circ <- "data/MDS_expr_circ.RData"
+f_pk_circ_mi <- "data/MDS_prior_circ_mi.RData"
+f_pk_mi_m <- "data/MDS_prior_mi_m.RData"
+f_sample_names <- "data/MDS_sample_names.RData"
+input_all <- data_preprocess(
+f_expr_m,
+f_expr_mi,
+f_expr_circ,
+f_pk_circ_mi,
+f_pk_mi_m,
+f_sample_names,
+NUM_CIRC_RNA_SAMPLE,
+NUM_MI_RNA_SAMPLE,
+NUM_M_RNA_SAMPLE,
+MAX_INPUT_EDGES)
+input_intomics <- input_all[[1]]
+list[omics, layer_def, PK, PK_present, PK_real, sizes] <- input_intomics
+NUM_M_RNA_SAMPLE <- sizes[1]
+NUM_MI_RNA_SAMPLE <- sizes[2]
+NUM_CIRC_RNA_SAMPLE <- sizes[3]
+sprintf("We subsampled these numbers: mRNA:%s, miRNA:%s, circRNA:%s",
+NUM_M_RNA_SAMPLE, NUM_MI_RNA_SAMPLE, NUM_CIRC_RNA_SAMPLE)
+require("knitr")
+require("bnlearn")
+library(bnlearn)
+library(foreach)
+library(GGally)
+library(bestNormalize)
+library(bnstruct)
+library(rlist)
+library(matrixStats)
+library(parallel)
+library(numbers)
+source_code_dir <- "IntOMICS_noimproved/"  # The directory where IntOMICS source code files #are saved.
+# Original IntOMICS
+#source_code_dir <- "IntOMICS_improved/"
+file_path_vec <- list.files(source_code_dir, full.names = T)
+for(f_path in file_path_vec){source(f_path)}
+source("G:/4_Semestr/DP/R_version/IntOMICS_noimproved/OMICS_module.R")
+debugSource("G:/4_Semestr/DP/R_version/IntOMICS_noimproved/OMICS_module.R")
+OMICS_module_res_1 <- OMICS_module(omics = GSE127960_WT_omics, PK = GSE127960_WT_PK, layers_def = GSE127960_WT_layers_def)
+load("intomics_data/input_data.RData")
+load("../intomics_data/input_data.RData")
+OMICS_module_res_1 <- OMICS_module(omics = GSE127960_WT_omics, PK = GSE127960_WT_PK, layers_def = GSE127960_WT_layers_def)
+debugSource("G:/4_Semestr/DP/R_version/IntOMICS_noimproved/pf_UB_est.R")
+pf_UB_res
+View(pf_UB_res)
+class(unlist(pf_UB_res$BGe_score_all_configs_node))
+min(unlist(pf_UB_res$BGe_score_all_configs_node))
+max(unlist(pf_UB_res$BGe_score_all_configs_node))
+length(parts)
+sapply(parts, length)
+18000 / 25
+sum(parts)
+class(parts)
+ls
+sum(unlist(parts))
+head(unlist(parts))
+length(unlist(parts))
+22000 // 25
+22000 / 25
+18000 / 25
+sapply(parts, length)
+sizes <- unlist(lapply(parts, length))
+sizes
+class(sizes)
+sum(sizes)
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts)
+class(GE)
+dim(PK)
+dim(SK)
+PK <- SK
+clear
+SK_SS_tripartite(GE,PK,SK,parts)
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+#knitr::opts_knit$set(root.dir = '/inst')# COMMENT IF NOT FROM PACKAGE
+#knitr::opts_knit$set(root.dir = 'C:/school/diplomka_package/gpubayesomics/inst')
+# COMMENT IF NOT FROM PACKAGE
+source("source/general.R")
+source("source/skeleton.R")
+exists(asd)
+source("source/general.R")
+source("source/skeleton.R")
+GE < readRDS("MMPC_setup/cgpa_MDS_GE.RData")
+source("source/general.R")
+source("source/skeleton.R")
+GE < readRDS("MMPC_setup/cgpa_MDS_GE.RData")
+source("source/general.R")
+source("source/skeleton.R")
+GE <- readRDS("MMPC_setup/cgpa_MDS_GE.RData")
+PK <- readRDS("MMPC_setup/adj.RData")
+SK <- PK
+parts <- readRDS("MMPC_setup/cgpa_MDS_parts.RData")
+SK_SS_tripartite(GE,PK,SK,parts)
+sample(1:10)
+sample(1:10)
+sample(1:10)
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+tst <- matrix(c(0, 1, 1, 0), 2, 2)
+tst
+tst[1,]
+tst[1,] == 0
+which(tst[1,] == 0)
+which(tst[1,])
+which(tst[1,] == 1)
+sample()
+sample(1)
+?sample
+sample(2)
+sample(1, 2)
+sample(2, 1)
+sample(2, 1)
+sample(2, 1)
+sample(2, 1)
+sample(2, 1)
+sample(2, 1)
+sample(2, 1)
+sample(2, 1)
+c("a"=1, "b"=2)
+names(c("a"=1, "b"=2))
+tst <- c("a"=1, "b"=2)
+tst["a"]
+tst["a"] <- tst["a"] + 2
+tst
+tst["a"] <- TRUE
+tst
+suppressWarnings(attach(getNamespace("bnlearn")))
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+P_G_change <- function(from, to, action_is_add, sizes, prev_score, B_PK_mat, beta_L, beta_H)
+{
+# ! Convert C++ indices to Rcpp indices(+1) !
+b_i_j <- B_PK_mat[from+1, to+1]
+u_change <- 1 - 2 * b_i_j
+if(!action_is_add)
+{
+u_change <- -1 * u_change
+}
+N <- sum(sizes)
+ret <- pP_G_compute(prev_score + u_change, N, beta_L, beta_H)
+return(ret)
+}
+BGe_change <- function(from, to, action_is_add, sizes, score_prev, G_adjs, param)
+{
+# Convert C++ indices to Rcpp indices(+1)
+from <- from + 1
+to <- to + 1
+part_id <- sum(from > sizes) + 1
+adj <- G_adjs[[part_id]]
+prev <- adj[from, to]
+score_node_prev <- BGe_node_compute(node, neights, param)
+adj[from, to] <- action_is_add
+node <- colnames(adj)[to]
+neights <- names(which(adj[node,] > 0))
+score_node <- BGe_node_compute(node, neights, param)
+adj[from, to] <- prev
+return(score_node - score_node_prev + score_prev)
+}
+score_change_G <- function(G_adjs, B_PKs, beta_L, beta_H, param_in,
+sizes,
+score_prev, from, to, action_is_add)
+{
+ret <- score_prev
+N <- sum(sizes)
+A <- P_G_change(from, to, action_is_add, sizes, ret["pG"], B_PKs, beta_L, beta_H)
+P_G_change()
+B <- BGe_change(from, to, action_is_add, sizes, ret["pDG"], G_adjs, param_in)
+BGe_change()
+ret["pG"] <- ret["pG"] + A
+ret["pDG"] <- ret["pDG"] + B
+return(ret)
+}
+Rcpp::sample
+rep(0, 3)
+rep(1, 3)
+tst
+class(tst)
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+?list
+list(5)
+class(matrix(TRUE, 2, 2))
+sapply(matrix(TRUE, 2, 2), class)
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+?sample
+?sample
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+sample(c(0,1), prob=(0.5, 0.5))
+sample(c(0,1), prob=c(0.5, 0.5))
+sample(c(0,1), prob=c(0.5, 0.5))
+sample(c(0,1), prob=c(0.5, 0.5))
+sample(c(0,1), prob=c(0.5, 0.5))
+sample(c(0,1), prob=c(0.5, 0.5))
+sample(c(0,1), prob=c(0.5, 0.5))
+sample(c(0,1), 1, prob=c(0.5, 0.5))
+sample(c(0,1), 1, prob=c(0.5, 0.5))
+sample(c(0,1), 1, prob=c(0.5, 0.5))
+sample(c(0,1), 1, prob=c(0.5, 0.5))
+sample(c(0,1), 1, prob=c(0.5, 0.5))
+sample(c(0,1), 1, prob=c(0.5, 0.5))
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")\
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+?Rcpp::sourceCpp
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+?p
+p(1)
+p(1, 2)
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/general.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts)
+SK_SS_tripartite(GE,PK,SK,parts)
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts)
+traceback()
+debugSource("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts)
+G_adjs
+View(G_adjs)
+length(nodes_cand)
+nodes_cand
+adj
+source("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts)
+traceback()
+debugSource("G:/4_Semestr/DP/R_version/source/skeleton.R")
+SK_SS_tripartite(GE,PK,SK,parts)
+class(c("pG"=pG, "pDG"=pDG))
+class(c("pG"=pG, "pDG"=pDG)[|pG])
+tst <- c("pG"=pG, "pDG"=pDG)
+tst["pG"]
+tst["pDG"]
+pG
+5:5
diff --git a/R_version.Rproj b/R_version.Rproj
new file mode 100644
index 0000000000000000000000000000000000000000..8e3c2ebc99e2e337f7d69948b93529a437590b27
--- /dev/null
+++ b/R_version.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
diff --git a/data_preprocess.R b/data_preprocess.R
new file mode 100644
index 0000000000000000000000000000000000000000..42b96b462125a62b10a46ac65ba8c4beb6fd203b
--- /dev/null
+++ b/data_preprocess.R
@@ -0,0 +1,1937 @@
+## usethis namespace: start
+#' @useDynLib gpubayesnetomics, .registration = TRUE
+## usethis namespace: end
+NULL
+
+## usethis namespace: start
+#' @importFrom Rcpp sourceCpp
+## usethis namespace: end
+NULL
+
+
+
+
+
+
+
+
+circGPA_circrnas <- function(f_circ_mirna_prior='data/circrna-mirna-interactions.csv')
+{
+  map <- read.csv(f_circ_mirna_prior)
+  subset(map, select = -c(X) )
+  return(map$circRNA)
+}
+
+circGPA_mirnas <- function()
+{
+  miRNATab=getMiRNATable(version="v22",species="hsa")
+  return(sort(unique(miRNATab$Mature1)))
+}
+
+circGPA_mrnas <- function()
+{
+  all_coding_genes <- getBM(
+          attributes = c( "hgnc_symbol"), 
+          filters = c("biotype"), 
+          values = list(biotype="protein_coding"), 
+          mart = DB_mart
+    )
+  return(all_coding_genes$hgnc_symbol)
+}
+
+
+
+readDataFile <- function(filepath)
+{
+    library(tools)
+    ext <- file_ext(filepath)
+
+    if(ext != "csv" && ext != "tsv")
+    {
+        stop("Only .CSV and .TSV files are supported!")
+    }
+
+    ret <- NULL
+    if(ext == "csv")
+    {
+        ret <- read.csv(file = filepath)
+    }
+    if(ext == "tsv")
+    {
+        ret <- read.table(file = filepath, sep = '\t', header = TRUE)
+    }
+    return(ret)
+}
+
+normalize_and_filter_low_counts <- function(GE_matrix, method="DESeq2", gene_lengths=NULL, threshold=10)
+{
+    if(method == "DESeq2")
+    {
+        library(DESeq2)
+        metadata <- as.data.frame(colnames(GE_matrix))
+        GE_matrix <- t(GE_matrix)
+
+        dds <- DESeqDataSetFromMatrix(countData=GE_matrix,
+                                      colData=metadata,
+                                      design=~1)
+        dds<- dds[rowSums(counts(dds))>10,] # remove low counts
+        dds = estimateSizeFactors(dds) # calculate size
+        GE_matrix = counts(dds, normalized = TRUE)#get normalized data
+        GE_matrix <- t(GE_matrix)
+        return(GE_matrix)
+    }
+
+    if(method == "TPM")
+    {
+        GE_matrix <- t(GE_matrix)
+
+        fetched_idx <- rowSums(GE_matrix)>0
+
+        gene_lengths <- gene_lengths[fetched_idx]# remove low counts
+        GE_matrix<- GE_matrix[fetched_idx,] # remove low counts
+
+        GE_matrix <- GE_matrix / gene_lengths
+        GE_matrix <- t( t(GE_matrix) * 1000000.0 / colSums(GE_matrix) )
+
+        #GE_matrix <- log2(GE_matrix + 1)
+        GE_matrix <- t(GE_matrix)
+
+        return(list(GE_matrix, fetched_idx))
+    }
+}
+
+extract_gene_lengths <- function(df_choosen_genes)
+{
+    ret <- df_choosen_genes %>% mutate(GENE_LEN=abs(
+        as.numeric(START)-as.numeric(END)))
+    return(ret$GENE_LEN)
+}
+
+extract_mirna <- function(df_allrna_annot_hg38)
+{
+    # get all rows with GENE_TYPE=miRNA
+    mirna_rows <- df_allrna_annot_hg38[df_allrna_annot_hg38$GENE_TYPE=="miRNA",]
+
+    return(mirna_rows)
+}
+
+extract_circrna <- function(df_circrna)
+{
+    # get only circ ID present rows
+    circ_rows <- df_circrna[!is.na(df_circrna$circRNA_ID),]
+
+    # create pseudo geneID for them if n/a ENSG id
+    # n/a -----> ENSG + circID
+    non_geneid_rows <- circ_rows[circ_rows$GENE_ID == "n/a",]
+
+    circ_name <- non_geneid_rows$circRNA_ID
+    circ_name <- paste("ENSG", circ_name, sep="")
+    circ_rows[circ_rows$GENE_ID == "n/a", "GENE_ID"] <- circ_name
+    return(circ_rows)
+}
+
+convertEnsembleIDToEntrez <- function(ensemble_array)
+{
+    library(AnnotationDbi)
+    library(org.Hs.eg.db)
+
+    converted_array <- mapIds(org.Hs.eg.db,
+                 keys=ensemble_array, #Column containing Ensembl gene ids
+                 column="ENTREZID",
+                 keytype="ENSEMBL",
+                 multiVals="first")
+
+
+    #BUT! not all ensemble genes are present in Entrez, so:
+
+
+    # get NA entrez genes' original ensemble id
+    orig_na_ensemble <- ensemble_array[is.na(converted_array)]
+
+    # remove ENSG from it
+    na_ensembl_ids <- sapply(strsplit(orig_na_ensemble, split='ENSG', fixed=TRUE), function(x) (x[2]))
+
+    # add minus so that this ID is 100% unique and not in ENTREZID
+    na_ensembl_ids <- sapply(na_ensembl_ids, function(x) paste("-", x, sep=""))
+
+    # replace NA with ids
+    converted_array[is.na(converted_array)] <- na_ensembl_ids
+
+    # add ENTREZID:
+    converted_array <- sapply(converted_array, function(x) paste("ENTREZID:", x, sep=""))
+
+    return(converted_array)
+}
+
+extract_all_sample_names <- function(df_circ, df_allrna)
+{
+    cnames_circ <- colnames(df_circ);
+    cnames_all <- colnames(df_allrna);
+
+    cnames_circ <- cnames_circ[10:length(cnames_circ)];
+    cnames_all <- cnames_all[7:length(cnames_all)];
+
+    cnames_total <- union(cnames_circ, cnames_all);
+    cnames_total <- unique(cnames_total);
+
+    return(cnames_total);
+}
+
+extract_num <- function(vec_string)
+{
+    matches <- regmatches(vec_string, gregexpr("[[:digit:]]+", vec_string))
+    ret <- as.numeric(unlist(matches))
+    return(ret)
+}
+
+
+mat_miRNA_to_geneID <- function(ID_row)
+{
+    spl <- strsplit(ID_row, split = "-")[[1]]
+    type <- spl[[2]]
+    id <- spl[[3]]
+    str_ret <- "MIR"
+    if(type == "let")
+    {
+        str_ret <- paste(str_ret, "LET", sep="")
+    }
+    else if(type != "mir")
+    {
+        str <- sprintf("Gene: %s is neither LET or MIR type, cannot convert!", ID_row)
+        print(str)
+        return(NA)
+    }
+
+    str_ret <- paste(str_ret, toupper(id), sep="")
+    return(str_ret)
+}
+
+
+convert_prior_mi_m <- function(df_prior_mi_m, df_allrna, sample_names)
+{
+    print("################### preprocessing of PK (miRNA <-> mRNA) ################################")
+
+    df_prior_mi_m_orig <- df_prior_mi_m
+
+    # =====================  mRNA =====================
+    mrnas <- df_prior_mi_m$target_symbol
+    mrnas_unique <- unique(mrnas)
+
+    # 1) search in expression data
+    str <- sprintf("PK table(miRNA <-> mRNA) has total %s unique mRNAs", length(mrnas_unique))
+    print(str)
+
+    one_expr_data_found <- mrnas_unique[mrnas_unique %in% df_allrna$GENE_NAME]
+    one_ENSG_rows <- df_allrna[match(one_expr_data_found, df_allrna$GENE_NAME),]
+    one_not_found <- mrnas_unique[!(mrnas_unique %in% df_allrna$GENE_NAME)]
+    one_names <- one_expr_data_found
+
+    str <- sprintf("%s of those mRNAs can be explicitly found in expr.table", length(one_expr_data_found))
+    print(str)
+
+    # 2) search in BioMart(Ensembl) by gene symbol = gene name
+    #mart <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl")
+    two_expr_data_found <- getBM(attributes=c('ensembl_gene_id', 'external_gene_name'),
+                 filters = 'external_gene_name',
+                 values = one_not_found,
+                 mart = DB_ensembl)
+    # only present in expr.table
+    two_expr_data_found <- two_expr_data_found[two_expr_data_found$ensembl_gene_id %in% df_allrna$GENE_ID,]
+    # use match -> NA are not found, indices found used to fetch by GENE ID
+    two_found_idx <- match(one_not_found, two_expr_data_found$external_gene_name)
+    two_expr_data_found <- two_expr_data_found[na.omit(two_found_idx),]
+    two_names <- two_expr_data_found$external_gene_name
+    two_ENSG_rows <- df_allrna[match(two_expr_data_found$ensembl_gene_id, df_allrna$GENE_ID),]
+    two_not_found <- one_not_found[is.na(two_found_idx)]
+
+    str <- sprintf("Next %s was found by using BioMart(%s overlap with prev) as GENE name",
+                   nrow(two_expr_data_found),
+                   sum(two_expr_data_found$external_gene_name %in% one_ENSG_rows$GENE_ID))
+    print(str)
+
+    # 3) search in BioMart(Ensembl) by gene synonym = e.g. gene name, but not primary used
+    tri_expr_data <- getBM(attributes=c('ensembl_gene_id', 'external_gene_name', 'external_synonym'),
+                           filters = 'external_synonym',
+                           values = two_not_found,
+                           mart = DB_ensembl)
+    # only those in expr.table are needed
+    tri_expr_data <- tri_expr_data[tri_expr_data$ensembl_gene_id %in% df_allrna$GENE_ID,]
+    tri_found_idx <- match(two_not_found, tri_expr_data$external_synonym)
+    tri_expr_data <- tri_expr_data[na.omit(tri_found_idx),]
+    tri_ENSG_rows <- df_allrna[match(tri_expr_data$ensembl_gene_id, df_allrna$GENE_ID),]
+    tri_names <- tri_expr_data$external_synonym
+
+    not_exist <- two_not_found[is.na(tri_found_idx)]
+    str <- sprintf("Final %s was found by using BioMart(%s/%s overlap with prevs) as GENE synonym",
+                   nrow(tri_expr_data),
+                   sum(tri_expr_data$ensembl_gene_id %in% two_expr_data_found$ensembl_gene_id),
+                   sum(tri_expr_data$ensembl_gene_id %in% one_ENSG_rows$GENE_ID))
+    print(str)
+
+    # 4) The rest is left, it is just 7 out of 17.4K
+    str <- sprintf("In the end we have %s mRNA genes not found, these are:",
+                   length(not_exist))
+    print(str)
+    print(not_exist)
+
+    # 5) combine them into one table
+    total_names <- c(one_names, two_names, tri_names)
+    total_ENSG_rows <- rbind(one_ENSG_rows, two_ENSG_rows, tri_ENSG_rows)
+
+    # 6) remove not found from PK and extract Ensembl IDs per mRNA in PK
+    df_prior_mi_m <- df_prior_mi_m[df_prior_mi_m$target_symbol %in% total_names,]
+    mrnas <- df_prior_mi_m$target_symbol
+    mrnas_idx <- match(mrnas, total_names)
+
+    mrna_rows <- total_ENSG_rows
+    mrna_ENSG <- total_ENSG_rows[mrnas_idx,]$GENE_ID
+
+    # =====================  miRNA =====================
+    mirnas <- df_prior_mi_m$mature_mirna_id
+    mirnas <- tolower(mirnas)
+    mirnas_unique <- unique(mirnas)
+
+    idx_mirna <- match(mirnas, mirnas_unique)
+
+    gene_names_unique <- sapply(mirnas_unique, function(name)
+    {
+        mirna <- strsplit(name, split="-")[[1]]
+
+        if(mirna[[1]] != "hsa" || length(mirna) < 3)
+        {
+            str <- sprintf("ERROR! miRNA %s has some wrong format! Use hsa-XXX-XXX", name)
+            stop(str)
+        }
+
+        type <- mirna[[2]]
+        id <- mirna[[3]]
+        str_ret <- "MIR"
+        if(type == "let")
+        {
+            str_ret <- paste(str_ret, "LET", sep="")
+        }
+        if(type != "let" && type != "mir")
+        {
+            str <- sprintf("ERROR! miRNA %s has wrong type! Use either: hsa-let-XXX/hsa-mir-XXX", name)
+            stop(str)
+        }
+
+        str_ret <- paste(str_ret, toupper(id), sep="")
+        return(str_ret)
+    })
+    sample_names <- sample_names
+
+    allrna_num <- sapply(df_allrna$GENE_NAME, function(name)
+        {
+        nums <- extract_num(name)
+        return(nums[1])
+    })
+
+
+    mirna_rows <- sapply(gene_names_unique, function(name)
+    {
+        my_num <- extract_num(name)[1]
+
+        all_found <- grepl(name, df_allrna$GENE_NAME, fixed = TRUE)
+        same_num <- allrna_num==my_num
+
+        all_found <- all_found & same_num
+
+        if(sum(all_found) == 0)
+        {
+            str <- sprintf("miRNA:%s not found in expression data! Repair!",name)
+            stop(str)
+        }
+        rows <- df_allrna[all_found,]
+        ret <- rows[1,]
+        ret[,sample_names] <- colSums(rows[,sample_names])
+        ret
+    })
+    mirna_rows <- t(mirna_rows)
+    mirna_rows <- data.frame(mirna_rows)
+
+
+    gene_ids <- mirna_rows$GENE_ID[idx_mirna]
+    gene_ids <- data.frame(unlist(gene_ids))
+
+
+    mirna_rows <- unique(mirna_rows)
+    PK_mi_m <- data.frame(
+                          mirna_orig=mirnas,
+                          mirna_names=gene_names_unique[idx_mirna],
+                          mirna_ENSG=gene_ids[[1]],
+                          mrna_orig=mrnas, mrna_ENSG=mrna_ENSG
+                          )
+
+    return(list(mrna_rows, mirna_rows, PK_mi_m))
+}
+
+convert_prior_circ_mi <- function(df_allrna, df_circrna , df_prior_circ)
+{
+    print("################### preprocessing of PK (circRNA <-> miRNA) ################################")
+    str <- sprintf("PK table(circRNA <-> miRNA) has total %s rows", nrow(df_prior_circ))
+    print(str)
+
+
+    df_prior_circ_orig <- df_prior_circ
+    sample_names <- extract_all_sample_names(df_circrna, df_allrna)
+
+    mirna_rows <- extract_mirna(df_allrna)
+    circrna_rows <- extract_circrna(df_circrna)
+
+    prior_circrna_names <- df_prior_circ$circRNA
+    prior_circrna_names <- unique(prior_circrna_names)
+    prior_circrna_names_unique <- prior_circrna_names
+
+    found <- circrna_rows$circRNA_ID %in% prior_circrna_names
+    prior_circ_rows <- circrna_rows[found,]
+
+    if(length(prior_circrna_names_unique) != nrow(prior_circ_rows))
+    {
+        str <- sprintf("Prior CIRC-miRNA had %s circRNAs, I found only %s in expression data! Not found:",
+                       nrow(prior_circ_rows),
+                       length(prior_circrna_names_unique))
+
+        print(str)
+        print(circrna_rows[!found,]$circRNA_ID)
+        print(prior_circrna_names[!(prior_circrna_names %in% prior_circ_rows$circRNA_ID)])
+    }
+
+    prior_mirna_names <- df_prior_circ$miRNAs
+    prior_mirna_names <- unique(prior_mirna_names)
+    prior_mirna_names <- sapply(strsplit(prior_mirna_names, split='hsa-miR-', fixed=TRUE), function(x) (x[2]))
+    prior_mirna_names <- sapply(strsplit(prior_mirna_names, split='-', fixed=TRUE), function(x) (x[1]))
+    prior_mirna_names <- gsub("\\D", "", prior_mirna_names)
+    prior_mirna_names <- paste("MIR", prior_mirna_names, sep="")
+
+    prior_mirna_names_unique <- lapply(unique(prior_mirna_names), function(x){
+        all_found <- mirna_rows$GENE_NAME %like% x
+
+        if(sum(all_found) == 0)
+        {
+            return()
+        }
+        x
+    })
+    prior_mirna_names_unique <- unlist(prior_mirna_names_unique)
+
+    prior_mirna_names <- unique(prior_mirna_names)
+
+    allrna_num <- sapply(mirna_rows$GENE_NAME, function(name)
+    {
+        nums <- extract_num(name)
+        return(nums[1])
+    })
+
+    prior_mirna_rows <- sapply(prior_mirna_names, function(x){
+        my_num <- extract_num(x)[1]
+        all_found <- grepl(x, mirna_rows$GENE_NAME, fixed = TRUE)
+        same_num <- allrna_num==my_num
+        all_found <- all_found & same_num
+
+        if(sum(all_found) == 0)
+        {
+            str <- sprintf("miRNA: %s wasn't found in exrp.table, removing its rows", x)
+            print(str)
+            return()
+        }
+
+        ret <- mirna_rows[all_found,]
+        if(sum(all_found) > 1)
+        {
+            str1 <- sprintf("miRNA: %s has more paralogous forms in expression: ", x)
+            print(paste(str1, "\n"))
+            print(ret$GENE_NAME)
+        }
+        ret_first <- ret[1,]  # take first one for representation
+
+        # but since all of them are paralogous -> sum them up??
+        ret_first[,sample_names] <- colSums(ret[, sample_names])
+        rownames(ret_first)[1] <- x
+
+        ret_first <- ret_first %>% mutate(paralogous=list(ret[,"GENE_NAME"]))
+        ret_first <- ret_first %>% mutate(paralogous_ENSG=list(ret[,"GENE_ID"]))
+        ret_first$GENE_NAME <- x
+        ret_first
+    } )
+    prior_mirna_rows <- bind_rows(prior_mirna_rows)
+    prior_mirna_names <- df_prior_circ$miRNAs
+    prior_mirna_names <- sapply(strsplit(prior_mirna_names, split='hsa-miR-', fixed=TRUE), function(x) (x[2]))
+    prior_mirna_names <- sapply(strsplit(prior_mirna_names, split='-', fixed=TRUE), function(x) (x[1]))
+    prior_mirna_names <- gsub("\\D", "", prior_mirna_names)
+    prior_mirna_names <- paste("MIR", prior_mirna_names, sep="")
+
+    prior_circrna_names <- df_prior_circ$circRNA
+
+    first_cond <- prior_mirna_names %in% prior_mirna_names_unique
+    second_cond <- prior_circrna_names %in% prior_circrna_names_unique
+    total_cond <- first_cond & second_cond
+
+    prior_mirna_names <- prior_mirna_names[total_cond]
+    df_prior_circ <- df_prior_circ[total_cond,]
+
+    idx_mirna <- match(prior_mirna_names, prior_mirna_names_unique)
+    idx_circ <- match(df_prior_circ$circRNA, prior_circ_rows$circRNA_ID)
+
+    PK_circ_mirna <- data.frame(circ=prior_circ_rows$GENE_ID[idx_circ],
+                                mirna=prior_mirna_rows$GENE_ID[idx_mirna],
+                                orig_circ=df_prior_circ$circRNA,
+                                orig_mirna=prior_mirna_names,
+                                orig_orig_mirna=df_prior_circ$miRNAs)
+
+
+    str <- sprintf("After preprocess and row removing, now we have %s of them",
+                   nrow(PK_circ_mirna))
+    print(str)
+
+    return(list(prior_mirna_rows, prior_circ_rows, PK_circ_mirna))
+}
+
+
+convert_prior_to_ENSG <- function(df_allrna, df_circrna , df_prior_circ, df_prior_mi_m)
+{
+    sample_names <- extract_all_sample_names(df_circrna, df_allrna)
+
+    list[mrna_rows, mirna_rows, PK_mi_m] <- convert_prior_mi_m(
+                                                        df_prior_mi_m,
+                                                        df_allrna,
+                                                        sample_names)
+
+    mirna_rows <- mirna_rows %>% rowwise %>% mutate(paralogous=GENE_NAME)
+    mirna_rows <- mirna_rows %>% rowwise %>% mutate(paralogous_ENSG=GENE_ID)
+
+    mrna_rows <- mrna_rows %>% rowwise %>% mutate(paralogous=GENE_NAME)
+    mrna_rows <- mrna_rows %>% rowwise %>% mutate(paralogous_ENSG=GENE_ID)
+
+
+    list[mirna_rows_2, circ_rows, PK_circ_mi] <- convert_prior_circ_mi(
+                                                        df_allrna,
+                                                        df_circrna ,
+                                                        df_prior_circ)
+
+    circ_rows <- circ_rows %>% rowwise %>% mutate(paralogous=GENE_NAME)
+    circ_rows <- circ_rows %>% rowwise %>% mutate(paralogous_ENSG=GENE_ID)
+
+
+    all_paralogs <- unlist(mirna_rows_2$paralogous)
+    to_add_rows <- mirna_rows[!(mirna_rows$GENE_NAME %in% all_paralogs),]
+    mirna_rows <- rbind(mirna_rows_2, to_add_rows)
+
+
+
+
+    return(list(mrna_rows, mirna_rows, circ_rows, PK_circ_mi, PK_mi_m))
+}
+
+
+
+sample_from_rows <- function(data,
+                             num_circRNA_sample=1,
+                             num_miRNA_sample=10,
+                             num_mRNA_sample=20,
+                             is_random=FALSE,
+                             is_send_PK=FALSE)
+{
+    list[mrna_rows, mirna_rows, circrna_rows, PK_circ_mi, PK_mi_m, sample_names] <- data
+    m <- length(sample_names)
+
+    n1 <- nrow(mrna_rows)
+    n2 <- nrow(mirna_rows)
+    n3 <- nrow(circrna_rows)
+
+    print("########## SUBSAMPLING NEEDED AMOUNT OF RNAs ###############")
+
+    if(n3 >= num_circRNA_sample)
+    {
+        # 1) start by sampling circ -> they are our key points
+        indices_circ <- sample.int(n3, num_circRNA_sample)
+        circrna_rows <- circrna_rows[indices_circ,]
+        PK_circ_mi <- PK_circ_mi[PK_circ_mi$orig_circ %in% circrna_rows$circRNA_ID,]
+        #View(circrna_rows)
+        #View(PK_circ_mi)
+
+        # 1.1) update mirna -> some of them may have been deleted
+        all_mirna <- c(PK_circ_mi$orig_mirna)#PK_circ_mi
+        mirna_rows <- mirna_rows[mirna_rows$GENE_NAME %in% all_mirna,]
+        PK_mi_m <- PK_mi_m[PK_mi_m$mirna_names %in% mirna_rows$GENE_NAME, ]
+
+        # 1.2) update mrna -> some of them may have been because of miRNA
+        all_mrna <- PK_mi_m$mrna_ENSG
+        mrna_rows <- mrna_rows[mrna_rows$GENE_ID %in% all_mrna,]
+        n1 <- nrow(mrna_rows)
+        n2 <- nrow(mirna_rows)
+        n3 <- nrow(circrna_rows)
+    }
+
+
+    if(n2 >= num_miRNA_sample)
+    {
+        # 2) subsample mirna
+        indices_mi <- sample.int(n2, num_miRNA_sample)
+        mirna_rows <- mirna_rows[indices_mi,]
+        PK_circ_mi <- PK_circ_mi[PK_circ_mi$orig_mirna %in% mirna_rows$GENE_NAME,]
+        PK_mi_m <- PK_mi_m[PK_mi_m$mirna_names %in% mirna_rows$GENE_NAME, ]
+
+        # 2.1) change mrna
+        all_mrna <- PK_mi_m$mrna_ENSG
+        mrna_rows <- mrna_rows[mrna_rows$GENE_ID %in% all_mrna,]
+        n1 <- nrow(mrna_rows)
+        n2 <- nrow(mirna_rows)
+    }
+    else{
+        str <- sprintf("Not enough miRNA samples! Need:%s, have: %s", num_miRNA_sample, n2)
+        print(str)
+    }
+    if(n1 >= num_mRNA_sample)
+    {
+        # 2) subsample mrna
+        indices_m <- sample.int(n1, num_mRNA_sample)
+        mrna_rows <- mrna_rows[indices_m,]
+        PK_mi_m <- PK_mi_m[PK_mi_m$mrna_ENSG %in% mrna_rows$GENE_ID, ]
+        n1 <- nrow(mrna_rows)
+    }
+    else{
+        str <- sprintf("Not enough mRNA samples! Need:%s, have: %s", num_mRNA_sample, n1)
+        print(str)
+    }
+
+    return(list(mrna_rows, mirna_rows, circrna_rows, PK_circ_mi, PK_mi_m))
+}
+
+
+
+
+
+
+convert_to_IntOMICS <- function(
+        f_all_rna_annot,
+        f_circ_rna_annot,
+        f_circ_mirna_prior,
+        f_mirna_mrna_prior,
+        num_circRNA_sample=3,
+        num_miRNA_sample=12,
+        num_mRNA_sample=24,
+        max_input_edges=3,
+        is_random=FALSE,
+        is_send_PK=FALSE)
+{
+    library(dplyr)
+    library(gsubfn)
+    library(data.table)
+    library(biomaRt)
+    library("org.Hs.eg.db")
+    library(EnsDb.Hsapiens.v86)
+
+    # read all files
+    df_allrna <- readDataFile(f_all_rna_annot)
+    df_circrna <- readDataFile(f_circ_rna_annot)
+    df_PK_mi_circ <- readDataFile(f_circ_mirna_prior)
+    df_PK_mi_mrna <- readDataFile(f_mirna_mrna_prior)
+
+    sample_names <- extract_all_sample_names(df_circrna, df_allrna)
+
+
+    # preprocess priors
+    list[mrna_rows, mirna_rows, circrna_rows, PK_circ_mi, PK_mi_m] <- convert_prior_to_ENSG(
+                                df_allrna, df_circrna, df_PK_mi_circ, df_PK_mi_mrna)
+
+
+    # take out subsample
+    data <- list(mrna_rows, mirna_rows, circrna_rows, PK_circ_mi, PK_mi_m, sample_names)
+    list[mrna_rows, mirna_rows, circrna_rows, PK_circ_mi, PK_mi_m] <-
+                                sample_from_rows(
+                                        data,
+                                        num_circRNA_sample,
+                                        num_miRNA_sample,
+                                        num_mRNA_sample,
+                                        is_random,
+                                        is_send_PK)
+
+
+    # process data further
+    to_numeric <- c(sample_names, "START", "END")
+    sapply(to_numeric, function(col)
+    {
+        mirna_rows[,col] <- as.numeric(mirna_rows[,col])
+        #mrna_rows[,col] <- as.numeric(mrna_rows[,col])
+        #circ_rows[,col] <- as.numeric(circ_rows[,col])
+    })
+
+    print("################### converting data to Gene Expression(GE) matrix ################################")
+
+    # m -------- by intomics denotion: number of experiment samples
+    m <- length(sample_names)
+
+    mirna_names <- mirna_rows$GENE_ID
+    n2 <-length(mirna_names)
+
+    mrna_names <- mrna_rows$GENE_ID
+    n1 <- length(mrna_names)
+
+    circrna_names <- circrna_rows$circRNA_ID
+    n3 <- length(circrna_names)
+
+    str <- sprintf("N1: %s,  N2: %s N3: %s M:%s\n", n1, n2, n3, m)
+    print(str)
+
+    # create matrices ---- counts
+    MRNA_DATA <- mrna_rows[, sample_names]
+    MRNA_DATA <- data.matrix(MRNA_DATA)
+    MRNA_DATA <- t(MRNA_DATA)
+    #MRNA_DATA <- base::sapply(1:n1, function(col_index) unlist(mrna_rows[col_index,sample_names]))
+
+    MIRNA_DATA <- mirna_rows[, sample_names]
+    MIRNA_DATA <- data.matrix(MIRNA_DATA)
+    MIRNA_DATA <- t(MIRNA_DATA)
+    #MIRNA_DATA <- base::sapply(1:n2, function(col_index) unlist(mirna_rows[col_index,sample_names]))
+
+    CIRCRNA_DATA <- circrna_rows[, sample_names]
+    CIRCRNA_DATA <- data.matrix(CIRCRNA_DATA)
+    CIRCRNA_DATA <- t(CIRCRNA_DATA)
+    #CIRCRNA_DATA <- base::sapply(1:n3, function(col_index) unlist(circrna_rows[col_index, sample_names]))
+    CIRCRNA_DATA[is.na(CIRCRNA_DATA)] <- 0
+
+    # get their gene lengths
+    MRNA_gene_lengths <- extract_gene_lengths(mrna_rows)
+    MIRNA_gene_lengths <- extract_gene_lengths(mirna_rows)
+    CIRCRNA_gene_lengths <- extract_gene_lengths(circrna_rows)
+    gene_lengths <- c(MRNA_gene_lengths, MIRNA_gene_lengths,CIRCRNA_gene_lengths )
+
+    # get names of genes
+    total_gene_names = c(mrna_names, mirna_names, circrna_names)
+
+    # combine them to one matrix
+    GE_matrix <- cbind(MRNA_DATA, MIRNA_DATA, CIRCRNA_DATA)
+    colnames(GE_matrix) <- total_gene_names
+    rownames(GE_matrix) <- sample_names
+
+    # convert counts to TPM and remove low counts
+    list[GE_matrix, chosen_idx] <- normalize_and_filter_low_counts(
+                                                GE_matrix,
+                                                method="TPM",
+                                                gene_lengths = gene_lengths)
+
+    chosen_idx <- (1:(n1+n2+n3))[chosen_idx]
+
+    # update n1, n2 --- low counts removed
+    mrna_idx <- chosen_idx[chosen_idx <= n1]
+    mirna_idx <- chosen_idx[chosen_idx > n1]
+    mirna_idx <- mirna_idx[mirna_idx <= (n2+n1)] - n1
+
+    circrna_idx <- chosen_idx[chosen_idx > (n2+n1)] - n2 - n1
+
+    mrna_names <- mrna_names[mrna_idx]
+    mirna_names <- mirna_names[mirna_idx]
+    circrna_names <- circrna_names[circrna_idx]
+    total_gene_names = c(mrna_names, mirna_names, circrna_names)
+
+    n1 <- length(mrna_idx)
+    n2 <- length(mirna_idx)
+    n3 <- length(circrna_idx)
+
+    str <- sprintf("N1: %s,  N2: %s N3: %s M:%s\n", n1, n2, n3, m)
+    print(str)
+
+
+    # define real PK
+    circ_in_rows <- PK_circ_mi$orig_circ %in% circrna_names
+    mirna_in_rows <- PK_circ_mi$mirna %in% mirna_names
+    PK_circ_mi <- PK_circ_mi[mirna_in_rows & circ_in_rows,]
+
+    m_in_rows <- PK_mi_m$mrna_ENSG %in% mrna_names
+    mirna_in_rows <- PK_mi_m$mirna_ENSG %in% mirna_names
+    PK_mi_m <- PK_mi_m[mirna_in_rows & m_in_rows,]
+
+    N <- ncol(GE_matrix)
+    PK_real <- matrix(0, N, N)
+
+    for(i in 1:nrow(PK_circ_mi))
+    {
+        row_PK <- PK_circ_mi[i,]
+        idx_circ <- (1:n3 + n2 + n1)[row_PK$orig_circ == circrna_names]
+        idx_mirna <- (1:n2 + n1)[row_PK$mirna == mirna_names]
+        PK_real[idx_circ, idx_mirna] <- 1
+        #print(paste("CIRC:", idx_circ, ", MI: ", idx_mirna))
+    }
+    for(i in 1:nrow(PK_mi_m))
+    {
+        row_PK <- PK_mi_m[i,]
+        idx_m <- (1:n1)[row_PK$mrna_ENSG == mrna_names]
+        idx_mirna <- (1:n2 + n1)[row_PK$mirna_ENSG == mirna_names]
+        PK_real[idx_mirna, idx_m] <- 1
+        #print(paste("MI:", idx_mirna, ", M: ", idx_m))
+    }
+
+
+
+
+    # convert names to Entrez ID:
+    ensembl_gene_names <- colnames(GE_matrix)
+    #total_gene_names <- convertEnsembleIDToEntrez(ensembl_gene_names)
+    total_gene_names <- paste("ENTREZID:", total_gene_names, sep="")
+    # total_gene_names[1:n2 + n1] <-
+    #     paste(total_gene_names[1:n2 + n1], "mi", sep="")
+    #
+    # total_gene_names[1:n3 + n1 + n2] <-
+    #     paste(total_gene_names[1:n3 + n1 + n2], "circ", sep="")
+
+    mirna_names <- total_gene_names[1:n2 + n1]
+    mrna_names <- total_gene_names[1:n1]
+    circrna_names <- total_gene_names[1:n3 + n1 + n2]
+
+    colnames(GE_matrix) <- total_gene_names
+    colnames(PK_real) <- total_gene_names
+    rownames(PK_real) <- total_gene_names
+
+    # define omics
+    omics <- list(ge=GE_matrix)#, circ=CIRC_MATRIX)
+
+    # define layer_def
+    min_in <- min(num_circRNA_sample, num_miRNA_sample, num_mRNA_sample)
+    layer_def <- data.frame(omics="ge", layer=as.numeric(1),
+                            fan_in_ge=as.numeric(max_input_edges))
+
+    # define prior
+    PK <- data.frame(matrix(0, nrow=0, ncol=6))
+    colnames(PK) <- c("src_entrez","dest_entrez",
+                         "direction","type",
+                         "source", "edge_type")
+    idx_PK <- 1
+
+    for(i in 1:n1)
+    {
+        i_gene <- mrna_names[i]
+        #turn off mRNA -> mRNA interactions
+        for(j in 1:n1)
+        {
+            if(i != j)
+            {
+                j_gene <- mrna_names[j]
+                PK[idx_PK,] <- c(i_gene, j_gene, "directed", "Process", "circGPA", "absent")
+                idx_PK <- idx_PK + 1
+            }
+        }
+
+        #turn off mRNA -> circRNA interactions
+        for(j in 1:n3)
+        {
+            j_gene <- circrna_names[j]
+            PK[idx_PK,] <- c(i_gene, j_gene, "directed", "Process", "circGPA", "absent")
+            idx_PK <- idx_PK + 1
+        }
+
+
+        #turn off mRNA -> miRNA interactions
+        for(j in 1:n2)
+        {
+            j_gene <- mirna_names[j]
+            PK[idx_PK, ] <- c(i_gene, j_gene, "directed", "Process", "circGPA", "absent")
+            idx_PK <- idx_PK + 1
+        }
+    }
+
+    for(i in 1:n2)
+    {
+        i_gene <- mirna_names[i]
+
+        # turn off miRNA->miRNA interactions
+        for(j in 1:n2)
+        {
+            j_gene <- mirna_names[j]
+            PK[idx_PK, ] <- c(i_gene, j_gene, "directed", "Process", "circGPA", "absent")
+            idx_PK <- idx_PK + 1
+        }
+
+        # turn off miRNA->circRNA interactions
+        for(j in 1:n3)
+        {
+            j_gene <- circrna_names[j]
+            PK[idx_PK, ] <- c(i_gene, j_gene, "directed", "Process", "circGPA", "absent")
+            idx_PK <- idx_PK + 1
+        }
+    }
+
+    for(i in 1:n3)
+    {
+        i_gene <- circrna_names[i]
+        # turn off circRNA->mRNA interactions
+        for(j in 1:n2)
+        {
+            j_gene <- mrna_names[j]
+            PK[idx_PK, ] <- c(i_gene, j_gene, "directed", "Process", "circGPA", "absent")
+            idx_PK <- idx_PK + 1
+        }
+
+        # turn off circRNA->circRNA interactions
+        for(j in 1:n3)
+        {
+
+            j_gene <- circrna_names[j]
+            PK[idx_PK, ] <- c(i_gene, j_gene, "directed", "Process", "circGPA", "absent")
+            idx_PK <- idx_PK + 1
+        }
+    }
+
+    PK_present <- PK
+
+
+    for(i in 1:nrow(PK_circ_mi))
+    {
+        PK_row <- PK_circ_mi[i,]
+        name_circ <- PK_row$orig_circ
+        #name_circ <- paste("ENTREZID:", name_circ, sep="")
+        name_mi <- PK_row$mirna
+        #name_mi <- paste("ENTREZID:", name_mi, sep="")
+
+        PK_present[idx_PK, ] <- c(name_circ, name_mi, "directed", "Process", "circGPA", "present")
+        idx_PK <- idx_PK + 1
+    }
+    for(i in 1:nrow(PK_mi_m))
+    {
+        PK_row <- PK_mi_m[i,]
+        name_m <- PK_row$mrna_ENSG
+        #name_m <- paste("ENTREZID:", name_m, sep="")
+        name_mi <- PK_row$mirna_ENSG
+        #name_mi <- paste("ENTREZID:", name_mi, sep="")
+
+        #PK_present[idx_PK, ] <- c(name_mi, name_m, "miRNA", "mRNA")
+        PK_present[idx_PK, ] <- c(name_mi, name_m, "directed", "Process", "circGPA", "present")
+        idx_PK <- idx_PK + 1
+    }
+
+
+
+    # define annot
+    #symbols <- df_allrna[df_allrna$GENE_ID %in% ensembl_gene_names,]$GENE_NAME
+    #annot <- data.frame(entrezID=total_gene_names, gene_symbol=symbols)
+    retsizes <- c(n1, n2, n3)
+    return(list(omics, layer_def, PK, PK_present, PK_real, retsizes))
+}
+
+convert_to_KIMONO <- function(
+        f_all_rna_annot,
+        f_circ_rna_annot,
+        f_circ_mirna_prior,
+        f_mirna_mrna_prior,
+        num_circRNA_sample=3,
+        num_miRNA_sample=12,
+        num_mRNA_sample=24,
+        max_input_edges=3,
+        is_random=FALSE,
+        is_send_PK=FALSE)
+{
+    library(dplyr)
+    library(gsubfn)
+    library(data.table)
+    library(biomaRt)
+    library("org.Hs.eg.db")
+    library(EnsDb.Hsapiens.v86)
+
+    # read all files
+    df_allrna <- readDataFile(f_all_rna_annot)
+    df_circrna <- readDataFile(f_circ_rna_annot)
+    df_PK_mi_circ <- readDataFile(f_circ_mirna_prior)
+    df_PK_mi_mrna <- readDataFile(f_mirna_mrna_prior)
+
+    sample_names <- extract_all_sample_names(df_circrna, df_allrna)
+
+
+    # preprocess priors
+    list[mrna_rows, mirna_rows, circrna_rows, PK_circ_mi, PK_mi_m] <- convert_prior_to_ENSG(
+        df_allrna, df_circrna, df_PK_mi_circ, df_PK_mi_mrna)
+
+
+    # take out subsample
+    data <- list(mrna_rows, mirna_rows, circrna_rows, PK_circ_mi, PK_mi_m, sample_names)
+    list[mrna_rows, mirna_rows, circrna_rows, PK_circ_mi, PK_mi_m] <-
+        sample_from_rows(
+            data,
+            num_circRNA_sample,
+            num_miRNA_sample,
+            num_mRNA_sample,
+            is_random,
+            is_send_PK)
+
+
+    # process data further
+    to_numeric <- c(sample_names, "START", "END")
+    sapply(to_numeric, function(col)
+    {
+        mirna_rows[,col] <- as.numeric(mirna_rows[,col])
+        #mrna_rows[,col] <- as.numeric(mrna_rows[,col])
+        #circ_rows[,col] <- as.numeric(circ_rows[,col])
+    })
+
+    print("################### converting data to Gene Expression(GE) matrix ################################")
+
+    # m -------- by intomics denotion: number of experiment samples
+    m <- length(sample_names)
+
+    mirna_names <- mirna_rows$GENE_ID
+    n2 <-length(mirna_names)
+
+    mrna_names <- mrna_rows$GENE_ID
+    n1 <- length(mrna_names)
+
+    circrna_names <- circrna_rows$circRNA_ID
+    n3 <- length(circrna_names)
+
+    str <- sprintf("N1: %s,  N2: %s N3: %s M:%s\n", n1, n2, n3, m)
+    print(str)
+
+    # create matrices ---- counts
+    MRNA_DATA <- mrna_rows[, sample_names]
+    MRNA_DATA <- data.matrix(MRNA_DATA)
+    MRNA_DATA <- t(MRNA_DATA)
+    #MRNA_DATA <- base::sapply(1:n1, function(col_index) unlist(mrna_rows[col_index,sample_names]))
+
+    MIRNA_DATA <- mirna_rows[, sample_names]
+    MIRNA_DATA <- data.matrix(MIRNA_DATA)
+    MIRNA_DATA <- t(MIRNA_DATA)
+    #MIRNA_DATA <- base::sapply(1:n2, function(col_index) unlist(mirna_rows[col_index,sample_names]))
+
+    CIRCRNA_DATA <- circrna_rows[, sample_names]
+    CIRCRNA_DATA <- data.matrix(CIRCRNA_DATA)
+    CIRCRNA_DATA <- t(CIRCRNA_DATA)
+    #CIRCRNA_DATA <- base::sapply(1:n3, function(col_index) unlist(circrna_rows[col_index, sample_names]))
+    CIRCRNA_DATA[is.na(CIRCRNA_DATA)] <- 0
+
+    # get their gene lengths
+    MRNA_gene_lengths <- extract_gene_lengths(mrna_rows)
+    MIRNA_gene_lengths <- extract_gene_lengths(mirna_rows)
+    CIRCRNA_gene_lengths <- extract_gene_lengths(circrna_rows)
+    gene_lengths <- c(MRNA_gene_lengths, MIRNA_gene_lengths,CIRCRNA_gene_lengths )
+
+    # get names of genes
+    total_gene_names = c(mrna_names, mirna_names, circrna_names)
+
+    # combine them to one matrix
+    GE_matrix <- cbind(MRNA_DATA, MIRNA_DATA, CIRCRNA_DATA)
+    colnames(GE_matrix) <- total_gene_names
+    rownames(GE_matrix) <- sample_names
+
+    # convert counts to TPM and remove low counts
+    list[GE_matrix, chosen_idx] <- normalize_and_filter_low_counts(
+        GE_matrix,
+        method="TPM",
+        gene_lengths = gene_lengths)
+
+    chosen_idx <- (1:(n1+n2+n3))[chosen_idx]
+
+    # update n1, n2 --- low counts removed
+    mrna_idx <- chosen_idx[chosen_idx <= n1]
+    mirna_idx <- chosen_idx[chosen_idx > n1]
+    mirna_idx <- mirna_idx[mirna_idx <= (n2+n1)] - n1
+
+    circrna_idx <- chosen_idx[chosen_idx > (n2+n1)] - n2 - n1
+
+    mrna_names <- mrna_names[mrna_idx]
+    mirna_names <- mirna_names[mirna_idx]
+    circrna_names <- circrna_names[circrna_idx]
+    total_gene_names = c(mrna_names, mirna_names, circrna_names)
+
+    n1 <- length(mrna_idx)
+    n2 <- length(mirna_idx)
+    n3 <- length(circrna_idx)
+
+    str <- sprintf("N1: %s,  N2: %s N3: %s M:%s\n", n1, n2, n3, m)
+    print(str)
+
+
+    # define real PK
+    circ_in_rows <- PK_circ_mi$orig_circ %in% circrna_names
+    mirna_in_rows <- PK_circ_mi$mirna %in% mirna_names
+    PK_circ_mi <- PK_circ_mi[mirna_in_rows & circ_in_rows,]
+
+    m_in_rows <- PK_mi_m$mrna_ENSG %in% mrna_names
+    mirna_in_rows <- PK_mi_m$mirna_ENSG %in% mirna_names
+    PK_mi_m <- PK_mi_m[mirna_in_rows & m_in_rows,]
+
+    N <- ncol(GE_matrix)
+    PK_real <- matrix(0, N, N)
+
+    for(i in 1:nrow(PK_circ_mi))
+    {
+        row_PK <- PK_circ_mi[i,]
+        idx_circ <- (1:n3 + n2 + n1)[row_PK$orig_circ == circrna_names]
+        idx_mirna <- (1:n2 + n1)[row_PK$mirna == mirna_names]
+        PK_real[idx_circ, idx_mirna] <- 1
+        #print(paste("CIRC:", idx_circ, ", MI: ", idx_mirna))
+    }
+    for(i in 1:nrow(PK_mi_m))
+    {
+        row_PK <- PK_mi_m[i,]
+        idx_m <- (1:n1)[row_PK$mrna_ENSG == mrna_names]
+        idx_mirna <- (1:n2 + n1)[row_PK$mirna_ENSG == mirna_names]
+        PK_real[idx_mirna, idx_m] <- 1
+        #print(paste("MI:", idx_mirna, ", M: ", idx_m))
+    }
+
+
+
+
+    # convert names to Entrez ID:
+    ensembl_gene_names <- colnames(GE_matrix)
+
+    mirna_names <- total_gene_names[1:n2 + n1]
+    mrna_names <- total_gene_names[1:n1]
+    circrna_names <- total_gene_names[1:n3 + n1 + n2]
+
+    colnames(GE_matrix) <- total_gene_names
+    colnames(PK_real) <- total_gene_names
+    rownames(PK_real) <- total_gene_names
+
+    PK_present <-  data.frame(matrix(0, nrow=0, ncol=4))
+    colnames(PK_present) <- c("A", "B", "layer_A", "layer_B")
+    idx_PK <- 1
+
+    for(i in 1:nrow(PK_circ_mi))
+    {
+        PK_row <- PK_circ_mi[i,]
+        name_circ <- PK_row$orig_circ
+        name_mi <- PK_row$mirna
+
+        PK_present[idx_PK, ] <- c(name_circ, name_mi, "circRNA", "miRNA")
+        idx_PK <- idx_PK + 1
+    }
+    for(i in 1:nrow(PK_mi_m))
+    {
+        PK_row <- PK_mi_m[i,]
+        name_m <- PK_row$mrna_ENSG
+        name_mi <- PK_row$mirna_ENSG
+
+        PK_present[idx_PK, ] <- c(name_mi, name_m, "miRNA", "mRNA")
+        idx_PK <- idx_PK + 1
+    }
+
+    mRNA_GE <- GE_matrix[,(1:n1)]
+    mRNA_GE <- data.frame(mRNA_GE)
+    if(ncol(mRNA_GE) == 1)
+    {
+        colnames(mRNA_GE) <- mrna_names
+    }
+
+    miRNA_GE <- GE_matrix[,(n1+1):(n1+n2)]
+    miRNA_GE <- data.frame(miRNA_GE)
+    if(ncol(miRNA_GE) == 1)
+    {
+        colnames(miRNA_GE) <- mirna_names
+    }
+
+    circRNA_GE <- GE_matrix[,(n1+n2+1):(n1+n2+n3)]
+    circRNA_GE <- data.frame(circRNA_GE)
+    if(ncol(circRNA_GE) == 1)
+    {
+        colnames(circRNA_GE) <- circrna_names
+    }
+
+    retsizes <- c(n1, n2, n3)
+
+    input_data <- list(
+        'circRNA' = circRNA_GE,
+        'miRNA' = miRNA_GE,
+        'mRNA' = mRNA_GE
+    )
+
+    return(list(input_data, PK_present, retsizes))
+}
+
+normalize_all_counts <- function(some_rows, sample_names, method="DESeq2", threshold=10)
+{
+    gene_lengths <- as.numeric(some_rows$START) - as.numeric(some_rows$END)
+    gene_lengths <- abs(gene_lengths)
+
+    GE_matrix <- some_rows[,sample_names]
+    GE_matrix[is.na(GE_matrix)] <- 0 # convert NA to 0
+    GE_matrix <- mutate_all(GE_matrix, function(x) as.numeric(x))
+
+
+
+    if(method == "DESeq2")
+    {
+        library(DESeq2)
+
+        GE_matrix <- t(GE_matrix)
+        metadata <- as.data.frame(colnames(GE_matrix))
+
+
+
+        dds <- DESeqDataSetFromMatrix(countData=GE_matrix,
+                                      colData=metadata,
+                                      design=~1)
+        dds<- dds[rowSums(counts(dds))>10,] # remove low counts
+        dds = estimateSizeFactors(dds) # calculate size
+        GE_matrix = counts(dds, normalized = TRUE)#get normalized data
+        GE_matrix <- t(GE_matrix)
+
+    }
+
+    if(method == "TPM")
+    {
+        GE_matrix <- t(GE_matrix)
+
+        fetched_idx <- rowSums(GE_matrix)>0
+
+        gene_lengths <- gene_lengths[fetched_idx]# remove low counts
+        GE_matrix<- GE_matrix[fetched_idx,] # remove low counts
+
+        GE_matrix <- GE_matrix / gene_lengths
+        GE_matrix <- t( t(GE_matrix) * 1000000.0 / colSums(GE_matrix) )
+
+        GE_matrix <- log2(GE_matrix + 1)
+        GE_matrix <- t(GE_matrix)
+
+    }
+
+    some_rows[,sample_names] <- GE_matrix
+
+    return(some_rows)
+}
+
+map_MSD_to_prior_names <- function(f_all_rna_annot,
+                                   f_circ_rna_annot,
+                                   f_circ_mirna_prior,
+                                   f_mirna_mrna_prior)
+{
+  # read all files
+  df_allrna <- readDataFile(f_all_rna_annot)
+  df_circrna <- readDataFile(f_circ_rna_annot)
+  df_PK_mi_circ <- readDataFile(f_circ_mirna_prior)
+  df_PK_mi_mrna <- readDataFile(f_mirna_mrna_prior)
+  
+  sample_names <- extract_all_sample_names(df_circrna, df_allrna)
+  
+  
+  # preprocess priors
+  list[mrna_rows, mirna_rows, circrna_rows, PK_circ_mi, PK_mi_m] <- convert_prior_to_ENSG(
+    df_allrna, df_circrna, df_PK_mi_circ, df_PK_mi_mrna)
+  
+  # convert to TPM
+  mrna_rows <- normalize_all_counts(mrna_rows, sample_names, method="TPM")
+  mirna_rows <- normalize_all_counts(mirna_rows, sample_names, method="TPM")
+  circrna_rows <- normalize_all_counts(circrna_rows, sample_names, method="TPM")
+  
+  # process data further
+  mrna_rows[,sample_names] <- mutate_all(mrna_rows[,sample_names], function(x) as.numeric(x))
+  mirna_rows[,sample_names] <- mutate_all(mirna_rows[,sample_names], function(x) as.numeric(x))
+  circrna_rows[,sample_names] <- mutate_all(circrna_rows[,sample_names], function(x) as.numeric(x))
+  
+  # save the results so that no DB/web access is needed in future
+  folder_name <- basename(dirname(f_all_rna_annot))
+  
+  data <- list(mrna_rows, mirna_rows, circrna_rows, PK_circ_mi, PK_mi_m, sample_names)
+  saveRDS(data, "tstDataAll")
+  
+  all_names <- list(paste(folder_name, "/", "MDS_expr_mrna.RData", sep=""),
+                    paste(folder_name, "/", "MDS_expr_mirna.RData", sep=""),
+                    paste(folder_name, "/", "MDS_expr_circ.RData", sep=""),
+                    paste(folder_name, "/", "MDS_prior_circ_mi.RData", sep=""),
+                    paste(folder_name, "/", "MDS_prior_mi_m.RData", sep=""),
+                    paste(folder_name, "/", "MDS_sample_names.RData", sep="")
+                    )
+  
+  
+  saveRDS(mrna_rows,    all_names[[1]])
+  saveRDS(mirna_rows,   all_names[[2]])
+  saveRDS(circrna_rows, all_names[[3]])
+  saveRDS(PK_circ_mi,   all_names[[4]])
+  saveRDS(PK_mi_m,      all_names[[5]])
+  saveRDS(sample_names, all_names[[6]])
+  
+  return(all_names)
+}
+
+
+mmpc_convert <- function(
+    f_expr_m, 
+    f_expr_mi, 
+    f_expr_circ, 
+    f_pk_circ_mi, 
+    f_pk_mi_m, 
+    f_sample_names,
+    subset=NA,
+    gen_whitelist=TRUE,
+    gen_blacklist=TRUE
+    )
+{
+  
+  
+  # read all files objects
+  mrna_rows <- readRDS(f_expr_m)
+  mirna_rows <- readRDS(f_expr_mi)
+  circrna_rows <- readRDS(f_expr_circ)
+  PK_circ_mi <- readRDS(f_pk_circ_mi)
+  PK_mi_m <- readRDS(f_pk_mi_m)
+  sample_names <- readRDS(f_sample_names)
+  
+  # take out subsample
+  # data <- list(
+  #               mrna_rows,
+  #               mirna_rows,
+  #               circrna_rows,
+  #               PK_circ_mi,
+  #               PK_mi_m,
+  #               sample_names)
+  
+  # list[mrna_rows, mirna_rows, circrna_rows, PK_circ_mi, PK_mi_m] <-
+  #   sample_from_rows(
+  #     data,
+  #     num_circRNA_sample,
+  #     num_miRNA_sample,
+  #     num_mRNA_sample,
+  #     is_random,
+  #     is_send_PK)
+  
+  # process data further
+  mrna_rows[,sample_names] <- mutate_all(mrna_rows[,sample_names], function(x) as.numeric(x))
+  mrna_rows <- mrna_rows[!is.na(rowSums(mrna_rows[,sample_names])),]
+  
+  mirna_rows[,sample_names] <- mutate_all(mirna_rows[,sample_names], function(x) as.numeric(x))
+  mirna_rows <- mirna_rows[!is.na(rowSums(mirna_rows[,sample_names])),]
+  
+  circrna_rows[,sample_names] <- mutate_all(circrna_rows[,sample_names], function(x) as.numeric(x))
+  circrna_rows <- circrna_rows[!is.na(rowSums(circrna_rows[,sample_names])),]
+  
+  if(!is.na(subset))
+  {
+    data <- list(mrna_rows, mirna_rows, circrna_rows, PK_circ_mi, PK_mi_m, sample_names)
+    list[mrna_rows, mirna_rows, circrna_rows, PK_circ_mi, PK_mi_m] <-
+      sample_from_rows(
+        data,
+        1,
+        subset,
+        subset,
+        FALSE,
+        FALSE)
+  }
+  
+  
+  print("################### converting data to Gene Expression(GE) matrix ################################")
+  # m -------- by intomics denotion: number of experiment samples
+  m <- length(sample_names)
+  
+  mirna_names <- mirna_rows$GENE_ID
+  n2 <-length(mirna_names)
+  
+  mrna_names <- mrna_rows$GENE_ID
+  n1 <- length(mrna_names)
+  
+  circrna_names <- circrna_rows$circRNA_ID
+  n3 <- length(circrna_names)
+  
+  str <- sprintf("N1: %s,  N2: %s N3: %s M:%s\n", n1, n2, n3, m)
+  print(str)
+  
+  # create matrices ---- counts
+  MRNA_DATA <- mrna_rows[, sample_names]
+  MRNA_DATA <- data.matrix(MRNA_DATA)
+  MRNA_DATA <- t(MRNA_DATA)
+  mrna_names <- mrna_names[!is.na(colSums(MRNA_DATA))]
+  MRNA_DATA <- MRNA_DATA[,!is.na(colSums(MRNA_DATA))]
+  #MRNA_DATA <- base::sapply(1:n1, function(col_index) unlist(mrna_rows[col_index,sample_names]))
+  
+  
+  MIRNA_DATA <- mirna_rows[, sample_names]
+  MIRNA_DATA <- data.matrix(MIRNA_DATA)
+  MIRNA_DATA <- t(MIRNA_DATA)
+  mirna_names <- mirna_names[!is.na(colSums(MIRNA_DATA))]
+  MIRNA_DATA <- MIRNA_DATA[,!is.na(colSums(MIRNA_DATA))]
+  #MIRNA_DATA <- base::sapply(1:n2, function(col_index) unlist(mirna_rows[col_index,sample_names]))
+
+  
+  CIRCRNA_DATA <- circrna_rows[, sample_names]
+  CIRCRNA_DATA <- data.matrix(CIRCRNA_DATA)
+  CIRCRNA_DATA <- t(CIRCRNA_DATA)
+  circrna_names <- circrna_names[!is.na(colSums(CIRCRNA_DATA))]
+  #CIRCRNA_DATA <- base::sapply(1:n3, function(col_index) unlist(circrna_rows[col_index, sample_names]))
+  CIRCRNA_DATA <- CIRCRNA_DATA[,!is.na(colSums(CIRCRNA_DATA))]
+  CIRCRNA_DATA[is.na(CIRCRNA_DATA)] <- 0
+
+  # get names of genes
+  total_gene_names = c(mrna_names, mirna_names, circrna_names)
+  
+  # combine them to one matrix
+  GE_matrix <- cbind(MRNA_DATA, MIRNA_DATA, CIRCRNA_DATA)
+  colnames(GE_matrix) <- total_gene_names
+  rownames(GE_matrix) <- sample_names
+  
+  str <- sprintf("N1: %s,  N2: %s N3: %s M:%s\n", n1, n2, n3, m)
+  print(str)
+  ############################ MMPC #############################################
+  print("############# converting GE to MMPC input ########################")
+  mirna_names <- total_gene_names[1:n2 + n1]
+  mrna_names <- total_gene_names[1:n1]
+  circrna_names <- total_gene_names[1:n3 + n1 + n2]
+  colnames(GE_matrix) <- total_gene_names
+
+  partitions <- list(unlist(circrna_names), 
+                     unlist(mirna_names),
+                     unlist(mrna_names))
+  
+  WL <- NA
+  BL <- NA
+  
+  if(
+    (!gen_blacklist) && 
+    (!gen_whitelist))
+  {
+    
+  }
+  
+  if(gen_blacklist)
+  {
+    # define blacklist by bnlearn
+    BL <- set2blacklist(unlist(mrna_names))
+    BL <- rbind(BL, set2blacklist(unlist(mirna_names)))
+    BL <- rbind(BL, set2blacklist(unlist(circrna_names)))
+    BL <- rbind(BL, tiers2blacklist(list(unlist(mrna_names), unlist(circrna_names))))
+    BL <- rbind(BL, tiers2blacklist(list(unlist(circrna_names), unlist(mrna_names))))
+  }
+  
+  if(gen_whitelist)
+  {
+    WL <- data.frame()
+  }
+  
+  
+  
+  # PK <- data.frame(matrix(0, nrow=0, ncol=2))
+  # colnames(PK) <- c("from","to")
+  # 
+  # coords <- expand.grid(mrna_names, mrna_names)
+  # PK <- rbind(PK, coords)
+  # coords <- expand.grid(mrna_names, circrna_names)
+  # PK <- rbind(PK, coords)
+  # coords <- expand.grid(circrna_names, mrna_names)
+  # PK <- rbind(PK, coords)
+  # coords <- expand.grid(circrna_names, circrna_names)
+  # PK <- rbind(PK, coords)
+  # coords <- expand.grid(mirna_names, mirna_names)
+  # PK <- rbind(PK, coords)
+  
+  # idx_PK <- 1
+  # 
+  # 
+  # 
+  # 
+  # 
+  # 
+  # 
+  # for(i in 1:n1)
+  # {
+  #   
+  #   i_gene <- mrna_names[i]
+  #   #turn off mRNA -> mRNA interactions
+  #   for(j in 1:n1)
+  #   {
+  #     if(i != j)
+  #     {
+  #       j_gene <- mrna_names[j]
+  #       PK[idx_PK,] <- c(i_gene, j_gene)
+  #       idx_PK <- idx_PK + 1
+  #     }
+  #     
+  #     printf("%d/%d, %d/%d  total size: %d\n", i, n1, j, n1, nrow(PK))
+  #     cat("\014")  
+  #   }
+  #   
+  #   #turn off mRNA -> circRNA interactions
+  #   for(j in 1:n3)
+  #   {
+  #     j_gene <- circrna_names[j]
+  #     PK[idx_PK,] <- c(i_gene, j_gene)
+  #     idx_PK <- idx_PK + 1
+  #     printf("%d/%d, %d/%d  total size: %d\n", i, n1, j, n3, nrow(PK))
+  #     cat("\014")  
+  #   }
+  # }
+  # 
+  # for(i in 1:n2)
+  # {
+  #   i_gene <- mirna_names[i]
+  #   
+  #   # turn off miRNA->miRNA interactions
+  #   for(j in 1:n2)
+  #   {
+  #     j_gene <- mirna_names[j]
+  #     PK[idx_PK, ] <- c(i_gene, j_gene)
+  #     idx_PK <- idx_PK + 1
+  #     printf("%d/%d, %d/%d  total size: %d\n", i, n2, j, n2, nrow(PK))
+  #     cat("\014")  
+  #   }
+  # }
+  # 
+  # for(i in 1:n3)
+  # {
+  #   i_gene <- circrna_names[i]
+  #   # turn off circRNA->mRNA interactions
+  #   for(j in 1:n1)
+  #   {
+  #     j_gene <- mrna_names[j]
+  #     PK[idx_PK, ] <- c(i_gene, j_gene)
+  #     idx_PK <- idx_PK + 1
+  #     printf("%d/%d, %d/%d  total size: %d\n", i, n3, j, n1, nrow(PK))
+  #     cat("\014")  
+  #   }
+  #   
+  #   # turn off circRNA->circRNA interactions
+  #   for(j in 1:n3)
+  #   {
+  #     
+  #     j_gene <- circrna_names[j]
+  #     PK[idx_PK, ] <- c(i_gene, j_gene)
+  #     idx_PK <- idx_PK + 1
+  #     printf("%d/%d, %d/%d  total size: %d\n", i, n3, j, n1, nrow(PK))
+  #     cat("\014")  
+  #   }
+  # }
+  
+  return(list(GE_matrix, partitions, WL, BL))
+  
+  #return(list(GE_matrix, PK, partitions))
+}
+
+
+
+
+data_preprocess <- function(
+        f_expr_m, 
+        f_expr_mi, 
+        f_expr_circ, 
+        f_pk_circ_mi, 
+        f_pk_mi_m, 
+        f_sample_names,
+        num_circRNA_sample=1,
+        num_miRNA_sample=24,
+        num_mRNA_sample=24,
+        max_input_edges=3,
+        is_random=FALSE,
+        is_send_PK=FALSE)
+{
+    
+
+    # read all files objects
+    mrna_rows <- readRDS(f_expr_m)
+    mirna_rows <- readRDS(f_expr_mi)
+    circrna_rows <- readRDS(f_expr_circ)
+    PK_circ_mi <- readRDS(f_pk_circ_mi)
+    PK_mi_m <- readRDS(f_pk_mi_m)
+    sample_names <- readRDS(f_sample_names)
+    
+    # take out subsample
+    data <- list(mrna_rows, mirna_rows, circrna_rows, PK_circ_mi, PK_mi_m, sample_names)
+    list[mrna_rows, mirna_rows, circrna_rows, PK_circ_mi, PK_mi_m] <-
+        sample_from_rows(
+            data,
+            num_circRNA_sample,
+            num_miRNA_sample,
+            num_mRNA_sample,
+            is_random,
+            is_send_PK)
+
+    # process data further
+    mrna_rows[,sample_names] <- mutate_all(mrna_rows[,sample_names], function(x) as.numeric(x))
+    mirna_rows[,sample_names] <- mutate_all(mirna_rows[,sample_names], function(x) as.numeric(x))
+    circrna_rows[,sample_names] <- mutate_all(circrna_rows[,sample_names], function(x) as.numeric(x))
+
+    print("################### converting data to Gene Expression(GE) matrix ################################")
+
+    # m -------- by intomics denotion: number of experiment samples
+    m <- length(sample_names)
+
+    mirna_names <- mirna_rows$GENE_ID
+    mirna_names <- unlist(mirna_names)
+    n2 <-length(mirna_names)
+
+    mrna_names <- mrna_rows$GENE_ID
+    n1 <- length(mrna_names)
+
+    circrna_names <- circrna_rows$circRNA_ID
+    n3 <- length(circrna_names)
+
+    str <- sprintf("N1: %s,  N2: %s N3: %s M:%s\n", n1, n2, n3, m)
+    print(str)
+
+    # create matrices ---- counts
+    MRNA_DATA <- mrna_rows[, sample_names]
+    MRNA_DATA <- data.matrix(MRNA_DATA)
+    MRNA_DATA <- t(MRNA_DATA)
+    #MRNA_DATA <- base::sapply(1:n1, function(col_index) unlist(mrna_rows[col_index,sample_names]))
+
+    MIRNA_DATA <- mirna_rows[, sample_names]
+    MIRNA_DATA <- data.matrix(MIRNA_DATA)
+    MIRNA_DATA <- t(MIRNA_DATA)
+    #MIRNA_DATA <- base::sapply(1:n2, function(col_index) unlist(mirna_rows[col_index,sample_names]))
+
+    CIRCRNA_DATA <- circrna_rows[, sample_names]
+    CIRCRNA_DATA <- data.matrix(CIRCRNA_DATA)
+    CIRCRNA_DATA <- t(CIRCRNA_DATA)
+    #CIRCRNA_DATA <- base::sapply(1:n3, function(col_index) unlist(circrna_rows[col_index, sample_names]))
+    CIRCRNA_DATA[is.na(CIRCRNA_DATA)] <- 0
+
+    # get their gene lengths
+    #MRNA_gene_lengths <- extract_gene_lengths(mrna_rows)
+    #MIRNA_gene_lengths <- extract_gene_lengths(mirna_rows)
+    #CIRCRNA_gene_lengths <- extract_gene_lengths(circrna_rows)
+    #gene_lengths <- c(MRNA_gene_lengths, MIRNA_gene_lengths,CIRCRNA_gene_lengths )
+
+    # get names of genes
+    total_gene_names = c(mrna_names, mirna_names, circrna_names)
+
+    # combine them to one matrix
+    GE_matrix <- cbind(MRNA_DATA, MIRNA_DATA, CIRCRNA_DATA)
+    colnames(GE_matrix) <- total_gene_names
+    rownames(GE_matrix) <- sample_names
+
+    # list[GE_matrix, chosen_idx] <- normalize_and_filter_low_counts(
+    #     GE_matrix,
+    #     method="TPM",
+    #     gene_lengths = gene_lengths)
+    #
+    # chosen_idx <- (1:(n1+n2+n3))[chosen_idx]
+    #
+    # # update n1, n2 --- low counts removed
+    # mrna_idx <- chosen_idx[chosen_idx <= n1]
+    # mirna_idx <- chosen_idx[chosen_idx > n1]
+    # mirna_idx <- mirna_idx[mirna_idx <= (n2+n1)] - n1
+    #
+    # circrna_idx <- chosen_idx[chosen_idx > (n2+n1)] - n2 - n1
+    #
+    # mrna_names <- mrna_names[mrna_idx]
+    # mirna_names <- mirna_names[mirna_idx]
+    # circrna_names <- circrna_names[circrna_idx]
+    # total_gene_names = c(mrna_names, mirna_names, circrna_names)
+    #
+    # n1 <- length(mrna_idx)
+    # n2 <- length(mirna_idx)
+    # n3 <- length(circrna_idx)
+
+    str <- sprintf("N1: %s,  N2: %s N3: %s M:%s\n", n1, n2, n3, m)
+    print(str)
+
+    # define real PK
+    #circ_in_rows <- PK_circ_mi$orig_circ %in% circrna_names
+    #mirna_in_rows <- PK_circ_mi$mirna %in% mirna_names
+    #PK_circ_mi <- PK_circ_mi[mirna_in_rows & circ_in_rows,]
+
+    #m_in_rows <- PK_mi_m$mrna_ENSG %in% mrna_names
+    #mirna_in_rows <- PK_mi_m$mirna_ENSG %in% mirna_names
+    #PK_mi_m <- PK_mi_m[mirna_in_rows & m_in_rows,]
+
+    N <- ncol(GE_matrix)
+    PK_real <- matrix(0, N, N)
+
+    for(i in 1:nrow(PK_circ_mi))
+    {
+        row_PK <- PK_circ_mi[i,]
+        idx_circ <- (1:n3 + n2 + n1)[row_PK$orig_circ == circrna_names]
+        idx_mirna <- (1:n2 + n1)[row_PK$mirna == mirna_names]
+        PK_real[idx_circ, idx_mirna] <- 1
+        #print(paste("CIRC:", idx_circ, ", MI: ", idx_mirna))
+    }
+    for(i in 1:nrow(PK_mi_m))
+    {
+        row_PK <- PK_mi_m[i,]
+        idx_m <- (1:n1)[row_PK$mrna_ENSG == mrna_names]
+        idx_mirna <- (1:n2 + n1)[row_PK$mirna_ENSG == mirna_names]
+        PK_real[idx_mirna, idx_m] <- 1
+        #print(paste("MI:", idx_mirna, ", M: ", idx_m))
+    }
+
+
+
+
+
+    ##################### KIMONO #################################
+    print("############# converting GE to KiMONO input ########################")
+    ensembl_gene_names <- colnames(GE_matrix)
+    mirna_names <- total_gene_names[1:n2 + n1]
+    mrna_names <- total_gene_names[1:n1]
+    circrna_names <- total_gene_names[1:n3 + n1 + n2]
+
+    colnames(GE_matrix) <- total_gene_names
+    colnames(PK_real) <- total_gene_names
+    rownames(PK_real) <- total_gene_names
+
+    PK_present <-  data.frame(matrix(0, nrow=0, ncol=4))
+    colnames(PK_present) <- c("A", "B", "layer_A", "layer_B")
+    idx_PK <- 1
+
+    for(i in 1:nrow(PK_circ_mi))
+    {
+        PK_row <- PK_circ_mi[i,]
+        name_circ <- PK_row$orig_circ
+        name_mi <- PK_row$mirna
+
+        PK_present[idx_PK, ] <- c(name_circ, name_mi, "circRNA", "miRNA")
+        idx_PK <- idx_PK + 1
+    }
+    for(i in 1:nrow(PK_mi_m))
+    {
+        PK_row <- PK_mi_m[i,]
+        name_m <- PK_row$mrna_ENSG
+        name_mi <- PK_row$mirna_ENSG
+
+        PK_present[idx_PK, ] <- c(name_mi, name_m, "miRNA", "mRNA")
+        idx_PK <- idx_PK + 1
+    }
+
+    mRNA_GE <- GE_matrix[,(1:n1)]
+    mRNA_GE <- data.frame(mRNA_GE)
+    if(ncol(mRNA_GE) == 1)
+    {
+        colnames(mRNA_GE) <- mrna_names
+    }
+
+    miRNA_GE <- GE_matrix[,(n1+1):(n1+n2)]
+    miRNA_GE <- data.frame(miRNA_GE)
+    if(ncol(miRNA_GE) == 1)
+    {
+        colnames(miRNA_GE) <- mirna_names
+    }
+
+    circRNA_GE <- GE_matrix[,(n1+n2+1):(n1+n2+n3)]
+    circRNA_GE <- data.frame(circRNA_GE)
+    if(ncol(circRNA_GE) == 1)
+    {
+        colnames(circRNA_GE) <- circrna_names
+    }
+
+    retsizes <- c(n1, n2, n3)
+
+    input_data <- list(
+        'circRNA' = circRNA_GE,
+        'miRNA' = miRNA_GE,
+        'mRNA' = mRNA_GE
+    )
+
+    KIMONO_RET <- list(input_data, PK_present, retsizes)
+
+
+
+
+    ############################ INTOMICS #############################################
+    print("############# converting GE to IntOMICS input ########################")
+    ensembl_gene_names <- colnames(GE_matrix)
+    #total_gene_names <- convertEnsembleIDToEntrez(ensembl_gene_names)
+    total_gene_names <- paste("ENTREZID:", total_gene_names, sep="")
+    # total_gene_names[1:n2 + n1] <-
+    #     paste(total_gene_names[1:n2 + n1], "mi", sep="")
+    #
+    # total_gene_names[1:n3 + n1 + n2] <-
+    #     paste(total_gene_names[1:n3 + n1 + n2], "circ", sep="")
+
+    mirna_names <- total_gene_names[1:n2 + n1]
+    mrna_names <- total_gene_names[1:n1]
+    circrna_names <- total_gene_names[1:n3 + n1 + n2]
+
+    colnames(GE_matrix) <- total_gene_names
+    colnames(PK_real) <- total_gene_names
+    rownames(PK_real) <- total_gene_names
+
+    # define omics
+    omics <- list(ge=GE_matrix)#, circ=CIRC_MATRIX)
+
+    # define layer_def
+    min_in <- min(num_circRNA_sample, num_miRNA_sample, num_mRNA_sample)
+    layer_def <- data.frame(omics="ge", layer=as.numeric(1),
+                            fan_in_ge=as.numeric(max_input_edges))
+
+    # define prior
+    PK <- data.frame(matrix(0, nrow=0, ncol=6))
+    colnames(PK) <- c("src_entrez","dest_entrez",
+                      "direction","type",
+                      "source", "edge_type")
+    idx_PK <- 1
+    
+    
+    
+    
+    for(i in 1:nrow(PK_circ_mi))
+    {
+      PK_row <- PK_circ_mi[i,]
+      name_circ <- PK_row$orig_circ
+      name_circ <- paste("ENTREZID:", name_circ, sep="")
+      name_mi <- PK_row$mirna
+      name_mi <- paste("ENTREZID:", name_mi, sep="")
+      
+      if(!(name_mi %in% colnames(omics$ge)))
+      {
+        print(paste("miRNA:", name_mi, " is not found in gene names, but exists in prior!"))
+      }
+      if(!(name_circ %in% colnames(omics$ge)))
+      {
+        print(paste("circRNA:", name_circ, " is not found in gene names, but exists in prior!"))
+      }
+      
+      PK[idx_PK, ] <- c(name_circ, name_mi, "directed", "Process", "circGPA", "present")
+      idx_PK <- idx_PK + 1
+    }
+    for(i in 1:nrow(PK_mi_m))
+    {
+      PK_row <- PK_mi_m[i,]
+      name_m <- PK_row$mrna_ENSG
+      name_m <- paste("ENTREZID:", name_m, sep="")
+      name_mi <- PK_row$mirna_ENSG
+      name_mi <- paste("ENTREZID:", name_mi, sep="")
+      
+      PK[idx_PK, ] <- c(name_mi, name_m, "directed", "Process", "circGPA", "present")
+      idx_PK <- idx_PK + 1
+    }
+    
+    PK_present <- PK
+    prev_idx <- idx_PK
+    
+    #Construct other priors
+    prior_filling <- lapply(1:6, function(n) list(NA))
+    
+    #add previous
+    prior_filling[[1]][1:(idx_PK-1)] <- PK_present[,1]
+    prior_filling[[2]][1:(idx_PK-1)] <- PK_present[,2]
+    prior_filling[[3]][1:(idx_PK-1)] <- PK_present[,3]
+    prior_filling[[4]][1:(idx_PK-1)] <- PK_present[,4]
+    prior_filling[[5]][1:(idx_PK-1)] <- PK_present[,5]
+    prior_filling[[6]][1:(idx_PK-1)] <- PK_present[,6]
+    
+    # fill common absent parameters
+    prior_filling[[3]][idx_PK:(idx_PK+6)] <- "directed"
+    prior_filling[[4]][idx_PK:(idx_PK+6)] <- "Process"
+    prior_filling[[5]][idx_PK:(idx_PK+6)] <- "circGPA"
+    prior_filling[[6]][idx_PK:(idx_PK+6)] <- "absent"
+    
+    #turn off mRNA -> mRNA interactions
+    prior_filling[[1]][[idx_PK]] <- mrna_names
+    prior_filling[[2]][[idx_PK]] <- mrna_names
+    idx_PK <- idx_PK + 1
+    
+    #turn off mRNA -> circRNA interactions
+    prior_filling[[1]][[idx_PK]] <- mrna_names
+    prior_filling[[2]][[idx_PK]] <- circrna_names
+    idx_PK <- idx_PK + 1
+    
+    #turn off mRNA -> miRNA interactions
+    prior_filling[[1]][[idx_PK]] <- mrna_names
+    prior_filling[[2]][[idx_PK]] <- mirna_names
+    idx_PK <- idx_PK + 1
+    
+    # turn off miRNA->miRNA interactions
+    prior_filling[[1]][[idx_PK]] <- mirna_names
+    prior_filling[[2]][[idx_PK]] <- mirna_names
+    idx_PK <- idx_PK + 1
+    
+    # turn off miRNA->circRNA interactions
+    prior_filling[[1]][[idx_PK]] <- mirna_names
+    prior_filling[[2]][[idx_PK]] <- circrna_names
+    idx_PK <- idx_PK + 1
+    
+    # turn off circRNA->mRNA interactions
+    prior_filling[[1]][[idx_PK]] <- circrna_names
+    prior_filling[[2]][[idx_PK]] <- mrna_names
+    idx_PK <- idx_PK + 1
+    
+    # turn off circRNA->circRNA interactions
+    prior_filling[[1]][[idx_PK]] <- circrna_names
+    prior_filling[[2]][[idx_PK]] <- circrna_names
+
+    PK_present <- data.frame(matrix(NA, nrow=idx_PK, ncol=0))
+    
+    PK_present$src_entrez <- prior_filling[[1]]
+    PK_present$dest_entrez <- prior_filling[[2]]
+    PK_present$direction <- prior_filling[[3]]
+    PK_present$type <- prior_filling[[4]]
+    PK_present$source <- prior_filling[[5]]
+    PK_present$edge_type <- prior_filling[[6]]
+    colnames(PK_present) <- c("src_entrez","dest_entrez",
+                      "direction","type",
+                      "source", "edge_type")
+    
+    
+    idx_PK <- prev_idx
+    for(i in 1:n1)
+    {
+        i_gene <- mrna_names[i]
+        #turn off mRNA -> mRNA interactions
+        for(j in 1:n1)
+        {
+            if(i != j)
+            {
+                j_gene <- mrna_names[j]
+                PK[idx_PK,] <- c(i_gene, j_gene, "directed", "Process", "circGPA", "absent")
+                idx_PK <- idx_PK + 1
+            }
+        }
+
+        #turn off mRNA -> circRNA interactions
+        for(j in 1:n3)
+        {
+            j_gene <- circrna_names[j]
+            PK[idx_PK,] <- c(i_gene, j_gene, "directed", "Process", "circGPA", "absent")
+            idx_PK <- idx_PK + 1
+        }
+
+
+        #turn off mRNA -> miRNA interactions
+        for(j in 1:n2)
+        {
+            j_gene <- mirna_names[j]
+            PK[idx_PK, ] <- c(i_gene, j_gene, "directed", "Process", "circGPA", "absent")
+            idx_PK <- idx_PK + 1
+        }
+    }
+
+    for(i in 1:n2)
+    {
+        i_gene <- mirna_names[i]
+
+        # turn off miRNA->miRNA interactions
+        for(j in 1:n2)
+        {
+            j_gene <- mirna_names[j]
+            PK[idx_PK, ] <- c(i_gene, j_gene, "directed", "Process", "circGPA", "absent")
+            idx_PK <- idx_PK + 1
+        }
+
+        # turn off miRNA->circRNA interactions
+        for(j in 1:n3)
+        {
+            j_gene <- circrna_names[j]
+            PK[idx_PK, ] <- c(i_gene, j_gene, "directed", "Process", "circGPA", "absent")
+            idx_PK <- idx_PK + 1
+        }
+    }
+
+    for(i in 1:n3)
+    {
+        i_gene <- circrna_names[i]
+        # turn off circRNA->mRNA interactions
+        for(j in 1:n1)
+        {
+            j_gene <- mrna_names[j]
+            PK[idx_PK, ] <- c(i_gene, j_gene, "directed", "Process", "circGPA", "absent")
+            idx_PK <- idx_PK + 1
+        }
+
+        # turn off circRNA->circRNA interactions
+        for(j in 1:n3)
+        {
+
+            j_gene <- circrna_names[j]
+            PK[idx_PK, ] <- c(i_gene, j_gene, "directed", "Process", "circGPA", "absent")
+            idx_PK <- idx_PK + 1
+        }
+    }
+
+
+    # define annot
+    #symbols <- df_allrna[df_allrna$GENE_ID %in% ensembl_gene_names,]$GENE_NAME
+    #annot <- data.frame(entrezID=total_gene_names, gene_symbol=symbols)
+    retsizes <- c(n1, n2, n3)
+    INTOMICS_RET <- list(omics, layer_def, PK, PK_present, PK_real, retsizes)
+
+
+    return(list(INTOMICS_RET, KIMONO_RET, PK_mi_m))
+}
+
+
+
+
diff --git a/source/BiDAG_BGe.R b/source/BiDAG_BGe.R
new file mode 100644
index 0000000000000000000000000000000000000000..8343ca12fce5bdb8d25cf4eb4744c3ce0109c9ee
--- /dev/null
+++ b/source/BiDAG_BGe.R
@@ -0,0 +1,124 @@
+#'
+#' BGe score computation, taken from IntOMICS implementation:
+#' <DAGcorescore_BiDAG_BGe.R>
+#'
+BGe_node_compute <- function(j,parentnodes, param)
+{
+  n <- param$n
+  TN <- param$TN
+  awpN <- param$awpN
+  scoreconstvec <- param$scoreconstvec
+  
+  lp <- length(parentnodes) #number of parents
+  awpNd2 <- (awpN-n+lp+1)/2
+  A <- TN[j,j]
+  
+  switch(as.character(lp),
+         "0"={# just a single term if no parents
+           corescore <- scoreconstvec[lp+1] -awpNd2*log(A)
+         },
+         
+         "1" = {# no need for matrices
+           D <- TN[parentnodes,parentnodes]
+           logdetD <- log(D)
+           B <- TN[j,parentnodes]
+           logdetpart2 <- log(A-B^2/D)
+           corescore <- scoreconstvec[lp+1]-awpNd2*logdetpart2 - logdetD/2
+         },
+         
+         "2" = {
+           D <- TN[parentnodes,parentnodes]
+           
+           dettwobytwo <- function(D) {
+             D[1,1]*D[2,2]-D[1,2]*D[2,1]
+           }
+           
+           detD <- dettwobytwo(D)
+           logdetD <- log(detD)
+           B <- TN[j,parentnodes]
+           logdetpart2 <- log(dettwobytwo(D-(B)%*%t(B)/A))+log(A)-logdetD
+           corescore <- scoreconstvec[lp+1]-awpNd2*logdetpart2 - logdetD/2
+         },
+         
+         {# otherwise we use cholesky decomposition to perform both
+           D<-as.matrix(TN[parentnodes,parentnodes])
+           choltemp<-chol(D)
+           logdetD<-2*log(prod(choltemp[(lp+1)*c(0:(lp-1))+1]))
+           B<-TN[j,parentnodes]
+           logdetpart2<-log(A-sum(backsolve(choltemp,B,transpose=TRUE)^2))
+           corescore <- scoreconstvec[lp+1]-awpNd2*logdetpart2 - logdetD/2
+         })
+  
+  return(corescore)
+}
+
+####################################################################################################
+## This function is from BiDAG package (https://cran.r-project.org/web/packages/BiDAG/index.html) ##
+## arameters needed for calculation of the BGe score                                              ##
+## see arXiv:1402.6863                                                                            ##
+####################################################################################################
+
+scoreparameters_BiDAG_BGe <- function (n, 
+                                       data, 
+                                       bgepar = list(am = 1, aw = NULL))
+{
+
+  if (anyNA(data)) {
+    print("Warning: Dataset contains missing data (covariance matrix computation: complete.obs parameter - missing values are handled by casewise deletion)")
+  }
+
+  if (all(is.character(colnames(data)))) {
+    nodeslabels <- colnames(data)
+  } else {
+    nodeslabels <- sapply(c(1:n), function(x) paste("v", x, sep = ""))
+  }
+
+  colnames(data) <- nodeslabels
+  initparam <- list()
+  initparam$labels <- nodeslabels
+  initparam$type <- "bge"
+  initparam$DBN <- FALSE
+  initparam$weightvector <- NULL
+  initparam$data <- data
+
+  initparam$bgnodes <- NULL
+  initparam$static <- NULL
+  initparam$mainnodes <- c(1:n)
+  
+  initparam$bgn <- 0
+  initparam$n <- n
+  initparam$nsmall <- n
+  initparam$labels.short <- initparam$labels
+  initparam$logedgepmat <- NULL
+
+  N <- nrow(data)
+  if(N==1)
+  {
+    covmat <- matrix(0,nrow=n, ncol=n, dimnames=list(initparam$labels.short,initparam$labels.short))
+  } else {
+    covmat <- cov(data, use = "complete.obs") * (N - 1)
+  } # end if else (N==1)
+  means <- colMeans(data, na.rm = TRUE)
+  bgepar$aw <- n + bgepar$am + 1
+  
+  initparam$am <- bgepar$am
+  initparam$aw <- bgepar$aw
+  initparam$N <- N
+  initparam$means <- means
+  mu0 <- numeric(n)
+  T0scale <- bgepar$am * (bgepar$aw - n - 1)/(bgepar$am + 1)
+  T0 <- diag(T0scale, n, n)
+  initparam$TN <- T0 + covmat + ((bgepar$am * N)/(bgepar$am + N)) * (mu0 - means) %*% t(mu0 - means)
+  initparam$awpN <- bgepar$aw + N
+  constscorefact <- -(N/2) * log(pi) + (1/2) * log(bgepar$am/(bgepar$am +  N))
+  initparam$muN <- (N * means + bgepar$am * mu0)/(N + bgepar$am)
+  initparam$SigmaN <- initparam$TN/(initparam$awpN - n - 1)
+  initparam$scoreconstvec <- numeric(n)
+  for (j in (1:n)) {
+    awp <- bgepar$aw - n + j
+    initparam$scoreconstvec[j] <- constscorefact - lgamma(awp/2) + 
+      lgamma((awp + N)/2) + ((awp + j - 1)/2) * log(T0scale)
+  }
+  attr(initparam, "class") <- "scoreparameters"
+  initparam
+}
diff --git a/source/data_circGPA_preprocess.R b/source/data_circGPA_preprocess.R
new file mode 100644
index 0000000000000000000000000000000000000000..bead9a3043d2014a0384d09dc1954f0075e4cdbb
--- /dev/null
+++ b/source/data_circGPA_preprocess.R
@@ -0,0 +1,926 @@
+#############################################################################################
+##  This source file contains the logic for data interaction between circGPA and our data  ##
+#############################################################################################
+
+###########################################################################################
+### Files containing input data: database-extracted interactions + MDS expression data  ###
+###########################################################################################
+f_annot_allrna <- "data/200625_allRNA_fromRNAseq_annot_hg38.tsv"
+f_annot_circrna <- "data/200625_circRNA_fromRNAseq_annot_hg19.tsv"
+f_prior_circ_mirna <- "data/circrna-mirna-interactions.csv"
+f_prior_mi_mrna <- "data/mirna-mrna-interactions.csv"
+
+### Set global variables with online database access
+httr::set_config(config(ssl_verifypeer = FALSE))
+if(!exists("DB_mart"))
+{
+  DB_mart <<- useMart(biomart="ensembl", dataset="hsapiens_gene_ensembl")
+}
+if(!exists("DB_ensembl"))
+{
+  DB_ensembl <<- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl")
+}
+
+readDataFile <- function(filepath)
+{
+  library(tools)
+  ext <- file_ext(filepath)
+  
+  if(ext != "csv" && ext != "tsv")
+  {
+    stop("Only .CSV and .TSV files are supported!")
+  }
+  
+  ret <- NULL
+  if(ext == "csv")
+  {
+    ret <- read.csv(file = filepath)
+  }
+  if(ext == "tsv")
+  {
+    ret <- read.table(file = filepath, sep = '\t', header = TRUE)
+  }
+  return(ret)
+}
+
+##################################################################################
+### Files containing cached data: saved results of the below listed functions: ###
+##################################################################################
+f_cached_IM <- "data/cached_IM.RData"
+f_cached_MuC <- "data/cached_MuC.RData"
+f_cached_MPK <- "data/cached_MPK.RData"
+f_cached_mirna_names <- "data/cached_mirna_names.Rdata"
+f_cached_circrna_names <- "data/cached_circrna_names.Rdata"
+f_cached_mrna_names <- "data/cached_mrna_names.Rdata"
+f_cached_mrna_CT <- "data/cached_mrna_CT.RData"
+f_cached_mirna_CT <- "data/cached_mirna_CT.RData"
+f_cached_circrna_CT <- "data/cached_circrna_CT.RData"
+
+
+circGPA_circrnas <- function(f_circ_mirna_prior='data/circrna-mirna-interactions.csv')
+{
+  if (file.exists(f_cached_circrna_names)) {
+    ret <- readRDS(f_cached_circrna_names)
+    return(ret)
+  } 
+  map <- read.csv(f_circ_mirna_prior)
+  subset(map, select = -c(X) )
+  
+  
+  saveRDS(unique(map$circRNA), f_cached_circrna_names)
+  return(unique(map$circRNA))
+}
+
+matureMIRNA_remove_suffix <- function(mirnas)
+{
+  divide_combined_form <- function(combo)
+  {
+    #combo_orig <- combo
+    if(combo == "MIR103A1")
+    {
+      tst <- 1
+    }
+    # first --- remove some suffix in form of "-5p"
+    combo <- strsplit(combo, split="-")[[1]][1]
+    
+    # second --- remove other form of suffix:
+    combo2 <- str_split(combo, "", simplify = TRUE)  # --> each character as element
+    is_num <- !is.na(suppressWarnings(as.numeric(combo2))) #---> TRUE/FALSE for each char: is digit?
+    if(!last(is_num))  # if last digit is not a digit --> 100% doesn't have second suffix
+    {
+      return(combo)
+    }
+    
+    parts <- strsplit(combo, "[0-9]")
+    parts <- unlist(parts)
+    parts <- parts[parts != ""]
+    if(length(parts) != 2)    # the only possible second suffix is in form of:
+    {                         # MIR[LET] [digits] [characters] <numbers suffix>
+      return(combo)           # by doing split we can check if we have two digit parts
+    }
+    # remove last number
+    combo <- gsub("|\\d+$", "", combo)    # if we have two parts ---> remove number from end
+    return(combo)
+  }
+  cmirnas <- sapply(mirnas, divide_combined_form)
+  return(cmirnas)
+}
+
+circGPA_mirnas <- function()
+{
+  if (file.exists(f_cached_mirna_names)) {
+    ret <- readRDS(f_cached_mirna_names)
+    return(ret)
+  } 
+  miRNATab=getMiRNATable(version="v22",species="hsa")
+  
+  # TODO -> convert miRNA to suffix-removed version
+  
+  saveRDS(sort(unique(miRNATab$Mature1)), f_cached_mirna_names)
+  return(sort(unique(miRNATab$Mature1)))
+}
+
+circGPA_mrnas <- function()
+{
+  if (file.exists(f_cached_mrna_names)) {
+    ret <- readRDS(f_cached_mrna_names)
+    return(ret)
+  } 
+  all_coding_genes <- getBM(
+    attributes = c( "hgnc_symbol"), 
+    filters = c("biotype"), 
+    values = list(biotype="protein_coding"), 
+    mart = DB_mart
+  )
+  
+  
+  saveRDS(all_coding_genes$hgnc_symbol, f_cached_mrna_names)
+  return(all_coding_genes$hgnc_symbol)
+}
+
+circGPA_GE <- function()
+{
+  total_names <- c()
+  df_allrna <- readDataFile(f_annot_allrna)
+  df_circrna <- readDataFile(f_annot_circrna)
+  sample_names <- extract_all_sample_names(df_circrna, df_allrna)
+  
+  mrna_CT <- generate_mrna_CT()
+  mrna_CT <- mrna_CT[mrna_CT$`Is in MDS?`,]
+  total_names <- c(total_names, mrna_CT$`circGPA symbol`)
+  mrna_CT <- mrna_CT[,c("START", "END", sample_names)]
+  mrna_CT[is.na(mrna_CT)] <- 0 # remove NA by 0
+  mrna_CT <- normalize_all_counts(mrna_CT, sample_names, method="TPM")
+  mrna_CT <- mrna_CT[,sample_names]
+  mrna_CT[is.na(mrna_CT)] <- 0 # remove NA by 0
+  
+  mirna_CT <- generate_mirna_CT()
+  mirna_CT <- mirna_CT[mirna_CT$`Is in MDS?`,]
+  total_names <- c(total_names, mirna_CT$`circGPA symbol`)
+  mirna_CT <- mirna_CT[,c("START", "END", sample_names)]
+  mirna_CT[is.na(mirna_CT)] <- 0 # remove NA by 0
+  mirna_CT <- normalize_all_counts(mirna_CT, sample_names, method="TPM")
+  mirna_CT <- mirna_CT[,sample_names]
+  mirna_CT[is.na(mirna_CT)] <- 0 # remove NA by 0
+  
+  circrna_CT <- generate_circrna_CT()
+  circrna_CT <- circrna_CT[circrna_CT$`Is in MDS?`,]
+  total_names <- c(total_names, circrna_CT$`circGPA symbol`)
+  circrna_CT <- circrna_CT[,c("START", "END", sample_names)]
+  circrna_CT[is.na(circrna_CT)] <- 0 # remove NA by 0
+  circrna_CT <- normalize_all_counts(circrna_CT, sample_names, method="TPM")
+  circrna_CT <- circrna_CT[,sample_names]
+  circrna_CT[is.na(circrna_CT)] <- 0 # remove NA by 0
+  
+  GE <- rbind(mrna_CT, mirna_CT, circrna_CT)
+  GE <- as.matrix(GE)
+  GE <- t(GE)
+  GE <- data.frame(GE)
+  
+  rownames(GE) <- sample_names
+  colnames(GE) <- total_names
+  
+  
+  
+  return(GE)
+}
+
+circGPA_matrix_PK <- function()
+{
+  total_names <- c()
+
+  mrna_CT <- generate_mrna_CT()
+  mrna_CT <- mrna_CT[mrna_CT$`Is in MDS?`,]
+  mrna_names <- mrna_CT$`circGPA symbol`
+  total_names <- c(total_names, mrna_CT$`circGPA symbol`)
+  
+  mirna_CT <- generate_mirna_CT()
+  mirna_CT <- mirna_CT[mirna_CT$`Is in MDS?`,]
+  mirna_names <- mirna_CT$`circGPA symbol`
+  total_names <- c(total_names, mirna_CT$`circGPA symbol`)
+  
+  circrna_CT <- generate_circrna_CT()
+  circrna_CT <- circrna_CT[circrna_CT$`Is in MDS?`,]
+  circrna_names <- circrna_CT$`circGPA symbol`
+  total_names <- c(total_names, circrna_CT$`circGPA symbol`)
+  
+  inter <- readDataFile(f_prior_circ_mirna)
+  inter <- subset(inter, select = -c(X))
+  inter2 <- readDataFile(f_prior_mi_mrna)
+  inter2 <- subset(inter2, select = -c(X))
+  
+  # TODO ---> check for suffix-removal
+  
+  inter <- inter[inter$miRNAs %in% mirna_names,]
+  inter <- inter[inter$circRNA %in% circrna_names,]
+  
+  inter2 <- inter2[inter2$mature_mirna_id %in% mirna_names,]
+  inter2 <- inter2[inter2$target_symbol %in% mrna_names,]  
+  
+  ret <- matrix(0, nrow=length(total_names), ncol=length(total_names))
+  colnames(ret) <- total_names
+  rownames(ret) <- total_names
+  
+  for(idx in 1:nrow(inter))
+  {
+    gene_mirna <- inter[idx,2]
+    gene_circ <- inter[idx,1]
+    
+    if(
+      (gene_mirna %in% mirna_names) && 
+      (gene_circ %in% circrna_names))
+    {
+      ret[gene_mirna,gene_circ] <- 1
+      ret[gene_circ,gene_mirna] <- 1
+    }
+    else{
+      asd <- 1
+    }
+  }
+  
+  
+  for(idx in 1:nrow(inter2))
+  {
+    gene_mirna <- inter2[idx,1]
+    gene_mrna <- inter2[idx,2]
+    
+    if(
+      (gene_mirna %in% mirna_names) && 
+      (gene_mrna %in% mrna_names))
+    {
+      ret[gene_mrna,gene_mirna] <- 1
+      ret[gene_mirna,gene_mrna] <- 1
+    }
+    else{
+      asd <- 1
+    }
+  }
+  
+  saveRDS(ret, f_cached_MPK)
+  
+  return(ret)
+}
+
+circGPA_whitelist_PK <- function()
+{
+  inter <- readDataFile(f_prior_circ_mirna)
+  inter <- subset(inter, select = -c(X))
+  inter2 <- readDataFile(f_prior_mi_mrna)
+  inter2 <- subset(inter2, select = -c(X))
+  
+  
+  mrna_CT <- generate_mrna_CT()
+  mrna_CT <- mrna_CT[mrna_CT$`Is in MDS?`,]
+  mrna_names <- mrna_CT$`circGPA symbol`
+
+  mirna_CT <- generate_mirna_CT()
+  mirna_CT <- mirna_CT[mirna_CT$`Is in MDS?`,]
+  mirna_names <- mirna_CT$`circGPA symbol`
+
+  circrna_CT <- generate_circrna_CT()
+  circrna_CT <- circrna_CT[circrna_CT$`Is in MDS?`,]
+  circrna_names <- circrna_CT$`circGPA symbol`
+  
+  # TODO ---> check for suffix-removal
+  
+  inter <- inter[inter$miRNAs %in% mirna_names,]
+  inter <- inter[inter$circRNA %in% circrna_names,]
+  
+  inter2 <- inter2[inter2$mature_mirna_id %in% mirna_names,]
+  inter2 <- inter2[inter2$target_symbol %in% mrna_names,]
+  
+  
+  # Add both directions for MMPC
+  inter_flipped <- inter[,c("miRNAs", "circRNA")]
+  colnames(inter) <- c("from", "to")
+  colnames(inter_flipped) <- c("from", "to")
+  inter <- rbind(inter, inter_flipped)
+  
+  inter2_flipped <- inter2[,c("target_symbol", "mature_mirna_id")]
+  colnames(inter2) <- c("from", "to")
+  colnames(inter2_flipped) <- c("from", "to")
+  inter2 <- rbind(inter2, inter2_flipped)
+  
+  inter <- rbind(inter, inter2)
+  
+  return(inter)
+}
+
+circGPA_partitions <- function()
+{
+  mrna_CT <- generate_mrna_CT()
+  mrna_CT <- mrna_CT[mrna_CT$`Is in MDS?`,]
+  mrna_names <- mrna_CT$`circGPA symbol`
+  
+  mirna_CT <- generate_mirna_CT()
+  mirna_CT <- mirna_CT[mirna_CT$`Is in MDS?`,]
+  mirna_names <- mirna_CT$`circGPA symbol`
+  
+  circrna_CT <- generate_circrna_CT()
+  circrna_CT <- circrna_CT[circrna_CT$`Is in MDS?`,]
+  circrna_names <- circrna_CT$`circGPA symbol`
+  
+  return(list(mrna_names, mirna_names, circrna_names))
+}
+
+
+
+circGPA_IM <- function()
+{
+  if (file.exists(f_cached_IM)) {
+    ret <- readRDS(f_cached_IM)
+    return(ret)
+  } 
+  mrnas <- circGPA_mrnas()
+  mirnas <- circGPA_mirnas()
+  
+  mrnas_num <- length(mrnas)
+  mirnas_num <- length(mirnas)
+  Im <- matrix(0, nrow=mrnas_num, ncol=mirnas_num)
+  
+  rownames(Im) <- mrnas
+  colnames(Im) <- mirnas
+  
+  inter <- readDataFile(f_prior_mi_mrna)
+  
+  for(idx in 1:nrow(inter))
+  {
+      gene_mirna <- inter[idx,2]
+      gene_mrna <- inter[idx,3]
+
+      if(
+        (gene_mirna %in% mirnas) && 
+        (gene_mrna %in% mrnas))
+      {
+        Im[gene_mrna,gene_mirna] <- 1
+      }
+  }
+  
+  saveRDS(Im, f_cached_IM)
+  
+  return(Im)
+}
+circGPA_MuCirc <- function()
+{
+  if (file.exists(f_cached_MuC)) {
+    ret <- readRDS(f_cached_MuC)
+    return(ret)
+  } 
+  circrnas <- circGPA_circrnas()
+  mirnas <- circGPA_mirnas()
+  
+  circrnas_num <- length(circrnas)
+  mirnas_num <- length(mirnas)
+  
+  Im <- matrix(0, nrow=mirnas_num, ncol=circrnas_num)
+  colnames(Im) <- circrnas
+  rownames(Im) <- mirnas
+  
+  inter <- readDataFile(f_prior_circ_mirna)
+  
+  for(idx in 1:nrow(inter))
+  {
+    gene_mirna <- inter[idx,3]
+    gene_circ <- inter[idx,2]
+    
+    if(
+      (gene_mirna %in% mirnas) && 
+      (gene_circ %in% circrnas))
+    {
+      Im[gene_mirna,gene_circ] <- 1
+    }
+  }
+  
+  # TODO ---> convert miRNA to suffix-removed version
+  
+  saveRDS(Im, f_cached_MuC)
+  return(Im)
+}
+
+circGPA_df_M_MI <- function()
+{
+  df_PK_mi_mrna <- readDataFile(f_prior_mi_mrna)
+  mrna_names <- circGPA_mrnas()
+  df_PK_mi_mrna <- df_PK_mi_mrna[df_PK_mi_mrna$target_symbol %in% mrna_names,]
+  return(df_PK_mi_mrna)
+}
+
+
+normalize_all_counts <- function(some_rows, sample_names, method="DESeq2", threshold=10)
+{
+  gene_lengths <- as.numeric(some_rows$START) - as.numeric(some_rows$END)
+  gene_lengths <- abs(gene_lengths)
+  
+  GE_matrix <- some_rows[,sample_names]
+  GE_matrix[is.na(GE_matrix)] <- 0 # convert NA to 0
+  GE_matrix <- mutate_all(GE_matrix, function(x) as.numeric(x))
+  
+  
+  
+  if(method == "DESeq2")
+  {
+    library(DESeq2)
+    
+    GE_matrix <- t(GE_matrix)
+    metadata <- as.data.frame(colnames(GE_matrix))
+    
+    
+    
+    dds <- DESeqDataSetFromMatrix(countData=GE_matrix,
+                                  colData=metadata,
+                                  design=~1)
+    dds<- dds[rowSums(counts(dds))>10,] # remove low counts
+    dds = estimateSizeFactors(dds) # calculate size
+    GE_matrix = counts(dds, normalized = TRUE)#get normalized data
+    GE_matrix <- t(GE_matrix)
+    
+  }
+  
+  if(method == "TPM")
+  {
+    GE_matrix <- t(GE_matrix)
+    
+    fetched_idx <- rowSums(GE_matrix)>0
+    
+    gene_lengths <- gene_lengths[fetched_idx]# remove low counts
+    GE_matrix<- GE_matrix[fetched_idx,] # remove low counts
+    
+    GE_matrix <- GE_matrix / gene_lengths
+    GE_matrix <- t( t(GE_matrix) * 1000000.0 / colSums(GE_matrix) )
+    
+    GE_matrix <- log2(GE_matrix + 1)
+    GE_matrix <- t(GE_matrix)
+    
+  }
+  
+  some_rows[,sample_names] <- GE_matrix
+  
+  return(some_rows)
+}
+
+extract_mirna <- function(df_allrna_annot_hg38)
+{
+  # get all rows with GENE_TYPE=miRNA
+  mirna_rows <- df_allrna_annot_hg38[df_allrna_annot_hg38$GENE_TYPE=="miRNA",]
+  
+  return(mirna_rows)
+}
+extract_circrna <- function(df_circrna)
+{
+  # get only circ ID present rows
+  circ_rows <- df_circrna[!is.na(df_circrna$circRNA_ID),]
+  
+  # create pseudo geneID for them if n/a ENSG id
+  # n/a -----> ENSG + circID
+  non_geneid_rows <- circ_rows[circ_rows$GENE_ID == "n/a",]
+  
+  circ_name <- non_geneid_rows$circRNA_ID
+  circ_name <- paste("ENSG", circ_name, sep="")
+  circ_rows[circ_rows$GENE_ID == "n/a", "GENE_ID"] <- circ_name
+  return(circ_rows)
+}
+
+extract_all_sample_names <- function(df_circ, df_allrna)
+{
+  cnames_circ <- colnames(df_circ);
+  cnames_all <- colnames(df_allrna);
+  
+  cnames_circ <- cnames_circ[10:length(cnames_circ)];
+  cnames_all <- cnames_all[7:length(cnames_all)];
+  
+  cnames_total <- union(cnames_circ, cnames_all);
+  cnames_total <- unique(cnames_total);
+  
+  return(cnames_total);
+}
+
+convert_prior_mi_m <- function(df_prior_mi_m, df_allrna, sample_names)
+{
+  print("################### preprocessing of PK (miRNA <-> mRNA) ################################")
+  
+  df_prior_mi_m_orig <- df_prior_mi_m
+  
+  # =====================  mRNA =====================
+  mrnas <- df_prior_mi_m$target_symbol
+  mrnas_unique <- unique(mrnas)
+  
+  # 1) search in expression data
+  str <- sprintf("PK table(miRNA <-> mRNA) has total %s unique mRNAs", length(mrnas_unique))
+  print(str)
+  
+  one_expr_data_found <- mrnas_unique[mrnas_unique %in% df_allrna$GENE_NAME]
+  one_ENSG_rows <- df_allrna[match(one_expr_data_found, df_allrna$GENE_NAME),]
+  one_not_found <- mrnas_unique[!(mrnas_unique %in% df_allrna$GENE_NAME)]
+  one_names <- one_expr_data_found
+  
+  str <- sprintf("%s of those mRNAs can be explicitly found in expr.table", length(one_expr_data_found))
+  print(str)
+  
+  # 2) search in BioMart(Ensembl) by gene symbol = gene name
+  #mart <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl")
+  two_expr_data_found <- getBM(attributes=c('ensembl_gene_id', 'external_gene_name'),
+                               filters = 'external_gene_name',
+                               values = one_not_found,
+                               mart = DB_ensembl)
+  # only present in expr.table
+  two_expr_data_found <- two_expr_data_found[two_expr_data_found$ensembl_gene_id %in% df_allrna$GENE_ID,]
+  # use match -> NA are not found, indices found used to fetch by GENE ID
+  two_found_idx <- match(one_not_found, two_expr_data_found$external_gene_name)
+  two_expr_data_found <- two_expr_data_found[na.omit(two_found_idx),]
+  two_names <- two_expr_data_found$external_gene_name
+  two_ENSG_rows <- df_allrna[match(two_expr_data_found$ensembl_gene_id, df_allrna$GENE_ID),]
+  two_not_found <- one_not_found[is.na(two_found_idx)]
+  
+  str <- sprintf("Next %s was found by using BioMart(%s overlap with prev) as GENE name",
+                 nrow(two_expr_data_found),
+                 sum(two_expr_data_found$external_gene_name %in% one_ENSG_rows$GENE_ID))
+  print(str)
+  
+  # 3) search in BioMart(Ensembl) by gene synonym = e.g. gene name, but not primary used
+  tri_expr_data <- getBM(attributes=c('ensembl_gene_id', 'external_gene_name', 'external_synonym'),
+                         filters = 'external_synonym',
+                         values = two_not_found,
+                         mart = DB_ensembl)
+  # only those in expr.table are needed
+  tri_expr_data <- tri_expr_data[tri_expr_data$ensembl_gene_id %in% df_allrna$GENE_ID,]
+  tri_found_idx <- match(two_not_found, tri_expr_data$external_synonym)
+  tri_expr_data <- tri_expr_data[na.omit(tri_found_idx),]
+  tri_ENSG_rows <- df_allrna[match(tri_expr_data$ensembl_gene_id, df_allrna$GENE_ID),]
+  tri_names <- tri_expr_data$external_synonym
+  
+  not_exist <- two_not_found[is.na(tri_found_idx)]
+  str <- sprintf("Final %s was found by using BioMart(%s/%s overlap with prevs) as GENE synonym",
+                 nrow(tri_expr_data),
+                 sum(tri_expr_data$ensembl_gene_id %in% two_expr_data_found$ensembl_gene_id),
+                 sum(tri_expr_data$ensembl_gene_id %in% one_ENSG_rows$GENE_ID))
+  print(str)
+  
+  # 4) The rest is left, it is just 7 out of 17.4K
+  str <- sprintf("In the end we have %s mRNA genes not found, these are:",
+                 length(not_exist))
+  print(str)
+  print(not_exist)
+  
+  # 5) combine them into one table
+  total_names <- c(one_names, two_names, tri_names)
+  total_ENSG_rows <- rbind(one_ENSG_rows, two_ENSG_rows, tri_ENSG_rows)
+  
+  # 6) remove not found from PK and extract Ensembl IDs per mRNA in PK
+  df_prior_mi_m <- df_prior_mi_m[df_prior_mi_m$target_symbol %in% total_names,]
+  mrnas <- df_prior_mi_m$target_symbol
+  mrnas_idx <- match(mrnas, total_names)
+  
+  mrna_rows <- total_ENSG_rows
+  mrna_ENSG <- total_ENSG_rows[mrnas_idx,]$GENE_ID
+  
+  # =====================  miRNA =====================
+  mirnas <- df_prior_mi_m$mature_mirna_id
+  mirnas <- tolower(mirnas)
+  mirnas_unique <- unique(mirnas)
+  
+  idx_mirna <- match(mirnas, mirnas_unique)
+  
+  gene_names_unique <- sapply(mirnas_unique, function(name)
+  {
+    mirna <- strsplit(name, split="-")[[1]]
+    
+    if(mirna[[1]] != "hsa" || length(mirna) < 3)
+    {
+      str <- sprintf("ERROR! miRNA %s has some wrong format! Use hsa-XXX-XXX", name)
+      stop(str)
+    }
+    
+    type <- mirna[[2]]
+    id <- mirna[[3]]
+    str_ret <- "MIR"
+    if(type == "let")
+    {
+      str_ret <- paste(str_ret, "LET", sep="")
+    }
+    if(type != "let" && type != "mir")
+    {
+      str <- sprintf("ERROR! miRNA %s has wrong type! Use either: hsa-let-XXX/hsa-mir-XXX", name)
+      stop(str)
+    }
+    
+    str_ret <- paste(str_ret, toupper(id), sep="")
+    return(str_ret)
+  })
+  sample_names <- sample_names
+  
+  allrna_num <- sapply(df_allrna$GENE_NAME, function(name)
+  {
+    nums <- extract_num(name)
+    return(nums[1])
+  })
+  
+  
+  mirna_rows <- sapply(gene_names_unique, function(name)
+  {
+    my_num <- extract_num(name)[1]
+    
+    all_found <- grepl(name, df_allrna$GENE_NAME, fixed = TRUE)
+    same_num <- allrna_num==my_num
+    
+    all_found <- all_found & same_num
+    
+    if(sum(all_found) == 0)
+    {
+      str <- sprintf("miRNA:%s not found in expression data! Repair!",name)
+      stop(str)
+    }
+    rows <- df_allrna[all_found,]
+    ret <- rows[1,]
+    ret[,sample_names] <- colSums(rows[,sample_names])
+    ret
+  })
+  mirna_rows <- t(mirna_rows)
+  mirna_rows <- data.frame(mirna_rows)
+  
+  
+  gene_ids <- mirna_rows$GENE_ID[idx_mirna]
+  gene_ids <- data.frame(unlist(gene_ids))
+  
+  
+  mirna_rows <- unique(mirna_rows)
+  PK_mi_m <- data.frame(
+    mirna_orig=mirnas,
+    mirna_names=gene_names_unique[idx_mirna],
+    mirna_ENSG=gene_ids[[1]],
+    mrna_orig=mrnas, mrna_ENSG=mrna_ENSG
+  )
+  
+  return(list(mrna_rows, mirna_rows, PK_mi_m))
+}
+
+
+generate_mrna_CT <- function()
+{
+  df_allrna <- readDataFile(f_annot_allrna)
+  df_PK_mi_mrna <- circGPA_df_M_MI()
+  df_circrna <- readDataFile(f_annot_circrna)
+  sample_names <- extract_all_sample_names(df_circrna, df_allrna)
+  
+
+  df_prior_mi_m_orig <- df_PK_mi_mrna
+  
+  # =====================  mRNA =====================
+  mrnas <- df_PK_mi_mrna$target_symbol
+  mrnas_unique <- unique(mrnas)
+  
+  # 1) search in expression data
+  one_expr_data_found <- mrnas_unique[mrnas_unique %in% df_allrna$GENE_NAME]
+  one_ENSG_rows <- df_allrna[match(one_expr_data_found, df_allrna$GENE_NAME),]
+  one_not_found <- mrnas_unique[!(mrnas_unique %in% df_allrna$GENE_NAME)]
+  one_names <- one_expr_data_found
+  
+  # 2) search in BioMart(Ensembl) by gene symbol = gene name
+  #mart <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl")
+  two_expr_data_found <- getBM(attributes=c('ensembl_gene_id', 'external_gene_name'),
+                               filters = 'external_gene_name',
+                               values = one_not_found,
+                               mart = DB_ensembl)
+  # only present in expr.table
+  two_expr_data_found <- two_expr_data_found[two_expr_data_found$ensembl_gene_id %in% df_allrna$GENE_ID,]
+  # use match -> NA are not found, indices found used to fetch by GENE ID
+  two_found_idx <- match(one_not_found, two_expr_data_found$external_gene_name)
+  two_expr_data_found <- two_expr_data_found[na.omit(two_found_idx),]
+  two_names <- two_expr_data_found$external_gene_name
+  two_ENSG_rows <- df_allrna[match(two_expr_data_found$ensembl_gene_id, df_allrna$GENE_ID),]
+  two_not_found <- one_not_found[is.na(two_found_idx)]
+  
+  # 3) search in BioMart(Ensembl) by gene synonym = e.g. gene name, but not primary used
+  tri_expr_data <- getBM(attributes=c('ensembl_gene_id', 'external_gene_name', 'external_synonym'),
+                         filters = 'external_synonym',
+                         values = two_not_found,
+                         mart = DB_ensembl)
+  # only those in expr.table are needed
+  tri_expr_data <- tri_expr_data[tri_expr_data$ensembl_gene_id %in% df_allrna$GENE_ID,]
+  tri_found_idx <- match(two_not_found, tri_expr_data$external_synonym)
+  tri_expr_data <- tri_expr_data[na.omit(tri_found_idx),]
+  tri_ENSG_rows <- df_allrna[match(tri_expr_data$ensembl_gene_id, df_allrna$GENE_ID),]
+  tri_names <- tri_expr_data$external_synonym
+  
+  # 4) combine them into one table
+  total_names <- c(one_names, two_names, tri_names)
+  total_ENSG_rows <- rbind(one_ENSG_rows, two_ENSG_rows, tri_ENSG_rows)
+  
+  # 5) create conversion table:
+  all_mrnas_cgpa <- circGPA_mrnas()
+  all_mrnas_idx <- match(all_mrnas_cgpa, total_names)
+  all_mrnas_rows <- total_ENSG_rows[all_mrnas_idx,]
+  
+  mrna_CT <- data.frame(matrix(NA, 
+                               nrow=length(all_mrnas_cgpa), 
+                               ncol=(length(sample_names) + 6)
+                               )
+                        )
+  
+  mrna_CT[,1] <- all_mrnas_cgpa
+  mrna_CT[,2] <- all_mrnas_rows$GENE_NAME
+  mrna_CT[,3] <- all_mrnas_rows$GENE_ID
+  mrna_CT[,4] <- all_mrnas_cgpa %in% total_names
+  mrna_CT[,5] <- all_mrnas_rows$START
+  mrna_CT[,6] <- all_mrnas_rows$END
+  mrna_CT[,7:(6+length(sample_names))] <- all_mrnas_rows[,sample_names]
+  
+  colnames(mrna_CT) <- c("circGPA symbol",
+                         "MDS symbol",
+                         "MDS ENSG ID",
+                         "Is in MDS?",
+                         "START", "END",
+                         sample_names)
+  
+  saveRDS(mrna_CT, f_cached_mrna_CT)
+  return(mrna_CT)
+}
+
+generate_circrna_CT <- function()
+{
+  
+  df_allrna <- readDataFile(f_annot_allrna)
+  df_circrna <- readDataFile(f_annot_circrna)
+  sample_names <- extract_all_sample_names(df_circrna, df_allrna)
+  df_prior_circ <- readDataFile(f_prior_circ_mirna)
+  
+  df_prior_circ_orig <- df_prior_circ
+  sample_names <- extract_all_sample_names(df_circrna, df_allrna)
+  
+  mirna_rows <- extract_mirna(df_allrna)
+  circrna_rows <- extract_circrna(df_circrna)
+  
+  prior_circrna_names <- df_prior_circ$circRNA
+  prior_circrna_names <- unique(prior_circrna_names)
+  prior_circrna_names_unique <- prior_circrna_names
+  
+  found <- circrna_rows$circRNA_ID %in% prior_circrna_names
+  prior_circ_rows <- circrna_rows[found,]
+  
+  if(length(prior_circrna_names_unique) != nrow(prior_circ_rows))
+  {
+    str <- sprintf("Prior CIRC-miRNA had %s circRNAs, I found only %s in expression data! Not found:",
+                   nrow(prior_circ_rows),
+                   length(prior_circrna_names_unique))
+    
+    print(str)
+    print(circrna_rows[!found,]$circRNA_ID)
+    print(prior_circrna_names[!(prior_circrna_names %in% prior_circ_rows$circRNA_ID)])
+  }
+  
+  cgpa_circrna <- circGPA_circrnas()
+  cgpa_idx <- match(cgpa_circrna, prior_circrna_names)
+  cgpa_rows <- prior_circ_rows[cgpa_idx,]
+  
+  
+  circ_CT <- data.frame(matrix(NA, 
+                               nrow=length(cgpa_circrna), 
+                               ncol=(length(sample_names) + 6)
+  )
+  )
+  
+  circ_CT[,1] <- cgpa_circrna
+  circ_CT[,2] <- cgpa_rows$circRNA_ID
+  circ_CT[,3] <- cgpa_rows$GENE_ID
+  circ_CT[,4] <- cgpa_circrna %in% prior_circrna_names
+  circ_CT[,5] <- cgpa_rows$START
+  circ_CT[,6] <- cgpa_rows$END
+  circ_CT[,7:(6+length(sample_names))] <- cgpa_rows[,sample_names]
+  
+  colnames(circ_CT) <- c("circGPA symbol",
+                         "MDS symbol",
+                         "MDS ENSG ID",
+                         "Is in MDS?",
+                         "START",
+                         "END",
+                         sample_names)
+  
+
+  saveRDS(circ_CT, f_cached_circrna_CT)
+  return(circ_CT)
+}
+
+generate_mirna_CT <- function()
+{
+  df_allrna <- readDataFile(f_annot_allrna)
+  df_circrna <- readDataFile(f_annot_circrna)
+  sample_names <- extract_all_sample_names(df_circrna, df_allrna)
+  df_prior_circ <- readDataFile(f_prior_circ_mirna)
+  df_PK_mi_mrna <- circGPA_df_M_MI()
+  
+  cgpa_mirna <- circGPA_mirnas()
+  mirna_rows <- extract_mirna(df_allrna)
+  
+  # Convert MIRLET124A3-5p ----> MIRLET124A,
+  # e.g., remove both suffixes <-5p> and <3>
+  divide_combined_form <- function(combo)
+  {
+    #combo_orig <- combo
+    if(combo == "MIR103A1")
+    {
+      tst <- 1
+    }
+    # first --- remove some suffix in form of "-5p"
+    combo <- strsplit(combo, split="-")[[1]][1]
+    
+    # second --- remove other form of suffix:
+    combo2 <- str_split(combo, "", simplify = TRUE)  # --> each character as element
+    is_num <- !is.na(suppressWarnings(as.numeric(combo2))) #---> TRUE/FALSE for each char: is digit?
+    if(!last(is_num))  # if last digit is not a digit --> 100% doesn't have second suffix
+    {
+      return(combo)
+    }
+    
+    parts <- strsplit(combo, "[0-9]")
+    parts <- unlist(parts)
+    parts <- parts[parts != ""]
+    if(length(parts) != 2)    # the only possible second suffix is in form of:
+    {                         # MIR[LET] [digits] [characters] <numbers suffix>
+      return(combo)           # by doing split we can check if we have two digit parts
+    }
+    # remove last number
+    combo <- gsub("|\\d+$", "", combo)    # if we have two parts ---> remove number from end
+    return(combo)
+  }
+  cmirnas <- sapply(mirna_rows$GENE_NAME, divide_combined_form)
+  
+  mirnas_unique <- unique(cgpa_mirna)
+  idx_mirna <- match(cgpa_mirna, mirnas_unique)
+  
+  gene_names_unique <- sapply(mirnas_unique, function(name)
+  {
+    mirna <- strsplit(name, split="-")[[1]]
+    
+    if(mirna[[1]] != "hsa" || length(mirna) < 3)
+    {
+      str <- sprintf("ERROR! miRNA %s has some wrong format! Use hsa-XXX-XXX", name)
+      stop(str)
+    }
+    
+    type <- mirna[[2]]
+    id <- mirna[[3]]
+    str_ret <- "MIR"
+    if(type == "let")
+    {
+      str_ret <- paste(str_ret, "LET", sep="")
+    }
+    if(type != "let" && type != "miR")
+    {
+      str <- sprintf("ERROR! miRNA %s has wrong type! Use either: hsa-let-XXX/hsa-mir-XXX", name)
+      stop(str)
+    }
+    
+    str_ret <- paste(str_ret, toupper(id), sep="")
+    return(str_ret)
+  })
+  gene_names_unique_unnamed <- unname(gene_names_unique)
+  
+  # Extract only those that are present in non-suffix MDS data
+  mirna_found <- gene_names_unique_unnamed[gene_names_unique_unnamed %in% cmirnas]
+  mirnas_unique <- mirnas_unique[gene_names_unique_unnamed %in% cmirnas]
+  mirna_found_idx <- match(mirna_found, cmirnas)
+  mirna_found_rows <- mirna_rows[mirna_found_idx, ]
+  
+  # create Conversion table
+  cgpa_rows <- mirna_found_rows[match(gene_names_unique_unnamed, mirna_found),]
+  mi_CT <- data.frame(matrix(NA, 
+                               nrow=length(cgpa_mirna), 
+                               ncol=(length(sample_names) + 7)
+                            )
+  )
+  
+  mi_CT[,1] <- cgpa_mirna
+  mi_CT[,2] <- gene_names_unique_unnamed
+  mi_CT[,3] <- cgpa_rows$GENE_NAME
+  mi_CT[,4] <- cgpa_rows$GENE_ID
+  mi_CT[,5] <- gene_names_unique_unnamed %in% mirna_found
+  mi_CT[,6] <- cgpa_rows$START
+  mi_CT[,7] <- cgpa_rows$END
+  mi_CT[,8:(7+length(sample_names))] <- cgpa_rows[,sample_names]
+  
+  colnames(mi_CT) <- c("circGPA symbol",
+                         "circGPA converted",
+                         "MDS symbol",
+                         "MDS ENSG ID",
+                         "Is in MDS?",
+                         "START",
+                         "END",
+                         sample_names)
+  
+  
+  saveRDS(mi_CT, f_cached_mirna_CT)
+  return(mi_CT)
+}
+
+
+generate_circGPA_conversion_tables <- function()
+{
+  ct <- generate_mrna_CT()
+  cat("mRNA conversion table size: ", nrow(ct), "\n")
+  ct <- generate_mirna_CT()
+  cat("miRNA conversion table size: ", nrow(ct), "\n")
+  ct <- generate_circrna_CT()
+  cat("circRNA conversion table size: ", nrow(ct), "\n")
+  print("Success!")
+}
+
+
diff --git a/source/general.R b/source/general.R
new file mode 100644
index 0000000000000000000000000000000000000000..fda93cd00827b5f595a6d378b309d29b7ede0fda
--- /dev/null
+++ b/source/general.R
@@ -0,0 +1,25 @@
+
+
+src.source <- function(nm)
+{
+  prev_wd <- getwd()
+  setwd(getSrcDirectory(function(){})[1])
+  #setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
+  source(nm)
+  setwd(prev_wd)
+}
+
+src.cppsource <- function(nm)
+{
+  prev_wd <- getwd()
+  setwd(getSrcDirectory(function(){})[1])
+  #setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
+  Rcpp::sourceCpp(nm)
+  setwd(prev_wd)
+}
+
+msg <- function(x, width = 120)
+{
+  message("\r", rep(" ", times = width - length(x)), "\r", appendLF = F)
+  message(x, appendLF = F)
+}
\ No newline at end of file
diff --git a/source/mmpc_optimized.R b/source/mmpc_optimized.R
new file mode 100644
index 0000000000000000000000000000000000000000..079c6241841c20de6c714b9c81978c9afd2fafc5
--- /dev/null
+++ b/source/mmpc_optimized.R
@@ -0,0 +1,385 @@
+mmpc_bnlearn_classic <- function(x, cluster = NULL, whitelist = NULL, blacklist = NULL, 
+                                 test = NULL, alpha = NULL, B = NULL, max.sx = NULL, 
+                                 debug = FALSE, undirected = FALSE)
+{
+  method <- "mmpc"
+  
+  reset.test.counter()
+  res = NULL
+  parallel = FALSE
+  data.info = check.data(x, allow.missing = TRUE)
+  check.learning.algorithm(method, class = "constraint")
+  test = check.test(test, x)
+  check.logical(debug)
+  check.logical(undirected)
+  alpha = check.alpha(alpha)
+  B = check.B(B, test)
+  max.sx = check.largest.sx.set(max.sx, x)
+  cluster = check.cluster(cluster)
+  if (!is.null(cluster)) {
+    parallel = TRUE
+    slaves.setup(cluster)
+    if (debug) {
+      warning("disabling debugging output for parallel computing.")
+      debug = FALSE
+    }
+  }
+  whitelist = build.whitelist(whitelist, nodes = names(x), 
+                              data = x, algo = method, criterion = test)
+  blacklist = build.blacklist(blacklist, whitelist, names(x), 
+                              algo = method)
+  full.blacklist = arcs.rbind(blacklist, list.illegal.arcs(names(x), 
+                                                           x, test))
+  full.blacklist = unique.arcs(full.blacklist, names(x))
+
+  
+  local.structure = maxmin.pc(x = x, whitelist = whitelist, 
+                              blacklist = full.blacklist, test = test, alpha = alpha, 
+                              B = B, debug = debug, max.sx = max.sx, cluster = cluster, 
+                              complete = data.info$complete.nodes)
+  
+
+  if (undirected) {
+    arcs = nbr2arcs(local.structure)
+    learning = list(whitelist = whitelist, blacklist = blacklist, 
+                    test = test, args = list(alpha = alpha), ntests = test.counter())
+    if (!is.null(B)) 
+      learning$args$B = B
+    res = list(learning = learning, nodes = cache.structure(names(x), 
+                                                            arcs = arcs), arcs = arcs)
+  }
+  else {
+    res = learn.arc.directions(x = x, local.structure = local.structure, 
+                               whitelist = whitelist, blacklist = full.blacklist, 
+                               test = test, alpha = alpha, B = B, max.sx = max.sx, 
+                               complete = data.info$complete.nodes, debug = debug)
+    res$learning["blacklist"] = list(blacklist)
+  }
+  if (parallel) 
+    res$learning$ntests = res$learning$ntests + sum(unlist(parallel::clusterEvalQ(cluster, 
+                                                                                  test.counter())))
+  res$learning$algo = method
+  res$learning$undirected = undirected
+  res$learning$max.sx = max.sx
+  res$learning$illegal = list.illegal.arcs(names(x), x, test)
+  invisible(structure(res, class = "bn"))
+}
+
+
+mmpc_bnlearn_tripartite <- function(x, cluster, partitions, whitelist = NULL,
+                                    test = NULL, alpha = 0.05, 
+                                    B = NULL, max.sx = NULL, debug = FALSE, undirected = TRUE,
+                                    from=1,to=NULL)
+{
+  # Hardcode
+  #
+  blacklist = NULL
+  method <- "mmpc"
+  if(is.null(to))
+  {
+    to <- ncol(x)
+  }
+  
+  
+  reset.test.counter()
+  res = NULL
+  parallel = FALSE
+  data.info = check.data(x, allow.missing = TRUE)
+  check.learning.algorithm(method, class = "constraint")
+  test = check.test(test, x)
+  check.logical(debug)
+  check.logical(undirected)
+  alpha = check.alpha(alpha)
+  B = check.B(B, test)
+  max.sx = check.largest.sx.set(max.sx, x)
+  cluster = check.cluster(cluster)
+  if (!is.null(cluster)) {
+    parallel = TRUE
+    slaves.setup(cluster)
+    if (debug) {
+      warning("disabling debugging output for parallel computing.")
+      debug = FALSE
+    }
+  }
+  whitelist = build.whitelist(whitelist, nodes = names(x), 
+                              data = x, algo = method, criterion = test)
+  # blacklist = build.blacklist(blacklist, whitelist, names(x), 
+  #                             algo = method)
+  # full.blacklist = arcs.rbind(blacklist, list.illegal.arcs(names(x), 
+  #                                                          x, test))
+  # full.blacklist = unique.arcs(full.blacklist, names(x))
+
+  local.structure = .inside_mmpc_bnlearn_tripartite(
+                              x = x, partitions=partitions, 
+                              whitelist = whitelist, 
+                              blacklist = NULL, test = test, alpha = alpha, 
+                              B = B, debug = debug, max.sx = max.sx, cluster = cluster, 
+                              complete = data.info$complete.nodes,
+                              from=from, to=to)
+
+
+  if (undirected) {
+    arcs = nbr2arcs(local.structure)
+    learning = list(whitelist = whitelist, blacklist = NULL, 
+                    test = test, args = list(alpha = alpha), ntests = test.counter())
+    if (!is.null(B)) 
+      learning$args$B = B
+    res = list(learning = learning, nodes = cache.structure(names(x), 
+                                                            arcs = arcs), arcs = arcs)
+  }
+  else {
+    res = learn.arc.directions(x = x, local.structure = local.structure, 
+                               whitelist = whitelist, blacklist = full.blacklist, 
+                               test = test, alpha = alpha, B = B, max.sx = max.sx, 
+                               complete = data.info$complete.nodes, debug = debug)
+    res$learning["blacklist"] = list(blacklist)
+  }
+  if (parallel) 
+    res$learning$ntests = res$learning$ntests + sum(unlist(parallel::clusterEvalQ(cluster, 
+                                                                                  test.counter())))
+  res$learning$algo = method
+  res$learning$undirected = undirected
+  res$learning$max.sx = max.sx
+  res$learning$illegal = list.illegal.arcs(names(x), x, test)
+  invisible(structure(res, class = "bn"))
+}
+
+.inside_mmpc_bnlearn_tripartite <- function(x, partitions, 
+                                            cluster = NULL, whitelist, blacklist, test, alpha, 
+                                            B, max.sx = ncol(x), complete, debug = FALSE,
+                                            from, to)
+{
+  nodes = names(x)
+  
+  mask <- nodes %in% partitions[[1]]
+  if(length(partitions) > 1)
+  {
+    for(i in 2:length(partitions))
+    {
+      mask <- mask + (nodes %in% partitions[[i]]) * i
+    }
+  }
+  names(mask) <- nodes
+  
+  #idxs <- 1:length(nodes)
+  idxs <- from:to
+  print("Forward:")
+  mb = smartSapply(cluster, 
+                   as.list(idxs), 
+                   .imprvd_maxmin.pc.forward.phase, 
+                   partitions = partitions,
+                   all_nodes = nodes,
+                   mask=mask,
+                   
+                   data = x, alpha = alpha, B = B, 
+                   whitelist = whitelist, 
+                   #blacklist = blacklist, 
+                   test = test, max.sx = max.sx, 
+                   complete = complete, debug = debug)
+  
+  names(mb) = nodes
+  quit()
+  
+  print("Backward:")
+  mb = smartSapply(cluster, 
+                   
+                   as.list(idxs),  
+                   .imprvd_neighbour, 
+                   mb = mb, 
+                   partitions = partitions,
+                   all_nodes = nodes,
+                   mask=mask,
+                   
+                   data = x, alpha = alpha, B = B, 
+                   whitelist = whitelist, 
+                   #blacklist = blacklist, 
+                   test = test, max.sx = max.sx, 
+                   markov = FALSE, complete = complete, debug = debug)
+  
+  print("MB:")
+  names(mb) = nodes
+  
+  for (node in nodes) mb[[node]]$mb = fake.markov.blanket(mb, 
+                                                          node)
+  mb = bn.recovery(mb, nodes = nodes, debug = debug)
+  return(mb)
+}
+
+
+
+.imprvd_maxmin.pc.forward.phase <- function(idx, mask, partitions,
+                                            data, all_nodes, alpha, B, whitelist, #blacklist,
+                                            test, 
+                                            max.sx = ncol(x), complete, debug = FALSE)
+{
+  my_mid <- mask[idx]
+  x <- all_nodes[idx]
+  
+  cat("Node: ", x, ", myid:", my_mid, "\n")
+  
+  if(my_mid == length(partitions))
+  {
+    nodes <- all_nodes[all_nodes %in% partitions[[my_mid - 1]]]
+  }
+  else if(my_mid == 1)
+  {
+    nodes <- all_nodes[all_nodes %in% partitions[[my_mid + 1]]]
+  }
+  else{
+    nodes1 <- all_nodes[all_nodes %in% partitions[[my_mid + 1]]]
+    nodes2 <- all_nodes[all_nodes %in% partitions[[my_mid - 1]]]
+    nodes <- union(nodes1, nodes2)
+  }
+  
+  
+  
+  whitelist <- whitelist[whitelist[,1] %in% c(nodes, x),]
+  whitelist <- whitelist[whitelist[,2] %in% c(nodes, x),]
+  
+  nodes = nodes[nodes != x]
+  whitelisted = nodes[sapply(nodes, function(y) {
+    is.whitelisted(whitelist, c(x, y), either = TRUE)
+  })]
+  # blacklisted = nodes[sapply(nodes, function(y) {
+  #   is.blacklisted(blacklist, c(x, y), both = TRUE)
+  # })]
+  cpc = whitelisted
+  association = structure(numeric(length(nodes)), names = nodes)
+  to.add = ""
+  # if (debug) {
+  #   #cat("----------------------------------------------------------------\n")
+  #   #cat("* forward phase for node", x, ".\n")
+  # }
+  
+  #nodes = nodes[nodes %!in% c(cpc, blacklisted)]
+  repeat {
+    
+    if (length(cpc) > max.sx) 
+      break
+    to.be.checked = setdiff(names(which(association < alpha)), 
+                            cpc)
+    
+    cat("Node: ", idx, ", CPC: [", length(cpc), " + ", length(to.be.checked),"]\n")
+    
+    start_time <- Sys.time()
+    
+    association = sapply(to.be.checked, maxmin.pc.heuristic.optimized, 
+                         y = x, sx = cpc, data = data, test = test, alpha = alpha, 
+                         B = B, association = association, complete = complete, 
+                         debug = FALSE)
+    
+    end_time <- Sys.time()
+    cat("Node: ", idx, ", CPC: [", length(cpc), " + ", length(to.be.checked),"], t: ", end_time - start_time,"\n")
+    
+    
+    if (all(association > alpha) || length(nodes) == 0 || 
+        is.null(nodes)) 
+      break
+    to.add = names(which.min(association))
+    
+    
+    
+    # if (debug) {
+    #   cat("  @", to.add, "accepted as a parent/children candidate ( p-value:", 
+    #       association[to.add], ").\n")
+    # }
+    if (association[to.add] <= alpha) {
+      cpc = c(cpc, to.add)
+      nodes = nodes[nodes != to.add]
+    }
+  }
+  
+  #storage/praha1/home/anuarali/DP/MMPC/
+  
+  cat("Node: ", idx, ", finished with [", length(cpc), "], saving to:\n", 
+      paste("fwd_tmp/cpc_",idx,".RData",sep=""),
+      "\n")
+  
+  saveRDS(cpc, paste("fwd_tmp/cpc_",idx,".RData",sep=""))
+  
+  cat("Node: ", idx, " successfully saved!")
+  
+  return(cpc)
+}
+
+.imprvd_neighbour <- function(idx, mask, partitions, all_nodes,
+                              mb,
+                              data, alpha, B = NULL, whitelist, #blacklist, 
+                              test, empty.dsep = TRUE, markov = TRUE, max.sx = ncol(x), 
+                              complete, debug = FALSE)
+{
+  my_mid <- mask[idx]
+  x <- all_nodes[idx]
+  cat("Node: ", x, ", myid:", my_mid)
+  
+  if(my_mid == length(partitions))
+  {
+    nodes <- all_nodes[all_nodes %in% partitions[[my_mid - 1]]]
+  }
+  else if(my_mid == 1)
+  {
+    nodes <- all_nodes[all_nodes %in% partitions[[my_mid + 1]]]
+  }
+  else{
+    nodes1 <- all_nodes[all_nodes %in% partitions[[my_mid + 1]]]
+    nodes2 <- all_nodes[all_nodes %in% partitions[[my_mid - 1]]]
+    nodes <- union(nodes1, nodes2)
+  }
+  whitelist <- whitelist[whitelist[,1] %in% c(nodes, x),]
+  whitelist <- whitelist[whitelist[,2] %in% c(nodes, x),]
+  
+  
+  # ORIGINAL CODE
+  candidate.neighbours = mb[[x]]
+  if (length(candidate.neighbours) == 0) {
+    if (debug) {
+      cat("----------------------------------------------------------------\n")
+      cat("* markov blanket of", x, "is empty; the neighbourhood as well.\n")
+    }
+    return(list(mb = character(0), nbr = character(0)))
+  }
+  # blacklisted = candidate.neighbours[sapply(candidate.neighbours, 
+  #                                           function(y) {
+  #                                             is.blacklisted(blacklist, c(x, y), both = TRUE)
+  #                                           })]
+  whitelisted = candidate.neighbours[sapply(candidate.neighbours,
+                                            function(y) {
+                                              is.whitelisted(whitelist, c(x, y), either = TRUE)
+                                            })]
+  # candidate.neighbours = setdiff(candidate.neighbours, blacklisted)
+  candidate.neighbours = union(candidate.neighbours, whitelisted)
+  
+  if (debug) {
+    cat("----------------------------------------------------------------\n")
+    cat("* learning neighbourhood of", x, ".\n")
+    #cat("  * blacklisted nodes: '", blacklisted, "'\n")
+    # cat("  * whitelisted nodes: '", whitelisted, "'\n")
+    # cat("  * starting with neighbourhood: '", candidate.neighbours, 
+    #     "'\n")
+  }
+  if (length(candidate.neighbours) <= 1) 
+    return(list(mb = mb[[x]], nbr = candidate.neighbours))
+  for (y in setdiff(candidate.neighbours, whitelisted)) {  
+    # if (debug) 
+    #   cat("  * checking node", y, "for neighbourhood.\n")
+    if (markov) 
+      dsep.set = smaller(setdiff(mb[[x]], y), setdiff(mb[[y]], 
+                                                      x))
+    else dsep.set = setdiff(mb[[x]], y)
+    # if (debug) 
+    #   cat("    > dsep.set = '", dsep.set, "'\n")
+    a = allsubs.test(x = x, y = y, sx = dsep.set, min = ifelse(empty.dsep, 
+                                                               0, 1), max = min(length(dsep.set), max.sx), data = data, 
+                     test = test, alpha = alpha, B = B, complete = complete, 
+                     debug = debug)
+    if (a["p.value"] > alpha) 
+      candidate.neighbours = candidate.neighbours[candidate.neighbours != 
+                                                    y]
+    # if (debug) 
+    #   cat("    > node", y, "is", ifelse(a["p.value"] > 
+    #                                       alpha, "not", "still"), "a neighbour of", x, 
+    #       ". ( p-value:", a["p.value"], ")\n")
+  }
+  return(list(mb = mb[[x]], nbr = candidate.neighbours))
+}
+
diff --git a/source/skeleton.R b/source/skeleton.R
new file mode 100644
index 0000000000000000000000000000000000000000..31357ab4d9e2106b2c75af5393443c9efacdabcb
--- /dev/null
+++ b/source/skeleton.R
@@ -0,0 +1,337 @@
+
+
+# SHOULD BE CALLED FROM RMD source("....some/path.../general.R")
+source("G:/4_Semestr/DP/R_version/source/general.R")
+
+
+src.source("BiDAG_BGe.R")
+src.cppsource("skeleton.cpp")
+
+BGe_compute <- function(adj, param)
+{
+  nodes_cand <- 1:nrow(adj)
+  sc <- sum(sapply(nodes_cand, function(node)
+  {
+    neights <- which(adj[node,] > 0)
+    score <- BGe_node_compute(node, neights, param)
+    return(score)
+  })
+  )
+  return(sc)
+}
+
+BGe_change <- function(from, to, action_is_add, sizes, score_prev, G_adjs, param)
+{
+  part_id <- sum(from > sizes) + 1
+  
+  adj <- G_adjs[[part_id]]
+  prev <- adj[from, to]
+  
+  score_node_prev <- BGe_node_compute(node, neights, param)
+  
+  adj[from, to] <- action_is_add
+  
+  node <- colnames(adj)[to]
+  neights <- names(which(adj[node,] > 0))
+  
+  score_node <- BGe_node_compute(node, neights, param)
+  
+  adj[from, to] <- prev
+  return(score_node - score_node_prev + score_prev)
+}
+
+
+#'
+#' Compute a prior energy function of the given adjacency graph. Formula:
+#' E(i,j) = 
+#'        (if edge in G) = 1 - B(i,j) 
+#'        
+#'    (if edge not in G) = B(i,j)
+#' 
+#' E(G) = sum(i,j) E(i,j)
+#' 
+#' For reference see:
+#' * IntOMICS:
+#' https://doi.org/10.1186/s12859-022-04891-9
+#' * Bayesian prior see:
+#' https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3957076/
+#' 
+#' @param G_adjs A list of adjacency matrices across each partition interaction, for example: list(CIRC->MI, MI->M)
+#' @param B_PKs A list of prior matrices for each partition interaction, for example: list(CIRC->MI, MI->M)
+#' @param N A total number of genes
+#' @returns E(G), the prior energy function for a given graph
+#'
+Energy_compute <- function(G_adjs, B_PKs, N)
+{
+  E <- 0  
+  
+  # we should count blocked edges too, 
+  # they have 0 prior probability -> E(i,j) = 1
+  # we do it by doing (N x N) - (non-blocked number)
+  E_non_blocked <- 0  
+  
+  for(i in 1:length(G_adjs)) # for each partition interaction separately
+  {
+    # if 1 ---> 1-B(i,j), ...., if 0 ----> B(i,j)
+    E <- sum(G_adjs[[i]] * (1-B_PKs[[i]])  + (1-G_adjs[[i]]) * B_PKs[[i]])
+    E_non_blocked <- E_non_blocked + ncol(B_PKs[[i]]) * nrow(B_PKs[[i]])
+  }
+  E <- (E + N*N - E_non_blocked) 
+  return(E)
+}
+
+#'
+#' Compute a score-P(G) for a given graph. This part of function should be called 
+#'
+#' For an integral formula of the Bayesian prior see:
+#' https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3957076/
+#' 
+#' The formula used below is the closed-form solution
+#'  for definite integral with the given Beta_L, Beta_H.
+#' 
+#' @param E A previously computed energy U(G) for a graph
+#' @param N A total number of genes
+#' @param beta_L A lower threshold on prior's weight      (numeric) 
+#' @param beta_H An upper threshold on prior's weight     (numeric)
+#' @returns score ~proportional to~ P(G), network prior for a given network
+#'
+pP_G_compute <- function(E, N, beta_L, beta_H)
+{
+  U <- E / (N*N)
+  delta_beta <- beta_H - beta_L
+  
+  ret <- exp(-U * beta_L) - exp(-U * beta_H)
+  ret <- ret / delta_beta
+  ret <- ret / U
+  return(ret)
+}
+
+P_G_change <- function(from, to, action_is_add, sizes, energy_prev, B_PKs, beta_L, beta_H)
+{
+  part_id <- sum(from > sizes) + 1
+  
+  B_PK_mat <- B_PKs[[part_id]]
+  
+  b_i_j <- B_PK_mat[from, to]
+  
+  u_change <- 1 - 2 * b_i_j
+  
+  if(action_is_add) 
+  {
+    u_change <- -1 * u_change
+  }
+  
+  N <- sum(sizes)
+  
+  U.new <- energy_prev + u_change
+  score <- pP_G_compute(U.new, N, beta_L, beta_H)
+  
+  ret <- list(score=score, energy=U.new)
+  return(ret)
+}
+
+
+
+
+
+#'
+#' Compute a score-P(G) for a given graph, e.g., non-normalized P(G).
+#'
+#' For theoretical reference see:
+#' * IntOMICS:
+#' https://doi.org/10.1186/s12859-022-04891-9
+#' * Bayesian prior see:
+#' https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3957076/
+#' 
+#' @param G_adjs A list of adjacency matrices across each partition interaction, for example: list(CIRC->MI, MI->M)
+#' @param B_PKs A list of prior matrices for each partition interaction, for example: list(CIRC->MI, MI->M)
+#' @param beta_L A lower threshold on prior's weight      (numeric) 
+#' @param beta_H An upper threshold on prior's weight     (numeric)
+#' @returns A list with 2 values, score-P(G) and energy U(G)
+#'
+P_G <- function(G_adjs, B_PKs, beta_L, beta_H)
+{
+  # compute total number of genes
+  N <- ncol(G_adjs[[1]]) + sum(unlist(lapply(G_adjs, nrow)))  
+  
+  # The computation is divided for further optimization:
+  # energy could be changed by a constant
+  E <- Energy_compute(G_adjs, B_PKs, N)
+  sP_G <- pP_G_compute(E, N, beta_L, beta_H)
+  return(list(score=sP_G, energy=E))
+}
+
+#'
+#' Compute a P(D | G) for a given graph, e.g., BGe score without the prior part
+#' 
+#' @param G_adjs A list of adjacency matrices across each partition interaction, for example: list(CIRC->MI, MI->M)
+#' @param param_in A precomputed parameters for BGe computation
+#' @returns P(D | G) = BGe score of a given graph
+#'
+P_D_G <- function(G_adjs, param_in)
+{
+  pDG <- 0
+  for(i in 1:length(G_adjs)) # for each partition interaction separately
+  {
+    adj <- G_adjs[[i]]
+    cat(paste(i, "/", length(G_adjs), "\n"))
+    pDG <- pDG + BGe_compute(adj, param_in)
+  }
+  return(pDG)
+}
+
+#'
+#' Compute a score proportional to P(G | D) for a given graph, e.g., 
+#'    P(G | D) = P(D|G) P(G) ~ (BGe score, data only) * (Prior knowledge)
+#'    
+#'    
+#' The proportionality is the absence of normalization constant, e.g.,
+#' each score is the P(G|D) multiplied by some constant
+#' 
+#' @param G_adjs A list of adjacency matrices across each partition interaction, for example: list(CIRC->MI, MI->M)
+#' @param B_PKs A list of prior matrices for each partition interaction, for example: list(CIRC->MI, MI->M)
+#' @param beta_L A lower threshold on prior's weight      (numeric) 
+#' @param beta_H An upper threshold on prior's weight     (numeric)
+#' @param param_in A precomputed parameters for BGe computation
+#' @returns P(D | G) = BGe score of a given graph
+#'
+score_G <- function(G_adjs, B_PKs, beta_L, beta_H, param_in)
+{
+  pG_list <- P_G(G_adjs, B_PKs, beta_L, beta_H)
+  pDG <- P_D_G(G_adjs, param_in)
+  #pGD <- pG * pDG
+  return(c("pG"=pG_list$score, "pDG"=pDG, "energy"=pG_list$energy))
+}
+
+
+score_change_G <- function(G_adjs, B_PKs, beta_L, beta_H, param_in,
+                           sizes,
+                           score_prev, from, to, action_is_add)
+{
+    ret <- score_prev
+    N <- sum(sizes)
+    
+    # ! Convert C++ indices to Rcpp indices(+1) !
+    AC <- P_G_change(from+1, to+1, action_is_add, sizes, ret["energy"], B_PKs, beta_L, beta_H)
+    B <- BGe_change(from+1, to+1, action_is_add, sizes, ret["pDG"], G_adjs, param_in)
+
+    ret["pG"] <- ret["pG"] + AC$score
+    ret["pDG"] <- ret["pDG"] + B
+    ret["energy"] <- ret["energy"] + AC$energy
+    return(ret)
+}
+
+
+
+#' Run a modified SKeleton-Based Stochastic Search. 
+#' Requires an input as:
+#' N = number of genes
+#' S = number of samples
+#' 
+#' @param GE A gene expression matrix,                              (N x S)
+#' @param PK A prior knowledge matrix,                              (N x N)
+#' @param SK A skeleton inferred from MMPC_tripartite,              (N x N)
+#' @param parts A list of partitions, total unique elements:        (N)
+#' @param I A global number of iterations to perform,               (integer)
+#' @param alpha A threshold prob.value for final edge cutoff        (alpha)
+#' @param debug Logical, TRUE if a debugging print is needed        (logical)
+#' @param beta_L A lower threshold on prior's weight, default = 0.5 (numeric) 
+#' @param beta_H An upper threshold on prior's weight, default=10.5 (numeric) 
+#' @returns A binary matrix, the final inferred Bayesian net        (N x N)
+#'
+SK_SS_tripartite <- function(GE, PK, SK, parts, I=1000, debug=FALSE, alpha=0.05, beta_L=0.5, beta_H=10.5)
+{
+  if(missing(GE) || missing(PK) || missing(SK) || missing(parts)) {
+    stop("Some of the input arguments were not provided, need: Ge, PK, SK, parts")
+  }
+  
+  if(!(class(GE)[1] %in% c("matrix", "data.frame")))
+  {
+    stop("GE should be a matrix or data.frame")
+  }
+  if(!(class(PK)[1] %in% c("matrix", "data.frame")))
+  {
+    stop("PK should be a matrix or data.frame")
+  }
+  if(!(class(SK)[1] %in% c("matrix", "data.frame")))
+  {
+    stop("SK should be a matrix or data.frame")
+  }
+  if((class(parts) != "list") || (class(unlist(parts)) != "character"))
+  {
+    stop("parts should be a list of character vectors")
+  }
+  if(ncol(PK) != nrow(PK))
+  {
+    stop("PK should be a square logical matrix of size N x N, where N=number of genes")
+  }
+  if(sum(dim(PK) == dim(SK)) != 2)
+  {
+    stop("PK and SK should both have a same size square logical matrix of size N x N, where N=number of genes")
+  }
+  if(ncol(GE) != ncol(PK))
+  {
+    stop("GE and PK should have the same number of genes, GE=(S x N), PK=(N x N)")
+  }
+  if(length(unlist(unique(parts))) != ncol(GE))
+  {
+    stop("partitions should contain all genes, e.g., length(unlist(unique(parts))) == N")
+  }
+  
+  # Extract sizes of every partitions
+  sizes <- unlist(lapply(parts, length))
+  
+  # prepare BGe score parameters
+  myScore <- scoreparameters_BiDAG_BGe(n = ncol(GE), data = GE)
+  param_in <- list(n=myScore$n, awpN=myScore$awpN, TN=myScore$TN, scoreconstvec=myScore$scoreconstvec)
+  remove(myScore)
+  
+  # Extract every partition's GE,PK,SK parts
+  GE_in <- list()
+  PK_in <- list()
+  SK_in <- list()
+
+  for(i in 1:length(parts))
+  {
+    # GE
+    subGE <- GE[, parts[[i]]]
+    subGE <- as.matrix(subGE)
+    colnames(subGE) <- parts[[i]]
+    GE_in[[i]] <- subGE
+    remove(subGE)
+    
+    # interactions are 1 less, e.g., for K parts there are K-1 interaction parts
+    if(i == 1){
+      next
+    }
+    
+    # PK
+    subPK <- PK[parts[[i-1]],parts[[i]]]
+    subPK <- as.matrix(subPK)
+    
+    # convert PK to prior scores as in IntOMICS/Bayesian Network Prior(Isci)
+    subPK <- subPK * 0.5 + 0.5   # ---> 1 = prior,   0.5 = no prior, but allowed edge
+    
+    PK_in[[i-1]] <- subPK
+    remove(subPK)
+    # SK
+    subSK <- SK[parts[[i-1]],parts[[i]]]
+    subSK <- as.matrix(subSK)
+    SK_in[[i-1]] <- subSK
+    remove(subSK)
+  }
+  
+  #val <- P_D_G(SK_in, param_in)
+  
+  vals <- score_G(SK_in, PK_in, beta_L, beta_H, param_in)
+  vals_change <- score_change_G(SK_in, PK_in, beta_L, beta_H, param_in, sizes, vals, 0, 5, TRUE)
+  
+  
+  
+  
+  return(0.0)
+  
+  return(c_SK_SS(GE_in,PK_in,SK_in, sizes, param_in,
+                 I, debug, alpha, beta_L, beta_H,
+                 score_G, score_change_G))
+}
\ No newline at end of file
diff --git a/source/skeleton.cpp b/source/skeleton.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0e7159bf71adf21b0da26608cffd8cf0581f7175
--- /dev/null
+++ b/source/skeleton.cpp
@@ -0,0 +1,524 @@
+/**
+ * This part will be the header file declarations in package, but <Rcpp::sourceCpp()> requires single file to work, so it is here for now
+ */
+
+#include <numeric>
+#include <bitset>
+#include <cstring>
+#include <vector>
+
+// [[Rcpp::depends(RcppArmadillo)]]
+#include <RcppArmadillo.h>
+
+#include <RcppArmadilloExtensions/sample.h>
+
+//#include <Rcpp.h>
+
+using Rcpp::Rcout;
+using Rcpp::Rcerr;
+using Rcpp::NumericVector;
+using Rcpp::NumericMatrix;
+using Rcpp::DataFrame;
+using Rcpp::Nullable;
+using Rcpp::LogicalMatrix;
+using Rcpp::IntegerMatrix;
+using Rcpp::List;
+using Rcpp::Function;
+using Rcpp::IntegerVector;
+
+using std::bitset;
+
+typedef unsigned int uint32_t;
+
+enum ChangeType
+{
+  ADD = true, 
+  DELETE = false
+};
+
+
+struct Bitset2D {
+  uint8_t** array;
+  uint32_t n;
+  uint32_t m;
+  
+  
+  Bitset2D(uint32_t n, uint32_t m){
+    Rcout << "Created a bitmap of size: " << n << "x" << m << "\n";
+    this->n = n;
+    this->m = m;
+    
+    array = new uint8_t*[n];
+    uint32_t sz = (m + 7) / 8;
+    
+    for(uint32_t i = 0; i < n; i++)
+    {
+      array[i] = new uint8_t[sz];
+      std::memset(array[i], 0, sizeof *(array[i]) * sz);
+    }
+  }
+  
+  ~Bitset2D()
+  {
+    for(uint32_t i = 0; i < n; i++)
+    {
+      delete[] array[i];
+    }
+    delete[] array;
+  }
+  
+  void set(uint32_t x, uint32_t y, bool v)
+  {
+    uint8_t val = array[x][y/8];
+    uint32_t i = y % 8;
+    if(v){
+      val |= 1 << i;
+    }
+    else{
+      val &= ~(1 << i);
+    }
+    
+    array[x][y/8] = val;
+  }
+  
+  bool get(uint32_t x, uint32_t y)
+  {
+    uint8_t val = array[x][y/8];
+    uint32_t i = y % 8;
+    return (val & (1<<i)) != 0;
+  }
+  
+  
+};
+
+// [[Rcpp::export]]
+double tst()
+{
+  uint32_t n = 9, m = 9;
+  Bitset2D t = Bitset2D(n, m);
+  
+  t.set(2,2, ADD);
+  t.set(1,5, ADD);
+  t.set(0,3, ADD);
+  
+  for(int i = 0; i < n; i++)
+  {
+    for(int j = 0; j < m; j++)
+    {
+      Rcout << t.get(i,j) << " ";
+    }
+    Rcout << "\n";
+  }
+  Rcout << "\n";
+  
+  
+  t.set(0, 3, DELETE);
+  for(int i = 0; i < n; i++)
+  {
+    for(int j = 0; j < m; j++)
+    {
+      Rcout << t.get(i,j) << " ";
+    }
+    Rcout << "\n";
+  }
+  Rcout << "\n";
+  
+  t.set(7, 2, ADD);
+  t.set(7, 2, DELETE);
+  t.set(7, 4, true);
+  t.set(7, 5, true);
+  
+  for(int i = 0; i < n; i++)
+  {
+    for(int j = 0; j < m; j++)
+    {
+      Rcout << t.get(i,j) << " ";
+    }
+    Rcout << "\n";
+  }
+  Rcout << "\n";
+  
+  return 0.0;
+}
+
+
+struct GraphChange
+{
+  GraphChange* next = nullptr;
+  GraphChange* prev = nullptr;
+  
+  ChangeType type;
+  
+  uint32_t from;
+  uint32_t to;
+  uint32_t listID;
+  
+  GraphChange* forward(Bitset2D* adj)
+  {
+    adj->set(from, to, type);
+    return next;
+  }
+  GraphChange* backward(Bitset2D* adj)
+  {
+    adj->set(from, to, !type);
+    return prev;
+  }
+  
+  
+  GraphChange* forward(List& adjs)
+  {
+    //LogicalMatrix& mat = Rcpp::as<LogicalMatrix>(adjs[listID]);
+    //adjs[listID](from, to) = type;
+    
+    //const IntegerMatrix& sk = adjs[listID];
+    
+    return next;
+  }
+  GraphChange* backward(List& adjs)
+  {
+    //LogicalMatrix& mat = Rcpp::as<LogicalMatrix>(adjs[listID]);
+    //mat(from, to) = 1-type;
+    //adjs[listID](from, to) = 1-type;
+    //const IntegerMatrix& sk = adjs[listID];
+    
+    return prev;
+  }
+  
+};
+
+
+struct IData
+{
+  const List* GE;
+  const List* PK;
+  const List* SK;
+  const NumericVector* sizes;
+  const List* bge_param;
+  
+  Function* score_full_func;
+  Function* score_change_func;
+};
+struct ISettings
+{
+  uint32_t I;
+  bool debug;
+  double beta_L;
+  double beta_H;
+};
+
+
+class Network
+{
+protected:
+  //static inline Function f_rnorm = Function("rnorm");
+  //static inline Function f_unlist = Function("unlist");
+  
+  uint32_t N;  // number nodes
+  uint32_t M;  // number samples
+
+  Bitset2D* ADJ = nullptr;   // N x N boolean matrix, could be reduced
+  
+  IData input_data;
+  ISettings settings;
+  
+  IntegerMatrix counts;
+  
+  
+  std::vector<GraphChange*> V_G;
+  
+public:  
+  Network(IData input, ISettings settings)
+  {
+    this->input_data = input;
+    this->settings = settings;
+    this->N = Rcpp::sum(*input.sizes);
+    
+    NumericMatrix firstGE = ((*input.GE)[0]);
+    
+    this->M = firstGE.nrow();
+    
+    //this->ADJ = new Bitset2D(SK.ncol(),SK.nrow());
+
+    Rcout << "Network: N=" << N << ", M=" << M << "\n";
+  }
+  
+  ~Network()
+  {
+    if(ADJ != nullptr)
+    {
+      delete ADJ;
+    }
+    
+    
+    if(V_G.size() == 0)
+    {
+      return;
+    }
+    
+    GraphChange* curr = V_G[V_G.size() - 1];
+    while(curr != nullptr)
+    {
+      GraphChange* prev = curr;
+      curr = curr->prev;
+      delete prev;
+    }
+    V_G.clear();
+  }
+  
+  void partialSK_SS()
+  {
+    for(uint32_t i = 0; i < this->settings.I; i++)
+    {
+      Rcout << "partialSK-SS, Iteration: " << i << "\n";
+      this->partialSK_SS_phase1_iter();
+    }
+  }
+  
+protected:  
+  GraphChange* createGraphChange(GraphChange* prev, ChangeType action, uint32_t from, uint32_t to)
+  {
+    GraphChange* ret = new GraphChange;
+    ret->prev = prev;
+    ret->type = action;
+    ret->next = nullptr;
+    ret->from = from;
+    ret->to = to;
+    
+    return ret;
+  }
+  
+  void partialSK_SS_phase1_iter()
+  {
+    List& G = this->emptyG_0();
+    
+    NumericVector twoScores = (*this->input_data.score_full_func)(
+                       G, 
+                       *this->input_data.PK,
+                       this->settings.beta_L,
+                       this->settings.beta_H,
+                       *this->input_data.bge_param
+                    );
+    
+    
+    double score = (twoScores[0] * twoScores[1]);
+
+    const List* SK_list = this->input_data.SK;
+    
+    // for loop over 1...N
+    // for loop over neighbors
+    // try add edge
+    for(uint32_t i = 0; i < SK_list->length(); i++)
+    {
+       const IntegerMatrix& sk = (*SK_list)[i];
+      
+       for(uint32_t j = 0; j < sk.ncol(); j++)
+       {
+          for(uint32_t k = 0; k < sk.nrow(); j++)
+          {
+            if(sk(j,k) == 0) 
+              continue;
+            
+            this->tryPerformMove(G, &score, i, j, k, ChangeType::ADD);
+          }
+       }
+    }
+    
+    
+    // for loop over 1...N
+    // for loop over neighbors
+    // try remove/add edge
+    for(uint32_t i = 0; i < SK_list->length(); i++)
+    {
+      const IntegerMatrix& g = G[i];
+      
+      for(uint32_t j = 0; j < g.ncol(); j++)
+      {
+        for(uint32_t k = 0; k < g.nrow(); j++)
+        {
+          if(g(j,k) == 0)
+          {
+            this->tryPerformMove(G, &score, i, j, k, ChangeType::ADD);
+          }
+          else{
+            this->tryPerformMove(G, &score, i, j, k, ChangeType::DELETE);
+          }
+        }
+      }
+    }
+    
+    
+  }
+  
+  
+  List& emptyG_0()
+  {
+    const List* SK_list = this->input_data.SK;
+    List* ret = new List(SK_list->length());
+    
+    for(uint32_t i = 0; i < SK_list->length(); i++)
+    {
+      const IntegerMatrix& sk = (*SK_list)[i];
+      (*ret)[i] = LogicalMatrix(sk.nrow(), sk.ncol());
+    }
+    
+    // TODO
+    return *ret;
+  }
+  
+  
+  void tryPerformMove(List& adjs, double* score, uint32_t listID, uint32_t from, uint32_t to, ChangeType action)
+  {
+    NumericVector scoreChanges = (*this->input_data.score_change_func)
+    (
+        adjs, 
+        *this->input_data.PK, 
+        this->settings.beta_L, 
+        this->settings.beta_H, 
+        *this->input_data.bge_param,
+        *this->input_data.sizes,
+        *score, 
+        from, 
+        to, 
+        action == ChangeType::ADD);
+    
+    
+    double score_new = scoreChanges[0] * scoreChanges[1];
+    
+    double normalize_c = *score + score_new;
+    
+    
+    NumericVector probs = {*score, score_new};
+    probs = probs / normalize_c;
+    IntegerVector ids = {0, 1};
+    
+    IntegerVector ret = Rcpp::sample(ids, 1, probs = probs);
+    if(ret[0] != 1) return;
+    
+    *score = score_new;
+    
+    this->makeMove(adjs, listID, from, to, action);
+  }
+  
+  
+  void makeMove(List& adjs, uint32_t listID, uint32_t from, uint32_t to, ChangeType action)
+  {
+    GraphChange* toAdd = new GraphChange;
+    toAdd->from = from;
+    toAdd->to = to;
+    toAdd->listID = listID;
+    toAdd->type = action;
+    
+    toAdd->prev = this->V_G.empty() ? nullptr : this->V_G[this->V_G.size() - 1];
+    toAdd->forward(adjs);
+  }
+  
+  
+  
+};
+
+/**
+ * These two functions are the real implementation in C++...
+ */
+// IntegerMatrix c_partialSK_SS(
+//                          const List& GE,
+//                          const List& PK,
+//                          const List& SK,
+//                          const NumericVector& sizes,
+//                          uint32_t I,
+//                          bool debug,
+//                          double alpha);
+IntegerMatrix c_SK_SS(
+                         const List& GE,
+                         const List& PK,
+                         const List& SK,
+                         const NumericVector& sizes,
+                         const List& param,
+                         uint32_t I,
+                         bool debug,
+                         double alpha,
+                         double beta_L, double beta_H,
+                         Function score_full_func,
+                         Function score_change_func);
+
+
+/**
+ * This part is the implementation of above header declarations, it goes to .CPP part in package
+ */
+//#include "skeleton.h"
+
+
+// [[Rcpp::export]]
+double ttt(const IntegerMatrix& SK)
+{
+  Rcout << SK.nrow() << "," << SK.ncol() << "\n";
+  return 0.0;
+}
+
+
+IntegerMatrix c_partialSK_SS(
+                         const List& GE,
+                         const List& PK,
+                         const List& SK,
+                         const NumericVector& sizes,
+                         uint32_t I,
+                         bool debug,
+                         double alpha)
+{
+  Rcout << "I:" << I << "debug:" << debug << "\n";
+  Rcout << "I am in partialSK_SS function!\n";
+  //Network* net = new Network(GE, SK);
+  //delete net;
+  
+  return IntegerMatrix();
+}
+
+// [[Rcpp::export]]
+IntegerMatrix c_SK_SS(
+                  const List& GE,
+                  const List& PK,
+                  const List& SK,
+                  const NumericVector& sizes,
+                  const List& param,
+                  uint32_t I,
+                  bool debug,
+                  double alpha,
+                  double beta_L, double beta_H,
+                  Function score_full_func,
+                  Function score_change_func
+                  )
+{
+  Rcout << "I am in SK_SS function!\n";
+  
+  IData i_data;
+  
+  i_data.GE = &GE;
+  i_data.PK = &PK;
+  i_data.SK = &SK;
+  i_data.sizes = &sizes;
+  i_data.bge_param = &param;
+  
+  i_data.score_full_func = &score_full_func;
+  i_data.score_change_func = &score_change_func;
+  
+  ISettings settings;
+  
+  settings.I = I;
+  settings.debug = debug;
+  settings.beta_H = beta_H;
+  settings.beta_L = beta_L;
+  
+  Network* net = new Network(i_data, settings);
+  
+  net->partialSK_SS();
+  
+  
+  delete net;
+  // double val = Rcpp::as<double>(
+  //               BGe_full(
+  //                 Rcpp::_["G_adjs"] = SK, 
+  //                 Rcpp::_["param_in"] = param)
+  //                 );
+  // Rcout << "BGe score of the input SK is: " << val << "\n";
+  
+  return IntegerMatrix();
+}
diff --git a/workflow.Rmd b/workflow.Rmd
new file mode 100644
index 0000000000000000000000000000000000000000..05d7635f9e9a15eb3bc6e6cda7fe87dddd14e3e7
--- /dev/null
+++ b/workflow.Rmd
@@ -0,0 +1,1177 @@
+---
+---
+---
+
+# Example of ???? package usage
+
+```{r setup}
+#knitr::opts_knit$set(root.dir = '/inst')# COMMENT IF NOT FROM PACKAGE
+#knitr::opts_knit$set(root.dir = 'C:/school/diplomka_package/gpubayesomics/inst')
+# COMMENT IF NOT FROM PACKAGE
+```
+
+## Import code for data pre-processing
+
+```{r, echo=FALSE, warning=FALSE, include=FALSE, error=FALSE,message=FALSE}
+library(dplyr)
+library(gsubfn)
+library(data.table)
+library(biomaRt)
+library("org.Hs.eg.db")
+library(EnsDb.Hsapiens.v86)
+library(miRBaseConverter)
+library(httr)
+library(tidyr)
+
+source("data_preprocess.R")
+```
+
+> #### Note:
+>
+> > Libraries also imported in data_preprocess.R package, so you don't need to import them explicitly
+
+## Preprocess data given by circGPA algorithm
+
+#### Specify input files and map map the data
+
+> #### Note:
+>
+> > This is a large call to an Ensembl database and can fail due to the timeout error.
+> >
+> > It is recommended to download needed files manually from Github
+
+```{r}
+
+
+list[f_expr_m, f_expr_mi, f_expr_circ, f_pk_circ_mi, f_pk_mi_m, f_sample_names] <- 
+          map_MSD_to_prior_names(
+                         f_annot_allrna, 
+                         f_annot_circrna, 
+                         f_prior_circ_mirna, 
+                         f_prior_mi_mrna)
+```
+
+> #### Note:
+>
+> > Runs around ≅ 2 minutes on average PC
+
+#### Subsample sizes
+
+> Due to the IntOMICS algorithm limitations, we can't pass all RNAs at once, so we need to specify the size of the samples,. Size is specified per type of RNA:
+
+```{r}
+NUM_CIRC_RNA_SAMPLE <- 1
+NUM_MI_RNA_SAMPLE <- 25
+NUM_M_RNA_SAMPLE <- 50
+```
+
+> #### Note:
+>
+> > It is recommended to have the number of all RNA around ≅ 60 samples total
+
+#### Max input edges per gene
+
+> The IntOMICS algorithm can't check all possible numbers of input edges. So we need to specify it:
+
+```{r}
+MAX_INPUT_EDGES <- 3
+```
+
+> #### Note:
+>
+> > It is recommended to have the max number of edges ≅ 3-5
+
+#### Run the pre-process -- convert circGPA output to IntOMICS/KiMONO input
+
+> The current algorithm uses such sampling technique:
+>
+> 1.  We take all circular RNAs and sample the needed amount of them
+>
+> 2.  Next, we reduce the micro RNAs to those that are interacting based on circGPA output
+>
+> 3.  Next, we take a sub-sample of needed size from those interacting micro RNAs
+>
+> 4.  Next, we reduce the mRNAs to those that are being interacted by sub-sampled micro RNAs
+>
+> 5.  Finally, we sub-sample mRNAs of needed size from those interacting mRNAs
+
+```{r}
+f_expr_m <- "data/MDS_expr_mrna.RData"
+f_expr_mi <- "data/MDS_expr_mirna.RData" 
+f_expr_circ <- "data/MDS_expr_circ.RData"
+f_pk_circ_mi <- "data/MDS_prior_circ_mi.RData"
+f_pk_mi_m <- "data/MDS_prior_mi_m.RData"
+f_sample_names <- "data/MDS_sample_names.RData"
+input_all <- data_preprocess(
+                            f_expr_m, 
+                            f_expr_mi, 
+                            f_expr_circ, 
+                            f_pk_circ_mi, 
+                            f_pk_mi_m, 
+                            f_sample_names,
+                            NUM_CIRC_RNA_SAMPLE,
+                            NUM_MI_RNA_SAMPLE,
+                            NUM_M_RNA_SAMPLE,
+                            MAX_INPUT_EDGES)
+```
+
+# MMHC application
+
+```{r}
+library(dplyr)
+library(gsubfn)
+library(data.table)
+library(biomaRt)
+library("org.Hs.eg.db")
+library(EnsDb.Hsapiens.v86)
+library(stringr)
+library(bnlearn)
+
+source("data_preprocess.R")
+source("IntOMICS_improved/improved_UB.R")
+
+f_expr_m <- "data/MDS_expr_mrna.RData"
+f_expr_mi <- "data/MDS_expr_mirna.RData" 
+f_expr_circ <- "data/MDS_expr_circ.RData"
+f_pk_circ_mi <- "data/MDS_prior_circ_mi.RData"
+f_pk_mi_m <- "data/MDS_prior_mi_m.RData"
+f_sample_names <- "data/MDS_sample_names.RData"
+
+list[GE, PK, partitions] <- mmpc_convert(
+                            f_expr_m, 
+                            f_expr_mi, 
+                            f_expr_circ, 
+                            f_pk_circ_mi, 
+                            f_pk_mi_m, 
+                            f_sample_names,
+                            subset=12,
+                            is_PK = FALSE)
+
+GE <- data.frame(GE)
+#colnames(PK) <- c("from", "to")
+```
+
+```{r}
+library(bnlearn)
+library(parallel)
+
+cl <- makeCluster(4)
+
+GE <- readRDS("data/MMPC_GE.RData")
+GE <- data.frame(GE)
+
+PK <- readRDS("data/MMPC_PK.RData")
+colnames(PK) <- c("from", "to")
+```
+
+```{r, echo=FALSE, warning=FALSE, include=FALSE, error=FALSE,message=FALSE}
+suppressWarnings(attach(getNamespace("bnlearn")))
+```
+
+```{r}
+library(bnlearn)
+source("source/mmpc_optimized.R")
+
+defaultW <- getOption("warn") 
+
+options(warn = -1) 
+
+#adj <- mmpc_bnlearn_classic(GE, debug=TRUE, blacklist = PK)#, cluster=cl)
+#adj <- bnlearn::mmpc(GE, debug=TRUE, blacklist = PK)
+#adj <- amat(adj)
+
+adj2 <- mmpc_bnlearn_tripartite(GE, debug=TRUE, partitions = partitions)#, cluster=cl)
+adj2 <- amat(adj2)
+
+options(warn = defaultW)
+
+#print(max(abs(adj - adj2)))
+```
+
+#### Compute the number of components
+
+```{r}
+library(igraph)
+library(knitr)
+
+print_connected_components <- function(adj)
+{
+    g <- graph.adjacency(adj)
+    clu <- components(g)
+    comps <- groups(clu)
+    freq_data <- table(apply(comps, 1, function(x) length(unlist(x))))
+    names(freq_data) <- paste("Comp of size: ", names(freq_data))
+    
+    kable(freq_data)
+}
+```
+
+```{r}
+adjj <- readRDS("data/MMPC_only_expr_adj.RData")
+tbl_expr <- print_connected_components(adjj)
+```
+
+```{r}
+adjj2 <- readRDS("data/MMPC_whitelist_adj.RData")
+tbl_wtlst <- print_connected_components(adjj2)
+```
+
+```{r}
+tbl_expr
+```
+
+```{r}
+PK <- readRDS("data/MMPC_only_expr_adj.RData")
+GE <- readRDS("MMPC_setup/cgpa_MDS_GE.RData")
+parts <- readRDS("MMPC_setup/cgpa_MDS_parts.RData")
+
+GE1 <- GE[parts[[1]],parts[[2]]]
+GE1 <- as.matrix(GE1)
+GE1 <- t(GE1)
+GE2 <- GE[parts[[2]],parts[[3]]]
+GE2 <- as.matrix(GE2)
+
+GE_in <- cbind(GE1, GE2)
+rownames(GE_in) <- parts[[2]]
+colnames(GE_in) <- c(parts[[1]], parts[[3]])
+```
+
+## Skeleton-based Stochastic Search(SK-SS), modified
+
+```{r}
+source("source/general.R")
+source("source/skeleton.R")
+
+
+GE <- readRDS("MMPC_setup/cgpa_MDS_GE.RData")
+PK <- readRDS("MMPC_setup/adj.RData")
+SK <- PK
+parts <- readRDS("MMPC_setup/cgpa_MDS_parts.RData")
+
+
+```
+
+```{r}
+SK_SS_tripartite(GE,PK,SK,parts)
+```
+
+## IntOMICS
+
+#### Decompose the result
+
+```{r, results='asis'}
+input_intomics <- input_all[[1]]
+list[omics, layer_def, PK, PK_present, PK_real, sizes] <- input_intomics
+NUM_M_RNA_SAMPLE <- sizes[1]
+NUM_MI_RNA_SAMPLE <- sizes[2]
+NUM_CIRC_RNA_SAMPLE <- sizes[3]
+
+sprintf("We subsampled these numbers: mRNA:%s, miRNA:%s, circRNA:%s", 
+        NUM_M_RNA_SAMPLE, NUM_MI_RNA_SAMPLE, NUM_CIRC_RNA_SAMPLE)
+```
+
+> **Some explanation:**
+>
+> First 4 values will be directly passed to the IntOMICS, they are defined(as in original paper) like that:
+
+-   **OMICS** -- Gene Expression(GE) matrix of all selected RNA samples
+
+-   **layer_def** -- generally speaking, it is the list of used features of IntOMICS algorithm. We only use the GE matrix feature and also specifying our max number of input edges here
+
+-   **PK** -- Prior Knowledge(PK) matrix. It is the table of all *absent* edges in the graph. For now - we will only block biologically invalid interactions, e.g. specify *tripartite graph* rules:
+
+    -   Add *absent* edges for every combination of genes except *CIRC -\> MI -\> M*
+
+-   **PK_present** -- Prior Knowledge(PK) matrix, same as **PK**, but has added *present* edges that are given by *circGPA* output. Will be used instead of **PK** in second experiment.
+
+> Next value will be used for further analysis
+
+-   **PK_real** -- table of prior interactions *given by* circGPA, e.g., between [circRNA -\> miRNA] and [miRNA-\> mRNA]. Has a form of 0/1 matrix
+
+#### Import libraries
+
+```{r, echo=FALSE, warning=FALSE, error=FALSE,message=FALSE, include=FALSE}
+require("knitr")
+require("bnlearn")
+library(bnlearn)
+library(foreach)
+library(GGally)
+library(bestNormalize)
+library(bnstruct)
+library(rlist)
+library(matrixStats)
+library(parallel)
+library(numbers)
+```
+
+#### Import modified IntOMICS source code
+
+```{r}
+load("intomics_data/input_data.RData")
+#input_all <- readRDS("../input_all_16.01.2023")
+input_all <- readRDS("save_data/input_all_16.01.2023_(250x4)_(1x5)")
+```
+
+```{r}
+source_code_dir <- "IntOMICS_noimproved/"  # The directory where IntOMICS source code files #are saved.
+
+ # Original IntOMICS
+#source_code_dir <- "IntOMICS_improved/" 
+
+file_path_vec <- list.files(source_code_dir, full.names = T)
+
+for(f_path in file_path_vec){source(f_path)}
+```
+
+#### Note:
+
+> The main modification is the support of absent edges -- e.g., the original algorithm suffered from the problem of sparse matrices -- it considered that each gene can be connected with other gene, even though the absence prior knowledge is added. So, I added some if statements and replaced some values. Hopefully, it didn't break the code.
+
+```{r}
+library(ggnewscale)
+library(ggplot2)
+library(dplyr)  
+library(patchwork)
+library(igraph)
+source("IntOMICS_improved/improved_UB.R")
+
+```
+
+```{r}
+OMICS_module_res_1 <- OMICS_module(omics = GSE127960_WT_omics, PK = GSE127960_WT_PK, layers_def = GSE127960_WT_layers_def)
+
+```
+
+```{r}
+res_ret <- aut.tuned.MCMC_sampling(burn_in = burn_in, 
+				      thin = thin, 
+				      seed1 = 6782,
+				      seed2 = 375,
+				      OMICS_module_res = OMICS_module_res_1, 
+				      minseglen = minseglen)
+```
+
+#### Run IntOMICS without PK
+
+```{r}
+OMICS_module_res <- OMICS_module(omics = omics, PK = PK_present, layers_def = layer_def)
+```
+
+> #### Note:
+>
+> > Runs around ≅ 5-10 minutes on average PC
+
+> **Setup choosing**
+>
+> Now, since the IntOMICS algorithm is based on sampling, we can sample with different numbers of iterations. Below are two setups used for testing, each has its time consumption in its header name. The time is specified for the default sample sizes above.
+>
+> The main two parameters group to increase:
+>
+> -   **burn_in** -- number of iterations in some part of IntOMICS algorithm. The more it is set -- the more precise it is
+>
+> -   **thin/minseglen** -- hyper-parameters that are used as threshold for output weights.(NOT TESTED PROPERLY)
+>
+> Generally, our sparse matrices run faster than the examples given by IntOMICS of the same size, but even so, it is very time consuming for such small sample sizes.
+
+#### 11 minute setup
+
+```{r}
+burn_in = 500
+thin = 50
+minseglen = 250
+```
+
+#### OR 90 minute setup
+
+```{r}
+burn_in = 100000
+thin = 500
+minseglen = 50000
+```
+
+#### Run
+
+```{r}
+start.time <- Sys.time()
+
+first_res <- aut.tuned.MCMC_sampling(burn_in = burn_in, 
+				      thin = thin, 
+				      seed1 = 1,
+				      seed2 = 5,
+				      OMICS_module_res = OMICS_module_res, 
+				      minseglen = minseglen)
+
+end.time <- Sys.time()
+time.taken <- end.time - start.time
+time.taken
+```
+
+```{r}
+subsample_PK_present <- function(pk_input, K=NA)
+{
+    abs_only <- pk_input[pk_input$edge_type == "absent",]
+    pres_only <- pk_input[pk_input$edge_type == "present",]
+    
+    N <- nrow(pres_only)
+    
+    if(is.na(K))
+    {
+        K = N / 2
+    }
+    else if(typeof(K) == "double")
+    {
+        K = N * K
+    }
+    K = as.integer(K)
+    
+    print(sprintf("Taking %s random edges from %s", K, N))
+    
+    idxs <- sample.int(N, K)
+    print(idxs)
+    
+    pres_only <- pres_only[idxs,]
+    #View(abs_only)
+    #View(pres_only)
+    ret <- rbind(abs_only, pres_only)
+    
+    return(ret)
+}
+```
+
+```{r}
+
+
+res2_PK_present <- subsample_PK_present(PK_present, K=0.125)
+#View(res2_PK_present)
+
+
+OMICS_module_res2 <- OMICS_module(omics = omics, PK = PK_present, layers_def = layer_def)
+
+
+
+start.time <- Sys.time()
+
+second_res <- aut.tuned.MCMC_sampling(burn_in = burn_in, 
+				      thin = thin, 
+				      seed1 = 2789,
+				      seed2 = 2233,
+				      OMICS_module_res = OMICS_module_res2, 
+				      minseglen = minseglen)
+
+end.time <- Sys.time()
+time.taken <- end.time - start.time
+time.taken
+```
+
+#### Save the result
+
+```{r}
+params <- data.frame(burn_in=burn_in, thin=thin, minseglen=minseglen)
+dir.create("saved_results")
+saveRDS(input_all, "saved_results/input_all")
+str1 <- sprintf("saved_results/first_res")
+saveRDS(list(input_intomics, params, first_res), str1)
+str2 <- sprintf("saved_results/second_res")
+saveRDS(list(input_intomics, params, second_res), str2)
+```
+
+#### (Optional) Read the results
+
+```{r, eval = FALSE}
+input_all <- readRDS("saved_results/input_all")
+
+str1 <- sprintf("saved_results/first_res")
+list[input_intomics, params, first_res] <- readRDS(str1)
+str2 <- sprintf("saved_results/second_res")
+list[input_intomics, params, second_res] <- readRDS(str2)
+
+list[omics, layer_def, PK, PK_present, PK_real, sizes] <- input_intomics
+NUM_M_RNA_SAMPLE <- sizes[1]
+NUM_MI_RNA_SAMPLE <- sizes[2]
+NUM_CIRC_RNA_SAMPLE <- sizes[3]
+```
+
+## Further analysis
+
+#### Experiment 1 - IntOMICS performance control
+
+> Our first experiment is straightforward -- we want to examine the number of interactions that are being correctly/incorrectly classified by IntOMICS, given the *circGPA* output as correct
+
+```{r}
+compare_results <- function(PK_true, PK_pred, name="IntOMICS")
+{
+    total_misclassified <- sum(PK_true!=PK_pred)
+    pos_examples <- PK_true == 1
+    TP <- sum(PK_pred[pos_examples] == 1)
+    FN <- sum(PK_pred[pos_examples] == 0)
+    neg_examples <- PK_true == 0
+    TN <- sum(PK_pred[neg_examples] == 0)
+    FP <- sum(PK_pred[neg_examples] == 1)
+    
+    total_misclassified <- sum(PK_true!=PK_pred)
+    
+    res <- data.frame(TP=TP, FP=FP, TN=TN, FN=FN, total_misclassified=total_misclassified)
+    
+    rownames(res)[1] <- name
+    
+    print(res)
+    
+    #View(res[(n1+1):(n1+n2),1:(n1)], "MI->M")
+    #View(res[(n1+n2+1):(n1+n2+n3),(n1+1):(n1+n2)], "CIRC->MI")
+}
+
+extract_allowed_interactions <- function(B_p)
+{
+    N <- nrow(B_p)
+    n1 <- NUM_M_RNA_SAMPLE
+    n2 <- NUM_MI_RNA_SAMPLE
+    n3 <- NUM_CIRC_RNA_SAMPLE
+    
+    B_circ_mi <- B_p[(n1+n2+1):(n1+n2+n3),(n1+1):(n1+n2)]
+    B_mi_mrna <- B_p[(n1+1):(n1+n2), (1):(n1)]
+    return(c(unlist(B_circ_mi), unlist(B_mi_mrna)))
+}
+
+```
+
+```{r}
+N <- nrow(second_res$B_prior_mat_weighted)
+
+intomics_output_1 <- first_res$B_prior_mat_weighted[1:N,] > 0
+intomics_output_2 <- second_res$B_prior_mat_weighted[1:N,] > 0
+
+circgrpa_output <- PK_real[1:N,] > 0
+
+mode(circgrpa_output) <- "integer"
+mode(intomics_output_1) <- "integer"
+mode(intomics_output_2) <- "integer"
+
+circgrpa_output <- extract_allowed_interactions(circgrpa_output)
+intomics_output_1 <- extract_allowed_interactions(intomics_output_1)
+intomics_output_2 <- extract_allowed_interactions(intomics_output_2)
+
+compare_results(circgrpa_output, intomics_output_1, name="Without edges as Prior")
+compare_results(circgrpa_output, intomics_output_2, name="With edges as Prior")
+```
+
+```{r}
+lst <- readRDS("../first_res_and_input_all_18.01.2023_(250x4)_(1x5)")
+f
+```
+
+## Correlations of the original data
+
+```{r, echo=FALSE, warning=FALSE, error=FALSE,message=FALSE, include=FALSE}
+library(ggplot2)
+library(dplyr)
+library(hrbrthemes)
+```
+
+```{r, warning=FALSE}
+cGE <- cor(omics$ge, method = c("pearson", "kendall", "spearman"))
+n1 <- NUM_M_RNA_SAMPLE
+n2 <- NUM_MI_RNA_SAMPLE
+n3 <- NUM_CIRC_RNA_SAMPLE
+
+
+# Circ - mi
+cor_circ_mi <- cGE[nrow(cGE), (n1+1):(n1+n2)]
+hist(cor_circ_mi, main="All correlations of 1 circRNA with all miRNAs", breaks=25)
+
+
+# mi - mrna
+cor_mi_mrna <- cGE[(n1+1):(n1+n2), 1:n1]
+cor_mi_mrna <- as.vector(cor_mi_mrna)
+PK_mask <- PK_real[(n1+1):(n1+n2), 1:n1]
+PK_mask <- as.vector(PK_mask)
+mode(PK_mask) <- "logical"
+cor_mi_mrna_cgpa <- cor_mi_mrna[PK_mask]
+
+Nc1 <- length(cor_mi_mrna_cgpa)
+Nc2 <- length(cor_mi_mrna)
+
+cor_data <- data.frame(
+    type=c(rep("All miRNA/mRNA", Nc2), rep("Only circGPA miRNA/mRNA correlations", Nc1)),
+    value=c(cor_mi_mrna, cor_mi_mrna_cgpa)
+)
+
+cor_data %>%
+  ggplot( aes(x=value, fill=type, y=..density..)) +
+    geom_histogram(alpha=0.5, binwidth = 0.01,
+                    position = 'identity') +
+    scale_fill_manual(values=c("red", "blue")) +
+    theme_ipsum() +
+    labs(fill="")
+
+```
+
+## KiMONO
+
+```{r, echo=FALSE, warning=FALSE, include=FALSE, error=FALSE,message=FALSE}
+
+library(dplyr)
+library(gsubfn)
+library(data.table)
+library(biomaRt)
+library("org.Hs.eg.db")
+library(EnsDb.Hsapiens.v86)
+source("data_preprocess.R")
+
+kimono_input <- input_all[[2]]
+
+list[input_data, PK_present, sizes] <- kimono_input
+
+n1 <- sizes[1]
+n2 <- sizes[2]
+n3 <- sizes[3]
+
+sprintf("We subsampled these numbers: mRNA:%s, miRNA:%s, circRNA:%s", 
+        n1, n2, n3)
+install.packages("KiMONO/", repos = NULL, type = "source")
+library(kimono)
+```
+
+```{r}
+subsample_PK_present <- function(pk_input, K=NA)
+{
+    N <- nrow(pk_input)
+    
+    if(is.na(K))
+    {
+        K = N / 2
+    }
+    else if(typeof(K) == "double")
+    {
+        K = N * K
+    }
+    K = as.integer(K)
+    
+    print(sprintf("Taking %s random edges from %s", K, N))
+    
+    idxs <- sample.int(N, K)
+    print(idxs)
+    
+    pk_input <- pk_input[idxs,]
+
+    return(pk_input)
+}
+```
+
+```{r eval=FALSE}
+# you should install following libraries
+# data.table,
+# dplyr,
+# oem,
+# foreach,
+# igraph,
+# doSNOW,
+# tidyverse,
+# mice,
+# impute,  ------> Biocmanager was needed
+# miselect,
+# checkmate,
+# hmlasso
+
+
+input_data[[1]] <- data.table(input_data[[1]])
+input_data[[2]] <- data.table(input_data[[2]])
+input_data[[3]] <- data.table(input_data[[3]])
+
+PK_present_input <- subsample_PK_present(PK_present, K=1.0)
+prior_network <- create_prior_network(PK_present_input)
+
+network <- kimono(input_data=input_data, prior_network = prior_network, core=2, infer_missing_prior = FALSE)
+
+
+library(dplyr)
+
+
+to_igraph(network) %>% plot_kimono(title='KiMONo Network (directed)')
+```
+
+```{r, eval=FALSE}
+saveRDS(list(kimono_input, network), "saved_results/kimono_res")
+```
+
+```{r}
+list[kimono_input, network] <- readRDS("saved_results/kimono_res")
+```
+
+```{r}
+N <- nrow(PK_real)
+
+kimono_output <- matrix(0, nrow = N, ncol = N)
+kimono_output <- data.frame(kimono_output)
+
+colnames(kimono_output) <- colnames(PK_real)
+rownames(kimono_output) <- rownames(PK_real)
+
+for(rowidx in 1:nrow(network))
+{
+    row_kimono <- network[rowidx,]
+    if(row_kimono$value == 0)
+    {
+        next
+    }
+    
+    if((row_kimono$predictor  == '(Intercept)') || 
+       (row_kimono$target  == '(Intercept)'))
+    {
+        next
+    }
+    
+    from <- row_kimono$predictor
+    from <- paste("ENTREZID:", from,sep="")
+    to <- row_kimono$target
+    to <- paste("ENTREZID:", to,sep="")
+    kimono_output[from,to] <- 1
+}
+
+circgrpa_output <- PK_real[1:N,] > 0
+
+mode(circgrpa_output) <- "integer"
+
+for(rowidx in 1:N)
+{
+    mode(kimono_output[rowidx,]) <- "integer"
+    mode(kimono_output[,rowidx]) <- "integer"
+}
+
+#circgrpa_output <- extract_allowed_interactions(circgrpa_output)
+#kimono_output <- extract_allowed_interactions(kimono_output)
+
+compare_results(circgrpa_output, kimono_output, name="With all Prior")
+```
+
+## Correctness comparison
+
+#### circGPA miRNA - mRNA verified
+
+```{r}
+
+library(multiMiR) # Biocmanager::install("multiMir")
+library(dplyr)
+GLOBAL_MART <<- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl")
+```
+
+```{r}
+
+check_all_mirnas <- function(circGPA_mi_m_inter_df, strong_only=TRUE, save_freq=10)
+{
+    dir.create("tmp_files")
+    
+    mirnas <- circGPA_mi_m_inter_df$mature_mirna_id
+    mirnas <- unique(mirnas)
+    M <- length(mirnas)
+    
+    mrnas <- circGPA_mi_m_inter_df$target_symbol
+    mrnas <- unique(mrnas)
+    N <- length(mrnas)
+    
+    ret <- data.frame(matrix(NA, nrow = M, ncol = (N+7)))
+    colnames(ret) <- c("miRNA_id", "circGPA predicted", "All validated", 
+                       "TP", "TN", "FN", "FP",
+                       paste("MRNA: ", mrnas, sep=""))
+    
+    idx <- 1
+    
+    start.time.global <- Sys.time()
+    
+    for(mirna_ in mirnas)
+    {
+        start.time <- Sys.time()
+        
+        
+        invisible(
+            db <- get_multimir(mirna=mirna_, 
+                                  table="validated", 
+                                  summary = TRUE)
+        )
+        
+        cat("\014")  
+        
+        if(strong_only)
+        {
+            exp1 <- grepl("Luciferase reporter assay", db@data$experiment, 
+                          fixed=TRUE)
+            exp2 <- grepl("qRT-PCR", db@data$experiment, fixed=TRUE)
+            exp3 <- grepl("Western blot", db@data$experiment, fixed=TRUE)
+            
+            exp <- exp1 | exp2 | exp3
+            validated <- db@data[exp,]
+            type <- "STRONG"
+        }
+        else{
+            validated <- db@summary
+            type <- "WEAK"
+        }
+        
+        validated_mrnas <- validated$target_symbol
+        validated_mrnas <- unique(validated_mrnas)
+        
+        validated <- mrnas %in% validated_mrnas
+        
+        circGPA_mrnas <- circGPA_mi_m_inter_df[circGPA_mi_m_inter_df$mature_mirna_id == mirna_,]$target_symbol
+        
+        circGPA_mrnas <- unique(circGPA_mrnas)
+        
+        predicted <- mrnas %in% circGPA_mrnas
+        total_circgpa <- sum(predicted)
+        total_validated <- sum(validated)
+
+        mrnas_data <- 100 + validated * 10 + predicted
+        FN <- sum(mrnas_data == 110) 
+        FP <- sum(mrnas_data == 101)
+        TP <- sum(mrnas_data == 111)
+        TN <- sum(mrnas_data == 100)
+        #mrnas_data <- toString(mrnas_data)
+        #idss <- match(mrnas, validated_mrnas)
+        #experiments <- db@data$experiment[idss]
+        
+        #mrnas_data <- paste(mrnas_data, experiments)
+        
+        ret[idx,] <- c(mirna_, total_circgpa, total_validated, TP, TN, FN, FP,
+                       mrnas_data)
+        idx <- idx+1
+        
+        end.time <- Sys.time()
+        time.taken <- end.time - start.time
+        print(time.taken)
+        print(sprintf("Time taken: %s", time.taken))
+        print(sprintf("%s / %s", idx, M))
+    }
+    
+    end.time.global <- Sys.time()
+    time.taken <- end.time.global - start.time.global
+    print(sprintf("Time taken for the entire table: %s", time.taken))
+    
+    
+    return(ret)
+}
+
+mi_m_output <- input_all[[3]]
+mi_m_output <- data.frame(mature_mirna_id=mi_m_output$mirna_orig, 
+                          target_symbol=mi_m_output$mrna_orig)
+
+cgpatable_strong <- check_all_mirnas(mi_m_output)
+cgpatable_weak <- check_all_mirnas(mi_m_output, strong_only = FALSE)
+```
+
+#### KiMONO miRNA - mRNA verified
+
+```{r}
+convert_enseml_symbol <- function(ensg)
+{
+    ret <- mapIds(org.Hs.eg.db, keys=ensg, keytype="ENSEMBL", column = "SYMBOL")
+    return(ret)
+}
+
+
+validated_kimono <- function(kimono_in_pk, kimono_out_net, strong_only=TRUE, save_freq=10)
+{
+    dir.create("tmp_files")
+    
+    all_mirnas1 <- kimono_in_pk$A[kimono_in_pk$layer_A == "miRNA"]
+    all_mirnas2 <- kimono_in_pk$B[kimono_in_pk$layer_B == "miRNA"]
+    all_mirnas_orig <- c(all_mirnas1, all_mirnas2)
+    all_mirnas_orig <- unique(all_mirnas_orig)
+
+    mirnas_db <- getBM(attributes=c('mirbase_id', 'ensembl_gene_id', 'mirbase_trans_name'),
+                             filters = 'ensembl_gene_id',
+                             values = all_mirnas_orig,
+                             mart = GLOBAL_MART)
+    
+    print("Conversion of miRNA: (if not everything -> BIG ERROR)")
+    print(nrow(mirnas_db))
+    print(length(all_mirnas_orig))
+    
+    all_mirnas <- mirnas_db$mirbase_id
+    all_mirnas <- data.frame(pk_ensg_name=all_mirnas_orig, symbol_name=all_mirnas)
+    rm(all_mirnas1)
+    rm(all_mirnas2)
+    rm(all_mirnas_orig)
+    rm(mirnas_db)
+    
+    M <- nrow(all_mirnas)
+
+    all_mrnas_orig <- kimono_in_pk$B[kimono_in_pk$layer_B == "mRNA"]
+    all_mrnas_orig <- unique(all_mrnas_orig)
+    all_mrnas <- convert_enseml_symbol(all_mrnas_orig)
+    all_mrnas <- unique(all_mrnas)
+    N <- length(all_mrnas)
+    all_mrnas <- data.frame(pk_ensg_name=all_mrnas_orig, symbol_name=all_mrnas)
+
+    ret <- data.frame(matrix(NA, nrow = M, ncol = (N+7)))
+    colnames(ret) <- c("miRNA_id", "KiMONO predicted", "All validated", 
+                       "TP", "TN", "FN", "FP",
+                       paste("MRNA: ", all_mrnas$pk_ensg_name, sep=""))
+
+    idx <- 1
+    
+    start.time.global <- Sys.time()
+    for(idx in 1:M)
+    {
+        mirna_ <- all_mirnas$symbol_name[idx]
+        mirna_ensg_ <- all_mirnas$pk_ensg_name[idx]
+        
+        start.time <- Sys.time()
+        
+        mirna_no_num <- substring(mirna_,1,nchar(mirna_)-2)
+        print(mirna_)
+        
+        mature_all <- c(mirna_,
+                        mirna_no_num,
+                        paste(mirna_, "-3p", sep=""),
+                        paste(mirna_, "-5p", sep=""),
+                        paste(mirna_no_num, "-3p", sep=""),
+                        paste(mirna_no_num, "-5p", sep=""))
+        
+        db <- get_multimir(mirna=mature_all,
+                                      table="validated", 
+                                      summary = TRUE)
+        
+        #cat("\014")  
+        
+        if(strong_only)
+        {
+            exp1 <- grepl("Luciferase reporter assay", db@data$experiment, 
+                          fixed=TRUE)
+            exp2 <- grepl("qRT-PCR", db@data$experiment, fixed=TRUE)
+            exp3 <- grepl("Western blot", db@data$experiment, fixed=TRUE)
+            
+            exp <- exp1 | exp2 | exp3
+            validated <- db@data[exp,]
+            type <- "STRONG"
+        }
+        else{
+            validated <- db@summary
+            type <- "WEAK"
+        }
+        
+        validated_mrnas <- validated$target_symbol
+        validated_mrnas <- unique(validated_mrnas)
+        
+        validated <- all_mrnas$symbol_name %in% validated_mrnas
+        
+        
+        mi_to_m_cond <- kimono_out_net$predictor_layer == "miRNA" &
+                        kimono_out_net$target_layer == "mRNA"
+        
+        mi_to_m_cond <- mi_to_m_cond & kimono_out_net$predictor == mirna_ensg_
+        
+        
+        m_to_mi_cond <- kimono_out_net$predictor_layer == "mRNA" &
+                        kimono_out_net$target_layer == "miRNA"
+        
+        m_to_mi_cond <- m_to_mi_cond & kimono_out_net$target == mirna_ensg_
+        
+        correct_way <- kimono_out_net$target[mi_to_m_cond]
+        reverse_way <- kimono_out_net$predictor[m_to_mi_cond]
+        
+        all_connections <- c(correct_way, reverse_way)
+        all_connections <- unique(all_connections)
+        
+        predicted <- all_mrnas$pk_ensg_name %in% all_connections
+        total_circgpa <- sum(predicted)
+        total_validated <- sum(validated)
+
+        mrnas_data <- 100 + validated * 10 + predicted
+        FN <- sum(mrnas_data == 110) 
+        FP <- sum(mrnas_data == 101)
+        TP <- sum(mrnas_data == 111)
+        TN <- sum(mrnas_data == 100)
+        
+        ret[idx,] <- c(mirna_, total_circgpa, total_validated, TP, TN, FN, FP,
+                       mrnas_data)
+        idx <- idx+1
+        
+        end.time <- Sys.time()
+        time.taken <- end.time - start.time
+        print(time.taken)
+        print(sprintf("Time taken: %s", time.taken))
+        print(sprintf("%s / %s", idx, M))
+        
+        if(idx %% save_freq == 0)
+        {
+            saveRDS(ret, sprintf("tmp_files/kimono_validation_table_%s_%s_%s_%s", 
+                             idx-1, M,
+                             ((FN + FP) != 0), type))
+        }
+    }
+    
+    end.time.global <- Sys.time()
+    time.taken <- end.time.global - start.time.global
+    print(sprintf("Time taken for the entire table: %s", time.taken))
+    
+    
+    return(ret)
+}
+
+kimono_valtable <- validated_kimono(kimono_input[[2]], network)
+
+kimono_valtable_weak <- validated_kimono(kimono_input[[2]], network, strong_only = FALSE)
+```
+
+#### IntOMICS miRNA - mRNA verified
+
+```{r}
+convert_enseml_symbol <- function(ensg)
+{
+    ret <- mapIds(org.Hs.eg.db, keys=ensg, keytype="ENSEMBL", column = "SYMBOL")
+    return(ret)
+}
+validated_intomics <- function(intomics_out, strong_only=TRUE, save_freq=10)
+{
+    dir.create("tmp_files")
+
+    n1 <- NUM_M_RNA_SAMPLE
+    n2 <- NUM_MI_RNA_SAMPLE
+    n3 <- NUM_CIRC_RNA_SAMPLE
+    
+    zero_one_interactions <- intomics_out > 0
+    
+    zero_one_mi_mrna <- zero_one_interactions[(n1+1):(n1+n2),(1):(n1)]
+    entrez_names <- rownames(zero_one_mi_mrna)
+    
+    all_mirnas_orig <- gsub("ENTREZID:", "", entrez_names)
+    mirnas_db <- getBM(attributes=c('mirbase_id', 'ensembl_gene_id', 'mirbase_trans_name'),
+                             filters = 'ensembl_gene_id',
+                             values = all_mirnas_orig,
+                             mart = GLOBAL_MART)
+    
+    print("Conversion of miRNA: (if not everything -> BIG ERROR)")
+    print(nrow(mirnas_db))
+    print(length(all_mirnas_orig))
+    
+    
+    all_mirnas <- mirnas_db$mirbase_id
+    all_mirnas <- data.frame(pk_ensg_name=all_mirnas_orig, symbol_name=all_mirnas)
+    
+    entrez_names <- colnames(zero_one_mi_mrna)
+    all_mrnas_orig <- gsub("ENTREZID:", "", entrez_names)
+    all_mrnas <- convert_enseml_symbol(all_mrnas_orig)
+    all_mrnas <- data.frame(pk_ensg_name=all_mrnas_orig, symbol_name=all_mrnas)
+    
+    N <- n1
+    print(N)
+    
+    M <- n2
+    print(M)
+    
+    ret <- data.frame(matrix(NA, nrow = M, ncol = (N+7)))
+    colnames(ret) <- c("miRNA_id", "KiMONO predicted", "All validated", 
+                       "TP", "TN", "FN", "FP",
+                       paste("MRNA: ", all_mrnas, sep=""))
+    
+    ret[,(6):(N+5)] <- zero_one_mi_mrna * 1
+    
+    
+    
+    for(idx in 1:M)
+    {
+        mirna_ <- all_mirnas$symbol_name[idx]
+        mirna_no_num <- substring(mirna_,1,nchar(mirna_)-2)
+        print(mirna_)
+        
+        mature_all <- c(mirna_,
+                        mirna_no_num,
+                        paste(mirna_, "-3p", sep=""),
+                        paste(mirna_, "-5p", sep=""),
+                        paste(mirna_no_num, "-3p", sep=""),
+                        paste(mirna_no_num, "-5p", sep=""))
+        
+        db <- get_multimir(mirna=mature_all,
+                                      table="validated", 
+                                      summary = TRUE)
+        
+        #cat("\014")  
+        
+        if(strong_only)
+        {
+            exp1 <- grepl("Luciferase reporter assay", db@data$experiment, 
+                          fixed=TRUE)
+            exp2 <- grepl("qRT-PCR", db@data$experiment, fixed=TRUE)
+            exp3 <- grepl("Western blot", db@data$experiment, fixed=TRUE)
+            
+            exp <- exp1 | exp2 | exp3
+            validated <- db@data[exp,]
+            type <- "STRONG"
+        }
+        else{
+            validated <- db@summary
+            type <- "WEAK"
+        }
+        
+        
+        validated_mrnas <- validated$target_symbol
+        validated_mrnas <- unique(validated_mrnas)
+        
+        validated <- all_mrnas$symbol_name %in% validated_mrnas
+        
+        predicted <- ret[idx,(8):(N+5)]
+        
+        ret[idx,(8):(N+5)] <- 100 + predicted + 10 * validated
+        FN <- sum(ret[idx,(8):(N+5)] == 110) 
+        FP <- sum(ret[idx,(8):(N+5)] == 101)
+        TP <- sum(ret[idx,(8):(N+5)] == 111)
+        TN <- sum(ret[idx,(8):(N+5)] == 100)
+        
+        total_circgpa <- sum(predicted)
+        total_validated <- sum(validated)
+        
+        ret[idx, 1:7] <- c(mirna_, total_circgpa, total_validated, TP, TN, FN, FP)
+    }
+    
+    return(ret)
+}
+
+
+intomics_vtable_strong <- validated_intomics(first_res$B_prior_mat_weighted)
+
+intomics_vtable_weak <- validated_intomics(first_res$B_prior_mat_weighted, strong_only = FALSE)
+```
+
+#### Total comparison of algorithms
+
+```{r, eval=FALSE}
+saveRDS(cgpatable_strong, "inst/saved_results/cgpatable_strong")
+saveRDS(cgpatable_weak, "inst/saved_results/cgpatable_weak")
+saveRDS(kimono_valtable, "inst/saved_results/kimono_valtable")
+saveRDS(kimono_valtable_weak, "inst/saved_results/kimono_valtable_weak")
+saveRDS(intomics_vtable_strong, "inst/saved_results/intomics_vtable_strong")
+saveRDS(intomics_vtable_weak, "inst/saved_results/intomics_vtable_weak")
+```
+
+```{r}
+comp <- matrix(NA, nrow=6, ncol=8)
+comp <- data.frame(comp)
+colnames(comp) <- c("Algorithm", "Num predictions", "Found validations", "TP", "TN", "FP", "FN", "Validation type")
+
+
+tables <- list(cgpatable_strong, kimono_valtable, intomics_vtable_strong,
+               cgpatable_weak, kimono_valtable_weak, intomics_vtable_weak)
+
+algo_names <- c(rep(c("circGPA", "KiMONO", "IntOMICS"), 2))
+algo_types <- c(rep(c("strong"),3), rep(c("weak"),3))
+
+
+for(idx in 1:6)
+{
+    table_curr <- tables[[idx]]
+    algname_curr <- algo_names[idx]
+    algtype_curr <- algo_types[idx]
+    
+    pred <- sum(as.numeric(table_curr[[2]]))
+    val <- sum(as.numeric(table_curr[["All validated"]]))
+    TP <- sum(as.numeric(table_curr[["TP"]]))
+    TN <- sum(as.numeric(table_curr[["TN"]]))
+    FP <- sum(as.numeric(table_curr[["FP"]]))
+    FN <- sum(as.numeric(table_curr[["FN"]]))
+    
+    
+    comp[idx,] <- c(algname_curr, pred, val, TP, TN, FP, FN, algtype_curr)
+}
+
+
+comp
+```