Skip to content
Snippets Groups Projects
Commit 3ef4fda0 authored by Hanako Ikezawa's avatar Hanako Ikezawa
Browse files

Everything works on SK-SS, thinking about Trie change

parent a452eeef
Branches main
No related tags found
No related merge requests found
......@@ -20,7 +20,7 @@ BGe_param_init <- function(...)
BGe_change <- function(move, G_adjs, param)
{
# Extract input values
part_id <- move$part_id
part_id <- move$listID
from <- move$from
to <- move$to
action_is_add <- move$action_is_add
......
library(matrixStats)
library(VeryLargeIntegers)
#library(VeryLargeIntegers)
EmpiricalPrior_param_init <- function(...)
......@@ -57,9 +57,21 @@ EmpiricalPrior_score <- function(G_adjs, param)
bpk <- B_PKs[[i]]
bpk <- bpk - 0.5
bpk <- bpk * 2
#local_energy <- bpk * (1 - adj) + (1 - bpk) * adj
local_energy <- bpk * (1 - adj)
local_energy2 <- bpk * (1 - adj)
local_energy2 <- sum(local_energy2)
adj <- G_adjs[[i]]
bpk <- B_PKs[[i]]
adj <- adj[bpk != 0.5]
bpk <- bpk[bpk != 0.5]
local_energy <- bpk * (1 - adj) + (1 - bpk) * adj
local_energy <- sum(local_energy)
if(local_energy != local_energy2)
{
asd <- 123
}
total_energy <- total_energy + local_energy
}
beta <- optimum_beta(total_energy, K, beta_L, beta_H)
......@@ -85,7 +97,7 @@ EmpiricalPrior_score <- function(G_adjs, param)
EmpiricalPrior_change <- function(move, G_adjs, param)
{
# Extract input
part_id <- move$part_id
part_id <- move$listID
from <- move$from
to <- move$to
action_is_add <- move$action_is_add
......@@ -94,14 +106,14 @@ EmpiricalPrior_change <- function(move, G_adjs, param)
beta_H <- param$beta_H
K <- param$K
prev_score <- param$prev_score
total_energy <- param$energy
prev_energy <- param$energy
# Compute change in energy
B_PK_mat <- B_PKs[[part_id]]
b_i_j <- B_PK_mat[from, to]
u_change <- 1 - 2 * b_i_j
u_change <- (1 - 2 * action_is_add) * u_change
total_energy <- energy + u_change
u_change <- (2 * action_is_add - 1) * u_change
total_energy <- prev_energy + u_change
# Compute change in prior
beta <- optimum_beta(total_energy, K, beta_L, beta_H)
......@@ -110,6 +122,7 @@ EmpiricalPrior_change <- function(move, G_adjs, param)
# Update param
param$diff <- sP_G - prev_score
param$score <- sP_G
param$prev_score <- param$score
param$energy <- total_energy
return(param)
......@@ -252,8 +265,8 @@ marginalization_P_G <- function(E, K, beta_L, beta_H, delta_beta=0.1)
}
# Remains of my comprehensive numerical tests with hypergeometric functions
library(gsl)
library(hypergeo)
#library(gsl)
#library(hypergeo)
#' No idea what is happening here,
#' but here is the numerical trick to be sure that hypergeom is always positive
#' https://stats.stackexchange.com/questions/33451/computation-of-hypergeometric-function-in-r
......
......@@ -40,7 +40,7 @@ score_G <- function(G_adjs, param_in)
G_adjs, prior
)
param_in$score <- likelihood$score + prior$score
param_in$score <- param_in$likelihood$score + param_in$prior$score
param_in$diff <- 0
return(param_in)
......@@ -68,7 +68,16 @@ score_G <- function(G_adjs, param_in)
#'
score_change_G <- function(move, G_adjs, param_in)
{
#move = list(score_prev, part_id, from, to, action_is_add)
#move = list(listID, from, to, action_is_add)
# convert indices from C++ (start at 0) to R (start at 1)
move[c("listID", "from", "to")] <- unlist(move[c("listID", "from", "to")]) + 1
if(move$action_is_add == FALSE)
{
asd <- 1
}
likelihood <- param_in$likelihood
prior <- param_in$prior
......@@ -79,7 +88,7 @@ score_change_G <- function(move, G_adjs, param_in)
move, G_adjs, prior
)
param_in$score <- likelihood$score + prior$score
param_in$score <- param_in$likelihood$score + param_in$prior$score
param_in$diff <- likelihood$diff + prior$diff
return(param_in)
......@@ -149,19 +158,19 @@ SK_SS_tripartite <- function(GE, PK, SK, parts,
stop("partitions should contain all genes, e.g., length(unlist(unique(parts))) == N")
}
# Remove zero rows
zeroGenes <- names(which(colSums(GE) == 0))
if(length(zeroGenes) > 0)
{
cat("Removing zero-count genes: ", zeroGenes, "\n")
for(i in 1:length(parts))
{
prev <- parts[[i]]
parts[[i]] <- prev[!(prev %in% zeroGenes)]
}
}
GE <- GE[,colSums(GE) != 0]
# Remove zero rows (only if gaussian? Remove by now, TODO decide)
# zeroGenes <- names(which(colSums(GE) == 0))
# if(length(zeroGenes) > 0)
# {
# cat("Removing zero-count genes: ", zeroGenes, "\n")
#
# for(i in 1:length(parts))
# {
# prev <- parts[[i]]
# parts[[i]] <- prev[!(prev %in% zeroGenes)]
# }
# }
# GE <- GE[,colSums(GE) != 0]
# Extract sizes of every partitions
sizes <- unlist(lapply(parts, length))
......@@ -204,7 +213,7 @@ SK_SS_tripartite <- function(GE, PK, SK, parts,
"poisson"=ZiCount_param_init,
"negbinom"=ZiCount_param_init)
param_lik_in <- init_param_lik_func(GE, dist=likelihood, ...)
param_lik_in <- init_param_lik_func(GE=GE, dist=likelihood, ...)
# Get chosen likelihood function
......@@ -212,8 +221,8 @@ SK_SS_tripartite <- function(GE, PK, SK, parts,
# Initialize parameters for prior term
init_param_prior_func <- switch(prior,
"empirical"=BGe_param_init,
"complexity"=ZiCount_param_init)
"empirical"=EmpiricalPrior_param_init,
"complexity"=NA)
param_prior_in <- init_param_prior_func(PKs=PK_in, beta_L=beta_L, beta_H=beta_H, ...)
......@@ -235,10 +244,10 @@ SK_SS_tripartite <- function(GE, PK, SK, parts,
PK_out[parts[[2]], parts[[3]]] <- G_out[[2]]
# TODO remove posterior not utilized!
PK_out <- exp(PK_out)
PK_out <- PK_out >= alpha
# PK_out <- exp(PK_out)
# PK_out <- PK_out >= alpha
saveRDS(PK_out, "SKSS_output.RDS")
return(list(PK_out=PK_out, PK_score=ref_score$pG + ref_score$pDG))
return(PK_out=PK_out)
}
......@@ -576,7 +576,7 @@ struct ISettings
};
#define SAVE_FREQ 500
#define PRINT_FREQ 10000
#define PRINT_FREQ 1000
#define PARTIAL_ADD_ONLY "[pSK-SS][add only]"
#define PARTIAL_ADD_REMOVE "[pSK-SS][add-remove]"
#define SKSS_PHASE "[SK-SS][add-remove]"
......@@ -743,7 +743,7 @@ protected:
// do not update V_G since we want to sample from partialSK-SS only!
// because of that -> shouldUpdateTrie = false
//this->tryPerformMove(&twoScores, lindex, r, c, ChangeType::ADD, false);
this->tryPerformMove(&twoScores, lindex, r, c, ChangeType::ADD, true);
this->tryPerformMove(&twoScores, lindex, r, c, ChangeType::ADD, false);
}
......@@ -791,7 +791,7 @@ protected:
uint32_t r = xyindex % sk.nrow();
uint32_t c = xyindex / sk.nrow();
if(i % (PRINT_FREQ * 1000) == 0)
if(i % (PRINT_FREQ) == 0)
{
this->DebugInfo(PARTIAL_ADD_ONLY, i);
}
......@@ -838,11 +838,11 @@ protected:
uint32_t r = xyindex % sk.nrow();
uint32_t c = xyindex / sk.nrow();
if(i % (PRINT_FREQ * 1000) == 0)
if(i % (PRINT_FREQ) == 0)
{
this->DebugInfo(PARTIAL_ADD_REMOVE, i);
}
if(i % SAVE_FREQ == 0)
if((i % SAVE_FREQ) == 0)
this->SaveGraph();
if((sk(r, c) == 0) &&
......@@ -862,10 +862,10 @@ protected:
void DebugInfo(std::string phase, uint32_t iter)
{
Rprintf("%s[%5u]/[%5u][Trie:%5u|%5u]\n", //[Score:%9.6f]
Rprintf("%s[%5u]/[%5u][Trie:%5u|%5u][Score:%9.6f]\n", //
phase.c_str(), iter, this->input_data.N,
this->V_G.num_graphs(), this->V_G.num_nodes()
//,this->probabilityMaxOptim
,this->probabilityMaxOptim
);
}
......@@ -1063,7 +1063,7 @@ protected:
void SaveGraph()
{
return;
//return;
// Save intermediate results
//Rprintf("Saving intermediate results [%9.6f]\n", this->logSumScores);
......@@ -1072,11 +1072,12 @@ protected:
List& tmp_adjs = this->G_optim_to_list();
char fileName[50];
char fileName2[50];
//sprintf(fileName, "optimG_%9.6f.RData", this->probabilityMaxOptim);
sprintf(fileName, "optimG_%9.6f.RData", this->probabilityMaxOptim);
//sprintf(fileName2, "optimG_%9.6f_copy.RData", this->probabilityMaxOptim);
sprintf(fileName, "G_%d.RData", this->current_id);
//sprintf(fileName, "G_%d.RData", this->current_id);
saveRDS(tmp_adjs, Rcpp::Named("file", fileName));
//saveRDS(tmp_adjs, Rcpp::Named("file", "G_posterior.RDS"));
//saveRDS(tmp_adjs, Rcpp::Named("file", "G_posterior_copy.RDS"));
......
library(stats)
library(pscl)
library(sigmoid)
library(mpath)
library(caret)
#library(sigmoid)
ZiCount_param_init <- function(...)
{
......@@ -21,6 +20,8 @@ ZiCount_score <- function(G_adjs, param)
pDG <- 0
# store every node's computed BGe subscore
param$prevscores <- list()
data <- param$data
dist <- param$dist
for(i in 1:length(G_adjs))
{
......@@ -36,7 +37,7 @@ ZiCount_score <- function(G_adjs, param)
ZiCount_change <- function(move, G_adjs, param)
{
# Extract input values
part_id <- move$part_id
part_id <- move$listID
from <- move$from
to <- move$to
action_is_add <- move$action_is_add
......@@ -51,7 +52,7 @@ ZiCount_change <- function(move, G_adjs, param)
adj[from, to] <- action_is_add
neights <- names(which(adj[,node] > 0))
score_node <- zi_llik_node(node, neights, data, dist)
adj[from, to] <- (1 - action_is_add)
adj[from, to] <- as.logical(1 - action_is_add)
# Compute a log change in a score
param$diff <- score_node - score_node_prev
......@@ -68,6 +69,10 @@ ZiCount_change <- function(move, G_adjs, param)
#' @importFrom stats glm
zi_llik_node <- function(node, parents, data, dist)
{
if(length(parents) == 0) # no parents -> intercept only model
{
parents <- 1
}
frml <- paste("`", node, "` ~ ", paste(parents, collapse="+"), sep="")
if(min(data[,node]) == 0) # if any zeros present -- assume zero inflated model
......@@ -85,10 +90,10 @@ zi_llik_node <- function(node, parents, data, dist)
zi_llik <- function(G, data, dist)
{
n <- ncol(data)
n <- ncol(G)
all_llik <- sapply(1:n, function(i){
node <- colnames(data)[i]
node <- colnames(G)[i]
part <- colnames(G)[which(G[,node] == 1)]
if(length(part) == 0)
{
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment