Skip to content
Snippets Groups Projects
Commit f6f41d01 authored by Anuarbekov, Alikhan's avatar Anuarbekov, Alikhan
Browse files

All is ready except for final scoring -> Overflow of BGe score

parent f861ec0e
No related branches found
No related tags found
No related merge requests found
......@@ -200,15 +200,17 @@ P_D_G <- function(G_adjs, param_in)
pDG1 <- pDG1 + BGe_compute(adj, param_in)
}
nodes_cand <- rownames(G_adjs[[1]])
pDG2 <- sum(sapply(nodes_cand, function(node)
{
score <- BGe_node_compute(node, integer(0), param_in)
return(score)
})
)
return(pDG1 + pDG2)
# They are still the same!
# nodes_cand <- rownames(G_adjs[[1]])
# pDG2 <- sum(sapply(nodes_cand, function(node)
# {
# score <- BGe_node_compute(node, integer(0), param_in)
# return(score)
# })
# )
# + pDG2
return(pDG1)
}
#'
......@@ -378,7 +380,7 @@ SK_SS_tripartite <- function(GE, PK, SK, parts, I=1, debug=FALSE, alpha=0.05, be
# this is a large computation that computes a large parameter matrix:
# N x N, where N is the sum of sizes, e.g., total number of genes
cat("Computing large BGe matrix...\n")
if(FALSE)
if(TRUE)
{
myScore <- scoreparameters_BiDAG_BGe(n = ncol(GE), data = GE)
param_in <- list(n=myScore$n, awpN=myScore$awpN, TN=myScore$TN, scoreconstvec=myScore$scoreconstvec)
......@@ -394,10 +396,18 @@ SK_SS_tripartite <- function(GE, PK, SK, parts, I=1, debug=FALSE, alpha=0.05, be
set.seed(0)
start.time <- Sys.time()
print(start.time)
E_f_G_out <- c_SK_SS(PK_in,SK_in, sizes, param_in,
I, debug, beta_L, beta_H,
score_G, score_change_G)
end.time <- Sys.time()
print(end.time)
print(end.time - start.time)
print(E_f_G_out)
E_f_G_out <- lapply(E_f_G_out, function(m) m > alpha)
......
......@@ -8,6 +8,7 @@
#include <string>
#include <vector>
#include <algorithm>
#include <map>
// Pure C99
#include <cstring>
......@@ -67,9 +68,16 @@ struct key_hash : public std::unary_function<key_t, std::size_t>
{
std::size_t operator()(const key_t& k) const
{
return (std::size_t)(std::get<0>(k)) ^
(std::size_t)(std::get<1>(k)) ^
(std::size_t)(std::get<2>(k));
std::size_t ret = 0;
ret ^= std::hash<uint32_t>()(std::get<0>(k)) + 0x9e3779b9 + (ret<<6) + (ret>>2);
ret ^= std::hash<uint32_t>()(std::get<1>(k)) + 0x9e3779b9 + (ret<<6) + (ret>>2);
ret ^= std::hash<uint32_t>()(std::get<2>(k)) + 0x9e3779b9 + (ret<<6) + (ret>>2);
return ret;
// return (std::size_t)(std::get<0>(k)) ^
// (std::size_t)(std::get<1>(k)) ^
// (std::size_t)(std::get<2>(k));
}
};
......@@ -118,6 +126,14 @@ struct GraphChange
*/
GraphChange* prev = nullptr;
/**
* Note: since unordered_map does not have any guarantee about the key order,
* the random sampling is modified by the changing key ordering!
*
* If such behavior is unacceptable, consider using std::map, but this
* will cost a change of complexity from O(1) to O(log n)
*/
std::unordered_map<
const key_t,
GraphChange*,
......@@ -157,7 +173,7 @@ struct GraphChange
* ! Note -> nodes with * haven't been visited !
*
* > if no flag is used to determine the node in Trie, we can't mark the above
* > when pushing a word, only its last tail will be marked
* > when pushing a word, only its last tail node will be marked
*/
bool is_graph = false;
......@@ -214,6 +230,11 @@ struct GraphChange
return prev;
}
void printG()
{
Rcout << listID << "," << from << "," << to << "; " << is_graph << "\n";
}
};
struct graph_comp
......@@ -238,7 +259,7 @@ public:
~Trie()
{
// TODO free, but not really needed for single script :)
}
......@@ -250,6 +271,12 @@ public:
{
if(word.size() == 0) return false;
/*
Rcout << "adding a word:\n";
for(key_t k : word)
Rcout << "["<< (uint32_t)std::get<0>(k) << "," << (uint32_t)std::get<1>(k) << "," << (uint32_t)std::get<2>(k) << "]";
Rcout<<"\n";
*/
// Now it is easier to add a word to trie:
// since characters are sorted, just traverse and add individual nodes
......@@ -257,6 +284,7 @@ public:
if(this->root.find(word[0]) == this->root.end())
{
current = new GraphChange(word[0], nullptr);
root[word[0]] = current;
}
else{
current = root.at(word[0]);
......@@ -299,25 +327,55 @@ public:
}
}
/**
* Randomly chooses a graph from the Trie and sets the G(adjacency) graph
* to it
*/
void sampleGraph(std::vector<LogicalMatrix*>& G_adjs)
{
uint32_t currIndex = 0;
// TODO ideally the Rcpp::sample should be modified
// to handle larger numbers
uint32_t sampledIndex = Rcpp::sample(INT_MAX, 1)[0];
uint32_t sampledIndex = Rcpp::sample(this->total_num_graphs, 1)[0] - 1;
// TODO
this->find(&currIndex, sampledIndex, G_adjs);
}
// TODO maybe 32 bit won't be enough
//
/**
* Returns a number of subgraphs in Trie.
*
* Note: this isn't the same as the number of nodes. We count only the subgraphs,
* for more detail see the struct GraphChange
*
* Note 2: maybe this 32-bit number won't be enough, consider using 64bit number,
* but then, again, Rcpp::sample should be modified to 64-bit (2x sample + shift etc...)!
* e.g., .Machine$integer.max on tested machine:= 2147483647
*/
uint32_t total_num()
{
return this->total_num_graphs;
}
void print()
{
for(const auto & [ key, value ] : this->root)
{
this->printRecursive(value);
}
}
protected:
/**
* Note: since unordered_map does not have any guarantee about the key order,
* the random sampling is modified by the changing key ordering!
*
* If such behavior is unacceptable, consider using std::map, but this
* will cost a change of complexity from O(1) to O(log n)
*/
std::unordered_map<
const key_t,
GraphChange*,
......@@ -325,22 +383,68 @@ protected:
>
root;
// TODO assert total_num_graphs < INT_MAX
uint64_t total_num_graphs = 0;
void printRecursive(GraphChange* curr)
{
Rcout << "recursive:\n";
curr->printG();
Rcout << "going down:\n";
for(const auto & [ key, value ] : curr->next_map)
{
this->printRecursive(value);
}
Rcout << "going up:\n";
}
void addWordRecursive(std::vector<GraphChange*>& word, uint32_t index)
// TODO assert total_num_graphs < INT_MAX
uint32_t total_num_graphs = 0;
void find(uint32_t* currIdx, const uint32_t& needed, std::vector<LogicalMatrix*>& G_adjs)
{
key_t currKey = std::make_tuple(word[0]->listID, word[0]->from, word[0]->to);
if(root.find(currKey) == root.end())
// Note: if the sampled subgraphs are too deep and stack memory overflow,
// then consider using the non-recursive version
for(const auto & [ key, value ] : this->root)
{
// TODO
// Go down
value->forward(G_adjs);
*currIdx += value->is_graph;
this->recursiveFind(value, currIdx, needed, G_adjs);
if(*currIdx == needed)
{
break;
}
value->backward(G_adjs);
}
}
void findIndex(uint64_t* currIdx, const uint64_t& needed)
void recursiveFind(GraphChange* curr, uint32_t* currIdx, const uint32_t& needed, std::vector<LogicalMatrix*>& G_adjs)
{
// TODO
if(*currIdx == needed)
{
return;
}
curr->forward(G_adjs);
currIdx += curr->is_graph;
for(const auto & [ key, value ] : curr->next_map)
{
this->recursiveFind(value, currIdx, needed, G_adjs);
if(*currIdx == needed)
{
return;
}
}
curr->backward(G_adjs);
return;
}
......@@ -354,11 +458,34 @@ protected:
// [[Rcpp::export]]
double tst()
{
IntegerVector rnd = Rcpp::sample(6, 6);
NumericMatrix t = NumericMatrix(2, 3, rnd.begin());
Rcout << t << "\n";
Trie trie;
trie.print();
std::vector<key_t> word;
word.push_back(std::make_tuple(0, 5, 10));
trie.appendChainAddOnly(word);
trie.print();
word.push_back(std::make_tuple(0, 7, 10));
trie.appendChainAddOnly(word);
trie.print();
word.push_back(std::make_tuple(0, 5, 11));
trie.appendChainAddOnly(word);
trie.print();
word.push_back(std::make_tuple(0, 4, 10));
trie.appendChainAddOnly(word);
trie.print();
word.erase(word.begin() + 0);
trie.appendChainAddOnly(word);
trie.print();
Rcout << "Final trie size: " << trie.total_num() << "\n";
Rcout << t * 2.5 << "\n";
return 0.0;
}
......@@ -414,6 +541,16 @@ protected:
std::vector<NumericMatrix*> E_f_G;
std::vector<NumericMatrix*> current_counts;
double currentProb = 1.0; // TODO
NumericVector initialProbs;
double bge_max = -99999999999.0;
double bge_min = 9999999999.0;
double energy_max = -99999999999.0;
double energy_min = 9999999999.0;
double prob_max = -99999999999.0;
double prob_min = 9999999999.0;
public:
Network(IData input, ISettings settings)
......@@ -443,6 +580,7 @@ public:
void SK_SS()
{
this->partialSK_SS();
return; // TODO remove
for(uint32_t i = 0; i < this->settings.I; i++)
{
......@@ -454,12 +592,17 @@ public:
List& result()
{
Rprintf("BGe min/max: %.10f, %.10f\n", this->bge_min, this->bge_max);
Rprintf("Energy min/max: %.10f, %.10f\n", this->energy_min, this->energy_max);
Rprintf("Prob min/max: %.10f, %.10f\n", this->prob_min, this->prob_max);
Rprintf("Initial probs: %.10f, %.10f\n", this->initialProbs[0], this->initialProbs[1]);
const List* SK_list = this->input_data.SK;
List* ret = new List(SK_list->length());
for(uint32_t i = 0; i < SK_list->length(); i++)
{
*this->E_f_G[i] = (*this->E_f_G[i]) / (this->V_G.total_num() / 100.0);
*this->E_f_G[i] = (*this->E_f_G[i]); // / (this->V_G.total_num() / 100.0)
(*ret)[i] = *this->E_f_G[i];
}
......@@ -469,22 +612,59 @@ public:
protected:
void SK_SS_phase2_iter()
{
//TODO
this->V_G.sampleGraph(this->G); // sample adjacency matrix, e.g., sample G_0
NumericVector twoScores = this->callScoreFull();
const List* SK_list = this->input_data.SK;
// for loop over 1...N
// for loop over neighbors
// try add edge
for(uint32_t i = 0; i < SK_list->length(); i++)
{
const IntegerMatrix& sk = (*SK_list)[i];
// Random permutation of the indices
uint32_t num = sk.ncol() * sk.nrow();
IntegerVector xy = Rcpp::sample(num, num);
// It is necessary to do 2d loop in COL, ROW order, since
// rows are local in memory by Rcpp standard
// UPD: after random permutation this doesn't matter
for(uint32_t j = 0; j < (uint32_t)sk.ncol(); j++)
{
for(uint32_t k = 0; k < (uint32_t)sk.nrow(); k++)
{
// extract 2D coords from 1D random permutation
uint32_t lin = xy[j * sk.nrow() + k] - 1;
uint32_t r = lin % sk.nrow();
uint32_t c = lin / sk.nrow();
Rcout << "\014" <<
i << "," << j << "," << k << " /" <<
(SK_list->length()) << "," << (uint32_t)sk.ncol() << "," << (uint32_t)sk.nrow() <<
"\n" << "Trie size: " << this->V_G.total_num();
// if inside G_0 -> skip
if((*this->G[i])(r,c) != 0)
continue;
// try add an edge outside of G_0
this->tryPerformMove(&twoScores, i, r, c, ChangeType::ADD);
}
}
}
}
void partialSK_SS_iter()
{
List& adjs = this->G_to_list();
NumericVector twoScores = (*this->input_data.score_full_func)(
adjs,
*this->input_data.PK,
this->settings.beta_L,
this->settings.beta_H,
*this->input_data.bge_param
);
delete &adjs;
NumericVector twoScores = this->callScoreFull();
this->initialProbs = twoScores;
const List* SK_list = this->input_data.SK;
// for loop over 1...N
......@@ -511,12 +691,12 @@ protected:
uint32_t r = lin % sk.nrow();
uint32_t c = lin / sk.nrow();
//"\014" <<
Rcout << lin << " = " <<
i << "," << r << "," << c << " /" <<
Rcout << "\014" <<
i << "," << j << "," << k << " /" <<
(SK_list->length()) << "," << (uint32_t)sk.ncol() << "," << (uint32_t)sk.nrow() <<
"\n";
"\n" << "Trie size: " << this->V_G.total_num() << "\n"
<< "BGe: max:" << this->bge_max << ", min:" << this->bge_min << "\n"
<< "prob: " << this->currentProb << "\n";
if(sk(r,c) == 0)
......@@ -526,7 +706,9 @@ protected:
}
}
}
return; // TODO remove
// for loop over 1...N
// for loop over neighbors
// try remove/add edge
......@@ -551,6 +733,13 @@ protected:
uint32_t r = lin % gz.nrow();
uint32_t c = lin / gz.nrow();
Rcout << "\014" <<
i << "," << j << "," << k << " /" <<
(SK_list->length()) << "," << (uint32_t)gz.ncol() << "," << (uint32_t)gz.nrow() <<
"\n" << "Trie size: " << this->V_G.total_num();
if(gz(r,c) == 0) // if not in G -> try add
this->tryPerformMove(&twoScores, i, r, c, ChangeType::ADD);
else // if in G -> try delete
......@@ -562,7 +751,7 @@ protected:
// Normalize counts by 100 * iteration number
for(uint32_t i = 0; i < SK_list->length(); i++)
{
*this->current_counts[i] = (*this->current_counts[i]) / (this->settings.I * 100.0);
*this->current_counts[i] = (*this->current_counts[i]); // / (this->settings.I * 100.0)
*this->E_f_G[i] += *this->current_counts[i];
}
}
......@@ -650,6 +839,7 @@ protected:
void tryPerformMove(NumericVector* score, uint32_t listID, uint32_t from, uint32_t to, ChangeType action)
{
/*
Rcout << "Trying to ";
if(action == ChangeType::ADD)
{
......@@ -659,24 +849,19 @@ protected:
Rcout << "REMOVE";
}
Rcout << " edge: " << listID << ", " << from << ", " << to << "\n";
*/
NumericVector scoreChanges = this->callScoreChange(score, listID, from, to, action);
List& adjs = this->G_to_list();
NumericVector scoreChanges = (*this->input_data.score_change_func)
(
adjs,
*this->input_data.PK,
this->settings.beta_L,
this->settings.beta_H,
*this->input_data.bge_param,
*this->input_data.sizes,
*score,
listID,
from,
to,
action == ChangeType::ADD);
double bge = scoreChanges[1];
double energy = scoreChanges[0];
double prob = bge + energy;
delete &adjs;
if(bge > this->bge_max) this->bge_max = bge;
if(bge < this->bge_min) this->bge_min = bge;
if(energy > this->energy_max) this->energy_max = energy;
if(energy < this->energy_min) this->energy_min = energy;
if(prob > this->prob_max) this->prob_max = prob;
if(prob < this->prob_min) this->prob_min = prob;
/*
* Here I tested multiple approaches to find the difference, because:
* We are dealing with ~ -1e+08 or even more, thus
......@@ -686,7 +871,7 @@ protected:
* in each scoring function. For reference see <score_change(...)> R function
*/
double mod_diff = (scoreChanges[3] + scoreChanges[4]);
Rcout << mod_diff << "\n";
// Rcout << mod_diff << "\n";
/**
* Modified(explicit per-partes) is used, e.g.:
......@@ -711,7 +896,11 @@ protected:
this->makeMove(listID, from, to, action);
// Is it appropriate?
this->currentProb *= diff;
// TODO remove debug
/*
if(action == ChangeType::ADD)
{
Rcout << "Adding";
......@@ -719,7 +908,7 @@ protected:
else{
Rcout << "Removing";
}
Rcout << " edge: " << listID << ", " << from << ", " << to << "\n";
Rcout << " edge: " << listID << ", " << from << ", " << to << "\n"; */
}
......@@ -756,6 +945,44 @@ protected:
}
}
NumericVector callScoreChange(NumericVector* score, uint32_t listID, uint32_t from, uint32_t to, ChangeType action)
{
List& adjs = this->G_to_list();
NumericVector scoreChanges = (*this->input_data.score_change_func)
(
adjs,
*this->input_data.PK,
this->settings.beta_L,
this->settings.beta_H,
*this->input_data.bge_param,
*this->input_data.sizes,
*score,
listID,
from,
to,
action == ChangeType::ADD);
delete &adjs;
return scoreChanges;
}
NumericVector callScoreFull()
{
List& adjs = this->G_to_list();
NumericVector twoScores = (*this->input_data.score_full_func)(
adjs,
*this->input_data.PK,
this->settings.beta_L,
this->settings.beta_H,
*this->input_data.bge_param
);
delete &adjs;
return twoScores;
}
};
/**
......@@ -814,6 +1041,9 @@ List c_SK_SS(
net->SK_SS();
List ret = net->result();
delete net;
return ret;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment