diff --git a/source/skeleton.R b/source/skeleton.R index 313a6d48096f9637765554702227aa6939f08013..00871b6b494f80c725907753accf08cd5e622f04 100644 --- a/source/skeleton.R +++ b/source/skeleton.R @@ -392,12 +392,17 @@ SK_SS_tripartite <- function(GE, PK, SK, parts, I=1, debug=FALSE, alpha=0.05, be cat("TN matrix have a size of: ", dim(param_in$TN), "\n") - #vals <- score_G(SK_in, PK_in, beta_L, beta_H, param_in) - #vals_change <- score_change_G(SK_in, PK_in, beta_L, beta_H, param_in, sizes, vals, 0, 5, TRUE) - set.seed(0) - return(c_SK_SS(PK_in,SK_in, sizes, param_in, - I, debug, alpha, beta_L, beta_H, - score_G, score_change_G)) + E_f_G_out <- c_SK_SS(PK_in,SK_in, sizes, param_in, + I, debug, beta_L, beta_H, + score_G, score_change_G) + + print(E_f_G_out) + + E_f_G_out <- lapply(E_f_G_out, function(m) m > alpha) + + # TODO convert to NxN matrix form + + return(E_f_G_out) } diff --git a/source/skeleton.cpp b/source/skeleton.cpp index 593efa401f4a7487c1fcbae517130b9bf727d123..6b623982852988b534b62df9dcc019d71b47d3a8 100644 --- a/source/skeleton.cpp +++ b/source/skeleton.cpp @@ -46,20 +46,22 @@ using Rcpp::_; typedef unsigned int uint32_t; typedef unsigned long long uint64_t; -/** - * Score computation in CPP: TODO? - */ - /** * This part will be the separate header file for tuple + hash + Trie data structures */ #include <tuple> +/** + * We will encode a 3D coordinate: + * > listID --> ID of the partition of our N-partition graph, goes: (0, N-2) + * Note: because, for example, for 3 partition(A,B,C), we have 2 adjacency matrices: AB, BC + * > FROM, TO --> row, column ID of the particular + */ typedef std::tuple<uint32_t, uint32_t, uint32_t> key_t; /** * Hashing of the triplet. Uses the simple XOR, - * TODO for future: replace with more sophisticated, possible errors + * TODO for future: replace with more sophisticated hashing, possible duplicates */ struct key_hash : public std::unary_function<key_t, std::size_t> { @@ -71,8 +73,6 @@ struct key_hash : public std::unary_function<key_t, std::size_t> } }; -struct GraphChange; - struct key_comp { /** @@ -243,11 +243,12 @@ public: /** - * Append a word consisting of ADD only nodes + * Append a word consisting of ADD only nodes. + * return TRUE only if non-visited subgraph added */ - void appendChainAddOnly(std::vector<key_t>& word) + bool appendChainAddOnly(std::vector<key_t>& word) { - if(word.size() == 0) return; + if(word.size() == 0) return false; // Now it is easier to add a word to trie: @@ -268,9 +269,10 @@ public: if(index == word.size()) { // update number of graphs, if subgraph wasn't visited -> +1 + bool is_nonvisited = !(current->is_graph); this->total_num_graphs += !(current->is_graph); current->mark(); // mark the node as graph - break; // stop + return is_nonvisited; // stop } if(current->next_map.find(word[index]) == current->next_map.end()) @@ -285,9 +287,10 @@ public: // mark the last one only, for more details see GraphDeta // update number of graphs, if subgraph wasn't visited -> +1 + bool is_nonvisited = !(current->is_graph); this->total_num_graphs += !(current->is_graph); current->mark(); - break; + return is_nonvisited; } // Go down the Trie @@ -308,6 +311,11 @@ public: // TODO } + // TODO maybe 32 bit won't be enough + uint32_t total_num() + { + return this->total_num_graphs; + } protected: std::unordered_map< @@ -346,8 +354,11 @@ protected: // [[Rcpp::export]] double tst() { - + IntegerVector rnd = Rcpp::sample(6, 6); + NumericMatrix t = NumericMatrix(2, 3, rnd.begin()); + Rcout << t << "\n"; + Rcout << t * 2.5 << "\n"; return 0.0; } @@ -381,11 +392,8 @@ protected: Trie V_G; - std::vector<NumericMatrix*> counts; - std::vector<NumericMatrix*> current_counts; - std::vector<LogicalMatrix*> G; - //std::vector<GraphChange*> curr_V_G; + std::vector<LogicalMatrix*> G; /** * sort individual elements from lowest to largest by unordered_set @@ -393,6 +401,19 @@ protected: */ std::vector<key_t> ord_V_G; + /** + * store 2 count matrices: + * > information about the current iteration counts only + * > after each iteration do: + * >> normalize current_counts by 100*I to prevent overflow + * >> E_f_G += current_counts; + * + * TODO: + * also store 1 last probability to multiply our counts? + */ + std::vector<NumericMatrix*> E_f_G; + std::vector<NumericMatrix*> current_counts; + double currentProb = 1.0; // TODO public: Network(IData input, ISettings settings) @@ -431,6 +452,19 @@ public: } } + List& result() + { + const List* SK_list = this->input_data.SK; + List* ret = new List(SK_list->length()); + + for(uint32_t i = 0; i < SK_list->length(); i++) + { + *this->E_f_G[i] = (*this->E_f_G[i]) / (this->V_G.total_num() / 100.0); + (*ret)[i] = *this->E_f_G[i]; + } + + return *ret; + } protected: void SK_SS_phase2_iter() @@ -441,22 +475,16 @@ protected: void partialSK_SS_iter() { - //List& G = this->emptyG_0(); // TODO remove? No, but used matrices from curr_V_G! - /* + List& adjs = this->G_to_list(); NumericVector twoScores = (*this->input_data.score_full_func)( - G, + adjs, *this->input_data.PK, this->settings.beta_L, this->settings.beta_H, *this->input_data.bge_param - );*/ //TODO - - NumericVector twoScores = NumericVector(2); - + ); + delete &adjs; - //Rcout << twoScores << "\n"; - //Rcout << "Score of empty G0: " << twoScores[0] * twoScores[1] << "\n"; - const List* SK_list = this->input_data.SK; // for loop over 1...N @@ -466,15 +494,10 @@ protected: { const IntegerMatrix& sk = (*SK_list)[i]; - Rcout << sk << "\n"; - // Random permutation of the indices uint32_t num = sk.ncol() * sk.nrow(); IntegerVector xy = Rcpp::sample(num, num); - xy = Rcpp::Range(0, num-1); // TODO remove - Rcout << xy << "\n"; - // It is necessary to do 2d loop in COL, ROW order, since // rows are local in memory by Rcpp standard // UPD: after random permutation this doesn't matter @@ -484,13 +507,13 @@ protected: { // extract 2D coords from 1D random permutation - uint32_t lin = xy[j * sk.nrow() + k]; + uint32_t lin = xy[j * sk.nrow() + k] - 1; uint32_t r = lin % sk.nrow(); uint32_t c = lin / sk.nrow(); //"\014" << - Rcout << + Rcout << lin << " = " << i << "," << r << "," << c << " /" << (SK_list->length()) << "," << (uint32_t)sk.ncol() << "," << (uint32_t)sk.nrow() << "\n"; @@ -535,6 +558,13 @@ protected: } } } + + // Normalize counts by 100 * iteration number + for(uint32_t i = 0; i < SK_list->length(); i++) + { + *this->current_counts[i] = (*this->current_counts[i]) / (this->settings.I * 100.0); + *this->E_f_G[i] += *this->current_counts[i]; + } } @@ -560,12 +590,10 @@ protected: { const IntegerMatrix& sk = (*SK_list)[i]; - //delete this->G[i]; *(this->G)[i] = LogicalMatrix(sk.nrow(), sk.ncol()); Rcpp::rownames(*(this->G)[i]) = Rcpp::rownames(sk); Rcpp::colnames(*(this->G)[i]) = Rcpp::colnames(sk); - //delete this->current_counts[i]; *(this->current_counts)[i] = NumericMatrix(sk.nrow(), sk.ncol()); Rcpp::rownames(*(this->current_counts)[i]) = Rcpp::rownames(sk); Rcpp::colnames(*(this->current_counts)[i]) = Rcpp::colnames(sk); @@ -590,9 +618,9 @@ protected: Rcpp::rownames(*(this->G)[i]) = Rcpp::rownames(sk); Rcpp::colnames(*(this->G)[i]) = Rcpp::colnames(sk); - (this->counts).push_back(new NumericMatrix(sk.nrow(), sk.ncol())); - Rcpp::rownames(*(this->counts)[i]) = Rcpp::rownames(sk); - Rcpp::colnames(*(this->counts)[i]) = Rcpp::colnames(sk); + (this->E_f_G).push_back(new NumericMatrix(sk.nrow(), sk.ncol())); + Rcpp::rownames(*(this->E_f_G)[i]) = Rcpp::rownames(sk); + Rcpp::colnames(*(this->E_f_G)[i]) = Rcpp::colnames(sk); (this->current_counts).push_back(new NumericMatrix(sk.nrow(), sk.ncol())); Rcpp::rownames(*(this->current_counts)[i]) = Rcpp::rownames(sk); @@ -610,12 +638,12 @@ protected: for(uint32_t i = 0; i < SK_list->length(); i++) { delete this->G[i]; - delete this->counts[i]; + delete this->E_f_G[i]; delete this->current_counts[i]; } this->G.clear(); - this->counts.clear(); + this->E_f_G.clear(); this->current_counts.clear(); } @@ -653,18 +681,13 @@ protected: * Here I tested multiple approaches to find the difference, because: * We are dealing with ~ -1e+08 or even more, thus * straightforward computation may overflow/underflow(when exp() is taken) + * + * The final choice -> compute difference in each score(BGe/Energy) separately + * in each scoring function. For reference see <score_change(...)> R function */ - //double per_part_diff = (scoreChanges[0] - (*score)[0]) + (scoreChanges[1] - (*score)[1]); - - // double log_score_new = scoreChanges[0] + scoreChanges[1]; - // double log_score_old = (*score)[0] + (*score)[1]; - // double diff = log_score_new - log_score_old; - double mod_diff = (scoreChanges[3] + scoreChanges[4]); - Rcout << mod_diff << "\n"; - // Modified(explicit per-partes) is used /** * Modified(explicit per-partes) is used, e.g.: * @@ -672,6 +695,7 @@ protected: * > difference is calculated in SCORE functions * > difference is separated for BGe / Energy part * > ONLY difference is converted by exp, since it is tractable! + * > try checking BGe score for large graph, it is ~ -55000 in log form */ double diff = exp(mod_diff); @@ -687,7 +711,6 @@ protected: this->makeMove(listID, from, to, action); - // TODO remove debug if(action == ChangeType::ADD) { @@ -702,52 +725,49 @@ protected: void makeMove(uint32_t listID, uint32_t from, uint32_t to, ChangeType action) { - /* - GraphChange* toAdd = new GraphChange; - toAdd->from = from; - toAdd->to = to; - toAdd->listID = listID; - toAdd->type = action; - toAdd->is_graph = true; - - toAdd->prev = this->curr_V_G.empty() ? nullptr : this->curr_V_G[this->curr_V_G.size() - 1]; - toAdd->forward(adjs); - */ - - // update adjacency (*(this->G[listID]))(from,to) = action == ChangeType::ADD; - // Update our ordered add-only word key_t kt = std::make_tuple(listID, from, to); if(action == ChangeType::ADD) { insert_sorted(this->ord_V_G, kt); - //this->ord_V_G.insert(kt); } else{ erase_sorted(this->ord_V_G, kt); - //this->ord_V_G.erase(this->ord_V_G.find(kt)); } - this->V_G.appendChainAddOnly(this->ord_V_G); + + // Try to add word to Trie + bool shouldAdd = this->V_G.appendChainAddOnly(this->ord_V_G); + + // If new subgraph found, update edge counts + if(shouldAdd) this->AddCounts(); } + void AddCounts() + { + const List* SK_list = this->input_data.SK; + for(uint32_t i = 0; i < SK_list->length(); i++) + { + NumericMatrix tmp = Rcpp::as<NumericMatrix>(*this->G[i]) * currentProb; + *this->current_counts[i] += tmp; + } + } }; /** * This function is the interface to the R part, the only function exported */ -IntegerMatrix c_SK_SS( +List c_SK_SS( const List& PK, const List& SK, const NumericVector& sizes, const List& param, uint32_t I, bool debug, - double alpha, double beta_L, double beta_H, Function score_full_func, Function score_change_func); @@ -759,14 +779,13 @@ IntegerMatrix c_SK_SS( //#include "skeleton.h" // [[Rcpp::export]] -IntegerMatrix c_SK_SS( +List c_SK_SS( const List& PK, const List& SK, const NumericVector& sizes, const List& param, uint32_t I, bool debug, - double alpha, double beta_L, double beta_H, Function score_full_func, Function score_change_func @@ -793,16 +812,9 @@ IntegerMatrix c_SK_SS( Network* net = new Network(i_data, settings); - //net->partialSK_SS(); - - + net->SK_SS(); + List ret = net->result(); delete net; - // double val = Rcpp::as<double>( - // BGe_full( - // Rcpp::_["G_adjs"] = SK, - // Rcpp::_["param_in"] = param) - // ); - // Rcout << "BGe score of the input SK is: " << val << "\n"; - - return IntegerMatrix(); + + return ret; }