diff --git a/source/skeleton.cpp b/source/skeleton.cpp index 67a77aa054ea5e5d68b6a895abf0c15d5e1e7768..7c6c3831888d2e4b4dce58f2dc60202c51af40a4 100644 --- a/source/skeleton.cpp +++ b/source/skeleton.cpp @@ -14,11 +14,14 @@ #include <cstring> #include <cmath> #include <limits.h> - +#include <cstdio> +#include <cstdint> /** * We include the external library of RcppArmadillo, * this allows for a future usage of linear algebra, advanced Rcpp, etc... + * + * NOT USED AS FOR NOW */ // [[Rcpp::depends(RcppArmadillo)]] #include <RcppArmadillo.h> @@ -26,7 +29,7 @@ //#include <Rcpp.h> /** - * Most used Rcpp objects, used to + * Most used Rcpp objects, used to remove Rcpp prefix */ using Rcpp::Rcout; using Rcpp::Rcerr; @@ -41,17 +44,13 @@ using Rcpp::Function; using Rcpp::IntegerVector; using Rcpp::_; - +/** + * Also -- include the "saveRDS" function for partial result serialization and save + */ Rcpp::Environment base("package:base"); Function saveRDS = base["saveRDS"]; -/** - * Ideally should be included from library, but let it be like that :) - */ -//typedef unsigned int uint32_t; -//typedef unsigned long long uint64_t; -typedef unsigned short uint16_t; -typedef unsigned char uint8_t; + /** * This part will be the separate header file for tuple + hash + Trie data structures */ @@ -119,248 +118,368 @@ enum ChangeType DELETE = false }; -struct GraphChange + + +// ########################################### COMPRESSED TRIE CODE ############################################################### +/** + * Apply the key_tp's triplet index as TRUE on adjacency matrix + */ +inline static void G_forward(const key_tp &key, std::vector<LogicalMatrix*>& G_adjs) { - /** - * Note: since unordered_map does not have any guarantee about the key order, - * the random sampling is modified by the changing key ordering! - * - * If such behavior is unacceptable, consider using std::map, but this - * will cost a change of complexity from O(1) to O(log n) - */ - std::unordered_map< - const key_tp, - GraphChange*, - key_hash - > - next_map; + const uint8_t one = std::get<0>(key); + const uint16_t two = std::get<1>(key); + const uint16_t three = std::get<2>(key); - /** - * After Trie receives a word, only the end of it is actually a subgraph, - * - * Example to illustrate the intermediate subgraph mechanics: - * add 2 -> add 1 -> add 5 -> add 4 -> delete 2 -> add 3 - * - * then, the visited subgraphs are: - * {[2], [1, 2], [1, 2, 5], [1, 2, 4, 5], [1, 4, 5], [1, 3, 4, 5]} - * - * - * which results in Trie: - * - * 2 - * root---| 5 - * | 2--| - * 1*--| 4*---5 - * | - * 4*--5 - * | - * 3*--4*--5 - * - * - * ! Note -> nodes with * haven't been visited ! - * - * > if no flag is used to determine the node in Trie, we can't mark the above - * > when pushing a word, only its last tail node will be marked - */ + + (*G_adjs[one])(two, three) = true; +} + +/** + * Apply the key_tp's triplet index as FALSE on adjacency matrix + */ +inline static void G_backward(const key_tp &key, std::vector<LogicalMatrix*>& G_adjs) +{ + const uint8_t one = std::get<0>(key); + const uint16_t two = std::get<1>(key); + const uint16_t three = std::get<2>(key); + + (*G_adjs[one])(two, three) = true; +} + +struct Node +{ bool is_graph = false; + key_tp *values = nullptr; + uint32_t num_values = 0; - void mark() - { - this->is_graph = true; - } + std::unordered_map< + key_tp, + Node *, + key_hash> + next_map; - GraphChange() + void print() { - this->is_graph = false; + std::cout << "["; + for(uint32_t i = 0; i < num_values; i++) + { + key_tp k = values[i]; + std::cout << "(" + << (uint32_t)std::get<0>(k) << "," + << std::get<1>(k) << "," + << std::get<2>(k) << ") "; + } + std::cout << "] "; } }; -class Trie +#define ALLOC_SIZE ((2UL << 13)) +Node PREALLOCATED_POOL[ALLOC_SIZE]; + +/** + * Optimization technique -- allocate a large portion of nodes in advance on stack, + * so that the OS won't spend any additional time allocating nodes one by one. + */ +class NodePool { public: - Trie() + NodePool() { - } - ~Trie() + Node *get() + { + if (this->current_index >= ALLOC_SIZE) + { + Rprintf("Maximum number of nodes (%5lu) reached! Now allocating one by one!", ALLOC_SIZE); + Node* ret = new Node; + return ret; + } + Node *ret = PREALLOCATED_POOL + this->current_index; + this->current_index++; + return ret; + } + + uint64_t size() { - // TODO free, but not really needed for single script :) + return this->current_index; } +protected: + uint64_t current_index = 0; +}; + +/** + * After Trie receives a word, only the end of it is actually a subgraph, + * + * Example to illustrate the intermediate subgraph mechanics: + * add 2 -> add 1 -> add 5 -> add 4 -> delete 2 -> add 3 + * + * then, the visited subgraphs are: + * {[2], [1, 2], [1, 2, 5], [1, 2, 4, 5], [1, 4, 5], [1, 3, 4, 5]} + * + * + * which results in Trie (Which is then compressed): + * + * 2 + * root---| 5 + * | 2--| + * 1*--| 4*---5 + * | + * 4*--5 + * | + * 3*--4*--5 + * + * + * ! Note -> nodes with * haven't been visited and, thus, not subgraphs ! + * Compressed trie example: + * https://www.geeksforgeeks.org/compressed-tries/ + * + * + * > if no flag is used to determine the node in Trie, we can't mark the above + * > when pushing a word, only its last tail node will be marked + */ +class CompressedTrie +{ +public: + CompressedTrie() {} + ~CompressedTrie() {} /** - * Append a word consisting of ADD only nodes. + * Append a word consisting of sorted ADD only nodes. * return TRUE only if non-visited subgraph added */ - bool appendChainAddOnly(std::vector<key_tp>& word) + bool AppendChain(std::vector<key_tp>& word); + + /** + * Print current Trie, used only for debug purposes + */ + void PrintTrie(); + + /** + * Randomly chooses a graph from the Trie and sets the matrix(adjacency) + * to it + */ + void SampleAdjacencyMatrix(std::vector<LogicalMatrix*>& matrix); + + /** + * Return the number of nodes in a graph + */ + uint64_t num_nodes() {return this->node_pool.size();} + + /** + * Return the number of nodes that are actually subgraphs + */ + uint32_t num_graphs() {return this->total_num_graphs;} + +protected: + NodePool node_pool; + Node *root = this->node_pool.get(); + + uint32_t total_num_graphs = 0; + + uint32_t current_needed_index; + uint32_t current_visited_index; + std::vector<LogicalMatrix*>* current_G_adjs; + void findRecursive(Node *currNode); + + void appendWordToNode(Node* toAppend, key_tp* word, uint32_t len); // TODO remove, not used when no delete + bool appendRecursive(key_tp* word, uint32_t len); + uint32_t compareValues(Node* toAppend, key_tp* word, uint32_t len); + Node* createNode(key_tp* word, uint32_t len); + void subdivideNode(Node* node, uint32_t divideIndex); + + uint64_t SampleLong() { - if(word.size() == 0) return false; + uint32_t sampledIndex = Rcpp::sample(this->total_num_graphs, 1)[0] - 1; + uint32_t sampledIndex2 = Rcpp::sample(this->total_num_graphs, 1)[0] - 1; + uint64_t ret = sampledIndex; + ret = ret << 32; + ret = ret + sampledIndex2; + return ret; + } +}; + + + +bool CompressedTrie::AppendChain(std::vector<key_tp>& word) +{ + bool ret = this->appendRecursive(&word[0], word.size()); + this->total_num_graphs += ret; + return ret; +} - // Now it is easier to add a word to trie: - // since characters are sorted, just traverse and add individual nodes - GraphChange* current = nullptr; - if(this->root.find(word[0]) == this->root.end()) + +bool CompressedTrie::appendRecursive(key_tp* word, uint32_t len) +{ + Node* currNode = this->root; + + + while(true) + { + key_tp currFirstLetter = word[0]; + + // No branch exists for our word + if(currNode->next_map.find(currFirstLetter) == currNode->next_map.end()) + { + // append new node with entire word + Node* appended = this->createNode(word, len); + currNode->next_map[currFirstLetter] = appended; + return true; + } + + Node* cand = currNode->next_map[currFirstLetter]; + + // Find an index at which graph and word divert + uint32_t moveIndex = this->compareValues(cand, word, len); + + // If node isnt fully in our word -> divide aligned and unaligned part + if(moveIndex < cand->num_values) { - current = new GraphChange(); // word[0], nullptr - this->total_num_nodes++; + subdivideNode(cand, moveIndex); - root[word[0]] = current; } - else{ - current = root.at(word[0]); + // If node is our entire word -> mark and end + if(moveIndex == len) + { + cand->is_graph = true; + return true; } - uint32_t index = 1; - while(true) + // go deeper + currNode = cand; + word = word + moveIndex; + len = len - moveIndex; + } + + + + return false; +} + +#include <queue> + +void CompressedTrie::PrintTrie() +{ + std::queue<Node*> inorder_q; + inorder_q.push(this->root); + + std::cout << "#############################\n"; + + while(!inorder_q.empty()) + { + uint32_t size = inorder_q.size(); + for(int i = 0; i < size; i++) { - // The entire word is in Trie - if(index == word.size()) - { - // update number of graphs, if subgraph wasn't visited -> +1 - bool is_nonvisited = !(current->is_graph); - this->total_num_graphs += !(current->is_graph); - current->mark(); // mark the node as graph - return is_nonvisited; // stop - } + Node* n = inorder_q.front(); + inorder_q.pop(); + n->print(); - if(current->next_map.find(word[index]) == current->next_map.end()) + for(const auto& value : n->next_map) { - // Create and append all the missing nodes - current->next_map[word[index]] = new GraphChange();//word[index], current - this->total_num_nodes++; - - for(;index < word.size();index++) - { - current->next_map[word[index]] = new GraphChange();//word[index], current - this->total_num_nodes++; - current = current->next_map[word[index]]; - } - - // mark the last one only, for more details see GraphDeta - // update number of graphs, if subgraph wasn't visited -> +1 - bool is_nonvisited = !(current->is_graph); - this->total_num_graphs += !(current->is_graph); - current->mark(); - return is_nonvisited; + inorder_q.push(value.second); } - - // Go down the Trie - current = current->next_map[word[index]]; - index++; } + std::cout << "\n"; } +} + +void CompressedTrie::subdivideNode(Node* node, uint32_t divideIndex) +{ + uint32_t prevLen = node->num_values; + if(prevLen == divideIndex) return; - /** - * Randomly chooses a graph from the Trie and sets the G(adjacency) graph - * to it - */ - void sampleGraph(std::vector<LogicalMatrix*>& G_adjs) - { - uint32_t currIndex = 0; - - // TODO ideally the Rcpp::sample should be modified - // to handle larger numbers - uint32_t sampledIndex = Rcpp::sample(this->total_num_graphs, 1)[0] - 1; - - this->find(&currIndex, sampledIndex, G_adjs); - } + node->num_values = divideIndex; - /** - * Returns a number of subgraphs in Trie. - * - * Note: this isn't the same as the number of nodes. We count only the subgraphs, - * for more detail see the struct GraphChange - * - * Note 2: maybe this 32-bit number won't be enough, consider using 64bit number, - * but then, again, Rcpp::sample should be modified to 64-bit (2x sample + shift etc...)! - * e.g., .Machine$integer.max on tested machine:= 2147483647 - */ - uint32_t total_num() - { - return this->total_num_graphs; - } + Node* newNode = this->node_pool.get(); + newNode->values = node->values + divideIndex; + newNode->num_values = prevLen - divideIndex; + newNode->next_map = node->next_map; + newNode->is_graph = node->is_graph; + + node->next_map = {}; + node->next_map[newNode->values[0]] = newNode; + node->is_graph = false; + + return; +} + +uint32_t CompressedTrie::compareValues(Node* toAppend, key_tp* word, uint32_t len) +{ + uint32_t size = toAppend->num_values; + size = (size < len) ? size : len; - uint64_t nodes_num() + for(uint32_t i = 1; i < size; i++) { - return this->total_num_nodes; + if(word[i] != toAppend->values[i]) + { + return i; + } } + return size; +} +Node* CompressedTrie::createNode(key_tp* word, uint32_t len) +{ + Node* ret = this->node_pool.get(); + ret->num_values = len; + ret->values = new key_tp[len]; + std::memcpy(ret->values, (void*)word, len * sizeof(key_tp)); -protected: - /** - * Note: since unordered_map does not have any guarantee about the key order, - * the random sampling is modified by the changing key ordering! - * - * If such behavior is unacceptable, consider using std::map, but this - * will cost a change of complexity from O(1) to O(log n) - */ - std::unordered_map< - const key_tp, - GraphChange*, - key_hash - > - root; + return ret; +} + +void CompressedTrie::appendWordToNode(Node* toAppend, key_tp* word, uint32_t len) +{ + key_tp* newValues = new key_tp[toAppend->num_values + len]; + std::memcpy(newValues, toAppend->values, toAppend->num_values * sizeof(key_tp)); + std::memcpy(newValues + toAppend->num_values, word, len * sizeof(key_tp)); + delete[] toAppend->values; + toAppend->values = newValues; + toAppend->num_values += len; +} + +void CompressedTrie::SampleAdjacencyMatrix(std::vector<LogicalMatrix*>& matrix) +{ + this->current_needed_index = this->SampleLong() % this->total_num_graphs; + this->current_visited_index = 0; + this->current_G_adjs = &matrix; - // TODO assert total_num_graphs < INT_MAX - uint32_t total_num_graphs = 0; - uint64_t total_num_nodes = 0; + this->findRecursive(this->root); - void find(uint32_t* currIdx, const uint32_t& needed, std::vector<LogicalMatrix*>& G_adjs) - { - // Note: if the sampled subgraphs are too deep and stack memory overflow, - // then consider using the non-recursive version - + return; +} - for(const auto &item : this->root) - { - const auto& key = item.first; - const auto& value = item.second; - - // Go down - (*G_adjs[std::get<0>(key)])(std::get<1>(key), std::get<2>(key)) = true; - - *currIdx += value->is_graph; - - - this->recursiveFind(value, currIdx, needed, G_adjs); - if(*currIdx == needed) - { - break; - } - - (*G_adjs[std::get<0>(key)])(std::get<1>(key), std::get<2>(key)) = false; - } +void CompressedTrie::findRecursive(Node *currNode) +{ + if (this->current_visited_index == this->current_needed_index) + { + return; } - - void recursiveFind(GraphChange* curr, uint32_t* currIdx, const uint32_t& needed, std::vector<LogicalMatrix*>& G_adjs) + for (const auto &item : currNode->next_map) { - if(*currIdx == needed) + const auto &key = item.first; + const auto &value = item.second; + + G_forward(key, *this->current_G_adjs); + + this->current_visited_index += value->is_graph; + + this->findRecursive(value); + + if (this->current_visited_index == this->current_needed_index) { return; } - for(const auto &item : this->root) - { - const auto& key = item.first; - const auto& value = item.second; - (*G_adjs[std::get<0>(key)])(std::get<1>(key), std::get<2>(key)) = true; - currIdx += value->is_graph; - - this->recursiveFind(value, currIdx, needed, G_adjs); - if(*currIdx == needed) - { - return; - } - (*G_adjs[std::get<0>(key)])(std::get<1>(key), std::get<2>(key)) = false; - } - - return; + G_backward(key, *this->current_G_adjs); } -}; + return; +} + struct key_action_tp { ChangeType type; @@ -375,14 +494,15 @@ struct key_action_tp (*adjs[listID])(from, to) = type; } }; - +// ########################################### COMPRESSED TRIE CODE ############################################################### struct IData { const List* PK; const List* SK; - const NumericVector* sizes; + const IntegerVector* sizes; + uint32_t N; const List* bge_param; Function* score_full_func; @@ -396,13 +516,18 @@ struct ISettings double beta_H; }; +#define SAVE_FREQ 1000 +#define PRINT_FREQ 10000 +#define PARTIAL_ADD_ONLY "[pSK-SS][add only]" +#define PARTIAL_ADD_REMOVE "[pSK-SS][add-remove]" +#define SKSS_PHASE "[SK-SS][add-remove]" class Network { protected: IData input_data; ISettings settings; - //Trie V_G; + CompressedTrie V_G; /** * sort individual elements from lowest to largest by unordered_set @@ -450,19 +575,21 @@ public: void SK_SS() { this->partialSK_SS(); + /* uint32_t sampled_subgraphs = this->unord_V_G.size(); IntegerVector indices = Rcpp::sample(sampled_subgraphs, this->settings.I); indices.insert(indices.begin(), 1); - std::sort(indices.begin(), indices.end()); + std::sort(indices.begin(), indices.end());*/ for(uint32_t i = 0; i < this->settings.I; i++) { Rcout << "SK-SS, Iteration: " << i << "\n"; - + /* for(uint32_t j = indices[i]; j < indices[i+1]; j++) { this->unord_V_G[j-1].forward(this->G_sampled); - } + }*/ + this->V_G.SampleAdjacencyMatrix(this->G_sampled); this->emptyCurrCountsAndG(); @@ -488,54 +615,55 @@ public: protected: void SK_SS_phase2_iter() { - //this->V_G.sampleGraph(this->G); // sample adjacency matrix, e.g., sample G_0 NumericVector twoScores = this->callScoreFull(); - IntegerVector ls = Rcpp::sample(2, 2); - const List* SK_list = this->input_data.SK; - // for loop over 1...N - // for loop over neighbors - // try add edge - for(uint32_t i = 0; i < SK_list->length(); i++) + IntegerVector riter = Rcpp::sample( + this->input_data.N, this->input_data.N); + + for(uint32_t i = 0; i < this->input_data.N; i++) { - Rprintf("%5d/%5d \n Array:20%d\n", i, SK_list->length(), this->unord_V_G.size()); - uint32_t l = ls[i] - 1; - const IntegerMatrix& sk = (*SK_list)[l]; + uint32_t flatindex = riter[i] - 1; + uint32_t lindex = 0; + uint32_t xyindex = flatindex; - // Random permutation of the indices - uint32_t num = sk.ncol() * sk.nrow(); - IntegerVector xy = Rcpp::sample(num, num); - - // It is necessary to do 2d loop in COL, ROW order, since - // rows are local in memory by Rcpp standard - // UPD: after random permutation this doesn't matter - for(uint32_t j = 0; j < (uint32_t)sk.ncol(); j++) + uint32_t sumSizes = 0; + while(true) { - // Save intermediate results - Rprintf("Saving intermediate results\n"); - List& tmp_adjs = this->G_to_list(); - saveRDS(tmp_adjs, Rcpp::Named("file", "optimG.RData")); + uint32_t sizeMatrix = (*this->input_data.sizes)[lindex] * (*this->input_data.sizes)[lindex + 1]; + sumSizes += sizeMatrix; - Rprintf("[SK-SS][add only][%5u]/[%5u]\n", j, (uint32_t)sk.ncol()); - for(uint32_t k = 0; k < (uint32_t)sk.nrow(); k++) + if(sumSizes >= flatindex) { - - // extract 2D coords from 1D random permutation - uint32_t lin = xy[j * sk.nrow() + k] - 1; - uint32_t r = lin % sk.nrow(); - uint32_t c = lin / sk.nrow(); - - // if inside G_0 -> skip - if((*this->G[l])(r,c) != 0) - continue; - - // try add an edge outside of G_0 - // do not update V_G since we want to sample from partialSK-SS only! - // because of that -> shouldUpdateTrie = false - this->tryPerformMove(&twoScores, l, r, c, ChangeType::ADD, false); + break; } + lindex++; + xyindex -= sizeMatrix; } + + const IntegerMatrix& sk = (*SK_list)[lindex]; + uint32_t r = xyindex % sk.nrow(); + uint32_t c = xyindex / sk.nrow(); + + if(i % PRINT_FREQ == 0) + { + this->DebugInfo(SKSS_PHASE, i); + } + + if(i % SAVE_FREQ == 0) + { + this->SaveGraph(); + } + + // if inside G_0 -> skip + if((*this->G[lindex])(r,c) != 0) + continue; + + // try add an edge outside of G_0 + // do not update V_G since we want to sample from partialSK-SS only! + // because of that -> shouldUpdateTrie = false + this->tryPerformMove(&twoScores, lindex, r, c, ChangeType::ADD, false); + } } @@ -544,99 +672,105 @@ protected: { NumericVector twoScores = this->callScoreFull(); const List* SK_list = this->input_data.SK; - IntegerVector ls = Rcpp::sample(2, 2); + IntegerVector riter = Rcpp::sample( + this->input_data.N, this->input_data.N); Rprintf("partialSKSS-add-only phase\n"); - // for loop over 1...N - // for loop over neighbors - // try add edge - for(uint32_t i = 0; i < SK_list->length(); i++) + + for(uint32_t i = 0; i < this->input_data.N; i++) { - uint32_t l = ls[i] - 1; + uint32_t flatindex = riter[i] - 1; + uint32_t lindex = 0; + uint32_t xyindex = flatindex; + + uint32_t sumSizes = 0; + while(true) + { + uint32_t sizeMatrix = (*this->input_data.sizes)[lindex] * (*this->input_data.sizes)[lindex + 1]; + + sumSizes += sizeMatrix; + + if(sumSizes >= flatindex) + { + break; + } + lindex++; + xyindex -= sizeMatrix; + } + + const IntegerMatrix& sk = (*SK_list)[lindex]; + uint32_t r = xyindex % sk.nrow(); + uint32_t c = xyindex / sk.nrow(); + + if(i % (PRINT_FREQ * 1000) == 0) + { + this->DebugInfo(PARTIAL_ADD_ONLY, i); + } + if(sk(r, c) == 0) + { + continue; + } + this->tryPerformMove(&twoScores, lindex, r, c, ChangeType::ADD); - const IntegerMatrix& sk = (*SK_list)[l]; - Rprintf("%5d/%5d \n Array:20%d\n", i, SK_list->length(), this->unord_V_G.size()); - - // Random permutation of the indices - uint32_t num = sk.ncol() * sk.nrow(); - IntegerVector xy = Rcpp::sample(num, num); - - // It is necessary to do 2d loop in COL, ROW order, since - // rows are local in memory by Rcpp standard - // UPD: after random permutation this doesn't matter - for(uint32_t j = 0; j < (uint32_t)sk.ncol(); j++) - { - Rprintf("[pSK-SS][add only][%5u]/[%5u]\n", j, (uint32_t)sk.ncol()); - for(uint32_t k = 0; k < (uint32_t)sk.nrow(); k++) - { - - // extract 2D coords from 1D random permutation - uint32_t lin = xy[j * sk.nrow() + k] - 1; - uint32_t r = lin % sk.nrow(); - uint32_t c = lin / sk.nrow(); - - if(sk(r, c) == 0) - { - continue; - } - - this->tryPerformMove(&twoScores, l, r, c, ChangeType::ADD); - } - } } Rprintf("partialSKSS-add-remove phase\n"); + riter = Rcpp::sample( + this->input_data.N, this->input_data.N); - ls = Rcpp::sample(2, 2); - // for loop over 1...N - // for loop over neighbors - // try remove/add edge - for(uint32_t i = 0; i < SK_list->length(); i++) + for(uint32_t i = 0; i < this->input_data.N; i++) { - // Save intermediate results - Rprintf("Saving intermediate results\n"); - List& tmp_adjs = this->G_to_list(); - saveRDS(tmp_adjs, Rcpp::Named("file", "optimG.RData")); - + uint32_t flatindex = riter[i] - 1; + uint32_t lindex = 0; + uint32_t xyindex = flatindex; - Rprintf("%5d/%5d \n Array:20%d\n", i, SK_list->length(), this->unord_V_G.size()); - - uint32_t l = ls[i] - 1; - LogicalMatrix& gz = *(this->G[l]); - const IntegerMatrix& sk = (*SK_list)[l]; + uint32_t sumSizes = 0; + while(true) + { + uint32_t sizeMatrix = (*this->input_data.sizes)[lindex] * (*this->input_data.sizes)[lindex + 1]; + + sumSizes += sizeMatrix; + + if(sumSizes >= flatindex) + { + break; + } + lindex++; + xyindex -= sizeMatrix; + } - // Random permutation of the indices - uint32_t num = gz.ncol() * gz.nrow(); - IntegerVector xy = Rcpp::sample(num, num); + const IntegerMatrix& sk = (*SK_list)[lindex]; + uint32_t r = xyindex % sk.nrow(); + uint32_t c = xyindex / sk.nrow(); + if(i % PRINT_FREQ == 0) + { + this->DebugInfo(PARTIAL_ADD_REMOVE, i); + } + if(i % SAVE_FREQ == 0) + this->SaveGraph(); - // It is necessary to do 2d loop in COL, ROW order, since - // rows are local in memory by Rcpp standard - // UPD: after random permutation this doesn't matter - for(uint32_t j = 0; j < (uint32_t)gz.ncol(); j++) + if(sk(r, c) == 0) { - Rprintf("[pSK-SS][add only][%5u]/[%5u]\n", j, (uint32_t)gz.ncol()); - for(uint32_t k = 0; k < (uint32_t)gz.nrow(); k++) - { - // extract 2D coords from 1D random permutation - uint32_t lin = xy[j * gz.nrow() + k] - 1; - uint32_t r = lin % gz.nrow(); - uint32_t c = lin / gz.nrow(); - - if(sk(r, c) == 0) - { - continue; - } - - if(gz(r,c) == 0) // if not in G -> try add - this->tryPerformMove(&twoScores, l, r, c, ChangeType::ADD); - else // if in G -> try delete - this->tryPerformMove(&twoScores, l, r, c, ChangeType::DELETE); - } + continue; } + + LogicalMatrix& gz = *(this->G[lindex]); + + if(gz(r,c) == 0) // if not in G -> try add + this->tryPerformMove(&twoScores, lindex, r, c, ChangeType::ADD); + else // if in G -> try delete + this->tryPerformMove(&twoScores, lindex, r, c, ChangeType::DELETE); } } + void DebugInfo(std::string phase, uint32_t iter) + { + Rprintf("%s[%5u]/[%5u][Trie:%5u|%5u][Score:%9.6f]\n", + phase.c_str(), iter, this->input_data.N, + this->V_G.num_graphs(), this->V_G.num_nodes(), + this->probabilityMaxOptim); + } List& G_to_list() { @@ -794,11 +928,27 @@ protected: } // Try to add word to Trie - //this->V_G.appendChainAddOnly(this->ord_V_G); + this->V_G.AppendChain(this->ord_V_G); + key_action_tp to_push; to_push.type = action; to_push.coord = kt; - this->unord_V_G.push_back(to_push); + //this->unord_V_G.push_back(to_push); + } + + void SaveGraph() + { + /* + if(this->probabilityMaxOptim <= -80000000000.0) + { + return; + }*/ + // Save intermediate results + Rprintf("Saving intermediate results [%9.6f]\n", this->probabilityMaxOptim); + List& tmp_adjs = this->G_to_list(); + char fileName[50]; + sprintf(fileName, "optimG_%9.6f.RData", this->probabilityMaxOptim); + saveRDS(tmp_adjs, Rcpp::Named("file", fileName)); } void CheckVisitedGraphOptimum(NumericVector& scoreChanges) @@ -869,7 +1019,7 @@ protected: List c_SK_SS( const List& PK, const List& SK, - const NumericVector& sizes, + const IntegerVector& sizes, const List& param, uint32_t I, bool debug, @@ -887,7 +1037,7 @@ List c_SK_SS( List c_SK_SS( const List& PK, const List& SK, - const NumericVector& sizes, + const IntegerVector& sizes, const List& param, uint32_t I, bool debug, @@ -896,12 +1046,20 @@ List c_SK_SS( Function score_change_func ) { + uint32_t N = 0; + for(uint32_t i = 0; i < (sizes.size() - 1); i++) + { + N += sizes[i] * sizes[i+1]; + } + Rprintf("N = %u\n", N); + IData i_data; i_data.PK = &PK; i_data.SK = &SK; i_data.sizes = &sizes; i_data.bge_param = ¶m; + i_data.N = N; i_data.score_full_func = &score_full_func; i_data.score_change_func = &score_change_func; @@ -913,6 +1071,7 @@ List c_SK_SS( settings.beta_H = beta_H; settings.beta_L = beta_L; + Network* net = new Network(i_data, settings); net->SK_SS();