diff --git a/source/skeleton.cpp b/source/skeleton.cpp
index 67a77aa054ea5e5d68b6a895abf0c15d5e1e7768..7c6c3831888d2e4b4dce58f2dc60202c51af40a4 100644
--- a/source/skeleton.cpp
+++ b/source/skeleton.cpp
@@ -14,11 +14,14 @@
 #include <cstring>
 #include <cmath>
 #include <limits.h>
-
+#include <cstdio>
+#include <cstdint>
 
 /**
  * We include the external library of RcppArmadillo, 
  * this allows for a future usage of linear algebra, advanced Rcpp, etc...
+ * 
+ * NOT USED AS FOR NOW
  */
 // [[Rcpp::depends(RcppArmadillo)]]
 #include <RcppArmadillo.h>
@@ -26,7 +29,7 @@
 //#include <Rcpp.h>
 
 /**
- * Most used Rcpp objects, used to 
+ * Most used Rcpp objects, used to remove Rcpp prefix
  */
 using Rcpp::Rcout;
 using Rcpp::Rcerr;
@@ -41,17 +44,13 @@ using Rcpp::Function;
 using Rcpp::IntegerVector;
 using Rcpp::_;
 
-
+/**
+ * Also -- include the "saveRDS" function for partial result serialization and save
+ */
 Rcpp::Environment base("package:base");
 Function saveRDS = base["saveRDS"];
 
-/**
- * Ideally should be included from library, but let it be like that :)
- */
-//typedef unsigned int uint32_t;
-//typedef unsigned long long uint64_t;
-typedef unsigned short uint16_t;
-typedef unsigned char uint8_t;
+
 /**
  * This part will be the separate header file for tuple + hash + Trie data structures
  */
@@ -119,248 +118,368 @@ enum ChangeType
   DELETE = false
 };
 
-struct GraphChange
+
+
+// ########################################### COMPRESSED TRIE CODE ###############################################################
+/**
+ * Apply the key_tp's triplet index as TRUE on adjacency matrix 
+ */
+inline static void G_forward(const key_tp &key, std::vector<LogicalMatrix*>& G_adjs)
 {
-  /**
-   * Note: since unordered_map does not have any guarantee about the key order,
-   *    the random sampling is modified by the changing key ordering!
-   *    
-   * If such behavior is unacceptable, consider using std::map, but this
-   *   will cost a change of complexity from O(1) to O(log n)
-   */
-  std::unordered_map<
-    const key_tp, 
-    GraphChange*,
-    key_hash
-    >
-    next_map;
+  const uint8_t one = std::get<0>(key);
+  const uint16_t two = std::get<1>(key);
+  const uint16_t three = std::get<2>(key);
   
-  /**
-   * After Trie receives a word, only the end of it is actually a subgraph,
-   * 
-   * Example to illustrate the intermediate subgraph mechanics:   
-   *        add 2 -> add 1 -> add 5 -> add 4 -> delete 2 -> add 3
-   *        
-   *        then, the visited subgraphs are:
-   *            {[2], [1, 2], [1, 2, 5], [1, 2, 4, 5], [1, 4, 5], [1, 3, 4, 5]}
-   *        
-   *        
-   *        which results in Trie:
-   *        
-   *               2
-   *        root---|      5
-   *               |   2--| 
-   *               1*--|  4*---5
-   *                   |  
-   *                   4*--5  
-   *                   |
-   *                   3*--4*--5
-   *        
-   *        
-   *        ! Note -> nodes with * haven't been visited !
-   *            
-   *  > if no flag is used to determine the node in Trie, we can't mark the above
-   *  > when pushing a word, only its last tail node will be marked
-   */
+
+  (*G_adjs[one])(two, three) = true;
+}
+
+/**
+ * Apply the key_tp's triplet index as FALSE on adjacency matrix 
+ */
+inline static void G_backward(const key_tp &key, std::vector<LogicalMatrix*>& G_adjs)
+{
+  const uint8_t one = std::get<0>(key);
+  const uint16_t two = std::get<1>(key);
+  const uint16_t three = std::get<2>(key);
+  
+  (*G_adjs[one])(two, three) = true;
+}
+
+struct Node
+{
   bool is_graph = false;
+  key_tp *values = nullptr;
+  uint32_t num_values = 0;
   
-  void mark()
-  {
-    this->is_graph = true;
-  }
+  std::unordered_map<
+    key_tp,
+    Node *,
+    key_hash>
+    next_map;
   
-  GraphChange()
+  void print()
   {
-    this->is_graph = false;
+    std::cout << "[";
+    for(uint32_t i = 0; i < num_values; i++)
+    {
+      key_tp k = values[i];
+      std::cout << "(" 
+                << (uint32_t)std::get<0>(k) << "," 
+                << std::get<1>(k) << "," 
+                << std::get<2>(k) << ") ";
+    }
+    std::cout << "]    ";
   }
 };
 
-class Trie
+#define ALLOC_SIZE ((2UL << 13))
+Node PREALLOCATED_POOL[ALLOC_SIZE];
+
+/**
+ * Optimization technique -- allocate a large portion of nodes in advance on stack,
+ * so that the OS won't spend any additional time allocating nodes one by one.
+ */
+class NodePool
 {
 public:
-  Trie()
+  NodePool()
   {
-    
   }
   
-  ~Trie()
+  Node *get()
+  {
+    if (this->current_index >= ALLOC_SIZE)
+    {
+      Rprintf("Maximum number of nodes (%5lu) reached! Now allocating one by one!", ALLOC_SIZE);
+      Node* ret = new Node;
+      return ret;
+    }
+    Node *ret = PREALLOCATED_POOL + this->current_index;
+    this->current_index++;
+    return ret;
+  }
+  
+  uint64_t size()
   {
-    // TODO free, but not really needed for single script :)
+    return this->current_index;
   }
   
+protected:
+  uint64_t current_index = 0;
+};
+
+/**
+ * After Trie receives a word, only the end of it is actually a subgraph,
+ * 
+ * Example to illustrate the intermediate subgraph mechanics:   
+ *        add 2 -> add 1 -> add 5 -> add 4 -> delete 2 -> add 3
+ *        
+ *        then, the visited subgraphs are:
+ *            {[2], [1, 2], [1, 2, 5], [1, 2, 4, 5], [1, 4, 5], [1, 3, 4, 5]}
+ *        
+ *        
+ *        which results in Trie (Which is then compressed):
+ *        
+ *               2
+ *        root---|      5
+ *               |   2--| 
+ *               1*--|  4*---5
+ *                   |  
+ *                   4*--5  
+ *                   |
+ *                   3*--4*--5
+ *        
+ *        
+ *        ! Note -> nodes with * haven't been visited and, thus, not subgraphs !
+ *        Compressed trie example:
+ *        https://www.geeksforgeeks.org/compressed-tries/
+ *        
+ *            
+ *  > if no flag is used to determine the node in Trie, we can't mark the above
+ *  > when pushing a word, only its last tail node will be marked
+ */
+class CompressedTrie
+{
+public:
+  CompressedTrie()  {}
+  ~CompressedTrie()  {}
   
   /**
-   * Append a word consisting of ADD only nodes.
+   * Append a word consisting of sorted ADD only nodes.
    * return TRUE only if non-visited subgraph added
    */
-  bool appendChainAddOnly(std::vector<key_tp>& word)
+  bool AppendChain(std::vector<key_tp>& word);
+  
+  /**
+   * Print current Trie, used only for debug purposes
+   */
+  void PrintTrie();
+  
+  /**
+   * Randomly chooses a graph from the Trie and sets the matrix(adjacency) 
+   * to it
+   */
+  void SampleAdjacencyMatrix(std::vector<LogicalMatrix*>& matrix);
+  
+  /**
+   * Return the number of nodes in a graph
+   */
+  uint64_t num_nodes() {return this->node_pool.size();}
+  
+  /**
+   * Return the number of nodes that are actually subgraphs
+   */
+  uint32_t num_graphs() {return this->total_num_graphs;}
+  
+protected:
+  NodePool node_pool;
+  Node *root = this->node_pool.get();
+  
+  uint32_t total_num_graphs = 0;
+
+  uint32_t current_needed_index;
+  uint32_t current_visited_index;
+  std::vector<LogicalMatrix*>* current_G_adjs;
+  void findRecursive(Node *currNode);
+  
+  void appendWordToNode(Node* toAppend, key_tp* word, uint32_t len);  // TODO remove, not used when no delete
+  bool appendRecursive(key_tp* word, uint32_t len);
+  uint32_t compareValues(Node* toAppend, key_tp* word, uint32_t len);
+  Node* createNode(key_tp* word, uint32_t len);
+  void subdivideNode(Node* node, uint32_t divideIndex);
+  
+  uint64_t SampleLong()
   {
-    if(word.size() == 0) return false;
+    uint32_t sampledIndex = Rcpp::sample(this->total_num_graphs, 1)[0] - 1; 
+    uint32_t sampledIndex2 = Rcpp::sample(this->total_num_graphs, 1)[0] - 1; 
+    uint64_t ret = sampledIndex;
+    ret = ret << 32;
+    ret = ret + sampledIndex2;
+    return ret;
+  }
+};
+
+
+
+bool CompressedTrie::AppendChain(std::vector<key_tp>& word)
+{
+  bool ret = this->appendRecursive(&word[0], word.size());
+  this->total_num_graphs += ret;
+  return ret;
+}
 
-    // Now it is easier to add a word to trie:
-    // since characters are sorted, just traverse and add individual nodes
-    GraphChange* current = nullptr;
-    if(this->root.find(word[0]) == this->root.end())
+
+bool CompressedTrie::appendRecursive(key_tp* word, uint32_t len)
+{
+  Node* currNode = this->root;
+  
+  
+  while(true)
+  {
+    key_tp currFirstLetter = word[0];
+    
+    // No branch exists for our word
+    if(currNode->next_map.find(currFirstLetter) == currNode->next_map.end())
+    {
+      // append new node with entire word
+      Node* appended = this->createNode(word, len);
+      currNode->next_map[currFirstLetter] = appended;
+      return true;
+    }
+    
+    Node* cand = currNode->next_map[currFirstLetter];
+    
+    // Find an index at which graph and word divert
+    uint32_t moveIndex = this->compareValues(cand, word, len);
+    
+    // If node isnt fully in our word -> divide aligned and unaligned part
+    if(moveIndex < cand->num_values)
     {
-      current = new GraphChange(); // word[0], nullptr
-      this->total_num_nodes++;
+      subdivideNode(cand, moveIndex);
       
-      root[word[0]] = current;
     }
-    else{
-      current = root.at(word[0]);
+    // If node is our entire word -> mark and end
+    if(moveIndex == len)
+    {
+      cand->is_graph = true;
+      return true;
     }
-    uint32_t index = 1;
     
-    while(true)
+    // go deeper
+    currNode = cand;
+    word = word + moveIndex;
+    len = len - moveIndex;
+  }
+  
+  
+  
+  return false;
+}
+
+#include <queue>
+
+void CompressedTrie::PrintTrie()
+{
+  std::queue<Node*> inorder_q;
+  inorder_q.push(this->root);
+  
+  std::cout << "#############################\n";
+  
+  while(!inorder_q.empty())
+  {
+    uint32_t size = inorder_q.size();
+    for(int i = 0; i < size; i++)
     {
-      // The entire word is in Trie
-      if(index == word.size())
-      {
-        // update number of graphs, if subgraph wasn't visited -> +1
-        bool is_nonvisited = !(current->is_graph); 
-        this->total_num_graphs += !(current->is_graph); 
-        current->mark(); // mark the node as graph
-        return is_nonvisited;  // stop
-      }
+      Node* n = inorder_q.front();
+      inorder_q.pop();
+      n->print();
       
-      if(current->next_map.find(word[index]) == current->next_map.end())
+      for(const auto& value : n->next_map)
       {
-        // Create and append all the missing nodes
-        current->next_map[word[index]] = new GraphChange();//word[index], current
-        this->total_num_nodes++;
-        
-        for(;index < word.size();index++)
-        {
-          current->next_map[word[index]] = new GraphChange();//word[index], current
-          this->total_num_nodes++;
-          current = current->next_map[word[index]];
-        }
-        
-        // mark the last one only, for more details see GraphDeta
-        // update number of graphs, if subgraph wasn't visited -> +1
-        bool is_nonvisited = !(current->is_graph); 
-        this->total_num_graphs += !(current->is_graph); 
-        current->mark();
-        return is_nonvisited;
+        inorder_q.push(value.second);
       }
-      
-      // Go down the Trie
-      current = current->next_map[word[index]];
-      index++;
     }
+    std::cout << "\n";
   }
+}
+
+void CompressedTrie::subdivideNode(Node* node, uint32_t divideIndex)
+{
+  uint32_t prevLen = node->num_values;
+  if(prevLen == divideIndex) return;
   
-  /**
-   * Randomly chooses a graph from the Trie and sets the G(adjacency) graph
-   * to it
-   */
-  void sampleGraph(std::vector<LogicalMatrix*>& G_adjs)
-  {
-    uint32_t currIndex = 0;
-    
-    // TODO ideally the Rcpp::sample should be modified
-    // to handle larger numbers
-    uint32_t sampledIndex = Rcpp::sample(this->total_num_graphs, 1)[0] - 1; 
-    
-    this->find(&currIndex, sampledIndex, G_adjs);
-  }
   
+  node->num_values = divideIndex;
   
-  /**
-   * Returns a number of subgraphs in Trie. 
-   * 
-   * Note: this isn't the same as the number of nodes. We count only the subgraphs,
-   *       for more detail see the struct GraphChange
-   * 
-   * Note 2: maybe this 32-bit number won't be enough, consider using 64bit number,
-   *   but then, again, Rcpp::sample should be modified to 64-bit (2x sample + shift etc...)!
-   *   e.g., .Machine$integer.max on tested machine:= 2147483647
-   */
-  uint32_t total_num()
-  {
-    return this->total_num_graphs;
-  }
+  Node* newNode = this->node_pool.get();
+  newNode->values = node->values + divideIndex;
+  newNode->num_values = prevLen - divideIndex;
+  newNode->next_map = node->next_map;
+  newNode->is_graph = node->is_graph;
+  
+  node->next_map = {};
+  node->next_map[newNode->values[0]] = newNode;
+  node->is_graph = false;
+  
+  return;
+}
+
+uint32_t CompressedTrie::compareValues(Node* toAppend, key_tp* word, uint32_t len)
+{
+  uint32_t size = toAppend->num_values;
+  size = (size < len) ? size : len;
   
-  uint64_t nodes_num()
+  for(uint32_t i = 1; i < size; i++)
   {
-    return this->total_num_nodes;
+    if(word[i] != toAppend->values[i])
+    {
+      return i;
+    }
   }
+  return size;
+}
 
+Node* CompressedTrie::createNode(key_tp* word, uint32_t len)
+{
+  Node* ret = this->node_pool.get();
+  ret->num_values = len;
+  ret->values = new key_tp[len];
+  std::memcpy(ret->values, (void*)word, len * sizeof(key_tp));
   
-protected:
-  /**
-   * Note: since unordered_map does not have any guarantee about the key order,
-   *    the random sampling is modified by the changing key ordering!
-   *    
-   * If such behavior is unacceptable, consider using std::map, but this
-   *   will cost a change of complexity from O(1) to O(log n)
-   */
-  std::unordered_map<
-    const key_tp, 
-    GraphChange*,
-    key_hash
-  > 
-  root;
+  return ret;
+}
+
+void CompressedTrie::appendWordToNode(Node* toAppend, key_tp* word, uint32_t len)
+{
+  key_tp* newValues = new key_tp[toAppend->num_values + len];
+  std::memcpy(newValues, toAppend->values, toAppend->num_values * sizeof(key_tp));
+  std::memcpy(newValues + toAppend->num_values, word, len * sizeof(key_tp));
+  delete[] toAppend->values;
+  toAppend->values = newValues;
+  toAppend->num_values += len;
+}
+
+void CompressedTrie::SampleAdjacencyMatrix(std::vector<LogicalMatrix*>& matrix)
+{
+  this->current_needed_index = this->SampleLong() % this->total_num_graphs;
+  this->current_visited_index = 0;
+  this->current_G_adjs = &matrix;
   
-  // TODO assert total_num_graphs < INT_MAX
-  uint32_t total_num_graphs = 0;
-  uint64_t total_num_nodes = 0;
+  this->findRecursive(this->root);
   
-  void find(uint32_t* currIdx, const uint32_t& needed, std::vector<LogicalMatrix*>& G_adjs)
-  {
-    // Note: if the sampled subgraphs are too deep and stack memory overflow,  
-    // then consider using the non-recursive version
-    
+  return;
+}
 
-    for(const auto &item : this->root)
-    {  
-      const auto& key = item.first;
-      const auto& value = item.second;
-      
-      // Go down
-      (*G_adjs[std::get<0>(key)])(std::get<1>(key), std::get<2>(key)) = true;
-      
-      *currIdx += value->is_graph;
-      
-      
-      this->recursiveFind(value, currIdx, needed, G_adjs);
-      if(*currIdx == needed)
-      {
-        break;
-      }
-      
-      (*G_adjs[std::get<0>(key)])(std::get<1>(key), std::get<2>(key)) = false;
-    }
+void CompressedTrie::findRecursive(Node *currNode)
+{
+  if (this->current_visited_index == this->current_needed_index)
+  {
+    return;
   }
   
-  
-  void recursiveFind(GraphChange* curr, uint32_t* currIdx, const uint32_t& needed, std::vector<LogicalMatrix*>& G_adjs)
+  for (const auto &item : currNode->next_map)
   {
-    if(*currIdx == needed)
+    const auto &key = item.first;
+    const auto &value = item.second;
+    
+    G_forward(key, *this->current_G_adjs);
+    
+    this->current_visited_index += value->is_graph;
+    
+    this->findRecursive(value);
+    
+    if (this->current_visited_index == this->current_needed_index)
     {
       return;
     }
     
-    for(const auto &item : this->root)
-    {  
-        const auto& key = item.first;
-        const auto& value = item.second;
-        (*G_adjs[std::get<0>(key)])(std::get<1>(key), std::get<2>(key)) = true;
-        currIdx += value->is_graph;
-        
-        this->recursiveFind(value, currIdx, needed, G_adjs);
-        if(*currIdx == needed)
-        {
-          return;
-        }
-        (*G_adjs[std::get<0>(key)])(std::get<1>(key), std::get<2>(key)) = false;
-    }
-    
-    return;
+    G_backward(key, *this->current_G_adjs);
   }
   
-};
+  return;
+}
+
 struct key_action_tp
 {
   ChangeType type;
@@ -375,14 +494,15 @@ struct key_action_tp
     (*adjs[listID])(from, to) = type;
   }
 };
-
+// ########################################### COMPRESSED TRIE CODE ###############################################################
 
 
 struct IData
 {
   const List* PK;
   const List* SK;
-  const NumericVector* sizes;
+  const IntegerVector* sizes;
+  uint32_t N;
   const List* bge_param;
   
   Function* score_full_func;
@@ -396,13 +516,18 @@ struct ISettings
   double beta_H;
 };
 
+#define SAVE_FREQ 1000
+#define PRINT_FREQ 10000
+#define PARTIAL_ADD_ONLY "[pSK-SS][add only]"
+#define PARTIAL_ADD_REMOVE "[pSK-SS][add-remove]"
+#define SKSS_PHASE "[SK-SS][add-remove]"
 
 class Network
 {
 protected:
   IData input_data;
   ISettings settings;
-  //Trie V_G;
+  CompressedTrie V_G;
   
   /**
    * sort individual elements from lowest to largest by unordered_set
@@ -450,19 +575,21 @@ public:
   void SK_SS()
   {
     this->partialSK_SS();
+    /*
     uint32_t sampled_subgraphs = this->unord_V_G.size();
     IntegerVector indices = Rcpp::sample(sampled_subgraphs, this->settings.I);
     indices.insert(indices.begin(), 1);
-    std::sort(indices.begin(), indices.end());
+    std::sort(indices.begin(), indices.end());*/
     
     for(uint32_t i = 0; i < this->settings.I; i++)
     {
       Rcout << "SK-SS, Iteration: " << i << "\n";
-      
+      /*
       for(uint32_t j = indices[i]; j < indices[i+1]; j++)
       {
           this->unord_V_G[j-1].forward(this->G_sampled);
-      }
+      }*/
+      this->V_G.SampleAdjacencyMatrix(this->G_sampled);
       
       
       this->emptyCurrCountsAndG();
@@ -488,54 +615,55 @@ public:
 protected:  
   void SK_SS_phase2_iter()
   {
-    //this->V_G.sampleGraph(this->G); // sample adjacency matrix, e.g., sample G_0
     NumericVector twoScores = this->callScoreFull();
-    IntegerVector ls = Rcpp::sample(2, 2);
-    
     const List* SK_list = this->input_data.SK;
     
-    // for loop over 1...N
-    // for loop over neighbors
-    // try add edge
-    for(uint32_t i = 0; i < SK_list->length(); i++)
+    IntegerVector riter = Rcpp::sample(
+      this->input_data.N, this->input_data.N);
+    
+    for(uint32_t i = 0; i < this->input_data.N; i++)
     {
-      Rprintf("%5d/%5d  \n  Array:20%d\n", i, SK_list->length(), this->unord_V_G.size());
-      uint32_t l = ls[i] - 1;
-      const IntegerMatrix& sk = (*SK_list)[l];
+      uint32_t flatindex = riter[i] - 1;
+      uint32_t lindex = 0;
+      uint32_t xyindex = flatindex;
       
-      // Random permutation of the indices
-      uint32_t num = sk.ncol() * sk.nrow();
-      IntegerVector xy = Rcpp::sample(num, num);
-      
-      // It is necessary to do 2d loop in COL, ROW order, since
-      // rows are local in memory by Rcpp standard
-      // UPD: after random permutation this doesn't matter
-      for(uint32_t j = 0; j < (uint32_t)sk.ncol(); j++)
+      uint32_t sumSizes = 0;
+      while(true)
       {
-        // Save intermediate results
-        Rprintf("Saving intermediate results\n");
-        List& tmp_adjs = this->G_to_list();
-        saveRDS(tmp_adjs, Rcpp::Named("file", "optimG.RData"));
+        uint32_t sizeMatrix = (*this->input_data.sizes)[lindex] * (*this->input_data.sizes)[lindex + 1];
+        sumSizes += sizeMatrix;
         
-        Rprintf("[SK-SS][add only][%5u]/[%5u]\n", j, (uint32_t)sk.ncol());
-        for(uint32_t k = 0; k < (uint32_t)sk.nrow(); k++)
+        if(sumSizes >= flatindex)
         {
-          
-          // extract 2D coords from 1D random permutation
-          uint32_t lin = xy[j * sk.nrow() + k] - 1;
-          uint32_t r = lin % sk.nrow();
-          uint32_t c = lin / sk.nrow();
-          
-          // if inside G_0 -> skip
-          if((*this->G[l])(r,c) != 0)      
-            continue;
-          
-          // try add an edge outside of G_0
-          // do not update V_G since we want to sample from partialSK-SS only!
-          // because of that -> shouldUpdateTrie = false
-          this->tryPerformMove(&twoScores, l, r, c, ChangeType::ADD, false);
+          break;
         }
+        lindex++;
+        xyindex -= sizeMatrix;
       }
+      
+      const IntegerMatrix& sk = (*SK_list)[lindex];
+      uint32_t r = xyindex % sk.nrow();
+      uint32_t c = xyindex / sk.nrow();
+      
+      if(i % PRINT_FREQ == 0)
+      {
+        this->DebugInfo(SKSS_PHASE, i);
+      }
+      
+      if(i % SAVE_FREQ == 0)
+      {
+        this->SaveGraph();
+      }
+      
+      // if inside G_0 -> skip
+      if((*this->G[lindex])(r,c) != 0)      
+        continue;
+      
+      // try add an edge outside of G_0
+      // do not update V_G since we want to sample from partialSK-SS only!
+      // because of that -> shouldUpdateTrie = false
+      this->tryPerformMove(&twoScores, lindex, r, c, ChangeType::ADD, false);
+      
     }
   }
   
@@ -544,99 +672,105 @@ protected:
   {
     NumericVector twoScores = this->callScoreFull();
     const List* SK_list = this->input_data.SK;
-    IntegerVector ls = Rcpp::sample(2, 2);
     
+    IntegerVector riter = Rcpp::sample(
+      this->input_data.N, this->input_data.N);
     Rprintf("partialSKSS-add-only phase\n");
-    // for loop over 1...N
-    // for loop over neighbors
-    // try add edge
-    for(uint32_t i = 0; i < SK_list->length(); i++)
+    
+    for(uint32_t i = 0; i < this->input_data.N; i++)
     {
-       uint32_t l = ls[i] - 1;
+      uint32_t flatindex = riter[i] - 1;
+      uint32_t lindex = 0;
+      uint32_t xyindex = flatindex;
+      
+      uint32_t sumSizes = 0;
+      while(true)
+      {
+        uint32_t sizeMatrix = (*this->input_data.sizes)[lindex] * (*this->input_data.sizes)[lindex + 1];
+        
+        sumSizes += sizeMatrix;
+        
+        if(sumSizes >= flatindex)
+        {
+          break;
+        }
+        lindex++;
+        xyindex -= sizeMatrix;
+      }
+      
+      const IntegerMatrix& sk = (*SK_list)[lindex];
+      uint32_t r = xyindex % sk.nrow();
+      uint32_t c = xyindex / sk.nrow();
+      
+      if(i % (PRINT_FREQ * 1000) == 0)
+      {
+        this->DebugInfo(PARTIAL_ADD_ONLY, i);
+      }
+      if(sk(r, c) == 0) 
+      {
+        continue;
+      }
+      this->tryPerformMove(&twoScores, lindex, r, c, ChangeType::ADD);
       
-       const IntegerMatrix& sk = (*SK_list)[l];
-       Rprintf("%5d/%5d  \n  Array:20%d\n", i, SK_list->length(), this->unord_V_G.size());
-
-       // Random permutation of the indices
-       uint32_t num = sk.ncol() * sk.nrow();
-       IntegerVector xy = Rcpp::sample(num, num);
-
-       // It is necessary to do 2d loop in COL, ROW order, since
-       // rows are local in memory by Rcpp standard
-       // UPD: after random permutation this doesn't matter
-       for(uint32_t j = 0; j < (uint32_t)sk.ncol(); j++)
-       {
-          Rprintf("[pSK-SS][add only][%5u]/[%5u]\n", j, (uint32_t)sk.ncol());
-          for(uint32_t k = 0; k < (uint32_t)sk.nrow(); k++)
-          {
-            
-            // extract 2D coords from 1D random permutation
-            uint32_t lin = xy[j * sk.nrow() + k] - 1;
-            uint32_t r = lin % sk.nrow();
-            uint32_t c = lin / sk.nrow();
-            
-            if(sk(r, c) == 0) 
-            {
-              continue;
-            }
-              
-            this->tryPerformMove(&twoScores, l, r, c, ChangeType::ADD);
-          }
-       }
     }
     
     Rprintf("partialSKSS-add-remove phase\n");
+    riter = Rcpp::sample(
+      this->input_data.N, this->input_data.N);
     
-    ls = Rcpp::sample(2, 2);
-    // for loop over 1...N
-    // for loop over neighbors
-    // try remove/add edge
-    for(uint32_t i = 0; i < SK_list->length(); i++)
+    for(uint32_t i = 0; i < this->input_data.N; i++)
     {
-      // Save intermediate results
-      Rprintf("Saving intermediate results\n");
-      List& tmp_adjs = this->G_to_list();
-      saveRDS(tmp_adjs, Rcpp::Named("file", "optimG.RData"));
-      
+      uint32_t flatindex = riter[i] - 1;
+      uint32_t lindex = 0;
+      uint32_t xyindex = flatindex;
       
-      Rprintf("%5d/%5d  \n  Array:20%d\n", i, SK_list->length(), this->unord_V_G.size());
-      
-      uint32_t l = ls[i] - 1;
-      LogicalMatrix& gz = *(this->G[l]);
-      const IntegerMatrix& sk = (*SK_list)[l];
+      uint32_t sumSizes = 0;
+      while(true)
+      {
+        uint32_t sizeMatrix = (*this->input_data.sizes)[lindex] * (*this->input_data.sizes)[lindex + 1];
+        
+        sumSizes += sizeMatrix;
+        
+        if(sumSizes >= flatindex)
+        {
+          break;
+        }
+        lindex++;
+        xyindex -= sizeMatrix;
+      }
       
-      // Random permutation of the indices
-      uint32_t num = gz.ncol() * gz.nrow();
-      IntegerVector xy = Rcpp::sample(num, num);
+      const IntegerMatrix& sk = (*SK_list)[lindex];
+      uint32_t r = xyindex % sk.nrow();
+      uint32_t c = xyindex / sk.nrow();
       
+      if(i % PRINT_FREQ == 0)
+      {
+        this->DebugInfo(PARTIAL_ADD_REMOVE, i);
+      }
+      if(i % SAVE_FREQ == 0)
+        this->SaveGraph();
       
-      // It is necessary to do 2d loop in COL, ROW order, since
-      // rows are local in memory by Rcpp standard
-      // UPD: after random permutation this doesn't matter
-      for(uint32_t j = 0; j < (uint32_t)gz.ncol(); j++)
+      if(sk(r, c) == 0) 
       {
-        Rprintf("[pSK-SS][add only][%5u]/[%5u]\n", j, (uint32_t)gz.ncol());
-        for(uint32_t k = 0; k < (uint32_t)gz.nrow(); k++)
-        {
-          // extract 2D coords from 1D random permutation
-          uint32_t lin = xy[j * gz.nrow() + k] - 1;
-          uint32_t r = lin % gz.nrow();
-          uint32_t c = lin / gz.nrow();
-          
-          if(sk(r, c) == 0) 
-          {
-            continue;
-          }
-          
-          if(gz(r,c) == 0)  // if not in G -> try add
-            this->tryPerformMove(&twoScores, l, r, c, ChangeType::ADD); 
-          else              // if in G -> try delete
-            this->tryPerformMove(&twoScores, l, r, c, ChangeType::DELETE); 
-        }
+        continue;
       }
+      
+      LogicalMatrix& gz = *(this->G[lindex]);
+      
+      if(gz(r,c) == 0)  // if not in G -> try add
+        this->tryPerformMove(&twoScores, lindex, r, c, ChangeType::ADD); 
+      else              // if in G -> try delete
+        this->tryPerformMove(&twoScores, lindex, r, c, ChangeType::DELETE); 
     }
   }
   
+  void DebugInfo(std::string phase, uint32_t iter)
+  {
+    Rprintf("%s[%5u]/[%5u][Trie:%5u|%5u][Score:%9.6f]\n", 
+            phase.c_str(), iter, this->input_data.N, 
+            this->V_G.num_graphs(), this->V_G.num_nodes(),
+            this->probabilityMaxOptim);
+  }
   
   List& G_to_list()
   {
@@ -794,11 +928,27 @@ protected:
     }
     
     // Try to add word to Trie
-    //this->V_G.appendChainAddOnly(this->ord_V_G);
+    this->V_G.AppendChain(this->ord_V_G);
+    
     key_action_tp to_push;
     to_push.type = action;
     to_push.coord = kt;
-    this->unord_V_G.push_back(to_push);
+    //this->unord_V_G.push_back(to_push);
+  }
+  
+  void SaveGraph()
+  {
+    /*
+    if(this->probabilityMaxOptim <= -80000000000.0)
+    {
+      return;
+    }*/
+    // Save intermediate results
+    Rprintf("Saving intermediate results [%9.6f]\n", this->probabilityMaxOptim);
+    List& tmp_adjs = this->G_to_list();
+    char fileName[50];
+    sprintf(fileName, "optimG_%9.6f.RData", this->probabilityMaxOptim);
+    saveRDS(tmp_adjs, Rcpp::Named("file", fileName));
   }
   
   void CheckVisitedGraphOptimum(NumericVector& scoreChanges)
@@ -869,7 +1019,7 @@ protected:
 List c_SK_SS(
                          const List& PK,
                          const List& SK,
-                         const NumericVector& sizes,
+                         const IntegerVector& sizes,
                          const List& param,
                          uint32_t I,
                          bool debug,
@@ -887,7 +1037,7 @@ List c_SK_SS(
 List c_SK_SS(
                   const List& PK,
                   const List& SK,
-                  const NumericVector& sizes,
+                  const IntegerVector& sizes,
                   const List& param,
                   uint32_t I,
                   bool debug,
@@ -896,12 +1046,20 @@ List c_SK_SS(
                   Function score_change_func
                   )
 {
+  uint32_t N = 0;
+  for(uint32_t i = 0; i < (sizes.size() - 1); i++)
+  {
+    N += sizes[i] * sizes[i+1];
+  }
+  Rprintf("N = %u\n", N);
+  
   IData i_data;
   
   i_data.PK = &PK;
   i_data.SK = &SK;
   i_data.sizes = &sizes;
   i_data.bge_param = &param;
+  i_data.N = N;
   
   i_data.score_full_func = &score_full_func;
   i_data.score_change_func = &score_change_func;
@@ -913,6 +1071,7 @@ List c_SK_SS(
   settings.beta_H = beta_H;
   settings.beta_L = beta_L;
   
+
   Network* net = new Network(i_data, settings);
   
   net->SK_SS();