diff --git a/source/skeleton.R b/source/skeleton.R
index 00871b6b494f80c725907753accf08cd5e622f04..877ee23b443c8807c36f2aaa77066d7f83a02290 100644
--- a/source/skeleton.R
+++ b/source/skeleton.R
@@ -200,15 +200,17 @@ P_D_G <- function(G_adjs, param_in)
     pDG1 <- pDG1 + BGe_compute(adj, param_in)
   }
   
-  nodes_cand <- rownames(G_adjs[[1]])
-  pDG2 <- sum(sapply(nodes_cand, function(node)
-  {
-    score <- BGe_node_compute(node, integer(0), param_in)
-    return(score)
-  })
-  )
-  
-  return(pDG1 + pDG2)
+  # They are still the same!
+  # nodes_cand <- rownames(G_adjs[[1]])
+  # pDG2 <- sum(sapply(nodes_cand, function(node)
+  # {
+  #   score <- BGe_node_compute(node, integer(0), param_in)
+  #   return(score)
+  # })
+  # )
+  # + pDG2
+  
+  return(pDG1)
 }
 
 #'
@@ -378,7 +380,7 @@ SK_SS_tripartite <- function(GE, PK, SK, parts, I=1, debug=FALSE, alpha=0.05, be
   # this is a large computation that computes a large parameter matrix:
   # N x N, where N is the sum of sizes, e.g., total number of genes
   cat("Computing large BGe matrix...\n")
-  if(FALSE)
+  if(TRUE)
   {
     myScore <- scoreparameters_BiDAG_BGe(n = ncol(GE), data = GE)
     param_in <- list(n=myScore$n, awpN=myScore$awpN, TN=myScore$TN, scoreconstvec=myScore$scoreconstvec)
@@ -394,10 +396,18 @@ SK_SS_tripartite <- function(GE, PK, SK, parts, I=1, debug=FALSE, alpha=0.05, be
 
   set.seed(0)
   
+  start.time <- Sys.time()
+  print(start.time)
+  
   E_f_G_out <- c_SK_SS(PK_in,SK_in, sizes, param_in,
                       I, debug, beta_L, beta_H,
                       score_G, score_change_G)
   
+  
+  end.time <- Sys.time()
+  print(end.time)
+  print(end.time - start.time)
+  
   print(E_f_G_out)
   
   E_f_G_out <- lapply(E_f_G_out, function(m) m > alpha)
diff --git a/source/skeleton.cpp b/source/skeleton.cpp
index 6b623982852988b534b62df9dcc019d71b47d3a8..062a41dc2b59808e22e221a06a7a2eb690055bc7 100644
--- a/source/skeleton.cpp
+++ b/source/skeleton.cpp
@@ -8,6 +8,7 @@
 #include <string>
 #include <vector>
 #include <algorithm>
+#include <map>
 
 // Pure C99
 #include <cstring>
@@ -67,9 +68,16 @@ struct key_hash : public std::unary_function<key_t, std::size_t>
 {
   std::size_t operator()(const key_t& k) const
   {
-    return (std::size_t)(std::get<0>(k)) ^ 
-           (std::size_t)(std::get<1>(k)) ^ 
-           (std::size_t)(std::get<2>(k));
+    std::size_t ret = 0;
+    ret ^= std::hash<uint32_t>()(std::get<0>(k)) + 0x9e3779b9 + (ret<<6) + (ret>>2);
+    ret ^= std::hash<uint32_t>()(std::get<1>(k)) + 0x9e3779b9 + (ret<<6) + (ret>>2);
+    ret ^= std::hash<uint32_t>()(std::get<2>(k)) + 0x9e3779b9 + (ret<<6) + (ret>>2);
+    
+    return ret;
+    
+    // return (std::size_t)(std::get<0>(k)) ^ 
+    //        (std::size_t)(std::get<1>(k)) ^ 
+    //        (std::size_t)(std::get<2>(k));
   }
 };
 
@@ -118,6 +126,14 @@ struct GraphChange
    */
   GraphChange* prev = nullptr;
   
+  
+  /**
+   * Note: since unordered_map does not have any guarantee about the key order,
+   *    the random sampling is modified by the changing key ordering!
+   *    
+   * If such behavior is unacceptable, consider using std::map, but this
+   *   will cost a change of complexity from O(1) to O(log n)
+   */
   std::unordered_map<
     const key_t, 
     GraphChange*,
@@ -157,7 +173,7 @@ struct GraphChange
    *        ! Note -> nodes with * haven't been visited !
    *            
    *  > if no flag is used to determine the node in Trie, we can't mark the above
-   *  > when pushing a word, only its last tail will be marked
+   *  > when pushing a word, only its last tail node will be marked
    */
   bool is_graph = false;
   
@@ -214,6 +230,11 @@ struct GraphChange
     
     return prev;
   }
+  
+  void printG()
+  {
+    Rcout << listID << "," << from << "," << to << "; " << is_graph << "\n";
+  }
 };
 
 struct graph_comp
@@ -238,7 +259,7 @@ public:
   
   ~Trie()
   {
-    
+    // TODO free, but not really needed for single script :)
   }
   
   
@@ -250,6 +271,12 @@ public:
   {
     if(word.size() == 0) return false;
     
+    /*
+    Rcout << "adding a word:\n";
+    for(key_t k : word)
+      Rcout << "["<< (uint32_t)std::get<0>(k) << "," << (uint32_t)std::get<1>(k) << "," << (uint32_t)std::get<2>(k) << "]";
+    Rcout<<"\n";
+    */
 
     // Now it is easier to add a word to trie:
     // since characters are sorted, just traverse and add individual nodes
@@ -257,6 +284,7 @@ public:
     if(this->root.find(word[0]) == this->root.end())
     {
       current = new GraphChange(word[0], nullptr);
+      root[word[0]] = current;
     }
     else{
       current = root.at(word[0]);
@@ -299,25 +327,55 @@ public:
     }
   }
   
-  
+  /**
+   * Randomly chooses a graph from the Trie and sets the G(adjacency) graph
+   * to it
+   */
   void sampleGraph(std::vector<LogicalMatrix*>& G_adjs)
   {
     uint32_t currIndex = 0;
     
     // TODO ideally the Rcpp::sample should be modified
     // to handle larger numbers
-    uint32_t sampledIndex = Rcpp::sample(INT_MAX, 1)[0]; 
+    uint32_t sampledIndex = Rcpp::sample(this->total_num_graphs, 1)[0] - 1; 
     
-    // TODO  
+    this->find(&currIndex, sampledIndex, G_adjs);
   }
   
-  // TODO maybe 32 bit won't be enough
+  // 
+  /**
+   * Returns a number of subgraphs in Trie. 
+   * 
+   * Note: this isn't the same as the number of nodes. We count only the subgraphs,
+   *       for more detail see the struct GraphChange
+   * 
+   * Note 2: maybe this 32-bit number won't be enough, consider using 64bit number,
+   *   but then, again, Rcpp::sample should be modified to 64-bit (2x sample + shift etc...)!
+   *   e.g., .Machine$integer.max on tested machine:= 2147483647
+   */
   uint32_t total_num()
   {
     return this->total_num_graphs;
   }
   
+  
+  void print()
+  {
+    
+    for(const auto & [ key, value ] : this->root)
+    {
+        this->printRecursive(value);
+    }
+  }
+  
 protected:
+  /**
+   * Note: since unordered_map does not have any guarantee about the key order,
+   *    the random sampling is modified by the changing key ordering!
+   *    
+   * If such behavior is unacceptable, consider using std::map, but this
+   *   will cost a change of complexity from O(1) to O(log n)
+   */
   std::unordered_map<
     const key_t, 
     GraphChange*,
@@ -325,22 +383,68 @@ protected:
   > 
   root;
   
-  // TODO assert total_num_graphs < INT_MAX
-  uint64_t total_num_graphs = 0;
+  
+  void printRecursive(GraphChange* curr)
+  {
+    Rcout << "recursive:\n";
+    curr->printG();
+    Rcout << "going down:\n";
+    for(const auto & [ key, value ] : curr->next_map)
+    {
+      this->printRecursive(value);
+    }
+    Rcout << "going up:\n";
+  }
+  
   
   
-  void addWordRecursive(std::vector<GraphChange*>& word, uint32_t index)
+  
+  
+  // TODO assert total_num_graphs < INT_MAX
+  uint32_t total_num_graphs = 0;
+  
+  void find(uint32_t* currIdx, const uint32_t& needed, std::vector<LogicalMatrix*>& G_adjs)
   {
-    key_t currKey = std::make_tuple(word[0]->listID, word[0]->from, word[0]->to);
-    if(root.find(currKey) == root.end())
+    // Note: if the sampled subgraphs are too deep and stack memory overflow,  
+    // then consider using the non-recursive version
+    for(const auto & [ key, value ] : this->root)
     {
-        // TODO
+      // Go down
+      value->forward(G_adjs);
+      *currIdx += value->is_graph;
+      
+      
+      this->recursiveFind(value, currIdx, needed, G_adjs);
+      if(*currIdx == needed)
+      {
+        break;
+      }
+      
+      value->backward(G_adjs);
     }
   }
   
-  void findIndex(uint64_t* currIdx, const uint64_t& needed)
+  
+  void recursiveFind(GraphChange* curr, uint32_t* currIdx, const uint32_t& needed, std::vector<LogicalMatrix*>& G_adjs)
   {
-    // TODO
+    if(*currIdx == needed)
+    {
+      return;
+    }
+    
+    curr->forward(G_adjs);
+    currIdx += curr->is_graph;
+    
+    for(const auto & [ key, value ] : curr->next_map)
+    {
+        this->recursiveFind(value, currIdx, needed, G_adjs);
+        if(*currIdx == needed)
+        {
+          return;
+        }
+    }
+    
+    curr->backward(G_adjs);
     return;
   }
   
@@ -354,11 +458,34 @@ protected:
 // [[Rcpp::export]]
 double tst()
 {
-  IntegerVector rnd = Rcpp::sample(6, 6);
-  NumericMatrix t = NumericMatrix(2, 3, rnd.begin());
-  Rcout << t << "\n";
+  Trie trie;
+  trie.print();
+  std::vector<key_t> word;
+
+  
+  word.push_back(std::make_tuple(0, 5, 10));
+  trie.appendChainAddOnly(word);
+  trie.print();
+  
+  word.push_back(std::make_tuple(0, 7, 10));
+  trie.appendChainAddOnly(word);
+  trie.print();
+  
+  word.push_back(std::make_tuple(0, 5, 11));
+  trie.appendChainAddOnly(word);
+  trie.print();
+  
+  word.push_back(std::make_tuple(0, 4, 10));
+  trie.appendChainAddOnly(word);
+  trie.print();
+  
+  word.erase(word.begin() + 0);
+  trie.appendChainAddOnly(word);
+  trie.print();
+  
+  Rcout << "Final trie size: " << trie.total_num() << "\n";
+  
   
-  Rcout << t * 2.5 << "\n";
   return 0.0;
 }
 
@@ -414,6 +541,16 @@ protected:
   std::vector<NumericMatrix*> E_f_G;
   std::vector<NumericMatrix*> current_counts;
   double currentProb = 1.0; // TODO
+  NumericVector initialProbs;
+  
+  
+  double bge_max = -99999999999.0;
+  double bge_min = 9999999999.0;
+  double energy_max = -99999999999.0;
+  double energy_min = 9999999999.0;
+  
+  double prob_max = -99999999999.0;
+  double prob_min = 9999999999.0;
   
 public:  
   Network(IData input, ISettings settings)
@@ -443,6 +580,7 @@ public:
   void SK_SS()
   {
     this->partialSK_SS();
+    return; // TODO remove
     
     for(uint32_t i = 0; i < this->settings.I; i++)
     {
@@ -454,12 +592,17 @@ public:
   
   List& result()
   {
+    Rprintf("BGe min/max: %.10f, %.10f\n", this->bge_min, this->bge_max);
+    Rprintf("Energy min/max: %.10f, %.10f\n", this->energy_min, this->energy_max);
+    Rprintf("Prob min/max: %.10f, %.10f\n", this->prob_min, this->prob_max);
+    Rprintf("Initial probs: %.10f, %.10f\n", this->initialProbs[0], this->initialProbs[1]);
+    
     const List* SK_list = this->input_data.SK;
     List* ret = new List(SK_list->length());
 
     for(uint32_t i = 0; i < SK_list->length(); i++)
     {
-      *this->E_f_G[i] = (*this->E_f_G[i]) / (this->V_G.total_num() / 100.0);
+      *this->E_f_G[i] = (*this->E_f_G[i]); //  / (this->V_G.total_num() / 100.0)
       (*ret)[i] = *this->E_f_G[i];
     }
     
@@ -469,22 +612,59 @@ public:
 protected:  
   void SK_SS_phase2_iter()
   {
-    //TODO
+    this->V_G.sampleGraph(this->G); // sample adjacency matrix, e.g., sample G_0
+    NumericVector twoScores = this->callScoreFull();
+    
+    
+    const List* SK_list = this->input_data.SK;
+    
+    // for loop over 1...N
+    // for loop over neighbors
+    // try add edge
+    for(uint32_t i = 0; i < SK_list->length(); i++)
+    {
+      const IntegerMatrix& sk = (*SK_list)[i];
+      
+      // Random permutation of the indices
+      uint32_t num = sk.ncol() * sk.nrow();
+      IntegerVector xy = Rcpp::sample(num, num);
+      
+      // It is necessary to do 2d loop in COL, ROW order, since
+      // rows are local in memory by Rcpp standard
+      // UPD: after random permutation this doesn't matter
+      for(uint32_t j = 0; j < (uint32_t)sk.ncol(); j++)
+      {
+        for(uint32_t k = 0; k < (uint32_t)sk.nrow(); k++)
+        {
+          
+          // extract 2D coords from 1D random permutation
+          uint32_t lin = xy[j * sk.nrow() + k] - 1;
+          uint32_t r = lin % sk.nrow();
+          uint32_t c = lin / sk.nrow();
+          
+          Rcout << "\014" << 
+            i << "," << j << "," << k << " /" << 
+              (SK_list->length()) << "," << (uint32_t)sk.ncol() << "," << (uint32_t)sk.nrow() <<
+                "\n" << "Trie size: " << this->V_G.total_num(); 
+          
+          // if inside G_0 -> skip
+          if((*this->G[i])(r,c) != 0)      
+            continue;
+          
+          // try add an edge outside of G_0
+          this->tryPerformMove(&twoScores, i, r, c, ChangeType::ADD);
+        }
+      }
+    }
   }
   
   
   void partialSK_SS_iter()
   {
-    List& adjs = this->G_to_list();
-    NumericVector twoScores = (*this->input_data.score_full_func)(
-                       adjs, 
-                       *this->input_data.PK,
-                       this->settings.beta_L,
-                       this->settings.beta_H,
-                       *this->input_data.bge_param
-    );
-    delete &adjs;
-
+    NumericVector twoScores = this->callScoreFull();
+    this->initialProbs = twoScores;
+    
+    
     const List* SK_list = this->input_data.SK;
 
     // for loop over 1...N
@@ -511,12 +691,12 @@ protected:
             uint32_t r = lin % sk.nrow();
             uint32_t c = lin / sk.nrow();
             
-            
-            //"\014" << 
-            Rcout << lin << " = " <<
-              i << "," << r << "," << c << " /" << 
+            Rcout << "\014" << 
+              i << "," << j << "," << k << " /" << 
                 (SK_list->length()) << "," << (uint32_t)sk.ncol() << "," << (uint32_t)sk.nrow() <<
-                  "\n"; 
+                  "\n" << "Trie size: " << this->V_G.total_num() << "\n" 
+                       << "BGe: max:" << this->bge_max << ", min:" << this->bge_min << "\n"
+                       << "prob: " << this->currentProb << "\n"; 
             
 
             if(sk(r,c) == 0)      
@@ -526,7 +706,9 @@ protected:
           }
        }
     }
-
+    
+    return; // TODO remove
+    
     // for loop over 1...N
     // for loop over neighbors
     // try remove/add edge
@@ -551,6 +733,13 @@ protected:
           uint32_t r = lin % gz.nrow();
           uint32_t c = lin / gz.nrow();
           
+          
+          Rcout << "\014" << 
+            i << "," << j << "," << k << " /" << 
+              (SK_list->length()) << "," << (uint32_t)gz.ncol() << "," << (uint32_t)gz.nrow() <<
+                "\n" << "Trie size: " << this->V_G.total_num(); 
+          
+          
           if(gz(r,c) == 0)  // if not in G -> try add
             this->tryPerformMove(&twoScores, i, r, c, ChangeType::ADD); 
           else              // if in G -> try delete
@@ -562,7 +751,7 @@ protected:
     // Normalize counts by 100 * iteration number
     for(uint32_t i = 0; i < SK_list->length(); i++)
     {
-      *this->current_counts[i] = (*this->current_counts[i]) / (this->settings.I * 100.0);
+      *this->current_counts[i] = (*this->current_counts[i]); //  / (this->settings.I * 100.0)
       *this->E_f_G[i] += *this->current_counts[i];
     }
   }
@@ -650,6 +839,7 @@ protected:
 
   void tryPerformMove(NumericVector* score, uint32_t listID, uint32_t from, uint32_t to, ChangeType action)
   {
+    /*
     Rcout << "Trying to ";
     if(action == ChangeType::ADD)
     {
@@ -659,24 +849,19 @@ protected:
       Rcout << "REMOVE";
     }
     Rcout << " edge: " << listID << ", " << from << ", " << to << "\n"; 
+    */
+    NumericVector scoreChanges = this->callScoreChange(score, listID, from, to, action);
     
-    List& adjs = this->G_to_list();
-    NumericVector scoreChanges = (*this->input_data.score_change_func)
-    (
-        adjs, 
-        *this->input_data.PK, 
-        this->settings.beta_L, 
-        this->settings.beta_H, 
-        *this->input_data.bge_param,
-        *this->input_data.sizes,
-        *score, 
-        listID,
-        from, 
-        to, 
-        action == ChangeType::ADD);
+    double bge = scoreChanges[1];
+    double energy = scoreChanges[0];
+    double prob = bge + energy;
     
-    delete &adjs;
-
+    if(bge > this->bge_max) this->bge_max = bge;
+    if(bge < this->bge_min) this->bge_min = bge;
+    if(energy > this->energy_max) this->energy_max = energy;
+    if(energy < this->energy_min) this->energy_min = energy;
+    if(prob > this->prob_max) this->prob_max = prob;
+    if(prob < this->prob_min) this->prob_min = prob;
     /*
      * Here I tested multiple approaches to find the difference, because:
      * We are dealing with ~  -1e+08 or even more, thus
@@ -686,7 +871,7 @@ protected:
      * in each scoring function. For reference see  <score_change(...)> R function
      */
     double mod_diff = (scoreChanges[3] + scoreChanges[4]);
-    Rcout << mod_diff << "\n";
+    // Rcout << mod_diff << "\n";
     
     /**
      * Modified(explicit per-partes) is used, e.g.:
@@ -711,7 +896,11 @@ protected:
     
     this->makeMove(listID, from, to, action);
     
+    // Is it appropriate?
+    this->currentProb *= diff;
+    
     // TODO remove debug
+    /*
     if(action == ChangeType::ADD)
     {
       Rcout << "Adding";
@@ -719,7 +908,7 @@ protected:
     else{
       Rcout << "Removing";
     }
-    Rcout << " edge: " << listID << ", " << from << ", " << to << "\n"; 
+    Rcout << " edge: " << listID << ", " << from << ", " << to << "\n"; */
   }
   
   
@@ -756,6 +945,44 @@ protected:
     }
   }
   
+  
+  NumericVector callScoreChange(NumericVector* score, uint32_t listID, uint32_t from, uint32_t to, ChangeType action)
+  {
+    List& adjs = this->G_to_list();
+    NumericVector scoreChanges = (*this->input_data.score_change_func)
+      (
+          adjs, 
+          *this->input_data.PK, 
+          this->settings.beta_L, 
+          this->settings.beta_H, 
+          *this->input_data.bge_param,
+          *this->input_data.sizes,
+          *score, 
+          listID,
+          from, 
+          to, 
+          action == ChangeType::ADD);
+    
+    delete &adjs;
+    return scoreChanges;
+  }
+  
+  NumericVector callScoreFull()
+  {
+    List& adjs = this->G_to_list();
+    NumericVector twoScores = (*this->input_data.score_full_func)(
+      adjs, 
+      *this->input_data.PK,
+      this->settings.beta_L,
+      this->settings.beta_H,
+      *this->input_data.bge_param
+    );
+    delete &adjs;  
+    return twoScores;
+  }
+  
+  
+  
 };
 
 /**
@@ -814,6 +1041,9 @@ List c_SK_SS(
   
   net->SK_SS();
   List ret = net->result();
+  
+  
+
   delete net;
   
   return ret;