Final version, debug/comments removed

1b579074 · Anuarbekov, Alikhan · 268a61f7 · 1b579074 · 1b579074 · 1b579074
Commit 1b579074 authored 1 year ago by Anuarbekov, Alikhan
--- a/data/hsa_circ_0000228_BN.RData
+++ b/data/hsa_circ_0000228_BN.RData
--- a/source/skeleton.cpp
+++ b/source/skeleton.cpp
@@ -121,12 +121,6 @@ enum ChangeType

 struct GraphChange
 {
-  /**
-   * Reference to parent for potential traversal
-   */
-  //GraphChange* prev = nullptr;
-  
-  
  /**
   * Note: since unordered_map does not have any guarantee about the key order,
   *    the random sampling is modified by the changing key ordering!
@@ -141,13 +135,6 @@ struct GraphChange
    >
    next_map;
  
-  /**
-   * key_tp, but divided to be more readable
-   */
-  //uint32_t from;
-  //uint32_t to;
-  //uint32_t listID;
-  
  /**
   * After Trie receives a word, only the end of it is actually a subgraph,
   * 
@@ -181,64 +168,12 @@ struct GraphChange
  {
    this->is_graph = true;
  }
-  /*
-  key_tp key()
-  {
-    return std::make_tuple(this->listID, this->from, this->to);
-  }*/
-  
  
-  //key_tp kt, GraphChange* parent
  GraphChange()
  {
-    //this->listID = std::get<0>(kt);
-    //this->from = std::get<1>(kt);
-    //this->to = std::get<2>(kt);
-    
    this->is_graph = false;
-    
-    //this->prev = parent;
-  }
-  
-  /**
-   * When going down the node, we add the edge, storing 1 ADJ matrix only
-   */
-  /*
-  GraphChange* forward(std::vector<LogicalMatrix*>& adjs)
-  {
-    (*adjs[listID])(from, to) = true;
-    
-    return next;
-  }*/
-  
-  /**
-   * When going up the node, we remove the edge, storing 1 ADJ matrix only
-   */
-  /*
-  GraphChange* backward(std::vector<LogicalMatrix*>& adjs)
-  {
-    (*adjs[listID])(from, to) = false;
-    
-    return prev;
  }
-  
-  void printG()
-  {
-    Rcout << listID << "," << from << "," << to << "; " << is_graph << "\n";
-  }*/
 };
-/*
-struct graph_comp
-{
-  inline bool operator() (GraphChange*& k1, GraphChange*& k2)
-  {
-    key_tp t1 = std::make_tuple(k1->listID, k1->from, k1->to);
-    key_tp t2 = std::make_tuple(k2->listID, k2->from, k2->to);
-    
-    return key_comp()(t1, t2);
-  }
-};*/
-

 class Trie
 {
@@ -331,7 +266,7 @@ public:
    this->find(&currIndex, sampledIndex, G_adjs);
  }
  
-  // 
+  
  /**
   * Returns the number of subgraphs in the Trie.
   * 
@@ -377,15 +312,13 @@ protected:
    // Note: if the sampled subgraphs are too deep and the recursion overflows the stack,
    // consider using the non-recursive version instead
    
-    //for(const auto & [ key, value ] : this->root)
-    //{
+
    for(const auto &item : this->root)
    {  
      const auto& key = item.first;
      const auto& value = item.second;
      
      // Go down
-      //value->forward(G_adjs);
      (*G_adjs[std::get<0>(key)])(std::get<1>(key), std::get<2>(key)) = true;
      
      *currIdx += value->is_graph;
@@ -397,7 +330,6 @@ protected:
        break;
      }
      
-      //value->backward(G_adjs);
      (*G_adjs[std::get<0>(key)])(std::get<1>(key), std::get<2>(key)) = false;
    }
  }
@@ -410,11 +342,6 @@ protected:
      return;
    }
    
-    //curr->forward(G_adjs);
-    //currIdx += curr->is_graph;
-    
-    //for(const auto & [ key, value ] : curr->next_map)
-    //{
    for(const auto &item : this->root)
    {  
        const auto& key = item.first;
@@ -430,8 +357,6 @@ protected:
        (*G_adjs[std::get<0>(key)])(std::get<1>(key), std::get<2>(key)) = false;
    }
    
-    //curr->backward(G_adjs);
-    
    return;
  }
  
@@ -548,8 +473,7 @@ public:
  List& result()
  {
    Rprintf("Optimum log probability: %.10f\n", this->probabilityMaxOptim);
-    //Rprintf("Trie total size: %.5d\n", this->V_G.total_num());
-    
+
    const List* SK_list = this->input_data.SK;
    List* ret = new List(SK_list->length());

@@ -577,7 +501,6 @@ protected:
    {
      Rprintf("%5d/%5d  \n  Array:20%d\n", i, SK_list->length(), this->unord_V_G.size());
      uint32_t l = ls[i] - 1;
-      //if(l == 0) continue;
      const IntegerMatrix& sk = (*SK_list)[l];
      
      // Random permutation of the indices
@@ -603,12 +526,6 @@ protected:
          uint32_t r = lin % sk.nrow();
          uint32_t c = lin / sk.nrow();
          
-          // Rprintf("\014[pSK-SS][add only][%5u,%5u,%5u]/[%5u,%5u,%5u]\n"
-          //           "Probability optim: %.10f\nTrie: nodes: %.10lu\n, subgraphs %.5u\n", 
-          //           i,j,k,
-          //           SK_list->length(), (uint32_t)sk.ncol(), (uint32_t)sk.nrow(),
-          //           this->probabilityMaxOptim, this->V_G.nodes_num(), this->V_G.total_num());
-          
          // if inside G_0 -> skip
          if((*this->G[l])(r,c) != 0)      
            continue;
@@ -640,13 +557,10 @@ protected:
       const IntegerMatrix& sk = (*SK_list)[l];
       Rprintf("%5d/%5d  \n  Array:20%d\n", i, SK_list->length(), this->unord_V_G.size());

-       //if(l == 0) continue;
-        
       // Random permutation of the indices
       uint32_t num = sk.ncol() * sk.nrow();
       IntegerVector xy = Rcpp::sample(num, num);
-       //IntegerVector xy = Rcpp::Range(1, num); // TODO remove 
-       
+
       // A 2D loop should run in COL, ROW order (column outer, row inner), since
       // Rcpp matrices are column-major: the rows of one column are adjacent in memory.
       // UPD: after the random permutation of indices this no longer matters
@@ -661,13 +575,6 @@ protected:
            uint32_t r = lin % sk.nrow();
            uint32_t c = lin / sk.nrow();
            
-            // Rprintf("\014[pSK-SS][add only][%5u,%5u,%5u]/[%5u,%5u,%5u]\n"
-            //           "Probability optim: %.10f\nTrie: nodes: %.10lu\n, subgraphs %.5u\n",
-            //         i,j,k,
-            //         SK_list->length(), (uint32_t)sk.ncol(), (uint32_t)sk.nrow(),
-            //         this->probabilityMaxOptim, this->V_G.nodes_num(), this->V_G.total_num()
-            //         );
-            
            if(sk(r, c) == 0) 
            {
              continue;
@@ -676,7 +583,6 @@ protected:
            this->tryPerformMove(&twoScores, l, r, c, ChangeType::ADD);
          }
       }
-       Rcout << "GLOBAL COUNTER: " << this->ord_V_G.size() << "\n";
    }
    
    Rprintf("partialSKSS-add-remove phase\n");
@@ -722,21 +628,12 @@ protected:
            continue;
          }
          
-          // Rprintf("\014[pSK-SS][add only][%5u,%5u,%5u]/[%5u,%5u,%5u]\n"
-          //           "Probability optim: %.10f\nTrie: nodes: %.10lu\n, subgraphs %.5u\n", 
-          //           i,j,k,
-          //           SK_list->length(), (uint32_t)gz.ncol(), (uint32_t)gz.nrow(),
-          //           this->probabilityMaxOptim, this->V_G.nodes_num(), this->V_G.total_num());
-          
-          
          if(gz(r,c) == 0)  // if not in G -> try add
            this->tryPerformMove(&twoScores, l, r, c, ChangeType::ADD); 
          else              // if in G -> try delete
            this->tryPerformMove(&twoScores, l, r, c, ChangeType::DELETE); 
        }
      }
-      
-      
    }
  }
  
@@ -999,8 +896,6 @@ List c_SK_SS(
                  Function score_change_func
                  )
 {
-  Rcout << "I am in SK_SS function!\n";
-  
  IData i_data;
  
  i_data.PK = &PK;
@@ -1022,9 +917,6 @@ List c_SK_SS(
  
  net->SK_SS();
  List ret = net->result();
-  
-  
-
  delete net;
  
  return ret;

--- a/workflow.Rmd
+++ b/workflow.Rmd
@@ -412,6 +412,8 @@ MDS_RELATED_TERMS <- convert_circGPA(res[[2]])

 ```{r}
 library(ggVennDiagram)
+library(reshape2)
+library(ggplot2)

 rank1_circGPA <- as.character(df1_orig$Row.names[order(df1_orig$pvalue)])
 rank1_circGPA <- rank1_circGPA[1:100]
@@ -445,9 +447,22 @@ rank1_BN <- as.character(df1$Row.names[order(df1$pvalue)])
 rank1_BN_ids <- match(MDS_RELATED_TERMS, rank1_BN)
 rank1_BN_ids <- rank1_BN_ids[!is.na(rank1_BN_ids)]

-bdata <- data.frame(orig=rank1_circGPA_ids, corr=rank1_corr_ids, BN=rank1_BN_ids)
-boxplot(bdata$orig, bdata$corr, bdata$BN,
-        names = c("circGPA", "circGPAcorr", "circGPA-BN"))
+# bdata <- data.frame(orig=rank1_circGPA_ids, corr=rank1_corr_ids, BN=rank1_BN_ids)
+# boxplot(bdata$orig, bdata$corr, bdata$BN,
+#         names = c("circGPA", "circGPAcorr", "circGPA-BN"))
+
+bdata <- data.frame(
+  type=c(
+    rep("circGPA",length(rank1_circGPA_ids)),
+    rep("circGPAcorr",length(rank1_corr_ids)),
+    rep("circGPA-BN",length(rank1_BN_ids))
+    ),
+  data=c(
+    rank1_circGPA_ids, rank1_corr_ids, rank1_BN_ids
+  )
+  )
+
+ggplot(data = bdata, aes(x=data, y=type)) + geom_boxplot(aes(fill=type)) + xlab("Position in GO term ordering") + ylab("circGPA version") + ggtitle("A distribution of MDS-related GO terms")

 ```

@@ -482,9 +497,18 @@ rank2_BN <- as.character(df2$Row.names[order(df2$pvalue)])
 rank2_BN_ids <- match(MDS_RELATED_TERMS, rank2_BN)
 rank2_BN_ids <- rank2_BN_ids[!is.na(rank2_BN_ids)]

-bdata <- data.frame(orig=rank2_circGPA_ids, corr=rank2_corr_ids, BN=rank2_BN_ids)
-boxplot(bdata$orig, bdata$corr, bdata$BN,
-        names = c("circGPA", "circGPAcorr", "circGPA-BN"))
+bdata <- data.frame(
+  type=c(
+    rep("circGPA",length(rank2_circGPA_ids)),
+    rep("circGPAcorr",length(rank2_corr_ids)),
+    rep("circGPA-BN",length(rank2_BN_ids))
+    ),
+  data=c(
+    rank2_circGPA_ids, rank2_corr_ids, rank2_BN_ids
+  )
+  )
+
+ggplot(data = bdata, aes(x=data, y=type)) + geom_boxplot(aes(fill=type)) + xlab("Position in GO term ordering") + ylab("circGPA version") + ggtitle("A distribution of MDS-related GO terms")
 ```

 ```{r}
@@ -518,9 +542,18 @@ rank3_BN <- as.character(df3$Row.names[order(df3$pvalue)])
 rank3_BN_ids <- match(MDS_RELATED_TERMS, rank3_BN)
 rank3_BN_ids <- rank3_BN_ids[!is.na(rank3_BN_ids)]

-bdata <- data.frame(orig=rank3_circGPA_ids, corr=rank3_corr_ids, BN=rank3_BN_ids)
-boxplot(bdata$orig, bdata$corr, bdata$BN,
-        names = c("circGPA", "circGPAcorr", "circGPA-BN"))
+bdata <- data.frame(
+  type=c(
+    rep("circGPA",length(rank3_circGPA_ids)),
+    rep("circGPAcorr",length(rank3_corr_ids)),
+    rep("circGPA-BN",length(rank3_BN_ids))
+    ),
+  data=c(
+    rank3_circGPA_ids, rank3_corr_ids, rank3_BN_ids
+  )
+  )
+
+ggplot(data = bdata, aes(x=data, y=type)) + geom_boxplot(aes(fill=type)) + xlab("Position in GO term ordering") + ylab("circGPA version") + ggtitle("A distribution of MDS-related GO terms")

 ```