Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
PiM-SK-SS
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Anuarbekov, Alikhan
PiM-SK-SS
Commits
f6f41d01
Commit
f6f41d01
authored
1 year ago
by
Anuarbekov, Alikhan
Browse files
Options
Downloads
Patches
Plain Diff
All is ready except for final scoring -> Overflow of BGe score
parent
f861ec0e
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
source/skeleton.R
+20
-10
20 additions, 10 deletions
source/skeleton.R
source/skeleton.cpp
+288
-58
288 additions, 58 deletions
source/skeleton.cpp
with
308 additions
and
68 deletions
source/skeleton.R
+
20
−
10
View file @
f6f41d01
...
...
@@ -200,15 +200,17 @@ P_D_G <- function(G_adjs, param_in)
pDG1
<-
pDG1
+
BGe_compute
(
adj
,
param_in
)
}
nodes_cand
<-
rownames
(
G_adjs
[[
1
]])
pDG2
<-
sum
(
sapply
(
nodes_cand
,
function
(
node
)
{
score
<-
BGe_node_compute
(
node
,
integer
(
0
),
param_in
)
return
(
score
)
})
)
return
(
pDG1
+
pDG2
)
# They are still the same!
# nodes_cand <- rownames(G_adjs[[1]])
# pDG2 <- sum(sapply(nodes_cand, function(node)
# {
# score <- BGe_node_compute(node, integer(0), param_in)
# return(score)
# })
# )
# + pDG2
return
(
pDG1
)
}
#'
...
...
@@ -378,7 +380,7 @@ SK_SS_tripartite <- function(GE, PK, SK, parts, I=1, debug=FALSE, alpha=0.05, be
# this is a large computation that computes a large parameter matrix:
# N x N, where N is the sum of sizes, e.g., total number of genes
cat
(
"Computing large BGe matrix...\n"
)
if
(
FALS
E
)
if
(
TRU
E
)
{
myScore
<-
scoreparameters_BiDAG_BGe
(
n
=
ncol
(
GE
),
data
=
GE
)
param_in
<-
list
(
n
=
myScore
$
n
,
awpN
=
myScore
$
awpN
,
TN
=
myScore
$
TN
,
scoreconstvec
=
myScore
$
scoreconstvec
)
...
...
@@ -394,10 +396,18 @@ SK_SS_tripartite <- function(GE, PK, SK, parts, I=1, debug=FALSE, alpha=0.05, be
set.seed
(
0
)
start.time
<-
Sys.time
()
print
(
start.time
)
E_f_G_out
<-
c_SK_SS
(
PK_in
,
SK_in
,
sizes
,
param_in
,
I
,
debug
,
beta_L
,
beta_H
,
score_G
,
score_change_G
)
end.time
<-
Sys.time
()
print
(
end.time
)
print
(
end.time
-
start.time
)
print
(
E_f_G_out
)
E_f_G_out
<-
lapply
(
E_f_G_out
,
function
(
m
)
m
>
alpha
)
...
...
This diff is collapsed.
Click to expand it.
source/skeleton.cpp
+
288
−
58
View file @
f6f41d01
...
...
@@ -8,6 +8,7 @@
#include
<string>
#include
<vector>
#include
<algorithm>
#include
<map>
// Pure C99
#include
<cstring>
...
...
@@ -67,9 +68,16 @@ struct key_hash : public std::unary_function<key_t, std::size_t>
{
std
::
size_t
operator
()(
const
key_t
&
k
)
const
{
return
(
std
::
size_t
)(
std
::
get
<
0
>
(
k
))
^
(
std
::
size_t
)(
std
::
get
<
1
>
(
k
))
^
(
std
::
size_t
)(
std
::
get
<
2
>
(
k
));
std
::
size_t
ret
=
0
;
ret
^=
std
::
hash
<
uint32_t
>
()(
std
::
get
<
0
>
(
k
))
+
0x9e3779b9
+
(
ret
<<
6
)
+
(
ret
>>
2
);
ret
^=
std
::
hash
<
uint32_t
>
()(
std
::
get
<
1
>
(
k
))
+
0x9e3779b9
+
(
ret
<<
6
)
+
(
ret
>>
2
);
ret
^=
std
::
hash
<
uint32_t
>
()(
std
::
get
<
2
>
(
k
))
+
0x9e3779b9
+
(
ret
<<
6
)
+
(
ret
>>
2
);
return
ret
;
// return (std::size_t)(std::get<0>(k)) ^
// (std::size_t)(std::get<1>(k)) ^
// (std::size_t)(std::get<2>(k));
}
};
...
...
@@ -118,6 +126,14 @@ struct GraphChange
*/
GraphChange
*
prev
=
nullptr
;
/**
* Note: since unordered_map does not have any guarantee about the key order,
* the random sampling is modified by the changing key ordering!
*
* If such behavior is unacceptable, consider using std::map, but this
* will cost a change of complexity from O(1) to O(log n)
*/
std
::
unordered_map
<
const
key_t
,
GraphChange
*
,
...
...
@@ -157,7 +173,7 @@ struct GraphChange
* ! Note -> nodes with * haven't been visited !
*
* > if no flag is used to determine the node in Trie, we can't mark the above
* > when pushing a word, only its last tail will be marked
* > when pushing a word, only its last tail
node
will be marked
*/
bool
is_graph
=
false
;
...
...
@@ -214,6 +230,11 @@ struct GraphChange
return
prev
;
}
void
printG
()
{
Rcout
<<
listID
<<
","
<<
from
<<
","
<<
to
<<
"; "
<<
is_graph
<<
"
\n
"
;
}
};
struct
graph_comp
...
...
@@ -238,7 +259,7 @@ public:
~
Trie
()
{
// TODO free, but not really needed for single script :)
}
...
...
@@ -250,6 +271,12 @@ public:
{
if
(
word
.
size
()
==
0
)
return
false
;
/*
Rcout << "adding a word:\n";
for(key_t k : word)
Rcout << "["<< (uint32_t)std::get<0>(k) << "," << (uint32_t)std::get<1>(k) << "," << (uint32_t)std::get<2>(k) << "]";
Rcout<<"\n";
*/
// Now it is easier to add a word to trie:
// since characters are sorted, just traverse and add individual nodes
...
...
@@ -257,6 +284,7 @@ public:
if
(
this
->
root
.
find
(
word
[
0
])
==
this
->
root
.
end
())
{
current
=
new
GraphChange
(
word
[
0
],
nullptr
);
root
[
word
[
0
]]
=
current
;
}
else
{
current
=
root
.
at
(
word
[
0
]);
...
...
@@ -299,25 +327,55 @@ public:
}
}
/**
* Randomly chooses a graph from the Trie and sets the G(adjacency) graph
* to it
*/
void
sampleGraph
(
std
::
vector
<
LogicalMatrix
*>&
G_adjs
)
{
uint32_t
currIndex
=
0
;
// TODO ideally the Rcpp::sample should be modified
// to handle larger numbers
uint32_t
sampledIndex
=
Rcpp
::
sample
(
INT_MAX
,
1
)[
0
];
uint32_t
sampledIndex
=
Rcpp
::
sample
(
this
->
total_num_graphs
,
1
)[
0
]
-
1
;
// TODO
this
->
find
(
&
currIndex
,
sampledIndex
,
G_adjs
);
}
// TODO maybe 32 bit won't be enough
//
/**
* Returns a number of subgraphs in Trie.
*
* Note: this isn't the same as the number of nodes. We count only the subgraphs,
* for more detail see the struct GraphChange
*
* Note 2: maybe this 32-bit number won't be enough, consider using 64bit number,
* but then, again, Rcpp::sample should be modified to 64-bit (2x sample + shift etc...)!
* e.g., .Machine$integer.max on tested machine:= 2147483647
*/
uint32_t
total_num
()
{
return
this
->
total_num_graphs
;
}
void
print
()
{
for
(
const
auto
&
[
key
,
value
]
:
this
->
root
)
{
this
->
printRecursive
(
value
);
}
}
protected
:
/**
* Note: since unordered_map does not have any guarantee about the key order,
* the random sampling is modified by the changing key ordering!
*
* If such behavior is unacceptable, consider using std::map, but this
* will cost a change of complexity from O(1) to O(log n)
*/
std
::
unordered_map
<
const
key_t
,
GraphChange
*
,
...
...
@@ -325,22 +383,68 @@ protected:
>
root
;
// TODO assert total_num_graphs < INT_MAX
uint64_t
total_num_graphs
=
0
;
void
printRecursive
(
GraphChange
*
curr
)
{
Rcout
<<
"recursive:
\n
"
;
curr
->
printG
();
Rcout
<<
"going down:
\n
"
;
for
(
const
auto
&
[
key
,
value
]
:
curr
->
next_map
)
{
this
->
printRecursive
(
value
);
}
Rcout
<<
"going up:
\n
"
;
}
void
addWordRecursive
(
std
::
vector
<
GraphChange
*>&
word
,
uint32_t
index
)
// TODO assert total_num_graphs < INT_MAX
uint32_t
total_num_graphs
=
0
;
void
find
(
uint32_t
*
currIdx
,
const
uint32_t
&
needed
,
std
::
vector
<
LogicalMatrix
*>&
G_adjs
)
{
key_t
currKey
=
std
::
make_tuple
(
word
[
0
]
->
listID
,
word
[
0
]
->
from
,
word
[
0
]
->
to
);
if
(
root
.
find
(
currKey
)
==
root
.
end
())
// Note: if the sampled subgraphs are too deep and stack memory overflow,
// then consider using the non-recursive version
for
(
const
auto
&
[
key
,
value
]
:
this
->
root
)
{
// TODO
// Go down
value
->
forward
(
G_adjs
);
*
currIdx
+=
value
->
is_graph
;
this
->
recursiveFind
(
value
,
currIdx
,
needed
,
G_adjs
);
if
(
*
currIdx
==
needed
)
{
break
;
}
value
->
backward
(
G_adjs
);
}
}
void
findIndex
(
uint64_t
*
currIdx
,
const
uint64_t
&
needed
)
void
recursiveFind
(
GraphChange
*
curr
,
uint32_t
*
currIdx
,
const
uint32_t
&
needed
,
std
::
vector
<
LogicalMatrix
*>&
G_adjs
)
{
// TODO
if
(
*
currIdx
==
needed
)
{
return
;
}
curr
->
forward
(
G_adjs
);
currIdx
+=
curr
->
is_graph
;
for
(
const
auto
&
[
key
,
value
]
:
curr
->
next_map
)
{
this
->
recursiveFind
(
value
,
currIdx
,
needed
,
G_adjs
);
if
(
*
currIdx
==
needed
)
{
return
;
}
}
curr
->
backward
(
G_adjs
);
return
;
}
...
...
@@ -354,11 +458,34 @@ protected:
// [[Rcpp::export]]
double
tst
()
{
IntegerVector
rnd
=
Rcpp
::
sample
(
6
,
6
);
NumericMatrix
t
=
NumericMatrix
(
2
,
3
,
rnd
.
begin
());
Rcout
<<
t
<<
"
\n
"
;
Trie
trie
;
trie
.
print
();
std
::
vector
<
key_t
>
word
;
word
.
push_back
(
std
::
make_tuple
(
0
,
5
,
10
));
trie
.
appendChainAddOnly
(
word
);
trie
.
print
();
word
.
push_back
(
std
::
make_tuple
(
0
,
7
,
10
));
trie
.
appendChainAddOnly
(
word
);
trie
.
print
();
word
.
push_back
(
std
::
make_tuple
(
0
,
5
,
11
));
trie
.
appendChainAddOnly
(
word
);
trie
.
print
();
word
.
push_back
(
std
::
make_tuple
(
0
,
4
,
10
));
trie
.
appendChainAddOnly
(
word
);
trie
.
print
();
word
.
erase
(
word
.
begin
()
+
0
);
trie
.
appendChainAddOnly
(
word
);
trie
.
print
();
Rcout
<<
"Final trie size: "
<<
trie
.
total_num
()
<<
"
\n
"
;
Rcout
<<
t
*
2.5
<<
"
\n
"
;
return
0.0
;
}
...
...
@@ -414,6 +541,16 @@ protected:
std
::
vector
<
NumericMatrix
*>
E_f_G
;
std
::
vector
<
NumericMatrix
*>
current_counts
;
double
currentProb
=
1.0
;
// TODO
NumericVector
initialProbs
;
double
bge_max
=
-
99999999999.0
;
double
bge_min
=
9999999999.0
;
double
energy_max
=
-
99999999999.0
;
double
energy_min
=
9999999999.0
;
double
prob_max
=
-
99999999999.0
;
double
prob_min
=
9999999999.0
;
public:
Network
(
IData
input
,
ISettings
settings
)
...
...
@@ -443,6 +580,7 @@ public:
void
SK_SS
()
{
this
->
partialSK_SS
();
return
;
// TODO remove
for
(
uint32_t
i
=
0
;
i
<
this
->
settings
.
I
;
i
++
)
{
...
...
@@ -454,12 +592,17 @@ public:
List
&
result
()
{
Rprintf
(
"BGe min/max: %.10f, %.10f
\n
"
,
this
->
bge_min
,
this
->
bge_max
);
Rprintf
(
"Energy min/max: %.10f, %.10f
\n
"
,
this
->
energy_min
,
this
->
energy_max
);
Rprintf
(
"Prob min/max: %.10f, %.10f
\n
"
,
this
->
prob_min
,
this
->
prob_max
);
Rprintf
(
"Initial probs: %.10f, %.10f
\n
"
,
this
->
initialProbs
[
0
],
this
->
initialProbs
[
1
]);
const
List
*
SK_list
=
this
->
input_data
.
SK
;
List
*
ret
=
new
List
(
SK_list
->
length
());
for
(
uint32_t
i
=
0
;
i
<
SK_list
->
length
();
i
++
)
{
*
this
->
E_f_G
[
i
]
=
(
*
this
->
E_f_G
[
i
])
/
(
this
->
V_G
.
total_num
()
/
100.0
)
;
*
this
->
E_f_G
[
i
]
=
(
*
this
->
E_f_G
[
i
])
;
//
/ (this->V_G.total_num() / 100.0)
(
*
ret
)[
i
]
=
*
this
->
E_f_G
[
i
];
}
...
...
@@ -469,22 +612,59 @@ public:
protected
:
void
SK_SS_phase2_iter
()
{
//TODO
this
->
V_G
.
sampleGraph
(
this
->
G
);
// sample adjacency matrix, e.g., sample G_0
NumericVector
twoScores
=
this
->
callScoreFull
();
const
List
*
SK_list
=
this
->
input_data
.
SK
;
// for loop over 1...N
// for loop over neighbors
// try add edge
for
(
uint32_t
i
=
0
;
i
<
SK_list
->
length
();
i
++
)
{
const
IntegerMatrix
&
sk
=
(
*
SK_list
)[
i
];
// Random permutation of the indices
uint32_t
num
=
sk
.
ncol
()
*
sk
.
nrow
();
IntegerVector
xy
=
Rcpp
::
sample
(
num
,
num
);
// It is necessary to do 2d loop in COL, ROW order, since
// rows are local in memory by Rcpp standard
// UPD: after random permutation this doesn't matter
for
(
uint32_t
j
=
0
;
j
<
(
uint32_t
)
sk
.
ncol
();
j
++
)
{
for
(
uint32_t
k
=
0
;
k
<
(
uint32_t
)
sk
.
nrow
();
k
++
)
{
// extract 2D coords from 1D random permutation
uint32_t
lin
=
xy
[
j
*
sk
.
nrow
()
+
k
]
-
1
;
uint32_t
r
=
lin
%
sk
.
nrow
();
uint32_t
c
=
lin
/
sk
.
nrow
();
Rcout
<<
"
\014
"
<<
i
<<
","
<<
j
<<
","
<<
k
<<
" /"
<<
(
SK_list
->
length
())
<<
","
<<
(
uint32_t
)
sk
.
ncol
()
<<
","
<<
(
uint32_t
)
sk
.
nrow
()
<<
"
\n
"
<<
"Trie size: "
<<
this
->
V_G
.
total_num
();
// if inside G_0 -> skip
if
((
*
this
->
G
[
i
])(
r
,
c
)
!=
0
)
continue
;
// try add an edge outside of G_0
this
->
tryPerformMove
(
&
twoScores
,
i
,
r
,
c
,
ChangeType
::
ADD
);
}
}
}
}
void
partialSK_SS_iter
()
{
List
&
adjs
=
this
->
G_to_list
();
NumericVector
twoScores
=
(
*
this
->
input_data
.
score_full_func
)(
adjs
,
*
this
->
input_data
.
PK
,
this
->
settings
.
beta_L
,
this
->
settings
.
beta_H
,
*
this
->
input_data
.
bge_param
);
delete
&
adjs
;
NumericVector
twoScores
=
this
->
callScoreFull
();
this
->
initialProbs
=
twoScores
;
const
List
*
SK_list
=
this
->
input_data
.
SK
;
// for loop over 1...N
...
...
@@ -511,12 +691,12 @@ protected:
uint32_t
r
=
lin
%
sk
.
nrow
();
uint32_t
c
=
lin
/
sk
.
nrow
();
//"\014" <<
Rcout
<<
lin
<<
" = "
<<
i
<<
","
<<
r
<<
","
<<
c
<<
" /"
<<
Rcout
<<
"
\014
"
<<
i
<<
","
<<
j
<<
","
<<
k
<<
" /"
<<
(
SK_list
->
length
())
<<
","
<<
(
uint32_t
)
sk
.
ncol
()
<<
","
<<
(
uint32_t
)
sk
.
nrow
()
<<
"
\n
"
;
"
\n
"
<<
"Trie size: "
<<
this
->
V_G
.
total_num
()
<<
"
\n
"
<<
"BGe: max:"
<<
this
->
bge_max
<<
", min:"
<<
this
->
bge_min
<<
"
\n
"
<<
"prob: "
<<
this
->
currentProb
<<
"
\n
"
;
if
(
sk
(
r
,
c
)
==
0
)
...
...
@@ -526,7 +706,9 @@ protected:
}
}
}
return
;
// TODO remove
// for loop over 1...N
// for loop over neighbors
// try remove/add edge
...
...
@@ -551,6 +733,13 @@ protected:
uint32_t
r
=
lin
%
gz
.
nrow
();
uint32_t
c
=
lin
/
gz
.
nrow
();
Rcout
<<
"
\014
"
<<
i
<<
","
<<
j
<<
","
<<
k
<<
" /"
<<
(
SK_list
->
length
())
<<
","
<<
(
uint32_t
)
gz
.
ncol
()
<<
","
<<
(
uint32_t
)
gz
.
nrow
()
<<
"
\n
"
<<
"Trie size: "
<<
this
->
V_G
.
total_num
();
if
(
gz
(
r
,
c
)
==
0
)
// if not in G -> try add
this
->
tryPerformMove
(
&
twoScores
,
i
,
r
,
c
,
ChangeType
::
ADD
);
else
// if in G -> try delete
...
...
@@ -562,7 +751,7 @@ protected:
// Normalize counts by 100 * iteration number
for
(
uint32_t
i
=
0
;
i
<
SK_list
->
length
();
i
++
)
{
*
this
->
current_counts
[
i
]
=
(
*
this
->
current_counts
[
i
])
/
(
this
->
settings
.
I
*
100.0
)
;
*
this
->
current_counts
[
i
]
=
(
*
this
->
current_counts
[
i
])
;
//
/ (this->settings.I * 100.0)
*
this
->
E_f_G
[
i
]
+=
*
this
->
current_counts
[
i
];
}
}
...
...
@@ -650,6 +839,7 @@ protected:
void
tryPerformMove
(
NumericVector
*
score
,
uint32_t
listID
,
uint32_t
from
,
uint32_t
to
,
ChangeType
action
)
{
/*
Rcout << "Trying to ";
if(action == ChangeType::ADD)
{
...
...
@@ -659,24 +849,19 @@ protected:
Rcout << "REMOVE";
}
Rcout << " edge: " << listID << ", " << from << ", " << to << "\n";
*/
NumericVector
scoreChanges
=
this
->
callScoreChange
(
score
,
listID
,
from
,
to
,
action
);
List
&
adjs
=
this
->
G_to_list
();
NumericVector
scoreChanges
=
(
*
this
->
input_data
.
score_change_func
)
(
adjs
,
*
this
->
input_data
.
PK
,
this
->
settings
.
beta_L
,
this
->
settings
.
beta_H
,
*
this
->
input_data
.
bge_param
,
*
this
->
input_data
.
sizes
,
*
score
,
listID
,
from
,
to
,
action
==
ChangeType
::
ADD
);
double
bge
=
scoreChanges
[
1
];
double
energy
=
scoreChanges
[
0
];
double
prob
=
bge
+
energy
;
delete
&
adjs
;
if
(
bge
>
this
->
bge_max
)
this
->
bge_max
=
bge
;
if
(
bge
<
this
->
bge_min
)
this
->
bge_min
=
bge
;
if
(
energy
>
this
->
energy_max
)
this
->
energy_max
=
energy
;
if
(
energy
<
this
->
energy_min
)
this
->
energy_min
=
energy
;
if
(
prob
>
this
->
prob_max
)
this
->
prob_max
=
prob
;
if
(
prob
<
this
->
prob_min
)
this
->
prob_min
=
prob
;
/*
* Here I tested multiple approaches to find the difference, because:
* We are dealing with ~ -1e+08 or even more, thus
...
...
@@ -686,7 +871,7 @@ protected:
* in each scoring function. For reference see <score_change(...)> R function
*/
double
mod_diff
=
(
scoreChanges
[
3
]
+
scoreChanges
[
4
]);
Rcout
<<
mod_diff
<<
"
\n
"
;
//
Rcout << mod_diff << "\n";
/**
* Modified(explicit per-partes) is used, e.g.:
...
...
@@ -711,7 +896,11 @@ protected:
this
->
makeMove
(
listID
,
from
,
to
,
action
);
// Is it appropriate?
this
->
currentProb
*=
diff
;
// TODO remove debug
/*
if(action == ChangeType::ADD)
{
Rcout << "Adding";
...
...
@@ -719,7 +908,7 @@ protected:
else{
Rcout << "Removing";
}
Rcout
<<
" edge: "
<<
listID
<<
", "
<<
from
<<
", "
<<
to
<<
"
\n
"
;
Rcout << " edge: " << listID << ", " << from << ", " << to << "\n";
*/
}
...
...
@@ -756,6 +945,44 @@ protected:
}
}
NumericVector
callScoreChange
(
NumericVector
*
score
,
uint32_t
listID
,
uint32_t
from
,
uint32_t
to
,
ChangeType
action
)
{
List
&
adjs
=
this
->
G_to_list
();
NumericVector
scoreChanges
=
(
*
this
->
input_data
.
score_change_func
)
(
adjs
,
*
this
->
input_data
.
PK
,
this
->
settings
.
beta_L
,
this
->
settings
.
beta_H
,
*
this
->
input_data
.
bge_param
,
*
this
->
input_data
.
sizes
,
*
score
,
listID
,
from
,
to
,
action
==
ChangeType
::
ADD
);
delete
&
adjs
;
return
scoreChanges
;
}
NumericVector
callScoreFull
()
{
List
&
adjs
=
this
->
G_to_list
();
NumericVector
twoScores
=
(
*
this
->
input_data
.
score_full_func
)(
adjs
,
*
this
->
input_data
.
PK
,
this
->
settings
.
beta_L
,
this
->
settings
.
beta_H
,
*
this
->
input_data
.
bge_param
);
delete
&
adjs
;
return
twoScores
;
}
};
/**
...
...
@@ -814,6 +1041,9 @@ List c_SK_SS(
net
->
SK_SS
();
List
ret
=
net
->
result
();
delete
net
;
return
ret
;
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment