Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Martin Řepa
bachelor-thesis
Commits
256aeed9
Commit
256aeed9
authored
Mar 06, 2019
by
Martin Řepa
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Successfully migrated to pytorch. Working loss func
parent
78bdbab5
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
336 additions
and
228 deletions
+336
-228
Pipfile.lock
Pipfile.lock
+173
-131
src/data/loader.py
src/data/loader.py
+1
-0
src/game_solver.py
src/game_solver.py
+39
-32
src/neural_networks/network.py
src/neural_networks/network.py
+123
-65
No files found.
Pipfile.lock
View file @
256aeed9
This diff is collapsed.
Click to expand it.
src/data/loader.py
View file @
256aeed9
...
...
@@ -14,6 +14,7 @@ def np_arrays_from_scored_csv(file_name: str, label: int,
See usage in main
"""
# TODO enable load zero size array aswell
content
=
pandas
.
read_csv
(
Path
(
dirname
(
__file__
))
/
Path
(
'scored'
)
/
Path
(
file_name
))
batch
=
[]
labels
=
[]
...
...
src/game_solver.py
View file @
256aeed9
import
logging
import
operator
from
collections
import
Counter
from
itertools
import
count
from
typing
import
List
...
...
@@ -10,7 +9,8 @@ import pulp
from
config
import
RootConfig
from
src.data.loader
import
np_arrays_from_scored_csv
from
src.neural_networks.network
import
NeuralNetwork
from
src.neural_networks.network
import
NeuralNetwork
,
FormattedBenignData
,
\
FormattedMaliciousData
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -44,35 +44,46 @@ class GameSolver:
self
.
utility
=
conf
.
base_conf
.
utility_function
train
=
conf
.
nn_train_conf
self
.
benign_data
=
np_arrays_from_scored_csv
(
train
.
benign_data_file_name
,
0
,
train
.
benign_data_count
)
self
.
benign_data_prob
=
self
.
calculate_benign_data_prob
()
def
calculate_benign_data_prob
(
self
):
# TODO maybe this rounding is not really good for real results
benign_data
=
list
(
map
(
lambda
x
:
tuple
(
map
(
lambda
y
:
round
(
y
,
2
),
x
)),
self
.
benign_data
[
0
]))
benign_data_prob
=
Counter
(
benign_data
)
for
key
,
val
in
benign_data_prob
.
items
():
benign_data_prob
[
key
]
=
val
/
len
(
benign_data
)
return
benign_data_prob
def
_get_trained_nn
(
self
,
attacker_features_x
,
attacker_actions
)
->
NeuralNetwork
:
raw_benign_x
,
_
=
np_arrays_from_scored_csv
(
train
.
benign_data_file_name
,
0
,
train
.
benign_data_count
)
self
.
benign_data
=
self
.
prepare_benign_data
(
raw_benign_x
)
def
prepare_benign_data
(
self
,
raw_x_data
):
unique
,
counts
=
np
.
unique
(
raw_x_data
,
axis
=
0
,
return_counts
=
True
)
probs
=
np
.
array
([
count
/
len
(
raw_x_data
)
for
count
in
counts
])
benign_y
=
np
.
zeros
(
len
(
unique
))
return
FormattedBenignData
(
unique
,
probs
,
benign_y
)
# def calculate_benign_data_with_probs(self):
# # TODO maybe this rounding is not really good for real results
# benign_data = list(map(lambda x: tuple(map(lambda y: round(y, 2), x)),
# self.benign_data[0]))
# benign_data_counter = Counter(benign_data)
# benign_data_points = []
# benign_data_probs = []
# for key, val in benign_data_counter.items():
# benign_data_points.append(key)
# benign_data_probs.append(val / len(benign_data))
# return np.array(benign_data_points), np.array(benign_data_probs)
def
_get_trained_nn
(
self
,
attack
:
FormattedMaliciousData
)
->
NeuralNetwork
:
# Initialize the model
network
=
NeuralNetwork
(
self
.
conf
.
base_conf
.
features_count
,
self
.
conf
.
nn_conf
,
self
.
conf
.
nn_train_conf
)
network
.
set_
attacker_actions
(
attacker_actions
)
network
.
train
(
attacker_features_x
,
self
.
benign_data
)
network
.
set_
data
(
self
.
benign_data
,
attack
)
network
.
train
()
# TODO use different dataset to calc false_positives
# network.calc_n0_false_positives(self.benign_data[0])
return
network
def
double_oracle
(
self
,
actions_p1
:
List
)
->
Result
:
non_attack
=
FormattedMaliciousData
(
np
.
empty
(
0
),
np
.
empty
(
0
),
np
.
empty
(
0
))
# Get initial actions as the first ones
played_actions_p1
=
set
(
actions_p1
[:
1
])
played_actions_p2
=
{
self
.
_get_trained_nn
(
[[]]
)}
played_actions_p2
=
{
self
.
_get_trained_nn
(
non_attack
)}
for
i
in
count
():
logger
.
debug
(
f
'Iteration:
{
i
}
\n
'
)
...
...
@@ -149,21 +160,16 @@ class GameSolver:
lambda
a2
:
self
.
utility
(
a1
,
a2
),
actions_2
),
p2
)))
def
best_response_p2
(
self
,
used_actions_p1
,
probs_p1
):
malicious_features
=
[]
for
ai
,
pi
in
zip
(
used_actions_p1
,
probs_p1
):
counter
=
int
(
self
.
conf
.
nn_train_conf
.
malicious_data_count
*
pi
)
for
_
in
range
(
counter
):
malicious_features
.
append
(
ai
)
# Take only attacker actions which are played with non zero probability
non_zero_p
=
np
.
where
(
np
.
asarray
(
probs_p1
)
!=
0
)
actions_2
=
np
.
asarray
(
used_actions_p1
)[
non_zero_p
]
p2
=
np
.
asarray
(
probs_p1
)[
non_zero_p
]
attacker_actions
=
(
actions_2
,
p2
)
unique_attack_x
=
np
.
asarray
(
used_actions_p1
)[
non_zero_p
]
attack_probs
=
np
.
asarray
(
probs_p1
)[
non_zero_p
]
attack_y
=
np
.
ones
(
len
(
unique_attack_x
))
attack
=
FormattedMaliciousData
(
unique_attack_x
,
attack_probs
,
attack_y
)
logger
.
debug
(
'Let
\'
s train new NN with this malicious data:'
)
logger
.
debug
(
f
'
{
malicious_features
}
\n
'
)
return
self
.
_get_trained_nn
(
malicious_features
,
attacker_actions
)
logger
.
debug
(
f
'
{
unique_attack_x
}
\n
'
)
return
self
.
_get_trained_nn
(
attack
)
def
solve_zero_sum_game_pulp
(
self
,
actions_p1
:
List
[
List
[
float
]],
actions_p2
:
List
[
NeuralNetwork
]):
...
...
@@ -195,7 +201,8 @@ class GameSolver:
# Calc false positive cost with benign data probability distribution
fp_cost
=
0
for
features
,
features_prob
in
self
.
benign_data_prob
.
items
():
benign_points
,
benign_probs
=
self
.
benign_data_with_probs
for
features
,
features_prob
in
zip
(
benign_points
,
benign_probs
):
for
nn
,
nn_prob
in
zip
(
actions_p2
,
probs_p_two
):
l
=
nn
.
limit_predict
(
features
)[
0
]
fp_cost
+=
(
l
**
4
)
*
features_prob
*
nn_prob
...
...
src/neural_networks/network.py
View file @
256aeed9
import
logging
from
pathlib
import
Path
from
typing
import
List
,
Tuple
import
attr
import
numpy
as
np
import
torch
from
sklearn.model_selection
import
train_test_split
...
...
@@ -14,6 +14,20 @@ from src.data.loader import np_arrays_from_scored_csv
logger
=
logging
.
getLogger
(
__name__
)
# TODO one class is enough
@
attr
.
s
class
FormattedBenignData
:
unique_x
:
np
.
array
=
attr
.
ib
()
probs_x
:
np
.
array
=
attr
.
ib
()
y
:
np
.
array
=
attr
.
ib
()
@
attr
.
s
class
FormattedMaliciousData
:
features
:
np
.
array
=
attr
.
ib
()
probs_features
:
np
.
array
=
attr
.
ib
()
y
:
np
.
array
=
attr
.
ib
()
class
OrderCounter
:
order
=
0
...
...
@@ -36,71 +50,100 @@ class NeuralNetwork:
nn
.
Linear
(
12
,
1
),
nn
.
Sigmoid
()
)
self
.
loss_fn
=
nn
.
BCELoss
()
self
.
attacker_actions
=
None
self
.
epochs
=
nn_conf
.
epochs
self
.
validation_split
=
nn_train_conf
.
validation_split
self
.
id
=
OrderCounter
.
next
()
self
.
order
=
OrderCounter
.
next
()
def
set_attacker_actions
(
self
,
attacker_actions
:
Tuple
):
self
.
attacker_actions
=
attacker_actions
def
loss_function
(
self
):
pass
def
_prepare_data
(
self
,
attacker_features_x
:
List
[
List
[
float
]],
benign_data
:
Tuple
[
np
.
ndarray
,
np
.
ndarray
]):
x
,
y
=
benign_data
# Add attacker's malicious actions to dataset
attacker_features_x
=
np
.
array
(
attacker_features_x
)
if
len
(
attacker_features_x
[
0
]):
attacker_features_y
=
[[
1
]
for
_
in
attacker_features_x
]
x
=
np
.
concatenate
((
x
,
attacker_features_x
),
axis
=
0
)
y
=
np
.
concatenate
((
y
,
attacker_features_y
),
axis
=
0
)
# Variables used for loss function
self
.
attacker_actions
:
FormattedMaliciousData
=
None
self
.
benign_data
:
FormattedBenignData
=
None
# Shuffle benign and malicious data
x
,
y
=
shuffle
(
x
,
y
,
random_state
=
1
)
# Split data so we have train dataset and validation dataset
data
=
train_test_split
(
x
,
y
,
test_size
=
self
.
validation_split
)
# Convert data to float() for pyTorch model compatibility
data
=
tuple
(
map
(
lambda
a
:
torch
.
from_numpy
(
a
).
float
(),
data
))
# Return final data (x_train, x_validate, y_train, y_validate)
return
data
def
train
(
self
,
attacker_features_x
:
List
[
List
[
float
]],
benign_data
:
Tuple
[
np
.
ndarray
,
np
.
ndarray
]):
data
=
self
.
_prepare_data
(
attacker_features_x
,
benign_data
)
x_train
,
x_validate
,
y_train
,
y_validate
=
data
self
.
_train
(
x_train
,
y_train
,
x_validate
,
y_validate
)
# TODO Just tmp
self
.
loss_fn
=
nn
.
BCELoss
()
def
_train
(
self
,
x
,
y
,
x_validate
,
y_validate
):
learning_rate
=
1e-2
def
__str__
(
self
):
return
f
'Neural network with id:
{
self
.
id
}
'
def
set_data
(
self
,
benign_data
,
attack
):
self
.
attacker_actions
=
attack
self
.
benign_data
=
benign_data
def
loss_function
(
self
,
x
,
limits
,
real_y
,
probs
):
zero_sum_part
=
real_y
*
(
1
-
limits
)
*
torch
.
prod
(
x
,
dim
=
1
)
*
probs
fp_cost
=
(
1
-
real_y
)
*
probs
*
torch
.
pow
(
limits
,
4
)
sum_loss
=
torch
.
add
(
torch
.
sum
(
zero_sum_part
),
torch
.
sum
(
fp_cost
))
return
torch
.
div
(
sum_loss
,
len
(
x
))
# Calc false positive cost
# def_indexes = (real_y == 0)
# def_limits = limits[def_indexes]
# def_probs = real_y[def_indexes]
# fp_cost = torch.pow(torch.pow(def_limits, 4), def_probs)
#
# # Calc zero sum part
# attacker_indexes = (real_y == 1)
# att_limits = limits[attacker_indexes]
# att_x = x[attacker_indexes]
# att_probs = probs[attacker_indexes]
# att_rewards = torch.prod(att_x, dim=1)
# att_rewards = torch.pow(att_rewards, att_probs)
# zero_sum = torch.pow(att_rewards, torch.sub(1, att_limits))
#
# final_fp_cost = torch.sum(fp_cost)
# final_zero_sum_part = torch.sum(zero_sum)
# loss = torch.add(final_fp_cost, final_zero_sum_part)
# return loss
def
_prepare_data
(
self
):
defender
=
self
.
benign_data
attacker
=
self
.
attacker_actions
x
=
np
.
concatenate
((
defender
.
unique_x
,
attacker
.
features
),
axis
=
0
)
y
=
np
.
concatenate
((
defender
.
y
,
attacker
.
y
),
axis
=
0
)
probs
=
np
.
concatenate
((
defender
.
probs_x
,
attacker
.
probs_features
),
axis
=
0
)
# Shuffle before splitting
x
,
y
,
probs
=
shuffle
(
x
,
y
,
probs
,
random_state
=
1
)
# Split to train and train data given the ratio in config
data
=
train_test_split
(
x
,
y
,
probs
,
test_size
=
self
.
validation_split
)
x_train
,
x_test
,
y_train
,
y_test
,
probs_train
,
probs_test
=
data
self
.
x_train
=
torch
.
from_numpy
(
x_train
).
float
()
self
.
x_test
=
torch
.
from_numpy
(
x_test
).
float
()
self
.
y_train
=
torch
.
from_numpy
(
y_train
).
float
()
self
.
y_test
=
torch
.
from_numpy
(
y_test
).
float
()
self
.
probs_train
=
torch
.
from_numpy
(
probs_train
).
float
()
self
.
probs_test
=
torch
.
from_numpy
(
probs_test
).
float
()
def
train
(
self
):
self
.
_prepare_data
()
self
.
_train
()
def
_train
(
self
):
learning_rate
=
1e-4
optimizer
=
torch
.
optim
.
Adam
(
self
.
model
.
parameters
(),
lr
=
learning_rate
)
x
.
requires_grad
=
True
for
e
in
range
(
self
.
epochs
):
logger
.
debug
(
f
'Running epoch number
{
e
}
/
{
self
.
epochs
}
'
)
# Forward pass: compute predicted y by passing x to the model.
y_pred
=
self
.
model
(
x
)
train_limits
=
self
.
limit_predict
(
self
.
x_train
,
with_grad
=
True
)
# for l in train_limits:
# print(l.dtype, end=' ')
# print()
# Compute and print loss.
loss
=
self
.
loss_fn
(
y_pred
,
y
)
logger
.
debug
(
f
'TestLoss:
{
loss
.
item
()
}
, ValidateLoss: todo'
)
# todo
# Compute loss.
loss
=
self
.
loss_function
(
self
.
x_train
,
train_limits
,
self
.
y_train
,
self
.
probs_train
)
# loss = self.loss_fn(train_limits, self.y_train)
# Compute validation loss and report some info
if
e
%
5
==
0
:
with
torch
.
no_grad
():
y_
validate_
pred
=
self
.
model
(
x_validate
)
validate_loss
=
self
.
loss_fn
(
y_validate_pred
,
y_validate
)
test_limits
=
self
.
limit_predict
(
self
.
x_test
)
validate_
loss
=
self
.
loss_function
(
self
.
x_test
,
test_limits
,
self
.
y_test
,
self
.
probs_test
)
logging
.
debug
(
f
'Epoch:
{
e
}
/
{
self
.
epochs
}
,
\t
'
f
'TrainLoss:
{
loss
.
item
()
}
,
\t
'
f
'ValidateLoss:
{
validate_loss
}
,
\t
'
)
f
'TrainLoss:
{
loss
}
,
\t
'
f
'ValidateLoss:
{
validate_loss
.
item
()
}
,
\t
'
)
# Before the backward pass, use the optimizer object to zero all of
# the gradients for the variables it will update
...
...
@@ -114,17 +157,21 @@ class NeuralNetwork:
# parameters
optimizer
.
step
()
def
raw_predict
(
self
,
x
):
with
torch
.
no_grad
():
tensor
=
torch
.
tensor
(
x
).
float
()
res
=
self
.
model
(
tensor
)
return
res
.
numpy
()
def
_raw_predict
(
self
,
tensor
:
torch
.
Tensor
):
# TODO maybe this can help
return
self
.
model
(
tensor
)
def
limit_predict
(
self
,
x
):
raw_prediction
=
self
.
raw_predict
(
x
)
def
limit_predict
(
self
,
x
:
torch
.
Tensor
,
with_grad
=
False
):
if
with_grad
:
raw_prediction
=
self
.
_raw_predict
(
x
)
else
:
with
torch
.
no_grad
():
raw_prediction
=
self
.
_raw_predict
(
x
)
np_limit_func
=
np
.
vectorize
(
lambda
p
:
0
if
p
<
0.5
else
(
p
-
0.5
)
*
2
)
return
np_limit_func
(
raw_prediction
)
# The same as lambda p: 0 if p < 0.5 else (p - 0.5) * 2
clamped
=
raw_prediction
.
clamp
(
min
=
0.5
,
max
=
1
)
limit
=
torch
.
mul
(
torch
.
add
(
clamped
,
-
0.5
),
2
)
return
limit
def
setup_loger
(
conf
):
...
...
@@ -136,13 +183,24 @@ def setup_loger(conf):
if
__name__
==
'__main__'
:
setup_loger
(
RootConfig
())
benign_x
,
benign_y
=
np_arrays_from_scored_csv
(
benign_x
,
_
=
np_arrays_from_scored_csv
(
Path
(
'all_benign_scored.csv'
),
0
,
1000
)
malicious_x
,
malicious_y
=
np_arrays_from_scored_csv
(
Path
(
'scored_malicious.csv'
),
1
,
0
)
malicious_x
,
_
=
np_arrays_from_scored_csv
(
Path
(
'scored_malicious.csv'
),
1
,
500
)
benign_unique_x
,
counts
=
np
.
unique
(
benign_x
,
axis
=
0
,
return_counts
=
True
)
probs_benign
=
np
.
array
([
count
/
len
(
benign_x
)
for
count
in
counts
])
benign_y
=
np
.
zeros
(
len
(
benign_unique_x
))
benign_data
=
FormattedBenignData
(
benign_unique_x
,
probs_benign
,
benign_y
)
malicious_unique_x
,
counts
=
np
.
unique
(
malicious_x
,
axis
=
0
,
return_counts
=
True
)
probs_malicious
=
np
.
array
([
count
/
len
(
malicious_unique_x
)
for
count
in
counts
])
malicious_y
=
np
.
ones
(
len
(
malicious_unique_x
))
malicious_data
=
FormattedMaliciousData
(
malicious_unique_x
,
probs_malicious
,
malicious_y
)
nn
=
NeuralNetwork
()
nn
.
train
(
malicious_x
,
(
benign_x
,
benign_y
))
nn
.
set_data
(
benign_data
,
malicious_data
)
nn
.
train
()
# test_loss, test_acc = network.model.evaluate(x_test, y_test)
# print('Test loss:', test_loss)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment