Commit 78bdbab5 authored by Martin Řepa's avatar Martin Řepa

backup

parent 58614315
......@@ -14,6 +14,7 @@ pandas = "*"
sklearn = "*"
tensorflow = "*"
matplotlib = "*"
torch = "*"
[requires]
python_version = "3.6"
No preview for this file type
......@@ -2,13 +2,13 @@ from typing import Callable
import attr
from utility import base_utility
from utility import rate_limit_utility
@attr.s
class NeuralNetworkConfig:
# Number of epochs in a neural network training phase
epochs: int = attr.ib(default=40)
epochs: int = attr.ib(default=1000)
# String with loss_function definition.
# List of available functions: https://keras.io/losses/
......@@ -18,24 +18,26 @@ class NeuralNetworkConfig:
# List of available optimizers: https://keras.io/optimizers/
optimizer: str = attr.ib(default='adam')
# From docs:
# Value used for weighting the loss function (during training only) for
# malicious requests. This can be useful to tell the model to "pay more
# attention" to malicious samples.
# Setting it to 1 makes loss function behave equally for both predictions
# during training
fp_weight: int = attr.ib(default=5)
fp_weight: int = attr.ib(default=1)
@attr.s
class TrainingNnConfig:
# Name of .csv file in src/data/scored directory with scored data which will
# be used as benign data in neural network training phase
benign_data_file_name: str = attr.ib(default='all_benign_scored.csv')
benign_data_file_name: str = attr.ib(default='test.csv') #all_benign_scored.csv
# Number of benign records to be used
benign_data_count: int = attr.ib(default=1000)
# Number \in [0-1] representing fraction of data used as validation dataset
validation_split: float = attr.ib(default=0.1)
# Specifying number of fake malicious DNS records created each
# iteration of double oracle algorithm from attacker's actions used in
# neural network training phase
......@@ -62,11 +64,11 @@ class BaseConfig:
# Sum(probability of each action times its fp_rate) must be less than this
# number. Fp_rate of the action is total number of malicious prediction for
# given benign data set
false_positives_allowed: int = attr.ib(default=10)
false_positives_allowed: int = attr.ib(default=0.5)
# Function to calculate utility given the actions
# f: List[float], NeuralNetwork -> float
utility_function: Callable = attr.ib(default=base_utility)
utility_function: Callable = attr.ib(default=rate_limit_utility)
@attr.s
......
......@@ -27,14 +27,14 @@ def np_arrays_from_scored_csv(file_name: str, label: int,
record.append(float(item))
batch.append(record)
labels.append(label)
labels.append([label])
if len(batch) == count_max:
break
return np.array(batch, np.float), np.array(labels, np.int8)
return np.array(batch, np.float), np.array(labels, np.uint8)
if __name__ == "__main__":
a = np_arrays_from_scored_csv(Path('scored/scored_malicious.csv'), 1, 100)
a = np_arrays_from_scored_csv('all_benign_scored.csv', 0, 1000)
print(a)
initial0,initial1
0.036923076923076927,0.8166666666666665
0.05256410256410256,0.64
0.1382051282051282,0.56
0.15384615384615385,0.3833333333333333
0.12948717948717947,0.41142857142857137
0.21512820512820513,0.34500000000000003
0.24076923076923076,0.2688888888888889
0.2364102564102564,0.22
0.3120512820512821,0.23636363636363636
0.33769230769230774,0.16666666666666663
0.3233333333333333,0.22
0.378974358974359,0.1857142857142857
0.43461538461538457,0.16333333333333336
0.3802564102564102,0.1325
0.4358974358974359,0.16294117647058826
0.4115384615384615,0.13444444444444445
0.5271794871794871,0.18684210526315792
0.5328205128205128,0.13
0.5484615384615384,0.10380952380952381
0.5941025641025641,0.15818181818181817
0.6197435897435898,0.06304347826086956
0.6353846153846154,0.07833333333333332
0.631025641025641,0.134
0.7066666666666667,0.12000000000000001
0.6823076923076923,0.046296296296296294
0.7079487179487179,0.11285714285714285
0.7635897435897436,0.0696551724137931
0.8192307692307692,0.07666666666666669
0.8448717948717949,0.11387096774193549
0.8505128205128205,0.08125
0.8161538461538461,0.05878787878787879
0.9017948717948718,0.06647058823529413
0.8874358974358973,0.024285714285714285
0.923076923076923,0.09222222222222223
0.9087179487179486,0.02027027027027027
1,0.03842105263157895
0.98,0.09666666666666666
0.05692307692307692,0.8766666666666666
0.06256410256410255,0.62
0.17820512820512818,0.48000000000000004
0.16384615384615386,0.46333333333333326
0.1794871794871795,0.3214285714285714
0.2251282051282051,0.29500000000000004
0.27076923076923076,0.2488888888888889
0.24641025641025638,0.24000000000000002
0.28205128205128205,0.21636363636363637
0.3476923076923077,0.22666666666666666
0.35333333333333333,0.24000000000000002
0.328974358974359,0.1857142857142857
0.3846153846153846,0.19333333333333336
0.37025641025641026,0.17250000000000001
0.4058974358974359,0.16294117647058826
0.4915384615384615,0.09444444444444446
0.4971794871794872,0.1668421052631579
0.5128205128205128,0.16
0.5384615384615384,0.0838095238095238
0.5241025641025641,0.12818181818181817
0.5497435897435897,0.07304347826086957
0.5853846153846154,0.12833333333333333
0.631025641025641,0.15400000000000003
0.6966666666666667,0.14
0.7123076923076923,0.1362962962962963
0.757948717948718,0.052857142857142846
0.7435897435897436,0.0896551724137931
0.7992307692307692,0.07666666666666669
0.7748717948717948,0.09387096774193548
0.7705128205128204,0.04125
0.8461538461538461,0.05878787878787879
0.8217948717948718,0.046470588235294125
0.9274358974358974,0.04428571428571429
0.903076923076923,0.03222222222222223
0.9487179487179487,0.10027027027027027
0.9343589743589743,0.09842105263157895
1.0,0.04666666666666666
0.06692307692307693,0.8966666666666666
0.10256410256410256,0.67
0.17820512820512818,0.56
0.18384615384615385,0.4233333333333333
0.2094871794871795,0.3714285714285714
0.21512820512820513,0.34500000000000003
0.19076923076923075,0.3388888888888889
0.2264102564102564,0.31
0.24205128205128204,0.27636363636363637
0.3476923076923077,0.25666666666666665
0.3233333333333333,0.19
0.318974358974359,0.1557142857142857
0.3846153846153846,0.14333333333333337
0.43025641025641026,0.1625
0.4358974358974359,0.10294117647058824
0.4515384615384615,0.09444444444444446
0.48717948717948717,0.1568421052631579
0.48282051282051275,0.17
0.5584615384615385,0.1538095238095238
0.5241025641025641,0.08818181818181818
0.5997435897435898,0.09304347826086956
0.6253846153846154,0.11833333333333332
0.691025641025641,0.14400000000000002
0.6466666666666666,0.08
0.6423076923076922,0.056296296296296296
0.6879487179487179,0.14285714285714285
0.7835897435897436,0.07965517241379311
0.8192307692307692,0.08666666666666668
0.8048717948717948,0.1338709677419355
0.8405128205128205,0.06125
0.8361538461538461,0.1087878787878788
0.8817948717948718,0.07647058823529412
0.8874358974358973,0.09428571428571429
0.963076923076923,0.08222222222222222
0.9987179487179487,0.11027027027027028
0.9743589743589743,0.03842105263157895
0.95,0.07666666666666666
0.08692307692307692,0.8666666666666666
0.12256410256410256,0.7000000000000001
0.17820512820512818,0.5700000000000001
0.12384615384615386,0.46333333333333326
0.16948717948717948,0.3714285714285714
0.21512820512820513,0.375
0.26076923076923075,0.3188888888888889
0.2264102564102564,0.31
0.23205128205128206,0.25636363636363635
0.3276923076923077,0.24666666666666665
0.3233333333333333,0.17
0.38897435897435895,0.2157142857142857
0.4046153846153846,0.18333333333333338
0.43025641025641026,0.1825
0.4058974358974359,0.10294117647058824
0.4515384615384615,0.16444444444444445
0.4571794871794872,0.0868421052631579
0.5028205128205128,0.18
0.5784615384615385,0.11380952380952382
0.5741025641025641,0.1481818181818182
0.6097435897435898,0.09304347826086956
0.5853846153846154,0.08833333333333332
0.671025641025641,0.12400000000000001
0.6466666666666666,0.060000000000000005
0.7323076923076923,0.10629629629629629
0.747948717948718,0.12285714285714285
0.7035897435897436,0.0596551724137931
0.8192307692307692,0.04666666666666668
0.7648717948717948,0.08387096774193549
0.7705128205128204,0.051250000000000004
0.8861538461538462,0.1087878787878788
0.8317948717948718,0.02647058823529412
0.9474358974358974,0.04428571428571429
0.943076923076923,0.11222222222222222
0.9087179487179486,0.06027027027027027
0.9343589743589743,0.02842105263157895
0.96,0.09666666666666666
......@@ -105,12 +105,10 @@ class Synthesizer:
if __name__ == "__main__":
synt = Synthesizer(2)
synt.add_cluster_around_2Dfunc(lambda x: 0.2, 0.05)
synt.add_cluster_around_2Dfunc(lambda x: 0.8, 0.05)
synt.add_cluster(Cluster([0.2, 0.2], 0.15, 200))
synt.add_cluster(Cluster([0.2, 0.8], 0.15, 200))
synt.add_cluster(Cluster([0.8, 0.2], 0.15, 200))
synt.add_cluster(Cluster([0.8, 0.8], 0.15, 200))
synt.add_cluster_around_2Dfunc(lambda x: 1/(15*x), 0.05)
synt.add_cluster_around_2Dfunc(lambda x: 1 / (15 * x), 0.05)
synt.add_cluster_around_2Dfunc(lambda x: 1 / (15 * x), 0.05)
synt.add_cluster_around_2Dfunc(lambda x: 1 / (15 * x), 0.05)
synt.generate()
synt.plot2D()
synt.save_to_file(Path('scored/test.csv'))
......@@ -25,6 +25,7 @@ class Game:
def _create_attacker_actions(self):
one_axis = np.linspace(0, 1, 101) # [0.00, 0.01, 0.02, ..., 0.99, 1.00]
# one_axis = np.linspace(0, 1, 11) # [0.0, 0.1, 0.2, ..., 0.9, 1.0]
axes = self._conf.base_conf.features_count - 1
return list(itertools.product(one_axis, *itertools.repeat(one_axis, axes)))
......
import logging
import operator
from collections import Counter
from itertools import count
from typing import List
import attr
import numpy as np
import pulp
from config import RootConfig
......@@ -22,88 +24,146 @@ class Result:
probs_p2: List = attr.ib()
# Lazy evaluation wrapper for new best response similarity calculation
class LazyWrapper(object):
    """Lazy evaluation wrapper: call ``func`` once, then cache its result.

    The wrapped value is computed on the first call to the instance and
    returned unchanged on every later call.
    """

    def __init__(self, func):
        # BUG FIX: the original also set ``self.value = None`` here, so the
        # AttributeError branch in __call__ could never fire and the wrapper
        # always returned None without ever invoking ``func``. The *absence*
        # of the attribute is what marks the value as "not computed yet".
        self.func = func

    def __call__(self):
        try:
            return self.value
        except AttributeError:
            # First call: compute and cache.
            self.value = self.func()
            return self.value
class GameSolver:
def __init__(self, conf: RootConfig):
self.conf = conf
self.utility = conf.base_conf.utility_function
train = conf.nn_train_conf
self.benign_data = np_arrays_from_scored_csv(train.benign_data_file_name,
0, train.benign_data_count)
def _get_trained_nn(self, attacker_features_x: List[List[float]]) -> NeuralNetwork:
self.benign_data = np_arrays_from_scored_csv(
train.benign_data_file_name,
0, train.benign_data_count)
self.benign_data_prob = self.calculate_benign_data_prob()
def calculate_benign_data_prob(self):
    """Return the empirical distribution of benign feature vectors.

    Each feature value is rounded to two decimal places, the resulting
    tuples are counted, and every count is normalised by the number of
    rows — yielding a Counter mapping feature tuple -> probability.
    """
    # TODO maybe this rounding is not really good for real results
    rounded_rows = [tuple(round(value, 2) for value in row)
                    for row in self.benign_data[0]]
    distribution = Counter(rounded_rows)
    total = len(rounded_rows)
    for features in distribution:
        distribution[features] = distribution[features] / total
    return distribution
def _get_trained_nn(self, attacker_features_x, attacker_actions) -> NeuralNetwork:
# Initialize the model
network = NeuralNetwork(self.conf.base_conf.features_count,
self.conf.nn_conf)
self.conf.nn_conf,
self.conf.nn_train_conf)
network.set_attacker_actions(attacker_actions)
network.train(attacker_features_x, self.benign_data)
network.calc_n0_false_positives(self.benign_data[0])
# TODO use different dataset to calc false_positives
# network.calc_n0_false_positives(self.benign_data[0])
return network
def double_oracle(self, actions_p1: List) -> Result:
# Get initial actions as the first ones
used_actions_p1 = set(actions_p1[:1])
used_actions_p2 = {self._get_trained_nn([[]])}
played_actions_p1 = set(actions_p1[:1])
played_actions_p2 = {self._get_trained_nn([[]])}
for i in count():
logger.debug(f'Iteration: {i}\n')
ordered_used_actions_p1 = list(used_actions_p1)
ordered_used_actions_p2 = list(used_actions_p2)
ordered_actions_p1 = list(played_actions_p1)
ordered_actions_p2 = list(played_actions_p2)
# Solve current game with linear programming
value, probs_p1, probs_p2 = self.solve_zero_sum_game_pulp(ordered_used_actions_p1, ordered_used_actions_p2)
value, probs_p1, probs_p2 = self.solve_zero_sum_game_pulp(
ordered_actions_p1, ordered_actions_p2)
# Find best responses for each player given the mixture strategies
br_p1 = self.best_response_p1(actions_p1, ordered_used_actions_p2, probs_p2)
br_p2 = self.best_response_p2(ordered_used_actions_p1, probs_p1)
br_p1 = self.best_response_p1(actions_p1, ordered_actions_p2, probs_p2)
br_p2 = self.best_response_p2(ordered_actions_p1, probs_p1)
br_p1_exists = self.does_br_p1_exist(br_p1, ordered_actions_p1,
ordered_actions_p2)
br_p2_exists = self.does_br_p2_exist(br_p2, ordered_actions_p2,
ordered_actions_p1)
# If there is no new action in best responses, algorithm ends
if br_p1 in used_actions_p1 and self.is_nn_similar(br_p2, ordered_used_actions_p2, used_actions_p1):
return Result(value, ordered_used_actions_p1, probs_p1,
ordered_used_actions_p2, probs_p2)
if br_p1_exists and br_p2_exists:
return Result(value, ordered_actions_p1, probs_p1,
ordered_actions_p2, probs_p2)
# Otherwise add new actions to lists and continue
used_actions_p1.add(br_p1)
used_actions_p2.add(br_p2) # TODO get rid of duplicates
def is_nn_similar(self, new_nn: NeuralNetwork, old_nns: List, actions_p1):
"""
Compares utilities of a new neural network with utilities of the old
neural networks and checks if the new one is similar to some old
(difference of its utilities for every p1 action is lower than epsilon)
"""
logger.debug('Let\'s compare new neural network with the others:')
if not br_p1_exists: played_actions_p1.add(br_p1)
if not br_p2_exists: played_actions_p2.add(br_p2)
def does_br_p1_exist(self, new_br, old_brs, neural_networks_p2):
    """Return True if ``new_br`` duplicates an already-played p1 action.

    An action counts as a duplicate either when it is literally present
    in ``old_brs``, or when its utility against every defender network
    in ``neural_networks_p2`` differs from some old action's utility by
    at most 0.05.
    """
    logger.debug('Comparing new br of player1 with the existing ones:')
    # Exact-match fast path.
    if new_br in old_brs:
        logger.debug('This attacker action already exists')
        return True
    # Utility of the candidate action against every defender network.
    utilities_of_new_action = [self.utility(new_br, a2) for a2 in neural_networks_p2]
    for old_br in old_brs:
        as_good = True
        for new_utility, nn in zip(utilities_of_new_action, neural_networks_p2):
            old_utility = self.utility(old_br, nn)
            # NOTE(review): hard-coded 0.05 tolerance here, while the p2
            # counterpart (does_br_p2_exist) uses
            # self.conf.base_conf.epsilon -- confirm the asymmetry is
            # intentional.
            if abs(old_utility - new_utility) > 0.05:
                as_good = False
                break
        if as_good:
            logger.debug('This attacker action already exists')
            return True
    logger.debug('This attacker action does not exist yet')
    return False
def does_br_p2_exist(self, new_nn: NeuralNetwork, old_nns: List,
actions_p1):
logger.debug('Comparing new neural network with the existing ones:')
utilities_of_new_nn = [self.utility(a1, new_nn) for a1 in actions_p1]
for old_nn in old_nns:
as_good = True
for new_utility, a1 in zip(utilities_of_new_nn, actions_p1):
old_utility = self.utility(a1, old_nn)
logger.debug(f'old utility: {old_utility}, '
f'new utility: {new_utility}, '
f'difference: {abs(old_utility-new_utility)}')
for new_utility, action_p1 in zip(utilities_of_new_nn, actions_p1):
old_utility = self.utility(action_p1, old_nn)
if abs(old_utility - new_utility) > self.conf.base_conf.epsilon:
as_good = False
break
if as_good:
logger.debug("Yep, this neural network already exists.")
logger.debug('This neural network already exists')
return True
logger.debug('Nope, this is new neural network')
logger.debug('This neural network does not exist yet')
return False
def best_response_p1(self, actions_p1, used_actions_p2, probs_p2):
return max(actions_p1, key=lambda a1: sum(map(operator.mul, map(lambda a2: self.utility(a1, a2), used_actions_p2), probs_p2)))
# Take only defenders actions which are played with non zero probability
non_zero_p = np.where(np.asarray(probs_p2) != 0)
actions_2 = np.asarray(used_actions_p2)[non_zero_p]
p2 = np.asarray(probs_p2)[non_zero_p]
return max(actions_p1, key=lambda a1: sum(map(operator.mul, map(
lambda a2: self.utility(a1, a2), actions_2), p2)))
def best_response_p2(self, used_actions_p1, probs_p1):
malicious_features = []
for ai, pi in zip(used_actions_p1, probs_p1):
counter = int(self.conf.nn_train_conf.malicious_data_count*pi)
for i in range(counter):
counter = int(self.conf.nn_train_conf.malicious_data_count * pi)
for _ in range(counter):
malicious_features.append(ai)
# Take only attacker actions which are played with non zero probability
non_zero_p = np.where(np.asarray(probs_p1) != 0)
actions_2 = np.asarray(used_actions_p1)[non_zero_p]
p2 = np.asarray(probs_p1)[non_zero_p]
attacker_actions = (actions_2, p2)
logger.debug('Let\'s train new NN with this malicious data:')
logger.debug(f'{malicious_features}\n')
return self._get_trained_nn(malicious_features)
return self._get_trained_nn(malicious_features, attacker_actions)
def solve_zero_sum_game_pulp(self, actions_p1: List[List[float]],
actions_p2: List[NeuralNetwork]):
......@@ -120,40 +180,54 @@ class GameSolver:
m += v
# Player two probability vector
probs_p_two = [pulp.LpVariable("np" + str(i), 0, 1) for i in range(len(actions_p2))]
probs_p_two = [pulp.LpVariable("np" + str(i), 0, 1) for i in
range(len(actions_p2))]
m += pulp.lpSum(probs_p_two) == 1 # Probabilities sum to 1
suma = []
i = 0
for a2 in actions_p2:
suma.append(probs_p_two[i]*a2.get_false_positive_rate())
i += 1
fp_constraint = pulp.lpSum(suma) <= self.conf.base_conf.false_positives_allowed
m += fp_constraint
# Set false positive constraint
# suma = []
# i = 0
# for a2 in actions_p2:
# suma.append(probs_p_two[i]*a2.get_false_positive_rate())
# i += 1
# fp_constraint = pulp.lpSum(suma) <= self.conf.base_conf.false_positives_allowed
# m += fp_constraint
# Calc false positive cost with benign data probability distribution
fp_cost = 0
for features, features_prob in self.benign_data_prob.items():
for nn, nn_prob in zip(actions_p2, probs_p_two):
l = nn.limit_predict(features)[0]
fp_cost += (l**4) * features_prob * nn_prob
# Define main constraints
constraints = []
for a1 in actions_p1:
suma = []
suma = [fp_cost]
j = 0
for a2 in actions_p2:
suma.append(probs_p_two[j]*self.utility(a1, a2))
suma.append(probs_p_two[j] * self.utility(a1, a2))
j += 1
constraints.append(pulp.lpSum(suma) <= v)
for c in constraints:
m += c
# Let's solve
m.solve()
logger.debug(f'LP solved')
logger.debug(f'Value of the game: {v.varValue}')
logger.debug(f'Number of false positives in this game: {fp_constraint}')
logger.debug(f'Number of false positives in this game: {fp_cost}')
logger.debug(f'Found solution: {pulp.LpStatus[m.status]}')
logger.debug(f'Attacker\' probabilities:')
logger.debug(f'{list(str(abs(c.pi)) + " " for c in constraints)}')
logger.debug(f'Deffender\'s probabilities:')
logger.debug(f'{list(str(prob.varValue) + " " for prob in probs_p_two)}')
logger.debug(
f'{list(str(prob.varValue) + " " for prob in probs_p_two)}')
return v.varValue, [abs(c.pi) for c in constraints], [prob.varValue for prob in probs_p_two]
return v.varValue, [abs(c.pi) for c in constraints], [prob.varValue for
prob in
probs_p_two]
if __name__ == "__main__":
......
from typing import List, Tuple
from tensorflow import keras
import numpy as np
from sklearn.utils import shuffle
from config import NeuralNetworkConfig, TrainingNnConfig
from neural_networks.network import OrderCounter
def tmp_loss_function(y_true, y_pred):
    # Mean squared error scaled by 100, averaged over the last axis.
    # NOTE(review): scaling a loss by a constant does not change the
    # optimum, only gradient magnitudes -- confirm the 100x is intentional.
    return keras.backend.mean(100 * keras.backend.square(y_pred - y_true),
                              axis=-1)
class KerasNeuralNetwork:
def __init__(self, input_features=2,
             nn_conf: NeuralNetworkConfig = None,
             nn_train_conf: TrainingNnConfig = None):
    """Build and compile a small dense binary classifier.

    Architecture: Dense(10, relu) -> Dense(12, relu) -> Dense(1, sigmoid).

    :param input_features: width of the input layer (number of features)
    :param nn_conf: epochs / optimizer / fp_weight configuration
    :param nn_train_conf: training configuration (validation split)
    """
    # BUG FIX: the config instances used to be created in the signature
    # (mutable default arguments), so every call without explicit configs
    # shared the very same objects. Create them per call instead.
    if nn_conf is None:
        nn_conf = NeuralNetworkConfig()
    if nn_train_conf is None:
        nn_train_conf = TrainingNnConfig()
    self.model = keras.Sequential([
        keras.layers.Dense(10, activation='relu',
                           input_shape=(input_features,)),
        keras.layers.Dense(12, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid'),
    ]
    )
    # NOTE(review): the configured loss (nn_conf.loss_function) is
    # ignored in favour of tmp_loss_function -- confirm this is
    # intentional.
    self.model.compile(loss=tmp_loss_function,
                       optimizer=nn_conf.optimizer,
                       metrics=['accuracy'])
    self.false_positives = None
    self.epochs = nn_conf.epochs
    self.fp_weight = nn_conf.fp_weight
    self.validation_split = nn_train_conf.validation_split
    self.order = OrderCounter.next()
def train(self,
          attacker_features_x: List[List[float]],
          benign_data: Tuple[np.ndarray, np.ndarray]):
    """Fit the model on benign samples plus attacker feature vectors.

    Attacker rows are all labelled 1 (malicious); ``benign_data``
    supplies the (features, labels) arrays for the benign class.

    :param attacker_features_x: attacker feature rows; a single empty
        row means "no attacker data" and skips the concatenation
    :param benign_data: tuple of (features, labels) numpy arrays
    """
    x, y = benign_data
    # There are some attacker's features
    attacker_features_x = np.array(attacker_features_x)
    if len(attacker_features_x[0]):
        attacker_features_y = [1 for _ in attacker_features_x]
        # NOTE(review): if the benign labels are 2-D (shape (n, 1)), this
        # flat list of ints will not concatenate with them -- confirm the
        # label shapes actually match at the call site.
        x = np.concatenate((x, attacker_features_x), axis=0)
        y = np.concatenate((y, attacker_features_y), axis=0)
    # random_state=1 keeps the shuffle reproducible between runs.
    x, y = shuffle(x, y, random_state=1)
    # class_weight makes malicious samples (label 1) weigh fp_weight
    # times more in the loss during training.
    self.model.fit(x, y,
                   validation_split=self.validation_split,
                   epochs=self.epochs,
                   class_weight={0: 1, 1: self.fp_weight})
def calc_n0_false_positives(self, x_test: np.ndarray):
    """Cache the false-positive cost of ``x_test`` on the instance.

    Each sample's predicted rate limit contributes its fourth power;
    the total is stored in ``self.false_positives``.
    """
    rate_limits = self.predict_rate_limit(x_test)
    self.false_positives = sum(limit ** 4 for limit in rate_limits)
def predict(self, xs: np.ndarray):
    # Raw model outputs (sigmoid activations) for a batch of feature rows.
    return self.model.predict(xs)
def predict_rate_limit(self, xs: np.ndarray):
    """Convert raw predictions into rate limits.

    Predictions below 0.5 map to 0; values at or above 0.5 are rescaled
    linearly so that 0.5 -> 0 and 1.0 -> 1.
    """
    return [0 if p < 0.5 else (p - 0.5) * 2 for p in self.predict(xs)]
def predict_solo(self, attacker_features: List[float]) -> float:
    """Predict for a single feature vector.

    Returns the raw sigmoid output, a number in [0, 1]. (Return
    annotation corrected from ``int``: the model emits a float.)
    """
    features = np.array([attacker_features])
    prediction = self.model.predict(features)
    # returns number \in [0, 1]
    return prediction[0][0]
def predict_solo_rate_limit(self, attacker_features: List[float]) -> float:
    """Rate limit for one sample: 0 below 0.5, else (p - 0.5) * 2.

    (Return annotation corrected from ``int``: the scaled prediction is
    a float in [0, 1].)
    """
    prediction = self.predict_solo(attacker_features)
    return 0 if prediction < 0.5 else (prediction - 0.5) * 2
def get_false_positive_rate(self):
    # Value cached by calc_n0_false_positives(); None until that has run.
    return self.false_positives
def __str__(self):