Commit 2c39d5b3 authored by Martin Řepa's avatar Martin Řepa

Attacker OOP, nice 3x2 grid plotting, other small upgrades

parent 49a8c25a
import itertools
import logging
import operator
import random
from typing import List
import numpy as np
import torch
from config import ModelConfig
logger = logging.getLogger(__name__)
def create_attacker_actions(dimension: int):
one_axis = np.linspace(0, 1, 101) # [0.00, 0.01, 0.02, ..., 0.99, 1.00]
repeat = dimension - 1
generator = itertools.product(one_axis, *itertools.repeat(one_axis, repeat))
return np.array(list(generator))
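# A quick, illustrative sanity check of the grid above (not used by the solver):
if __name__ == '__main__':
    grid = create_attacker_actions(2)
    print(grid.shape)         # (10201, 2): the 101 x 101 grid with step 0.01
    print(grid[0], grid[-1])  # [0. 0.] [1. 1.]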
class Attacker:
def __init__(self, model_conf: ModelConfig):
self.conf = model_conf.attacker_conf
self.features_count = model_conf.features_count
self.utility = model_conf.attacker_utility
self.torch_utility = model_conf.attacker_torch_utility
self.actions: np.array = None
if not self.conf.use_gradient_descent:
self.actions = create_attacker_actions(self.features_count)
def get_conf(self):
return self.conf
def random_action(self) -> List:
# Return random set of features as action
return [np.random.uniform(0.0, 1.0) for _ in range(self.features_count)]
def get_initial_action(self) -> List:
return self.random_action()
def get_best_response(self, def_actions: List, def_probs: List):
# Take only defender's actions which are played with non-zero probability
......@@ -36,92 +32,73 @@ class Attacker:
actions = np.asarray(def_actions)[non_zero_p]
probs = np.asarray(def_probs)[non_zero_p]
return self._get_best_response(actions, probs)
def does_br_exists(self, played_actions_p1, br_p1, value):
it_does = self._does_br_exists(played_actions_p1, br_p1, value)
if it_does:
logger.debug('This attacker action already exists')
else:
logger.debug('This attacker action does not exist yet')
return it_does
def create_discrete_actions(self):
one_axis = np.linspace(0, 1, 101) # [0.00, 0.01, 0.02, ..., 0.99, 1.00]
repeat = self.features_count - 1
generator = itertools.product(one_axis, *itertools.repeat(one_axis, repeat))
return np.array(list(generator))
def _get_best_response(self, def_actions: List, def_probs: List) -> List:
raise NotImplementedError()
def _does_br_exists(self, played_actions_p1, br_p1, value):
raise NotImplementedError()
class DiscreteAttacker(Attacker):
def __init__(self, model_conf: ModelConfig):
super().__init__(model_conf)
self.utility = model_conf.attacker_utility
self.actions = super().create_discrete_actions()
def _get_best_response(self, def_actions: List, def_probs: List) -> List:
best_rp = max(self.actions, key=lambda a1: sum(map(operator.mul, map(
lambda a2: self.utility(a1, a2), def_actions), def_probs)))
return list(best_rp)
def _does_br_exists(self, played_actions_p1, br_p1, value):
return br_p1 in played_actions_p1
class GradientAttacker(Attacker):
def __init__(self, model_conf: ModelConfig):
super().__init__(model_conf)
self.torch_utility = model_conf.attacker_torch_utility
def _get_best_response(self, def_actions: List, def_probs: List):
# Start from several random attacker actions and optimise all of them at once
all_actions = []
for _ in range(self.conf.tries_for_best_response):
all_actions.append(super().random_action())
all_actions = torch.tensor(all_actions, requires_grad=True)
# Create pytorch adam optimiser to update the action tensor
optimizer = torch.optim.Adam([all_actions], lr=self.conf.learning_rate)
for i in range(self.conf.epochs):
# logger.debug(f'Epoch {i} in attacker best response searching')
losses = 0
for nn, prob in zip(def_actions, def_probs):
# Attacker wants to maximize its gain, but the optimiser tries
# to minimize. That's why we negate the objective function
losses += -(self.torch_utility(all_actions, nn) * prob)
loss = torch.mean(losses)
# Calculate gradients and update the values
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Clamp the input features to the allowed interval [0, 1] after the update
all_actions.data.clamp_(min=0.0, max=1.0)
# Pick the candidate with the lowest loss (highest gain) of all tries
i = torch.argmin(losses)
best_action = all_actions[i]
self.value_of_last_brp = -losses[i].item()
return [best_action[0].item(), best_action[1].item()]
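# A minimal, self-contained sketch of the same idea with a toy differentiable
# utility (a hypothetical stand-in for model_conf.attacker_torch_utility, not
# the project's utility); call _toy_gradient_br_sketch() to try it.
def _toy_gradient_br_sketch():
    import torch
    def toy_utility(actions: torch.Tensor) -> torch.Tensor:
        # Purely illustrative: reward actions whose features are close to 1
        return actions.prod(dim=1)
    candidates = torch.rand(8, 2, requires_grad=True)  # 8 random starts, 2 features
    opt = torch.optim.Adam([candidates], lr=0.1)
    for _ in range(200):
        loss = -toy_utility(candidates).mean()  # maximise utility == minimise its negation
        opt.zero_grad()
        loss.backward()
        opt.step()
        candidates.data.clamp_(min=0.0, max=1.0)  # keep features inside [0, 1]
    best = candidates[toy_utility(candidates).argmax()]
    print(best.detach())  # should end up near [1., 1.]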
def get_initial_action(self) -> List:
# Return random set of features as initial action
return [random.uniform(0.0, 1.0) for _ in range(self.features_count)]
def does_br_exists(self, new_br, old_brs, defenders_networks):
it_does = self._does_br_exists(new_br, old_brs, defenders_networks)
if it_does:
logger.debug('This attacker action already exists')
else:
logger.debug('This attacker action does not exist yet')
return it_does
def _does_br_exists(self, new_br, old_brs, defenders_networks):
if self.conf.use_gradient_descent:
return self._exists_by_epsilon(new_br, old_brs, defenders_networks)
else:
return new_br in old_brs
def _exists_by_epsilon(self, new_br, old_brs, defenders_networks):
u = self.utility
new_action_utilities = [u(new_br, a2) for a2 in defenders_networks]
for old_br in old_brs:
as_good = True
for new_utility, nn in zip(new_action_utilities, defenders_networks):
old_utility = u(old_br, nn)
# If the difference is larger than epsilon, the actions are
# not considered similar
if abs(old_utility - new_utility) > self.conf.epsion:
as_good = False
break
if as_good:
return True
return False
def _does_br_exists(self, played_actions_p1, br_p1, value):
return self.value_of_last_brp - value < self.conf.epsion
......@@ -67,20 +67,10 @@ class Defender:
network.train()
return network
def does_br_exists(self, new_nn, old_nns, attacker_actions):
logger.debug('Comparing new neural network with the existing ones:')
new_nn_utilities = [ self.attacker_utility(a1, new_nn) +
new_nn.final_fp_cost for a1 in attacker_actions]
for old_nn in old_nns:
as_good = True
for new_utility, action_p1 in zip(new_nn_utilities, attacker_actions):
old_utility = self.attacker_utility(action_p1, old_nn) \
+ old_nn.final_fp_cost
if abs(old_utility - new_utility) > self.conf.defender_epsilon:
as_good = False
break
if as_good:
logger.debug('This neural network already exists')
return True
logger.debug('This neural network does not exist yet')
return False
def does_br_exists(self, best_response: NeuralNetwork, value):
it_does = value - best_response.final_loss < self.conf.defender_epsilon
if it_does:
logger.debug('This neural network already exists')
else:
logger.debug('This neural network does not exist yet')
return it_does
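# A worked example of the two epsilon checks above (all numbers are made up):
# attacker: value_of_last_brp = 0.4312, attacker_value = 0.4305
#           0.4312 - 0.4305 = 0.0007 < epsion (1e-3 by default)             -> br already "exists"
# defender: zero-sum value = -0.2100, best_response.final_loss = -0.2108
#           -0.2100 - (-0.2108) = 0.0008 < defender_epsilon (1e-3 by default) -> br already "exists"
# When both checks pass, the double oracle loop terminates.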
......@@ -22,7 +22,7 @@ class NeuralNetworkConfig:
class DefenderConfig:
# Two neural networks are considered the same if the difference of the game
# value for them and each attacker's action is less than this epsilon
defender_epsilon: float = attr.ib(default=5e-3)
defender_epsilon: float = attr.ib(default=1e-3)
# This number of neural networks will be trained in each double oracle
# iteration and the best one will be considered as a best response
......@@ -45,12 +45,12 @@ class AttackerConfig:
# of attacker's utility function for them and all defender's actions is less
# than this value
# Attention. Used only when use_gradient_descent is set to True!
epsion: float = attr.ib(default=5e-3)
epsion: float = attr.ib(default=1e-3)
# Number of random tries to find attacker action using gradient descent.
# The one with the best final loss value will be chosen.
# Attention. Used only when use_gradient_descent is set to True!
tries_for_best_response: int = attr.ib(default=7)
tries_for_best_response: int = attr.ib(default=264)
# Learning rate for optimiser which updates attacker action while searching
# for best response using gradient descent
......@@ -60,7 +60,7 @@ class AttackerConfig:
# Number of gradient descent iterations used while searching for the
# best response
# Attention. Used only when use_gradient_descent is set to True!
epochs = 500
epochs = 200
@attr.s
......@@ -81,12 +81,15 @@ class ModelConfig:
# Defender
defender_conf: DefenderConfig = attr.ib(default=DefenderConfig())
# i_a
# i_a, used only for latency
i_a: int = attr.ib(default=1)
# i_d
# i_d, used only for latency
i_d: int = attr.ib(default=4)
# malicious : benign ratio in datasets
benign_ratio: int = attr.ib(default=1)
# Function to calculate utility for attacker given the actions
# f: List[float], NeuralNetwork -> float
attacker_utility: Callable = attr.ib(init=False)
......@@ -99,7 +102,8 @@ class ModelConfig:
self.attacker_utility = get_attacker_utility(self.i_a)
self.attacker_torch_utility = get_attacker_torch_grad_utility(self.i_a)
self.defender_conf.nn_conf.loss_function = get_nn_loss_function(
self.i_a, self.i_d)
self.i_a, self.i_d,
self.benign_ratio)
@attr.s
......
......@@ -22,15 +22,19 @@ class Game:
def solve_game(self):
logger.info("Starting game solver")
gs = GameSolver(self._conf.model_conf)
gs = GameSolver(self._conf)
self.result = gs.double_oracle()
self._write_summary()
self._plot_result()
# self._plot_result()
def _write_summary(self):
print('\n\n-------------------------------------------------')
logger.info(f'Game has ended with value: {self.result.value}')
logger.info(f'Game has ended with these values\n'
f'transformed zero sum game value: {self.result.zero_sum_nash_val}\n'
f'attacker value of original game: {self.result.attacker_value}\n'
f'defender value of original game: {self.result.defender_value}')
logger.info('Attacker: action x probability')
for a, p in zip(self.result.ordered_actions_p1, self.result.probs_p1):
logger.info(f'{a} x {p}')
......@@ -45,10 +49,7 @@ class Game:
if not self._conf.plot_result:
return
logger.debug("Plotting result...")
p = Plotter(self.result.ordered_actions_p1,
self.result.probs_p1,
self.result.ordered_actions_p2,
self.result.probs_p2)
p = Plotter(self.result)
p.plot_result()
......
......@@ -3,11 +3,14 @@ from itertools import count
from typing import List
import attr
import matplotlib.pyplot as plt
import numpy as np
import pulp
import torch
from actors.attacker import Attacker
from actors.attacker import DiscreteAttacker, GradientAttacker
from actors.defender import Defender
from config import ModelConfig
from config import RootConfig
from src.neural_networks.network import NeuralNetwork
logger = logging.getLogger(__name__)
......@@ -15,20 +18,119 @@ logger = logging.getLogger(__name__)
@attr.s
class Result:
value: int = attr.ib()
zero_sum_nash_val: int = attr.ib()
attacker_value: int = attr.ib()
defender_value: int = attr.ib()
ordered_actions_p1: List = attr.ib()
probs_p1: List = attr.ib()
ordered_actions_p2: List = attr.ib()
probs_p2: List = attr.ib()
def get_fp_cost(probs2, played2):
fp_cost = 0
for action, prob in zip(played2, probs2):
if prob == 0:
continue
fp_cost += action.final_fp_cost * prob
return fp_cost
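# Illustrative numbers (made up): two defender networks with final_fp_cost
# 0.10 and 0.30 played with probabilities 0.75 and 0.25 give
#     fp_cost = 0.75 * 0.10 + 0.25 * 0.30 = 0.15
# and with a zero-sum value of 0.40 the original-game values used below are
#     attacker_value = 0.40 - 0.15 = 0.25
#     defender_value = -0.40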
class GameSolver:
def __init__(self, conf: RootConfig):
self.conf = conf.model_conf
self.plot = conf.plot_result
# Define game actors
if conf.model_conf.attacker_conf.use_gradient_descent:
self.attacker = GradientAttacker(conf.model_conf)
else:
self.attacker = DiscreteAttacker(conf.model_conf)
self.defender = Defender(conf.model_conf)
# Variables for plotting progress
if conf.plot_result:
self._init_plots()
def _init_plots(self):
plt.ion()
self.fig, self.ax = plt.subplots(2, 3)
self.actions = torch.tensor(self.attacker.create_discrete_actions()).float()
self.plotted = []
self.ax[0][0].set_title('Defender nash strategy')
self.ax[0][1].set_title('Attacker nash strategy')
self.ax[0][1].set_xlim([0, 1])
self.ax[0][1].set_ylim([0, 1])
self.ax[0][2].set_title('All attackers actions played')
self.ax[0][2].set_xlim([0, 1])
self.ax[0][2].set_ylim([0, 1])
self.ax[1][0].set_title('Defender best response')
self.ax[1][1].set_title('Attacker best response')
self.ax[1][1].set_xlim([0, 1])
self.ax[1][1].set_ylim([0, 1])
self.ax[1][2].set_title('Nothing for now')
plt.tight_layout()
def plot_paths(self):
for iteration, points in self.val_paths:
plt.title("All iterations so far")
plt.scatter([iteration], points[:1], c='blue', s=20) # nash
plt.scatter([iteration], points[1:2], c='red', s=10) # attacker brp
plt.scatter([iteration], points[2:3], c='green', s=10) # defender brp
plt.show()
for iteration, points in self.val_paths[len(self.val_paths)-5:]:
plt.title("Last 5 iterations")
plt.scatter([iteration], points[:1], c='blue', s=20) # nash
plt.scatter([iteration], points[1:2], c='red', s=10) # attacker brp
plt.scatter([iteration], points[2:3], c='green', s=10) # defender brp
plt.show()
def plot_iteration(self, iteration, zero_sum_val, played_p2, probs_p2,
played_p1, probs_p1, br_p1, br_p2):
# Remove all lines from previous iteration plotting
for item in self.plotted:
item.remove()
self.plotted = []
# Set title of current figure
self.fig.suptitle(f'Iteration: {iteration}, value: {zero_sum_val}')
# Plot heat-map of defender's nash strategy actions
res = np.zeros((101, 101))
for nn, prob in zip(played_p2, probs_p2):
if prob == 0: continue
predictions = nn.latency_predict(self.actions).numpy()
res += (predictions * prob).reshape((101, 101))
self.plotted.append(self.ax[0][0].imshow(res, cmap='Reds', vmin=0,
vmax=1, origin='lower', interpolation='spline16'))
# Plot heat-map of defender's best response
res = br_p2.latency_predict(self.actions).numpy().reshape((101, 101))
self.plotted.append(self.ax[1][0].imshow(res, cmap='Reds', vmin=0,
vmax=1, origin='lower', interpolation='spline16'))
# Plot attacker nash strategy
for point, prob in zip(played_p1, probs_p1):
if prob == 0:
continue
self.plotted.append(self.ax[0][1].scatter(point[0], point[1], c='red', marker='^'))
self.plotted.append(self.ax[0][1].annotate(f'{round(prob, 2)}', (point[0], point[1])))
# Plot attacker best response
self.plotted.append(self.ax[1][1].scatter(br_p1[0], br_p1[1], c='red'))
# Add attacker new action to subplot with all his actions
self.ax[0][2].scatter(br_p1[0], br_p1[1], c='blue', marker='^')
# Show the result
self.fig.canvas.draw()
plt.pause(0.000001)
def double_oracle(self) -> Result:
# Get initial actions as the first ones
......@@ -39,21 +141,32 @@ class GameSolver:
logger.debug(f'Iteration: {i}\n')
# Solve current game with linear programming
value, probs_p1, probs_p2 = self.solve_zero_sum_game(
zero_sum_nash_val, probs_p1, probs_p2 = self.solve_zero_sum_game(
played_actions_p1, played_actions_p2)
# Find best responses for each player given the mixture strategies
br_p1 = self.attacker.get_best_response(played_actions_p2, probs_p2)
br_p2 = self.defender.get_best_response(played_actions_p1, probs_p1)
br_p1_exists = self.attacker.does_br_exists(br_p1, played_actions_p1,
played_actions_p2)
br_p2_exists = self.defender.does_br_exists(br_p2, played_actions_p2,
played_actions_p1)
fp_cost = get_fp_cost(probs_p2, played_actions_p2)
attacker_value = zero_sum_nash_val - fp_cost
defender_value = -zero_sum_nash_val
# Plot progress
if self.plot:
self.plot_iteration(i, zero_sum_nash_val, played_actions_p2, probs_p2,
played_actions_p1, probs_p1, br_p1, br_p2)
# Are those new actions good enough?
br_p1_exists = self.attacker.does_br_exists(played_actions_p1,
br_p1, attacker_value)
br_p2_exists = self.defender.does_br_exists(br_p2,
zero_sum_nash_val)
# If there is no new action in best responses, algorithm ends
if br_p1_exists and br_p2_exists:
return Result(value, played_actions_p1, probs_p1,
return Result(zero_sum_nash_val, attacker_value,
defender_value, played_actions_p1, probs_p1,
played_actions_p2, probs_p2)
# Otherwise add new actions to lists and continue
......@@ -61,7 +174,7 @@ class GameSolver:
if not br_p2_exists: played_actions_p2.append(br_p2)
def solve_zero_sum_game(self, actions_p1: List[List[float]],
actions_p2: List[NeuralNetwork]):
logger.debug('Going to solve current state with LP')
logger.debug(f'Attacker\'s actions by now: {actions_p1}')
......@@ -90,7 +203,7 @@ class GameSolver:
suma = [fp_cost]
j = 0
for a2 in actions_p2:
suma.append(probs_p_two[j] * self.attacker_utility(a1, a2))
suma.append(probs_p_two[j] * self.conf.attacker_utility(a1, a2))
j += 1
constraints.append(pulp.lpSum(suma) <= v)
for c in constraints:
......
......@@ -8,3 +8,5 @@ if __name__ == "__main__":
game = Game(conf)
game.solve_game()
input('Press enter to exit')
......@@ -93,7 +93,7 @@ class NeuralNetwork:
for e in range(self.conf.epochs):
# Forward pass: compute predicted y by passing x to the model
train_ltncies = self._latency_predict(self.x_train, with_grad=True)
train_ltncies = self.latency_predict(self.x_train, with_grad=True)
# Compute loss
loss, _ = self.conf.loss_function(self.x_train, train_ltncies,
......@@ -117,8 +117,8 @@ class NeuralNetwork:
optimizer.step()
with torch.no_grad():
loss, fp_part = self.loss_function(self.x_train, train_ltncies,
self.y_train, self.probs_train)
loss, fp_part = self.conf.loss_function(self.x_train, train_ltncies,
self.y_train, self.probs_train)
# measuring quality of final network
self.final_loss = loss.item()
self.final_fp_cost = fp_part.item()
......@@ -127,18 +127,14 @@ class NeuralNetwork:
pred = self.model(tensor)
return pred.flatten().float()
def _latency_predict(self, x: torch.Tensor, with_grad=False):
def latency_predict(self, x: torch.Tensor, with_grad=False):
if with_grad:
raw_prediction = self._raw_predict(x)
else:
with torch.no_grad():
raw_prediction = self._raw_predict(x)
# The same as lambda p: 0 if p < 0.5 else (p - 0.5) * 2
# TODO try to use e.g. sigmoid
clamped = raw_prediction.clamp(min=0.5, max=1)
latency = torch.mul(torch.add(clamped, -0.5), 2)
return latency
return raw_prediction
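# Worked example of the transform in the old lines above: a raw prediction of
# 0.75 is clamped to [0.5, 1] and rescaled to (0.75 - 0.5) * 2 = 0.5 latency,
# while anything below 0.5 maps to 0; the new code returns the raw prediction as-is.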
def predict_single_latency(self, input, return_tensor=False):
in_type = type(input)
......@@ -147,9 +143,9 @@ class NeuralNetwork:
input = torch.tensor(input).float()
if return_tensor:
return self._latency_predict(input)[0]
return self.latency_predict(input)[0]
else:
return self._latency_predict(input)[0].item()
return self.latency_predict(input)[0].item()
def setup_loger(debug: bool):
......@@ -164,7 +160,7 @@ if __name__ == '__main__':
benign_x, _ = np_arrays_from_scored_csv(
Path('all_benign_scored.csv'), 0, 500)
malicious_x, _ = np_arrays_from_scored_csv(
Path('scored_malicious.csv'), 1, 1)
Path('scored_malicious.csv'), 1, 400)
benign_unique_x, counts = np.unique(benign_x, axis=0, return_counts=True)
probs_benign = np.array([count / len(benign_x) for count in counts])
......@@ -177,7 +173,7 @@ if __name__ == '__main__':
malicious_data = FormattedData(malicious_unique_x, probs_malicious, malicious_y)
conf = RootConfig()
nn = NeuralNetwork(conf.model_conf.nn_loss_function)
nn = NeuralNetwork(2, conf.model_conf.defender_conf.nn_conf)
nn.set_data(benign_data, malicious_data)
nn.train()
import functools
import itertools
import operator
import sys
from collections import Counter
from typing import List, Tuple
import matplotlib.pyplot as plt
import numpy as np
import pulp
from data.loader import np_arrays_from_scored_csv
# Global "config" variables
features_num = 2
density = 2 # 1 is low | 2 is high
FP_cost = 1
benign_data_file = 'test.csv' # TODO use proper data
def create_actions() -> List:
if density == 1:
one_axis = np.linspace(0, 1, 11) # [0, 0.1, 0.2, ..., 0.9, 1]
elif density == 2:
one_axis = np.linspace(0, 1, 101) # [0, 0.01, 0.02, ..., 0.99, 1]
else:
print(f'Density is neither high (2) nor low (1). It is {density}')
sys.exit(5)
one_axis = np.round(one_axis, density) # To get rid of bad double precision
generator = itertools.product(*itertools.repeat(one_axis, features_num))
return list(generator)
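# Rounding matters here: get_data() rounds the benign samples to the same
# density, and the lookup benign_data_prob[action] in solve_with_lp only hits
# when the grid point and the rounded data point compare equal; for example
# 3 * 0.1 = 0.30000000000000004 in floating point, which np.round(..., 1)
# turns back into 0.3.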
def get_data() -> dict:
benign, _ = np_arrays_from_scored_csv(benign_data_file, 0)
# Round to proper density
benign = list(
map(lambda x: tuple(map(lambda y: round(y, density), x)), benign))
benign_data_prob = Counter(benign)
for key, val in benign_data_prob.items():
benign_data_prob[key] = val / len(benign)
return benign_data_prob
def utility(attacker_action: Tuple):
return functools.reduce(operator.mul, attacker_action, 1)
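# Example: utility((0.5, 0.8)) == 0.5 * 0.8 == 0.4, i.e. the attacker's gain is
# the product of the action's feature values when the action is not blocked.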
def solve_with_lp(actions_attacker: List,
actions_defender: List,
benign_data_prob: dict):
print('Going to solve with LP')
print(f'Attacker\'s actions: {actions_attacker}')
print(f'Defender\'s actions: {actions_defender}')
print(f'Benign data probabilities are: {benign_data_prob}')
# Create LP problem
m = pulp.LpProblem("Zero sum game", pulp.LpMinimize)
# Minimizing value "v"
v = pulp.LpVariable("v")
m += v
# Player two probabilities vector
print("Defining defenders probabilities...")
probs_defender = []
for action in actions_defender:
probs_point = pulp.LpVariable(f'p({action[0]},{action[1]})', 0, 1)
probs_defender.append(probs_point)
fp_cost = 0
num_of_fp = 0
for action, prob in zip(actions_defender, probs_defender):
cur_fp = prob * benign_data_prob[action]
num_of_fp += cur_fp
fp_cost += cur_fp * FP_cost
print("Defining main constraint...")
constraints = []
for i in range(len(actions_attacker)):
suma = [fp_cost, (1 - probs_defender[i]) * utility(actions_attacker[i])]
constraints.append(pulp.lpSum(suma) <= v)
for c in constraints:
m += c
print("Ok, let's solve now...")
m.solve()
print(f'LP solved')
print(f'Value of the zero sum game: {v.varValue}')
print(f'Found solution: {pulp.LpStatus[m.status]}')
print(f'Attacker\'s probabilities:')
for i in range(len(constraints)):
prob = abs(constraints[i].pi)
if prob == 0: continue
print(f'Action {actions_attacker[i]} -> {prob}')
print('Defender\'s probabilities to block:')
for i in range(len(probs_defender)):
print(f'Action {actions_defender[i]} -> {probs_defender[i].varValue}')
# Return value and probabilities
return num_of_fp.value(), v.varValue, [abs(c.pi) for c in constraints], \
[prob.varValue for prob in probs_defender]
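# A toy call of solve_with_lp on a 3-point grid (illustrative only; the real
# grid comes from create_actions() and the probabilities from get_data()):
def _toy_lp_example():
    grid = [(0.0, 0.0), (0.5, 0.5), (1.0, 1.0)]
    # Benign traffic is assumed to sit on the first two grid points
    benign_probs = Counter({(0.0, 0.0): 0.8, (0.5, 0.5): 0.2})
    fp, value, attacker_probs, defender_probs = solve_with_lp(grid, grid, benign_probs)
    print(value, attacker_probs, defender_probs)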
def plot_summarization(def_actions, def_probs, at_actions, at_probs, value):
plt.ion()