Commit 8dce72cd authored by Martin Řepa

Implement attacker best response via gradient descent

parent d3954f71
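Note: the new best-response search maximises the attacker's expected gain against the defender's mixed strategy, i.e. the sum over the played networks of p_i * (1 - f_i(a)) * prod_j a_j, over feature vectors a in [0, 1]^n. A minimal sketch of that objective (illustrative names only, not the repository's API):

import torch

def expected_gain(a: torch.Tensor, predict_fns, probs) -> torch.Tensor:
    # sum_i p_i * (1 - f_i(a)) * prod_j a_j -- the quantity the commit's
    # gradient loop maximises (its `loss` is the negation of this value)
    return sum(p * (1.0 - f(a)) * torch.prod(a)
               for f, p in zip(predict_fns, probs))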
import operator
import random
from typing import List
import torch
from config import ModelConfig
import numpy as np
import itertools
@@ -27,27 +30,79 @@ class Attacker:
         self.actions = create_attacker_actions(self.features_count)

     def get_best_response(self, def_actions: List, def_probs: List):
+        # Take only the defender's actions which are played with non-zero probability
+        non_zero_p = np.where(np.asarray(def_probs) != 0)
+        actions = np.asarray(def_actions)[non_zero_p]
+        probs = np.asarray(def_probs)[non_zero_p]
         if self.conf.use_gradient_descent:
-            self._gradient_best_response(def_actions, def_probs)
+            return self._gradient_best_response(actions, probs)
         else:
-            return self._discrete_best_response(def_actions, def_probs)
+            return self._discrete_best_response(actions, probs)

+        # # TMP
+        # optimal = self._discrete_best_response(def_actions, def_probs)
+        # gradient_brp = self._gradient_best_response(def_actions, def_probs)
+        #
+        # if list(map(lambda a: round(a, 2), gradient_brp)) != optimal:
+        #     print("Well, this is screwed")
+        #
+        # return optimal

     def _discrete_best_response(self, def_actions: List, def_probs: List) -> List:
-        # Take only defenders actions which are played with non zero probability
-        non_zero_p = np.where(np.asarray(def_probs) != 0)
-        actions_2 = np.asarray(def_actions)[non_zero_p]
-        p2 = np.asarray(def_probs)[non_zero_p]
         best_rp = max(self.actions, key=lambda a1: sum(map(operator.mul, map(
-            lambda a2: self.utility(a1, a2), actions_2), p2)))
+            lambda a2: self.utility(a1, a2), def_actions), def_probs)))
         return list(best_rp)

+    def _do_gradient_descent(self, def_actions: List, def_probs: List) -> tuple:
+        # Create a random initial position
+        raw = [random.uniform(0.0, 1.0) for _ in range(self.features_count)]
+        # Create a tensor with the attacker action which will be updated
+        attacker_action = torch.tensor(raw, requires_grad=True)
+        # Create a PyTorch Adam optimiser to update the tensor
+        optimizer = torch.optim.Adam([attacker_action],
+                                     lr=self.conf.learning_rate)
+        for i in range(self.conf.epochs):
+            # logger.debug(f'Epoch {i} of the attacker best response search')
+            loss = 0
+            for nn, prob in zip(def_actions, def_probs):
+                prediction = nn._limit_predict(attacker_action,
+                                               with_grad=True)
+                # The attacker wants to maximise its gain, but the optimiser
+                # minimises, so we negate the objective function
+                loss += -(torch.add(1, -prediction) * prob *
+                          torch.prod(attacker_action))
+            # Calculate the gradient and update the value
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+            # Clamp the input features to the interval [0, 1]
+            attacker_action.data.clamp_(min=0.0, max=1.0)
+        action = [attacker_action[i].item() for i in range(self.features_count)]
+        action_gain = -loss.item()  # Negate the loss again
+        return action, action_gain

     def _gradient_best_response(self, def_actions: List, def_probs: List) -> List:
-        # TODO
-        pass
+        best_rsp, best_gain = self._do_gradient_descent(def_actions, def_probs)
+        # Try to find a better best response n-1 more times
+        for _ in range(1, self.conf.tries_for_best_response):
+            action, gain = self._do_gradient_descent(def_actions, def_probs)
+            if gain > best_gain:
+                best_rsp = action
+                best_gain = gain
+        return best_rsp

-    def get_initial_action(self):
-        return self.get_best_response([], [])
+    def get_initial_action(self) -> List:
+        # Return a random set of features as the initial action
+        return [random.uniform(0.0, 1.0) for _ in range(self.features_count)]

     def does_br_exists(self, new_br, old_brs, defenders_networks):
         it_does = self._does_br_exists(new_br, old_brs, defenders_networks)
......
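The pattern in _do_gradient_descent / _gradient_best_response above is projected gradient ascent with random restarts: optimise with Adam, clamp the action back into the unit box after every step, and keep the best of several random starts. A self-contained sketch of that pattern, using a toy stand-in for the defender networks' _limit_predict (all names below are illustrative, not the project's API):

import torch

def toy_predict(a: torch.Tensor) -> torch.Tensor:
    # Stand-in for a defender network: a differentiable prediction in [0, 1]
    return torch.sigmoid(4.0 * a.sum() - 2.0)

def best_response(predict_fns, probs, n_features=2, lr=5e-3, epochs=500, tries=7):
    best_action, best_gain = None, float('-inf')
    for _ in range(tries):
        # Random restart inside the unit box
        a = torch.rand(n_features, requires_grad=True)
        optimizer = torch.optim.Adam([a], lr=lr)
        for _ in range(epochs):
            # Negated expected gain, because Adam minimises
            loss = -sum(p * (1.0 - f(a)) * torch.prod(a)
                        for f, p in zip(predict_fns, probs))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            a.data.clamp_(0.0, 1.0)  # project back onto [0, 1]^n
        gain = -loss.item()
        if gain > best_gain:
            best_action, best_gain = [x.item() for x in a], gain
    return best_action, best_gain

print(best_response([toy_predict], [1.0]))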
@@ -35,19 +35,28 @@ class AttackerConfig:
     # If set to False, attacker actions are discrete and the whole space is
     # traversed to find the best response. Example for R^2:
     # [(.0,.01),(.0,.02),...,(.1,.1)]
-    use_gradient_descent: bool = attr.ib(default=False)
+    use_gradient_descent: bool = attr.ib(default=True)

     # Two attacker actions are considered the same if the absolute difference
     # of the attacker's utility for them against each of the defender's
     # actions is less than this value
-    # Used only when use_gradient_descent is set to True
+    # Attention. Used only when use_gradient_descent is set to True!
     epsion: float = attr.ib(default=5e-3)

     # Number of random tries to find an attacker action using gradient descent.
     # The one with the best final loss value is chosen.
-    # Used only when use_gradient_descent is set to True
+    # Attention. Used only when use_gradient_descent is set to True!
     tries_for_best_response: int = attr.ib(default=7)

+    # Learning rate for the optimiser which updates the attacker action while
+    # searching for the best response using gradient descent
+    # Attention. Used only when use_gradient_descent is set to True!
+    learning_rate = 0.5e-2

+    # Number of gradient descent iterations used while searching for the
+    # best response
+    # Attention. Used only when use_gradient_descent is set to True!
+    epochs = 500

 @attr.s
 class ModelConfig:
......
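For context on the epsion tolerance: two attacker actions count as duplicates when their utilities differ by less than this value against every defender action in the support. A sketch of that check under the assumed semantics (the helper name and signature below are hypothetical, not the project's does_br_exists):

def is_duplicate_action(utility, new_action, old_action, def_actions, eps=5e-3):
    # True if the two attacker actions are indistinguishable (within eps)
    # against every defender action that is actually played
    return all(abs(utility(new_action, d) - utility(old_action, d)) < eps
               for d in def_actions)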
@@ -87,7 +87,7 @@ class NeuralNetwork:
     def _set_weights(self):
         def init_weights(m):
             if type(m) == nn.Linear:
-                torch.nn.init.xavier_uniform(m.weight)
+                torch.nn.init.xavier_uniform_(m.weight)
                 m.bias.data.fill_(.0)

         self.model.apply(init_weights)
......
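The trailing underscore is PyTorch's convention for in-place operations; torch.nn.init.xavier_uniform is the deprecated alias of xavier_uniform_. A minimal usage sketch:

import torch
from torch import nn

layer = nn.Linear(4, 1)
nn.init.xavier_uniform_(layer.weight)  # fill the weight tensor in place
layer.bias.data.fill_(0.0)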
@@ -127,17 +127,20 @@ def calc_optimal_br(nns: List, probs: List) -> tuple:
     optimal_solution = max(attacker_actions, key=lambda a: sum(map(operator.mul,
                            map(lambda nn: (1 - nn.predict_single_limit(a)) *
                                a[0] * a[1], nns), probs)))
-    return optimal_solution
+    optimal_gain = sum(map(operator.mul,
+                           map(lambda nn: (1 - nn.predict_single_limit(optimal_solution)) *
+                               np.product(optimal_solution), nns), probs))
+    return optimal_solution, optimal_gain

 if __name__ == '__main__':
     setup_loger(True)

     # Get some random defender's neural networks
-    nns, probs = get_some_trained_nns(0)
+    nns, probs = get_some_trained_nns(1)

     # Find the optimal best response using LP
-    optimal_best_response = calc_optimal_br(nns, probs)
+    optimal_best_response, _ = calc_optimal_br(nns, probs)

     # Attacker's best responses
     attacker_brs_gradient_descent = find_attacker_best_responses(nns, probs, 2)
......
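calc_optimal_br now returns both the discrete optimum and its gain, so the gradient-descent result can be checked against a brute-force baseline. A toy, self-contained version of such a baseline: a coarse grid search over [0, 1]^2 with an illustrative stand-in for the defender's prediction (not the project's networks):

import itertools
import math

def toy_gain(a):
    # Stand-in defender prediction followed by the attacker's gain term
    prediction = 1.0 / (1.0 + math.exp(-(4.0 * sum(a) - 2.0)))
    return (1.0 - prediction) * a[0] * a[1]

grid = itertools.product([i / 100 for i in range(101)], repeat=2)
best = max(grid, key=toy_gain)
print('grid optimum:', best, 'gain:', toy_gain(best))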