Commit e26082f6 authored by Martin Řepa

Use batches for learning nn and start using CUDA

parent c5e03e51
......@@ -9,7 +9,7 @@ import torch
from config import ModelConfig
logger = logging.getLogger(__name__)
DEVICE = torch.device('cuda')
class Attacker:
def __init__(self, model_conf: ModelConfig):
......@@ -24,7 +24,8 @@ class Attacker:
return [np.random.uniform(0.0, 1.0) for _ in range(self.features_count)]
def get_initial_action(self) -> List:
return [0.59897846, 0.2900984]
return [1., 1.]
# return [0.59897846, 0.2900984]
# return self.random_action()
def get_best_response(self, def_actions: List, def_probs: List):
......@@ -84,7 +85,7 @@ class GradientAttacker(Attacker):
all_actions = []
for _ in range(self.conf.tries_for_best_response):
all_actions.append(super().random_action())
all_actions = torch.tensor(all_actions, requires_grad=True)
all_actions = torch.tensor(all_actions, requires_grad=True, device=DEVICE)
optimizer = torch.optim.Adam([all_actions], lr=self.conf.learning_rate)
for _ in range(self.conf.epochs):
......
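The hunk above pins the candidate-action tensor to the GPU before running Adam directly on it. A minimal sketch of that pattern, assuming a placeholder objective (the real attacker utility is not shown here) and a device fallback that the diff itself does not have:

```python
import torch

# Hedged sketch, not the project's code: optimize a batch of candidate
# actions in-place with Adam on whatever device is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

actions = torch.rand(128, 2, requires_grad=True, device=device)  # 128 candidates, 2 features
optimizer = torch.optim.Adam([actions], lr=1e-2)

for _ in range(500):
    optimizer.zero_grad()
    utility = actions.prod(dim=1).sum()  # placeholder objective, not the real utility
    (-utility).backward()                # maximize utility = minimize its negative
    optimizer.step()
    with torch.no_grad():
        actions.clamp_(0.0, 1.0)         # keep candidates inside the unit box
```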
from typing import List
from config import ModelConfig
from data.loader import np_arrays_from_scored_csv
from neural_networks.network import NeuralNetwork, FormattedData
from data.loader import np_matrix_from_scored_csv
from neural_networks.network import NeuralNetwork, FormattedData, BenignData
import numpy as np
import logging
logger = logging.getLogger(__name__)
def prepare_benign_data(raw_x_data) -> FormattedData:
def prepare_benign_data(raw_x_data) -> BenignData:
logger.debug('Let\'s prepare benign data. Taking only unique records.')
unique, counts = np.unique(raw_x_data, axis=0, return_counts=True)
probs = np.array([count / len(raw_x_data) for count in counts])
benign_y = np.zeros(len(unique))
raw_y_label = np.zeros(len(unique))
logger.debug('Data preparation done.')
return FormattedData(unique, probs, benign_y)
return BenignData(unique, counts, raw_y_label)
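Note the change of return type: instead of precomputed global probabilities, the defender now keeps raw per-record counts so each minibatch can renormalize them later. A small sketch of the aggregation on toy data:

```python
import numpy as np

raw_x = np.array([[0.1, 0.2],
                  [0.1, 0.2],
                  [0.3, 0.4]])
unique, counts = np.unique(raw_x, axis=0, return_counts=True)
labels = np.zeros(len(unique))   # benign records are labelled 0
print(unique)                    # [[0.1 0.2] [0.3 0.4]]
print(counts)                    # [2 1]
print(counts / counts.sum())     # [0.667 0.333]; per-batch renormalization happens in training
```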
class Defender:
......@@ -25,8 +24,7 @@ class Defender:
self.attacker_utility = model_conf.attacker_utility
# Prepare benign data
raw_x, _ = np_arrays_from_scored_csv(model_conf.benign_data_file_name,
0, model_conf.benign_data_count)
raw_x, _ = np_matrix_from_scored_csv(self.conf.benign_data_file_name, 0)
self.benign_data = prepare_benign_data(raw_x)
def get_best_response(self, att_actions: List, att_probs: List) -> NeuralNetwork:
......@@ -50,14 +48,14 @@ class Defender:
# If my response is to block nothing, it might be hard to train, so
# I try training 10 more NNs # TODO check that this does not get stuck
attacker_goal = np.sum(np.prod(attack.unique_x, axis=1)*attack.probs_x)
tries = 0
while abs(best_nn.final_loss - attacker_goal) < 1e-5 and tries < 10:
tries += 1
new_nn = self._train_nn(attack)
self._log_creation(new_nn, best_nn)
if new_nn.final_loss < best_nn.final_loss:
best_nn = new_nn
# attacker_goal = np.sum(np.prod(attack.unique_x, axis=1)*attack.probs_x)
# tries = 0
# while abs(best_nn.final_loss - attacker_goal) < 1e-5 and tries < 10:
# tries += 1
# new_nn = self._train_nn(attack)
# self._log_creation(new_nn, best_nn)
# if new_nn.final_loss < best_nn.final_loss:
# best_nn = new_nn
return best_nn
......
......@@ -8,10 +8,15 @@ from utility import *
@attr.s
class NeuralNetworkConfig:
# Number of epochs in a neural network training phase
epochs: int = attr.ib(default=5000)
epochs: int = attr.ib(default=10000)
# Learning rate for Adam optimiser
learning_rate = 0.1e-2
learning_rate = 0.1e-3
# learning_rate = 0.1e-2
# learning_rate = 0.5e-2
# Size of a benign data batch used in each epoch of training
batch_size = 1000
# Loss function used for training
loss_function: Callable = attr.ib(init=False)
......@@ -31,6 +36,11 @@ class DefenderConfig:
# conf of neural networks
nn_conf: NeuralNetworkConfig = attr.ib(default=NeuralNetworkConfig())
# Name of .csv file in src/data/scored directory with scored data which will
# be used as benign data in neural network training phase
benign_data_file_name: str = attr.ib(default='normal_distribution_experiments.csv')
# benign_data_file_name: str = attr.ib(default='test.csv')
@attr.s
class AttackerConfig:
......@@ -67,13 +77,6 @@ class ModelConfig:
# Use blocking or latency model?
use_blocking_model: bool = attr.ib(default=False)
# Name of .csv file in src/data/scored directory with scored data which will
# be used as benign data in neural network training phase
benign_data_file_name: str = attr.ib(default='test.csv')
# Number of benign records to be loaded
benign_data_count: int = attr.ib(default=1000)
# Number of features
features_count: int = attr.ib(default=2)
......@@ -90,7 +93,7 @@ class ModelConfig:
i_d: int = attr.ib(default=4)
# malicious : benign ratio in datasets
benign_ratio: int = attr.ib(default=1)
benign_ratio: int = attr.ib(default=10)
# Function to calculate utility for attacker given the actions
# f: List[float], NeuralNetwork -> float
......
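For readers unfamiliar with attrs: `attr.ib(default=...)` declares a field with a default, and `init=False` excludes it from the generated `__init__`. A hedged sketch of the pattern, with an illustrative class name (note that `0.1e-3` is simply `1e-4`):

```python
import attr

@attr.s
class TrainingConfig:  # hypothetical name, mirroring NeuralNetworkConfig above
    epochs: int = attr.ib(default=10000)
    learning_rate: float = attr.ib(default=1e-4)   # same value as 0.1e-3
    batch_size: int = attr.ib(default=1000)

conf = TrainingConfig(epochs=2000)   # override a default at construction
print(conf.learning_rate)            # 0.0001
```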
from os.path import dirname
from pathlib import Path
import pandas as pd
import numpy as np
import pandas
def np_arrays_from_scored_csv(file_name: str, label: int,
                              count_max: int = None, shuffle=False):
    """
    Returns 2 x N array
    Zero index contains array with data in a given .csv file.
    First index contains array with label (2nd arg) for each line in .csv file
    See usage in main
    """
    # TODO enable loading zero-size arrays as well
    content = pandas.read_csv(Path(dirname(__file__)) / Path('scored') / Path(file_name))
    batch = []
    labels = []
    if shuffle:
        content = content.sample(frac=1)
    for idx, row in content.iterrows():
        record = []
        for item in row:
            record.append(float(item))
        batch.append(record)
        labels.append([label])
        if len(batch) == count_max:
            break
    return np.array(batch, np.float), np.array(labels, np.uint8)
def np_matrix_from_scored_csv(file_name: str, label: int):
    df = pd.read_csv(Path(dirname(__file__)) / Path('scored') / Path(file_name))
    matrix = df.values
    labels = np.full(len(matrix), label)
    return matrix, labels
if __name__ == "__main__":
a = np_arrays_from_scored_csv('all_benign_scored.csv', 0, 1000)
a = np_matrix_from_scored_csv('test.csv', 0)
print(a)
......@@ -37,19 +37,19 @@ class Synthesizer:
f'{self._features_num}')
self._clusters.append(cluster)
def add_cluster_around_2Dfunc(self, func: Callable, radius: float):
def add_cluster_around_2Dfunc(self, func: Callable, radius: float, num=40):
"""
Add some points around a function.
Possible only for space R^2
"""
x_axis = np.linspace(0, 1, 40)
x_axis = np.linspace(0, 1, num)
tmp = int(radius * 100)
for x in x_axis:
y = func(x)
if y < 0 or y > 1:
continue
x += random.randint(-tmp, tmp)/100
y += random.randint(-tmp, tmp)/100
y = np.random.normal(loc=y, scale=radius/2, size=1)[0]
x = min(max(x, 0), 1)
y = min(max(y, 0), 1)
self.points.append([x, y])
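The cluster generator now draws the vertical jitter from a normal distribution centred on the curve instead of uniform integer noise, then clips into the unit square. A standalone sketch of that sampling, assuming a toy curve (shifted by 0.05 here to avoid dividing by zero at x = 0):

```python
import numpy as np

def sample_around_curve(func, radius, num=40):
    """Hedged sketch: scatter points around y = func(x) with Gaussian noise."""
    points = []
    for x in np.linspace(0, 1, num):
        y = func(x)
        if y < 0 or y > 1:
            continue                                   # skip points leaving the unit square
        y = np.random.normal(loc=y, scale=radius / 2)  # vertical Gaussian jitter
        points.append([min(max(x, 0), 1), min(max(y, 0), 1)])
    return np.array(points)

pts = sample_around_curve(lambda x: 1 / (15 * (x + 0.05)), radius=0.05)
```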
......@@ -67,6 +67,9 @@ class Synthesizer:
point.append(coord)
self.points.append(point)
def add_raw_data(self, arr):
self.points.extend(arr)
def plot2D(self):
"""
Show generated data in a 2D chart
......@@ -74,6 +77,7 @@ class Synthesizer:
Data needs to be generated at first with 'generate' function
"""
print('Let\'s plot result')
if not self._generated:
raise RuntimeError('Trying to plot while points are not generated')
if self._features_num != 2:
......@@ -81,9 +85,12 @@ class Synthesizer:
f'{self._features_num} features')
plt.xlim(0, 1)
plt.ylim(0, 1)
for point in self.points:
plt.scatter(point[0], point[1], c='red')
xs = list(map(lambda x: x[0], self.points))
ys = list(map(lambda x: x[1], self.points))
plt.scatter(xs, ys, c='red', s=1.)
plt.show()
print('done')
def save_to_file(self, path: Path):
"""
......@@ -105,10 +112,15 @@ class Synthesizer:
if __name__ == "__main__":
synt = Synthesizer(2)
synt.add_cluster_around_2Dfunc(lambda x: 1/(15*x), 0.05)
synt.add_cluster_around_2Dfunc(lambda x: 1 / (15 * x), 0.05)
synt.add_cluster_around_2Dfunc(lambda x: 1 / (15 * x), 0.05)
synt.add_cluster_around_2Dfunc(lambda x: 1 / (15 * x), 0.05)
# synt.add_cluster_around_2Dfunc(lambda x: 0.8/(15*(x+.05)), 0.25, 15000)
# Generate normal distribution data
arr = np.random.multivariate_normal([0, 0], [[2, 1], [1, 1]], 10000)
arr = arr - np.min(arr, axis=0)
m = np.max(arr, axis=0)
arr = arr / (m + (m*0.2))
synt.add_raw_data(arr)
synt.generate()
synt.plot2D()
synt.save_to_file(Path('scored/test.csv'))
# synt.save_to_file(Path('scored/normal_distribution_experiments.csv'))
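The new benign-data generator samples a correlated 2-D Gaussian and squeezes it into the unit square: shift each axis so its minimum sits at 0, then divide by 1.2 times the per-axis maximum, leaving headroom at the top. The same scaling in isolation:

```python
import numpy as np

# Hedged sketch of the normalization step above.
arr = np.random.multivariate_normal([0, 0], [[2, 1], [1, 1]], 10000)
arr = arr - arr.min(axis=0)   # shift each axis so it starts at 0
m = arr.max(axis=0)
arr = arr / (m * 1.2)         # equivalent to m + m*0.2; values land in [0, 1/1.2]
assert arr.min() >= 0 and arr.max() <= 1
```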
......@@ -4,18 +4,17 @@ import time
from pathlib import Path
import attr
import matplotlib.pyplot as plt
import numpy as np
import torch
from sklearn.utils import shuffle
from torch import nn
from torch import optim
import matplotlib.pyplot as plt
from config import NeuralNetworkConfig, RootConfig
from data.loader import np_arrays_from_scored_csv
from data.loader import np_matrix_from_scored_csv
logger = logging.getLogger(__name__)
DEVICE = torch.device('cuda')
@attr.s
class FormattedData:
......@@ -24,6 +23,13 @@ class FormattedData:
y: np.array = attr.ib()
@attr.s
class BenignData:
unique_x: np.array = attr.ib()
counts: np.array = attr.ib()
y: np.array = attr.ib()
class OrderCounter:
order = 0
......@@ -64,31 +70,50 @@ class NeuralNetwork:
def __init__(self, input_features=2,
nn_conf: NeuralNetworkConfig = NeuralNetworkConfig()):
self.model = nn.Sequential(
nn.Linear(input_features, 40),
nn.LeakyReLU(),
nn.Linear(40, 30),
nn.LeakyReLU(),
nn.Linear(30, 1),
# nn.Tanh(),
# SoftClip(50)
nn.Sigmoid()
)
nn.Linear(input_features, 20),
nn.ReLU(),
nn.Linear(20, 10),
nn.ReLU(),
nn.Linear(10, 13),
nn.ReLU(),
nn.Linear(13, 1),
nn.Tanh(),
SoftClip(50)
# nn.Sigmoid()
).to(DEVICE)
self._set_weights()
self.conf = nn_conf
self.id = OrderCounter.next()
# Variables used for loss function
self.attacker_actions: FormattedData = None
self.benign_data: FormattedData = None
self.benign_data: BenignData = None
# Variables from last training epoch measuring quality
self.final_loss = None
self.final_fp_cost = None
# -------------- <TMP> --------------------
# self.max_constant = constant
# self.cur_value = .0
# self.step = .05
# self.edge_epoch = (self.conf.epochs*0.2)
# self.incr_each = self.edge_epoch / (self.max_constant/.05)
# def get_cur_coefficient(self, epoch) -> float:
# return self.max_constant
# if epoch > self.edge_epoch:
# return self.max_constant
# return .01
# if self.cur_value < self.max_constant and epoch % self.incr_each == 0:
# self.cur_value += self.step
# return self.cur_value
# -------------- </TMP> --------------------
def __str__(self):
return f'Neural network id:{self.id}, final loss: {self.final_loss}'
def set_data(self, benign_data: FormattedData, attack: FormattedData):
def set_data(self, benign_data: BenignData, attack: FormattedData):
self.attacker_actions = attack
self.benign_data = benign_data
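The network is now built once and moved to the GPU with `.to(DEVICE)`; every tensor that touches it must live on the same device. A common, slightly more defensive variant of this pattern (the CPU fallback is our addition, not in the diff):

```python
import torch
from torch import nn

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = nn.Sequential(
    nn.Linear(2, 20),
    nn.ReLU(),
    nn.Linear(20, 1),
).to(DEVICE)                     # parameters now live on DEVICE

x = torch.rand(8, 2).to(DEVICE)  # inputs must match the model's device
y = model(x)                     # raises a device-mismatch error otherwise
```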
......@@ -97,14 +122,15 @@ class NeuralNetwork:
attacker = self.attacker_actions
x = np.concatenate((defender.unique_x, attacker.unique_x), axis=0)
y = np.concatenate((defender.y, attacker.y), axis=0)
probs = np.concatenate((defender.probs_x, attacker.probs_x), axis=0)
probs = np.concatenate((defender.counts/np.sum(defender.counts),
attacker.probs_x), axis=0)
# Shuffle before splitting
x, y, probs = shuffle(x, y, probs, random_state=1)
self.train_y = torch.cat((torch.zeros(self.conf.batch_size).float(),
torch.tensor(attacker.y).float())).to(DEVICE)
self.x_train = torch.tensor(x).float()
self.y_train = torch.tensor(y).float()
self.probs_train = torch.tensor(probs).float()
self.all_x = torch.tensor(x).float().to(DEVICE)
self.all_y = torch.tensor(y).float().to(DEVICE)
self.all_probs = torch.tensor(probs).float().to(DEVICE)
def _set_weights(self):
def init_weights(m):
......@@ -117,17 +143,33 @@ class NeuralNetwork:
self._prepare_data()
self._train()
def get_train_batch(self):
batch_idxs = np.random.choice(len(self.benign_data.unique_x),
self.conf.batch_size)
current_batch_samples = np.sum(self.benign_data.counts[batch_idxs])
batch_x_np = np.concatenate((self.benign_data.unique_x[batch_idxs],
self.attacker_actions.unique_x), axis=0)
batch_probs_np = np.concatenate((self.benign_data.counts[batch_idxs]/current_batch_samples,
self.attacker_actions.probs_x), axis=0)
batch_x = torch.tensor(batch_x_np).float().to(DEVICE)
batch_probs = torch.tensor(batch_probs_np).float().to(DEVICE)
return batch_x, batch_probs
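`get_train_batch` draws a random subset of unique benign rows, renormalizes their counts into within-batch probabilities, and appends the attacker actions to every batch. A self-contained sketch of the weighting logic with made-up shapes:

```python
import numpy as np

# Hedged sketch of the batch construction above (names are illustrative).
unique_x = np.random.rand(5000, 2)                  # unique benign records
counts = np.random.randint(1, 10, size=5000)        # how often each occurred
batch_size = 1000

idxs = np.random.choice(len(unique_x), batch_size)  # sample with replacement
batch_x = unique_x[idxs]
batch_probs = counts[idxs] / counts[idxs].sum()     # renormalize inside the batch
assert np.isclose(batch_probs.sum(), 1.0)
```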
def _train(self):
learning_rate = self.conf.learning_rate
optimizer = torch.optim.Adam(self.model.parameters(), lr=learning_rate)
for e in range(self.conf.epochs):
batch_x, batch_probs = self.get_train_batch()
# Forward pass: compute predicted y by passing x to the model
train_ltncies = self.latency_predict(self.x_train, with_grad=True)
train_ltncies = self.latency_predict(batch_x, with_grad=True)
# Compute loss
loss, _ = self.conf.loss_function(self.x_train, train_ltncies,
self.y_train, self.probs_train)
loss, _ = self.conf.loss_function(batch_x, train_ltncies,
self.train_y, batch_probs)
# Log loss function value every 50 epochs
if e % 50 == 0:
......@@ -147,8 +189,10 @@ class NeuralNetwork:
optimizer.step()
with torch.no_grad():
loss, fp_part = self.conf.loss_function(self.x_train, train_ltncies,
self.y_train, self.probs_train)
train_ltncies = self.latency_predict(self.all_x, with_grad=True)
loss, fp_part = self.conf.loss_function(self.all_x, train_ltncies,
self.all_y, self.all_probs)
logger.debug(f'Final loss of this nn: {loss}\tfp_part is: {fp_part}')
# measuring quality of final network
self.final_loss = loss.item()
self.final_fp_cost = fp_part.item()
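Training now only ever sees minibatches, so the final loss is computed once over the full dataset under `torch.no_grad()` (which disables gradients regardless of the `with_grad=True` flag passed there). The general shape of that pattern, with placeholder names:

```python
import torch
from torch import nn

# Hedged sketch: after minibatch training, score once on the full data
# without building an autograd graph. Names are placeholders.
model = nn.Linear(2, 1)
all_x = torch.rand(100, 2)
all_y = torch.rand(100, 1)

with torch.no_grad():  # no graph, no gradient buffers
    final_loss = nn.functional.mse_loss(model(all_x), all_y)
print(final_loss.item())
```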
......@@ -164,7 +208,7 @@ class NeuralNetwork:
self.plotting = plt.subplots()
one_axis = np.linspace(0, 1, 101) # [0.00, 0.01, 0.02, ..., 0.99, 1.00]
generator = itertools.product(*itertools.repeat(one_axis, 2))
actions = torch.tensor(np.array(list(generator))).float()
actions = torch.tensor(np.array(list(generator))).float().to(DEVICE)
self.actions = actions
finally:
# Remove all lines from previous iteration plotting
......@@ -194,7 +238,7 @@ class NeuralNetwork:
in_type = type(input)
if in_type == list or in_type == tuple or \
in_type == np.array or in_type == np.ndarray:
input = torch.tensor(input).float()
input = torch.tensor(input).float().to(DEVICE)
if return_tensor:
return self.latency_predict(input)[0]
......@@ -211,9 +255,9 @@ def setup_loger(debug: bool):
if __name__ == '__main__':
setup_loger(True)
benign_x, _ = np_arrays_from_scored_csv(
benign_x, _ = np_matrix_from_scored_csv(
Path('all_benign_scored.csv'), 0, 500)
malicious_x, _ = np_arrays_from_scored_csv(
malicious_x, _ = np_matrix_from_scored_csv(
Path('scored_malicious.csv'), 1, 400)
benign_unique_x, counts = np.unique(benign_x, axis=0, return_counts=True)
......
......@@ -9,7 +9,7 @@ import numpy as np
import pulp
from os.path import dirname
from data.loader import np_arrays_from_scored_csv
from data.loader import np_matrix_from_scored_csv
......@@ -50,6 +50,7 @@ def solve_with_lp(actions_attacker: List,
print("Defining main constraint...")
constraints = []
for i in range(len(attacker_actions)):
print(f'Defining constraint for {i}')
suma = [fp_cost]
for j in range(len(l)):
suma.append(probs_defender[i][j] * u(attacker_actions[i], l[j]))
......@@ -202,7 +203,7 @@ if __name__ == "__main__":
u = utility
# load data, round data and make occurrences dict
benign = np_arrays_from_scored_csv('test.csv', 0)[0]
benign = np_matrix_from_scored_csv('normal_distribution_experiments.csv', 0)[0]
benign_data = list(
map(lambda x: tuple(map(lambda y: round(y, 2), x)), benign))
benign_data_prob = Counter(benign_data)
......
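The LP construction above builds one constraint per attacker action by summing `fp_cost` and expected utilities over the defender's strategies. For orientation, a toy pulp program with the same constraint-building shape (variables and payoffs invented here, not the project's model):

```python
import pulp

# Hedged toy example of the per-action constraint pattern above.
prob = pulp.LpProblem('toy_defender_lp', pulp.LpMinimize)
v = pulp.LpVariable('v')                                 # value of the game
p = [pulp.LpVariable(f'p_{j}', lowBound=0) for j in range(3)]

prob += v                                                # objective: minimize v
prob += pulp.lpSum(p) == 1                               # strategy is a distribution
payoff = [[1, 0, 2], [0, 2, 1]]                          # made-up utilities
for i in range(2):                                       # one constraint per attacker action
    prob += pulp.lpSum(payoff[i][j] * p[j] for j in range(3)) <= v

prob.solve()
print(pulp.LpStatus[prob.status], pulp.value(v))
```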
......@@ -3,7 +3,9 @@ import numpy as np
import torch
from config import PlotterConfig
from data.loader import np_arrays_from_scored_csv
from data.loader import np_matrix_from_scored_csv
DEVICE = torch.device('cuda')
def plot_paths(self):
......@@ -47,14 +49,13 @@ class Plotter:
def _init_plots(self, discr_actions):
plt.ion()
self.fig, self.ax = plt.subplots(2, 3)
self.actions = torch.tensor(discr_actions).float()
self.actions = torch.tensor(discr_actions).float().to(DEVICE)
self.plotted = []
# TMP-----------
x, _ = np_arrays_from_scored_csv('test.csv', 0, 1000)
x = x * 100
for point in x:
self.ax[0][0].scatter([point[0]], [point[1]], c='blue')
x, _ = np_matrix_from_scored_csv('normal_distribution_experiments.csv', 0)
x = np.unique(x, axis=0) * 100
self.ax[0][0].scatter(x[:, 0], x[:, 1], c='blue', s=1.)
# ---------------
self.ax[0][0].set_title('Defender nash strategy')
......@@ -109,7 +110,7 @@ class LatencyPlotter(Plotter):
res = np.zeros((101, 101))
for nn, prob in zip(played_p2, probs_p2):
if prob == 0: continue
predictions = nn.latency_predict(self.actions).numpy()
predictions = nn.latency_predict(self.actions).cpu().numpy()
res += (predictions * prob).reshape((101, 101), order='F')
self.plotted.append(self.ax[0][0].imshow(res, cmap='Reds', vmin=0,
vmax=1, origin='lower',
......@@ -128,7 +129,7 @@ class LatencyPlotter(Plotter):
self.ax[0][2].scatter(br_p1[0], br_p1[1], c='blue', marker='^')
# Plot heat-map of defender's best response
res = br_p2.latency_predict(self.actions).numpy().reshape((101, 101), order='F')
res = br_p2.latency_predict(self.actions).cpu().numpy().reshape((101, 101), order='F')
self.plotted.append(self.ax[1][0].imshow(res, cmap='Reds', vmin=0,
vmax=1, origin='lower',
interpolation='spline16'))
......@@ -164,7 +165,7 @@ class BlockingPlotter(Plotter):
res = np.zeros((101, 101))
for nn, prob in zip(played_p2, probs_p2):
if prob == 0: continue
predictions = nn.latency_predict(self.actions).numpy()
predictions = nn.latency_predict(self.actions).cpu().numpy()
res += (predictions * prob).reshape((101, 101), order='F')
self.plotted.append(self.ax[0][0].imshow(res, cmap='Reds', vmin=0,
vmax=1, origin='lower'))
......@@ -182,7 +183,7 @@ class BlockingPlotter(Plotter):
self.ax[0][2].scatter(br_p1[0], br_p1[1], c='blue', marker='^')
# Plot heat-map of defender's best response
res = br_p2.latency_predict(self.actions).numpy().reshape((101, 101), order='F')
res = br_p2.latency_predict(self.actions).cpu().numpy().reshape((101, 101), order='F')
self.plotted.append(self.ax[1][0].imshow(res, cmap='Reds', vmin=0,
vmax=1, origin='lower'))
......
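With the model on CUDA, every prediction tensor must be brought back to host memory before numpy/matplotlib can touch it, hence the `.cpu()` inserted before `.numpy()` throughout the plotters. The rule in miniature:

```python
import torch

t = torch.rand(4, device='cuda' if torch.cuda.is_available() else 'cpu')
# t.numpy() raises a TypeError if t lives on the GPU; copy to host first:
arr = t.detach().cpu().numpy()
print(arr)
```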