network.py
import logging
from pathlib import Path

import attr
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from torch import nn
from torch import optim

from config import NeuralNetworkConfig, TrainingNnConfig, RootConfig
from src.data.loader import np_arrays_from_scored_csv

logger = logging.getLogger(__name__)

@attr.s
class FormattedData:
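    """Unique data points with their empirical probabilities and labels."""
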
    unique_x: np.ndarray = attr.ib()
    probs_x: np.ndarray = attr.ib()
    y: np.ndarray = attr.ib()


class OrderCounter:
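    """Issues sequential ids to NeuralNetwork instances."""
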
    order = 0

    @staticmethod
    def next():
        OrderCounter.order += 1
        return OrderCounter.order


class NeuralNetwork:
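    """Small feed-forward classifier whose sigmoid output is mapped to a
    blocking limit in [0, 1] (see _limit_predict)."""
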
    def __init__(self, input_features=2,
                 nn_conf: NeuralNetworkConfig = NeuralNetworkConfig(),
                 nn_train_conf: TrainingNnConfig = TrainingNnConfig()):
        self.model = nn.Sequential(
            nn.Linear(input_features, 10),
            nn.ReLU(),
            nn.Linear(10, 12),
            nn.ReLU(),
            nn.Linear(12, 1),
            nn.Sigmoid()
        )
        self.epochs = nn_conf.epochs
        self.validation_split = nn_train_conf.validation_split
        self.id = OrderCounter.next()

        # Variables used for the loss function
        self.attacker_actions: FormattedData = None
        self.benign_data: FormattedData = None

        # Value of the loss function in the last epoch, used to gauge quality
        self.final_loss = None

        # PyTorch built-in binary cross-entropy loss function
        # self.loss_fn = nn.BCELoss()

    def __str__(self):
        return f'Neural network id:{self.id}, final loss: {self.final_loss}'

    def set_data(self, benign_data, attack):
        self.attacker_actions = attack
        self.benign_data = benign_data

    def loss_function(self, x, limits, real_y, probs):
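        """Custom loss: for malicious points (real_y == 1) it sums
        (1 - limit) * prod(features) * prob, then adds the false-positive
        cost for benign points (see _fp_cost_tensor)."""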
        zero_sum_part = torch.sum(real_y*(1-limits)*torch.prod(x, dim=1)*probs)
        fp_cost = self._fp_cost_tensor(limits, real_y, probs)
        sum_loss = torch.add(zero_sum_part, fp_cost)
        return sum_loss

    def _fp_cost_tensor(self, limits, real_y, probs):
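        # Cost of false positives: benign points (real_y == 0) are penalised
        # by their probability times the fourth power of the predicted limit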
        return torch.sum((1-real_y) * probs * torch.pow(limits, 4))

    def _prepare_data(self):
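        # Merge benign and attacker data, shuffle consistently and convert
        # everything to float tensors for training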
        defender = self.benign_data
        attacker = self.attacker_actions

        x = np.concatenate((defender.unique_x, attacker.unique_x), axis=0)
        y = np.concatenate((defender.y, attacker.y), axis=0)
        probs = np.concatenate((defender.probs_x, attacker.probs_x), axis=0)

        # Shuffle before splitting
        x, y, probs = shuffle(x, y, probs, random_state=1)

        # TODO use validation data as well
        self.x_train = torch.from_numpy(x).float()
        self.y_train = torch.from_numpy(y).float()
        self.probs_train = torch.from_numpy(probs).float()

    def train(self):
        self._prepare_data()
        self._train()

    def _train(self):
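        # Optimise the custom loss with Adam for a fixed number of epochs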
        learning_rate = 0.5e-2
        optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)

        for e in range(self.epochs):
            # Forward pass: compute predicted y by passing x to the model.
            train_limits = self._limit_predict(self.x_train, with_grad=True)

            # Compute loss.
            loss = self.loss_function(self.x_train, train_limits, self.y_train,
                                      self.probs_train)
            # loss = self.loss_fn(train_limits, self.y_train)

            # Log training progress every few epochs
            if e % 5 == 0:
                logger.debug(f'Epoch: {e}/{self.epochs},\t'
                             f'TrainLoss: {loss}')

            # Before the backward pass, use the optimizer object to zero all of
            # the gradients for the variables it will update
            optimizer.zero_grad()

            # Backward pass: compute gradient of the loss with respect to model
            # parameters
            loss.backward()

            # Calling the step function on an Optimizer makes an update to its
            # parameters
            optimizer.step()

        self.final_fp_cost = self._fp_cost_tensor(train_limits, self.y_train,
                                                  self.probs_train).item()
        # TODO use validation set for final_loss (used in best_response_p2)
        self.final_loss = loss.item()

    def _raw_predict(self, tensor: torch.Tensor):
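        # Run the model and flatten its (N, 1) output into a 1-D tensor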
        pred = self.model(tensor)
        return pred.flatten().float()

    def _limit_predict(self, x: torch.Tensor, with_grad=False):
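        """Map raw model outputs to limits in [0, 1]; gradients are tracked
        only when with_grad is True."""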
        if with_grad:
            raw_prediction = self._raw_predict(x)
        else:
            with torch.no_grad():
                raw_prediction = self._raw_predict(x)

        # The same as lambda p: 0 if p < 0.5 else (p - 0.5) * 2
        clamped = raw_prediction.clamp(min=0.5, max=1)
        limit = torch.mul(torch.add(clamped, -0.5), 2)
        return limit

    def predict_single_limit(self, input, return_tensor=False):
        if isinstance(input, (list, tuple, np.ndarray)):
            input = torch.tensor(input).float()

        if return_tensor:
            return self._limit_predict(input)[0]
        else:
            return self._limit_predict(input)[0].item()


def setup_logger(conf):
    log_format = ('%(asctime)-15s\t%(name)s:%(levelname)s\t'
                  '%(module)s:%(funcName)s:%(lineno)s\t%(message)s')
    level = logging.DEBUG if conf.base_conf.debug else logging.INFO
    logging.basicConfig(level=level, format=log_format)


if __name__ == '__main__':
    setup_logger(RootConfig())
    benign_x, _ = np_arrays_from_scored_csv(
        Path('all_benign_scored.csv'), 0, 500)
    malicious_x, _ = np_arrays_from_scored_csv(
        Path('scored_malicious.csv'), 1, 1)

    benign_unique_x, counts = np.unique(benign_x, axis=0, return_counts=True)
    probs_benign = np.array([count / len(benign_x) for count in counts])
    benign_y = np.zeros(len(benign_unique_x))
    benign_data = FormattedData(benign_unique_x, probs_benign, benign_y)

    malicious_unique_x, counts = np.unique(malicious_x, axis=0, return_counts=True)
    probs_malicious = np.array([count / len(malicious_x) for count in counts])
    malicious_y = np.ones(len(malicious_unique_x))
    malicious_data = FormattedData(malicious_unique_x, probs_malicious, malicious_y)

    network = NeuralNetwork()
    network.set_data(benign_data, malicious_data)
    network.train()