network.py 7.4 KB
Newer Older
Martin Řepa's avatar
backup  
Martin Řepa committed
1
import logging
2 3
from pathlib import Path

4
import attr
5
import numpy as np
Martin Řepa's avatar
backup  
Martin Řepa committed
6
import torch
7 8
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
Martin Řepa's avatar
backup  
Martin Řepa committed
9 10
from torch import nn
from torch import optim
11

Martin Řepa's avatar
backup  
Martin Řepa committed
12
from config import NeuralNetworkConfig, TrainingNnConfig, RootConfig
13 14
from src.data.loader import np_arrays_from_scored_csv

Martin Řepa's avatar
backup  
Martin Řepa committed
15 16
logger = logging.getLogger(__name__)

17 18 19 20 21 22 23 24 25 26 27 28 29 30
# TODO one class is enough
@attr.s
class FormattedBenignData:
    """Benign traffic prepared for training (see __main__ for construction)."""
    # Deduplicated benign feature rows (np.unique over the raw samples).
    unique_x: np.array = attr.ib()
    # Empirical probability of each row in unique_x (counts / total samples).
    probs_x: np.array = attr.ib()
    # Per-row labels; zeros for benign data.
    y: np.array = attr.ib()


@attr.s
class FormattedMaliciousData:
    """Attacker actions prepared for training (see __main__ for construction)."""
    # Deduplicated malicious feature rows.
    features: np.array = attr.ib()
    # Per-row weight; counts / number of unique rows at the call site.
    probs_features: np.array = attr.ib()
    # Per-row labels; ones for malicious data.
    y: np.array = attr.ib()

Martin Řepa's avatar
backup  
Martin Řepa committed
31 32 33 34 35 36 37 38 39

class OrderCounter:
    """Process-wide counter handing out sequential ids (1, 2, 3, ...)."""

    order = 0

    @classmethod
    def next(cls):
        """Advance the shared counter and return its new value."""
        cls.order += 1
        return cls.order

40 41

class NeuralNetwork:
    """Feed-forward binary classifier trained with a custom defender loss.

    The model maps ``input_features`` inputs through two ReLU hidden layers
    to a single sigmoid unit; ``limit_predict`` rescales that raw output
    into a "limit" in [0, 1].
    """

    def __init__(self, input_features=2,
                 nn_conf: 'NeuralNetworkConfig' = None,
                 nn_train_conf: 'TrainingNnConfig' = None):
        """Build the model and read training settings from the configs.

        :param input_features: number of input columns per sample
        :param nn_conf: network config; a fresh ``NeuralNetworkConfig``
                        is created when None
        :param nn_train_conf: training config; a fresh ``TrainingNnConfig``
                              is created when None
        """
        # Create default configs lazily instead of as argument defaults so a
        # single shared instance is not silently reused across all networks.
        if nn_conf is None:
            nn_conf = NeuralNetworkConfig()
        if nn_train_conf is None:
            nn_train_conf = TrainingNnConfig()

        # input -> 10 -> 12 -> 1; the sigmoid head keeps output in (0, 1).
        self.model = nn.Sequential(
            nn.Linear(input_features, 10),
            nn.ReLU(),
            nn.Linear(10, 12),
            nn.ReLU(),
            nn.Linear(12, 1),
            nn.Sigmoid()
        )
        self.epochs = nn_conf.epochs
        self.validation_split = nn_train_conf.validation_split
        self.id = OrderCounter.next()

        # Variables used for loss function; populated via set_data().
        self.attacker_actions: 'FormattedMaliciousData' = None
        self.benign_data: 'FormattedBenignData' = None

        # TODO Just tmp — currently unused by _train (custom loss is used).
        self.loss_fn = nn.BCELoss()

    def __str__(self):
        return f'Neural network with id: {self.id}'

    def set_data(self, benign_data, attack):
        """Register the benign data and attacker actions used for training."""
        self.attacker_actions = attack
        self.benign_data = benign_data

    def loss_function(self, x, limits, real_y, probs):
        """Return the mean of attacker reward plus false-positive cost.

        :param x: feature tensor, one row per sample
        :param limits: predicted limits (output of ``limit_predict``) for x
        :param real_y: labels; 1 = attacker sample, 0 = benign sample
        :param probs: per-sample probability weights
        :return: scalar tensor with the averaged loss
        """
        # Attacker rows: feature product scaled down by the predicted limit
        # and weighted by the sample probability.
        zero_sum_part = real_y*(1-limits)*torch.prod(x, dim=1)*probs
        # Benign rows: penalise high limits (4th power) on benign traffic.
        fp_cost = (1-real_y)*probs*torch.pow(limits, 4)
        sum_loss = torch.add(torch.sum(zero_sum_part), torch.sum(fp_cost))
        return torch.div(sum_loss, len(x))

    def _prepare_data(self):
        """Merge benign and attacker data, shuffle, split, convert to tensors."""
        defender = self.benign_data
        attacker = self.attacker_actions

        x = np.concatenate((defender.unique_x, attacker.features), axis=0)
        y = np.concatenate((defender.y, attacker.y), axis=0)
        probs = np.concatenate((defender.probs_x, attacker.probs_features),
                               axis=0)

        # Shuffle before splitting (fixed seed keeps runs reproducible)
        x, y, probs = shuffle(x, y, probs, random_state=1)

        # Split to train and test data given the ratio in config
        data = train_test_split(x, y, probs, test_size=self.validation_split)
        x_train, x_test, y_train, y_test, probs_train, probs_test = data

        self.x_train = torch.from_numpy(x_train).float()
        self.x_test = torch.from_numpy(x_test).float()
        self.y_train = torch.from_numpy(y_train).float()
        self.y_test = torch.from_numpy(y_test).float()
        self.probs_train = torch.from_numpy(probs_train).float()
        self.probs_test = torch.from_numpy(probs_test).float()

    def train(self):
        """Prepare train/test tensors and run the optimisation loop."""
        self._prepare_data()
        self._train()

    def _train(self):
        # TODO(review): learning rate is hard-coded; consider moving it into
        # NeuralNetworkConfig.
        learning_rate = 1e-4
        # Use the module-level `optim` import for consistency.
        optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)

        for e in range(self.epochs):
            # Forward pass: compute predicted limits with gradient tracking.
            train_limits = self.limit_predict(self.x_train, with_grad=True)

            # Compute loss.
            loss = self.loss_function(self.x_train, train_limits, self.y_train,
                                      self.probs_train)

            # Compute validation loss and report some info
            if e % 5 == 0:
                test_limits = self.limit_predict(self.x_test)
                validate_loss = self.loss_function(self.x_test, test_limits,
                                                   self.y_test,
                                                   self.probs_test)
                # Use the module logger (not the root logger) so records are
                # attributed to this module; log both losses as plain floats.
                logger.debug(f'Epoch: {e}/{self.epochs},\t'
                             f'TrainLoss: {loss.item()},\t'
                             f'ValidateLoss: {validate_loss.item()},\t')

            # Before the backward pass, use the optimizer object to zero all of
            # the gradients for the variables it will update
            optimizer.zero_grad()

            # Backward pass: compute gradient of the loss with respect to model
            # parameters
            loss.backward()

            # Calling the step function on an Optimizer makes an update to its
            # parameters
            optimizer.step()

    def _raw_predict(self, tensor: torch.Tensor):
        """Run the model on `tensor` and return the raw sigmoid output."""
        return self.model(tensor)

    def limit_predict(self, x: torch.Tensor, with_grad=False):
        """Map raw predictions to limits: 0 if p < 0.5 else (p - 0.5) * 2.

        :param x: input feature tensor
        :param with_grad: keep the autograd graph (needed during training)
        :return: tensor of limits in [0, 1]
        """
        if with_grad:
            raw_prediction = self._raw_predict(x)
        else:
            with torch.no_grad():
                raw_prediction = self._raw_predict(x)

        clamped = raw_prediction.clamp(min=0.5, max=1)
        limit = torch.mul(torch.add(clamped, -0.5), 2)
        return limit
Martin Řepa's avatar
backup  
Martin Řepa committed
175 176 177 178 179 180 181 182 183 184 185


def setup_loger(conf):
    """Configure root logging: DEBUG when conf.base_conf.debug, else INFO."""
    fmt = ('%(asctime)-15s\t%(name)s:%(levelname)s\t'
           '%(module)s:%(funcName)s:%(lineno)s\t%(message)s')
    if conf.base_conf.debug:
        chosen_level = logging.DEBUG
    else:
        chosen_level = logging.INFO
    logging.basicConfig(level=chosen_level, format=fmt)


if __name__ == '__main__':
    setup_loger(RootConfig())
    benign_x, _ = np_arrays_from_scored_csv(
        Path('all_benign_scored.csv'), 0, 1000)
    malicious_x, _ = np_arrays_from_scored_csv(
        Path('scored_malicious.csv'), 1, 500)

    # Benign rows: deduplicate and weight each unique row by its empirical
    # frequency over the whole benign sample.
    benign_unique_x, counts = np.unique(benign_x, axis=0, return_counts=True)
    probs_benign = counts / len(benign_x)
    benign_y = np.zeros(len(benign_unique_x))
    benign_data = FormattedBenignData(benign_unique_x, probs_benign, benign_y)

    # Malicious rows: same deduplication.
    # NOTE(review): counts are divided by the number of *unique* rows here,
    # unlike the benign branch which divides by the total sample size, so
    # these weights do not sum to 1 — confirm this is intended.
    malicious_unique_x, counts = np.unique(malicious_x, axis=0,
                                           return_counts=True)
    probs_malicious = counts / len(malicious_unique_x)
    malicious_y = np.ones(len(malicious_unique_x))
    malicious_data = FormattedMaliciousData(malicious_unique_x,
                                            probs_malicious, malicious_y)

    # Named `network` (not `nn`) to avoid shadowing torch's `nn` module
    # imported at the top of the file.
    network = NeuralNetwork()
    network.set_data(benign_data, malicious_data)
    network.train()