network.py 6.28 KB
Newer Older
Martin Řepa's avatar
backup  
Martin Řepa committed
1
import logging
2 3
from pathlib import Path

4
import attr
5
import numpy as np
Martin Řepa's avatar
backup  
Martin Řepa committed
6
import torch
7
from sklearn.utils import shuffle
Martin Řepa's avatar
backup  
Martin Řepa committed
8 9
from torch import nn
from torch import optim
10

11
from config import NeuralNetworkConfig, RootConfig
12 13
from src.data.loader import np_arrays_from_scored_csv

Martin Řepa's avatar
backup  
Martin Řepa committed
14 15
logger = logging.getLogger(__name__)

16

17
@attr.s
class FormattedData:
    """Container for one player's training data.

    unique_x: de-duplicated feature rows.
    probs_x:  empirical probability of each row in unique_x.
    y:        label per row (0 = benign, 1 = attack) — see the __main__
              driver, which builds instances with np.zeros / np.ones.
    """
    # FIX: annotate with np.ndarray (the array type); the original used
    # np.array, which is a factory function, not a type.  attr.ib()
    # ignores the annotation, so this changes no runtime behavior.
    unique_x: np.ndarray = attr.ib()
    probs_x: np.ndarray = attr.ib()
    y: np.ndarray = attr.ib()


Martin Řepa's avatar
backup  
Martin Řepa committed
24 25 26 27 28 29 30 31
class OrderCounter:
    """Process-wide monotonically increasing id generator.

    State lives on the class itself, so every call site shares one
    counter.  Not thread-safe; callers are single-threaded here.
    """

    order = 0  # last id handed out; 0 means none yet

    @staticmethod
    def next():
        """Advance the shared counter and return its new value."""
        new_value = OrderCounter.order + 1
        OrderCounter.order = new_value
        return new_value

32 33

class NeuralNetwork:
34
    def __init__(self, input_features=2,
35
                 nn_conf: NeuralNetworkConfig = NeuralNetworkConfig()):
Martin Řepa's avatar
backup  
Martin Řepa committed
36 37 38 39 40 41 42
        self.model = nn.Sequential(
            nn.Linear(input_features, 10),
            nn.ReLU(),
            nn.Linear(10, 12),
            nn.ReLU(),
            nn.Linear(12, 1),
            nn.Sigmoid()
43
        )
44 45
        self._set_weights()
        self.conf = nn_conf
46
        self.id = OrderCounter.next()
47

48
        # Variables used for loss function
49 50 51
        self.attacker_actions: FormattedData = None
        self.benign_data: FormattedData = None

52
        # Variables from last training epoch measuring quality
53
        self.final_loss = None
54
        self.final_fp_cost = None
55

56
    def __str__(self):
57
        return f'Neural network id:{self.id}, final loss: {self.final_loss}'
58

59
    def set_data(self, benign_data: FormattedData, attack: FormattedData):
60 61 62 63
        self.attacker_actions = attack
        self.benign_data = benign_data

    def loss_function(self, x, limits, real_y, probs):
64 65 66 67 68 69 70
        zero_sum_part = torch.sum(real_y*(1-limits)*torch.prod(x, dim=1)*probs)
        fp_cost = self._fp_cost_tensor(limits, real_y, probs)
        sum_loss = torch.add(zero_sum_part, fp_cost)
        return sum_loss

    def _fp_cost_tensor(self, limits, real_y, probs):
        return torch.sum((1-real_y) * probs * torch.pow(limits, 4))
71 72 73 74 75

    def _prepare_data(self):
        defender = self.benign_data
        attacker = self.attacker_actions

76
        x = np.concatenate((defender.unique_x, attacker.unique_x), axis=0)
77
        y = np.concatenate((defender.y, attacker.y), axis=0)
78
        probs = np.concatenate((defender.probs_x, attacker.probs_x), axis=0)
79 80 81 82

        # Shuffle before splitting
        x, y, probs = shuffle(x, y, probs, random_state=1)

83 84 85
        self.x_train = torch.from_numpy(x).float()
        self.y_train = torch.from_numpy(y).float()
        self.probs_train = torch.from_numpy(probs).float()
86

87 88 89 90 91 92 93
    def _set_weights(self):
        def init_weights(m):
            if type(m) == nn.Linear:
                torch.nn.init.xavier_uniform(m.weight)
                m.bias.data.fill_(.0)
        self.model.apply(init_weights)

94 95 96 97 98
    def train(self):
        self._prepare_data()
        self._train()

    def _train(self):
99
        learning_rate = self.conf.learning_rate
Martin Řepa's avatar
backup  
Martin Řepa committed
100
        optimizer = torch.optim.Adam(self.model.parameters(), lr=learning_rate)
101

102
        for e in range(self.conf.epochs):
Martin Řepa's avatar
backup  
Martin Řepa committed
103
            # Forward pass: compute predicted y by passing x to the model.
104
            train_limits = self._limit_predict(self.x_train, with_grad=True)
105

106 107 108 109
            # Compute loss.
            loss = self.loss_function(self.x_train, train_limits, self.y_train,
                                      self.probs_train)
            # loss = self.loss_fn(train_limits, self.y_train)
Martin Řepa's avatar
backup  
Martin Řepa committed
110 111 112

            # Compute validation loss and report some info
            if e % 5 == 0:
113
                logging.debug(f'Epoch: {e}/{self.conf.epochs},\t'
114
                              f'TrainLoss: {loss},\t')
Martin Řepa's avatar
backup  
Martin Řepa committed
115 116 117 118

            # Before the backward pass, use the optimizer object to zero all of
            # the gradients for the variables it will update
            optimizer.zero_grad()
119

Martin Řepa's avatar
backup  
Martin Řepa committed
120 121 122
            # Backward pass: compute gradient of the loss with respect to model
            # parameters
            loss.backward()
123

Martin Řepa's avatar
backup  
Martin Řepa committed
124 125 126
            # Calling the step function on an Optimizer makes an update to its
            # parameters
            optimizer.step()
127 128 129

        self.final_fp_cost = self._fp_cost_tensor(train_limits, self.y_train,
                                                  self.probs_train).item()
130
        self.final_loss = loss
131

132
    def _raw_predict(self, tensor: torch.Tensor):
133 134
        pred = self.model(tensor)
        return pred.flatten().float()
135

136
    def _limit_predict(self, x: torch.Tensor, with_grad=False):
137 138 139 140 141
        if with_grad:
                raw_prediction = self._raw_predict(x)
        else:
            with torch.no_grad():
                raw_prediction = self._raw_predict(x)
142

143
        # The same as lambda p: 0 if p < 0.5 else (p - 0.5) * 2
144
        # TODO try to use e.g. sigmoid
145 146 147
        clamped = raw_prediction.clamp(min=0.5, max=1)
        limit = torch.mul(torch.add(clamped, -0.5), 2)
        return limit
Martin Řepa's avatar
backup  
Martin Řepa committed
148

149
    def predict_single_limit(self, input, return_tensor=False):
150 151 152 153 154
        in_type = type(input)
        if in_type == list or in_type == tuple or \
                in_type == np.array or in_type == np.ndarray:
            input = torch.tensor(input).float()

155 156 157 158
        if return_tensor:
            return self._limit_predict(input)[0]
        else:
            return self._limit_predict(input)[0].item()
159

Martin Řepa's avatar
backup  
Martin Řepa committed
160 161 162 163 164 165 166 167 168 169

def setup_loger(conf):
    """Configure root logging: DEBUG when conf.base_conf.debug, else INFO.

    (Name kept as-is — the __main__ block below calls `setup_loger`.)
    """
    fmt = ('%(asctime)-15s\t%(name)s:%(levelname)s\t'
           '%(module)s:%(funcName)s:%(lineno)s\t%(message)s')
    if conf.base_conf.debug:
        chosen_level = logging.DEBUG
    else:
        chosen_level = logging.INFO
    logging.basicConfig(level=chosen_level, format=fmt)


if __name__ == '__main__':
    setup_loger(RootConfig())
    benign_x, _ = np_arrays_from_scored_csv(
        Path('all_benign_scored.csv'), 0, 500)
    malicious_x, _ = np_arrays_from_scored_csv(
        Path('scored_malicious.csv'), 1, 1)

    # Collapse duplicate rows and weight each unique row by its empirical
    # frequency in the raw sample.
    benign_unique_x, counts = np.unique(benign_x, axis=0, return_counts=True)
    probs_benign = counts / len(benign_x)
    benign_y = np.zeros(len(benign_unique_x))
    benign_data = FormattedData(benign_unique_x, probs_benign, benign_y)

    malicious_unique_x, counts = np.unique(malicious_x, axis=0,
                                           return_counts=True)
    # FIX: divide by the total number of malicious samples (as the benign
    # path above does), not by the number of *unique* rows — the original
    # `count / len(malicious_unique_x)` does not yield an empirical
    # probability distribution over the unique rows.
    probs_malicious = counts / len(malicious_x)
    malicious_y = np.ones(len(malicious_unique_x))
    malicious_data = FormattedData(malicious_unique_x, probs_malicious,
                                   malicious_y)

    # FIX: named `network` rather than `nn` — the original shadowed the
    # module-level `from torch import nn` import.
    network = NeuralNetwork()
    network.set_data(benign_data, malicious_data)
    network.train()