import logging
from pathlib import Path

import attr
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from torch import nn
from torch import optim

from config import NeuralNetworkConfig, TrainingNnConfig, RootConfig
from src.data.loader import np_arrays_from_scored_csv

logger = logging.getLogger(__name__)

@attr.s
class FormattedData:
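    """Unique feature vectors with their empirical probabilities and labels."""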
    unique_x: np.ndarray = attr.ib()
    probs_x: np.ndarray = attr.ib()
    y: np.ndarray = attr.ib()


class OrderCounter:
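    """Issues sequential ids used to tell NeuralNetwork instances apart."""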
    order = 0

    @staticmethod
    def next():
        OrderCounter.order += 1
        return OrderCounter.order


class NeuralNetwork:
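    """Small feed-forward binary classifier (two hidden ReLU layers, sigmoid
    output) trained with the custom benign/attacker loss in loss_function."""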
    def __init__(self, input_features=2,
                 nn_conf: NeuralNetworkConfig = NeuralNetworkConfig(),
                 nn_train_conf: TrainingNnConfig = TrainingNnConfig()):
        self.model = nn.Sequential(
            nn.Linear(input_features, 10),
            nn.ReLU(),
            nn.Linear(10, 12),
            nn.ReLU(),
            nn.Linear(12, 1),
            nn.Sigmoid()
        )
        self.epochs = nn_conf.epochs
        self.validation_split = nn_train_conf.validation_split
        self.id = OrderCounter.next()

        # Variables used for loss function
        self.attacker_actions: FormattedData = None
        self.benign_data: FormattedData = None

        # Loss value from the last training epoch, used as a quality measure
        self.final_loss = None

        # PyTorch's built-in binary cross-entropy loss could be used instead:
        # self.loss_fn = nn.BCELoss()

    def __str__(self):
        return f'Neural network id:{self.id}, final loss: {self.final_loss}'

    def set_data(self, benign_data, attack):
        self.attacker_actions = attack
        self.benign_data = benign_data

    def loss_function(self, x, limits, real_y, probs):
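        # Probability-weighted loss: attacker rows (real_y == 1) contribute
        # prod(x) scaled by (1 - limit), i.e. the part of the attacker's
        # utility that gets past the detector; benign rows (real_y == 0)
        # pay a quadratic false-positive cost limit**2. Averaged over rows.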
        zero_sum_part = real_y*(1-limits)*torch.prod(x, dim=1)*probs
        fp_cost = (1-real_y)*probs*torch.pow(limits, 2)
        sum_loss = torch.add(torch.sum(zero_sum_part), torch.sum(fp_cost))
        return torch.div(sum_loss, len(x))

    def _prepare_data(self):
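        """Stack benign and attacker samples into shuffled training tensors."""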
        defender = self.benign_data
        attacker = self.attacker_actions

        x = np.concatenate((defender.unique_x, attacker.unique_x), axis=0)
        y = np.concatenate((defender.y, attacker.y), axis=0)
        probs = np.concatenate((defender.probs_x, attacker.probs_x), axis=0)

        # Shuffle before splitting
        x, y, probs = shuffle(x, y, probs, random_state=1)

        # TODO use validation data as well
        self.x_train = torch.from_numpy(x).float()
        self.y_train = torch.from_numpy(y).float()
        self.probs_train = torch.from_numpy(probs).float()

    def train(self):
        self._prepare_data()
        self._train()

    def _train(self):
        learning_rate = 0.5e-2
        optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)

        for e in range(self.epochs):
            # Forward pass: compute predicted y by passing x to the model.
            train_limits = self._limit_predict(self.x_train, with_grad=True)

            # Compute loss.
            loss = self.loss_function(self.x_train, train_limits, self.y_train,
                                      self.probs_train)
            # loss = self.loss_fn(train_limits, self.y_train)

            # Report training progress every few epochs
            if e % 5 == 0:
                logger.debug(f'Epoch: {e}/{self.epochs},\t'
                             f'TrainLoss: {loss}')

            # Before the backward pass, use the optimizer object to zero all of
            # the gradients for the variables it will update
            optimizer.zero_grad()

            # Backward pass: compute gradient of the loss with respect to model
            # parameters
            loss.backward()

            # Calling the step function on an Optimizer makes an update to its
            # parameters
            optimizer.step()
        self.final_loss = loss.item()  # plain float, detached from the graph

    def _raw_predict(self, tensor: torch.Tensor):
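        # Forward pass; flatten the (n, 1) sigmoid output to a 1-D tensor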
        pred = self.model(tensor)
        return pred.flatten().float()

    def _limit_predict(self, x: torch.Tensor, with_grad=False):
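        # Gradients are only needed when predictions feed the training loss;
        # otherwise run under no_grad so no autograd graph is built.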
        if with_grad:
            raw_prediction = self._raw_predict(x)
        else:
            with torch.no_grad():
                raw_prediction = self._raw_predict(x)

        # The same as lambda p: 0 if p < 0.5 else (p - 0.5) * 2
        clamped = raw_prediction.clamp(min=0.5, max=1)
        limit = torch.mul(torch.add(clamped, -0.5), 2)
        return limit

    def predict_single_limit(self, input):
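        """Return the limit for a single sample, e.g. a two-feature list
        like [0.3, 0.7] with the default input size."""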
        if isinstance(input, (list, tuple, np.ndarray)):
            input = torch.tensor(input).float()

        return self._limit_predict(input)[0].item()


def setup_logger(conf):
    log_format = ('%(asctime)-15s\t%(name)s:%(levelname)s\t'
                  '%(module)s:%(funcName)s:%(lineno)s\t%(message)s')
    level = logging.DEBUG if conf.base_conf.debug else logging.INFO
    logging.basicConfig(level=level, format=log_format)


if __name__ == '__main__':
    setup_logger(RootConfig())
    benign_x, _ = np_arrays_from_scored_csv(
        Path('all_benign_scored.csv'), 0, 500)
    malicious_x, _ = np_arrays_from_scored_csv(
        Path('scored_malicious.csv'), 1, 1)

    benign_unique_x, counts = np.unique(benign_x, axis=0, return_counts=True)
    probs_benign = np.array([count / len(benign_x) for count in counts])
    benign_y = np.zeros(len(benign_unique_x))
    benign_data = FormattedData(benign_unique_x, probs_benign, benign_y)

    malicious_unique_x, counts = np.unique(malicious_x, axis=0, return_counts=True)
    probs_malicious = np.array([count / len(malicious_x) for count in counts])
    malicious_y = np.ones(len(malicious_unique_x))
    malicious_data = FormattedData(malicious_unique_x, probs_malicious, malicious_y)

    network = NeuralNetwork()  # do not shadow the torch.nn import
    network.set_data(benign_data, malicious_data)
    network.train()