Commit c534fab9 authored by Martin Řepa

Merge branch 'init'

parents 2e9c2c59 29f24b18
venv
materials
src/data/raw
.idea
from typing import Callable, List
from math import log

import attr


@attr.s
class Feature:
    name = attr.ib(factory=str)
    func = attr.ib(default=None)  # Callable[[str], float] that scores a query


def uncommon_letters_score(word: str) -> int:
    """Count occurrences of letters that are rare in common domain names."""
    unusual = ('q', 'x', 'z', 'f')
    score = 0
    for letter in word.lower():
        if letter in unusual:
            score += 1
    return score


def normalised_letters_score(word: str) -> float:
    # 255 is the maximum length of a DNS name in octets, so the score stays in [0, 1]
    return uncommon_letters_score(word) / 255


def entropy(word: str) -> float:
    """Shannon entropy of the character distribution of `word`, in bits."""
    e = 0.0
    length = len(word)
    occurrence = {}
    for letter in word:
        occurrence[letter] = occurrence.get(letter, 0) + 1
    for v in occurrence.values():
        p = v / length
        e -= p * log(p, 2)
    return e


def norm_entropy(word: str) -> float:
    # 5.2 is used as a rough upper bound on the entropy, keeping the score roughly in [0, 1]
    return entropy(word) / 5.2


def norm_len(word: str) -> float:
    # 255 is the maximum length of a DNS name in octets
    return len(word) / 255


def initial_line(features: List[Feature], debug: bool = False) -> str:
    """Build the CSV header line from the feature names."""
    line = 'query' if debug else ''
    for feature in features:
        line = f'{line},{feature.name}' if line else feature.name
    return line


def score_query(features: List[Feature], query: str, debug: bool = False) -> str:
    """Build one CSV data line by applying every feature to the query."""
    line = query if debug else ''
    for feature in features:
        line = f'{line},{feature.func(query)}' if line else f'{feature.func(query)}'
    return line
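
# Quick usage sketch (illustrative, not part of the original module): how the pieces
# above combine into one header line and one data line of a scored CSV.
#
#   demo_features = [Feature('normalised entropy', norm_entropy),
#                    Feature('normalised length', norm_len)]
#   initial_line(demo_features)                # -> 'normalised entropy,normalised length'
#   score_query(demo_features, 'example.com')  # -> roughly '0.595,0.043'
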
from pathlib import Path
from typing import Optional, Tuple

import numpy as np
import pandas


def np_arrays_from_scored_csv(file: Path, label: int, count_max: Optional[int] = None,
                              shuffle: bool = False) -> Tuple[np.ndarray, np.ndarray]:
    """Load a scored CSV into a feature matrix and a constant label vector."""
    content = pandas.read_csv(file)
    batch = []
    labels = []
    if shuffle:
        content = content.sample(frac=1)
    for _, row in content.iterrows():
        batch.append([float(item) for item in row])
        labels.append(label)
        if len(batch) == count_max:
            break
    return np.array(batch, dtype=np.float64), np.array(labels, dtype=np.int8)


if __name__ == "__main__":
    a = np_arrays_from_scored_csv(Path('scored/scored_malicious.csv'), 1, 100)
    print(a)
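
# Rough shape check for the call above (assuming the scored CSV has the two feature
# columns produced by make_data.py): `a` is a pair (x, y) where x has shape (n, 2)
# with n <= 100 rows of feature values and y has shape (n,) filled with the label 1.
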
#normalised entropy normalised length
0.7644570725417812 0.14901960784313725
0.6487610119759972 0.058823529411764705
0.6228656262697367 0.050980392156862744
0.6734524541770424 0.12156862745098039
0.5769230769230769 0.03137254901960784
0.6844383789661873 0.09803921568627451
0.1923076923076923 0.00784313725490196
0.728527180703221 0.11372549019607843
0.523447710555262 0.0392156862745098
0.6880422175220736 0.06274509803921569
0.5170618991424745 0.047058823529411764
0.7278734283533927 0.08235294117647059
0.5740823338527407 0.054901960784313725
0.43269230769230765 0.03137254901960784
0.6744020376170228 0.058823529411764705
0.7474696515636562 0.09411764705882353
0.7253952324758078 0.09411764705882353
0.6524514250863047 0.050980392156862744
0.746547136180166 0.09019607843137255
0.5821128525690937 0.050980392156862744
0.5485032695120159 0.06274509803921569
0.7120137523499513 0.11372549019607843
0.6629350960463866 0.09019607843137255
0.8597255953484009 0.17254901960784313
0.6820075662828875 0.0784313725490196
0.652642486838989 0.10196078431372549
0.7095584100383743 0.08235294117647059
0.5844516676736675 0.054901960784313725
0.6656384885021169 0.07058823529411765
0.6645481116499121 0.10196078431372549
0.6793410110581961 0.10196078431372549
0.6885504230244426 0.10196078431372549
0.6589633544719672 0.11372549019607843
0.6504938621684762 0.10196078431372549
0.1923076923076923 0.00784313725490196
0.710889422915196 0.09019607843137255
0.6796575040731424 0.09019607843137255
0.687811085559359 0.07450980392156863
0.6729283744485949 0.06666666666666667
0.6681740361741211 0.10196078431372549
0.5668659190807865 0.03529411764705882
0.6656384885021169 0.07058823529411765
0.7181362218410107 0.10196078431372549
0.665638488502117 0.07058823529411765
0.684330502509405 0.08235294117647059
0.664723992717491 0.058823529411764705
0.7705863130359535 0.12549019607843137
0.611924195146195 0.054901960784313725
0.6936416792644848 0.08627450980392157
0.6793410110581961 0.10196078431372549
#normalised entropy normalised length
0.9661064770376071 0.996078431372549
0.9610788560382011 0.996078431372549
0.9579793120834494 0.7372549019607844
0.9638118622668527 0.9921568627450981
0.9616181747939105 0.9921568627450981
0.970843382770317 0.9921568627450981
0.9636374461384305 0.9921568627450981
0.7645715212200813 0.2
0.9629108881373653 0.996078431372549
0.9624670315806567 0.9921568627450981
0.9612622516402483 0.9921568627450981
0.969517759610237 0.9921568627450981
0.9732506972020125 0.9921568627450981
0.970654588741244 0.9921568627450981
0.9602540346154786 0.9921568627450981
0.9557187990195201 0.996078431372549
0.9591474425451948 0.996078431372549
0.9763388643138835 0.9921568627450981
0.9714102661565281 0.996078431372549
0.8678333801854506 0.21568627450980393
0.9720180205687999 0.9921568627450981
0.9660264424349851 0.9921568627450981
0.958100902724699 0.996078431372549
0.9633172650419695 0.996078431372549
0.97101966840149 0.996078431372549
0.958566746473564 0.996078431372549
0.9665604824002637 0.996078431372549
0.9646730848103486 0.996078431372549
0.9592227384561766 0.996078431372549
0.9482644048575208 0.5176470588235295
0.9644728182269342 0.996078431372549
0.9671892948089387 0.9921568627450981
0.972833060048564 0.996078431372549
0.9671906494517046 0.996078431372549
0.9625013167117836 0.9921568627450981
0.9603983498050831 0.9921568627450981
0.9045124885619298 0.21568627450980393
0.9657052169917598 0.996078431372549
0.9681739135520024 0.996078431372549
0.9726964093999644 0.9921568627450981
0.9755889511207536 0.9921568627450981
0.960900346780429 0.996078431372549
0.9644852644107168 0.9921568627450981
0.9640389783314874 0.9921568627450981
0.9533661640425967 0.4196078431372549
0.9686229747256779 0.9921568627450981
0.9669524163592599 0.9921568627450981
0.9433884160478205 0.4196078431372549
0.9714749468195338 0.996078431372549
0.9583216630448729 0.996078431372549
import random
from pathlib import Path
from typing import List

from pandas import read_csv

from src.data.features import Feature, initial_line, score_query, norm_entropy, norm_len


def score_benign_dns_log(features: List[Feature], debug: bool = False) -> List[str]:
    file_path = Path('raw/dns.11-24-29-12-00-00.log')
    result = [initial_line(features, debug)]
    queries = []
    with open(file_path, 'r', encoding='utf-8') as log_file:
        for line in log_file:
            if line.startswith('#'):  # skip log header/comment lines
                continue
            # column index 8 of the tab-separated DNS log holds the queried domain
            queries.append(score_query(features, line.split("\t")[8]))
    random.shuffle(queries)
    result.extend(queries)
    return result


def score_csv_dns_log(features: List[Feature], debug: bool = False) -> List[str]:
    content = read_csv('raw/b32_M250.csv')
    result = [initial_line(features, debug)]
    queries = []
    for info in content.Info:
        parts = info.split(' ')
        if parts[2] == 'response':  # score only the queries, not the responses
            continue
        queries.append(score_query(features, parts[4]))
    random.shuffle(queries)
    result.extend(queries)
    return result


def write_scored(path: str, result: List[str]):
    with open(path, 'w', encoding='utf-8') as file:
        for item in result:
            file.write(f'{item}\n')


if __name__ == "__main__":
    features = [
        Feature('normalised entropy', norm_entropy),
        Feature('normalised length', norm_len)
    ]
    res = score_benign_dns_log(features)
    write_scored('scored/all_benign_scored.csv', res)
    res = score_csv_dns_log(features)
    write_scored('scored/scored_malicious.csv', res)
import functools
import itertools
import operator
from typing import List

import numpy as np

from src.game_solver import GameSolver
from src.neural_networks.network import NeuralNetwork

FEATURES_NO = 2
SUCCESS_ATTACK_CONST = 1


def create_attacker_actions(features_no: int):
    # Discretise each feature axis into 101 steps: [0.00, 0.01, 0.02, ..., 0.99, 1.00]
    one_axis = np.linspace(0, 1, 101)
    return list(itertools.product(one_axis, repeat=features_no))
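
# Note: for FEATURES_NO = 2 the grid above contains 101 * 101 = 10201 candidate
# attacker actions, one per combination of the two discretised feature values.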

def utility(attacker_features: List[float], defender_network: NeuralNetwork):
    # Attacker payoff: product of the feature values, zeroed out when the defender flags the action
    pred = defender_network.predict(attacker_features)
    return functools.reduce(operator.mul, attacker_features, 1) * (1 - pred) * SUCCESS_ATTACK_CONST
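
# Worked example (illustrative values): for attacker features (0.8, 0.5) the payoff
# is 0.8 * 0.5 * SUCCESS_ATTACK_CONST = 0.4 when the defender's network labels the
# point benign (pred = 0), and 0 when it labels the point malicious (pred = 1).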

def solve_game():
    print('Creating actions')
    actions_attacker = create_attacker_actions(FEATURES_NO)
    gs = GameSolver()
    val, ordered_actions_p1, prob_p1, ordered_actions_p2, prob_p2 = \
        gs.double_oracle(actions_attacker, utility, log=True)

    print('\n\n-------------------------------------------------')
    print(f'Game has ended with value: {val}')
    print('Attacker: action x probability')
    for a, p in zip(ordered_actions_p1, prob_p1):
        print(f'{a} x {p}')
    print('\nDefender: action x probability')
    for nn, p in zip(ordered_actions_p2, prob_p2):
        print(f'{nn} x {p}')
    print('-------------------------------------------------')


def main():
    solve_game()


if __name__ == "__main__":
    main()
import operator
from itertools import count
from pathlib import Path
from typing import Callable, List

import pulp

from src.data.loader import np_arrays_from_scored_csv
from src.neural_networks.network import NeuralNetwork

FP_CONSTANT = 10
BENIGN_DATA_N0 = 1000
MALICIOUS_DATA_N0 = 100
EPSILON = 0.05


class GameSolver:
    def __init__(self):
        self.benign_data = np_arrays_from_scored_csv(
            Path('data/scored/all_benign_scored.csv'), 0, BENIGN_DATA_N0)
        # Test data used to calculate how many false positives the neural networks produce
        # self.benign_test_data = np_arrays_from_scored_csv(Path('data/scored/all_benign_scored.csv'), 0, 100, True)

    def _get_trained_nn(self, attacker_features_x: List[List[float]]) -> NeuralNetwork:
        # Initialize and train a defender network on benign data plus the given attacker points
        network = NeuralNetwork()
        network.train(attacker_features_x, self.benign_data)
        network.calc_n0_false_positives(self.benign_data[0])
        return network
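
    # Double-oracle sketch: keep small sets of "used" actions for both players, solve
    # the restricted zero-sum game with an LP, and add each player's best response to
    # the opponent's current mixed strategy. The loop stops once the attacker's best
    # response is already in its action set and the newly trained network behaves
    # (within EPSILON) like a network that was already found.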
    def double_oracle(self, actions_p1, utility, log=False):
        used_actions_p1 = set(actions_p1[:1])
        used_actions_p2 = {self._get_trained_nn([[]])}
        for i in count():
            if log:
                print(f'\n\nIteration: {i}\n')
            ordered_used_actions_p1 = list(used_actions_p1)
            ordered_used_actions_p2 = list(used_actions_p2)
            value, probs_p1, probs_p2 = solve_zero_sum_game_pulp(
                ordered_used_actions_p1, ordered_used_actions_p2, utility, log)
            br_p1 = self.best_response_p1(actions_p1, ordered_used_actions_p2, probs_p2, utility)
            br_p2 = self.best_response_p2(ordered_used_actions_p1, probs_p1)
            if br_p1 in used_actions_p1 and self.is_nn_similar(
                    br_p2, ordered_used_actions_p2, EPSILON, used_actions_p1, utility):
                return value, ordered_used_actions_p1, probs_p1, ordered_used_actions_p2, probs_p2
            used_actions_p1.add(br_p1)
            used_actions_p2.add(br_p2)

    def is_nn_similar(self,
                      new_nn: NeuralNetwork,
                      old_nns: List,
                      epsilon: float,
                      used_actions_p1,
                      utility):
        # Compares the utilities of the new neural network with those of the old networks and
        # checks whether the new one is similar to some old one (i.e. the difference in utility
        # is lower than epsilon for every p1 action).
        print('Let\'s compare the new neural network with the existing ones:')
        utilities_of_new_nn = [utility(a1, new_nn) for a1 in used_actions_p1]
        for old_nn in old_nns:
            as_good = True
            for new_utility, a1 in zip(utilities_of_new_nn, used_actions_p1):
                old_utility = utility(a1, old_nn)
                print(f'old utility: {old_utility}, new utility: {new_utility}, '
                      f'difference: {abs(old_utility - new_utility)}')
                if abs(old_utility - new_utility) > epsilon:
                    as_good = False
                    print('########################################################')
                    break
            if as_good:
                print('Yep, this network is already covered.')
                return True
        return False

    def best_response_p1(self, actions_p1, used_actions_p2, probs_p2, utility):
        # Attacker's best response: the action maximising expected utility against
        # the defender's current mixed strategy over trained networks
        return max(actions_p1,
                   key=lambda a1: sum(map(operator.mul,
                                          map(lambda a2: utility(a1, a2), used_actions_p2),
                                          probs_p2)))

    def best_response_p2(self, used_actions_p1, probs_p1):
        # Defender's best response: train a new network on malicious points sampled
        # in proportion to the attacker's current mixed strategy
        malicious_features = []
        for ai, pi in zip(used_actions_p1, probs_p1):
            for _ in range(int(MALICIOUS_DATA_N0 * pi)):
                malicious_features.append(ai)
        print('Let\'s train a new NN with this malicious data:')
        print(f'{malicious_features}\n')
        return self._get_trained_nn(malicious_features)
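

# LP sketch of the restricted zero-sum game solved below: choose a probability q_j
# for each trained network nn_j so as to minimise the game value v, subject to
#   sum_j q_j * utility(a1_i, nn_j) <= v   for every attacker action a1_i,
#   sum_j q_j == 1,
#   sum_j q_j * false_positives(nn_j) <= FP_CONSTANT.
# The attacker's equilibrium probabilities are read off the duals (c.pi) of the
# per-action utility constraints.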
def solve_zero_sum_game_pulp(actions_p1: List[List[float]],
                             actions_p2: List[NeuralNetwork],
                             utility: Callable,
                             log: bool):
    if log:
        print('Going to solve with LP')
        print(f'Attacker\'s actions so far: {actions_p1}')
        print(f'Defender\'s actions so far: {actions_p2}')

    # Create the LP problem, minimising the game value "v"
    m = pulp.LpProblem("Zero_sum_game", pulp.LpMinimize)
    v = pulp.LpVariable("v")
    m += v

    # Player two's probability vector
    probs_p_two = [pulp.LpVariable("np" + str(i), 0, 1) for i in range(len(actions_p2))]
    m += pulp.lpSum(probs_p_two) == 1  # Probabilities sum to 1

    # Expected number of false positives must stay below FP_CONSTANT
    fp_constraint = pulp.lpSum(
        p * a2.get_false_positive_rate() for p, a2 in zip(probs_p_two, actions_p2)
    ) <= FP_CONSTANT
    m += fp_constraint

    # For every attacker action, the expected utility is bounded by the game value
    constraints = []
    for a1 in actions_p1:
        constraints.append(
            pulp.lpSum(p * utility(a1, a2) for p, a2 in zip(probs_p_two, actions_p2)) <= v)
    for c in constraints:
        m += c

    m.solve()
    if log:
        print('LP solved')
        print(f'Value of the game: {v.varValue}')
        print(f'False positive constraint: {fp_constraint}')
        print(f'Found solution: {pulp.LpStatus[m.status]}')
        print('Attacker\'s probabilities:')
        print(f'{list(str(abs(c.pi)) + " " for c in constraints)}')
        print('Defender\'s probabilities:')
        print(f'{list(str(prob.varValue) + " " for prob in probs_p_two)}')
    return v.varValue, [abs(c.pi) for c in constraints], [prob.varValue for prob in probs_p_two]
if __name__ == "__main__":
gs = GameSolver()
# nn1 = gs._get_trained_nn([[]])
# # print(nn1.predict([0.9, 0.9]))
# res = solve_zero_sum_game_pulp([[0.5, 0.1]], [nn1], utility)
# print(res)
from pathlib import Path
from typing import List, Tuple

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow import keras

from src.data.loader import np_arrays_from_scored_csv


class NeuralNetwork:
    def __init__(self, input_features: int = 2):
        self.model = keras.Sequential([
            keras.layers.Dense(10, activation='relu', input_shape=(input_features,)),
            keras.layers.Dense(12, activation='relu'),
            keras.layers.Dense(1, activation='sigmoid'),
        ])
        self.model.compile(loss='binary_crossentropy',
                           optimizer='adam',
                           metrics=['accuracy'])
        self.false_positives = None

    def train(self,
              attacker_features_x: List[List[float]],
              benign_data: Tuple[np.ndarray, np.ndarray]):
        x, y = benign_data
        attacker_features_x = np.array(attacker_features_x)
        # Append the attacker's points (labelled 1) only when there are any
        if attacker_features_x.size:
            attacker_features_y = [1 for _ in attacker_features_x]
            x = np.concatenate((x, attacker_features_x), axis=0)
            y = np.concatenate((y, attacker_features_y), axis=0)
        x, y = shuffle(x, y, random_state=1)
        self.model.fit(x, y, epochs=40, class_weight={0: 1, 1: 5})  # Train the model

    def calc_n0_false_positives(self, x_test: np.ndarray):
        # Count benign samples classified as malicious (score above 0.5)
        res = self.model.predict(x_test)
        self.false_positives = int((res > 0.5).sum())

    def predict(self, attacker_features: List[float]) -> int:
        features = np.array([attacker_features])
        prediction = self.model.predict(features)
        # 1 -> malicious | 0 -> benign
        return 0 if prediction[0][0] <= 0.5 else 1

    def get_false_positive_rate(self):
        # Note: returns the absolute number of false positives, as used in the LP constraint
        return self.false_positives

    def __str__(self):
        return f'(Neural network {self.__hash__()} with FP n0: {self.false_positives})'


if __name__ == '__main__':
    # ---------------------------------------- Prepare the data ----------------------------------------
    # benign_x, benign_y = np_arrays_from_scored_csv(Path('../data/scored/all_benign_scored.csv'), 0, 1000)
    # malicious_x, malicious_y = np_arrays_from_scored_csv(Path('../data/scored/scored_malicious.csv'), 1, 50)
    benign_x, benign_y = np_arrays_from_scored_csv(Path('../data/scored/all_benign_scored.csv'), 0, 100)
    malicious_x, malicious_y = np_arrays_from_scored_csv(Path('../data/scored/scored_malicious.csv'), 1, 10)
    x = np.concatenate((benign_x, malicious_x), axis=0)
    y = np.concatenate((benign_y, malicious_y), axis=0)
    x, y = shuffle(x, y, random_state=1)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.10, random_state=41)

    # Initialize the model
    network = NeuralNetwork()
    network.model.fit(x_train, y_train, epochs=40)
    test_loss, test_acc = network.model.evaluate(x_test, y_test)
    print('Test accuracy:', test_acc)
    network.calc_n0_false_positives(x_test)
    print(network.false_positives)
    # print(network.model.predict(np.array([[0.85, 0.98]]))[0])

    # Make a png of the model architecture
    # keras.utils.plot_model(network.model, to_file='model.png', show_shapes=True)
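
# A rough end-to-end reading of the scripts above: src/data/make_data.py writes the
# scored CSV files (all_benign_scored.csv, scored_malicious.csv), main.py builds the
# attacker action grid and defines the utility, and GameSolver.double_oracle trains
# defender networks and solves the restricted games until an approximate equilibrium
# is reached.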