Commit f2eaae74 authored by Michal Sustr's avatar Michal Sustr
Browse files

Add a simple MCCFR test, add seeding of algorithm

parent 41b7ef8e
......@@ -133,6 +133,7 @@ set(SOURCE_FILES
utils/utils.h
utils/functools.h
#utils/highQualityRandom.h utils/highQualityRandom.cpp
)
set(TEST_FILES
......@@ -145,6 +146,8 @@ set(TEST_FILES
tests/matchingPenniesTest.cpp
tests/cacheTest.cpp
tests/cfrTest.cpp
tests/mccfrTest.cpp
)
# -------------------- Executables, linking --------------------
......
......@@ -124,15 +124,14 @@ unordered_map<shared_ptr<InformationSet>, double> initInformationSetCounters(con
return informationSetCounters;
}
BehavioralStrategy mccfr_ExternalSampling(const Domain &domain, int noIterations) {
BehavioralStrategy mccfr_ExternalSampling(const Domain &domain, int noIterations, int seed) {
auto regretTables = initRegretTables(domain);
auto cummulativeStrategyTables = initCummulativeStrategyTables(domain);
BehavioralStrategy sigma;
sigma = uniformStrategy(domain);
double epsilon = 0.3;
std::random_device rd;
std::mt19937 mt(rd());
std::mt19937 mt(seed);
std::uniform_real_distribution<double> dist(0.0, 1.0);
......@@ -240,7 +239,7 @@ BehavioralStrategy mccfr_ExternalSampling(const Domain &domain, int noIterations
return strategyFromCummulativeStrategyTable(domain, cummulativeStrategyTables);
}
BehavioralStrategy mccfr_OutcomeSampling(const Domain &domain, int noIterations) {
BehavioralStrategy mccfr_OutcomeSampling(const Domain &domain, int noIterations, int seed) {
auto regretTables = initRegretTables(domain);
auto cummulativeStrategyTables = initCummulativeStrategyTables(domain);
BehavioralStrategy sigma;
......@@ -248,8 +247,7 @@ BehavioralStrategy mccfr_OutcomeSampling(const Domain &domain, int noIterations)
sigma = uniformStrategy(domain);
double epsilon = 0.6;
std::random_device rd;
std::mt19937 mt(rd());
std::mt19937 mt(seed);
std::uniform_real_distribution<double> dist(0.0, 1.0);
......
......@@ -63,12 +63,12 @@ unordered_map<shared_ptr<InformationSet>, double> initInformationSetCounters(con
/**
* MCCFR - External sampling with Stochastically-Weighted Averaging
*/
BehavioralStrategy mccfr_ExternalSampling(const Domain &domain, int noIterations);
BehavioralStrategy mccfr_ExternalSampling(const Domain &domain, int noIterations, int seed);
/**
* MCCFR - Outcome sampling with Optimistic Averaging
*/
BehavioralStrategy mccfr_OutcomeSampling(const Domain &domain, int noIterations);
BehavioralStrategy mccfr_OutcomeSampling(const Domain &domain, int noIterations, int seed);
} // namespace algorithms
} // namespace GTLib2
......
......@@ -38,6 +38,7 @@
#include "algorithms/bestResponse.h"
#include "algorithms/equilibrium.h"
#include "domains/goofSpiel.h"
#include "domains/matching_pennies.h"
#include "algorithms/utility.h"
#include "algorithms/cfr.h"
#include "algorithms/mccfr.h"
......@@ -98,7 +99,7 @@ int main(int argc, char *argv[]) {
//printStrategy(gsd, tmpStrat);
//auto strategy = mccfr_ExternalSampling(gsd, noIterations);
auto strategy = mccfr_OutcomeSampling(gsd, noIterations);
auto strategy = mccfr_OutcomeSampling(gsd, noIterations, 0);
//printStrategy(gsd, strategy);
//auto strategy = mccfr_ExternalSampling(mp, noIterations);
......
/*
Copyright 2019 Faculty of Electrical Engineering at CTU in Prague
This file is part of Game Theoretic Library.
Game Theoretic Library is free software: you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public License
as published by the Free Software Foundation, either version 3
of the License, or (at your option) any later version.
Game Theoretic Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with Game Theoretic Library.
If not, see <http://www.gnu.org/licenses/>.
*/
#include "algorithms/cfr.h"
#include "algorithms/strategy.h"
#include "domains/matching_pennies.h"
#include "tests/domainsTest.h"
#include <boost/test/unit_test.hpp>
#include <algorithms/utility.h>
#include <algorithms/mccfr.h>
#include "algorithms/tree.h"
#include "domains/goofSpiel.h"
#include "algorithms/bestResponse.h"
namespace GTLib2 {

using algorithms::mccfr_OutcomeSampling;

BOOST_AUTO_TEST_SUITE(MCCFRTest)

/**
 * Regression test for MCCFR outcome sampling on a small IIGoofSpiel domain.
 *
 * For each tested iteration count the algorithm is re-run from scratch with a
 * fixed seed, and three quantities computed from the resulting strategy are
 * compared against stored reference values:
 *  - the utility returned by computeUtilityTwoPlayersGame when both players
 *    follow the strategy,
 *  - the two best-response values returned by bestResponseTo.
 *
 * NOTE(review): the reference values look like they were recorded from an
 * earlier run with seed 0 - confirm before changing the seed, the domain
 * parameters or the sampling code, since any of those would invalidate them.
 */
BOOST_AUTO_TEST_CASE(CheckConvergenceInSmallDomain) {
    // One row per tested iteration count. Keeping the iteration count and all
    // expected values in a single table means the loop below can never index
    // past the end of one of several parallel arrays.
    struct ExpectedResult {
        int noIterations;
        double utility;
        double bestResp0;
        double bestResp1;
    };
    const ExpectedResult expectedResults[] = {
        {0,    0.0,          1.33333,    1.33333},
        {2000, -0.00273666,  0.00533179, 0.00988586},
        {4000, -0.001406,    0.00265306, 0.00492998},
        {6000, -0.000945625, 0.0017659,  0.00328382},
        {8000, -0.000712317, 0.00132337, 0.0024618},
    };

    // Absolute tolerance used for every comparison against a reference value.
    const double tolerance = 0.00001;
    // Fixed seed so that the sampled strategy is reproducible between runs.
    const int seed = 0;

    // NOTE(review): meaning of the (3, 3, 0) constructor arguments is not
    // visible here - see domains/goofSpiel.h.
    domains::IIGoofSpielDomain domain(3, 3, 0);

    for (const auto &expected : expectedResults) {
        auto strategy = mccfr_OutcomeSampling(domain, expected.noIterations, seed);

        auto bestResp0 =
            algorithms::bestResponseTo(strategy, Player(1), Player(0), domain).second;
        auto bestResp1 =
            algorithms::bestResponseTo(strategy, Player(0), Player(1), domain).second;
        double utility = algorithms::computeUtilityTwoPlayersGame(
            domain, strategy, strategy, Player(0), Player(1)).first;

        // The messages identify the failing iteration count and the actual
        // value, which a plain BOOST_CHECK would not report.
        BOOST_CHECK_MESSAGE(
            std::abs(utility - expected.utility) <= tolerance,
            "utility after " << expected.noIterations << " iterations is "
                << utility << ", expected " << expected.utility);
        BOOST_CHECK_MESSAGE(
            std::abs(bestResp0 - expected.bestResp0) <= tolerance,
            "bestResp0 after " << expected.noIterations << " iterations is "
                << bestResp0 << ", expected " << expected.bestResp0);
        BOOST_CHECK_MESSAGE(
            std::abs(bestResp1 - expected.bestResp1) <= tolerance,
            "bestResp1 after " << expected.noIterations << " iterations is "
                << bestResp1 << ", expected " << expected.bestResp1);
    }
}

BOOST_AUTO_TEST_SUITE_END()

}  // namespace GTLib2
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment