~bonbon/gmcts

cb1748753b054956459d5e9101270e51aa405377 — bonbon 1 year, 6 months ago e8594bb v1.1.1
use democracy to get the best action among trees
2 files changed, 30 insertions(+), 34 deletions(-)

M mcts.go
M tree.go
M mcts.go => mcts.go +12 -34
@@ 77,45 77,23 @@ func (m *MCTS) BestAction() Action {

	//Safe guard set in place in case we're dealing
	//with a terminal state
	rootState := m.init
	baseActions := rootState.GetActions()
	if len(baseActions) == 0 || rootState.IsTerminal() {
	if m.init.IsTerminal() {
		return nil
	}

	//Loop through each action and node and calculate the best
	//winrate each action had when searching the trees
	bestAction := baseActions[0]
	bestWinRate := 0.0
	playerTakingAction := rootState.Player()
	for _, a := range baseActions {
		var score float64
		var visits float64

		for i := range m.trees {
			var child *node
			root := m.trees[i].current
			for j := 0; j < root.actionCount; j++ {
				if a == root.actions[j] {
					child = root.children[j]
					break
				}
			}
			if child == nil {
				continue
			}
			score += child.nodeScore[playerTakingAction]
			visits += child.nodeVisits
		}

		if visits == 0 {
			continue
		}
	//Democracy Section: each tree votes for an action
	actionScore := make(map[Action]int)
	for _, t := range m.trees {
		actionScore[t.bestAction()]++
	}

		winRate := score / visits
		if winRate > bestWinRate {
	//Democracy Section: the action with the most votes wins
	var bestAction Action
	var mostVotes int
	for a, s := range actionScore {
		if s > mostVotes {
			bestAction = a
			bestWinRate = winRate
			mostVotes = s
		}
	}
	return bestAction

M tree.go => tree.go +18 -0
@@ 44,3 44,21 @@ func (t *Tree) SearchRounds(rounds int) {
//search runs one simulation starting from the node the tree
//currently treats as its root.
func (t *Tree) search() {
	root := t.current
	root.runSimulation()
}

//bestAction returns the action at the tree's current root whose child
//has the highest win rate for the player to move at the root.
//
//A child's win rate is its accumulated score for that player divided by
//the number of visits recorded for that child on the root. Children that
//are nil or that have no recorded visits are skipped, since they carry
//no statistics to compare. Ties are resolved in favor of the action with
//the lowest index. If no child qualifies, the zero-value Action is
//returned.
func (t *Tree) bestAction() Action {
	root := t.current
	player := root.state.Player()

	//Select the child with the highest winrate
	var bestAction Action
	bestWinRate := -1.0
	for i := 0; i < root.actionCount; i++ {
		child := root.children[i]
		visits := root.childVisits[i]

		//Skipping here avoids a nil dereference and a division by
		//zero, which would yield NaN or an infinite winrate.
		if child == nil || visits == 0 {
			continue
		}

		winRate := child.nodeScore[player] / visits
		if winRate > bestWinRate {
			bestAction = root.actions[i]
			bestWinRate = winRate
		}
	}

	return bestAction
}