~bonbon/gmcts

ref: 73493dface28f87d1a6c0fd5f18ca256d445dda2 gmcts/models.go -rw-r--r-- 1.8 KiB
73493dfabonbon rework action selection system 1 year, 9 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
package gmcts

import (
	"math/rand"
	"sync"
)

//Player is an id for the player
type Player int

//Game is the interface that represents game states.
//
//Any implementation of Game should be comparable (i.e. be a key in a map)
//and immutable (state cannot change as this package calls any function).
type Game interface {
	//Len returns the number of actions to consider.
	Len() int

	//ApplyAction applies the ith action (0-indexed) to the game state,
	//and returns a new game state and an error for invalid actions
	ApplyAction(i int) (Game, error)

	//Hash returns a unique representation of the state.
	//Any return value must be comparable.
	Hash() interface{}

	//Player returns the player that can take the next action
	Player() Player

	//IsTerminal returns true if this game state is a terminal state
	IsTerminal() bool

	//Winners returns a list of players that have won the game if
	//IsTerminal() returns true
	Winners() []Player
}

type gameState struct {
	Game
	gameHash
}

type gameHash struct {
	hash interface{}

	//This is to separate states that seemingly look the same,
	//but actually occur on different turn orders. Without this,
	//the directed tree that multiple parent nodes will just
	//become a directed graph, which this MCTS implementation
	//cannot handle properly.
	turn int
}

//MCTS contains functionality for the MCTS algorithm
type MCTS struct {
	init  Game
	trees []*Tree
	mutex *sync.RWMutex
	seed  int64
}

type node struct {
	state gameState
	tree  *Tree

	children          []*node
	unvisitedChildren []*node
	childVisits       []float64
	actionCount       int

	nodeScore  map[Player]float64
	nodeVisits int
}

//Tree represents a game state tree
type Tree struct {
	current          *node
	gameStates       map[gameHash]*node
	explorationConst float64
	randSource       *rand.Rand
}