accurating

from .model import (
    MatchResultArrays,
    Config,
    Model,
    fit,
    win_prob,
    data_from_dicts,
)


__all__ = [
    "fit",
    "data_from_dicts",
    "MatchResultArrays",
    "win_prob",
    "Config",
    "Model",
]
def fit(data: accurating.MatchResultArrays, config: accurating.Config) -> accurating.Model:
def fit(
    data: MatchResultArrays,
    config: Config,
) -> Model:
    """Fits the model to data according to config.
    The time complexity is O(match_count * player_count * max(season) * steps)
    """
    if config.do_log:
        print(config)
    p1_win_probs = data.p1_win_prob
    p1s = data.p1
    p2s = data.p2
    seasons = data.season

    p1_win_probs = (1 - config.smoothing) * \
        p1_win_probs + config.smoothing * 0.5
    p2_win_probs = 1.0 - p1_win_probs

    player_count = int(jnp.maximum(jnp.max(p1s), jnp.max(p2s)) + 1)
    season_count = int(jnp.max(seasons) + 1)

    (data_size,) = p1s.shape
    assert seasons.shape == (data_size,)
    assert p1s.shape == (data_size,)
    assert p2s.shape == (data_size,)
    assert p1_win_probs.shape == (data_size,)

    winner_prior = config.winner_prior_rating / config.rating_difference_for_2_to_1_odds
    loser_prior = config.loser_prior_rating / config.rating_difference_for_2_to_1_odds

    def model(params):
        log_likelihood = 0.0
        ratings = params['rating']
        assert ratings.shape == (player_count, season_count)
        p1_ratings = ratings[p1s, seasons]
        p2_ratings = ratings[p2s, seasons]

        assert p1_ratings.shape == (data_size,)
        assert p2_ratings.shape == (data_size,)

        # We need to sum instead of averaging, because the more data we have,
        # the more it should outweigh the priors and even the season_rating_stability.
        mean_log_data_prob = jnp.sum(log_data_prob(p1_ratings, p2_ratings, p1_win_probs, p2_win_probs))
        log_likelihood += mean_log_data_prob

        if config.season_rating_stability > 0.0:
            log_likelihood -= config.season_rating_stability * jnp.sum((ratings[:, 1:] - ratings[:, :-1])**2)

        if config.winner_prior_match_count > 0.0:
            log_likelihood += jnp.sum(log_data_prob(ratings, jnp.ones_like(ratings) * winner_prior, 0.0, config.winner_prior_match_count))

        if config.loser_prior_match_count > 0.0:
            log_likelihood += jnp.sum(log_data_prob(ratings, jnp.ones_like(ratings) * loser_prior, config.loser_prior_match_count, 0.0))

        geomean_data_prob = jnp.exp2(mean_log_data_prob / data_size)
        return log_likelihood / data_size, geomean_data_prob

        # TODO: This is an experiment trying to evaluate Elo playing consistency. Try again and delete if it does not work.
        # cons = params['consistency']
        # p1_cons = jnp.take(cons, p1s)
        # p2_cons = jnp.take(cons, p2s)
        # winner_win_prob_log = 0.0
        # winner_win_prob_log += p1_win_probs * log_win_prob_diff(diff/jnp.exp(p1_cons)) + p2_win_probs * log_win_prob_diff(-diff/jnp.exp(p1_cons))
        # winner_win_prob_log += p1_win_probs * log_win_prob_diff(diff/jnp.exp(p2_cons)) + p2_win_probs * log_win_prob_diff(-diff/jnp.exp(p2_cons))
        # winner_win_prob_log /= 2
        # return jnp.sum(winner_win_prob_log) - 0.005*jnp.sum(cons ** 2)  # or mean?

    # Optimize for these params:
    rating = jnp.zeros([player_count, season_count], dtype=jnp.float64) + (loser_prior + winner_prior) / 2.0
    params = {'rating': rating}
    # 'consistency': jnp.zeros([player_count, season_count]),

    # Momentum gradient descent with restarts.
    m_lr = 1.0
    lr = float(config.initial_lr)
    momentum = tree_map(jnp.zeros_like, params)
    last_params = params
    last_eval = -1e8  # Eval of initial data is -1, but regularizations might push it lower.
    last_grad = tree_map(jnp.zeros_like, params)
    last_reset_step = 0

    for i in range(config.max_steps):
        (eval, model_fit), grad = jax.value_and_grad(model, has_aux=True)(params)

        if False:
            # Standard batch gradient descent works too. Just use a good LR.
            params = tree_map(lambda p, g: p + lr * g, params, grad)
        else:
            if eval < last_eval:
                if config.do_log:
                    print(f'reset to {jnp.exp2(last_eval)}')
                lr /= 1.5
                if last_reset_step == i - 1:
                    lr /= 4
                last_reset_step = i
                momentum = tree_map(jnp.zeros_like, params)
                # momentum /= 2.
                params, eval, grad = last_params, last_eval, last_grad
            else:
                last_params, last_eval, last_grad = params, eval, grad
            momentum = tree_map(lambda m, g: m_lr * m + g, momentum, grad)
            params = tree_map(lambda p, m: p + lr * m, params, momentum)

        max_d_rating = jnp.max(
            jnp.abs(params['rating'] - last_params['rating']))

        if config.do_log:
            g = jnp.linalg.norm(grad['rating'])
            print(
                f'Step {i:4}: eval={jnp.exp2(eval):0.12f} pred_power={model_fit:0.6f} lr={lr: 4.4f} grad={g:2.4f} delta={max_d_rating}')

        if max_d_rating < 1e-15:
            break

        lr *= 1.5 ** (1.0 / 12)

    def postprocess():
        rating = {}
        last_rating = []
        for id, name in enumerate(data.player_name):
            rating[name] = {}
            for season in range(season_count):
                rating[name][season] = float(params['rating'][id, season]) * config.rating_difference_for_2_to_1_odds
            last_rating.append((rating[name][season_count - 1], name))
        if config.do_log:
            headers = ['Nick']
            for season in range(season_count - 1, -1, -1):
                headers.append(f'S{season}')
            last_rating.sort(reverse=True)
            table = []
            for _, name in last_rating:
                # if len(table) > 10: break  # max rows
                row = [name]
                for season in range(season_count - 1, 0, -1):
                    row.append(rating[name][season])
                table.append(row)
            print(tabulate(table, headers=headers, floatfmt=".1f", numalign="decimal"))

        return Model(rating=rating)

    return postprocess()

Fits the model to data according to config. The time complexity is O(match_count * player_count * max(season) * steps).
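
For orientation, a minimal usage sketch (the player names, seasons, and config values below are illustrative, not taken from the library's documentation):

    import accurating

    matches = [
        dict(p1="Alice", p2="Bob", winner="Alice", season=0),
        dict(p1="Bob", p2="Carol", winner="Carol", season=0),
        dict(p1="Alice", p2="Carol", winner="Alice", season=1),
    ]
    data = accurating.data_from_dicts(matches)
    config = accurating.Config(
        season_rating_stability=0.5,
        smoothing=0.1,
        max_steps=100,
    )
    model = accurating.fit(data, config)
    print(model.rating["Alice"][1])  # Alice's rating in season 1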

def data_from_dicts(matches) -> accurating.MatchResultArrays:
def data_from_dicts(matches) -> MatchResultArrays:
    player_set = set()

    for match in matches:
        player_set.add(match['p1'])
        player_set.add(match['p2'])
        assert match['winner'] == match['p1'] or match['winner'] == match['p2'], match
        assert isinstance(match['season'], int)

    player_name = sorted(list(player_set))

    p1 = []
    p2 = []
    p1_win_prob = []
    season = []

    for match in matches:
        p1.append(player_name.index(match['p1']))
        p2.append(player_name.index(match['p2']))
        p1_win = match['winner'] == match['p1']
        p1_win_prob.append(1.0 if p1_win else 0.0)
        season.append(match['season'])

    return MatchResultArrays(
        p1=np.array(p1),
        p2=np.array(p2),
        p1_win_prob=np.array(p1_win_prob),
        season=np.array(season),
        player_name=player_name,
    )
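
Converts a list of match dicts into MatchResultArrays. A small sketch of the expected input, assuming the package is importable as accurating (the names are illustrative):

    import accurating

    # Each match dict needs 'p1', 'p2', 'winner' (equal to one of the two) and an integer 'season'.
    matches = [
        dict(p1="A", p2="B", winner="B", season=0),
        dict(p1="B", p2="C", winner="B", season=0),
    ]
    data = accurating.data_from_dicts(matches)
    assert data.player_name == ["A", "B", "C"]   # sorted player names
    assert list(data.p1) == [0, 1]               # indices into player_name
    assert list(data.p1_win_prob) == [0.0, 1.0]  # 1.0 iff p1 won
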
@dataclasses.dataclass
class MatchResultArrays:
@dataclasses.dataclass
class MatchResultArrays:
    """Match data for AccuRating in numpy arrays.
    All attributes have a shape (match_count,).
    """

    p1: np.ndarray
    """Player 1 id (small integer)."""

    p2: np.ndarray
    """Player 2 id (small integer)."""

    p1_win_prob: np.ndarray
    """1.0 if p1 wins, 0.0 if p2 wins. Can be any number in [0.0, 1.0]."""

    season: np.ndarray
    """Currently the seasons have to be small integers."""

    player_name: list[str] | None
    """Indexed with player id. Not used during training."""

Match data for AccuRating in numpy arrays. All attributes have a shape (match_count,).

MatchResultArrays(p1: numpy.ndarray, p2: numpy.ndarray, p1_win_prob: numpy.ndarray, season: numpy.ndarray, player_name: list[str] | None)
p1: numpy.ndarray

Player 1 id (small integer).

p2: numpy.ndarray

Player 2 id (small integer).

p1_win_prob: numpy.ndarray

1.0 if p1 wins, 0.0 if p2 wins. Can be any number in [0.0, 1.0].

season: numpy.ndarray

Currently the seasons have to be small integers.

player_name: list[str] | None

Indexed with player id. Not used during training.
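
The arrays can also be built directly, bypassing data_from_dicts. A sketch with made-up data:

    import numpy as np
    import accurating

    # Two matches between players 0 ("Alice") and 1 ("Bob") in season 0.
    data = accurating.MatchResultArrays(
        p1=np.array([0, 1]),
        p2=np.array([1, 0]),
        p1_win_prob=np.array([1.0, 0.3]),  # fractional outcomes are allowed
        season=np.array([0, 0]),
        player_name=["Alice", "Bob"],
    )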

def win_prob(rating, opp_rating):
def win_prob(rating, opp_rating):
    """Probability of win for given ratings."""
    return 1.0 / (1.0 + jnp.exp2(opp_rating - rating))
    # This is more understandable and equivalent:
    # return jnp.exp2(rating) / (jnp.exp2(rating) + jnp.exp2(opp_rating))

Probability of win for given ratings.
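
In these units a rating difference of 1.0 corresponds to 2:1 odds. Note that ratings stored in Model are multiplied by Config.rating_difference_for_2_to_1_odds (100 by default), so divide by that factor before passing them here; this follows from the source of fit() rather than from a documented guarantee. A short sketch:

    import accurating

    accurating.win_prob(1.0, 0.0)   # ~0.667, i.e. 2:1 odds
    accurating.win_prob(0.0, 0.0)   # 0.5 for equal ratings

    # Ratings taken from a fitted Model (hypothetical values):
    scale = 100.0  # config.rating_difference_for_2_to_1_odds
    accurating.win_prob(1150.0 / scale, 1050.0 / scale)  # ~0.667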

@dataclasses.dataclass
class Config:
@dataclasses.dataclass
class Config:
    """AccuRating configuration."""

    season_rating_stability: float
    """Rating stability across seasons.

    Currently the seasons have to be small integers.
    season_rating_stability = 0 means that ratings at each season are completely separate.
    season_rating_stability = inf means that ratings at each season should be the same."""

    smoothing: float
    """Balance between match results and player pairings as the sources of data.
    There are two sources of data:
    - Match result: the winner probably has a higher rating than the loser.
    - Player pairing: matched players probably have similar strength.
    Setting smoothing to 0.0 ignores player pairing and relies on the match results only.
    Setting smoothing to 1.0 ignores match results and relies on player pairing only.

    Typically, in the absence of data, ratings assume a prior that the skill of a player is some fixed value like 1000.
    This allows the rating to not escape to infinity when only losses or only wins are available.
    Smoothing essentially allows specifying that the loser (in every match) had a small chance of winning.
    This is also known as 'label smoothing'."""

    winner_prior_rating: float = 4000.0
    winner_prior_match_count: float = 0.0
    loser_prior_rating: float = 1000.0
    loser_prior_match_count: float = 0.0
    """Adds two virtual players with fixed ratings of winner_prior_rating and loser_prior_rating that always win and always lose, respectively.
    Adds to the data set, for every player and *every season*, winner_prior_match_count (loser_prior_match_count) games against them.
    The match counts should be much smaller than the actual number of matches that players played.
    If the match counts are set to 0.0, the prior is disabled and the resulting ratings float (can be shifted as a whole by a constant).
    """

    max_steps: int = 1_000_000
    """Limits the number of passes over the dataset."""

    do_log: bool = False
    """Enables additional logging."""

    initial_lr: float = 10000.0
    """It is adjusted automatically, but sometimes it is too large and blows up."""

    rating_difference_for_2_to_1_odds: float = 100.0
    """That many points of difference creates 2:1 win odds.
    Twice the difference predicts 4:1 odds.
    You can change it to 120.412 to match the chess Elo scale.
    Apart from rescaling the final result, it also rescales the prior ratings in this config."""

AccuRating configuration.

Config(season_rating_stability: float, smoothing: float, winner_prior_rating: float = 4000.0, winner_prior_match_count: float = 0.0, loser_prior_rating: float = 1000.0, loser_prior_match_count: float = 0.0, max_steps: int = 1000000, do_log: bool = False, initial_lr: float = 10000.0, rating_difference_for_2_to_1_odds: float = 100.0)
season_rating_stability: float

Rating stability across seasons.

Currently the seasons have to be small integers. season_rating_stability = 0 means that ratings at each season are completely separate. season_rating_stability = inf means that ratings at each season should be the same.

smoothing: float

Balance between match results and player pairings as the sources of data. There are two sources of data:

  • Match result: the winner probably has a higher rating than the loser.
  • Player pairing: matched players probably have similar strength.

Setting smoothing to 0.0 ignores player pairing and relies on the match results only. Setting smoothing to 1.0 ignores match results and relies on player pairing only. For example, with smoothing = 0.1, a recorded win of 1.0 is trained as 0.95 and a recorded loss of 0.0 as 0.05.

Typically, in the absence of data, ratings assume a prior that the skill of a player is some fixed value like 1000. This allows the rating to not escape to infinity when only losses or only wins are available. Smoothing essentially allows specifying that the loser (in every match) had a small chance of winning. This is also known as 'label smoothing'.

winner_prior_rating: float = 4000.0

winner_prior_match_count: float = 0.0

loser_prior_rating: float = 1000.0

loser_prior_match_count: float = 0.0

Adds two virtual players with fixed ratings of winner_prior_rating and loser_prior_rating that always win and always lose, respectively. Adds to the data set, for every player and every season, winner_prior_match_count (loser_prior_match_count) games against them. The match counts should be much smaller than the actual number of matches that players played. If the match counts are set to 0.0, the prior is disabled and the resulting ratings float (can be shifted as a whole by a constant).

max_steps: int = 1000000

Limits the number of passes over the dataset.

do_log: bool = False

Enables additional logging.

initial_lr: float = 10000.0

It is automatically adjusted, but sometimes it is too large and blows up.

rating_difference_for_2_to_1_odds: float = 100.0

That many points of difference creates 2:1 win odds. Twice the difference predicts 4:1 odds. You can change it to 120.412 to match the chess Elo scale. Apart from rescaling the final result, it also rescales the prior ratings in this config.
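
A sketch of a non-default configuration (the values are illustrative, not recommendations):

    import accurating

    config = accurating.Config(
        season_rating_stability=0.5,    # couple consecutive seasons softly
        smoothing=0.1,                  # 10% label smoothing of match outcomes
        winner_prior_match_count=2.0,   # enable the priors so ratings cannot drift as a whole
        loser_prior_match_count=2.0,
        rating_difference_for_2_to_1_odds=120.412,  # chess Elo scale
    )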

@dataclasses.dataclass
class Model:
@dataclasses.dataclass
class Model:
    """Trained model."""

    rating: dict[str, dict[int, float]]
    """Player rating, indexed by name and season."""

Trained model.

Model(rating: dict[str, dict[int, float]])
rating: dict[str, dict[int, float]]

Player rating, indexed by name and season.
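
Continuing the fit() sketch above, ratings can be read out like this (the names are illustrative):

    model = accurating.fit(data, config)
    alice_latest = model.rating["Alice"][max(model.rating["Alice"])]
    for name, seasons in model.rating.items():
        last_season = max(seasons)
        print(f"{name}: {seasons[last_season]:.1f} (season {last_season})")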