accurating
def fit(
    data: MatchResultArrays,
    config: Config,
) -> Model:
    """Fit one rating per (player, season) to the match results in `data`.

    The objective is maximized with full-batch momentum gradient ascent that
    restarts (rolls back and shrinks the learning rate) whenever a step makes
    the objective worse. Every step evaluates the objective and its gradient
    over all matches and over the whole (player_count, season_count) rating
    table.

    Args:
        data: Match arrays. `p1`, `p2`, `season` and `p1_win_prob` must all
            have shape (match_count,). `data.player_name` is iterated to
            build the result, so it must not be None here.
        config: Hyper-parameters (smoothing, priors, learning rate, ...).

    Returns:
        A Model whose `rating[name][season]` is the fitted rating multiplied
        by `config.rating_difference_for_2_to_1_odds`.
    """
    if config.do_log:
        print(config)
    p1_win_probs = data.p1_win_prob
    p1s = data.p1
    p2s = data.p2
    seasons = data.season

    # Label smoothing: blend every observed result with a 50/50 outcome.
    p1_win_probs = (1 - config.smoothing) * p1_win_probs + config.smoothing * 0.5
    p2_win_probs = 1.0 - p1_win_probs

    # Ids and seasons are used directly as array indices, hence the "+ 1".
    player_count = int(jnp.maximum(jnp.max(p1s), jnp.max(p2s)) + 1)
    season_count = int(jnp.max(seasons) + 1)

    (data_size,) = p1s.shape
    assert seasons.shape == (data_size,)
    assert p1s.shape == (data_size,)
    assert p2s.shape == (data_size,)
    assert p1_win_probs.shape == (data_size,)

    # Priors are configured in user-facing rating points; the optimizer works
    # in units where a difference of 1.0 means 2:1 odds.
    winner_prior = config.winner_prior_rating / config.rating_difference_for_2_to_1_odds
    loser_prior = config.loser_prior_rating / config.rating_difference_for_2_to_1_odds

    def model(params):
        """Return (objective / data_size, geometric-mean data probability)."""
        log_likelihood = 0.0
        ratings = params['rating']
        assert ratings.shape == (player_count, season_count)
        p1_ratings = ratings[p1s, seasons]
        p2_ratings = ratings[p2s, seasons]

        assert p1_ratings.shape == (data_size,)
        assert p2_ratings.shape == (data_size,)

        # We need to sum instead of averaging, because the more data we have,
        # the more it should outweigh the priors and even the
        # season_rating_stability.
        sum_log_data_prob = jnp.sum(
            log_data_prob(p1_ratings, p2_ratings, p1_win_probs, p2_win_probs))
        log_likelihood += sum_log_data_prob

        if config.season_rating_stability > 0.0:
            # Penalize rating changes between consecutive seasons.
            log_likelihood -= config.season_rating_stability * jnp.sum(
                (ratings[:, 1:] - ratings[:, :-1])**2)

        if config.winner_prior_match_count > 0.0:
            log_likelihood += jnp.sum(log_data_prob(
                ratings, jnp.ones_like(ratings) * winner_prior,
                0.0, config.winner_prior_match_count))

        if config.loser_prior_match_count > 0.0:
            log_likelihood += jnp.sum(log_data_prob(
                ratings, jnp.ones_like(ratings) * loser_prior,
                config.loser_prior_match_count, 0.0))

        geomean_data_prob = jnp.exp2(sum_log_data_prob / data_size)
        return log_likelihood / data_size, geomean_data_prob

        # TODO: This is an experiment trying to evaluate ELO playing consistency. Try again and delete if does not work.
        # cons = params['consistency']
        # p1_cons = jnp.take(cons, p1s)
        # p2_cons = jnp.take(cons, p2s)
        # winner_win_prob_log = 0.0
        # winner_win_prob_log += p1_win_probs * log_win_prob_diff(diff/jnp.exp(p1_cons)) + p2_win_probs * log_win_prob_diff(-diff/jnp.exp(p1_cons))
        # winner_win_prob_log += p1_win_probs * log_win_prob_diff(diff/jnp.exp(p2_cons)) + p2_win_probs * log_win_prob_diff(-diff/jnp.exp(p2_cons))
        # winner_win_prob_log /= 2
        # return jnp.sum(winner_win_prob_log) - 0.005*jnp.sum(cons ** 2) # or mean?

    # Optimize for these params (start every rating midway between the priors):
    rating = jnp.zeros([player_count, season_count], dtype=jnp.float64) + (loser_prior + winner_prior) / 2.0
    params = {'rating': rating}
    # 'consistency': jnp.zeros([player_count, season_count]),

    # Momentum gradient ascent with restarts.
    m_lr = 1.0
    lr = float(config.initial_lr)
    momentum = tree_map(jnp.zeros_like, params)
    last_params = params
    last_objective = -1e8  # eval of initial data is -1, but regularizations might push it lower.
    last_grad = tree_map(jnp.zeros_like, params)
    last_reset_step = 0

    for i in range(config.max_steps):
        (objective, model_fit), grad = jax.value_and_grad(model, has_aux=True)(params)

        # Standard batch gradient ascent (params + lr * grad) works too with a
        # good LR; the momentum variant with restarts below is what is used.
        if objective < last_objective:
            # The last step overshot: roll back to the best known point,
            # shrink the learning rate and drop the accumulated momentum.
            if config.do_log:
                print(f'reset to {jnp.exp2(last_objective)}')
            lr /= 1.5
            if last_reset_step == i-1:
                # Two resets in a row: shrink much more aggressively.
                lr /= 4
            last_reset_step = i
            momentum = tree_map(jnp.zeros_like, params)
            # momentum /= 2.
            params, objective, grad = last_params, last_objective, last_grad
        else:
            last_params, last_objective, last_grad = params, objective, grad
        # Must run after a reset as well; otherwise params would equal
        # last_params and the convergence check below would stop the loop.
        momentum = tree_map(lambda m, g: m_lr * m + g, momentum, grad)
        params = tree_map(lambda p, m: p + lr * m, params, momentum)

        max_d_rating = jnp.max(
            jnp.abs(params['rating'] - last_params['rating']))

        if config.do_log:
            g = jnp.linalg.norm(grad['rating'])
            print(
                f'Step {i:4}: eval={jnp.exp2(objective):0.12f} pred_power={model_fit:0.6f} lr={lr: 4.4f} grad={g:2.4f} delta={max_d_rating}')

        if max_d_rating < 1e-15:
            break

        # Slowly grow the learning rate again (x1.5 every 12 steps).
        lr *= 1.5 ** (1.0 / 12)

    def postprocess():
        """Convert the fitted array to a Model and optionally print a table."""
        rating = {}
        last_rating = []
        for player_id, name in enumerate(data.player_name):
            rating[name] = {}
            for season in range(season_count):
                rating[name][season] = (
                    float(params['rating'][player_id, season])
                    * config.rating_difference_for_2_to_1_odds)
            last_rating.append((rating[name][season_count - 1], name))
        if config.do_log:
            # Newest season first. Headers and rows share this order so that
            # every header (including S0) has a matching value in each row.
            season_order = range(season_count - 1, -1, -1)
            headers = ['Nick'] + [f'S{season}' for season in season_order]
            last_rating.sort(reverse=True)
            table = [
                [name] + [rating[name][season] for season in season_order]
                for _, name in last_rating
            ]
            print(tabulate(table, headers=headers, floatfmt=".1f", numalign="decimal"))

        return Model(rating=rating)

    return postprocess()
Fits the model to data according to config. The time complexity is O(match_count * player_count * max(season) * steps)
def data_from_dicts(matches) -> MatchResultArrays:
    """Build MatchResultArrays from an iterable of match dicts.

    Each match must provide the keys 'p1', 'p2', 'winner' and 'season'.
    'winner' has to equal either 'p1' or 'p2', and 'season' has to be an int.
    Player ids are the positions of the player names in sorted order.

    Args:
        matches: Iterable of match dicts. It is materialized once, so one-shot
            iterables such as generators are supported.

    Returns:
        MatchResultArrays with one entry per match, in input order, and
        `player_name` set to the sorted list of distinct player names.
    """
    # Two passes are needed (collect names, then encode), so take a snapshot.
    matches = list(matches)

    player_set = set()
    for match in matches:
        player_set.add(match['p1'])
        player_set.add(match['p2'])
        assert match['winner'] == match['p1'] or match['winner'] == match['p2'], match
        assert isinstance(match['season'], int)

    player_name = sorted(player_set)
    # Name -> id lookup; avoids a linear list.index() scan for every match.
    player_id = {name: index for index, name in enumerate(player_name)}

    p1 = []
    p2 = []
    p1_win_prob = []
    season = []

    for match in matches:
        p1.append(player_id[match['p1']])
        p2.append(player_id[match['p2']])
        p1_win = match['winner'] == match['p1']
        p1_win_prob.append(1.0 if p1_win else 0.0)
        season.append(match['season'])

    return MatchResultArrays(
        p1=np.array(p1),
        p2=np.array(p2),
        p1_win_prob=np.array(p1_win_prob),
        season=np.array(season),
        player_name=player_name,
    )
39@dataclasses.dataclass 40class MatchResultArrays: 41 """Match data for AccuRating in numpy arrays. 42 All attributes have a shape (match_count,). 43 """ 44 45 p1: np.ndarray 46 """Player 1 id (small integer).""" 47 48 p2: np.ndarray 49 """Player 2 id (small integer).""" 50 51 p1_win_prob: np.ndarray 52 """1.0 if p1 wins, 0.0 if p2 wins. Can be any number in [0.0, 1.0].""" 53 54 season: np.ndarray 55 """Currently the seasons have to be small integers.""" 56 57 player_name: list[str] | None 58 """Indexed with player id. Not used in the training."""
Match data for AccuRating in numpy arrays. All attributes have a shape (match_count,).
def win_prob(rating, opp_rating):
    """Return the probability that `rating` wins against `opp_rating`.

    Ratings are on a base-2 scale: a one-unit advantage means 2:1 odds.
    """
    # Algebraically the same as
    #   exp2(rating) / (exp2(rating) + exp2(opp_rating))
    # but written with a single exponential.
    rating_deficit = opp_rating - rating
    return 1.0 / (1.0 + jnp.exp2(rating_deficit))
Probability of win for given ratings.
@dataclasses.dataclass
class Config:
    """Settings that control how AccuRating fits ratings."""

    season_rating_stability: float
    """How strongly a player's rating is tied across consecutive seasons.

    Currently the seasons have to be small integers.
    season_rating_stability = 0 means that the ratings of each season are completely separate.
    season_rating_stability = inf means that the ratings should be the same in every season."""

    smoothing: float
    """Balance between match results and player pairings as sources of data.

    There are two sources of data:
    - Match result: the winner probably has a higher rating than the loser.
    - Player pairing: matched players probably have similar strength.
    Setting smoothing to 0.0 ignores the player pairing and relies on the match result only.
    Setting smoothing to 1.0 ignores the match result and relies on the player pairing only.

    Typically, in the absence of data, rating systems assume a prior that the skill of a
    player is some fixed value like 1000. This keeps the rating from escaping to infinity
    when only losses or only wins are available.
    Smoothing instead lets you specify that the loser (in every match) had a small chance
    of winning. This is also known as 'label smoothing'."""

    winner_prior_rating: float = 4000.0
    winner_prior_match_count: float = 0.0
    loser_prior_rating: float = 1000.0
    loser_prior_match_count: float = 0.0
    """Prior expressed as games against two virtual players.

    The virtual players have fixed ratings winner_prior_rating and loser_prior_rating;
    the first always wins and the second always loses.
    For every player and *every season*, winner_prior_match_count (loser_prior_match_count)
    games against them are added to the data set.
    The match counts should be much smaller than the actual number of matches that players played.
    If the match counts are 0.0 the prior is disabled, so the resulting ratings float
    (they can be shifted as a whole by a constant).
    """

    max_steps: int = 1_000_000
    """Upper limit on the number of passes over the dataset."""

    do_log: bool = False
    """Turns on additional logging."""

    initial_lr: float = 10000.0
    """Starting learning rate. It is adjusted automatically, but sometimes it is too large and blows up."""

    rating_difference_for_2_to_1_odds: float = 100.0
    """Rating difference that corresponds to 2:1 win odds.

    Odds scale geometrically (see win_prob): twice the difference gives 4:1 odds.
    You can change it to 120.412 to match the chess ELO scale.
    Apart from rescaling the final result, it also rescales the prior ratings in this config above."""
AccuRating configuration.
Rating stability across seasons.
Currently the seasons have to be small integers. season_rating_stability = 0 means that ratings at each season are completely separate. season_rating_stability = inf means that ratings at each season should be the same.
Balance between match results and player pairings as the sources of data. There are two sources of data:
- Match result: Winner probably has a higher rating than the loser.
- Player pairing: Matched players probably have similar strength. Setting smoothing to 0.0 ignores the player pairing and relies on the match result only. Setting smoothing to 1.0 ignores the match result and relies on the player pairing only.
Typically, in the absence of data, rating systems assume a prior that the skill of a player is some fixed value like 1000. This keeps the rating from escaping to infinity when only losses or only wins are available. Smoothing instead lets you specify that the loser (in every match) had a small chance of winning. This is also known as 'label smoothing'.
Adds two virtual players with fixed ratings of winner_prior_rating and loser_prior_rating that will always win and always lose, respectively. Adds to the data set, for every player and every season, winner_prior_match_count (loser_prior_match_count) games with them. The match counts should be much smaller than the actual number of matches that players played. If the match counts are set to 0.0 the prior is disabled, so the resulting ratings float (they can be shifted as a whole by a constant).
@dataclasses.dataclass
class Model:
    """Result of fitting: the trained ratings."""

    rating: dict[str, dict[int, float]]
    """Ratings keyed first by player name, then by season."""
Trained model.