# S2S_Evaluation / elo_rank.py
# Hugging Face file-viewer header (page residue, not code): uploader KurtDu,
# commit "Upload 4 files" (dff2993, verified), file size 4.87 kB.
import random
import json
class EloRank:
    """
    Maintain ELO ratings and win counts for a set of competing models.

    Ratings live in ``self.ratings`` and win counts in ``self.wins``, both
    keyed by model ID. Matches can be recorded one at a time, replayed from a
    JSON file, and the next pairing can be sampled with rating-proportional
    weights.
    """

    def __init__(self, initial_rating=1000, k_factor=32):
        """
        Initialize the EloRank class.

        :param initial_rating: Initial ELO rating for each model.
        :param k_factor: The K-factor that determines the sensitivity of rating changes.
        """
        self.ratings = {}
        self.initial_rating = initial_rating
        self.k_factor = k_factor
        self.wins = {}

    def add_model(self, model_id):
        """
        Add a new model with the initial rating.

        Calling this for a model that is already registered resets its rating
        to the initial rating and its win count to zero.

        :param model_id: Unique identifier for the model.
        """
        self.ratings[model_id] = self.initial_rating
        self.wins[model_id] = 0

    def record_match(self, winner, loser):
        """
        Update the ratings based on a match result.

        :param winner: Model ID of the winner.
        :param loser: Model ID of the loser.
        :raises KeyError: If either model has not been added.
        """
        # Read both ratings before mutating anything so that an unknown
        # model ID fails without leaving a half-applied update behind.
        rating_winner = self.ratings[winner]
        rating_loser = self.ratings[loser]

        expected_winner = self.expected_score(rating_winner, rating_loser)
        expected_loser = self.expected_score(rating_loser, rating_winner)

        # Actual score is 1 for the winner and 0 for the loser.
        self.ratings[winner] += self.k_factor * (1 - expected_winner)
        self.ratings[loser] += self.k_factor * (0 - expected_loser)

        # Update win count
        self.wins[winner] += 1

    def expected_score(self, rating_a, rating_b):
        """
        Calculate the expected score for a model.

        :param rating_a: Rating of model A.
        :param rating_b: Rating of model B.
        :return: Expected score of model A against model B, between 0 and 1.
        """
        return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

    def get_rating(self, model_id):
        """
        Get the current rating of a model.

        :param model_id: Unique identifier for the model.
        :return: Current rating of the model, or None if the model is unknown.
        """
        return self.ratings.get(model_id)

    def get_wins(self, model_id):
        """
        Get the number of wins of a model.

        :param model_id: Unique identifier for the model.
        :return: Number of wins of the model (0 if the model is unknown).
        """
        return self.wins.get(model_id, 0)

    def get_top_models(self, n=2):
        """
        Get the top N models by rating.

        :param n: Number of top models to retrieve.
        :return: List of model IDs of the top models, highest rating first.
        """
        return sorted(self.ratings, key=self.ratings.get, reverse=True)[:n]

    def sample_next_match(self):
        """
        Sample the next match based on the probability proportional to the current rating.

        Pairs are drawn with replacement and redrawn until the two models
        differ.

        :return: Tuple of two distinct model IDs for the next match.
        :raises ValueError: If fewer than two models are registered, or fewer
            than two of them have a positive rating (no valid pair could ever
            be drawn).
        """
        model_ids = list(self.ratings)
        if len(model_ids) < 2:
            raise ValueError("At least two models are required to sample a match.")

        # random.choices accepts relative weights, so the ratings can be used
        # directly without normalising them to probabilities first.
        weights = [self.ratings[model_id] for model_id in model_ids]
        if sum(1 for weight in weights if weight > 0) < 2:
            # With fewer than two positively weighted models the redraw loop
            # below could never produce two different IDs.
            raise ValueError(
                "At least two models with a positive rating are required to sample a match."
            )

        # Sample two different models for the next match
        while True:
            first, second = random.choices(model_ids, weights=weights, k=2)
            if first != second:
                return first, second

    def process_match_records(self, file_path):
        """
        Process match records from a JSON file and update ratings and win counts accordingly.

        Each record is read through its 'winner', 'model_1' and 'model_2'
        keys. Both models are registered if they are not known yet. A record
        whose winner is neither of its two models changes no rating.

        :param file_path: Path to the JSON file containing match records.
        """
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            match_records = json.load(file)

        for record in match_records:
            winner = record['winner']
            model_1 = record['model_1']
            model_2 = record['model_2']

            # Add models if they are not already added
            for model_id in (model_1, model_2):
                if model_id not in self.ratings:
                    self.add_model(model_id)

            # Record the match result
            if winner == model_1:
                self.record_match(model_1, model_2)
            elif winner == model_2:
                self.record_match(model_2, model_1)
# # Example Usage
# e = EloRank()
# e.add_model('model_A')
# e.add_model('model_B')
# e.add_model('model_C')
# e.record_match('model_A', 'model_B')
# print(e.get_rating('model_A')) # Should be greater than the initial rating
# print(e.get_rating('model_B')) # Should be less than the initial rating
# print(e.get_top_models(2)) # Get the top 2 models
# print(e.sample_next_match()) # Sample the next match based on ratings
# # Process match records from a JSON file
# e.process_match_records('match_records.json')
# print(e.get_wins('model_A')) # Get the number of wins for model_A