5 Model Evaluation (Human Eye Check) on Real Games - T20_Final_Example.R
5.1 Example Match: India vs South Africa, T20 World Cup Final 2024 at Bridgetown
Get the playing XI for both teams
# Playing XIs for match 2729: the first 22 rows of the match are kept
# (presumably 11 per side -- confirm against player_data) and then split by
# country code. head() replaces [1:22, ] so a match with fewer than 22 rows
# does not get padded with NA rows.
team1_playingxi <- player_data %>%
  filter(ID == 2729) %>%
  head(22) %>%
  filter(Country == "IND") %>%
  pull(Player)
team2_playingxi <- player_data %>%
  filter(ID == 2729) %>%
  head(22) %>%
  filter(Country == "RSA") %>%
  pull(Player)

# Get the ground buffs
# NOTE(review): no code follows this heading in the visible source, yet
# `ground_buffs` is read by the venue-adjustment loops later in this section.
# It must already be defined at this point (section 5.2 builds it with
# venue_factors %>% filter(Ground == Venue)) -- confirm.
Get the player ratings per match for the playing xi
# Career ratings for the players named in each playing XI.
team1_playingxi_ratings <- player_rankings_2 %>%
  filter(Player %in% team1_playingxi)
team2_playingxi_ratings <- player_rankings_2 %>%
  filter(Player %in% team2_playingxi)

# Multiply the ground buffs. Weight it by batter rating, i.e. if the batter
# rating is higher, the ground buffs will have more impact.
# The venue-adjusted columns start at 0 and are filled in afterwards.
team1_playingxi_ratings$BatImpactperGame_2 <- 0
team2_playingxi_ratings$BatImpactperGame_2 <- 0
team1_playingxi_ratings$BowlImpactperGame_2 <- 0
team2_playingxi_ratings$BowlImpactperGame_2 <- 0

# Evaluate player performance in Team 1 according to their ratings and ground conditions
# Team 1: venue- and rank-adjusted impact for every player.
# Each discipline is ranked once and the adjustment is applied to the whole
# column. The previous loop re-sorted the frame twice per iteration, which
# was wasteful, could let equally-rated players swap positions between
# iterations, and misbehaved on an empty frame because of 1:nrow().
team1_rank <- seq_len(nrow(team1_playingxi_ratings))

# Batting: rank by rating (best first). Assume only 8 batsmen will play:
# ranks 1-8 get the ground buff (relative to the median venue) tapered by
# (9 - rank) / (10 - rank); bottom batsmen don't get a buff and don't play as
# much, so they keep a fifth of their rating.
team1_playingxi_ratings <- team1_playingxi_ratings %>%
  arrange(desc(BatImpactperGame))
team1_top_bat <- team1_rank < 9
team1_bat_adj <- team1_playingxi_ratings$BatImpactperGame / 5
team1_bat_adj[team1_top_bat] <-
  team1_playingxi_ratings$BatImpactperGame[team1_top_bat] *
  (1 / median(venue_factors$BattingScale2)) *
  ground_buffs$BattingScale2[1] *
  ((9 - team1_rank[team1_top_bat]) / (10 - team1_rank[team1_top_bat]))
team1_playingxi_ratings$BatImpactperGame_2 <- team1_bat_adj

# Bowling: rank by rating (best first). Ranks 1-6 get the ground buff tapered
# by (8 - rank) / (9 - rank); bottom bowlers don't bowl in the game, so 0.
team1_playingxi_ratings <- team1_playingxi_ratings %>%
  arrange(desc(BowlImpactperGame))
team1_top_bowl <- team1_rank < 7
team1_bowl_adj <- numeric(nrow(team1_playingxi_ratings))
team1_bowl_adj[team1_top_bowl] <-
  team1_playingxi_ratings$BowlImpactperGame[team1_top_bowl] *
  (1 / median(venue_factors$BowlingScale2)) *
  ground_buffs$BowlingScale2[1] *
  ((8 - team1_rank[team1_top_bowl]) / (9 - team1_rank[team1_top_bowl]))
team1_playingxi_ratings$BowlImpactperGame_2 <- team1_bowl_adj

# Evaluate player performance in Team 2 according to their ratings and ground conditions
# Team 2: venue- and rank-adjusted impact for every player.
# Each discipline is ranked once and the adjustment is applied to the whole
# column. The previous loop re-sorted the frame twice per iteration, which
# was wasteful, could let equally-rated players swap positions between
# iterations, and misbehaved on an empty frame because of 1:nrow().
team2_rank <- seq_len(nrow(team2_playingxi_ratings))

# Batting: rank by rating (best first). Assume only 8 batsmen will play:
# ranks 1-8 get the ground buff (relative to the median venue) tapered by
# (9 - rank) / (10 - rank); bottom batsmen don't get a buff and don't play as
# much, so they keep a fifth of their rating.
team2_playingxi_ratings <- team2_playingxi_ratings %>%
  arrange(desc(BatImpactperGame))
team2_top_bat <- team2_rank < 9
team2_bat_adj <- team2_playingxi_ratings$BatImpactperGame / 5
team2_bat_adj[team2_top_bat] <-
  team2_playingxi_ratings$BatImpactperGame[team2_top_bat] *
  (1 / median(venue_factors$BattingScale2)) *
  ground_buffs$BattingScale2[1] *
  ((9 - team2_rank[team2_top_bat]) / (10 - team2_rank[team2_top_bat]))
team2_playingxi_ratings$BatImpactperGame_2 <- team2_bat_adj

# Bowling: rank by rating (best first). Ranks 1-6 get the ground buff tapered
# by (8 - rank) / (9 - rank); bottom bowlers don't bowl in the game, so 0.
team2_playingxi_ratings <- team2_playingxi_ratings %>%
  arrange(desc(BowlImpactperGame))
team2_top_bowl <- team2_rank < 7
team2_bowl_adj <- numeric(nrow(team2_playingxi_ratings))
team2_bowl_adj[team2_top_bowl] <-
  team2_playingxi_ratings$BowlImpactperGame[team2_top_bowl] *
  (1 / median(venue_factors$BowlingScale2)) *
  ground_buffs$BowlingScale2[1] *
  ((8 - team2_rank[team2_top_bowl]) / (9 - team2_rank[team2_top_bowl]))
team2_playingxi_ratings$BowlImpactperGame_2 <- team2_bowl_adj

# Rearrange data to make sure 11 batting rankings correct
# Adjusted impacts as plain vectors, best first, so position k is the k-th
# strongest batter / bowler after the venue adjustment. na.last = TRUE keeps
# missing values at the end instead of dropping them, so the vector length
# still matches the number of players.
team1_batting <- sort(team1_playingxi_ratings$BatImpactperGame_2,
                      decreasing = TRUE, na.last = TRUE)
team2_batting <- sort(team2_playingxi_ratings$BatImpactperGame_2,
                      decreasing = TRUE, na.last = TRUE)
team1_bowling <- sort(team1_playingxi_ratings$BowlImpactperGame_2,
                      decreasing = TRUE, na.last = TRUE)
team2_bowling <- sort(team2_playingxi_ratings$BowlImpactperGame_2,
                      decreasing = TRUE, na.last = TRUE)

# Calculate win probability
# One-row feature frame for the model: top 8 batters and top 6 bowlers of each
# side, in the column order Batsman1..8_T1, Bowler1..6_T1, Batsman1..8_T2,
# Bowler1..6_T2.
# NOTE(review): `newx` is the argument name used by glmnet-style predict
# methods, whereas stats::predict.glm expects `newdata` -- confirm against how
# final_model was fitted.
ind_winprob <- predict(
  final_model,
  newx = as.data.frame(as.list(c(
    setNames(team1_batting[1:8], paste0("Batsman", 1:8, "_T1")),
    setNames(team1_bowling[1:6], paste0("Bowler", 1:6, "_T1")),
    setNames(team2_batting[1:8], paste0("Batsman", 1:8, "_T2")),
    setNames(team2_bowling[1:6], paste0("Bowler", 1:6, "_T2"))
  ))),
  type = "response"
)

# Team 1 is India; South Africa gets the complement.
print(paste("India win probability: ", round(ind_winprob, 4) * 100, "%"))
## [1] "India win probability: 53.3 %"
rsa_winprob <- 1 - ind_winprob
print(paste("South Africa win probability: ", round(rsa_winprob, 4) * 100, "%"))
## [1] "South Africa win probability: 46.7 %"
5.2 With random effect correction based on number of matches played
Predict the T20 World Cup Final 2024 with random effect as well, or any game you’d like
Select games that can be passed into model with easy access; check Match_Data-Cricinfo file for more:
IND vs SL, 2014 T20 WC final (SL won by 6 wickets, Mirpur) Match ID 400
AFG vs WI, 2016 T20 WC match (AFG won by 6 runs, Nagpur) Match ID 552
AUS vs NZL, 2021 T20 WC final (AUS won by 8 wickets, Dubai (DICS)) Match ID 1428
ENG vs PAK, 2022 T20 WC final (ENG won by 5 wickets, Melbourne) Match ID 1879
IND vs PAK, 2024 T20 WC match (IND won by 6 runs, New York) Match ID 2658 - the model got this example wrong
Most recent T20I: AUS vs RSA at Cairns (AUS won by 2 wickets, August 16 2025) Match ID 3407
# Match to evaluate: change these four values to try another game.
country1 <- "AUS"
country2 <- "RSA"
Venue <- "Cairns"
MatchID <- 3407

# Playing XIs: the first 22 rows of the match are kept (presumably 11 per
# side -- confirm against player_data) and then split by country code.
# head() replaces [1:22, ] so a match with fewer than 22 rows does not get
# padded with NA rows.
team1_playingxi <- player_data %>%
  filter(ID == MatchID) %>%
  head(22) %>%
  filter(Country == country1) %>%
  pull(Player)
team2_playingxi <- player_data %>%
  filter(ID == MatchID) %>%
  head(22) %>%
  filter(Country == country2) %>%
  pull(Player)

# Get the ground buffs and the player ratings per match for the playing XI
# Venue scaling factors for the selected ground. Fail loudly when the ground
# is unknown: otherwise the first-row lookups below would be NA and every
# adjusted rating would silently become NA as well.
ground_buffs <- venue_factors %>% filter(Ground == Venue)
if (nrow(ground_buffs) == 0) {
  stop("No venue factors found for ground: ", Venue, call. = FALSE)
}

# Career ratings for the players named in each playing XI.
team1_playingxi_ratings <- player_rankings_2 %>%
  filter(Player %in% team1_playingxi)
team2_playingxi_ratings <- player_rankings_2 %>%
  filter(Player %in% team2_playingxi)

# Multiply the ground buffs. Weight it by batter rating, i.e. if the batter
# rating is higher, the ground buffs will have more impact.
# The venue-adjusted columns start at 0 and are filled in by the simulation.
team1_playingxi_ratings$BatImpactperGame_2 <- 0
team2_playingxi_ratings$BatImpactperGame_2 <- 0
team1_playingxi_ratings$BowlImpactperGame_2 <- 0
team2_playingxi_ratings$BowlImpactperGame_2 <- 0

# Simulate 20 times to get a better idea of win probability with random effects
# Draw one randomised set of venue-adjusted ratings for a team.
#
# ratings:       one row per player, with columns BatImpactperGame,
#                BowlImpactperGame and MatchesPlayed.
# ground_buffs:  venue_factors row(s) for the ground; the first row is used.
# venue_factors: all grounds; the median scale is the baseline.
#
# Returns `ratings` with BatImpactperGame_2 and BowlImpactperGame_2 filled in,
# ordered by descending BowlImpactperGame (the order the original loop left
# the frame in).
#
# Players are ranked once per discipline instead of re-sorting the frame on
# every iteration. Random draws happen in the same sequence as before
# (batting draw, then bowling draw, per rank), so a fixed seed still gives
# the same stream.
simulate_team_impacts <- function(ratings, ground_buffs, venue_factors) {
  n_players <- nrow(ratings)
  bat_order <- order(ratings$BatImpactperGame, decreasing = TRUE)
  bowl_order <- order(ratings$BowlImpactperGame, decreasing = TRUE)

  # Loop-invariant venue terms.
  bat_median_inv <- 1 / median(venue_factors$BattingScale2)
  bowl_median_inv <- 1 / median(venue_factors$BowlingScale2)
  bat_buff <- ground_buffs$BattingScale2[1]
  bowl_buff <- ground_buffs$BowlingScale2[1]

  ratings$BatImpactperGame_2 <- numeric(n_players)
  ratings$BowlImpactperGame_2 <- numeric(n_players)

  for (rank in seq_len(n_players)) {
    bat_row <- bat_order[rank]
    if (rank < 9) {
      # Assume only 8 batsmen will play: buffed and tapered by rank.
      bat_mean <- ratings$BatImpactperGame[bat_row] * bat_median_inv *
        bat_buff * ((9 - rank) / (10 - rank))
    } else {
      # Bottom batsmen don't get a buff and don't play as much.
      bat_mean <- ratings$BatImpactperGame[bat_row] / 5
    }
    # Random process: spread shrinks as the player has more matches.
    # NOTE(review): MatchesPlayed of 0 gives an infinite sd -- confirm the
    # data excludes that.
    ratings$BatImpactperGame_2[bat_row] <- rnorm(
      1, bat_mean, abs(bat_mean / ratings$MatchesPlayed[bat_row])
    )

    bowl_row <- bowl_order[rank]
    if (rank < 7) {
      bowl_mean <- ratings$BowlImpactperGame[bowl_row] * bowl_median_inv *
        bowl_buff * ((8 - rank) / (9 - rank))
      ratings$BowlImpactperGame_2[bowl_row] <- rnorm(
        1, bowl_mean, abs(bowl_mean / ratings$MatchesPlayed[bowl_row])
      )
    } else {
      # Bottom bowlers don't bowl in the game.
      ratings$BowlImpactperGame_2[bowl_row] <- 0
    }
  }

  ratings %>% arrange(desc(BowlImpactperGame))
}

# Repeat the randomised prediction and average the win probabilities.
n_sims <- 20
kvec <- numeric(n_sims)
for (k in seq_len(n_sims)) {
  team1_playingxi_ratings <- simulate_team_impacts(
    team1_playingxi_ratings, ground_buffs, venue_factors
  )
  team2_playingxi_ratings <- simulate_team_impacts(
    team2_playingxi_ratings, ground_buffs, venue_factors
  )

  # Rearrange data to make sure 11 batting rankings correct: adjusted impacts
  # as plain vectors, best first, missing values kept at the end.
  team1_batting <- sort(team1_playingxi_ratings$BatImpactperGame_2,
                        decreasing = TRUE, na.last = TRUE)
  team2_batting <- sort(team2_playingxi_ratings$BatImpactperGame_2,
                        decreasing = TRUE, na.last = TRUE)
  team1_bowling <- sort(team1_playingxi_ratings$BowlImpactperGame_2,
                        decreasing = TRUE, na.last = TRUE)
  team2_bowling <- sort(team2_playingxi_ratings$BowlImpactperGame_2,
                        decreasing = TRUE, na.last = TRUE)

  # One-row feature frame: top 8 batters and top 6 bowlers of each side, in
  # the column order Batsman1..8_T1, Bowler1..6_T1, Batsman1..8_T2,
  # Bowler1..6_T2.
  # NOTE(review): `newx` is the glmnet-style argument name; predict.glm
  # expects `newdata` -- confirm against how final_model was fitted.
  winprob <- predict(
    final_model,
    newx = as.data.frame(as.list(c(
      setNames(team1_batting[1:8], paste0("Batsman", 1:8, "_T1")),
      setNames(team1_bowling[1:6], paste0("Bowler", 1:6, "_T1")),
      setNames(team2_batting[1:8], paste0("Batsman", 1:8, "_T2")),
      setNames(team2_bowling[1:6], paste0("Bowler", 1:6, "_T2"))
    ))),
    type = "response"
  )
  kvec[k] <- winprob
}

print(paste(country1, "win probability: ", round(mean(kvec), 4) * 100, "%"))
## [1] "AUS win probability: 65.59 %"
# The recorded output below had no statement producing it in the visible
# source; the complement of country1's mean probability reproduces it.
print(paste(country2, "win probability: ", round(1 - mean(kvec), 4) * 100, "%"))
## [1] "RSA win probability: 34.41 %"