3 Venue Impact Factors and Team Rankings - Venue_Team_Rankings.R

3.1 Venue Impact

Create venue impact factors

# Per-ground averages of the batting and bowling impact values in player_data.
# Missing impacts are ignored when averaging.
venue_factors <- player_data %>%
  group_by(Ground) %>%
  summarise(
    AvgBatImpact = mean(BattingImpact, na.rm = TRUE),
    AvgBowlImpact = mean(BowlingImpact, na.rm = TRUE),
    .groups = "drop"
  )

Venue considerations

# Standardise each ground's average impacts to z-scores across all grounds.
# venue_factors is ungrouped at this point, so mean()/sd() span every venue.
venue_factors <- venue_factors %>%
  mutate(
    BattingScale = (AvgBatImpact - mean(AvgBatImpact, na.rm = TRUE)) /
      sd(AvgBatImpact, na.rm = TRUE),
    BowlingScale = (AvgBowlImpact - mean(AvgBowlImpact, na.rm = TRUE)) /
      sd(AvgBowlImpact, na.rm = TRUE)
  )

# Batting-friendly grounds first: largest (batting - bowling) gap on top.
# The previous sort key, desc(BowlingScale - BattingScale), put the
# bowling-friendly grounds first, contradicting the variable name.
battingranking <- venue_factors %>%
  arrange(desc(BattingScale - BowlingScale))

# Bowling-friendly grounds first: smallest (batting - bowling) gap on top.
bowlingranking <- venue_factors %>%
  arrange(BattingScale - BowlingScale)

Min-max scale the venue factors' batting scale and bowling scale

# Min-max rescale the z-scores onto [0, 1].
# na.rm = TRUE matches the earlier aggregates: without it, a single venue with
# a missing score makes min()/max() return NA and wipes out the whole column.
venue_factors <- venue_factors %>%
  mutate(
    BattingScale2 = (BattingScale - min(BattingScale, na.rm = TRUE)) /
      (max(BattingScale, na.rm = TRUE) - min(BattingScale, na.rm = TRUE)),
    BowlingScale2 = (BowlingScale - min(BowlingScale, na.rm = TRUE)) /
      (max(BowlingScale, na.rm = TRUE) - min(BowlingScale, na.rm = TRUE))
  )

# Batting-friendly grounds first: smallest (bowling - batting) gap on top.
battingranking <- venue_factors %>%
  arrange(BowlingScale2 - BattingScale2)

# Bowling-friendly grounds first: smallest (batting - bowling) gap on top.
bowlingranking <- venue_factors %>%
  arrange(BattingScale2 - BowlingScale2)

battingranking
## # A tibble: 107 × 7
##    Ground   AvgBatImpact AvgBowlImpact BattingScale BowlingScale BattingScale2
##    <chr>           <dbl>         <dbl>        <dbl>        <dbl>         <dbl>
##  1 Guwahati         4.58          1.26         2.85        -2.44         1    
##  2 Hyderab…         4.46          1.41         2.70        -2.22         0.975
##  3 Bloemfo…         3.76          1.30         1.85        -2.38         0.831
##  4 Brabour…         4.00          1.57         2.15        -2.00         0.881
##  5 Mohali           3.89          1.92         2.02        -1.48         0.859
##  6 Dharams…         3.23          1.78         1.22        -1.68         0.723
##  7 Wankhede         3.63          2.18         1.70        -1.10         0.805
##  8 Hamilton         3.41          2.05         1.44        -1.29         0.760
##  9 Kimberl…         3.17          1.87         1.15        -1.55         0.712
## 10 Centuri…         3.37          2.09         1.39        -1.23         0.753
## # ℹ 97 more rows
## # ℹ 1 more variable: BowlingScale2 <dbl>
# Show the bowling-friendly ranking.
print(bowlingranking)
## # A tibble: 107 × 7
##    Ground   AvgBatImpact AvgBowlImpact BattingScale BowlingScale BattingScale2
##    <chr>           <dbl>         <dbl>        <dbl>        <dbl>         <dbl>
##  1 Dallas        -0.312           5.05       -3.04         3.08         0     
##  2 New York      -0.0327          5.15       -2.71         3.22         0.0570
##  3 Kingsto…       0.831           4.54       -1.67         2.33         0.233 
##  4 Colombo…       0.520           4.02       -2.04         1.58         0.170 
##  5 Port of…       0.906           4.23       -1.58         1.88         0.249 
##  6 King Ci…       0.626           3.59       -1.91         0.948        0.192 
##  7 Raipur         1.68            4.35       -0.643        2.05         0.407 
##  8 Leeds          1.93            4.49       -0.348        2.25         0.457 
##  9 Hangzhou       0.546           3.36       -2.01         0.609        0.175 
## 10 Hambant…       0.918           3.47       -1.56         0.774        0.251 
## # ℹ 97 more rows
## # ℹ 1 more variable: BowlingScale2 <dbl>
# Distribution of the min-max scaled batting factor across venues.
hist(
  battingranking$BattingScale2,
  col = "blue",
  main = "Venue Batting Scale Distribution",
  xlab = "Batting Scale"
)

# Distribution of the min-max scaled bowling factor across venues.
hist(
  bowlingranking$BowlingScale2,
  col = "red",
  main = "Venue Bowling Scale Distribution",
  xlab = "Bowling Scale"
)

Plot Bowling - Batting Impact top 15 for best bowling grounds

library(ggplot2)
# Horizontal bar chart of the first 15 rows of bowlingranking, ordered by the
# gap between the scaled bowling and batting factors.
ggplot(
  data = bowlingranking[1:15, ],
  mapping = aes(
    x = reorder(Ground, BowlingScale2 - BattingScale2),
    y = BowlingScale2 - BattingScale2
  )
) +
  geom_col(fill = "red") +
  coord_flip() +
  labs(
    title = "Top 15 Bowling Grounds: Bowling Impact - Batting Impact",
    x = "Venue",
    y = "Bowling - Batting Impact"
  ) +
  theme_minimal()

Plot Batting - Bowling Impact top 15 for best batting grounds

# Horizontal bar chart of the first 15 rows of battingranking, ordered by the
# gap between the scaled batting and bowling factors.
ggplot(
  data = battingranking[1:15, ],
  mapping = aes(
    x = reorder(Ground, BattingScale2 - BowlingScale2),
    y = BattingScale2 - BowlingScale2
  )
) +
  geom_col(fill = "blue") +
  coord_flip() +
  labs(
    title = "Top 15 Batting Grounds: Batting Impact - Bowling Impact",
    x = "Venue",
    y = "Batting - Bowling Impact"
  ) +
  theme_minimal()

3.2 Team Ranking

Print ranking of teams

# All-time ranking: sum each country's TotalImpact over every row of
# team_impact, then list countries from highest to lowest total.
team_ranking <- team_impact %>%
  group_by(Country) %>%
  summarise(
    TotalImpact = sum(TotalImpact, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(TotalImpact))


# Rank teams on their 10 most recent matches.
team_ranking_recent <- team_impact %>%
  # keep only matches in your recent set
  #filter(ID %in% unique(recent_matches$ID)) %>%
  # join match dates so we can order matches
  left_join(match_data_file_cricinfo %>% select(ID, Date), by = "ID") %>%
  # newest match first within each team
  # NOTE(review): assumes Date sorts chronologically (Date/POSIXct rather than
  # free text) and that ID is unique in match_data_file_cricinfo -- confirm.
  arrange(Country, desc(Date)) %>%
  group_by(Country) %>%
  # keep only the 10 most recent matches per team
  slice_head(n = 10) %>%
  summarise(
    # Sum here, not mean: the per-game figure below divides by nMatches, and
    # averaging first normalised twice (mean / n), which inflated teams with
    # fewer than 10 matches.
    TotalImpact = sum(TotalImpact, na.rm = TRUE),
    nMatches = n()
  ) %>%
  mutate(TotalImpact_perGame = TotalImpact / nMatches) %>%
  # rank on the per-game figure so teams with fewer matches compare fairly
  arrange(desc(TotalImpact_perGame))

team_ranking_recent
## # A tibble: 12 × 4
##    Country TotalImpact nMatches TotalImpact_perGame
##    <chr>         <dbl>    <int>               <dbl>
##  1 IND            56.2       10                5.62
##  2 NZL            46.8       10                4.68
##  3 AUS            44.4       10                4.44
##  4 RSA            39.3       10                3.93
##  5 PAK            38.4       10                3.84
##  6 ENG            37.1       10                3.71
##  7 BAN            36.9       10                3.69
##  8 WI             36.3       10                3.63
##  9 SL             35.2       10                3.52
## 10 AFG            33.4       10                3.34
## 11 IRE            26.4       10                2.64
## 12 ZIM            19.4       10                1.94

Plot team rankings

library(ggplot2)
# Horizontal bar chart of each country's TotalImpact_perGame, ordered by that
# value. The y label names the plotted column; it previously read
# "Total Impact" although the per-game figure is what is drawn.
ggplot(
  data = team_ranking_recent,
  mapping = aes(
    x = reorder(Country, TotalImpact_perGame),
    y = TotalImpact_perGame
  )
) +
  geom_col(fill = "skyblue") +
  coord_flip() +
  labs(
    title = "Team Rankings Based on Recent Matches",
    x = "Country",
    y = "Total Impact per Game"
  ) +
  theme_minimal()

Get data for Lord’s

# Look the ground up by name instead of a hard-coded row position (80), which
# silently selects a different venue whenever the data or sort order changes.
battingranking %>%
  filter(Ground == "Lord's")
## # A tibble: 1 × 7
##   Ground AvgBatImpact AvgBowlImpact BattingScale BowlingScale BattingScale2
##   <chr>         <dbl>         <dbl>        <dbl>        <dbl>         <dbl>
## 1 Lord's         1.64          3.26       -0.695        0.459         0.398
## # ℹ 1 more variable: BowlingScale2 <dbl>