#loading the required libraries
import pandas as pd
from matplotlib import pyplot as plt
#import seaborn as sns
#loading the ipl matches dataset; the relative path means matches.csv must sit in the working directory
ipl=pd.read_csv('matches.csv')
#having a glance at the first five records of the dataset (head() returns 5 rows by default)
ipl.head()
id | season | city | date | team1 | team2 | toss_winner | toss_decision | result | dl_applied | winner | win_by_runs | win_by_wickets | player_of_match | venue | umpire1 | umpire2 | umpire3 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 2017 | Hyderabad | 2017-04-05 | Sunrisers Hyderabad | Royal Challengers Bangalore | Royal Challengers Bangalore | field | normal | 0 | Sunrisers Hyderabad | 35 | 0 | Yuvraj Singh | Rajiv Gandhi International Stadium, Uppal | AY Dandekar | NJ Llong | NaN |
1 | 2 | 2017 | Pune | 2017-04-06 | Mumbai Indians | Rising Pune Supergiant | Rising Pune Supergiant | field | normal | 0 | Rising Pune Supergiant | 0 | 7 | SPD Smith | Maharashtra Cricket Association Stadium | A Nand Kishore | S Ravi | NaN |
2 | 3 | 2017 | Rajkot | 2017-04-07 | Gujarat Lions | Kolkata Knight Riders | Kolkata Knight Riders | field | normal | 0 | Kolkata Knight Riders | 0 | 10 | CA Lynn | Saurashtra Cricket Association Stadium | Nitin Menon | CK Nandan | NaN |
3 | 4 | 2017 | Indore | 2017-04-08 | Rising Pune Supergiant | Kings XI Punjab | Kings XI Punjab | field | normal | 0 | Kings XI Punjab | 0 | 6 | GJ Maxwell | Holkar Cricket Stadium | AK Chaudhary | C Shamshuddin | NaN |
4 | 5 | 2017 | Bangalore | 2017-04-08 | Royal Challengers Bangalore | Delhi Daredevils | Royal Challengers Bangalore | bat | normal | 0 | Royal Challengers Bangalore | 15 | 0 | KM Jadhav | M Chinnaswamy Stadium | NaN | NaN | NaN |
#Looking at the number of rows and columns in the dataset, returned as a (rows, columns) tuple
ipl.shape
(756, 18)
#Counting how many man of the match awards each player has won (most awards first)
ipl['player_of_match'].value_counts()
CH Gayle 21 AB de Villiers 20 RG Sharma 17 MS Dhoni 17 DA Warner 17 .. PD Collingwood 1 NV Ojha 1 AC Voges 1 J Theron 1 S Hetmyer 1 Name: player_of_match, Length: 226, dtype: int64
#Top 10 players by man of the match awards: value_counts() is already sorted
#from most to fewest, so the first ten rows are the ten highest counts
ipl['player_of_match'].value_counts().head(10)
CH Gayle 21 AB de Villiers 20 RG Sharma 17 MS Dhoni 17 DA Warner 17 YK Pathan 16 SR Watson 15 SK Raina 14 G Gambhir 13 MEK Hussey 12 Name: player_of_match, dtype: int64
#Top 5 players by man of the match awards: value_counts() is already sorted
#from most to fewest, so the first five rows are the five highest counts
ipl['player_of_match'].value_counts().head(5)
CH Gayle 21 AB de Villiers 20 RG Sharma 17 MS Dhoni 17 DA Warner 17 Name: player_of_match, dtype: int64
#making a bar-plot for the top 5 players with most man of the match awards
#the counts are computed once and reused for both the bar labels and the bar heights
top_players = ipl['player_of_match'].value_counts().head(5)
plt.figure(figsize=(9, 5))
plt.bar(top_players.index.tolist(), top_players.tolist())
plt.show()
#Getting the frequency of each value in the result column
ipl['result'].value_counts()
normal 743 tie 9 no result 4 Name: result, dtype: int64
#Finding out the number of toss wins w.r.t each team
#NOTE(review): the counts list both 'Rising Pune Supergiant' and 'Rising Pune Supergiants',
#which look like two spellings of one team - confirm and merge if so
ipl['toss_winner'].value_counts()
Mumbai Indians 98 Kolkata Knight Riders 92 Chennai Super Kings 89 Royal Challengers Bangalore 81 Kings XI Punjab 81 Delhi Daredevils 80 Rajasthan Royals 80 Sunrisers Hyderabad 46 Deccan Chargers 43 Pune Warriors 20 Gujarat Lions 15 Delhi Capitals 10 Kochi Tuskers Kerala 8 Rising Pune Supergiants 7 Rising Pune Supergiant 6 Name: toss_winner, dtype: int64
#Keeping only the matches decided by a run margin, i.e. won by the side that batted first
won_by_runs = ipl['win_by_runs'] != 0
batting_first = ipl[won_by_runs]
#Looking at the first five rows of batting_first
batting_first.head()
id | season | city | date | team1 | team2 | toss_winner | toss_decision | result | dl_applied | winner | win_by_runs | win_by_wickets | player_of_match | venue | umpire1 | umpire2 | umpire3 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 2017 | Hyderabad | 2017-04-05 | Sunrisers Hyderabad | Royal Challengers Bangalore | Royal Challengers Bangalore | field | normal | 0 | Sunrisers Hyderabad | 35 | 0 | Yuvraj Singh | Rajiv Gandhi International Stadium, Uppal | AY Dandekar | NJ Llong | NaN |
4 | 5 | 2017 | Bangalore | 2017-04-08 | Royal Challengers Bangalore | Delhi Daredevils | Royal Challengers Bangalore | bat | normal | 0 | Royal Challengers Bangalore | 15 | 0 | KM Jadhav | M Chinnaswamy Stadium | NaN | NaN | NaN |
8 | 9 | 2017 | Pune | 2017-04-11 | Delhi Daredevils | Rising Pune Supergiant | Rising Pune Supergiant | field | normal | 0 | Delhi Daredevils | 97 | 0 | SV Samson | Maharashtra Cricket Association Stadium | AY Dandekar | S Ravi | NaN |
13 | 14 | 2017 | Kolkata | 2017-04-15 | Kolkata Knight Riders | Sunrisers Hyderabad | Sunrisers Hyderabad | field | normal | 0 | Kolkata Knight Riders | 17 | 0 | RV Uthappa | Eden Gardens | AY Dandekar | NJ Llong | NaN |
14 | 15 | 2017 | Delhi | 2017-04-15 | Delhi Daredevils | Kings XI Punjab | Delhi Daredevils | bat | normal | 0 | Delhi Daredevils | 51 | 0 | CJ Anderson | Feroz Shah Kotla | YC Barde | Nitin Menon | NaN |
#Making a histogram of the winning margins (in runs) for sides that won batting first
plt.figure(figsize=(7, 3))
plt.hist(batting_first['win_by_runs'])
plt.title('Distribution of Runs')
plt.xlabel('no. of Runs')
plt.ylabel('no. of times/Matches')
plt.show()
#Finding out the number of wins w.r.t each team after batting first (most wins first)
batting_first['winner'].value_counts()
Mumbai Indians 57 Chennai Super Kings 52 Kings XI Punjab 38 Kolkata Knight Riders 36 Royal Challengers Bangalore 35 Sunrisers Hyderabad 30 Rajasthan Royals 27 Delhi Daredevils 25 Deccan Chargers 18 Pune Warriors 6 Rising Pune Supergiant 5 Delhi Capitals 3 Kochi Tuskers Kerala 2 Rising Pune Supergiants 2 Gujarat Lions 1 Name: winner, dtype: int64
#Making a bar-plot for top 3 teams with most wins after batting first
#the win counts are computed once and reused for both the bar labels and the bar heights
top_batting_first = batting_first['winner'].value_counts().head(3)
plt.figure(figsize=(7, 3))
plt.bar(
    top_batting_first.index.tolist(),
    top_batting_first.tolist(),
    color=["blue", "yellow", "orange"],
)
plt.show()
#Making a pie chart of each team's share of the wins after batting first
#the win counts are computed once and reused for both the wedge sizes and the labels
wins_batting_first = batting_first['winner'].value_counts()
plt.figure(figsize=(5, 5))
plt.pie(
    wins_batting_first.tolist(),
    labels=wins_batting_first.index.tolist(),
    autopct='%0.1f%%',
)
plt.show()
#Keeping only the matches decided by a wicket margin, i.e. won by the side that batted second
won_by_wickets = ipl['win_by_wickets'] != 0
batting_second = ipl[won_by_wickets]
#looking at the first five rows of batting_second
batting_second.head()
id | season | city | date | team1 | team2 | toss_winner | toss_decision | result | dl_applied | winner | win_by_runs | win_by_wickets | player_of_match | venue | umpire1 | umpire2 | umpire3 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 2 | 2017 | Pune | 2017-04-06 | Mumbai Indians | Rising Pune Supergiant | Rising Pune Supergiant | field | normal | 0 | Rising Pune Supergiant | 0 | 7 | SPD Smith | Maharashtra Cricket Association Stadium | A Nand Kishore | S Ravi | NaN |
2 | 3 | 2017 | Rajkot | 2017-04-07 | Gujarat Lions | Kolkata Knight Riders | Kolkata Knight Riders | field | normal | 0 | Kolkata Knight Riders | 0 | 10 | CA Lynn | Saurashtra Cricket Association Stadium | Nitin Menon | CK Nandan | NaN |
3 | 4 | 2017 | Indore | 2017-04-08 | Rising Pune Supergiant | Kings XI Punjab | Kings XI Punjab | field | normal | 0 | Kings XI Punjab | 0 | 6 | GJ Maxwell | Holkar Cricket Stadium | AK Chaudhary | C Shamshuddin | NaN |
5 | 6 | 2017 | Hyderabad | 2017-04-09 | Gujarat Lions | Sunrisers Hyderabad | Sunrisers Hyderabad | field | normal | 0 | Sunrisers Hyderabad | 0 | 9 | Rashid Khan | Rajiv Gandhi International Stadium, Uppal | A Deshmukh | NJ Llong | NaN |
6 | 7 | 2017 | Mumbai | 2017-04-09 | Kolkata Knight Riders | Mumbai Indians | Mumbai Indians | field | normal | 0 | Mumbai Indians | 0 | 4 | N Rana | Wankhede Stadium | Nitin Menon | CK Nandan | NaN |
#Histogram of the winning margins (in wickets) for sides that won batting second
wicket_margins = batting_second['win_by_wickets']
plt.figure(figsize=(5, 5))
plt.hist(wicket_margins, bins=30)
plt.xlabel('no. of wickets remaining')
plt.ylabel('no. of times/Matches')
plt.show()
#Finding out the number of wins w.r.t each team after batting second (most wins first)
batting_second['winner'].value_counts()
Kolkata Knight Riders 56 Mumbai Indians 50 Royal Challengers Bangalore 48 Chennai Super Kings 48 Rajasthan Royals 46 Kings XI Punjab 42 Delhi Daredevils 42 Sunrisers Hyderabad 27 Gujarat Lions 12 Deccan Chargers 11 Pune Warriors 6 Delhi Capitals 6 Rising Pune Supergiant 5 Kochi Tuskers Kerala 4 Rising Pune Supergiants 3 Name: winner, dtype: int64
#Making a bar plot for top-3 teams with most wins after batting second
#the win counts are computed once and reused for both the bar labels and the bar heights
top_batting_second = batting_second['winner'].value_counts().head(3)
plt.figure(figsize=(7, 3))
plt.bar(
    top_batting_second.index.tolist(),
    top_batting_second.tolist(),
    color=["purple", "blue", "red"],
)
plt.show()
#Making a pie chart for distribution of most wins after batting second
#the win counts are computed once and reused for both the wedge sizes and the labels
wins_batting_second = batting_second['winner'].value_counts()
plt.figure(figsize=(7, 7))
plt.pie(
    wins_batting_second.tolist(),
    labels=wins_batting_second.index.tolist(),
    autopct='%0.1f%%',
)
plt.show()
#Looking at the number of matches played each season (ordered by match count, not by year)
ipl['season'].value_counts()
2013 76 2012 74 2011 73 2010 60 2014 60 2016 60 2018 60 2019 60 2017 59 2015 59 2008 58 2009 57 Name: season, dtype: int64
#Looking at the number of matches played in each city
#NOTE(review): the counts list 'Bangalore' and 'Bengaluru' separately; these look like
#two names for one city - confirm and merge if so
ipl['city'].value_counts()
Mumbai 101 Kolkata 77 Delhi 74 Bangalore 66 Hyderabad 64 Chennai 57 Jaipur 47 Chandigarh 46 Pune 38 Durban 15 Bengaluru 14 Visakhapatnam 13 Centurion 12 Ahmedabad 12 Rajkot 10 Mohali 10 Indore 9 Dharamsala 9 Johannesburg 8 Cuttack 7 Ranchi 7 Port Elizabeth 7 Cape Town 7 Abu Dhabi 7 Sharjah 6 Raipur 6 Kochi 5 Kanpur 4 Nagpur 3 Kimberley 3 East London 3 Bloemfontein 2 Name: city, dtype: int64
#Counting the matches in which the team that won the toss also won the match
import numpy as np
toss_winner_also_won = ipl['toss_winner'] == ipl['winner']
toss_winner_also_won.sum()
393
#Share of all matches in which the toss winner also won the match.
#Derived from the loaded data instead of typed-in numbers, so it always agrees with the
#count above and with the number of rows in ipl.
#Rows without a recorded winner, if any, compare as unequal and so count as "not won".
(ipl['toss_winner'] == ipl['winner']).mean()
0.5110062893081762
#loading the deliveries dataset; the relative path means deliveries.csv must sit in the
#working directory (the recorded run below failed with FileNotFoundError because it did not)
deliveries=pd.read_csv('deliveries.csv')
---------------------------------------------------------------------------FileNotFoundError Traceback (most recent call last)Cell In[27], line 1 ----> 1 deliveries=pd.read_csv('deliveries.csv') File /lib/python3.11/site-packages/pandas/util/_decorators.py:211, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs) 209 else: 210 kwargs[new_arg_name] = new_arg_value --> 211 return func(*args, **kwargs) File /lib/python3.11/site-packages/pandas/util/_decorators.py:331, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs) 325 if len(args) > num_allow_args: 326 warnings.warn( 327 msg.format(arguments=_format_argument_list(allow_args)), 328 FutureWarning, 329 stacklevel=find_stack_level(), 330 ) --> 331 return func(*args, **kwargs) File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:950, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options) 935 kwds_defaults = _refine_defaults_read( 936 dialect, 937 delimiter, (...) 946 defaults={"delimiter": ","}, 947 ) 948 kwds.update(kwds_defaults) --> 950 return _read(filepath_or_buffer, kwds) File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:605, in _read(filepath_or_buffer, kwds) 602 _validate_names(kwds.get("names", None)) 604 # Create the parser. 
--> 605 parser = TextFileReader(filepath_or_buffer, **kwds) 607 if chunksize or iterator: 608 return parser File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:1442, in TextFileReader.__init__(self, f, engine, **kwds) 1439 self.options["has_index_names"] = kwds["has_index_names"] 1441 self.handles: IOHandles | None = None -> 1442 self._engine = self._make_engine(f, self.engine) File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:1735, in TextFileReader._make_engine(self, f, engine) 1733 if "b" not in mode: 1734 mode += "b" -> 1735 self.handles = get_handle( 1736 f, 1737 mode, 1738 encoding=self.options.get("encoding", None), 1739 compression=self.options.get("compression", None), 1740 memory_map=self.options.get("memory_map", False), 1741 is_text=is_text, 1742 errors=self.options.get("encoding_errors", "strict"), 1743 storage_options=self.options.get("storage_options", None), 1744 ) 1745 assert self.handles is not None 1746 f = self.handles.handle File /lib/python3.11/site-packages/pandas/io/common.py:856, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options) 851 elif isinstance(handle, str): 852 # Check whether the filename is to be opened in binary mode. 853 # Binary mode does not support 'encoding' and 'newline'. 854 if ioargs.encoding and "b" not in ioargs.mode: 855 # Encoding --> 856 handle = open( 857 handle, 858 ioargs.mode, 859 encoding=ioargs.encoding, 860 errors=errors, 861 newline="", 862 ) 863 else: 864 # Binary mode 865 handle = open(handle, ioargs.mode) FileNotFoundError: [Errno 44] No such file or directory: 'deliveries.csv'
#having a glance at the first five records of the deliveries dataset
deliveries.head()
---------------------------------------------------------------------------NameError Traceback (most recent call last)Cell In[28], line 1 ----> 1 deliveries.head() NameError: name 'deliveries' is not defined
#listing the distinct match ids present in the deliveries dataset
deliveries['match_id'].unique()
---------------------------------------------------------------------------NameError Traceback (most recent call last)Cell In[29], line 1 ----> 1 deliveries['match_id'].unique() NameError: name 'deliveries' is not defined
#keeping only the deliveries whose match_id is 1
match_1=deliveries[deliveries['match_id']==1]
---------------------------------------------------------------------------NameError Traceback (most recent call last)Cell In[30], line 1 ----> 1 match_1=deliveries[deliveries['match_id']==1] NameError: name 'deliveries' is not defined
#having a glance at the first five records of match_1
match_1.head()
---------------------------------------------------------------------------NameError Traceback (most recent call last)Cell In[31], line 1 ----> 1 match_1.head() NameError: name 'match_1' is not defined
#number of rows and columns in match_1, returned as a (rows, columns) tuple
match_1.shape
---------------------------------------------------------------------------NameError Traceback (most recent call last)Cell In[32], line 1 ----> 1 match_1.shape NameError: name 'match_1' is not defined
#deliveries from inning 1 of match_1
#(named srh presumably because Sunrisers Hyderabad batted first in that match - confirm)
srh=match_1[match_1['inning']==1]
---------------------------------------------------------------------------NameError Traceback (most recent call last)Cell In[33], line 1 ----> 1 srh=match_1[match_1['inning']==1] NameError: name 'match_1' is not defined
#how often each batsman_runs value occurs in inning 1
srh['batsman_runs'].value_counts()
---------------------------------------------------------------------------NameError Traceback (most recent call last)Cell In[34], line 1 ----> 1 srh['batsman_runs'].value_counts() NameError: name 'srh' is not defined
#how often each dismissal_kind occurs in inning 1 (value_counts skips missing values by default)
srh['dismissal_kind'].value_counts()
---------------------------------------------------------------------------NameError Traceback (most recent call last)Cell In[35], line 1 ----> 1 srh['dismissal_kind'].value_counts() NameError: name 'srh' is not defined
#deliveries from inning 2 of match_1
#(named rcb presumably because Royal Challengers Bangalore batted second in that match - confirm)
rcb=match_1[match_1['inning']==2]
---------------------------------------------------------------------------NameError Traceback (most recent call last)Cell In[36], line 1 ----> 1 rcb=match_1[match_1['inning']==2] NameError: name 'match_1' is not defined
#how often each batsman_runs value occurs in inning 2
rcb['batsman_runs'].value_counts()
---------------------------------------------------------------------------NameError Traceback (most recent call last)Cell In[37], line 1 ----> 1 rcb['batsman_runs'].value_counts() NameError: name 'rcb' is not defined
#how often each dismissal_kind occurs in inning 2 (value_counts skips missing values by default)
rcb['dismissal_kind'].value_counts()
---------------------------------------------------------------------------NameError Traceback (most recent call last)Cell In[38], line 1 ----> 1 rcb['dismissal_kind'].value_counts() NameError: name 'rcb' is not defined