In [1]:
#loading the required libraries
import pandas as pd
from matplotlib import pyplot as plt
#import seaborn as sns
In [2]:
#loading the ipl matches dataset
ipl=pd.read_csv('matches.csv')
In [39]:
#having a glance at the first five records of the dataset
ipl.head()
Out [39]:
id season city date team1 team2 toss_winner toss_decision result dl_applied winner win_by_runs win_by_wickets player_of_match venue umpire1 umpire2 umpire3
0 1 2017 Hyderabad 2017-04-05 Sunrisers Hyderabad Royal Challengers Bangalore Royal Challengers Bangalore field normal 0 Sunrisers Hyderabad 35 0 Yuvraj Singh Rajiv Gandhi International Stadium, Uppal AY Dandekar NJ Llong NaN
1 2 2017 Pune 2017-04-06 Mumbai Indians Rising Pune Supergiant Rising Pune Supergiant field normal 0 Rising Pune Supergiant 0 7 SPD Smith Maharashtra Cricket Association Stadium A Nand Kishore S Ravi NaN
2 3 2017 Rajkot 2017-04-07 Gujarat Lions Kolkata Knight Riders Kolkata Knight Riders field normal 0 Kolkata Knight Riders 0 10 CA Lynn Saurashtra Cricket Association Stadium Nitin Menon CK Nandan NaN
3 4 2017 Indore 2017-04-08 Rising Pune Supergiant Kings XI Punjab Kings XI Punjab field normal 0 Kings XI Punjab 0 6 GJ Maxwell Holkar Cricket Stadium AK Chaudhary C Shamshuddin NaN
4 5 2017 Bangalore 2017-04-08 Royal Challengers Bangalore Delhi Daredevils Royal Challengers Bangalore bat normal 0 Royal Challengers Bangalore 15 0 KM Jadhav M Chinnaswamy Stadium NaN NaN NaN
In [4]:
#Lookin at the number of rows and columns in the dataset
ipl.shape
Out [4]:
(756, 18)
In [5]:
#Getting the frequency of most man of the match awards
ipl['player_of_match'].value_counts()
Out [5]:
CH Gayle          21
AB de Villiers    20
RG Sharma         17
MS Dhoni          17
DA Warner         17
                  ..
PD Collingwood     1
NV Ojha            1
AC Voges           1
J Theron           1
S Hetmyer          1
Name: player_of_match, Length: 226, dtype: int64
In [6]:
#Getting the top 10 players with most man of the match awards
ipl['player_of_match'].value_counts()[0:10]
Out [6]:
CH Gayle          21
AB de Villiers    20
RG Sharma         17
MS Dhoni          17
DA Warner         17
YK Pathan         16
SR Watson         15
SK Raina          14
G Gambhir         13
MEK Hussey        12
Name: player_of_match, dtype: int64
In [7]:
#Getting the top 5 players with most man of the match awards
ipl['player_of_match'].value_counts()[0:5]
Out [7]:
CH Gayle          21
AB de Villiers    20
RG Sharma         17
MS Dhoni          17
DA Warner         17
Name: player_of_match, dtype: int64
In [8]:
#making a bar-plot for the top 5 players with most man of the match awards
plt.figure(figsize=(9,5))
plt.bar(list(ipl['player_of_match'].value_counts()[0:5].keys()),list(ipl['player_of_match'].value_counts()[0:5]))
plt.show()
In [9]:
#Getting the frequency of result column
ipl['result'].value_counts()
Out [9]:
normal       743
tie            9
no result      4
Name: result, dtype: int64
In [10]:
#Finding out the number of toss wins w.r.t each team
ipl['toss_winner'].value_counts()
Out [10]:
Mumbai Indians                 98
Kolkata Knight Riders          92
Chennai Super Kings            89
Royal Challengers Bangalore    81
Kings XI Punjab                81
Delhi Daredevils               80
Rajasthan Royals               80
Sunrisers Hyderabad            46
Deccan Chargers                43
Pune Warriors                  20
Gujarat Lions                  15
Delhi Capitals                 10
Kochi Tuskers Kerala            8
Rising Pune Supergiants         7
Rising Pune Supergiant          6
Name: toss_winner, dtype: int64
In [11]:
#Extracting the records where a team won batting first
batting_first=ipl[ipl['win_by_runs']!=0]
In [12]:
#Looking at the head
batting_first.head()
Out [12]:
id season city date team1 team2 toss_winner toss_decision result dl_applied winner win_by_runs win_by_wickets player_of_match venue umpire1 umpire2 umpire3
0 1 2017 Hyderabad 2017-04-05 Sunrisers Hyderabad Royal Challengers Bangalore Royal Challengers Bangalore field normal 0 Sunrisers Hyderabad 35 0 Yuvraj Singh Rajiv Gandhi International Stadium, Uppal AY Dandekar NJ Llong NaN
4 5 2017 Bangalore 2017-04-08 Royal Challengers Bangalore Delhi Daredevils Royal Challengers Bangalore bat normal 0 Royal Challengers Bangalore 15 0 KM Jadhav M Chinnaswamy Stadium NaN NaN NaN
8 9 2017 Pune 2017-04-11 Delhi Daredevils Rising Pune Supergiant Rising Pune Supergiant field normal 0 Delhi Daredevils 97 0 SV Samson Maharashtra Cricket Association Stadium AY Dandekar S Ravi NaN
13 14 2017 Kolkata 2017-04-15 Kolkata Knight Riders Sunrisers Hyderabad Sunrisers Hyderabad field normal 0 Kolkata Knight Riders 17 0 RV Uthappa Eden Gardens AY Dandekar NJ Llong NaN
14 15 2017 Delhi 2017-04-15 Delhi Daredevils Kings XI Punjab Delhi Daredevils bat normal 0 Delhi Daredevils 51 0 CJ Anderson Feroz Shah Kotla YC Barde Nitin Menon NaN
In [13]:
#Making a histogram 
plt.figure(figsize=(7,3))
plt.hist(batting_first['win_by_runs'])
plt.title('Distribution if Runs')
plt.xlabel('no. of Runs')
plt.ylabel('no. of times/Matches')
plt.show()
In [14]:
#Finding out the number of wins w.r.t each team after batting first
batting_first['winner'].value_counts()
Out [14]:
Mumbai Indians                 57
Chennai Super Kings            52
Kings XI Punjab                38
Kolkata Knight Riders          36
Royal Challengers Bangalore    35
Sunrisers Hyderabad            30
Rajasthan Royals               27
Delhi Daredevils               25
Deccan Chargers                18
Pune Warriors                   6
Rising Pune Supergiant          5
Delhi Capitals                  3
Kochi Tuskers Kerala            2
Rising Pune Supergiants         2
Gujarat Lions                   1
Name: winner, dtype: int64
In [15]:
#Making a bar-plot for top 3 teams with most wins after batting first
plt.figure(figsize=(7,3))
plt.bar(list(batting_first['winner'].value_counts()[0:3].keys()),list(batting_first['winner'].value_counts()[0:3]),color=["blue","yellow","orange"])
plt.show()
In [ ]:
In [16]:
#Making a pie chart
plt.figure(figsize=(5,5))
plt.pie(list(batting_first['winner'].value_counts()),labels=list(batting_first['winner'].value_counts().keys()),autopct='%0.1f%%')
plt.show()
In [17]:
#extracting those records where a team has won after batting second
batting_second=ipl[ipl['win_by_wickets']!=0]
In [18]:
#looking at the head
batting_second.head()
Out [18]:
id season city date team1 team2 toss_winner toss_decision result dl_applied winner win_by_runs win_by_wickets player_of_match venue umpire1 umpire2 umpire3
1 2 2017 Pune 2017-04-06 Mumbai Indians Rising Pune Supergiant Rising Pune Supergiant field normal 0 Rising Pune Supergiant 0 7 SPD Smith Maharashtra Cricket Association Stadium A Nand Kishore S Ravi NaN
2 3 2017 Rajkot 2017-04-07 Gujarat Lions Kolkata Knight Riders Kolkata Knight Riders field normal 0 Kolkata Knight Riders 0 10 CA Lynn Saurashtra Cricket Association Stadium Nitin Menon CK Nandan NaN
3 4 2017 Indore 2017-04-08 Rising Pune Supergiant Kings XI Punjab Kings XI Punjab field normal 0 Kings XI Punjab 0 6 GJ Maxwell Holkar Cricket Stadium AK Chaudhary C Shamshuddin NaN
5 6 2017 Hyderabad 2017-04-09 Gujarat Lions Sunrisers Hyderabad Sunrisers Hyderabad field normal 0 Sunrisers Hyderabad 0 9 Rashid Khan Rajiv Gandhi International Stadium, Uppal A Deshmukh NJ Llong NaN
6 7 2017 Mumbai 2017-04-09 Kolkata Knight Riders Mumbai Indians Mumbai Indians field normal 0 Mumbai Indians 0 4 N Rana Wankhede Stadium Nitin Menon CK Nandan NaN
In [19]:
#Making a histogram for frequency of wins w.r.t number of wickets
plt.figure(figsize=(5,5))
plt.hist(batting_second['win_by_wickets'],bins=30)
plt.xlabel('no. of wickets remaining')
plt.ylabel('no. of times/Matches')
plt.show()
In [20]:
#Finding out the frequency of number of wins w.r.t each time after batting second
batting_second['winner'].value_counts()
Out [20]:
Kolkata Knight Riders          56
Mumbai Indians                 50
Royal Challengers Bangalore    48
Chennai Super Kings            48
Rajasthan Royals               46
Kings XI Punjab                42
Delhi Daredevils               42
Sunrisers Hyderabad            27
Gujarat Lions                  12
Deccan Chargers                11
Pune Warriors                   6
Delhi Capitals                  6
Rising Pune Supergiant          5
Kochi Tuskers Kerala            4
Rising Pune Supergiants         3
Name: winner, dtype: int64
In [21]:
#Making a bar plot for top-3 teams with most wins after batting second
plt.figure(figsize=(7,3))
plt.bar(list(batting_second['winner'].value_counts()[0:3].keys()),list(batting_second['winner'].value_counts()[0:3]),color=["purple","blue","red"])
plt.show()
In [22]:
#Making a pie chart for distribution of most wins after batting second
plt.figure(figsize=(7,7))
plt.pie(list(batting_second['winner'].value_counts()),labels=list(batting_second['winner'].value_counts().keys()),autopct='%0.1f%%')
plt.show()
In [23]:
#Looking at the number of matches played each season
ipl['season'].value_counts()
Out [23]:
2013    76
2012    74
2011    73
2010    60
2014    60
2016    60
2018    60
2019    60
2017    59
2015    59
2008    58
2009    57
Name: season, dtype: int64
In [24]:
#Looking at the number of matches played in each city
ipl['city'].value_counts()
Out [24]:
Mumbai            101
Kolkata            77
Delhi              74
Bangalore          66
Hyderabad          64
Chennai            57
Jaipur             47
Chandigarh         46
Pune               38
Durban             15
Bengaluru          14
Visakhapatnam      13
Centurion          12
Ahmedabad          12
Rajkot             10
Mohali             10
Indore              9
Dharamsala          9
Johannesburg        8
Cuttack             7
Ranchi              7
Port Elizabeth      7
Cape Town           7
Abu Dhabi           7
Sharjah             6
Raipur              6
Kochi               5
Kanpur              4
Nagpur              3
Kimberley           3
East London         3
Bloemfontein        2
Name: city, dtype: int64
In [25]:
#Finding out how many times a team has won the match after winning the toss
import numpy as np
np.sum(ipl['toss_winner']==ipl['winner'])
Out [25]:
393
In [26]:
325/636
Out [26]:
0.5110062893081762
In [27]:
deliveries=pd.read_csv('deliveries.csv')
---------------------------------------------------------------------------FileNotFoundError                         Traceback (most recent call last)Cell In[27], line 1
----> 1 deliveries=pd.read_csv('deliveries.csv')
File /lib/python3.11/site-packages/pandas/util/_decorators.py:211, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs)
    209     else:
    210         kwargs[new_arg_name] = new_arg_value
--> 211 return func(*args, **kwargs)
File /lib/python3.11/site-packages/pandas/util/_decorators.py:331, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
    325 if len(args) > num_allow_args:
    326     warnings.warn(
    327         msg.format(arguments=_format_argument_list(allow_args)),
    328         FutureWarning,
    329         stacklevel=find_stack_level(),
    330     )
--> 331 return func(*args, **kwargs)
File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:950, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
    935 kwds_defaults = _refine_defaults_read(
    936     dialect,
    937     delimiter,
   (...)
    946     defaults={"delimiter": ","},
    947 )
    948 kwds.update(kwds_defaults)
--> 950 return _read(filepath_or_buffer, kwds)
File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:605, in _read(filepath_or_buffer, kwds)
    602 _validate_names(kwds.get("names", None))
    604 # Create the parser.
--> 605 parser = TextFileReader(filepath_or_buffer, **kwds)
    607 if chunksize or iterator:
    608     return parser
File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:1442, in TextFileReader.__init__(self, f, engine, **kwds)
   1439     self.options["has_index_names"] = kwds["has_index_names"]
   1441 self.handles: IOHandles | None = None
-> 1442 self._engine = self._make_engine(f, self.engine)
File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:1735, in TextFileReader._make_engine(self, f, engine)
   1733     if "b" not in mode:
   1734         mode += "b"
-> 1735 self.handles = get_handle(
   1736     f,
   1737     mode,
   1738     encoding=self.options.get("encoding", None),
   1739     compression=self.options.get("compression", None),
   1740     memory_map=self.options.get("memory_map", False),
   1741     is_text=is_text,
   1742     errors=self.options.get("encoding_errors", "strict"),
   1743     storage_options=self.options.get("storage_options", None),
   1744 )
   1745 assert self.handles is not None
   1746 f = self.handles.handle
File /lib/python3.11/site-packages/pandas/io/common.py:856, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
    851 elif isinstance(handle, str):
    852     # Check whether the filename is to be opened in binary mode.
    853     # Binary mode does not support 'encoding' and 'newline'.
    854     if ioargs.encoding and "b" not in ioargs.mode:
    855         # Encoding
--> 856         handle = open(
    857             handle,
    858             ioargs.mode,
    859             encoding=ioargs.encoding,
    860             errors=errors,
    861             newline="",
    862         )
    863     else:
    864         # Binary mode
    865         handle = open(handle, ioargs.mode)
FileNotFoundError: [Errno 44] No such file or directory: 'deliveries.csv'
In [28]:
deliveries.head()
---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[28], line 1
----> 1 deliveries.head()
NameError: name 'deliveries' is not defined
In [29]:
deliveries['match_id'].unique()
---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[29], line 1
----> 1 deliveries['match_id'].unique()
NameError: name 'deliveries' is not defined
In [30]:
match_1=deliveries[deliveries['match_id']==1]
---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[30], line 1
----> 1 match_1=deliveries[deliveries['match_id']==1]
NameError: name 'deliveries' is not defined
In [31]:
match_1.head()
---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[31], line 1
----> 1 match_1.head()
NameError: name 'match_1' is not defined
In [32]:
match_1.shape
---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[32], line 1
----> 1 match_1.shape
NameError: name 'match_1' is not defined
In [33]:
srh=match_1[match_1['inning']==1]
---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[33], line 1
----> 1 srh=match_1[match_1['inning']==1]
NameError: name 'match_1' is not defined
In [34]:
srh['batsman_runs'].value_counts()
---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[34], line 1
----> 1 srh['batsman_runs'].value_counts()
NameError: name 'srh' is not defined
In [35]:
srh['dismissal_kind'].value_counts()
---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[35], line 1
----> 1 srh['dismissal_kind'].value_counts()
NameError: name 'srh' is not defined
In [36]:
rcb=match_1[match_1['inning']==2]
---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[36], line 1
----> 1 rcb=match_1[match_1['inning']==2]
NameError: name 'match_1' is not defined
In [37]:
rcb['batsman_runs'].value_counts()
---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[37], line 1
----> 1 rcb['batsman_runs'].value_counts()
NameError: name 'rcb' is not defined
In [38]:
rcb['dismissal_kind'].value_counts()
---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[38], line 1
----> 1 rcb['dismissal_kind'].value_counts()
NameError: name 'rcb' is not defined
In [ ]: