In [1]:

#loading the required libraries
import pandas as pd
from matplotlib import pyplot as plt
#import seaborn as sns

In [2]:

#loading the ipl matches dataset
ipl=pd.read_csv('matches.csv')

In [39]:

#having a glance at the first five records of the dataset
ipl.head()

Out [39]:

	id	season	city	date	team1	team2	toss_winner	toss_decision	result	winner	win_by_runs	win_by_wickets	player_of_match	venue	umpire1	umpire2	umpire3
0	1	2017	Hyderabad	2017-04-05	Sunrisers Hyderabad	Royal Challengers Bangalore	Royal Challengers Bangalore	field	normal	Sunrisers Hyderabad	35	0	Yuvraj Singh	Rajiv Gandhi International Stadium, Uppal	AY Dandekar	NJ Llong	NaN
1	2	2017	Pune	2017-04-06	Mumbai Indians	Rising Pune Supergiant	Rising Pune Supergiant	field	normal	Rising Pune Supergiant	0	7	SPD Smith	Maharashtra Cricket Association Stadium	A Nand Kishore	S Ravi	NaN
2	3	2017	Rajkot	2017-04-07	Gujarat Lions	Kolkata Knight Riders	Kolkata Knight Riders	field	normal	Kolkata Knight Riders	0	10	CA Lynn	Saurashtra Cricket Association Stadium	Nitin Menon	CK Nandan	NaN
3	4	2017	Indore	2017-04-08	Rising Pune Supergiant	Kings XI Punjab	Kings XI Punjab	field	normal	Kings XI Punjab	0	6	GJ Maxwell	Holkar Cricket Stadium	AK Chaudhary	C Shamshuddin	NaN
4	5	2017	Bangalore	2017-04-08	Royal Challengers Bangalore	Delhi Daredevils	Royal Challengers Bangalore	bat	normal	Royal Challengers Bangalore	15	0	KM Jadhav	M Chinnaswamy Stadium	NaN	NaN	NaN

In [4]:

#Lookin at the number of rows and columns in the dataset
ipl.shape

Out [4]:

(756, 18)

In [5]:

#Getting the frequency of most man of the match awards
ipl['player_of_match'].value_counts()

Out [5]:

CH Gayle          21
AB de Villiers    20
RG Sharma         17
MS Dhoni          17
DA Warner         17
                  ..
PD Collingwood     1
NV Ojha            1
AC Voges           1
J Theron           1
S Hetmyer          1
Name: player_of_match, Length: 226, dtype: int64

In [6]:

#Getting the top 10 players with most man of the match awards
ipl['player_of_match'].value_counts()[0:10]

Out [6]:

CH Gayle          21
AB de Villiers    20
RG Sharma         17
MS Dhoni          17
DA Warner         17
YK Pathan         16
SR Watson         15
SK Raina          14
G Gambhir         13
MEK Hussey        12
Name: player_of_match, dtype: int64

In [7]:

#Getting the top 5 players with most man of the match awards
ipl['player_of_match'].value_counts()[0:5]

Out [7]:

CH Gayle          21
AB de Villiers    20
RG Sharma         17
MS Dhoni          17
DA Warner         17
Name: player_of_match, dtype: int64

In [8]:

#making a bar-plot for the top 5 players with most man of the match awards
plt.figure(figsize=(9,5))
plt.bar(list(ipl['player_of_match'].value_counts()[0:5].keys()),list(ipl['player_of_match'].value_counts()[0:5]))
plt.show()

In [9]:

#Getting the frequency of result column
ipl['result'].value_counts()

Out [9]:

normal       743
tie            9
no result      4
Name: result, dtype: int64

In [10]:

#Finding out the number of toss wins w.r.t each team
ipl['toss_winner'].value_counts()

Out [10]:

Mumbai Indians                 98
Kolkata Knight Riders          92
Chennai Super Kings            89
Royal Challengers Bangalore    81
Kings XI Punjab                81
Delhi Daredevils               80
Rajasthan Royals               80
Sunrisers Hyderabad            46
Deccan Chargers                43
Pune Warriors                  20
Gujarat Lions                  15
Delhi Capitals                 10
Kochi Tuskers Kerala            8
Rising Pune Supergiants         7
Rising Pune Supergiant          6
Name: toss_winner, dtype: int64

In [11]:

#Extracting the records where a team won batting first
batting_first=ipl[ipl['win_by_runs']!=0]

In [12]:

#Looking at the head
batting_first.head()

Out [12]:

	id	season	city	date	team1	team2	toss_winner	toss_decision	result	winner	win_by_runs	player_of_match	venue	umpire1	umpire2	umpire3
0	1	2017	Hyderabad	2017-04-05	Sunrisers Hyderabad	Royal Challengers Bangalore	Royal Challengers Bangalore	field	normal	Sunrisers Hyderabad	35	Yuvraj Singh	Rajiv Gandhi International Stadium, Uppal	AY Dandekar	NJ Llong	NaN
4	5	2017	Bangalore	2017-04-08	Royal Challengers Bangalore	Delhi Daredevils	Royal Challengers Bangalore	bat	normal	Royal Challengers Bangalore	15	KM Jadhav	M Chinnaswamy Stadium	NaN	NaN	NaN
8	9	2017	Pune	2017-04-11	Delhi Daredevils	Rising Pune Supergiant	Rising Pune Supergiant	field	normal	Delhi Daredevils	97	SV Samson	Maharashtra Cricket Association Stadium	AY Dandekar	S Ravi	NaN
13	14	2017	Kolkata	2017-04-15	Kolkata Knight Riders	Sunrisers Hyderabad	Sunrisers Hyderabad	field	normal	Kolkata Knight Riders	17	RV Uthappa	Eden Gardens	AY Dandekar	NJ Llong	NaN
14	15	2017	Delhi	2017-04-15	Delhi Daredevils	Kings XI Punjab	Delhi Daredevils	bat	normal	Delhi Daredevils	51	CJ Anderson	Feroz Shah Kotla	YC Barde	Nitin Menon	NaN

In [13]:

#Making a histogram 
plt.figure(figsize=(7,3))
plt.hist(batting_first['win_by_runs'])
plt.title('Distribution if Runs')
plt.xlabel('no. of Runs')
plt.ylabel('no. of times/Matches')
plt.show()

In [14]:

#Finding out the number of wins w.r.t each team after batting first
batting_first['winner'].value_counts()

Out [14]:

Mumbai Indians                 57
Chennai Super Kings            52
Kings XI Punjab                38
Kolkata Knight Riders          36
Royal Challengers Bangalore    35
Sunrisers Hyderabad            30
Rajasthan Royals               27
Delhi Daredevils               25
Deccan Chargers                18
Pune Warriors                   6
Rising Pune Supergiant          5
Delhi Capitals                  3
Kochi Tuskers Kerala            2
Rising Pune Supergiants         2
Gujarat Lions                   1
Name: winner, dtype: int64

In [15]:

#Making a bar-plot for top 3 teams with most wins after batting first
plt.figure(figsize=(7,3))
plt.bar(list(batting_first['winner'].value_counts()[0:3].keys()),list(batting_first['winner'].value_counts()[0:3]),color=["blue","yellow","orange"])
plt.show()

In [ ]:

In [16]:

#Making a pie chart
plt.figure(figsize=(5,5))
plt.pie(list(batting_first['winner'].value_counts()),labels=list(batting_first['winner'].value_counts().keys()),autopct='%0.1f%%')
plt.show()

In [17]:

#extracting those records where a team has won after batting second
batting_second=ipl[ipl['win_by_wickets']!=0]

In [18]:

#looking at the head
batting_second.head()

Out [18]:

	id	season	city	date	team1	team2	toss_winner	toss_decision	result	winner	win_by_wickets	player_of_match	venue	umpire1	umpire2	umpire3
1	2	2017	Pune	2017-04-06	Mumbai Indians	Rising Pune Supergiant	Rising Pune Supergiant	field	normal	Rising Pune Supergiant	7	SPD Smith	Maharashtra Cricket Association Stadium	A Nand Kishore	S Ravi	NaN
2	3	2017	Rajkot	2017-04-07	Gujarat Lions	Kolkata Knight Riders	Kolkata Knight Riders	field	normal	Kolkata Knight Riders	10	CA Lynn	Saurashtra Cricket Association Stadium	Nitin Menon	CK Nandan	NaN
3	4	2017	Indore	2017-04-08	Rising Pune Supergiant	Kings XI Punjab	Kings XI Punjab	field	normal	Kings XI Punjab	6	GJ Maxwell	Holkar Cricket Stadium	AK Chaudhary	C Shamshuddin	NaN
5	6	2017	Hyderabad	2017-04-09	Gujarat Lions	Sunrisers Hyderabad	Sunrisers Hyderabad	field	normal	Sunrisers Hyderabad	9	Rashid Khan	Rajiv Gandhi International Stadium, Uppal	A Deshmukh	NJ Llong	NaN
6	7	2017	Mumbai	2017-04-09	Kolkata Knight Riders	Mumbai Indians	Mumbai Indians	field	normal	Mumbai Indians	4	N Rana	Wankhede Stadium	Nitin Menon	CK Nandan	NaN

In [19]:

#Making a histogram for frequency of wins w.r.t number of wickets
plt.figure(figsize=(5,5))
plt.hist(batting_second['win_by_wickets'],bins=30)
plt.xlabel('no. of wickets remaining')
plt.ylabel('no. of times/Matches')
plt.show()

In [20]:

#Finding out the frequency of number of wins w.r.t each time after batting second
batting_second['winner'].value_counts()

Out [20]:

Kolkata Knight Riders          56
Mumbai Indians                 50
Royal Challengers Bangalore    48
Chennai Super Kings            48
Rajasthan Royals               46
Kings XI Punjab                42
Delhi Daredevils               42
Sunrisers Hyderabad            27
Gujarat Lions                  12
Deccan Chargers                11
Pune Warriors                   6
Delhi Capitals                  6
Rising Pune Supergiant          5
Kochi Tuskers Kerala            4
Rising Pune Supergiants         3
Name: winner, dtype: int64

In [21]:

#Making a bar plot for top-3 teams with most wins after batting second
plt.figure(figsize=(7,3))
plt.bar(list(batting_second['winner'].value_counts()[0:3].keys()),list(batting_second['winner'].value_counts()[0:3]),color=["purple","blue","red"])
plt.show()

In [22]:

#Making a pie chart for distribution of most wins after batting second
plt.figure(figsize=(7,7))
plt.pie(list(batting_second['winner'].value_counts()),labels=list(batting_second['winner'].value_counts().keys()),autopct='%0.1f%%')
plt.show()

In [23]:

#Looking at the number of matches played each season
ipl['season'].value_counts()

Out [23]:

2013    76
2012    74
2011    73
2010    60
2014    60
2016    60
2018    60
2019    60
2017    59
2015    59
2008    58
2009    57
Name: season, dtype: int64

In [24]:

#Looking at the number of matches played in each city
ipl['city'].value_counts()

Out [24]:

Mumbai            101
Kolkata            77
Delhi              74
Bangalore          66
Hyderabad          64
Chennai            57
Jaipur             47
Chandigarh         46
Pune               38
Durban             15
Bengaluru          14
Visakhapatnam      13
Centurion          12
Ahmedabad          12
Rajkot             10
Mohali             10
Indore              9
Dharamsala          9
Johannesburg        8
Cuttack             7
Ranchi              7
Port Elizabeth      7
Cape Town           7
Abu Dhabi           7
Sharjah             6
Raipur              6
Kochi               5
Kanpur              4
Nagpur              3
Kimberley           3
East London         3
Bloemfontein        2
Name: city, dtype: int64

In [25]:

#Finding out how many times a team has won the match after winning the toss
import numpy as np
np.sum(ipl['toss_winner']==ipl['winner'])

Out [25]:

In [26]:

325/636

Out [26]:

0.5110062893081762

In [27]:

deliveries=pd.read_csv('deliveries.csv')

---------------------------------------------------------------------------FileNotFoundError                         Traceback (most recent call last)Cell In[27], line 1
----> 1 deliveries=pd.read_csv('deliveries.csv')
File /lib/python3.11/site-packages/pandas/util/_decorators.py:211, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs)
    209     else:
    210         kwargs[new_arg_name] = new_arg_value
--> 211 return func(*args, **kwargs)
File /lib/python3.11/site-packages/pandas/util/_decorators.py:331, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
    325 if len(args) > num_allow_args:
    326     warnings.warn(
    327         msg.format(arguments=_format_argument_list(allow_args)),
    328         FutureWarning,
    329         stacklevel=find_stack_level(),
    330     )
--> 331 return func(*args, **kwargs)
File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:950, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
    935 kwds_defaults = _refine_defaults_read(
    936     dialect,
    937     delimiter,
   (...)
    946     defaults={"delimiter": ","},
    947 )
    948 kwds.update(kwds_defaults)
--> 950 return _read(filepath_or_buffer, kwds)
File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:605, in _read(filepath_or_buffer, kwds)
    602 _validate_names(kwds.get("names", None))
    604 # Create the parser.
--> 605 parser = TextFileReader(filepath_or_buffer, **kwds)
    607 if chunksize or iterator:
    608     return parser
File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:1442, in TextFileReader.__init__(self, f, engine, **kwds)
   1439     self.options["has_index_names"] = kwds["has_index_names"]
   1441 self.handles: IOHandles | None = None
-> 1442 self._engine = self._make_engine(f, self.engine)
File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:1735, in TextFileReader._make_engine(self, f, engine)
   1733     if "b" not in mode:
   1734         mode += "b"
-> 1735 self.handles = get_handle(
   1736     f,
   1737     mode,
   1738     encoding=self.options.get("encoding", None),
   1739     compression=self.options.get("compression", None),
   1740     memory_map=self.options.get("memory_map", False),
   1741     is_text=is_text,
   1742     errors=self.options.get("encoding_errors", "strict"),
   1743     storage_options=self.options.get("storage_options", None),
   1744 )
   1745 assert self.handles is not None
   1746 f = self.handles.handle
File /lib/python3.11/site-packages/pandas/io/common.py:856, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
    851 elif isinstance(handle, str):
    852     # Check whether the filename is to be opened in binary mode.
    853     # Binary mode does not support 'encoding' and 'newline'.
    854     if ioargs.encoding and "b" not in ioargs.mode:
    855         # Encoding
--> 856         handle = open(
    857             handle,
    858             ioargs.mode,
    859             encoding=ioargs.encoding,
    860             errors=errors,
    861             newline="",
    862         )
    863     else:
    864         # Binary mode
    865         handle = open(handle, ioargs.mode)
FileNotFoundError: [Errno 44] No such file or directory: 'deliveries.csv'

In [28]:

deliveries.head()

---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[28], line 1
----> 1 deliveries.head()
NameError: name 'deliveries' is not defined

In [29]:

deliveries['match_id'].unique()

---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[29], line 1
----> 1 deliveries['match_id'].unique()
NameError: name 'deliveries' is not defined

In [30]:

match_1=deliveries[deliveries['match_id']==1]

---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[30], line 1
----> 1 match_1=deliveries[deliveries['match_id']==1]
NameError: name 'deliveries' is not defined

In [31]:

match_1.head()

---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[31], line 1
----> 1 match_1.head()
NameError: name 'match_1' is not defined

In [32]:

match_1.shape

---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[32], line 1
----> 1 match_1.shape
NameError: name 'match_1' is not defined

In [33]:

srh=match_1[match_1['inning']==1]

---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[33], line 1
----> 1 srh=match_1[match_1['inning']==1]
NameError: name 'match_1' is not defined

In [34]:

srh['batsman_runs'].value_counts()

---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[34], line 1
----> 1 srh['batsman_runs'].value_counts()
NameError: name 'srh' is not defined

In [35]:

srh['dismissal_kind'].value_counts()

---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[35], line 1
----> 1 srh['dismissal_kind'].value_counts()
NameError: name 'srh' is not defined

In [36]:

rcb=match_1[match_1['inning']==2]

---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[36], line 1
----> 1 rcb=match_1[match_1['inning']==2]
NameError: name 'match_1' is not defined

In [37]:

rcb['batsman_runs'].value_counts()

---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[37], line 1
----> 1 rcb['batsman_runs'].value_counts()
NameError: name 'rcb' is not defined

In [38]:

rcb['dismissal_kind'].value_counts()

---------------------------------------------------------------------------NameError                                 Traceback (most recent call last)Cell In[38], line 1
----> 1 rcb['dismissal_kind'].value_counts()
NameError: name 'rcb' is not defined

In [ ]: