In [1]:
#loading the required libraries
import pandas as pd
from matplotlib import pyplot as plt
#import seaborn as sns
In [2]:
#loading the Transfer list dataset
pla=pd.read_csv('dataset.csv')
In [3]:
#having a glance at records of the dataset
pla
Out [3]:
Pos. Player Year Fee From Club To Club
0 1 Neymar 2017 €222m Barcelona PSG
1 2 Kylian Mbappe 2017 €145m Monaco PSG
2 3 Joao Felix 2019 €126m Benfica Atletico Madrid
3 4 Enzo Fernandez 2023 €121m Benfica Chelsea
4 5 Philippe Coutinho 2018 €120m Liverpool Barcelona
... ... ... ... ... ... ...
97 90 Victor Osimhen 2020 €50m Lille Napoli
98 97 Anthony Martial 2015 €49.3m Monaco Manchester United
99 98 Gylfi Sigurdsson 2017 €49.2m Swansea City Everton
100 99 Leroy Sane 2020 €49m Manchester City Bayern Munich
101 99 Christian Vieri 1999 €49m Lazio Inter

102 rows × 6 columns

In [4]:
#Looking at the number of rows(no. of palyers) and columns in dataset
pla.shape
Out [4]:
(102, 6)
In [5]:
#Getting the frequency of teams buying most no. players in the list
pla['To Club'].value_counts()
Out [5]:
 Chelsea              15
 Manchester City      13
 Manchester United    12
 Barcelona            11
 Real Madrid          10
 PSG                   6
 Juventus              6
 Arsenal               6
 Liverpool             5
 Atletico Madrid       3
 Bayern Munich         2
 Tottenham             2
 Shanghai SIPG         2
 Inter                 2
 Newcastle United      1
 Manchester City       1
 Bayern Munich         1
 Lazio                 1
 Jiangsu Suning        1
 Napoli                1
 Everton               1
Name: To Club, dtype: int64
In [6]:
#making a bar-plot for the top 5 teams buying most no. players in the list
plt.figure(figsize=(9,5))
plt.bar(list(pla['To Club'].value_counts()[0:5].keys()),list(pla['To Club'].value_counts()[0:5]))
plt.xlabel('Teams')
plt.ylabel('No of times')
plt.show()
In [7]:
#Getting the frequency of team with most sales in th list
pla['From Club'].value_counts()[0:9]
Out [7]:
Monaco                8
Juventus              5
Benfica               4
Liverpool             4
Manchester City       4
Borussia Dortmund     4
Real Madrid           4
Chelsea               4
Barcelona             3
Name: From Club, dtype: int64
In [8]:
#replacing the pound and 'm' symbol in it
pla['Fee'] = pla['Fee'].str.replace('€', '').str.replace('m', '').astype(float)
In [9]:
pla
Out [9]:
Pos. Player Year Fee From Club To Club
0 1 Neymar 2017 222.0 Barcelona PSG
1 2 Kylian Mbappe 2017 145.0 Monaco PSG
2 3 Joao Felix 2019 126.0 Benfica Atletico Madrid
3 4 Enzo Fernandez 2023 121.0 Benfica Chelsea
4 5 Philippe Coutinho 2018 120.0 Liverpool Barcelona
... ... ... ... ... ... ...
97 90 Victor Osimhen 2020 50.0 Lille Napoli
98 97 Anthony Martial 2015 49.3 Monaco Manchester United
99 98 Gylfi Sigurdsson 2017 49.2 Swansea City Everton
100 99 Leroy Sane 2020 49.0 Manchester City Bayern Munich
101 99 Christian Vieri 1999 49.0 Lazio Inter

102 rows × 6 columns

In [10]:
#Extracting the records where a team has purchased a player equal to or with a lesser value than 100
pla[pla['Fee']<=100]
Out [10]:
Pos. Player Year Fee From Club To Club
11 12 Cristiano Ronaldo 2018 100.0 Real Madrid Juventus
12 12 Eden Hazard 2019 100.0 Chelsea Real Madrid
13 12 Mykhailo Mudryk 2023 100.0 Shakhtar Donetsk Chelsea
14 14 Antony 2022 95.0 Ajax Manchester United
15 15 Cristiano Ronaldo 2009 94.0 Manchester United Real Madrid
... ... ... ... ... ... ...
97 90 Victor Osimhen 2020 50.0 Lille Napoli
98 97 Anthony Martial 2015 49.3 Monaco Manchester United
99 98 Gylfi Sigurdsson 2017 49.2 Swansea City Everton
100 99 Leroy Sane 2020 49.0 Manchester City Bayern Munich
101 99 Christian Vieri 1999 49.0 Lazio Inter

91 rows × 6 columns

In [11]:
PSG_players = pla[pla['To Club'].str.contains('PSG')]
PSG_players
Out [11]:
Pos. Player Year Fee From Club To Club
0 1 Neymar 2017 222.0 Barcelona PSG
1 2 Kylian Mbappe 2017 145.0 Monaco PSG
57 56 Edinson Cavani 2013 64.5 Napoli PSG
60 59 David Luiz 2014 62.5 Chelsea PSG
61 60 Angel Di Maria 2015 61.6 Manchester United PSG
63 62 Achraf Hakimi 2021 60.0 Inter PSG
In [12]:
pla.groupby('To Club')['Fee'].sum().sort_values(ascending=False)
Out [12]:
To Club
 Chelsea              1127.50
 Barcelona             882.00
 Manchester United     871.00
 Manchester City       856.50
 Real Madrid           777.30
 PSG                   615.60
 Juventus              464.00
 Arsenal               356.65
 Liverpool             342.50
 Atletico Madrid       257.00
 Inter                 129.00
 Shanghai SIPG         118.90
 Tottenham             118.00
 Bayern Munich         116.00
 Bayern Munich          80.00
 Manchester City        75.00
 Newcastle United       65.00
 Lazio                  55.00
 Jiangsu Suning         50.00
 Napoli                 50.00
 Everton                49.20
Name: Fee, dtype: float64
In [13]:
#Total fee and no. of players of the top 5 teams that spent most of the money
team_stats = pla.groupby('To Club').agg({'Fee': ['sum', 'count']})
team_stats.columns = ['Total Fee', 'No. of Players']
team_stats = team_stats.sort_values(by='Total Fee', ascending=False)
team_stats.head()
Out [13]:
Total Fee No. of Players
To Club
Chelsea 1127.5 15
Barcelona 882.0 11
Manchester United 871.0 12
Manchester City 856.5 13
Real Madrid 777.3 10
In [16]:
#Total fee and no. of players of the top 5 teams that spent earned of the money
team_stats = pla.groupby('From Club').agg({'Fee': ['sum', 'count']})
team_stats.columns = ['Total Fee', 'No. of Players']
team_stats = team_stats.sort_values(by='Total Fee', ascending=False)
plt.bar(list(team_stats['Total Fee'][0:5].keys()),list(team_stats['No. of Players'][0:5]))
plt.xlabel('Teams')
plt.ylabel('No. of Players')
plt.show()
team_stats.head()
Out [16]:
Total Fee No. of Players
From Club
Monaco 577.5 8
Benfica 390.0 4
Juventus 374.5 5
Barcelona 354.0 3
Liverpool 330.4 4
In [ ]: