#loading the required libraries
import pandas as pd
from matplotlib import pyplot as plt
#import seaborn as sns
#loading the Transfer list dataset
pla=pd.read_csv('dataset.csv')
#having a glance at records of the dataset
pla
Pos. | Player | Year | Fee | From Club | To Club | |
---|---|---|---|---|---|---|
0 | 1 | Neymar | 2017 | €222m | Barcelona | PSG |
1 | 2 | Kylian Mbappe | 2017 | €145m | Monaco | PSG |
2 | 3 | Joao Felix | 2019 | €126m | Benfica | Atletico Madrid |
3 | 4 | Enzo Fernandez | 2023 | €121m | Benfica | Chelsea |
4 | 5 | Philippe Coutinho | 2018 | €120m | Liverpool | Barcelona |
... | ... | ... | ... | ... | ... | ... |
97 | 90 | Victor Osimhen | 2020 | €50m | Lille | Napoli |
98 | 97 | Anthony Martial | 2015 | €49.3m | Monaco | Manchester United |
99 | 98 | Gylfi Sigurdsson | 2017 | €49.2m | Swansea City | Everton |
100 | 99 | Leroy Sane | 2020 | €49m | Manchester City | Bayern Munich |
101 | 99 | Christian Vieri | 1999 | €49m | Lazio | Inter |
102 rows × 6 columns
#Looking at the number of rows(no. of palyers) and columns in dataset
pla.shape
(102, 6)
#Getting the frequency of teams buying most no. players in the list
pla['To Club'].value_counts()
Chelsea 15 Manchester City 13 Manchester United 12 Barcelona 11 Real Madrid 10 PSG 6 Juventus 6 Arsenal 6 Liverpool 5 Atletico Madrid 3 Bayern Munich 2 Tottenham 2 Shanghai SIPG 2 Inter 2 Newcastle United 1 Manchester City 1 Bayern Munich 1 Lazio 1 Jiangsu Suning 1 Napoli 1 Everton 1 Name: To Club, dtype: int64
#making a bar-plot for the top 5 teams buying most no. players in the list
plt.figure(figsize=(9,5))
plt.bar(list(pla['To Club'].value_counts()[0:5].keys()),list(pla['To Club'].value_counts()[0:5]))
plt.xlabel('Teams')
plt.ylabel('No of times')
plt.show()
#Getting the frequency of team with most sales in th list
pla['From Club'].value_counts()[0:9]
Monaco 8 Juventus 5 Benfica 4 Liverpool 4 Manchester City 4 Borussia Dortmund 4 Real Madrid 4 Chelsea 4 Barcelona 3 Name: From Club, dtype: int64
#replacing the pound and 'm' symbol in it
pla['Fee'] = pla['Fee'].str.replace('€', '').str.replace('m', '').astype(float)
pla
Pos. | Player | Year | Fee | From Club | To Club | |
---|---|---|---|---|---|---|
0 | 1 | Neymar | 2017 | 222.0 | Barcelona | PSG |
1 | 2 | Kylian Mbappe | 2017 | 145.0 | Monaco | PSG |
2 | 3 | Joao Felix | 2019 | 126.0 | Benfica | Atletico Madrid |
3 | 4 | Enzo Fernandez | 2023 | 121.0 | Benfica | Chelsea |
4 | 5 | Philippe Coutinho | 2018 | 120.0 | Liverpool | Barcelona |
... | ... | ... | ... | ... | ... | ... |
97 | 90 | Victor Osimhen | 2020 | 50.0 | Lille | Napoli |
98 | 97 | Anthony Martial | 2015 | 49.3 | Monaco | Manchester United |
99 | 98 | Gylfi Sigurdsson | 2017 | 49.2 | Swansea City | Everton |
100 | 99 | Leroy Sane | 2020 | 49.0 | Manchester City | Bayern Munich |
101 | 99 | Christian Vieri | 1999 | 49.0 | Lazio | Inter |
102 rows × 6 columns
#Extracting the records where a team has purchased a player equal to or with a lesser value than 100
pla[pla['Fee']<=100]
Pos. | Player | Year | Fee | From Club | To Club | |
---|---|---|---|---|---|---|
11 | 12 | Cristiano Ronaldo | 2018 | 100.0 | Real Madrid | Juventus |
12 | 12 | Eden Hazard | 2019 | 100.0 | Chelsea | Real Madrid |
13 | 12 | Mykhailo Mudryk | 2023 | 100.0 | Shakhtar Donetsk | Chelsea |
14 | 14 | Antony | 2022 | 95.0 | Ajax | Manchester United |
15 | 15 | Cristiano Ronaldo | 2009 | 94.0 | Manchester United | Real Madrid |
... | ... | ... | ... | ... | ... | ... |
97 | 90 | Victor Osimhen | 2020 | 50.0 | Lille | Napoli |
98 | 97 | Anthony Martial | 2015 | 49.3 | Monaco | Manchester United |
99 | 98 | Gylfi Sigurdsson | 2017 | 49.2 | Swansea City | Everton |
100 | 99 | Leroy Sane | 2020 | 49.0 | Manchester City | Bayern Munich |
101 | 99 | Christian Vieri | 1999 | 49.0 | Lazio | Inter |
91 rows × 6 columns
PSG_players = pla[pla['To Club'].str.contains('PSG')]
PSG_players
Pos. | Player | Year | Fee | From Club | To Club | |
---|---|---|---|---|---|---|
0 | 1 | Neymar | 2017 | 222.0 | Barcelona | PSG |
1 | 2 | Kylian Mbappe | 2017 | 145.0 | Monaco | PSG |
57 | 56 | Edinson Cavani | 2013 | 64.5 | Napoli | PSG |
60 | 59 | David Luiz | 2014 | 62.5 | Chelsea | PSG |
61 | 60 | Angel Di Maria | 2015 | 61.6 | Manchester United | PSG |
63 | 62 | Achraf Hakimi | 2021 | 60.0 | Inter | PSG |
pla.groupby('To Club')['Fee'].sum().sort_values(ascending=False)
To Club Chelsea 1127.50 Barcelona 882.00 Manchester United 871.00 Manchester City 856.50 Real Madrid 777.30 PSG 615.60 Juventus 464.00 Arsenal 356.65 Liverpool 342.50 Atletico Madrid 257.00 Inter 129.00 Shanghai SIPG 118.90 Tottenham 118.00 Bayern Munich 116.00 Bayern Munich 80.00 Manchester City 75.00 Newcastle United 65.00 Lazio 55.00 Jiangsu Suning 50.00 Napoli 50.00 Everton 49.20 Name: Fee, dtype: float64
#Total fee and no. of players of the top 5 teams that spent most of the money
team_stats = pla.groupby('To Club').agg({'Fee': ['sum', 'count']})
team_stats.columns = ['Total Fee', 'No. of Players']
team_stats = team_stats.sort_values(by='Total Fee', ascending=False)
team_stats.head()
Total Fee | No. of Players | |
---|---|---|
To Club | ||
Chelsea | 1127.5 | 15 |
Barcelona | 882.0 | 11 |
Manchester United | 871.0 | 12 |
Manchester City | 856.5 | 13 |
Real Madrid | 777.3 | 10 |
#Total fee and no. of players of the top 5 teams that spent earned of the money
team_stats = pla.groupby('From Club').agg({'Fee': ['sum', 'count']})
team_stats.columns = ['Total Fee', 'No. of Players']
team_stats = team_stats.sort_values(by='Total Fee', ascending=False)
plt.bar(list(team_stats['Total Fee'][0:5].keys()),list(team_stats['No. of Players'][0:5]))
plt.xlabel('Teams')
plt.ylabel('No. of Players')
plt.show()
team_stats.head()
Total Fee | No. of Players | |
---|---|---|
From Club | ||
Monaco | 577.5 | 8 |
Benfica | 390.0 | 4 |
Juventus | 374.5 | 5 |
Barcelona | 354.0 | 3 |
Liverpool | 330.4 | 4 |